1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
3 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
34 #include "insn-codes.h"
35 #include "insn-attr.h"
42 #include "diagnostic-core.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "common/common-target.h"
49 #include "langhooks.h"
54 #include "tm-constrs.h"
58 #include "sched-int.h"
62 #include "diagnostic.h"
/* Tracking of the upper 128 bits of AVX registers, used by the
   vzeroupper insertion/deletion pass.
   NOTE(review): the member list of this enum was dropped by the
   extraction; the members below are reconstructed from the pass's
   usage elsewhere in this file (`unknown', `unused', `used') — verify
   against the upstream sources.  */

enum upper_128bits_state
{
  /* Nothing is known about the upper 128 bits.  */
  unknown = 0,
  /* Upper 128 bits are known to be unused.  */
  unused,
  /* Upper 128 bits are (possibly) used.  */
  used
};

/* Per-basic-block dataflow record for the vzeroupper pass.  */
typedef struct block_info_def
{
  /* State of the upper 128bits of AVX registers at exit.  */
  enum upper_128bits_state state;
  /* TRUE if state of the upper 128bits of AVX registers is unchanged
     in this block.  */
  bool unchanged;
  /* TRUE if block has been processed.  */
  bool processed;
  /* TRUE if block has been scanned.  */
  bool scanned;
  /* Previous state of the upper 128bits of AVX registers at entry.  */
  enum upper_128bits_state prev;
} *block_info;

/* The record above is stashed in each basic block's `aux' field.  */
#define BLOCK_INFO(B) ((block_info) (B)->aux)
/* Classification of how a call site interacts with 256bit AVX
   registers; drives whether a vzeroupper must be kept, moved or
   deleted around the call.
   NOTE(review): the identifiers `call_no_avx256' and
   `vzeroupper_intrinsic' were dropped by the extraction (only their
   comments survived); reconstructed — verify against the upstream
   sources.  */

enum call_avx256_state
{
  /* Callee returns 256bit AVX register.  */
  callee_return_avx256 = -1,
  /* Callee returns and passes 256bit AVX register.  */
  callee_return_pass_avx256,
  /* Callee passes 256bit AVX register.  */
  callee_pass_avx256,
  /* Callee doesn't return nor pass 256bit AVX register, or no
     256bit AVX register in function return.  */
  call_no_avx256,
  /* vzeroupper intrinsic.  */
  vzeroupper_intrinsic
};
103 /* Check if a 256bit AVX register is referenced in stores. */
106 check_avx256_stores (rtx dest
, const_rtx set
, void *data
)
109 && VALID_AVX256_REG_MODE (GET_MODE (dest
)))
110 || (GET_CODE (set
) == SET
111 && REG_P (SET_SRC (set
))
112 && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set
)))))
114 enum upper_128bits_state
*state
115 = (enum upper_128bits_state
*) data
;
120 /* Helper function for move_or_delete_vzeroupper_1. Look for vzeroupper
121 in basic block BB. Delete it if upper 128bit AVX registers are
122 unused. If it isn't deleted, move it to just before a jump insn.
124 STATE is state of the upper 128bits of AVX registers at entry. */
127 move_or_delete_vzeroupper_2 (basic_block bb
,
128 enum upper_128bits_state state
)
131 rtx vzeroupper_insn
= NULL_RTX
;
136 if (BLOCK_INFO (bb
)->unchanged
)
139 fprintf (dump_file
, " [bb %i] unchanged: upper 128bits: %d\n",
142 BLOCK_INFO (bb
)->state
= state
;
146 if (BLOCK_INFO (bb
)->scanned
&& BLOCK_INFO (bb
)->prev
== state
)
149 fprintf (dump_file
, " [bb %i] scanned: upper 128bits: %d\n",
150 bb
->index
, BLOCK_INFO (bb
)->state
);
154 BLOCK_INFO (bb
)->prev
= state
;
157 fprintf (dump_file
, " [bb %i] entry: upper 128bits: %d\n",
162 /* BB_END changes when it is deleted. */
163 bb_end
= BB_END (bb
);
165 while (insn
!= bb_end
)
167 insn
= NEXT_INSN (insn
);
169 if (!NONDEBUG_INSN_P (insn
))
172 /* Move vzeroupper before jump/call. */
173 if (JUMP_P (insn
) || CALL_P (insn
))
175 if (!vzeroupper_insn
)
178 if (PREV_INSN (insn
) != vzeroupper_insn
)
182 fprintf (dump_file
, "Move vzeroupper after:\n");
183 print_rtl_single (dump_file
, PREV_INSN (insn
));
184 fprintf (dump_file
, "before:\n");
185 print_rtl_single (dump_file
, insn
);
187 reorder_insns_nobb (vzeroupper_insn
, vzeroupper_insn
,
190 vzeroupper_insn
= NULL_RTX
;
194 pat
= PATTERN (insn
);
196 /* Check insn for vzeroupper intrinsic. */
197 if (GET_CODE (pat
) == UNSPEC_VOLATILE
198 && XINT (pat
, 1) == UNSPECV_VZEROUPPER
)
202 /* Found vzeroupper intrinsic. */
203 fprintf (dump_file
, "Found vzeroupper:\n");
204 print_rtl_single (dump_file
, insn
);
209 /* Check insn for vzeroall intrinsic. */
210 if (GET_CODE (pat
) == PARALLEL
211 && GET_CODE (XVECEXP (pat
, 0, 0)) == UNSPEC_VOLATILE
212 && XINT (XVECEXP (pat
, 0, 0), 1) == UNSPECV_VZEROALL
)
217 /* Delete pending vzeroupper insertion. */
220 delete_insn (vzeroupper_insn
);
221 vzeroupper_insn
= NULL_RTX
;
224 else if (state
!= used
)
226 note_stores (pat
, check_avx256_stores
, &state
);
233 /* Process vzeroupper intrinsic. */
234 avx256
= INTVAL (XVECEXP (pat
, 0, 0));
238 /* Since the upper 128bits are cleared, callee must not pass
239 256bit AVX register. We only need to check if callee
240 returns 256bit AVX register. */
241 if (avx256
== callee_return_avx256
)
247 /* Remove unnecessary vzeroupper since upper 128bits are
251 fprintf (dump_file
, "Delete redundant vzeroupper:\n");
252 print_rtl_single (dump_file
, insn
);
258 /* Set state to UNUSED if callee doesn't return 256bit AVX
260 if (avx256
!= callee_return_pass_avx256
)
263 if (avx256
== callee_return_pass_avx256
264 || avx256
== callee_pass_avx256
)
266 /* Must remove vzeroupper since callee passes in 256bit
270 fprintf (dump_file
, "Delete callee pass vzeroupper:\n");
271 print_rtl_single (dump_file
, insn
);
277 vzeroupper_insn
= insn
;
283 BLOCK_INFO (bb
)->state
= state
;
284 BLOCK_INFO (bb
)->unchanged
= unchanged
;
285 BLOCK_INFO (bb
)->scanned
= true;
288 fprintf (dump_file
, " [bb %i] exit: %s: upper 128bits: %d\n",
289 bb
->index
, unchanged
? "unchanged" : "changed",
293 /* Helper function for move_or_delete_vzeroupper. Process vzeroupper
294 in BLOCK and check its predecessor blocks. Treat UNKNOWN state
295 as USED if UNKNOWN_IS_UNUSED is true. Return TRUE if the exit
299 move_or_delete_vzeroupper_1 (basic_block block
, bool unknown_is_unused
)
303 enum upper_128bits_state state
, old_state
, new_state
;
307 fprintf (dump_file
, " Process [bb %i]: status: %d\n",
308 block
->index
, BLOCK_INFO (block
)->processed
);
310 if (BLOCK_INFO (block
)->processed
)
315 /* Check all predecessor edges of this block. */
316 seen_unknown
= false;
317 FOR_EACH_EDGE (e
, ei
, block
->preds
)
321 switch (BLOCK_INFO (e
->src
)->state
)
324 if (!unknown_is_unused
)
338 old_state
= BLOCK_INFO (block
)->state
;
339 move_or_delete_vzeroupper_2 (block
, state
);
340 new_state
= BLOCK_INFO (block
)->state
;
342 if (state
!= unknown
|| new_state
== used
)
343 BLOCK_INFO (block
)->processed
= true;
345 /* Need to rescan if the upper 128bits of AVX registers are changed
347 if (new_state
!= old_state
)
349 if (new_state
== used
)
350 cfun
->machine
->rescan_vzeroupper_p
= 1;
357 /* Go through the instruction stream looking for vzeroupper. Delete
358 it if upper 128bit AVX registers are unused. If it isn't deleted,
359 move it to just before a jump insn. */
362 move_or_delete_vzeroupper (void)
367 fibheap_t worklist
, pending
, fibheap_swap
;
368 sbitmap visited
, in_worklist
, in_pending
, sbitmap_swap
;
373 /* Set up block info for each basic block. */
374 alloc_aux_for_blocks (sizeof (struct block_info_def
));
376 /* Process outgoing edges of entry point. */
378 fprintf (dump_file
, "Process outgoing edges of entry point\n");
380 FOR_EACH_EDGE (e
, ei
, ENTRY_BLOCK_PTR
->succs
)
382 move_or_delete_vzeroupper_2 (e
->dest
,
383 cfun
->machine
->caller_pass_avx256_p
385 BLOCK_INFO (e
->dest
)->processed
= true;
388 /* Compute reverse completion order of depth first search of the CFG
389 so that the data-flow runs faster. */
390 rc_order
= XNEWVEC (int, n_basic_blocks
- NUM_FIXED_BLOCKS
);
391 bb_order
= XNEWVEC (int, last_basic_block
);
392 pre_and_rev_post_order_compute (NULL
, rc_order
, false);
393 for (i
= 0; i
< n_basic_blocks
- NUM_FIXED_BLOCKS
; i
++)
394 bb_order
[rc_order
[i
]] = i
;
397 worklist
= fibheap_new ();
398 pending
= fibheap_new ();
399 visited
= sbitmap_alloc (last_basic_block
);
400 in_worklist
= sbitmap_alloc (last_basic_block
);
401 in_pending
= sbitmap_alloc (last_basic_block
);
402 sbitmap_zero (in_worklist
);
404 /* Don't check outgoing edges of entry point. */
405 sbitmap_ones (in_pending
);
407 if (BLOCK_INFO (bb
)->processed
)
408 RESET_BIT (in_pending
, bb
->index
);
411 move_or_delete_vzeroupper_1 (bb
, false);
412 fibheap_insert (pending
, bb_order
[bb
->index
], bb
);
416 fprintf (dump_file
, "Check remaining basic blocks\n");
418 while (!fibheap_empty (pending
))
420 fibheap_swap
= pending
;
422 worklist
= fibheap_swap
;
423 sbitmap_swap
= in_pending
;
424 in_pending
= in_worklist
;
425 in_worklist
= sbitmap_swap
;
427 sbitmap_zero (visited
);
429 cfun
->machine
->rescan_vzeroupper_p
= 0;
431 while (!fibheap_empty (worklist
))
433 bb
= (basic_block
) fibheap_extract_min (worklist
);
434 RESET_BIT (in_worklist
, bb
->index
);
435 gcc_assert (!TEST_BIT (visited
, bb
->index
));
436 if (!TEST_BIT (visited
, bb
->index
))
440 SET_BIT (visited
, bb
->index
);
442 if (move_or_delete_vzeroupper_1 (bb
, false))
443 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
445 if (e
->dest
== EXIT_BLOCK_PTR
446 || BLOCK_INFO (e
->dest
)->processed
)
449 if (TEST_BIT (visited
, e
->dest
->index
))
451 if (!TEST_BIT (in_pending
, e
->dest
->index
))
453 /* Send E->DEST to next round. */
454 SET_BIT (in_pending
, e
->dest
->index
);
455 fibheap_insert (pending
,
456 bb_order
[e
->dest
->index
],
460 else if (!TEST_BIT (in_worklist
, e
->dest
->index
))
462 /* Add E->DEST to current round. */
463 SET_BIT (in_worklist
, e
->dest
->index
);
464 fibheap_insert (worklist
, bb_order
[e
->dest
->index
],
471 if (!cfun
->machine
->rescan_vzeroupper_p
)
476 fibheap_delete (worklist
);
477 fibheap_delete (pending
);
478 sbitmap_free (visited
);
479 sbitmap_free (in_worklist
);
480 sbitmap_free (in_pending
);
483 fprintf (dump_file
, "Process remaining basic blocks\n");
486 move_or_delete_vzeroupper_1 (bb
, true);
488 free_aux_for_blocks ();
491 static rtx
legitimize_dllimport_symbol (rtx
, bool);
/* Default stack-probe limit when the target headers do not provide
   one; -1 means "no limit".  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* NOTE(review): the closing #endif above was missing from this
   extraction; restored to keep the conditional balanced.  */

/* Return index of given mode in mult and division cost tables.
   NOTE(review): the final `: 4' fallback arm was missing from this
   extraction; restored so every other mode maps to the last table
   slot — verify against the upstream sources.  */
#define MODE_INDEX(mode)			\
  ((mode) == QImode ? 0				\
   : (mode) == HImode ? 1			\
   : (mode) == SImode ? 2			\
   : (mode) == DImode ? 3			\
   : 4)
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)

/* String-operation strategy table entry that always punts to the
   library routine; used to fill the table halves a cost table does
   not care about.  */
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
512 struct processor_costs ix86_size_cost
= {/* costs for tuning for size */
513 COSTS_N_BYTES (2), /* cost of an add instruction */
514 COSTS_N_BYTES (3), /* cost of a lea instruction */
515 COSTS_N_BYTES (2), /* variable shift costs */
516 COSTS_N_BYTES (3), /* constant shift costs */
517 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
518 COSTS_N_BYTES (3), /* HI */
519 COSTS_N_BYTES (3), /* SI */
520 COSTS_N_BYTES (3), /* DI */
521 COSTS_N_BYTES (5)}, /* other */
522 0, /* cost of multiply per each bit set */
523 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
524 COSTS_N_BYTES (3), /* HI */
525 COSTS_N_BYTES (3), /* SI */
526 COSTS_N_BYTES (3), /* DI */
527 COSTS_N_BYTES (5)}, /* other */
528 COSTS_N_BYTES (3), /* cost of movsx */
529 COSTS_N_BYTES (3), /* cost of movzx */
530 0, /* "large" insn */
532 2, /* cost for loading QImode using movzbl */
533 {2, 2, 2}, /* cost of loading integer registers
534 in QImode, HImode and SImode.
535 Relative to reg-reg move (2). */
536 {2, 2, 2}, /* cost of storing integer registers */
537 2, /* cost of reg,reg fld/fst */
538 {2, 2, 2}, /* cost of loading fp registers
539 in SFmode, DFmode and XFmode */
540 {2, 2, 2}, /* cost of storing fp registers
541 in SFmode, DFmode and XFmode */
542 3, /* cost of moving MMX register */
543 {3, 3}, /* cost of loading MMX registers
544 in SImode and DImode */
545 {3, 3}, /* cost of storing MMX registers
546 in SImode and DImode */
547 3, /* cost of moving SSE register */
548 {3, 3, 3}, /* cost of loading SSE registers
549 in SImode, DImode and TImode */
550 {3, 3, 3}, /* cost of storing SSE registers
551 in SImode, DImode and TImode */
552 3, /* MMX or SSE register to integer */
553 0, /* size of l1 cache */
554 0, /* size of l2 cache */
555 0, /* size of prefetch block */
556 0, /* number of parallel prefetches */
558 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
559 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
560 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
561 COSTS_N_BYTES (2), /* cost of FABS instruction. */
562 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
563 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
564 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
565 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}},
566 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
567 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}},
568 1, /* scalar_stmt_cost. */
569 1, /* scalar load_cost. */
570 1, /* scalar_store_cost. */
571 1, /* vec_stmt_cost. */
572 1, /* vec_to_scalar_cost. */
573 1, /* scalar_to_vec_cost. */
574 1, /* vec_align_load_cost. */
575 1, /* vec_unalign_load_cost. */
576 1, /* vec_store_cost. */
577 1, /* cond_taken_branch_cost. */
578 1, /* cond_not_taken_branch_cost. */
581 /* Processor costs (relative to an add) */
583 struct processor_costs i386_cost
= { /* 386 specific costs */
584 COSTS_N_INSNS (1), /* cost of an add instruction */
585 COSTS_N_INSNS (1), /* cost of a lea instruction */
586 COSTS_N_INSNS (3), /* variable shift costs */
587 COSTS_N_INSNS (2), /* constant shift costs */
588 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
589 COSTS_N_INSNS (6), /* HI */
590 COSTS_N_INSNS (6), /* SI */
591 COSTS_N_INSNS (6), /* DI */
592 COSTS_N_INSNS (6)}, /* other */
593 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
594 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
595 COSTS_N_INSNS (23), /* HI */
596 COSTS_N_INSNS (23), /* SI */
597 COSTS_N_INSNS (23), /* DI */
598 COSTS_N_INSNS (23)}, /* other */
599 COSTS_N_INSNS (3), /* cost of movsx */
600 COSTS_N_INSNS (2), /* cost of movzx */
601 15, /* "large" insn */
603 4, /* cost for loading QImode using movzbl */
604 {2, 4, 2}, /* cost of loading integer registers
605 in QImode, HImode and SImode.
606 Relative to reg-reg move (2). */
607 {2, 4, 2}, /* cost of storing integer registers */
608 2, /* cost of reg,reg fld/fst */
609 {8, 8, 8}, /* cost of loading fp registers
610 in SFmode, DFmode and XFmode */
611 {8, 8, 8}, /* cost of storing fp registers
612 in SFmode, DFmode and XFmode */
613 2, /* cost of moving MMX register */
614 {4, 8}, /* cost of loading MMX registers
615 in SImode and DImode */
616 {4, 8}, /* cost of storing MMX registers
617 in SImode and DImode */
618 2, /* cost of moving SSE register */
619 {4, 8, 16}, /* cost of loading SSE registers
620 in SImode, DImode and TImode */
621 {4, 8, 16}, /* cost of storing SSE registers
622 in SImode, DImode and TImode */
623 3, /* MMX or SSE register to integer */
624 0, /* size of l1 cache */
625 0, /* size of l2 cache */
626 0, /* size of prefetch block */
627 0, /* number of parallel prefetches */
629 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
630 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
631 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
632 COSTS_N_INSNS (22), /* cost of FABS instruction. */
633 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
634 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
635 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
636 DUMMY_STRINGOP_ALGS
},
637 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
638 DUMMY_STRINGOP_ALGS
},
639 1, /* scalar_stmt_cost. */
640 1, /* scalar load_cost. */
641 1, /* scalar_store_cost. */
642 1, /* vec_stmt_cost. */
643 1, /* vec_to_scalar_cost. */
644 1, /* scalar_to_vec_cost. */
645 1, /* vec_align_load_cost. */
646 2, /* vec_unalign_load_cost. */
647 1, /* vec_store_cost. */
648 3, /* cond_taken_branch_cost. */
649 1, /* cond_not_taken_branch_cost. */
653 struct processor_costs i486_cost
= { /* 486 specific costs */
654 COSTS_N_INSNS (1), /* cost of an add instruction */
655 COSTS_N_INSNS (1), /* cost of a lea instruction */
656 COSTS_N_INSNS (3), /* variable shift costs */
657 COSTS_N_INSNS (2), /* constant shift costs */
658 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
659 COSTS_N_INSNS (12), /* HI */
660 COSTS_N_INSNS (12), /* SI */
661 COSTS_N_INSNS (12), /* DI */
662 COSTS_N_INSNS (12)}, /* other */
663 1, /* cost of multiply per each bit set */
664 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
665 COSTS_N_INSNS (40), /* HI */
666 COSTS_N_INSNS (40), /* SI */
667 COSTS_N_INSNS (40), /* DI */
668 COSTS_N_INSNS (40)}, /* other */
669 COSTS_N_INSNS (3), /* cost of movsx */
670 COSTS_N_INSNS (2), /* cost of movzx */
671 15, /* "large" insn */
673 4, /* cost for loading QImode using movzbl */
674 {2, 4, 2}, /* cost of loading integer registers
675 in QImode, HImode and SImode.
676 Relative to reg-reg move (2). */
677 {2, 4, 2}, /* cost of storing integer registers */
678 2, /* cost of reg,reg fld/fst */
679 {8, 8, 8}, /* cost of loading fp registers
680 in SFmode, DFmode and XFmode */
681 {8, 8, 8}, /* cost of storing fp registers
682 in SFmode, DFmode and XFmode */
683 2, /* cost of moving MMX register */
684 {4, 8}, /* cost of loading MMX registers
685 in SImode and DImode */
686 {4, 8}, /* cost of storing MMX registers
687 in SImode and DImode */
688 2, /* cost of moving SSE register */
689 {4, 8, 16}, /* cost of loading SSE registers
690 in SImode, DImode and TImode */
691 {4, 8, 16}, /* cost of storing SSE registers
692 in SImode, DImode and TImode */
693 3, /* MMX or SSE register to integer */
694 4, /* size of l1 cache. 486 has 8kB cache
695 shared for code and data, so 4kB is
696 not really precise. */
697 4, /* size of l2 cache */
698 0, /* size of prefetch block */
699 0, /* number of parallel prefetches */
701 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
702 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
703 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
704 COSTS_N_INSNS (3), /* cost of FABS instruction. */
705 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
706 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
707 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
708 DUMMY_STRINGOP_ALGS
},
709 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
710 DUMMY_STRINGOP_ALGS
},
711 1, /* scalar_stmt_cost. */
712 1, /* scalar load_cost. */
713 1, /* scalar_store_cost. */
714 1, /* vec_stmt_cost. */
715 1, /* vec_to_scalar_cost. */
716 1, /* scalar_to_vec_cost. */
717 1, /* vec_align_load_cost. */
718 2, /* vec_unalign_load_cost. */
719 1, /* vec_store_cost. */
720 3, /* cond_taken_branch_cost. */
721 1, /* cond_not_taken_branch_cost. */
725 struct processor_costs pentium_cost
= {
726 COSTS_N_INSNS (1), /* cost of an add instruction */
727 COSTS_N_INSNS (1), /* cost of a lea instruction */
728 COSTS_N_INSNS (4), /* variable shift costs */
729 COSTS_N_INSNS (1), /* constant shift costs */
730 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
731 COSTS_N_INSNS (11), /* HI */
732 COSTS_N_INSNS (11), /* SI */
733 COSTS_N_INSNS (11), /* DI */
734 COSTS_N_INSNS (11)}, /* other */
735 0, /* cost of multiply per each bit set */
736 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
737 COSTS_N_INSNS (25), /* HI */
738 COSTS_N_INSNS (25), /* SI */
739 COSTS_N_INSNS (25), /* DI */
740 COSTS_N_INSNS (25)}, /* other */
741 COSTS_N_INSNS (3), /* cost of movsx */
742 COSTS_N_INSNS (2), /* cost of movzx */
743 8, /* "large" insn */
745 6, /* cost for loading QImode using movzbl */
746 {2, 4, 2}, /* cost of loading integer registers
747 in QImode, HImode and SImode.
748 Relative to reg-reg move (2). */
749 {2, 4, 2}, /* cost of storing integer registers */
750 2, /* cost of reg,reg fld/fst */
751 {2, 2, 6}, /* cost of loading fp registers
752 in SFmode, DFmode and XFmode */
753 {4, 4, 6}, /* cost of storing fp registers
754 in SFmode, DFmode and XFmode */
755 8, /* cost of moving MMX register */
756 {8, 8}, /* cost of loading MMX registers
757 in SImode and DImode */
758 {8, 8}, /* cost of storing MMX registers
759 in SImode and DImode */
760 2, /* cost of moving SSE register */
761 {4, 8, 16}, /* cost of loading SSE registers
762 in SImode, DImode and TImode */
763 {4, 8, 16}, /* cost of storing SSE registers
764 in SImode, DImode and TImode */
765 3, /* MMX or SSE register to integer */
766 8, /* size of l1 cache. */
767 8, /* size of l2 cache */
768 0, /* size of prefetch block */
769 0, /* number of parallel prefetches */
771 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
772 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
773 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
774 COSTS_N_INSNS (1), /* cost of FABS instruction. */
775 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
776 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
777 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
778 DUMMY_STRINGOP_ALGS
},
779 {{libcall
, {{-1, rep_prefix_4_byte
}}},
780 DUMMY_STRINGOP_ALGS
},
781 1, /* scalar_stmt_cost. */
782 1, /* scalar load_cost. */
783 1, /* scalar_store_cost. */
784 1, /* vec_stmt_cost. */
785 1, /* vec_to_scalar_cost. */
786 1, /* scalar_to_vec_cost. */
787 1, /* vec_align_load_cost. */
788 2, /* vec_unalign_load_cost. */
789 1, /* vec_store_cost. */
790 3, /* cond_taken_branch_cost. */
791 1, /* cond_not_taken_branch_cost. */
795 struct processor_costs pentiumpro_cost
= {
796 COSTS_N_INSNS (1), /* cost of an add instruction */
797 COSTS_N_INSNS (1), /* cost of a lea instruction */
798 COSTS_N_INSNS (1), /* variable shift costs */
799 COSTS_N_INSNS (1), /* constant shift costs */
800 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
801 COSTS_N_INSNS (4), /* HI */
802 COSTS_N_INSNS (4), /* SI */
803 COSTS_N_INSNS (4), /* DI */
804 COSTS_N_INSNS (4)}, /* other */
805 0, /* cost of multiply per each bit set */
806 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
807 COSTS_N_INSNS (17), /* HI */
808 COSTS_N_INSNS (17), /* SI */
809 COSTS_N_INSNS (17), /* DI */
810 COSTS_N_INSNS (17)}, /* other */
811 COSTS_N_INSNS (1), /* cost of movsx */
812 COSTS_N_INSNS (1), /* cost of movzx */
813 8, /* "large" insn */
815 2, /* cost for loading QImode using movzbl */
816 {4, 4, 4}, /* cost of loading integer registers
817 in QImode, HImode and SImode.
818 Relative to reg-reg move (2). */
819 {2, 2, 2}, /* cost of storing integer registers */
820 2, /* cost of reg,reg fld/fst */
821 {2, 2, 6}, /* cost of loading fp registers
822 in SFmode, DFmode and XFmode */
823 {4, 4, 6}, /* cost of storing fp registers
824 in SFmode, DFmode and XFmode */
825 2, /* cost of moving MMX register */
826 {2, 2}, /* cost of loading MMX registers
827 in SImode and DImode */
828 {2, 2}, /* cost of storing MMX registers
829 in SImode and DImode */
830 2, /* cost of moving SSE register */
831 {2, 2, 8}, /* cost of loading SSE registers
832 in SImode, DImode and TImode */
833 {2, 2, 8}, /* cost of storing SSE registers
834 in SImode, DImode and TImode */
835 3, /* MMX or SSE register to integer */
836 8, /* size of l1 cache. */
837 256, /* size of l2 cache */
838 32, /* size of prefetch block */
839 6, /* number of parallel prefetches */
841 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
842 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
843 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
844 COSTS_N_INSNS (2), /* cost of FABS instruction. */
845 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
846 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
847 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
848 (we ensure the alignment). For small blocks inline loop is still a
849 noticeable win, for bigger blocks either rep movsl or rep movsb is
850 way to go. Rep movsb has apparently more expensive startup time in CPU,
851 but after 4K the difference is down in the noise. */
852 {{rep_prefix_4_byte
, {{128, loop
}, {1024, unrolled_loop
},
853 {8192, rep_prefix_4_byte
}, {-1, rep_prefix_1_byte
}}},
854 DUMMY_STRINGOP_ALGS
},
855 {{rep_prefix_4_byte
, {{1024, unrolled_loop
},
856 {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
857 DUMMY_STRINGOP_ALGS
},
858 1, /* scalar_stmt_cost. */
859 1, /* scalar load_cost. */
860 1, /* scalar_store_cost. */
861 1, /* vec_stmt_cost. */
862 1, /* vec_to_scalar_cost. */
863 1, /* scalar_to_vec_cost. */
864 1, /* vec_align_load_cost. */
865 2, /* vec_unalign_load_cost. */
866 1, /* vec_store_cost. */
867 3, /* cond_taken_branch_cost. */
868 1, /* cond_not_taken_branch_cost. */
872 struct processor_costs geode_cost
= {
873 COSTS_N_INSNS (1), /* cost of an add instruction */
874 COSTS_N_INSNS (1), /* cost of a lea instruction */
875 COSTS_N_INSNS (2), /* variable shift costs */
876 COSTS_N_INSNS (1), /* constant shift costs */
877 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
878 COSTS_N_INSNS (4), /* HI */
879 COSTS_N_INSNS (7), /* SI */
880 COSTS_N_INSNS (7), /* DI */
881 COSTS_N_INSNS (7)}, /* other */
882 0, /* cost of multiply per each bit set */
883 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
884 COSTS_N_INSNS (23), /* HI */
885 COSTS_N_INSNS (39), /* SI */
886 COSTS_N_INSNS (39), /* DI */
887 COSTS_N_INSNS (39)}, /* other */
888 COSTS_N_INSNS (1), /* cost of movsx */
889 COSTS_N_INSNS (1), /* cost of movzx */
890 8, /* "large" insn */
892 1, /* cost for loading QImode using movzbl */
893 {1, 1, 1}, /* cost of loading integer registers
894 in QImode, HImode and SImode.
895 Relative to reg-reg move (2). */
896 {1, 1, 1}, /* cost of storing integer registers */
897 1, /* cost of reg,reg fld/fst */
898 {1, 1, 1}, /* cost of loading fp registers
899 in SFmode, DFmode and XFmode */
900 {4, 6, 6}, /* cost of storing fp registers
901 in SFmode, DFmode and XFmode */
903 1, /* cost of moving MMX register */
904 {1, 1}, /* cost of loading MMX registers
905 in SImode and DImode */
906 {1, 1}, /* cost of storing MMX registers
907 in SImode and DImode */
908 1, /* cost of moving SSE register */
909 {1, 1, 1}, /* cost of loading SSE registers
910 in SImode, DImode and TImode */
911 {1, 1, 1}, /* cost of storing SSE registers
912 in SImode, DImode and TImode */
913 1, /* MMX or SSE register to integer */
914 64, /* size of l1 cache. */
915 128, /* size of l2 cache. */
916 32, /* size of prefetch block */
917 1, /* number of parallel prefetches */
919 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
920 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
921 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
922 COSTS_N_INSNS (1), /* cost of FABS instruction. */
923 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
924 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
925 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
926 DUMMY_STRINGOP_ALGS
},
927 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
928 DUMMY_STRINGOP_ALGS
},
929 1, /* scalar_stmt_cost. */
930 1, /* scalar load_cost. */
931 1, /* scalar_store_cost. */
932 1, /* vec_stmt_cost. */
933 1, /* vec_to_scalar_cost. */
934 1, /* scalar_to_vec_cost. */
935 1, /* vec_align_load_cost. */
936 2, /* vec_unalign_load_cost. */
937 1, /* vec_store_cost. */
938 3, /* cond_taken_branch_cost. */
939 1, /* cond_not_taken_branch_cost. */
943 struct processor_costs k6_cost
= {
944 COSTS_N_INSNS (1), /* cost of an add instruction */
945 COSTS_N_INSNS (2), /* cost of a lea instruction */
946 COSTS_N_INSNS (1), /* variable shift costs */
947 COSTS_N_INSNS (1), /* constant shift costs */
948 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
949 COSTS_N_INSNS (3), /* HI */
950 COSTS_N_INSNS (3), /* SI */
951 COSTS_N_INSNS (3), /* DI */
952 COSTS_N_INSNS (3)}, /* other */
953 0, /* cost of multiply per each bit set */
954 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
955 COSTS_N_INSNS (18), /* HI */
956 COSTS_N_INSNS (18), /* SI */
957 COSTS_N_INSNS (18), /* DI */
958 COSTS_N_INSNS (18)}, /* other */
959 COSTS_N_INSNS (2), /* cost of movsx */
960 COSTS_N_INSNS (2), /* cost of movzx */
961 8, /* "large" insn */
963 3, /* cost for loading QImode using movzbl */
964 {4, 5, 4}, /* cost of loading integer registers
965 in QImode, HImode and SImode.
966 Relative to reg-reg move (2). */
967 {2, 3, 2}, /* cost of storing integer registers */
968 4, /* cost of reg,reg fld/fst */
969 {6, 6, 6}, /* cost of loading fp registers
970 in SFmode, DFmode and XFmode */
971 {4, 4, 4}, /* cost of storing fp registers
972 in SFmode, DFmode and XFmode */
973 2, /* cost of moving MMX register */
974 {2, 2}, /* cost of loading MMX registers
975 in SImode and DImode */
976 {2, 2}, /* cost of storing MMX registers
977 in SImode and DImode */
978 2, /* cost of moving SSE register */
979 {2, 2, 8}, /* cost of loading SSE registers
980 in SImode, DImode and TImode */
981 {2, 2, 8}, /* cost of storing SSE registers
982 in SImode, DImode and TImode */
983 6, /* MMX or SSE register to integer */
984 32, /* size of l1 cache. */
985 32, /* size of l2 cache. Some models
986 have integrated l2 cache, but
987 optimizing for k6 is not important
988 enough to worry about that. */
989 32, /* size of prefetch block */
990 1, /* number of parallel prefetches */
992 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
993 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
994 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
995 COSTS_N_INSNS (2), /* cost of FABS instruction. */
996 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
997 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
998 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
999 DUMMY_STRINGOP_ALGS
},
1000 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
1001 DUMMY_STRINGOP_ALGS
},
1002 1, /* scalar_stmt_cost. */
1003 1, /* scalar load_cost. */
1004 1, /* scalar_store_cost. */
1005 1, /* vec_stmt_cost. */
1006 1, /* vec_to_scalar_cost. */
1007 1, /* scalar_to_vec_cost. */
1008 1, /* vec_align_load_cost. */
1009 2, /* vec_unalign_load_cost. */
1010 1, /* vec_store_cost. */
1011 3, /* cond_taken_branch_cost. */
1012 1, /* cond_not_taken_branch_cost. */
1016 struct processor_costs athlon_cost
= {
1017 COSTS_N_INSNS (1), /* cost of an add instruction */
1018 COSTS_N_INSNS (2), /* cost of a lea instruction */
1019 COSTS_N_INSNS (1), /* variable shift costs */
1020 COSTS_N_INSNS (1), /* constant shift costs */
1021 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
1022 COSTS_N_INSNS (5), /* HI */
1023 COSTS_N_INSNS (5), /* SI */
1024 COSTS_N_INSNS (5), /* DI */
1025 COSTS_N_INSNS (5)}, /* other */
1026 0, /* cost of multiply per each bit set */
1027 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1028 COSTS_N_INSNS (26), /* HI */
1029 COSTS_N_INSNS (42), /* SI */
1030 COSTS_N_INSNS (74), /* DI */
1031 COSTS_N_INSNS (74)}, /* other */
1032 COSTS_N_INSNS (1), /* cost of movsx */
1033 COSTS_N_INSNS (1), /* cost of movzx */
1034 8, /* "large" insn */
1036 4, /* cost for loading QImode using movzbl */
1037 {3, 4, 3}, /* cost of loading integer registers
1038 in QImode, HImode and SImode.
1039 Relative to reg-reg move (2). */
1040 {3, 4, 3}, /* cost of storing integer registers */
1041 4, /* cost of reg,reg fld/fst */
1042 {4, 4, 12}, /* cost of loading fp registers
1043 in SFmode, DFmode and XFmode */
1044 {6, 6, 8}, /* cost of storing fp registers
1045 in SFmode, DFmode and XFmode */
1046 2, /* cost of moving MMX register */
1047 {4, 4}, /* cost of loading MMX registers
1048 in SImode and DImode */
1049 {4, 4}, /* cost of storing MMX registers
1050 in SImode and DImode */
1051 2, /* cost of moving SSE register */
1052 {4, 4, 6}, /* cost of loading SSE registers
1053 in SImode, DImode and TImode */
1054 {4, 4, 5}, /* cost of storing SSE registers
1055 in SImode, DImode and TImode */
1056 5, /* MMX or SSE register to integer */
1057 64, /* size of l1 cache. */
1058 256, /* size of l2 cache. */
1059 64, /* size of prefetch block */
1060 6, /* number of parallel prefetches */
1061 5, /* Branch cost */
1062 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1063 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1064 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
1065 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1066 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1067 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1068 /* For some reason, Athlon deals better with REP prefix (relative to loops)
1069 compared to K8. Alignment becomes important after 8 bytes for memcpy and
1070 128 bytes for memset. */
1071 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1072 DUMMY_STRINGOP_ALGS
},
1073 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1074 DUMMY_STRINGOP_ALGS
},
1075 1, /* scalar_stmt_cost. */
1076 1, /* scalar load_cost. */
1077 1, /* scalar_store_cost. */
1078 1, /* vec_stmt_cost. */
1079 1, /* vec_to_scalar_cost. */
1080 1, /* scalar_to_vec_cost. */
1081 1, /* vec_align_load_cost. */
1082 2, /* vec_unalign_load_cost. */
1083 1, /* vec_store_cost. */
1084 3, /* cond_taken_branch_cost. */
1085 1, /* cond_not_taken_branch_cost. */
1089 struct processor_costs k8_cost
= {
1090 COSTS_N_INSNS (1), /* cost of an add instruction */
1091 COSTS_N_INSNS (2), /* cost of a lea instruction */
1092 COSTS_N_INSNS (1), /* variable shift costs */
1093 COSTS_N_INSNS (1), /* constant shift costs */
1094 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1095 COSTS_N_INSNS (4), /* HI */
1096 COSTS_N_INSNS (3), /* SI */
1097 COSTS_N_INSNS (4), /* DI */
1098 COSTS_N_INSNS (5)}, /* other */
1099 0, /* cost of multiply per each bit set */
1100 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1101 COSTS_N_INSNS (26), /* HI */
1102 COSTS_N_INSNS (42), /* SI */
1103 COSTS_N_INSNS (74), /* DI */
1104 COSTS_N_INSNS (74)}, /* other */
1105 COSTS_N_INSNS (1), /* cost of movsx */
1106 COSTS_N_INSNS (1), /* cost of movzx */
1107 8, /* "large" insn */
1109 4, /* cost for loading QImode using movzbl */
1110 {3, 4, 3}, /* cost of loading integer registers
1111 in QImode, HImode and SImode.
1112 Relative to reg-reg move (2). */
1113 {3, 4, 3}, /* cost of storing integer registers */
1114 4, /* cost of reg,reg fld/fst */
1115 {4, 4, 12}, /* cost of loading fp registers
1116 in SFmode, DFmode and XFmode */
1117 {6, 6, 8}, /* cost of storing fp registers
1118 in SFmode, DFmode and XFmode */
1119 2, /* cost of moving MMX register */
1120 {3, 3}, /* cost of loading MMX registers
1121 in SImode and DImode */
1122 {4, 4}, /* cost of storing MMX registers
1123 in SImode and DImode */
1124 2, /* cost of moving SSE register */
1125 {4, 3, 6}, /* cost of loading SSE registers
1126 in SImode, DImode and TImode */
1127 {4, 4, 5}, /* cost of storing SSE registers
1128 in SImode, DImode and TImode */
1129 5, /* MMX or SSE register to integer */
1130 64, /* size of l1 cache. */
1131 512, /* size of l2 cache. */
1132 64, /* size of prefetch block */
1133 /* New AMD processors never drop prefetches; if they cannot be performed
1134 immediately, they are queued. We set number of simultaneous prefetches
1135 to a large constant to reflect this (it probably is not a good idea not
1136 to limit number of prefetches at all, as their execution also takes some
1138 100, /* number of parallel prefetches */
1139 3, /* Branch cost */
1140 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1141 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1142 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1143 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1144 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1145 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1146 /* K8 has optimized REP instruction for medium sized blocks, but for very
1147 small blocks it is better to use loop. For large blocks, libcall can
1148 do nontemporary accesses and beat inline considerably. */
1149 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
1150 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1151 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
1152 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1153 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1154 4, /* scalar_stmt_cost. */
1155 2, /* scalar load_cost. */
1156 2, /* scalar_store_cost. */
1157 5, /* vec_stmt_cost. */
1158 0, /* vec_to_scalar_cost. */
1159 2, /* scalar_to_vec_cost. */
1160 2, /* vec_align_load_cost. */
1161 3, /* vec_unalign_load_cost. */
1162 3, /* vec_store_cost. */
1163 3, /* cond_taken_branch_cost. */
1164 2, /* cond_not_taken_branch_cost. */
1167 struct processor_costs amdfam10_cost
= {
1168 COSTS_N_INSNS (1), /* cost of an add instruction */
1169 COSTS_N_INSNS (2), /* cost of a lea instruction */
1170 COSTS_N_INSNS (1), /* variable shift costs */
1171 COSTS_N_INSNS (1), /* constant shift costs */
1172 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1173 COSTS_N_INSNS (4), /* HI */
1174 COSTS_N_INSNS (3), /* SI */
1175 COSTS_N_INSNS (4), /* DI */
1176 COSTS_N_INSNS (5)}, /* other */
1177 0, /* cost of multiply per each bit set */
1178 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1179 COSTS_N_INSNS (35), /* HI */
1180 COSTS_N_INSNS (51), /* SI */
1181 COSTS_N_INSNS (83), /* DI */
1182 COSTS_N_INSNS (83)}, /* other */
1183 COSTS_N_INSNS (1), /* cost of movsx */
1184 COSTS_N_INSNS (1), /* cost of movzx */
1185 8, /* "large" insn */
1187 4, /* cost for loading QImode using movzbl */
1188 {3, 4, 3}, /* cost of loading integer registers
1189 in QImode, HImode and SImode.
1190 Relative to reg-reg move (2). */
1191 {3, 4, 3}, /* cost of storing integer registers */
1192 4, /* cost of reg,reg fld/fst */
1193 {4, 4, 12}, /* cost of loading fp registers
1194 in SFmode, DFmode and XFmode */
1195 {6, 6, 8}, /* cost of storing fp registers
1196 in SFmode, DFmode and XFmode */
1197 2, /* cost of moving MMX register */
1198 {3, 3}, /* cost of loading MMX registers
1199 in SImode and DImode */
1200 {4, 4}, /* cost of storing MMX registers
1201 in SImode and DImode */
1202 2, /* cost of moving SSE register */
1203 {4, 4, 3}, /* cost of loading SSE registers
1204 in SImode, DImode and TImode */
1205 {4, 4, 5}, /* cost of storing SSE registers
1206 in SImode, DImode and TImode */
1207 3, /* MMX or SSE register to integer */
1209 MOVD reg64, xmmreg Double FSTORE 4
1210 MOVD reg32, xmmreg Double FSTORE 4
1212 MOVD reg64, xmmreg Double FADD 3
1214 MOVD reg32, xmmreg Double FADD 3
1216 64, /* size of l1 cache. */
1217 512, /* size of l2 cache. */
1218 64, /* size of prefetch block */
1219 /* New AMD processors never drop prefetches; if they cannot be performed
1220 immediately, they are queued. We set number of simultaneous prefetches
1221 to a large constant to reflect this (it probably is not a good idea not
1222 to limit number of prefetches at all, as their execution also takes some
1224 100, /* number of parallel prefetches */
1225 2, /* Branch cost */
1226 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1227 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1228 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1229 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1230 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1231 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1233 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
1234 very small blocks it is better to use loop. For large blocks, libcall can
1235 do nontemporary accesses and beat inline considerably. */
1236 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
1237 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1238 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
1239 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1240 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1241 4, /* scalar_stmt_cost. */
1242 2, /* scalar load_cost. */
1243 2, /* scalar_store_cost. */
1244 6, /* vec_stmt_cost. */
1245 0, /* vec_to_scalar_cost. */
1246 2, /* scalar_to_vec_cost. */
1247 2, /* vec_align_load_cost. */
1248 2, /* vec_unalign_load_cost. */
1249 2, /* vec_store_cost. */
1250 2, /* cond_taken_branch_cost. */
1251 1, /* cond_not_taken_branch_cost. */
1254 struct processor_costs bdver1_cost
= {
1255 COSTS_N_INSNS (1), /* cost of an add instruction */
1256 COSTS_N_INSNS (1), /* cost of a lea instruction */
1257 COSTS_N_INSNS (1), /* variable shift costs */
1258 COSTS_N_INSNS (1), /* constant shift costs */
1259 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1260 COSTS_N_INSNS (4), /* HI */
1261 COSTS_N_INSNS (4), /* SI */
1262 COSTS_N_INSNS (6), /* DI */
1263 COSTS_N_INSNS (6)}, /* other */
1264 0, /* cost of multiply per each bit set */
1265 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1266 COSTS_N_INSNS (35), /* HI */
1267 COSTS_N_INSNS (51), /* SI */
1268 COSTS_N_INSNS (83), /* DI */
1269 COSTS_N_INSNS (83)}, /* other */
1270 COSTS_N_INSNS (1), /* cost of movsx */
1271 COSTS_N_INSNS (1), /* cost of movzx */
1272 8, /* "large" insn */
1274 4, /* cost for loading QImode using movzbl */
1275 {5, 5, 4}, /* cost of loading integer registers
1276 in QImode, HImode and SImode.
1277 Relative to reg-reg move (2). */
1278 {4, 4, 4}, /* cost of storing integer registers */
1279 2, /* cost of reg,reg fld/fst */
1280 {5, 5, 12}, /* cost of loading fp registers
1281 in SFmode, DFmode and XFmode */
1282 {4, 4, 8}, /* cost of storing fp registers
1283 in SFmode, DFmode and XFmode */
1284 2, /* cost of moving MMX register */
1285 {4, 4}, /* cost of loading MMX registers
1286 in SImode and DImode */
1287 {4, 4}, /* cost of storing MMX registers
1288 in SImode and DImode */
1289 2, /* cost of moving SSE register */
1290 {4, 4, 4}, /* cost of loading SSE registers
1291 in SImode, DImode and TImode */
1292 {4, 4, 4}, /* cost of storing SSE registers
1293 in SImode, DImode and TImode */
1294 2, /* MMX or SSE register to integer */
1296 MOVD reg64, xmmreg Double FSTORE 4
1297 MOVD reg32, xmmreg Double FSTORE 4
1299 MOVD reg64, xmmreg Double FADD 3
1301 MOVD reg32, xmmreg Double FADD 3
1303 16, /* size of l1 cache. */
1304 2048, /* size of l2 cache. */
1305 64, /* size of prefetch block */
1306 /* New AMD processors never drop prefetches; if they cannot be performed
1307 immediately, they are queued. We set number of simultaneous prefetches
1308 to a large constant to reflect this (it probably is not a good idea not
1309 to limit number of prefetches at all, as their execution also takes some
1311 100, /* number of parallel prefetches */
1312 2, /* Branch cost */
1313 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1314 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1315 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1316 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1317 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1318 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1320 /* BDVER1 has optimized REP instruction for medium sized blocks, but for
1321 very small blocks it is better to use loop. For large blocks, libcall
1322 can do nontemporary accesses and beat inline considerably. */
1323 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
1324 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1325 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
1326 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1327 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1328 6, /* scalar_stmt_cost. */
1329 4, /* scalar load_cost. */
1330 4, /* scalar_store_cost. */
1331 6, /* vec_stmt_cost. */
1332 0, /* vec_to_scalar_cost. */
1333 2, /* scalar_to_vec_cost. */
1334 4, /* vec_align_load_cost. */
1335 4, /* vec_unalign_load_cost. */
1336 4, /* vec_store_cost. */
1337 2, /* cond_taken_branch_cost. */
1338 1, /* cond_not_taken_branch_cost. */
1341 struct processor_costs bdver2_cost
= {
1342 COSTS_N_INSNS (1), /* cost of an add instruction */
1343 COSTS_N_INSNS (1), /* cost of a lea instruction */
1344 COSTS_N_INSNS (1), /* variable shift costs */
1345 COSTS_N_INSNS (1), /* constant shift costs */
1346 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1347 COSTS_N_INSNS (4), /* HI */
1348 COSTS_N_INSNS (4), /* SI */
1349 COSTS_N_INSNS (6), /* DI */
1350 COSTS_N_INSNS (6)}, /* other */
1351 0, /* cost of multiply per each bit set */
1352 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1353 COSTS_N_INSNS (35), /* HI */
1354 COSTS_N_INSNS (51), /* SI */
1355 COSTS_N_INSNS (83), /* DI */
1356 COSTS_N_INSNS (83)}, /* other */
1357 COSTS_N_INSNS (1), /* cost of movsx */
1358 COSTS_N_INSNS (1), /* cost of movzx */
1359 8, /* "large" insn */
1361 4, /* cost for loading QImode using movzbl */
1362 {5, 5, 4}, /* cost of loading integer registers
1363 in QImode, HImode and SImode.
1364 Relative to reg-reg move (2). */
1365 {4, 4, 4}, /* cost of storing integer registers */
1366 2, /* cost of reg,reg fld/fst */
1367 {5, 5, 12}, /* cost of loading fp registers
1368 in SFmode, DFmode and XFmode */
1369 {4, 4, 8}, /* cost of storing fp registers
1370 in SFmode, DFmode and XFmode */
1371 2, /* cost of moving MMX register */
1372 {4, 4}, /* cost of loading MMX registers
1373 in SImode and DImode */
1374 {4, 4}, /* cost of storing MMX registers
1375 in SImode and DImode */
1376 2, /* cost of moving SSE register */
1377 {4, 4, 4}, /* cost of loading SSE registers
1378 in SImode, DImode and TImode */
1379 {4, 4, 4}, /* cost of storing SSE registers
1380 in SImode, DImode and TImode */
1381 2, /* MMX or SSE register to integer */
1383 MOVD reg64, xmmreg Double FSTORE 4
1384 MOVD reg32, xmmreg Double FSTORE 4
1386 MOVD reg64, xmmreg Double FADD 3
1388 MOVD reg32, xmmreg Double FADD 3
1390 16, /* size of l1 cache. */
1391 2048, /* size of l2 cache. */
1392 64, /* size of prefetch block */
1393 /* New AMD processors never drop prefetches; if they cannot be performed
1394 immediately, they are queued. We set number of simultaneous prefetches
1395 to a large constant to reflect this (it probably is not a good idea not
1396 to limit number of prefetches at all, as their execution also takes some
1398 100, /* number of parallel prefetches */
1399 2, /* Branch cost */
1400 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1401 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1402 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1403 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1404 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1405 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1407 /* BDVER2 has optimized REP instruction for medium sized blocks, but for
1408 very small blocks it is better to use loop. For large blocks, libcall
1409 can do nontemporary accesses and beat inline considerably. */
1410 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
1411 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1412 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
1413 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1414 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1415 6, /* scalar_stmt_cost. */
1416 4, /* scalar load_cost. */
1417 4, /* scalar_store_cost. */
1418 6, /* vec_stmt_cost. */
1419 0, /* vec_to_scalar_cost. */
1420 2, /* scalar_to_vec_cost. */
1421 4, /* vec_align_load_cost. */
1422 4, /* vec_unalign_load_cost. */
1423 4, /* vec_store_cost. */
1424 2, /* cond_taken_branch_cost. */
1425 1, /* cond_not_taken_branch_cost. */
1428 struct processor_costs btver1_cost
= {
1429 COSTS_N_INSNS (1), /* cost of an add instruction */
1430 COSTS_N_INSNS (2), /* cost of a lea instruction */
1431 COSTS_N_INSNS (1), /* variable shift costs */
1432 COSTS_N_INSNS (1), /* constant shift costs */
1433 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1434 COSTS_N_INSNS (4), /* HI */
1435 COSTS_N_INSNS (3), /* SI */
1436 COSTS_N_INSNS (4), /* DI */
1437 COSTS_N_INSNS (5)}, /* other */
1438 0, /* cost of multiply per each bit set */
1439 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1440 COSTS_N_INSNS (35), /* HI */
1441 COSTS_N_INSNS (51), /* SI */
1442 COSTS_N_INSNS (83), /* DI */
1443 COSTS_N_INSNS (83)}, /* other */
1444 COSTS_N_INSNS (1), /* cost of movsx */
1445 COSTS_N_INSNS (1), /* cost of movzx */
1446 8, /* "large" insn */
1448 4, /* cost for loading QImode using movzbl */
1449 {3, 4, 3}, /* cost of loading integer registers
1450 in QImode, HImode and SImode.
1451 Relative to reg-reg move (2). */
1452 {3, 4, 3}, /* cost of storing integer registers */
1453 4, /* cost of reg,reg fld/fst */
1454 {4, 4, 12}, /* cost of loading fp registers
1455 in SFmode, DFmode and XFmode */
1456 {6, 6, 8}, /* cost of storing fp registers
1457 in SFmode, DFmode and XFmode */
1458 2, /* cost of moving MMX register */
1459 {3, 3}, /* cost of loading MMX registers
1460 in SImode and DImode */
1461 {4, 4}, /* cost of storing MMX registers
1462 in SImode and DImode */
1463 2, /* cost of moving SSE register */
1464 {4, 4, 3}, /* cost of loading SSE registers
1465 in SImode, DImode and TImode */
1466 {4, 4, 5}, /* cost of storing SSE registers
1467 in SImode, DImode and TImode */
1468 3, /* MMX or SSE register to integer */
1470 MOVD reg64, xmmreg Double FSTORE 4
1471 MOVD reg32, xmmreg Double FSTORE 4
1473 MOVD reg64, xmmreg Double FADD 3
1475 MOVD reg32, xmmreg Double FADD 3
1477 32, /* size of l1 cache. */
1478 512, /* size of l2 cache. */
1479 64, /* size of prefetch block */
1480 100, /* number of parallel prefetches */
1481 2, /* Branch cost */
1482 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1483 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1484 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1485 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1486 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1487 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1489 /* BTVER1 has optimized REP instruction for medium sized blocks, but for
1490 very small blocks it is better to use loop. For large blocks, libcall can
1491 do nontemporary accesses and beat inline considerably. */
1492 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
1493 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1494 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
1495 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1496 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1497 4, /* scalar_stmt_cost. */
1498 2, /* scalar load_cost. */
1499 2, /* scalar_store_cost. */
1500 6, /* vec_stmt_cost. */
1501 0, /* vec_to_scalar_cost. */
1502 2, /* scalar_to_vec_cost. */
1503 2, /* vec_align_load_cost. */
1504 2, /* vec_unalign_load_cost. */
1505 2, /* vec_store_cost. */
1506 2, /* cond_taken_branch_cost. */
1507 1, /* cond_not_taken_branch_cost. */
1511 struct processor_costs pentium4_cost
= {
1512 COSTS_N_INSNS (1), /* cost of an add instruction */
1513 COSTS_N_INSNS (3), /* cost of a lea instruction */
1514 COSTS_N_INSNS (4), /* variable shift costs */
1515 COSTS_N_INSNS (4), /* constant shift costs */
1516 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1517 COSTS_N_INSNS (15), /* HI */
1518 COSTS_N_INSNS (15), /* SI */
1519 COSTS_N_INSNS (15), /* DI */
1520 COSTS_N_INSNS (15)}, /* other */
1521 0, /* cost of multiply per each bit set */
1522 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1523 COSTS_N_INSNS (56), /* HI */
1524 COSTS_N_INSNS (56), /* SI */
1525 COSTS_N_INSNS (56), /* DI */
1526 COSTS_N_INSNS (56)}, /* other */
1527 COSTS_N_INSNS (1), /* cost of movsx */
1528 COSTS_N_INSNS (1), /* cost of movzx */
1529 16, /* "large" insn */
1531 2, /* cost for loading QImode using movzbl */
1532 {4, 5, 4}, /* cost of loading integer registers
1533 in QImode, HImode and SImode.
1534 Relative to reg-reg move (2). */
1535 {2, 3, 2}, /* cost of storing integer registers */
1536 2, /* cost of reg,reg fld/fst */
1537 {2, 2, 6}, /* cost of loading fp registers
1538 in SFmode, DFmode and XFmode */
1539 {4, 4, 6}, /* cost of storing fp registers
1540 in SFmode, DFmode and XFmode */
1541 2, /* cost of moving MMX register */
1542 {2, 2}, /* cost of loading MMX registers
1543 in SImode and DImode */
1544 {2, 2}, /* cost of storing MMX registers
1545 in SImode and DImode */
1546 12, /* cost of moving SSE register */
1547 {12, 12, 12}, /* cost of loading SSE registers
1548 in SImode, DImode and TImode */
1549 {2, 2, 8}, /* cost of storing SSE registers
1550 in SImode, DImode and TImode */
1551 10, /* MMX or SSE register to integer */
1552 8, /* size of l1 cache. */
1553 256, /* size of l2 cache. */
1554 64, /* size of prefetch block */
1555 6, /* number of parallel prefetches */
1556 2, /* Branch cost */
1557 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1558 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1559 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1560 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1561 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1562 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1563 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
1564 DUMMY_STRINGOP_ALGS
},
1565 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
1567 DUMMY_STRINGOP_ALGS
},
1568 1, /* scalar_stmt_cost. */
1569 1, /* scalar load_cost. */
1570 1, /* scalar_store_cost. */
1571 1, /* vec_stmt_cost. */
1572 1, /* vec_to_scalar_cost. */
1573 1, /* scalar_to_vec_cost. */
1574 1, /* vec_align_load_cost. */
1575 2, /* vec_unalign_load_cost. */
1576 1, /* vec_store_cost. */
1577 3, /* cond_taken_branch_cost. */
1578 1, /* cond_not_taken_branch_cost. */
1582 struct processor_costs nocona_cost
= {
1583 COSTS_N_INSNS (1), /* cost of an add instruction */
1584 COSTS_N_INSNS (1), /* cost of a lea instruction */
1585 COSTS_N_INSNS (1), /* variable shift costs */
1586 COSTS_N_INSNS (1), /* constant shift costs */
1587 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1588 COSTS_N_INSNS (10), /* HI */
1589 COSTS_N_INSNS (10), /* SI */
1590 COSTS_N_INSNS (10), /* DI */
1591 COSTS_N_INSNS (10)}, /* other */
1592 0, /* cost of multiply per each bit set */
1593 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1594 COSTS_N_INSNS (66), /* HI */
1595 COSTS_N_INSNS (66), /* SI */
1596 COSTS_N_INSNS (66), /* DI */
1597 COSTS_N_INSNS (66)}, /* other */
1598 COSTS_N_INSNS (1), /* cost of movsx */
1599 COSTS_N_INSNS (1), /* cost of movzx */
1600 16, /* "large" insn */
1601 17, /* MOVE_RATIO */
1602 4, /* cost for loading QImode using movzbl */
1603 {4, 4, 4}, /* cost of loading integer registers
1604 in QImode, HImode and SImode.
1605 Relative to reg-reg move (2). */
1606 {4, 4, 4}, /* cost of storing integer registers */
1607 3, /* cost of reg,reg fld/fst */
1608 {12, 12, 12}, /* cost of loading fp registers
1609 in SFmode, DFmode and XFmode */
1610 {4, 4, 4}, /* cost of storing fp registers
1611 in SFmode, DFmode and XFmode */
1612 6, /* cost of moving MMX register */
1613 {12, 12}, /* cost of loading MMX registers
1614 in SImode and DImode */
1615 {12, 12}, /* cost of storing MMX registers
1616 in SImode and DImode */
1617 6, /* cost of moving SSE register */
1618 {12, 12, 12}, /* cost of loading SSE registers
1619 in SImode, DImode and TImode */
1620 {12, 12, 12}, /* cost of storing SSE registers
1621 in SImode, DImode and TImode */
1622 8, /* MMX or SSE register to integer */
1623 8, /* size of l1 cache. */
1624 1024, /* size of l2 cache. */
1625 128, /* size of prefetch block */
1626 8, /* number of parallel prefetches */
1627 1, /* Branch cost */
1628 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1629 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1630 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1631 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1632 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1633 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1634 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
1635 {libcall
, {{32, loop
}, {20000, rep_prefix_8_byte
},
1636 {100000, unrolled_loop
}, {-1, libcall
}}}},
1637 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
1639 {libcall
, {{24, loop
}, {64, unrolled_loop
},
1640 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1641 1, /* scalar_stmt_cost. */
1642 1, /* scalar load_cost. */
1643 1, /* scalar_store_cost. */
1644 1, /* vec_stmt_cost. */
1645 1, /* vec_to_scalar_cost. */
1646 1, /* scalar_to_vec_cost. */
1647 1, /* vec_align_load_cost. */
1648 2, /* vec_unalign_load_cost. */
1649 1, /* vec_store_cost. */
1650 3, /* cond_taken_branch_cost. */
1651 1, /* cond_not_taken_branch_cost. */
1655 struct processor_costs atom_cost
= {
1656 COSTS_N_INSNS (1), /* cost of an add instruction */
1657 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1658 COSTS_N_INSNS (1), /* variable shift costs */
1659 COSTS_N_INSNS (1), /* constant shift costs */
1660 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1661 COSTS_N_INSNS (4), /* HI */
1662 COSTS_N_INSNS (3), /* SI */
1663 COSTS_N_INSNS (4), /* DI */
1664 COSTS_N_INSNS (2)}, /* other */
1665 0, /* cost of multiply per each bit set */
1666 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1667 COSTS_N_INSNS (26), /* HI */
1668 COSTS_N_INSNS (42), /* SI */
1669 COSTS_N_INSNS (74), /* DI */
1670 COSTS_N_INSNS (74)}, /* other */
1671 COSTS_N_INSNS (1), /* cost of movsx */
1672 COSTS_N_INSNS (1), /* cost of movzx */
1673 8, /* "large" insn */
1674 17, /* MOVE_RATIO */
1675 4, /* cost for loading QImode using movzbl */
1676 {4, 4, 4}, /* cost of loading integer registers
1677 in QImode, HImode and SImode.
1678 Relative to reg-reg move (2). */
1679 {4, 4, 4}, /* cost of storing integer registers */
1680 4, /* cost of reg,reg fld/fst */
1681 {12, 12, 12}, /* cost of loading fp registers
1682 in SFmode, DFmode and XFmode */
1683 {6, 6, 8}, /* cost of storing fp registers
1684 in SFmode, DFmode and XFmode */
1685 2, /* cost of moving MMX register */
1686 {8, 8}, /* cost of loading MMX registers
1687 in SImode and DImode */
1688 {8, 8}, /* cost of storing MMX registers
1689 in SImode and DImode */
1690 2, /* cost of moving SSE register */
1691 {8, 8, 8}, /* cost of loading SSE registers
1692 in SImode, DImode and TImode */
1693 {8, 8, 8}, /* cost of storing SSE registers
1694 in SImode, DImode and TImode */
1695 5, /* MMX or SSE register to integer */
1696 32, /* size of l1 cache. */
1697 256, /* size of l2 cache. */
1698 64, /* size of prefetch block */
1699 6, /* number of parallel prefetches */
1700 3, /* Branch cost */
1701 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1702 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1703 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1704 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1705 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1706 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1707 {{libcall
, {{11, loop
}, {-1, rep_prefix_4_byte
}}},
1708 {libcall
, {{32, loop
}, {64, rep_prefix_4_byte
},
1709 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1710 {{libcall
, {{8, loop
}, {15, unrolled_loop
},
1711 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1712 {libcall
, {{24, loop
}, {32, unrolled_loop
},
1713 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1714 1, /* scalar_stmt_cost. */
1715 1, /* scalar load_cost. */
1716 1, /* scalar_store_cost. */
1717 1, /* vec_stmt_cost. */
1718 1, /* vec_to_scalar_cost. */
1719 1, /* scalar_to_vec_cost. */
1720 1, /* vec_align_load_cost. */
1721 2, /* vec_unalign_load_cost. */
1722 1, /* vec_store_cost. */
1723 3, /* cond_taken_branch_cost. */
1724 1, /* cond_not_taken_branch_cost. */
1727 /* Generic64 should produce code tuned for Nocona and K8. */
1729 struct processor_costs generic64_cost
= {
1730 COSTS_N_INSNS (1), /* cost of an add instruction */
1731 /* On all chips taken into consideration lea is 2 cycles and more. With
1732 this cost however our current implementation of synth_mult results in
1733 use of unnecessary temporary registers causing regression on several
1734 SPECfp benchmarks. */
1735 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1736 COSTS_N_INSNS (1), /* variable shift costs */
1737 COSTS_N_INSNS (1), /* constant shift costs */
1738 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1739 COSTS_N_INSNS (4), /* HI */
1740 COSTS_N_INSNS (3), /* SI */
1741 COSTS_N_INSNS (4), /* DI */
1742 COSTS_N_INSNS (2)}, /* other */
1743 0, /* cost of multiply per each bit set */
1744 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1745 COSTS_N_INSNS (26), /* HI */
1746 COSTS_N_INSNS (42), /* SI */
1747 COSTS_N_INSNS (74), /* DI */
1748 COSTS_N_INSNS (74)}, /* other */
1749 COSTS_N_INSNS (1), /* cost of movsx */
1750 COSTS_N_INSNS (1), /* cost of movzx */
1751 8, /* "large" insn */
1752 17, /* MOVE_RATIO */
1753 4, /* cost for loading QImode using movzbl */
1754 {4, 4, 4}, /* cost of loading integer registers
1755 in QImode, HImode and SImode.
1756 Relative to reg-reg move (2). */
1757 {4, 4, 4}, /* cost of storing integer registers */
1758 4, /* cost of reg,reg fld/fst */
1759 {12, 12, 12}, /* cost of loading fp registers
1760 in SFmode, DFmode and XFmode */
1761 {6, 6, 8}, /* cost of storing fp registers
1762 in SFmode, DFmode and XFmode */
1763 2, /* cost of moving MMX register */
1764 {8, 8}, /* cost of loading MMX registers
1765 in SImode and DImode */
1766 {8, 8}, /* cost of storing MMX registers
1767 in SImode and DImode */
1768 2, /* cost of moving SSE register */
1769 {8, 8, 8}, /* cost of loading SSE registers
1770 in SImode, DImode and TImode */
1771 {8, 8, 8}, /* cost of storing SSE registers
1772 in SImode, DImode and TImode */
1773 5, /* MMX or SSE register to integer */
1774 32, /* size of l1 cache. */
1775 512, /* size of l2 cache. */
1776 64, /* size of prefetch block */
1777 6, /* number of parallel prefetches */
1778 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this
1779 value is increased to perhaps more appropriate value of 5. */
1780 3, /* Branch cost */
1781 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1782 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1783 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1784 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1785 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1786 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1787 {DUMMY_STRINGOP_ALGS
,
1788 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1789 {DUMMY_STRINGOP_ALGS
,
1790 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1791 1, /* scalar_stmt_cost. */
1792 1, /* scalar load_cost. */
1793 1, /* scalar_store_cost. */
1794 1, /* vec_stmt_cost. */
1795 1, /* vec_to_scalar_cost. */
1796 1, /* scalar_to_vec_cost. */
1797 1, /* vec_align_load_cost. */
1798 2, /* vec_unalign_load_cost. */
1799 1, /* vec_store_cost. */
1800 3, /* cond_taken_branch_cost. */
1801 1, /* cond_not_taken_branch_cost. */
/* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
   Athlon and K8.  */
1807 struct processor_costs generic32_cost
= {
1808 COSTS_N_INSNS (1), /* cost of an add instruction */
1809 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1810 COSTS_N_INSNS (1), /* variable shift costs */
1811 COSTS_N_INSNS (1), /* constant shift costs */
1812 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1813 COSTS_N_INSNS (4), /* HI */
1814 COSTS_N_INSNS (3), /* SI */
1815 COSTS_N_INSNS (4), /* DI */
1816 COSTS_N_INSNS (2)}, /* other */
1817 0, /* cost of multiply per each bit set */
1818 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1819 COSTS_N_INSNS (26), /* HI */
1820 COSTS_N_INSNS (42), /* SI */
1821 COSTS_N_INSNS (74), /* DI */
1822 COSTS_N_INSNS (74)}, /* other */
1823 COSTS_N_INSNS (1), /* cost of movsx */
1824 COSTS_N_INSNS (1), /* cost of movzx */
1825 8, /* "large" insn */
1826 17, /* MOVE_RATIO */
1827 4, /* cost for loading QImode using movzbl */
1828 {4, 4, 4}, /* cost of loading integer registers
1829 in QImode, HImode and SImode.
1830 Relative to reg-reg move (2). */
1831 {4, 4, 4}, /* cost of storing integer registers */
1832 4, /* cost of reg,reg fld/fst */
1833 {12, 12, 12}, /* cost of loading fp registers
1834 in SFmode, DFmode and XFmode */
1835 {6, 6, 8}, /* cost of storing fp registers
1836 in SFmode, DFmode and XFmode */
1837 2, /* cost of moving MMX register */
1838 {8, 8}, /* cost of loading MMX registers
1839 in SImode and DImode */
1840 {8, 8}, /* cost of storing MMX registers
1841 in SImode and DImode */
1842 2, /* cost of moving SSE register */
1843 {8, 8, 8}, /* cost of loading SSE registers
1844 in SImode, DImode and TImode */
1845 {8, 8, 8}, /* cost of storing SSE registers
1846 in SImode, DImode and TImode */
1847 5, /* MMX or SSE register to integer */
1848 32, /* size of l1 cache. */
1849 256, /* size of l2 cache. */
1850 64, /* size of prefetch block */
1851 6, /* number of parallel prefetches */
1852 3, /* Branch cost */
1853 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1854 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1855 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1856 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1857 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1858 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1859 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
1860 DUMMY_STRINGOP_ALGS
},
1861 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
1862 DUMMY_STRINGOP_ALGS
},
1863 1, /* scalar_stmt_cost. */
1864 1, /* scalar load_cost. */
1865 1, /* scalar_store_cost. */
1866 1, /* vec_stmt_cost. */
1867 1, /* vec_to_scalar_cost. */
1868 1, /* scalar_to_vec_cost. */
1869 1, /* vec_align_load_cost. */
1870 2, /* vec_unalign_load_cost. */
1871 1, /* vec_store_cost. */
1872 3, /* cond_taken_branch_cost. */
1873 1, /* cond_not_taken_branch_cost. */
/* Cost table currently in effect; statically initialized to pentium_cost
   (one of the per-processor tables defined above).
   NOTE(review): presumably re-pointed to the selected processor's table
   during option override -- that code is not visible in this chunk.  */
1876 const struct processor_costs
*ix86_cost
= &pentium_cost
;
1878 /* Processor feature/optimization bitmasks. */
/* Each m_* macro selects one PROCESSOR_* enumerator as a bit; the
   composite masks OR several of them together for use in the tuning
   tables that follow.  */
/* Intel processors and unions thereof.  */
1879 #define m_386 (1<<PROCESSOR_I386)
1880 #define m_486 (1<<PROCESSOR_I486)
1881 #define m_PENT (1<<PROCESSOR_PENTIUM)
1882 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1883 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1884 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1885 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
1886 #define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
1887 #define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
1888 #define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
1889 #define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
1890 #define m_COREI7 (m_COREI7_32 | m_COREI7_64)
1891 #define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
1892 #define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
1893 #define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
1894 #define m_ATOM (1<<PROCESSOR_ATOM)
/* AMD (and Geode) processors and unions thereof.  */
1896 #define m_GEODE (1<<PROCESSOR_GEODE)
1897 #define m_K6 (1<<PROCESSOR_K6)
1898 #define m_K6_GEODE (m_K6 | m_GEODE)
1899 #define m_K8 (1<<PROCESSOR_K8)
1900 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1901 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1902 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1903 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1904 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
1905 #define m_BDVER (m_BDVER1 | m_BDVER2)
1906 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
1907 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1)
/* Blended "generic" tunings, 32-bit and 64-bit.  */
1909 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1910 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1912 /* Generic instruction choice should be common subset of supported CPUs
1913 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1914 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1916 /* Feature tests against the various tunings. */
/* One flag per X86_TUNE_* index; per the comment on
   initial_ix86_tune_features (below), it is created from that table
   based on the processor mask being tuned for.  */
unsigned char ix86_tune_features
[X86_TUNE_LAST
];
1919 /* Feature tests against the various tunings used to create ix86_tune_features
1920 based on the processor mask. */
1921 static unsigned int initial_ix86_tune_features
[X86_TUNE_LAST
] = {
1922 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1923 negatively, so enabling for Generic64 seems like good code size
1924 tradeoff. We can't enable it for 32bit generic because it does not
1925 work well with PPro base chips. */
1926 m_386
| m_CORE2I7_64
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC64
,
1928 /* X86_TUNE_PUSH_MEMORY */
1929 m_386
| m_P4_NOCONA
| m_CORE2I7
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
1931 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1934 /* X86_TUNE_UNROLL_STRLEN */
1935 m_486
| m_PENT
| m_PPRO
| m_ATOM
| m_CORE2I7
| m_K6
| m_AMD_MULTIPLE
| m_GENERIC
,
1937 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1938 on simulation result. But after P4 was made, no performance benefit
1939 was observed with branch hints. It also increases the code size.
1940 As a result, icc never generates branch hints. */
1943 /* X86_TUNE_DOUBLE_WITH_ADD */
1946 /* X86_TUNE_USE_SAHF */
1947 m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_K6_GEODE
| m_K8
| m_AMDFAM10
| m_BDVER
| m_BTVER1
| m_GENERIC
,
1949 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1950 partial dependencies. */
1951 m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
1953 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1954 register stalls on Generic32 compilation setting as well. However
1955 in current implementation the partial register stalls are not eliminated
1956 very well - they can be introduced via subregs synthesized by combine
1957 and can happen in caller/callee saving sequences. Because this option
1958 pays back little on PPro based chips and is in conflict with partial reg
1959 dependencies used by Athlon/P4 based chips, it is better to leave it off
1960 for generic32 for now. */
1963 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1964 m_CORE2I7
| m_GENERIC
,
1966 /* X86_TUNE_USE_HIMODE_FIOP */
1967 m_386
| m_486
| m_K6_GEODE
,
1969 /* X86_TUNE_USE_SIMODE_FIOP */
1970 ~(m_PENT
| m_PPRO
| m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
),
1972 /* X86_TUNE_USE_MOV0 */
1975 /* X86_TUNE_USE_CLTD */
1976 ~(m_PENT
| m_CORE2I7
| m_ATOM
| m_K6
| m_GENERIC
),
1978 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1981 /* X86_TUNE_SPLIT_LONG_MOVES */
1984 /* X86_TUNE_READ_MODIFY_WRITE */
1987 /* X86_TUNE_READ_MODIFY */
1990 /* X86_TUNE_PROMOTE_QIMODE */
1991 m_386
| m_486
| m_PENT
| m_CORE2I7
| m_ATOM
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
1993 /* X86_TUNE_FAST_PREFIX */
1994 ~(m_386
| m_486
| m_PENT
),
1996 /* X86_TUNE_SINGLE_STRINGOP */
1997 m_386
| m_P4_NOCONA
,
1999 /* X86_TUNE_QIMODE_MATH */
2002 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
2003 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
2004 might be considered for Generic32 if our scheme for avoiding partial
2005 stalls was more effective. */
2008 /* X86_TUNE_PROMOTE_QI_REGS */
2011 /* X86_TUNE_PROMOTE_HI_REGS */
2014 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
2015 over esp addition. */
2016 m_386
| m_486
| m_PENT
| m_PPRO
,
2018 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
2019 over esp addition. */
2022 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
2023 over esp subtraction. */
2024 m_386
| m_486
| m_PENT
| m_K6_GEODE
,
2026 /* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred
2027 over esp subtraction. */
2028 m_PENT
| m_K6_GEODE
,
2030 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
2031 for DFmode copies */
2032 ~(m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_GEODE
| m_AMD_MULTIPLE
| m_ATOM
| m_GENERIC
),
2034 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
2035 m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
,
2037 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
2038 conflict here in between PPro/Pentium4 based chips that thread 128bit
2039 SSE registers as single units versus K8 based chips that divide SSE
2040 registers to two 64bit halves. This knob promotes all store destinations
2041 to be 128bit to allow register renaming on 128bit SSE units, but usually
2042 results in one extra microop on 64bit SSE units. Experimental results
2043 shows that disabling this option on P4 brings over 20% SPECfp regression,
2044 while enabling it on K8 brings roughly 2.4% regression that can be partly
2045 masked by careful scheduling of moves. */
2046 m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_AMDFAM10
| m_BDVER
| m_GENERIC
,
2048 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
2049 m_COREI7
| m_AMDFAM10
| m_BDVER
| m_BTVER1
,
2051 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
2054 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
2057 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
2058 are resolved on SSE register parts instead of whole registers, so we may
2059 maintain just lower part of scalar values in proper format leaving the
2060 upper part undefined. */
2063 /* X86_TUNE_SSE_TYPELESS_STORES */
2066 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
2067 m_PPRO
| m_P4_NOCONA
,
2069 /* X86_TUNE_MEMORY_MISMATCH_STALL */
2070 m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
,
2072 /* X86_TUNE_PROLOGUE_USING_MOVE */
2073 m_PPRO
| m_CORE2I7
| m_ATOM
| m_ATHLON_K8
| m_GENERIC
,
2075 /* X86_TUNE_EPILOGUE_USING_MOVE */
2076 m_PPRO
| m_CORE2I7
| m_ATOM
| m_ATHLON_K8
| m_GENERIC
,
2078 /* X86_TUNE_SHIFT1 */
2081 /* X86_TUNE_USE_FFREEP */
2084 /* X86_TUNE_INTER_UNIT_MOVES */
2085 ~(m_AMD_MULTIPLE
| m_GENERIC
),
2087 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
2088 ~(m_AMDFAM10
| m_BDVER
),
2090 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
2091 than 4 branch instructions in the 16 byte window. */
2092 m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
,
2094 /* X86_TUNE_SCHEDULE */
2095 m_PENT
| m_PPRO
| m_CORE2I7
| m_ATOM
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
2097 /* X86_TUNE_USE_BT */
2098 m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
,
2100 /* X86_TUNE_USE_INCDEC */
2101 ~(m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_GENERIC
),
2103 /* X86_TUNE_PAD_RETURNS */
2104 m_CORE2I7
| m_AMD_MULTIPLE
| m_GENERIC
,
2106 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short function. */
2109 /* X86_TUNE_EXT_80387_CONSTANTS */
2110 m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_K6_GEODE
| m_ATHLON_K8
| m_GENERIC
,
2112 /* X86_TUNE_SHORTEN_X87_SSE */
2115 /* X86_TUNE_AVOID_VECTOR_DECODE */
2116 m_CORE2I7_64
| m_K8
| m_GENERIC64
,
2118 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
2119 and SImode multiply, but 386 and 486 do HImode multiply faster. */
2122 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
2123 vector path on AMD machines. */
2124 m_CORE2I7_64
| m_K8
| m_AMDFAM10
| m_BDVER
| m_BTVER1
| m_GENERIC64
,
2126 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
2128 m_CORE2I7_64
| m_K8
| m_AMDFAM10
| m_BDVER
| m_BTVER1
| m_GENERIC64
,
2130 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
2134 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
2135 but one byte longer. */
2138 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
2139 operand that cannot be represented using a modRM byte. The XOR
2140 replacement is long decoded, so this split helps here as well. */
2143 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
2145 m_CORE2I7
| m_AMDFAM10
| m_GENERIC
,
2147 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
2148 from integer to FP. */
2151 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
2152 with a subsequent conditional jump instruction into a single
2153 compare-and-branch uop. */
2156 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
2157 will impact LEA instruction selection. */
2160 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
2164 /* X86_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching
2165 at -O3. For the moment, the prefetching seems badly tuned for Intel
2167 m_K6_GEODE
| m_AMD_MULTIPLE
,
2169 /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
2170 the auto-vectorizer. */
2173 /* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations
2174 during reassociation of integer computation. */
2177 /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
2178 during reassociation of fp computation. */
2182 /* Feature tests against the various architecture variations. */
/* One flag per X86_ARCH_* index; per the comment on
   initial_ix86_arch_features (below), it is created from that table
   based on the processor mask.  */
unsigned char ix86_arch_features
[X86_ARCH_LAST
];
2185 /* Feature tests against the various architecture variations, used to create
2186 ix86_arch_features based on the processor mask. */
2187 static unsigned int initial_ix86_arch_features
[X86_ARCH_LAST
] = {
2188 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
2189 ~(m_386
| m_486
| m_PENT
| m_K6
),
2191 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2194 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2197 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2200 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
/* CPU masks built from the m_* bits above, consulted as simple tuning
   switches.  NOTE(review): the explanatory comment that normally
   precedes this first variable appears to have been dropped by
   extraction.  */
2204 static const unsigned int x86_accumulate_outgoing_args
2205 = m_PPRO
| m_P4_NOCONA
| m_ATOM
| m_CORE2I7
| m_AMD_MULTIPLE
| m_GENERIC
;
/* NOTE(review): name suggests processors on which 387 "fancy math"
   instructions are always assumed usable -- confirm against upstream
   comment; the original comment line is missing here.  */
2207 static const unsigned int x86_arch_always_fancy_math_387
2208 = m_PENT
| m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
;
/* Processors for which 256-bit AVX unaligned loads are split
   (cf. the -mavx256-split-unaligned-load entry in flag_opts below).  */
2210 static const unsigned int x86_avx256_split_unaligned_load
2211 = m_COREI7
| m_GENERIC
;
/* As above, for 256-bit AVX unaligned stores
   (cf. -mavx256-split-unaligned-store).  */
2213 static const unsigned int x86_avx256_split_unaligned_store
2214 = m_COREI7
| m_BDVER
| m_GENERIC
;
2216 /* In case the average insn count for single function invocation is
2217 lower than this constant, emit fast (but longer) prologue and epilogue code.  */
2219 #define FAST_PROLOGUE_INSN_COUNT 20
2221 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
/* The *_REGISTER_NAMES initializer macros are supplied elsewhere
   (presumably i386.h -- not visible in this chunk).  */
2222 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
2223 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
2224 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
2226 /* Array of the smallest class containing reg number REGNO, indexed by
2227 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2229 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
2231 /* ax, dx, cx, bx */
2232 AREG
, DREG
, CREG
, BREG
,
2233 /* si, di, bp, sp */
2234 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
2236 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
2237 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
2240 /* flags, fpsr, fpcr, frame */
2241 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
2243 SSE_FIRST_REG
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
2246 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
2249 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
2250 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
2251 /* SSE REX registers */
2252 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
2256 /* The "default" register map used in 32bit mode. */
2258 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
2260 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2261 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2262 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2263 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2264 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2265 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2266 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2269 /* The "default" register map used in 64bit mode. */
2271 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
2273 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2274 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2275 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2276 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2277 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2278 8,9,10,11,12,13,14,15, /* extended integer registers */
2279 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2282 /* Define the register numbers to be used in Dwarf debugging information.
2283 The SVR4 reference port C compiler uses the following register numbers
2284 in its Dwarf output code:
2285 0 for %eax (gcc regno = 0)
2286 1 for %ecx (gcc regno = 2)
2287 2 for %edx (gcc regno = 1)
2288 3 for %ebx (gcc regno = 3)
2289 4 for %esp (gcc regno = 7)
2290 5 for %ebp (gcc regno = 6)
2291 6 for %esi (gcc regno = 4)
2292 7 for %edi (gcc regno = 5)
2293 The following three DWARF register numbers are never generated by
2294 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2295 believes these numbers have these meanings.
2296 8 for %eip (no gcc equivalent)
2297 9 for %eflags (gcc regno = 17)
2298 10 for %trapno (no gcc equivalent)
2299 It is not at all clear how we should number the FP stack registers
2300 for the x86 architecture. If the version of SDB on x86/svr4 were
2301 a bit less brain dead with respect to floating-point then we would
2302 have a precedent to follow with respect to DWARF register numbers
2303 for x86 FP registers, but the SDB on x86/svr4 is so completely
2304 broken with respect to FP registers that it is hardly worth thinking
2305 of it as something to strive for compatibility with.
2306 The version of x86/svr4 SDB I have at the moment does (partially)
2307 seem to believe that DWARF register number 11 is associated with
2308 the x86 register %st(0), but that's about all. Higher DWARF
2309 register numbers don't seem to be associated with anything in
2310 particular, and even for DWARF regno 11, SDB only seems to under-
2311 stand that it should say that a variable lives in %st(0) (when
2312 asked via an `=' command) if we said it was in DWARF regno 11,
2313 but SDB still prints garbage when asked for the value of the
2314 variable in question (via a `/' command).
2315 (Also note that the labels SDB prints for various FP stack regs
2316 when doing an `x' command are all wrong.)
2317 Note that these problems generally don't affect the native SVR4
2318 C compiler because it doesn't allow the use of -O with -g and
2319 because when it is *not* optimizing, it allocates a memory
2320 location for each floating-point variable, and the memory
2321 location is what gets described in the DWARF AT_location
2322 attribute for the variable in question.
2323 Regardless of the severe mental illness of the x86/svr4 SDB, we
2324 do something sensible here and we use the following DWARF
2325 register numbers. Note that these are all stack-top-relative
2327 11 for %st(0) (gcc regno = 8)
2328 12 for %st(1) (gcc regno = 9)
2329 13 for %st(2) (gcc regno = 10)
2330 14 for %st(3) (gcc regno = 11)
2331 15 for %st(4) (gcc regno = 12)
2332 16 for %st(5) (gcc regno = 13)
2333 17 for %st(6) (gcc regno = 14)
2334 18 for %st(7) (gcc regno = 15)
2336 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
2338 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2339 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2340 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2341 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2342 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2343 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2344 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2347 /* Define parameter passing and return registers. */
2349 static int const x86_64_int_parameter_registers
[6] =
2351 DI_REG
, SI_REG
, DX_REG
, CX_REG
, R8_REG
, R9_REG
2354 static int const x86_64_ms_abi_int_parameter_registers
[4] =
2356 CX_REG
, DX_REG
, R8_REG
, R9_REG
2359 static int const x86_64_int_return_registers
[4] =
2361 AX_REG
, DX_REG
, DI_REG
, SI_REG
2364 /* Define the structure for the machine field in struct function. */
2366 struct GTY(()) stack_local_entry
{
2367 unsigned short mode
;
2370 struct stack_local_entry
*next
;
2373 /* Structure describing stack frame layout.
2374 Stack grows downward:
2380 saved static chain if ix86_static_chain_on_stack
2382 saved frame pointer if frame_pointer_needed
2383 <- HARD_FRAME_POINTER
2389 <- sse_regs_save_offset
2392 [va_arg registers] |
2396 [padding2] | = to_allocate
2405 int outgoing_arguments_size
;
2406 HOST_WIDE_INT frame
;
2408 /* The offsets relative to ARG_POINTER. */
2409 HOST_WIDE_INT frame_pointer_offset
;
2410 HOST_WIDE_INT hard_frame_pointer_offset
;
2411 HOST_WIDE_INT stack_pointer_offset
;
2412 HOST_WIDE_INT hfp_save_offset
;
2413 HOST_WIDE_INT reg_save_offset
;
2414 HOST_WIDE_INT sse_reg_save_offset
;
2416 /* When save_regs_using_mov is set, emit prologue using
2417 move instead of push instructions. */
2418 bool save_regs_using_mov
;
2421 /* Which cpu are we scheduling for. */
2422 enum attr_cpu ix86_schedule
;
2424 /* Which cpu are we optimizing for. */
2425 enum processor_type ix86_tune
;
2427 /* Which instruction set architecture to use. */
2428 enum processor_type ix86_arch
;
2430 /* true if sse prefetch instruction is not NOOP. */
2431 int x86_prefetch_sse
;
2433 /* -mstackrealign option */
2434 static const char ix86_force_align_arg_pointer_string
[]
2435 = "force_align_arg_pointer";
/* Pattern-generator callbacks.  Keeping these in function pointers lets
   shared code emit the appropriate insn variant without switching on
   the target at each call site.  NOTE(review): where they are assigned
   is not visible in this chunk; presumably selected by word size
   during option override -- confirm upstream.  */
2437 static rtx (*ix86_gen_leave
) (void);
2438 static rtx (*ix86_gen_add3
) (rtx
, rtx
, rtx
);
2439 static rtx (*ix86_gen_sub3
) (rtx
, rtx
, rtx
);
2440 static rtx (*ix86_gen_sub3_carry
) (rtx
, rtx
, rtx
, rtx
, rtx
);
2441 static rtx (*ix86_gen_one_cmpl2
) (rtx
, rtx
);
2442 static rtx (*ix86_gen_monitor
) (rtx
, rtx
, rtx
);
2443 static rtx (*ix86_gen_andsp
) (rtx
, rtx
, rtx
);
2444 static rtx (*ix86_gen_allocate_stack_worker
) (rtx
, rtx
);
2445 static rtx (*ix86_gen_adjust_stack_and_probe
) (rtx
, rtx
, rtx
);
2446 static rtx (*ix86_gen_probe_stack_range
) (rtx
, rtx
, rtx
);
2448 /* Preferred alignment for stack boundary in bits. */
2449 unsigned int ix86_preferred_stack_boundary
;
2451 /* Alignment for incoming stack boundary in bits specified at
   the command line.  NOTE(review): comment terminator restored; the
   original continuation line appears to have been dropped.  */
2453 static unsigned int ix86_user_incoming_stack_boundary
;
2455 /* Default alignment for incoming stack boundary in bits. */
2456 static unsigned int ix86_default_incoming_stack_boundary
;
2458 /* Alignment for incoming stack boundary in bits. */
2459 unsigned int ix86_incoming_stack_boundary
;
2461 /* Calling abi specific va_list type nodes. */
2462 static GTY(()) tree sysv_va_list_type_node
;
/* va_list node for the Microsoft ABI (cf. ms_va_list usage elsewhere).  */
2463 static GTY(()) tree ms_va_list_type_node
;
2465 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2466 char internal_label_prefix
[16];
/* Length of internal_label_prefix, cached so users need not strlen it.  */
2467 int internal_label_prefix_len
;
2469 /* Fence to use after loop using movnt. */
2472 /* Register class used for passing given 64bit part of the argument.
2473 These represent classes as documented by the PS ABI, with the exception
2474 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
2475 use SF or DFmode move instead of DImode to avoid reformatting penalties.
2477 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2478 whenever possible (upper half does contain padding). */
2479 enum x86_64_reg_class
2482 X86_64_INTEGER_CLASS
,
2483 X86_64_INTEGERSI_CLASS
,
2490 X86_64_COMPLEX_X87_CLASS
,
2494 #define MAX_CLASSES 4
2496 /* Table of constants used by fldpi, fldln2, etc.... */
2497 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
2498 static bool ext_80387_constants_init
= 0;
2501 static struct machine_function
* ix86_init_machine_status (void);
2502 static rtx
ix86_function_value (const_tree
, const_tree
, bool);
2503 static bool ix86_function_value_regno_p (const unsigned int);
2504 static unsigned int ix86_function_arg_boundary (enum machine_mode
,
2506 static rtx
ix86_static_chain (const_tree
, bool);
2507 static int ix86_function_regparm (const_tree
, const_tree
);
2508 static void ix86_compute_frame_layout (struct ix86_frame
*);
2509 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode
,
2511 static void ix86_add_new_builtins (HOST_WIDE_INT
);
2512 static tree
ix86_canonical_va_list_type (tree
);
2513 static void predict_jump (int);
2514 static unsigned int split_stack_prologue_scratch_regno (void);
2515 static bool i386_asm_output_addr_const_extra (FILE *, rtx
);
2517 enum ix86_function_specific_strings
2519 IX86_FUNCTION_SPECIFIC_ARCH
,
2520 IX86_FUNCTION_SPECIFIC_TUNE
,
2521 IX86_FUNCTION_SPECIFIC_MAX
2524 static char *ix86_target_string (HOST_WIDE_INT
, int, const char *,
2525 const char *, enum fpmath_unit
, bool);
2526 static void ix86_debug_options (void) ATTRIBUTE_UNUSED
;
2527 static void ix86_function_specific_save (struct cl_target_option
*);
2528 static void ix86_function_specific_restore (struct cl_target_option
*);
2529 static void ix86_function_specific_print (FILE *, int,
2530 struct cl_target_option
*);
2531 static bool ix86_valid_target_attribute_p (tree
, tree
, tree
, int);
2532 static bool ix86_valid_target_attribute_inner_p (tree
, char *[],
2533 struct gcc_options
*);
2534 static bool ix86_can_inline_p (tree
, tree
);
2535 static void ix86_set_current_function (tree
);
2536 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2538 static enum calling_abi
ix86_function_abi (const_tree
);
2541 #ifndef SUBTARGET32_DEFAULT_CPU
2542 #define SUBTARGET32_DEFAULT_CPU "i386"
2545 /* The svr4 ABI for the i386 says that records and unions are returned
2547 #ifndef DEFAULT_PCC_STRUCT_RETURN
2548 #define DEFAULT_PCC_STRUCT_RETURN 1
2551 /* Whether -mtune= or -march= were specified */
2552 static int ix86_tune_defaulted
;
2553 static int ix86_arch_specified
;
2555 /* Vectorization library interface and handlers. */
2556 static tree (*ix86_veclib_handler
) (enum built_in_function
, tree
, tree
);
2558 static tree
ix86_veclibabi_svml (enum built_in_function
, tree
, tree
);
2559 static tree
ix86_veclibabi_acml (enum built_in_function
, tree
, tree
);
2561 /* Processor target table, indexed by processor number */
2564 const struct processor_costs
*cost
; /* Processor costs */
2565 const int align_loop
; /* Default alignments. */
2566 const int align_loop_max_skip
;
2567 const int align_jump
;
2568 const int align_jump_max_skip
;
2569 const int align_func
;
2572 static const struct ptt processor_target_table
[PROCESSOR_max
] =
2574 {&i386_cost
, 4, 3, 4, 3, 4},
2575 {&i486_cost
, 16, 15, 16, 15, 16},
2576 {&pentium_cost
, 16, 7, 16, 7, 16},
2577 {&pentiumpro_cost
, 16, 15, 16, 10, 16},
2578 {&geode_cost
, 0, 0, 0, 0, 0},
2579 {&k6_cost
, 32, 7, 32, 7, 32},
2580 {&athlon_cost
, 16, 7, 16, 7, 16},
2581 {&pentium4_cost
, 0, 0, 0, 0, 0},
2582 {&k8_cost
, 16, 7, 16, 7, 16},
2583 {&nocona_cost
, 0, 0, 0, 0, 0},
2584 /* Core 2 32-bit. */
2585 {&generic32_cost
, 16, 10, 16, 10, 16},
2586 /* Core 2 64-bit. */
2587 {&generic64_cost
, 16, 10, 16, 10, 16},
2588 /* Core i7 32-bit. */
2589 {&generic32_cost
, 16, 10, 16, 10, 16},
2590 /* Core i7 64-bit. */
2591 {&generic64_cost
, 16, 10, 16, 10, 16},
2592 {&generic32_cost
, 16, 7, 16, 7, 16},
2593 {&generic64_cost
, 16, 10, 16, 10, 16},
2594 {&amdfam10_cost
, 32, 24, 32, 7, 32},
2595 {&bdver1_cost
, 32, 24, 32, 7, 32},
2596 {&bdver2_cost
, 32, 24, 32, 7, 32},
2597 {&btver1_cost
, 32, 24, 32, 7, 32},
2598 {&atom_cost
, 16, 15, 16, 7, 16}
2601 static const char *const cpu_names
[TARGET_CPU_DEFAULT_max
] =
2631 /* Return true if a red-zone is in use. */
2634 ix86_using_red_zone (void)
2636 return TARGET_RED_ZONE
&& !TARGET_64BIT_MS_ABI
;
2639 /* Return a string that documents the current -m options. The caller is
2640 responsible for freeing the string. */
2643 ix86_target_string (HOST_WIDE_INT isa
, int flags
, const char *arch
,
2644 const char *tune
, enum fpmath_unit fpmath
,
2647 struct ix86_target_opts
2649 const char *option
; /* option string */
2650 HOST_WIDE_INT mask
; /* isa mask options */
2653 /* This table is ordered so that options like -msse4.2 that imply
2654 preceding options while match those first. */
2655 static struct ix86_target_opts isa_opts
[] =
2657 { "-m64", OPTION_MASK_ISA_64BIT
},
2658 { "-mfma4", OPTION_MASK_ISA_FMA4
},
2659 { "-mfma", OPTION_MASK_ISA_FMA
},
2660 { "-mxop", OPTION_MASK_ISA_XOP
},
2661 { "-mlwp", OPTION_MASK_ISA_LWP
},
2662 { "-msse4a", OPTION_MASK_ISA_SSE4A
},
2663 { "-msse4.2", OPTION_MASK_ISA_SSE4_2
},
2664 { "-msse4.1", OPTION_MASK_ISA_SSE4_1
},
2665 { "-mssse3", OPTION_MASK_ISA_SSSE3
},
2666 { "-msse3", OPTION_MASK_ISA_SSE3
},
2667 { "-msse2", OPTION_MASK_ISA_SSE2
},
2668 { "-msse", OPTION_MASK_ISA_SSE
},
2669 { "-m3dnow", OPTION_MASK_ISA_3DNOW
},
2670 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A
},
2671 { "-mmmx", OPTION_MASK_ISA_MMX
},
2672 { "-mabm", OPTION_MASK_ISA_ABM
},
2673 { "-mbmi", OPTION_MASK_ISA_BMI
},
2674 { "-mbmi2", OPTION_MASK_ISA_BMI2
},
2675 { "-mlzcnt", OPTION_MASK_ISA_LZCNT
},
2676 { "-mtbm", OPTION_MASK_ISA_TBM
},
2677 { "-mpopcnt", OPTION_MASK_ISA_POPCNT
},
2678 { "-mmovbe", OPTION_MASK_ISA_MOVBE
},
2679 { "-mcrc32", OPTION_MASK_ISA_CRC32
},
2680 { "-maes", OPTION_MASK_ISA_AES
},
2681 { "-mpclmul", OPTION_MASK_ISA_PCLMUL
},
2682 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE
},
2683 { "-mrdrnd", OPTION_MASK_ISA_RDRND
},
2684 { "-mf16c", OPTION_MASK_ISA_F16C
},
2688 static struct ix86_target_opts flag_opts
[] =
2690 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE
},
2691 { "-m80387", MASK_80387
},
2692 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS
},
2693 { "-malign-double", MASK_ALIGN_DOUBLE
},
2694 { "-mcld", MASK_CLD
},
2695 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS
},
2696 { "-mieee-fp", MASK_IEEE_FP
},
2697 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS
},
2698 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY
},
2699 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT
},
2700 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS
},
2701 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387
},
2702 { "-mno-push-args", MASK_NO_PUSH_ARGS
},
2703 { "-mno-red-zone", MASK_NO_RED_ZONE
},
2704 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER
},
2705 { "-mrecip", MASK_RECIP
},
2706 { "-mrtd", MASK_RTD
},
2707 { "-msseregparm", MASK_SSEREGPARM
},
2708 { "-mstack-arg-probe", MASK_STACK_PROBE
},
2709 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS
},
2710 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS
},
2711 { "-m8bit-idiv", MASK_USE_8BIT_IDIV
},
2712 { "-mvzeroupper", MASK_VZEROUPPER
},
2713 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD
},
2714 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE
},
2715 { "-mprefer-avx128", MASK_PREFER_AVX128
},
2718 const char *opts
[ARRAY_SIZE (isa_opts
) + ARRAY_SIZE (flag_opts
) + 6][2];
2721 char target_other
[40];
2730 memset (opts
, '\0', sizeof (opts
));
2732 /* Add -march= option. */
2735 opts
[num
][0] = "-march=";
2736 opts
[num
++][1] = arch
;
2739 /* Add -mtune= option. */
2742 opts
[num
][0] = "-mtune=";
2743 opts
[num
++][1] = tune
;
2746 /* Pick out the options in isa options. */
2747 for (i
= 0; i
< ARRAY_SIZE (isa_opts
); i
++)
2749 if ((isa
& isa_opts
[i
].mask
) != 0)
2751 opts
[num
++][0] = isa_opts
[i
].option
;
2752 isa
&= ~ isa_opts
[i
].mask
;
2756 if (isa
&& add_nl_p
)
2758 opts
[num
++][0] = isa_other
;
2759 sprintf (isa_other
, "(other isa: %#" HOST_WIDE_INT_PRINT
"x)",
2763 /* Add flag options. */
2764 for (i
= 0; i
< ARRAY_SIZE (flag_opts
); i
++)
2766 if ((flags
& flag_opts
[i
].mask
) != 0)
2768 opts
[num
++][0] = flag_opts
[i
].option
;
2769 flags
&= ~ flag_opts
[i
].mask
;
2773 if (flags
&& add_nl_p
)
2775 opts
[num
++][0] = target_other
;
2776 sprintf (target_other
, "(other flags: %#x)", flags
);
2779 /* Add -fpmath= option. */
2782 opts
[num
][0] = "-mfpmath=";
2783 switch ((int) fpmath
)
2786 opts
[num
++][1] = "387";
2790 opts
[num
++][1] = "sse";
2793 case FPMATH_387
| FPMATH_SSE
:
2794 opts
[num
++][1] = "sse+387";
2806 gcc_assert (num
< ARRAY_SIZE (opts
));
2808 /* Size the string. */
2810 sep_len
= (add_nl_p
) ? 3 : 1;
2811 for (i
= 0; i
< num
; i
++)
2814 for (j
= 0; j
< 2; j
++)
2816 len
+= strlen (opts
[i
][j
]);
2819 /* Build the string. */
2820 ret
= ptr
= (char *) xmalloc (len
);
2823 for (i
= 0; i
< num
; i
++)
2827 for (j
= 0; j
< 2; j
++)
2828 len2
[j
] = (opts
[i
][j
]) ? strlen (opts
[i
][j
]) : 0;
2835 if (add_nl_p
&& line_len
+ len2
[0] + len2
[1] > 70)
2843 for (j
= 0; j
< 2; j
++)
2846 memcpy (ptr
, opts
[i
][j
], len2
[j
]);
2848 line_len
+= len2
[j
];
2853 gcc_assert (ret
+ len
>= ptr
);
2858 /* Return true, if profiling code should be emitted before
2859 prologue. Otherwise it returns false.
2860 Note: For x86 with "hotfix" it is sorried. */
2862 ix86_profile_before_prologue (void)
2864 return flag_fentry
!= 0;
2867 /* Function that is callable from the debugger to print the current
2870 ix86_debug_options (void)
2872 char *opts
= ix86_target_string (ix86_isa_flags
, target_flags
,
2873 ix86_arch_string
, ix86_tune_string
,
2878 fprintf (stderr
, "%s\n\n", opts
);
2882 fputs ("<no options>\n\n", stderr
);
2887 /* Override various settings based on options. If MAIN_ARGS_P, the
2888 options are from the command line, otherwise they are from
2892 ix86_option_override_internal (bool main_args_p
)
2895 unsigned int ix86_arch_mask
, ix86_tune_mask
;
2896 const bool ix86_tune_specified
= (ix86_tune_string
!= NULL
);
2901 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
2902 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
2903 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
2904 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
2905 #define PTA_AES (HOST_WIDE_INT_1 << 4)
2906 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
2907 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
2908 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
2909 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
2910 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
2911 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
2912 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
2913 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
2914 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
2915 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
2916 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
2917 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
2918 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
2919 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
2920 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
2921 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
2922 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
2923 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
2924 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
2925 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
2926 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
2927 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
2928 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
2929 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
2930 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
2931 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
2932 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
2933 /* if this reaches 64, need to widen struct pta flags below */
2937 const char *const name
; /* processor name or nickname. */
2938 const enum processor_type processor
;
2939 const enum attr_cpu schedule
;
2940 const unsigned HOST_WIDE_INT flags
;
2942 const processor_alias_table
[] =
2944 {"i386", PROCESSOR_I386
, CPU_NONE
, 0},
2945 {"i486", PROCESSOR_I486
, CPU_NONE
, 0},
2946 {"i586", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
2947 {"pentium", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
2948 {"pentium-mmx", PROCESSOR_PENTIUM
, CPU_PENTIUM
, PTA_MMX
},
2949 {"winchip-c6", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
},
2950 {"winchip2", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
},
2951 {"c3", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
},
2952 {"c3-2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
| PTA_SSE
},
2953 {"i686", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
2954 {"pentiumpro", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
2955 {"pentium2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
},
2956 {"pentium3", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2958 {"pentium3m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2960 {"pentium-m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2961 PTA_MMX
| PTA_SSE
| PTA_SSE2
},
2962 {"pentium4", PROCESSOR_PENTIUM4
, CPU_NONE
,
2963 PTA_MMX
|PTA_SSE
| PTA_SSE2
},
2964 {"pentium4m", PROCESSOR_PENTIUM4
, CPU_NONE
,
2965 PTA_MMX
| PTA_SSE
| PTA_SSE2
},
2966 {"prescott", PROCESSOR_NOCONA
, CPU_NONE
,
2967 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
},
2968 {"nocona", PROCESSOR_NOCONA
, CPU_NONE
,
2969 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2970 | PTA_CX16
| PTA_NO_SAHF
},
2971 {"core2", PROCESSOR_CORE2_64
, CPU_CORE2
,
2972 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2973 | PTA_SSSE3
| PTA_CX16
},
2974 {"corei7", PROCESSOR_COREI7_64
, CPU_COREI7
,
2975 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2976 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_CX16
},
2977 {"corei7-avx", PROCESSOR_COREI7_64
, CPU_COREI7
,
2978 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2979 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
2980 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
},
2981 {"core-avx-i", PROCESSOR_COREI7_64
, CPU_COREI7
,
2982 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2983 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
2984 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
2985 | PTA_RDRND
| PTA_F16C
},
2986 {"core-avx2", PROCESSOR_COREI7_64
, CPU_COREI7
,
2987 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2988 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
| PTA_AVX2
2989 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
2990 | PTA_RDRND
| PTA_F16C
| PTA_BMI
| PTA_BMI2
| PTA_LZCNT
2991 | PTA_FMA
| PTA_MOVBE
},
2992 {"atom", PROCESSOR_ATOM
, CPU_ATOM
,
2993 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2994 | PTA_SSSE3
| PTA_CX16
| PTA_MOVBE
},
2995 {"geode", PROCESSOR_GEODE
, CPU_GEODE
,
2996 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
|PTA_PREFETCH_SSE
},
2997 {"k6", PROCESSOR_K6
, CPU_K6
, PTA_MMX
},
2998 {"k6-2", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
},
2999 {"k6-3", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
},
3000 {"athlon", PROCESSOR_ATHLON
, CPU_ATHLON
,
3001 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
3002 {"athlon-tbird", PROCESSOR_ATHLON
, CPU_ATHLON
,
3003 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
3004 {"athlon-4", PROCESSOR_ATHLON
, CPU_ATHLON
,
3005 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
3006 {"athlon-xp", PROCESSOR_ATHLON
, CPU_ATHLON
,
3007 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
3008 {"athlon-mp", PROCESSOR_ATHLON
, CPU_ATHLON
,
3009 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
3010 {"x86-64", PROCESSOR_K8
, CPU_K8
,
3011 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_NO_SAHF
},
3012 {"k8", PROCESSOR_K8
, CPU_K8
,
3013 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3014 | PTA_SSE2
| PTA_NO_SAHF
},
3015 {"k8-sse3", PROCESSOR_K8
, CPU_K8
,
3016 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3017 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
3018 {"opteron", PROCESSOR_K8
, CPU_K8
,
3019 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3020 | PTA_SSE2
| PTA_NO_SAHF
},
3021 {"opteron-sse3", PROCESSOR_K8
, CPU_K8
,
3022 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3023 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
3024 {"athlon64", PROCESSOR_K8
, CPU_K8
,
3025 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3026 | PTA_SSE2
| PTA_NO_SAHF
},
3027 {"athlon64-sse3", PROCESSOR_K8
, CPU_K8
,
3028 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3029 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
3030 {"athlon-fx", PROCESSOR_K8
, CPU_K8
,
3031 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3032 | PTA_SSE2
| PTA_NO_SAHF
},
3033 {"amdfam10", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
3034 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3035 | PTA_SSE2
| PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
},
3036 {"barcelona", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
3037 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3038 | PTA_SSE2
| PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
},
3039 {"bdver1", PROCESSOR_BDVER1
, CPU_BDVER1
,
3040 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3041 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3042 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
3043 | PTA_XOP
| PTA_LWP
},
3044 {"bdver2", PROCESSOR_BDVER2
, CPU_BDVER2
,
3045 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3046 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3047 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
3048 | PTA_XOP
| PTA_LWP
| PTA_BMI
| PTA_TBM
| PTA_F16C
3050 {"btver1", PROCESSOR_BTVER1
, CPU_GENERIC64
,
3051 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3052 | PTA_SSSE3
| PTA_SSE4A
|PTA_ABM
| PTA_CX16
},
3053 {"generic32", PROCESSOR_GENERIC32
, CPU_PENTIUMPRO
,
3054 0 /* flags are only used for -march switch. */ },
3055 {"generic64", PROCESSOR_GENERIC64
, CPU_GENERIC64
,
3056 PTA_64BIT
/* flags are only used for -march switch. */ },
3059 /* -mrecip options. */
3062 const char *string
; /* option name */
3063 unsigned int mask
; /* mask bits to set */
3065 const recip_options
[] =
3067 { "all", RECIP_MASK_ALL
},
3068 { "none", RECIP_MASK_NONE
},
3069 { "div", RECIP_MASK_DIV
},
3070 { "sqrt", RECIP_MASK_SQRT
},
3071 { "vec-div", RECIP_MASK_VEC_DIV
},
3072 { "vec-sqrt", RECIP_MASK_VEC_SQRT
},
3075 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
3077 /* Set up prefix/suffix so the error messages refer to either the command
3078 line argument, or the attribute(target). */
3087 prefix
= "option(\"";
3092 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3093 SUBTARGET_OVERRIDE_OPTIONS
;
3096 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3097 SUBSUBTARGET_OVERRIDE_OPTIONS
;
3101 ix86_isa_flags
|= OPTION_MASK_ISA_64BIT
;
3103 /* -fPIC is the default for x86_64. */
3104 if (TARGET_MACHO
&& TARGET_64BIT
)
3107 /* Need to check -mtune=generic first. */
3108 if (ix86_tune_string
)
3110 if (!strcmp (ix86_tune_string
, "generic")
3111 || !strcmp (ix86_tune_string
, "i686")
3112 /* As special support for cross compilers we read -mtune=native
3113 as -mtune=generic. With native compilers we won't see the
3114 -mtune=native, as it was changed by the driver. */
3115 || !strcmp (ix86_tune_string
, "native"))
3118 ix86_tune_string
= "generic64";
3120 ix86_tune_string
= "generic32";
3122 /* If this call is for setting the option attribute, allow the
3123 generic32/generic64 that was previously set. */
3124 else if (!main_args_p
3125 && (!strcmp (ix86_tune_string
, "generic32")
3126 || !strcmp (ix86_tune_string
, "generic64")))
3128 else if (!strncmp (ix86_tune_string
, "generic", 7))
3129 error ("bad value (%s) for %stune=%s %s",
3130 ix86_tune_string
, prefix
, suffix
, sw
);
3131 else if (!strcmp (ix86_tune_string
, "x86-64"))
3132 warning (OPT_Wdeprecated
, "%stune=x86-64%s is deprecated; use "
3133 "%stune=k8%s or %stune=generic%s instead as appropriate",
3134 prefix
, suffix
, prefix
, suffix
, prefix
, suffix
);
3138 if (ix86_arch_string
)
3139 ix86_tune_string
= ix86_arch_string
;
3140 if (!ix86_tune_string
)
3142 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
3143 ix86_tune_defaulted
= 1;
3146 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3147 need to use a sensible tune option. */
3148 if (!strcmp (ix86_tune_string
, "generic")
3149 || !strcmp (ix86_tune_string
, "x86-64")
3150 || !strcmp (ix86_tune_string
, "i686"))
3153 ix86_tune_string
= "generic64";
3155 ix86_tune_string
= "generic32";
3159 if (ix86_stringop_alg
== rep_prefix_8_byte
&& !TARGET_64BIT
)
3161 /* rep; movq isn't available in 32-bit code. */
3162 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3163 ix86_stringop_alg
= no_stringop
;
3166 if (!ix86_arch_string
)
3167 ix86_arch_string
= TARGET_64BIT
? "x86-64" : SUBTARGET32_DEFAULT_CPU
;
3169 ix86_arch_specified
= 1;
3171 if (!global_options_set
.x_ix86_abi
)
3172 ix86_abi
= DEFAULT_ABI
;
3174 if (global_options_set
.x_ix86_cmodel
)
3176 switch (ix86_cmodel
)
3181 ix86_cmodel
= CM_SMALL_PIC
;
3183 error ("code model %qs not supported in the %s bit mode",
3190 ix86_cmodel
= CM_MEDIUM_PIC
;
3192 error ("code model %qs not supported in the %s bit mode",
3194 else if (TARGET_X32
)
3195 error ("code model %qs not supported in x32 mode",
3202 ix86_cmodel
= CM_LARGE_PIC
;
3204 error ("code model %qs not supported in the %s bit mode",
3206 else if (TARGET_X32
)
3207 error ("code model %qs not supported in x32 mode",
3213 error ("code model %s does not support PIC mode", "32");
3215 error ("code model %qs not supported in the %s bit mode",
3222 error ("code model %s does not support PIC mode", "kernel");
3223 ix86_cmodel
= CM_32
;
3226 error ("code model %qs not supported in the %s bit mode",
3236 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3237 use of rip-relative addressing. This eliminates fixups that
3238 would otherwise be needed if this object is to be placed in a
3239 DLL, and is essentially just as efficient as direct addressing. */
3240 if (TARGET_64BIT
&& DEFAULT_ABI
== MS_ABI
)
3241 ix86_cmodel
= CM_SMALL_PIC
, flag_pic
= 1;
3242 else if (TARGET_64BIT
)
3243 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
3245 ix86_cmodel
= CM_32
;
3247 if (TARGET_MACHO
&& ix86_asm_dialect
== ASM_INTEL
)
3249 error ("-masm=intel not supported in this configuration");
3250 ix86_asm_dialect
= ASM_ATT
;
3252 if ((TARGET_64BIT
!= 0) != ((ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) != 0))
3253 sorry ("%i-bit mode not compiled in",
3254 (ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) ? 64 : 32);
3256 for (i
= 0; i
< pta_size
; i
++)
3257 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
3259 ix86_schedule
= processor_alias_table
[i
].schedule
;
3260 ix86_arch
= processor_alias_table
[i
].processor
;
3261 /* Default cpu tuning to the architecture. */
3262 ix86_tune
= ix86_arch
;
3264 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
3265 error ("CPU you selected does not support x86-64 "
3268 if (processor_alias_table
[i
].flags
& PTA_MMX
3269 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_MMX
))
3270 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
;
3271 if (processor_alias_table
[i
].flags
& PTA_3DNOW
3272 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW
))
3273 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW
;
3274 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
3275 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW_A
))
3276 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW_A
;
3277 if (processor_alias_table
[i
].flags
& PTA_SSE
3278 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE
))
3279 ix86_isa_flags
|= OPTION_MASK_ISA_SSE
;
3280 if (processor_alias_table
[i
].flags
& PTA_SSE2
3281 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE2
))
3282 ix86_isa_flags
|= OPTION_MASK_ISA_SSE2
;
3283 if (processor_alias_table
[i
].flags
& PTA_SSE3
3284 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE3
))
3285 ix86_isa_flags
|= OPTION_MASK_ISA_SSE3
;
3286 if (processor_alias_table
[i
].flags
& PTA_SSSE3
3287 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSSE3
))
3288 ix86_isa_flags
|= OPTION_MASK_ISA_SSSE3
;
3289 if (processor_alias_table
[i
].flags
& PTA_SSE4_1
3290 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_1
))
3291 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_1
;
3292 if (processor_alias_table
[i
].flags
& PTA_SSE4_2
3293 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_2
))
3294 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_2
;
3295 if (processor_alias_table
[i
].flags
& PTA_AVX
3296 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX
))
3297 ix86_isa_flags
|= OPTION_MASK_ISA_AVX
;
3298 if (processor_alias_table
[i
].flags
& PTA_AVX2
3299 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX2
))
3300 ix86_isa_flags
|= OPTION_MASK_ISA_AVX2
;
3301 if (processor_alias_table
[i
].flags
& PTA_FMA
3302 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FMA
))
3303 ix86_isa_flags
|= OPTION_MASK_ISA_FMA
;
3304 if (processor_alias_table
[i
].flags
& PTA_SSE4A
3305 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4A
))
3306 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4A
;
3307 if (processor_alias_table
[i
].flags
& PTA_FMA4
3308 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FMA4
))
3309 ix86_isa_flags
|= OPTION_MASK_ISA_FMA4
;
3310 if (processor_alias_table
[i
].flags
& PTA_XOP
3311 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_XOP
))
3312 ix86_isa_flags
|= OPTION_MASK_ISA_XOP
;
3313 if (processor_alias_table
[i
].flags
& PTA_LWP
3314 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_LWP
))
3315 ix86_isa_flags
|= OPTION_MASK_ISA_LWP
;
3316 if (processor_alias_table
[i
].flags
& PTA_ABM
3317 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_ABM
))
3318 ix86_isa_flags
|= OPTION_MASK_ISA_ABM
;
3319 if (processor_alias_table
[i
].flags
& PTA_BMI
3320 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_BMI
))
3321 ix86_isa_flags
|= OPTION_MASK_ISA_BMI
;
3322 if (processor_alias_table
[i
].flags
& (PTA_LZCNT
| PTA_ABM
)
3323 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_LZCNT
))
3324 ix86_isa_flags
|= OPTION_MASK_ISA_LZCNT
;
3325 if (processor_alias_table
[i
].flags
& PTA_TBM
3326 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_TBM
))
3327 ix86_isa_flags
|= OPTION_MASK_ISA_TBM
;
3328 if (processor_alias_table
[i
].flags
& PTA_BMI2
3329 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_BMI2
))
3330 ix86_isa_flags
|= OPTION_MASK_ISA_BMI2
;
3331 if (processor_alias_table
[i
].flags
& PTA_CX16
3332 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_CX16
))
3333 ix86_isa_flags
|= OPTION_MASK_ISA_CX16
;
3334 if (processor_alias_table
[i
].flags
& (PTA_POPCNT
| PTA_ABM
)
3335 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_POPCNT
))
3336 ix86_isa_flags
|= OPTION_MASK_ISA_POPCNT
;
3337 if (!(TARGET_64BIT
&& (processor_alias_table
[i
].flags
& PTA_NO_SAHF
))
3338 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SAHF
))
3339 ix86_isa_flags
|= OPTION_MASK_ISA_SAHF
;
3340 if (processor_alias_table
[i
].flags
& PTA_MOVBE
3341 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_MOVBE
))
3342 ix86_isa_flags
|= OPTION_MASK_ISA_MOVBE
;
3343 if (processor_alias_table
[i
].flags
& PTA_AES
3344 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AES
))
3345 ix86_isa_flags
|= OPTION_MASK_ISA_AES
;
3346 if (processor_alias_table
[i
].flags
& PTA_PCLMUL
3347 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_PCLMUL
))
3348 ix86_isa_flags
|= OPTION_MASK_ISA_PCLMUL
;
3349 if (processor_alias_table
[i
].flags
& PTA_FSGSBASE
3350 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FSGSBASE
))
3351 ix86_isa_flags
|= OPTION_MASK_ISA_FSGSBASE
;
3352 if (processor_alias_table
[i
].flags
& PTA_RDRND
3353 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_RDRND
))
3354 ix86_isa_flags
|= OPTION_MASK_ISA_RDRND
;
3355 if (processor_alias_table
[i
].flags
& PTA_F16C
3356 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_F16C
))
3357 ix86_isa_flags
|= OPTION_MASK_ISA_F16C
;
3358 if (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
))
3359 x86_prefetch_sse
= true;
3364 if (!strcmp (ix86_arch_string
, "generic"))
3365 error ("generic CPU can be used only for %stune=%s %s",
3366 prefix
, suffix
, sw
);
3367 else if (!strncmp (ix86_arch_string
, "generic", 7) || i
== pta_size
)
3368 error ("bad value (%s) for %sarch=%s %s",
3369 ix86_arch_string
, prefix
, suffix
, sw
);
3371 ix86_arch_mask
= 1u << ix86_arch
;
3372 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
3373 ix86_arch_features
[i
] = !!(initial_ix86_arch_features
[i
] & ix86_arch_mask
);
3375 for (i
= 0; i
< pta_size
; i
++)
3376 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
3378 ix86_schedule
= processor_alias_table
[i
].schedule
;
3379 ix86_tune
= processor_alias_table
[i
].processor
;
3382 if (!(processor_alias_table
[i
].flags
& PTA_64BIT
))
3384 if (ix86_tune_defaulted
)
3386 ix86_tune_string
= "x86-64";
3387 for (i
= 0; i
< pta_size
; i
++)
3388 if (! strcmp (ix86_tune_string
,
3389 processor_alias_table
[i
].name
))
3391 ix86_schedule
= processor_alias_table
[i
].schedule
;
3392 ix86_tune
= processor_alias_table
[i
].processor
;
3395 error ("CPU you selected does not support x86-64 "
3401 /* Adjust tuning when compiling for 32-bit ABI. */
3404 case PROCESSOR_GENERIC64
:
3405 ix86_tune
= PROCESSOR_GENERIC32
;
3406 ix86_schedule
= CPU_PENTIUMPRO
;
3409 case PROCESSOR_CORE2_64
:
3410 ix86_tune
= PROCESSOR_CORE2_32
;
3413 case PROCESSOR_COREI7_64
:
3414 ix86_tune
= PROCESSOR_COREI7_32
;
3421 /* Intel CPUs have always interpreted SSE prefetch instructions as
3422 NOPs; so, we can enable SSE prefetch instructions even when
3423 -mtune (rather than -march) points us to a processor that has them.
3424 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3425 higher processors. */
3427 && (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
)))
3428 x86_prefetch_sse
= true;
3432 if (ix86_tune_specified
&& i
== pta_size
)
3433 error ("bad value (%s) for %stune=%s %s",
3434 ix86_tune_string
, prefix
, suffix
, sw
);
3436 ix86_tune_mask
= 1u << ix86_tune
;
3437 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
3438 ix86_tune_features
[i
] = !!(initial_ix86_tune_features
[i
] & ix86_tune_mask
);
3440 #ifndef USE_IX86_FRAME_POINTER
3441 #define USE_IX86_FRAME_POINTER 0
3444 #ifndef USE_X86_64_FRAME_POINTER
3445 #define USE_X86_64_FRAME_POINTER 0
3448 /* Set the default values for switches whose default depends on TARGET_64BIT
3449 in case they weren't overwritten by command line options. */
3452 if (optimize
> 1 && !global_options_set
.x_flag_zee
)
3454 if (optimize
>= 1 && !global_options_set
.x_flag_omit_frame_pointer
)
3455 flag_omit_frame_pointer
= !USE_X86_64_FRAME_POINTER
;
3456 if (flag_asynchronous_unwind_tables
== 2)
3457 flag_unwind_tables
= flag_asynchronous_unwind_tables
= 1;
3458 if (flag_pcc_struct_return
== 2)
3459 flag_pcc_struct_return
= 0;
3463 if (optimize
>= 1 && !global_options_set
.x_flag_omit_frame_pointer
)
3464 flag_omit_frame_pointer
= !(USE_IX86_FRAME_POINTER
|| optimize_size
);
3465 if (flag_asynchronous_unwind_tables
== 2)
3466 flag_asynchronous_unwind_tables
= !USE_IX86_FRAME_POINTER
;
3467 if (flag_pcc_struct_return
== 2)
3468 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
3472 ix86_cost
= &ix86_size_cost
;
3474 ix86_cost
= processor_target_table
[ix86_tune
].cost
;
3476 /* Arrange to set up i386_stack_locals for all functions. */
3477 init_machine_status
= ix86_init_machine_status
;
3479 /* Validate -mregparm= value. */
3480 if (global_options_set
.x_ix86_regparm
)
3483 warning (0, "-mregparm is ignored in 64-bit mode");
3484 if (ix86_regparm
> REGPARM_MAX
)
3486 error ("-mregparm=%d is not between 0 and %d",
3487 ix86_regparm
, REGPARM_MAX
);
3492 ix86_regparm
= REGPARM_MAX
;
3494 /* Default align_* from the processor table. */
3495 if (align_loops
== 0)
3497 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
3498 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
3500 if (align_jumps
== 0)
3502 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
3503 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
3505 if (align_functions
== 0)
3507 align_functions
= processor_target_table
[ix86_tune
].align_func
;
3510 /* Provide default for -mbranch-cost= value. */
3511 if (!global_options_set
.x_ix86_branch_cost
)
3512 ix86_branch_cost
= ix86_cost
->branch_cost
;
3516 target_flags
|= TARGET_SUBTARGET64_DEFAULT
& ~target_flags_explicit
;
3518 /* Enable by default the SSE and MMX builtins. Do allow the user to
3519 explicitly disable any of these. In particular, disabling SSE and
3520 MMX for kernel code is extremely useful. */
3521 if (!ix86_arch_specified
)
3523 |= ((OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_MMX
3524 | TARGET_SUBTARGET64_ISA_DEFAULT
) & ~ix86_isa_flags_explicit
);
3527 warning (0, "%srtd%s is ignored in 64bit mode", prefix
, suffix
);
3531 target_flags
|= TARGET_SUBTARGET32_DEFAULT
& ~target_flags_explicit
;
3533 if (!ix86_arch_specified
)
3535 |= TARGET_SUBTARGET32_ISA_DEFAULT
& ~ix86_isa_flags_explicit
;
3537 /* i386 ABI does not specify red zone. It still makes sense to use it
3538 when programmer takes care to stack from being destroyed. */
3539 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
3540 target_flags
|= MASK_NO_RED_ZONE
;
3543 /* Keep nonleaf frame pointers. */
3544 if (flag_omit_frame_pointer
)
3545 target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
3546 else if (TARGET_OMIT_LEAF_FRAME_POINTER
)
3547 flag_omit_frame_pointer
= 1;
3549 /* If we're doing fast math, we don't care about comparison order
3550 wrt NaNs. This lets us use a shorter comparison sequence. */
3551 if (flag_finite_math_only
)
3552 target_flags
&= ~MASK_IEEE_FP
;
3554 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3555 since the insns won't need emulation. */
3556 if (x86_arch_always_fancy_math_387
& ix86_arch_mask
)
3557 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
3559 /* Likewise, if the target doesn't have a 387, or we've specified
3560 software floating point, don't use 387 inline intrinsics. */
3562 target_flags
|= MASK_NO_FANCY_MATH_387
;
3564 /* Turn on MMX builtins for -msse. */
3567 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
& ~ix86_isa_flags_explicit
;
3568 x86_prefetch_sse
= true;
3571 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3572 if (TARGET_SSE4_2
|| TARGET_ABM
)
3573 ix86_isa_flags
|= OPTION_MASK_ISA_POPCNT
& ~ix86_isa_flags_explicit
;
3575 /* Turn on lzcnt instruction for -mabm. */
3577 ix86_isa_flags
|= OPTION_MASK_ISA_LZCNT
& ~ix86_isa_flags_explicit
;
3579 /* Validate -mpreferred-stack-boundary= value or default it to
3580 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3581 ix86_preferred_stack_boundary
= PREFERRED_STACK_BOUNDARY_DEFAULT
;
3582 if (global_options_set
.x_ix86_preferred_stack_boundary_arg
)
3584 int min
= (TARGET_64BIT
? 4 : 2);
3585 int max
= (TARGET_SEH
? 4 : 12);
3587 if (ix86_preferred_stack_boundary_arg
< min
3588 || ix86_preferred_stack_boundary_arg
> max
)
3591 error ("-mpreferred-stack-boundary is not supported "
3594 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3595 ix86_preferred_stack_boundary_arg
, min
, max
);
3598 ix86_preferred_stack_boundary
3599 = (1 << ix86_preferred_stack_boundary_arg
) * BITS_PER_UNIT
;
3602 /* Set the default value for -mstackrealign. */
3603 if (ix86_force_align_arg_pointer
== -1)
3604 ix86_force_align_arg_pointer
= STACK_REALIGN_DEFAULT
;
3606 ix86_default_incoming_stack_boundary
= PREFERRED_STACK_BOUNDARY
;
3608 /* Validate -mincoming-stack-boundary= value or default it to
3609 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3610 ix86_incoming_stack_boundary
= ix86_default_incoming_stack_boundary
;
3611 if (global_options_set
.x_ix86_incoming_stack_boundary_arg
)
3613 if (ix86_incoming_stack_boundary_arg
< (TARGET_64BIT
? 4 : 2)
3614 || ix86_incoming_stack_boundary_arg
> 12)
3615 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3616 ix86_incoming_stack_boundary_arg
, TARGET_64BIT
? 4 : 2);
3619 ix86_user_incoming_stack_boundary
3620 = (1 << ix86_incoming_stack_boundary_arg
) * BITS_PER_UNIT
;
3621 ix86_incoming_stack_boundary
3622 = ix86_user_incoming_stack_boundary
;
3626 /* Accept -msseregparm only if at least SSE support is enabled. */
3627 if (TARGET_SSEREGPARM
3629 error ("%ssseregparm%s used without SSE enabled", prefix
, suffix
);
3631 if (global_options_set
.x_ix86_fpmath
)
3633 if (ix86_fpmath
& FPMATH_SSE
)
3637 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3638 ix86_fpmath
= FPMATH_387
;
3640 else if ((ix86_fpmath
& FPMATH_387
) && !TARGET_80387
)
3642 warning (0, "387 instruction set disabled, using SSE arithmetics");
3643 ix86_fpmath
= FPMATH_SSE
;
3648 ix86_fpmath
= TARGET_FPMATH_DEFAULT
;
3650 /* If the i387 is disabled, then do not return values in it. */
3652 target_flags
&= ~MASK_FLOAT_RETURNS
;
3654 /* Use external vectorized library in vectorizing intrinsics. */
3655 if (global_options_set
.x_ix86_veclibabi_type
)
3656 switch (ix86_veclibabi_type
)
3658 case ix86_veclibabi_type_svml
:
3659 ix86_veclib_handler
= ix86_veclibabi_svml
;
3662 case ix86_veclibabi_type_acml
:
3663 ix86_veclib_handler
= ix86_veclibabi_acml
;
3670 if ((!USE_IX86_FRAME_POINTER
3671 || (x86_accumulate_outgoing_args
& ix86_tune_mask
))
3672 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3674 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3676 /* ??? Unwind info is not correct around the CFG unless either a frame
3677 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3678 unwind info generation to be aware of the CFG and propagating states
3680 if ((flag_unwind_tables
|| flag_asynchronous_unwind_tables
3681 || flag_exceptions
|| flag_non_call_exceptions
)
3682 && flag_omit_frame_pointer
3683 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
3685 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3686 warning (0, "unwind tables currently require either a frame pointer "
3687 "or %saccumulate-outgoing-args%s for correctness",
3689 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3692 /* If stack probes are required, the space used for large function
3693 arguments on the stack must also be probed, so enable
3694 -maccumulate-outgoing-args so this happens in the prologue. */
3695 if (TARGET_STACK_PROBE
3696 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
3698 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3699 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3700 "for correctness", prefix
, suffix
);
3701 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3704 /* For sane SSE instruction set generation we need fcomi instruction.
3705 It is safe to enable all CMOVE instructions. Also, RDRAND intrinsic
3706 expands to a sequence that includes conditional move. */
3707 if (TARGET_SSE
|| TARGET_RDRND
)
3710 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3713 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
3714 p
= strchr (internal_label_prefix
, 'X');
3715 internal_label_prefix_len
= p
- internal_label_prefix
;
3719 /* When scheduling description is not available, disable scheduler pass
3720 so it won't slow down the compilation and make x87 code slower. */
3721 if (!TARGET_SCHEDULE
)
3722 flag_schedule_insns_after_reload
= flag_schedule_insns
= 0;
3724 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
3725 ix86_cost
->simultaneous_prefetches
,
3726 global_options
.x_param_values
,
3727 global_options_set
.x_param_values
);
3728 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
, ix86_cost
->prefetch_block
,
3729 global_options
.x_param_values
,
3730 global_options_set
.x_param_values
);
3731 maybe_set_param_value (PARAM_L1_CACHE_SIZE
, ix86_cost
->l1_cache_size
,
3732 global_options
.x_param_values
,
3733 global_options_set
.x_param_values
);
3734 maybe_set_param_value (PARAM_L2_CACHE_SIZE
, ix86_cost
->l2_cache_size
,
3735 global_options
.x_param_values
,
3736 global_options_set
.x_param_values
);
3738 /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. */
3739 if (flag_prefetch_loop_arrays
< 0
3742 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL
)
3743 flag_prefetch_loop_arrays
= 1;
3745 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3746 can be optimized to ap = __builtin_next_arg (0). */
3747 if (!TARGET_64BIT
&& !flag_split_stack
)
3748 targetm
.expand_builtin_va_start
= NULL
;
3752 ix86_gen_leave
= gen_leave_rex64
;
3753 ix86_gen_add3
= gen_adddi3
;
3754 ix86_gen_sub3
= gen_subdi3
;
3755 ix86_gen_sub3_carry
= gen_subdi3_carry
;
3756 ix86_gen_one_cmpl2
= gen_one_cmpldi2
;
3757 ix86_gen_monitor
= gen_sse3_monitor64
;
3758 ix86_gen_andsp
= gen_anddi3
;
3759 ix86_gen_allocate_stack_worker
= gen_allocate_stack_worker_probe_di
;
3760 ix86_gen_adjust_stack_and_probe
= gen_adjust_stack_and_probedi
;
3761 ix86_gen_probe_stack_range
= gen_probe_stack_rangedi
;
3765 ix86_gen_leave
= gen_leave
;
3766 ix86_gen_add3
= gen_addsi3
;
3767 ix86_gen_sub3
= gen_subsi3
;
3768 ix86_gen_sub3_carry
= gen_subsi3_carry
;
3769 ix86_gen_one_cmpl2
= gen_one_cmplsi2
;
3770 ix86_gen_monitor
= gen_sse3_monitor
;
3771 ix86_gen_andsp
= gen_andsi3
;
3772 ix86_gen_allocate_stack_worker
= gen_allocate_stack_worker_probe_si
;
3773 ix86_gen_adjust_stack_and_probe
= gen_adjust_stack_and_probesi
;
3774 ix86_gen_probe_stack_range
= gen_probe_stack_rangesi
;
3778 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3780 target_flags
|= MASK_CLD
& ~target_flags_explicit
;
3783 if (!TARGET_64BIT
&& flag_pic
)
3785 if (flag_fentry
> 0)
3786 sorry ("-mfentry isn%'t supported for 32-bit in combination "
3790 else if (TARGET_SEH
)
3792 if (flag_fentry
== 0)
3793 sorry ("-mno-fentry isn%'t compatible with SEH");
3796 else if (flag_fentry
< 0)
3798 #if defined(PROFILE_BEFORE_PROLOGUE)
3807 /* When not optimize for size, enable vzeroupper optimization for
3808 TARGET_AVX with -fexpensive-optimizations and split 32-byte
3809 AVX unaligned load/store. */
3812 if (flag_expensive_optimizations
3813 && !(target_flags_explicit
& MASK_VZEROUPPER
))
3814 target_flags
|= MASK_VZEROUPPER
;
3815 if ((x86_avx256_split_unaligned_load
& ix86_tune_mask
)
3816 && !(target_flags_explicit
& MASK_AVX256_SPLIT_UNALIGNED_LOAD
))
3817 target_flags
|= MASK_AVX256_SPLIT_UNALIGNED_LOAD
;
3818 if ((x86_avx256_split_unaligned_store
& ix86_tune_mask
)
3819 && !(target_flags_explicit
& MASK_AVX256_SPLIT_UNALIGNED_STORE
))
3820 target_flags
|= MASK_AVX256_SPLIT_UNALIGNED_STORE
;
3821 /* Enable 128-bit AVX instruction generation for the auto-vectorizer. */
3822 if (TARGET_AVX128_OPTIMAL
&& !(target_flags_explicit
& MASK_PREFER_AVX128
))
3823 target_flags
|= MASK_PREFER_AVX128
;
3828 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
3829 target_flags
&= ~MASK_VZEROUPPER
;
3832 if (ix86_recip_name
)
3834 char *p
= ASTRDUP (ix86_recip_name
);
3836 unsigned int mask
, i
;
3839 while ((q
= strtok (p
, ",")) != NULL
)
3850 if (!strcmp (q
, "default"))
3851 mask
= RECIP_MASK_ALL
;
3854 for (i
= 0; i
< ARRAY_SIZE (recip_options
); i
++)
3855 if (!strcmp (q
, recip_options
[i
].string
))
3857 mask
= recip_options
[i
].mask
;
3861 if (i
== ARRAY_SIZE (recip_options
))
3863 error ("unknown option for -mrecip=%s", q
);
3865 mask
= RECIP_MASK_NONE
;
3869 recip_mask_explicit
|= mask
;
3871 recip_mask
&= ~mask
;
3878 recip_mask
|= RECIP_MASK_ALL
& ~recip_mask_explicit
;
3879 else if (target_flags_explicit
& MASK_RECIP
)
3880 recip_mask
&= ~(RECIP_MASK_ALL
& ~recip_mask_explicit
);
3882 /* Save the initial options in case the user does function specific
3885 target_option_default_node
= target_option_current_node
3886 = build_target_option_node ();
/* NOTE(review): damaged extraction -- the leading integers are the original
   file's line numbers fused into the text, and gaps in that numbering mark
   dropped lines (function head, braces, return statements).  Code kept
   byte-identical; comments only.  */
/* Predicate over an rtx VAL: true for a bare AVX-256-mode hard register, and
   (per the loop below) scans a PARALLEL for EXPR_LIST entries whose register
   operand has OImode or an AVX-256 mode.  The actual return statements were
   dropped by the extraction.  */
3889 /* Return TRUE if VAL is passed in register with 256bit AVX modes. */
3892 function_pass_avx256_p (const_rtx val
)
3897 if (REG_P (val
) && VALID_AVX256_REG_MODE (GET_MODE (val
)))
3900 if (GET_CODE (val
) == PARALLEL
)
/* Walk the PARALLEL vector backwards, inspecting each element.  */
3905 for (i
= XVECLEN (val
, 0) - 1; i
>= 0; i
--)
3907 r
= XVECEXP (val
, 0, i
);
3908 if (GET_CODE (r
) == EXPR_LIST
3910 && REG_P (XEXP (r
, 0))
3911 && (GET_MODE (XEXP (r
, 0)) == OImode
3912 || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r
, 0)))))
/* NOTE(review): damaged extraction -- leading integers are original line
   numbers; the function's storage class, braces and blank lines were dropped.
   Code kept byte-identical; comments only.  */
/* TARGET_OPTION_OVERRIDE hook: simply delegates to
   ix86_option_override_internal with main_args_p == true.  */
3920 /* Implement the TARGET_OPTION_OVERRIDE hook. */
3923 ix86_option_override (void)
3925 ix86_option_override_internal (true);
/* NOTE(review): damaged extraction -- leading integers are original line
   numbers fused into the text; numbering gaps mark dropped lines (braces,
   loop bodies of the final two loops, conditions).  Code kept byte-identical;
   comments only.  */
/* Adjusts fixed_regs[] / call_used_regs[] / reg_names[] and the
   CLOBBERED_REGS class after all compiler flags are known: resolves the
   conditional (>1) register-usage encodings, fixes the PIC register,
   applies the 64-bit MS ABI call-used set, and squashes MMX/SSE/FPU and
   (presumably, in the dropped loop bodies) REX registers when the
   corresponding feature is unavailable -- TODO confirm against full source.  */
3928 /* Update register usage after having seen the compiler flags. */
3931 ix86_conditional_register_usage (void)
/* Values > 1 in these tables encode "fixed/call-used only in one of the
   32-/64-bit modes"; resolve them to 0/1 here (3 => 64-bit, 2 => 32-bit).  */
3936 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
3938 if (fixed_regs
[i
] > 1)
3939 fixed_regs
[i
] = (fixed_regs
[i
] == (TARGET_64BIT
? 3 : 2));
3940 if (call_used_regs
[i
] > 1)
3941 call_used_regs
[i
] = (call_used_regs
[i
] == (TARGET_64BIT
? 3 : 2));
3944 /* The PIC register, if it exists, is fixed. */
3945 j
= PIC_OFFSET_TABLE_REGNUM
;
3946 if (j
!= INVALID_REGNUM
)
3947 fixed_regs
[j
] = call_used_regs
[j
] = 1;
3949 /* The 64-bit MS_ABI changes the set of call-used registers. */
3950 if (TARGET_64BIT_MS_ABI
)
/* SI/DI and XMM6..XMM15 are callee-saved under the MS 64-bit ABI.  */
3952 call_used_regs
[SI_REG
] = 0;
3953 call_used_regs
[DI_REG
] = 0;
3954 call_used_regs
[XMM6_REG
] = 0;
3955 call_used_regs
[XMM7_REG
] = 0;
3956 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
3957 call_used_regs
[i
] = 0;
3960 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3961 other call-clobbered regs for 64-bit. */
3964 CLEAR_HARD_REG_SET (reg_class_contents
[(int)CLOBBERED_REGS
]);
/* Rebuild CLOBBERED_REGS as the call-used subset of GENERAL_REGS.  */
3966 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
3967 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)GENERAL_REGS
], i
)
3968 && call_used_regs
[i
])
3969 SET_HARD_REG_BIT (reg_class_contents
[(int)CLOBBERED_REGS
], i
);
3972 /* If MMX is disabled, squash the registers. */
3974 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
3975 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)MMX_REGS
], i
))
3976 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
3978 /* If SSE is disabled, squash the registers. */
3980 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
3981 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)SSE_REGS
], i
))
3982 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
3984 /* If the FPU is disabled, squash the registers. */
3985 if (! (TARGET_80387
|| TARGET_FLOAT_RETURNS_IN_80387
))
3986 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
3987 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)FLOAT_REGS
], i
))
3988 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
3990 /* If 32-bit, squash the 64-bit registers. */
/* NOTE(review): the bodies of these two loops (presumably blanking
   reg_names for R8..R15 / XMM8..XMM15) were dropped by the extraction.  */
3993 for (i
= FIRST_REX_INT_REG
; i
<= LAST_REX_INT_REG
; i
++)
3995 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
/* NOTE(review): damaged extraction -- leading integers are original line
   numbers; braces and blank lines were dropped.  Code kept byte-identical;
   comments only.  */
/* TARGET_OPTION_SAVE hook: copies the current target-option globals into
   *PTR so a target("...") attribute / pragma can later restore them.  */
4001 /* Save the current options */
4004 ix86_function_specific_save (struct cl_target_option
*ptr
)
4006 ptr
->arch
= ix86_arch
;
4007 ptr
->schedule
= ix86_schedule
;
4008 ptr
->tune
= ix86_tune
;
4009 ptr
->branch_cost
= ix86_branch_cost
;
4010 ptr
->tune_defaulted
= ix86_tune_defaulted
;
4011 ptr
->arch_specified
= ix86_arch_specified
;
4012 ptr
->x_ix86_isa_flags_explicit
= ix86_isa_flags_explicit
;
4013 ptr
->ix86_target_flags_explicit
= target_flags_explicit
;
4014 ptr
->x_recip_mask_explicit
= recip_mask_explicit
;
4016 /* The fields are char but the variables are not; make sure the
4017 values fit in the fields. */
/* These asserts catch truncation when the wide globals are narrowed into
   the char-sized struct fields.  */
4018 gcc_assert (ptr
->arch
== ix86_arch
);
4019 gcc_assert (ptr
->schedule
== ix86_schedule
);
4020 gcc_assert (ptr
->tune
== ix86_tune
);
4021 gcc_assert (ptr
->branch_cost
== ix86_branch_cost
);
/* NOTE(review): damaged extraction -- leading integers are original line
   numbers; braces, the loop-index declaration and blank lines were dropped.
   Code kept byte-identical; comments only.  */
/* TARGET_OPTION_RESTORE hook: the inverse of ix86_function_specific_save.
   Copies *PTR back into the target-option globals, then rebuilds the
   per-arch and per-tune feature tables if arch/tune actually changed.  */
4024 /* Restore the current options */
4027 ix86_function_specific_restore (struct cl_target_option
*ptr
)
4029 enum processor_type old_tune
= ix86_tune
;
4030 enum processor_type old_arch
= ix86_arch
;
4031 unsigned int ix86_arch_mask
, ix86_tune_mask
;
4034 ix86_arch
= (enum processor_type
) ptr
->arch
;
4035 ix86_schedule
= (enum attr_cpu
) ptr
->schedule
;
4036 ix86_tune
= (enum processor_type
) ptr
->tune
;
4037 ix86_branch_cost
= ptr
->branch_cost
;
4038 ix86_tune_defaulted
= ptr
->tune_defaulted
;
4039 ix86_arch_specified
= ptr
->arch_specified
;
4040 ix86_isa_flags_explicit
= ptr
->x_ix86_isa_flags_explicit
;
4041 target_flags_explicit
= ptr
->ix86_target_flags_explicit
;
4042 recip_mask_explicit
= ptr
->x_recip_mask_explicit
;
4044 /* Recreate the arch feature tests if the arch changed */
4045 if (old_arch
!= ix86_arch
)
4047 ix86_arch_mask
= 1u << ix86_arch
;
4048 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
4049 ix86_arch_features
[i
]
4050 = !!(initial_ix86_arch_features
[i
] & ix86_arch_mask
);
4053 /* Recreate the tune optimization tests */
4054 if (old_tune
!= ix86_tune
)
4056 ix86_tune_mask
= 1u << ix86_tune
;
4057 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
4058 ix86_tune_features
[i
]
4059 = !!(initial_ix86_tune_features
[i
] & ix86_tune_mask
);
/* NOTE(review): damaged extraction -- leading integers are original line
   numbers; the declaration of target_string, braces, and some fprintf
   argument lines were dropped.  Code kept byte-identical; comments only.  */
/* TARGET_OPTION_PRINT hook: dumps *PTR (isa/target flags rendered by
   ix86_target_string, plus arch, tune and branch_cost) to FILE, each line
   indented by INDENT columns via the "%*s" idiom.  */
4063 /* Print the current options */
4066 ix86_function_specific_print (FILE *file
, int indent
,
4067 struct cl_target_option
*ptr
)
4070 = ix86_target_string (ptr
->x_ix86_isa_flags
, ptr
->x_target_flags
,
4071 NULL
, NULL
, ptr
->x_ix86_fpmath
, false);
4073 fprintf (file
, "%*sarch = %d (%s)\n",
/* Guard against an out-of-range enum before indexing cpu_names[].  */
4076 ((ptr
->arch
< TARGET_CPU_DEFAULT_max
)
4077 ? cpu_names
[ptr
->arch
]
4080 fprintf (file
, "%*stune = %d (%s)\n",
4083 ((ptr
->tune
< TARGET_CPU_DEFAULT_max
)
4084 ? cpu_names
[ptr
->tune
]
4087 fprintf (file
, "%*sbranch_cost = %d\n", indent
, "", ptr
->branch_cost
);
/* target_string was heap-allocated by ix86_target_string; release it.  */
4091 fprintf (file
, "%*s%s\n", indent
, "", target_string
);
4092 free (target_string
);
/* NOTE(review): damaged extraction -- leading integers are original line
   numbers fused into the text; numbering gaps mark many dropped lines
   (return statements, brace lines, several declarations such as the attrs[]
   table header and the orig_p/opt/opt_set_p/ch locals).  Code kept
   byte-identical; comments only.  */
/* Worker for attribute((target("...")))/pragma parsing: walks ARGS (a
   TREE_LIST or STRING_CST), splits comma-separated option strings, matches
   each against the attrs[] table built from the IX86_ATTR_* macros, and
   applies it -- ISA options via ix86_handle_option, yes/no options by
   toggling target_flags, string options into P_STRINGS[], enum options via
   set_option.  Return value (presumably false on error, true on success --
   TODO confirm) was dropped by the extraction.  */
4097 /* Inner function to process the attribute((target(...))), take an argument and
4098 set the current options from the argument. If we have a list, recursively go
4102 ix86_valid_target_attribute_inner_p (tree args
, char *p_strings
[],
4103 struct gcc_options
*enum_opts_set
)
/* Table-row builders: string, its length, option kind, OPT_* enum, mask.  */
4108 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4109 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4110 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4111 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4112 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4128 enum ix86_opt_type type
;
/* ISA toggles recognized inside target("...").  */
4133 IX86_ATTR_ISA ("3dnow", OPT_m3dnow
),
4134 IX86_ATTR_ISA ("abm", OPT_mabm
),
4135 IX86_ATTR_ISA ("bmi", OPT_mbmi
),
4136 IX86_ATTR_ISA ("bmi2", OPT_mbmi2
),
4137 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt
),
4138 IX86_ATTR_ISA ("tbm", OPT_mtbm
),
4139 IX86_ATTR_ISA ("aes", OPT_maes
),
4140 IX86_ATTR_ISA ("avx", OPT_mavx
),
4141 IX86_ATTR_ISA ("avx2", OPT_mavx2
),
4142 IX86_ATTR_ISA ("mmx", OPT_mmmx
),
4143 IX86_ATTR_ISA ("pclmul", OPT_mpclmul
),
4144 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt
),
4145 IX86_ATTR_ISA ("sse", OPT_msse
),
4146 IX86_ATTR_ISA ("sse2", OPT_msse2
),
4147 IX86_ATTR_ISA ("sse3", OPT_msse3
),
4148 IX86_ATTR_ISA ("sse4", OPT_msse4
),
4149 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1
),
4150 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2
),
4151 IX86_ATTR_ISA ("sse4a", OPT_msse4a
),
4152 IX86_ATTR_ISA ("ssse3", OPT_mssse3
),
4153 IX86_ATTR_ISA ("fma4", OPT_mfma4
),
4154 IX86_ATTR_ISA ("fma", OPT_mfma
),
4155 IX86_ATTR_ISA ("xop", OPT_mxop
),
4156 IX86_ATTR_ISA ("lwp", OPT_mlwp
),
4157 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase
),
4158 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd
),
4159 IX86_ATTR_ISA ("f16c", OPT_mf16c
),
/* Enum-valued option.  */
4162 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_
),
4164 /* string options */
4165 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH
),
4166 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE
),
/* Flag options toggled via target_flags masks.  Some mask arguments
   (e.g. for "cld", "ieee-fp", "recip") were dropped by the extraction.  */
4169 IX86_ATTR_YES ("cld",
4173 IX86_ATTR_NO ("fancy-math-387",
4174 OPT_mfancy_math_387
,
4175 MASK_NO_FANCY_MATH_387
),
4177 IX86_ATTR_YES ("ieee-fp",
4181 IX86_ATTR_YES ("inline-all-stringops",
4182 OPT_minline_all_stringops
,
4183 MASK_INLINE_ALL_STRINGOPS
),
4185 IX86_ATTR_YES ("inline-stringops-dynamically",
4186 OPT_minline_stringops_dynamically
,
4187 MASK_INLINE_STRINGOPS_DYNAMICALLY
),
4189 IX86_ATTR_NO ("align-stringops",
4190 OPT_mno_align_stringops
,
4191 MASK_NO_ALIGN_STRINGOPS
),
4193 IX86_ATTR_YES ("recip",
4199 /* If this is a list, recurse to get the options. */
4200 if (TREE_CODE (args
) == TREE_LIST
)
4204 for (; args
; args
= TREE_CHAIN (args
))
4205 if (TREE_VALUE (args
)
4206 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args
),
4207 p_strings
, enum_opts_set
))
4213 else if (TREE_CODE (args
) != STRING_CST
)
4216 /* Handle multiple arguments separated by commas. */
4217 next_optstr
= ASTRDUP (TREE_STRING_POINTER (args
));
4219 while (next_optstr
&& *next_optstr
!= '\0')
4221 char *p
= next_optstr
;
4223 char *comma
= strchr (next_optstr
, ',');
4224 const char *opt_string
;
4225 size_t len
, opt_len
;
4230 enum ix86_opt_type type
= ix86_opt_unknown
;
/* Split off the token before the comma; advance past it.  */
4236 len
= comma
- next_optstr
;
4237 next_optstr
= comma
+ 1;
4245 /* Recognize no-xxx. */
4246 if (len
> 3 && p
[0] == 'n' && p
[1] == 'o' && p
[2] == '-')
4255 /* Find the option. */
4258 for (i
= 0; i
< ARRAY_SIZE (attrs
); i
++)
4260 type
= attrs
[i
].type
;
4261 opt_len
= attrs
[i
].len
;
/* Fast first-character check, then full prefix compare.  */
4262 if (ch
== attrs
[i
].string
[0]
4263 && ((type
!= ix86_opt_str
&& type
!= ix86_opt_enum
)
4266 && memcmp (p
, attrs
[i
].string
, opt_len
) == 0)
4269 mask
= attrs
[i
].mask
;
4270 opt_string
= attrs
[i
].string
;
4275 /* Process the option. */
4278 error ("attribute(target(\"%s\")) is unknown", orig_p
);
4282 else if (type
== ix86_opt_isa
)
/* ISA options are routed through the normal option machinery so all
   implied-flag bookkeeping happens as for command-line -m flags.  */
4284 struct cl_decoded_option decoded
;
4286 generate_option (opt
, NULL
, opt_set_p
, CL_TARGET
, &decoded
);
4287 ix86_handle_option (&global_options
, &global_options_set
,
4288 &decoded
, input_location
);
4291 else if (type
== ix86_opt_yes
|| type
== ix86_opt_no
)
4293 if (type
== ix86_opt_no
)
4294 opt_set_p
= !opt_set_p
;
4297 target_flags
|= mask
;
4299 target_flags
&= ~mask
;
4302 else if (type
== ix86_opt_str
)
/* Duplicate string so the caller owns it (arch=/tune= values).  */
4306 error ("option(\"%s\") was already specified", opt_string
);
4310 p_strings
[opt
] = xstrdup (p
+ opt_len
);
4313 else if (type
== ix86_opt_enum
)
4318 arg_ok
= opt_enum_arg_to_value (opt
, p
+ opt_len
, &value
, CL_TARGET
);
4320 set_option (&global_options
, enum_opts_set
, opt
, value
,
4321 p
+ opt_len
, DK_UNSPECIFIED
, input_location
,
4325 error ("attribute(target(\"%s\")) is unknown", orig_p
);
/* NOTE(review): damaged extraction -- leading integers are original line
   numbers; numbering gaps mark dropped lines (declarations of t/i, early
   return on parse failure, closing braces, the final return).  Code kept
   byte-identical; comments only.  */
/* Builds a TARGET_OPTION_NODE for a target("...") attribute: saves the
   current arch/tune/fpmath settings, lets
   ix86_valid_target_attribute_inner_p apply the attribute's options, reruns
   ix86_option_override_internal when anything differs from the defaults,
   snapshots the result with build_target_option_node, then restores the
   saved globals and frees the temporary strings.  */
4337 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4340 ix86_valid_target_attribute_tree (tree args
)
4342 const char *orig_arch_string
= ix86_arch_string
;
4343 const char *orig_tune_string
= ix86_tune_string
;
4344 enum fpmath_unit orig_fpmath_set
= global_options_set
.x_ix86_fpmath
;
4345 int orig_tune_defaulted
= ix86_tune_defaulted
;
4346 int orig_arch_specified
= ix86_arch_specified
;
4347 char *option_strings
[IX86_FUNCTION_SPECIFIC_MAX
] = { NULL
, NULL
};
4350 struct cl_target_option
*def
4351 = TREE_TARGET_OPTION (target_option_default_node
);
4352 struct gcc_options enum_opts_set
;
4354 memset (&enum_opts_set
, 0, sizeof (enum_opts_set
));
4356 /* Process each of the options on the chain. */
4357 if (! ix86_valid_target_attribute_inner_p (args
, option_strings
,
4361 /* If the changed options are different from the default, rerun
4362 ix86_option_override_internal, and then save the options away.
4363 The string options are are attribute options, and will be undone
4364 when we copy the save structure. */
4365 if (ix86_isa_flags
!= def
->x_ix86_isa_flags
4366 || target_flags
!= def
->x_target_flags
4367 || option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
]
4368 || option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
]
4369 || enum_opts_set
.x_ix86_fpmath
)
4371 /* If we are using the default tune= or arch=, undo the string assigned,
4372 and use the default. */
4373 if (option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
])
4374 ix86_arch_string
= option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
];
4375 else if (!orig_arch_specified
)
4376 ix86_arch_string
= NULL
;
4378 if (option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
])
4379 ix86_tune_string
= option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
];
4380 else if (orig_tune_defaulted
)
4381 ix86_tune_string
= NULL
;
4383 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4384 if (enum_opts_set
.x_ix86_fpmath
)
4385 global_options_set
.x_ix86_fpmath
= (enum fpmath_unit
) 1;
4386 else if (!TARGET_64BIT
&& TARGET_SSE
)
4388 ix86_fpmath
= (enum fpmath_unit
) (FPMATH_SSE
| FPMATH_387
);
4389 global_options_set
.x_ix86_fpmath
= (enum fpmath_unit
) 1;
4392 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4393 ix86_option_override_internal (false);
4395 /* Add any builtin functions with the new isa if any. */
4396 ix86_add_new_builtins (ix86_isa_flags
);
4398 /* Save the current options unless we are validating options for
4400 t
= build_target_option_node ();
/* Undo the temporary global mutations made above.  */
4402 ix86_arch_string
= orig_arch_string
;
4403 ix86_tune_string
= orig_tune_string
;
4404 global_options_set
.x_ix86_fpmath
= orig_fpmath_set
;
4406 /* Free up memory allocated to hold the strings */
4407 for (i
= 0; i
< IX86_FUNCTION_SPECIFIC_MAX
; i
++)
4408 free (option_strings
[i
]);
/* NOTE(review): damaged extraction -- leading integers are original line
   numbers; the `tree args` parameter line, braces and the return statement
   were dropped.  Code kept byte-identical; comments only.  */
/* TARGET_OPTION_VALID_ATTRIBUTE_P hook: validates and installs
   attribute((target("..."))) on FNDECL.  Temporarily applies the function's
   own optimization node, builds the new target node, attaches target and
   (if changed) optimization nodes to FNDECL, then restores the global
   target and optimization state.  */
4414 /* Hook to validate attribute((target("string"))). */
4417 ix86_valid_target_attribute_p (tree fndecl
,
4418 tree
ARG_UNUSED (name
),
4420 int ARG_UNUSED (flags
))
4422 struct cl_target_option cur_target
;
4424 tree old_optimize
= build_optimization_node ();
4425 tree new_target
, new_optimize
;
4426 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
4428 /* If the function changed the optimization levels as well as setting target
4429 options, start with the optimizations specified. */
4430 if (func_optimize
&& func_optimize
!= old_optimize
)
4431 cl_optimization_restore (&global_options
,
4432 TREE_OPTIMIZATION (func_optimize
))
;
4434 /* The target attributes may also change some optimization flags, so update
4435 the optimization options if necessary. */
4436 cl_target_option_save (&cur_target
, &global_options
);
4437 new_target
= ix86_valid_target_attribute_tree (args
);
4438 new_optimize
= build_optimization_node ();
4445 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = new_target
;
4447 if (old_optimize
!= new_optimize
)
4448 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
/* Put the caller's global option state back the way we found it.  */
4451 cl_target_option_restore (&global_options
, &cur_target
);
4453 if (old_optimize
!= new_optimize
)
4454 cl_optimization_restore (&global_options
,
4455 TREE_OPTIMIZATION (old_optimize
));
/* NOTE(review): damaged extraction -- leading integers are original line
   numbers; the `bool ret` bookkeeping, `ret = true/false` assignments,
   braces and final return were dropped by the extraction.  Code kept
   byte-identical; comments only.  */
/* TARGET_CAN_INLINE_P hook: CALLEE may be inlined into CALLER only if the
   callee's ISA flags are a subset of the caller's and the remaining target
   options (flags, arch, tune, fpmath, branch_cost) match.  */
4461 /* Hook to determine if one function can safely inline another. */
4464 ix86_can_inline_p (tree caller
, tree callee
)
4467 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
4468 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
4470 /* If callee has no option attributes, then it is ok to inline. */
4474 /* If caller has no option attributes, but callee does then it is not ok to
4476 else if (!caller_tree
)
4481 struct cl_target_option
*caller_opts
= TREE_TARGET_OPTION (caller_tree
);
4482 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
4484 /* Callee's isa options should a subset of the caller's, i.e. a SSE4 function
4485 can inline a SSE2 function but a SSE2 function can't inline a SSE4
4487 if ((caller_opts
->x_ix86_isa_flags
& callee_opts
->x_ix86_isa_flags
)
4488 != callee_opts
->x_ix86_isa_flags
)
4491 /* See if we have the same non-isa options. */
4492 else if (caller_opts
->x_target_flags
!= callee_opts
->x_target_flags
)
4495 /* See if arch, tune, etc. are the same. */
4496 else if (caller_opts
->arch
!= callee_opts
->arch
)
4499 else if (caller_opts
->tune
!= callee_opts
->tune
)
4502 else if (caller_opts
->x_ix86_fpmath
!= callee_opts
->x_ix86_fpmath
)
4505 else if (caller_opts
->branch_cost
!= callee_opts
->branch_cost
)
/* NOTE(review): damaged extraction -- leading integers are original line
   numbers; NULL_TREE fallbacks of the two ternaries, the early return,
   braces and the trailing else-arm were dropped.  Code kept byte-identical;
   comments only.  */
/* Cache of the previous fndecl so repeated hook invocations for the same
   function are cheap (GTY so the GC sees the reference).  */
4516 /* Remember the last target of ix86_set_current_function. */
4517 static GTY(()) tree ix86_previous_fndecl
;
4519 /* Establish appropriate back-end context for processing the function
4520 FNDECL. The argument might be NULL to indicate processing at top
4521 level, outside of any function scope. */
4523 ix86_set_current_function (tree fndecl
)
4525 /* Only change the context if the function changes. This hook is called
4526 several times in the course of compiling a function, and we don't want to
4527 slow things down too much or call target_reinit when it isn't safe. */
4528 if (fndecl
&& fndecl
!= ix86_previous_fndecl
)
4530 tree old_tree
= (ix86_previous_fndecl
4531 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl
)
4534 tree new_tree
= (fndecl
4535 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl
)
4538 ix86_previous_fndecl
= fndecl
;
4539 if (old_tree
== new_tree
)
/* Switch the global target options to the new function's, or back to
   the current defaults when it has none.  */
4544 cl_target_option_restore (&global_options
,
4545 TREE_TARGET_OPTION (new_tree
));
4551 struct cl_target_option
*def
4552 = TREE_TARGET_OPTION (target_option_current_node
);
4554 cl_target_option_restore (&global_options
, def
);
/* NOTE(review): damaged extraction -- leading integers are original line
   numbers; the return statements and braces were dropped.  Code kept
   byte-identical; comments only.  */
/* True only under the medium code models (CM_MEDIUM / CM_MEDIUM_PIC):
   variables placed explicitly in ".ldata"/".lbss", or whose size is 0
   (incomplete) or above ix86_section_threshold, belong in large data/bss;
   functions never do.  */
4561 /* Return true if this goes in large data/bss. */
4564 ix86_in_large_data_p (tree exp
)
4566 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
4569 /* Functions are never large data. */
4570 if (TREE_CODE (exp
) == FUNCTION_DECL
)
4573 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
4575 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
4576 if (strcmp (section
, ".ldata") == 0
4577 || strcmp (section
, ".lbss") == 0)
4583 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
4585 /* If this is an incomplete type with size 0, then we can't put it
4586 in data because it might be too big when completed. */
4587 if (!size
|| size
> ix86_section_threshold
)
/* NOTE(review): damaged extraction -- leading integers are original line
   numbers; several switch cases, `break` statements, the `if (decl ...)`
   guard before get_section, and closing braces were dropped.  Code kept
   byte-identical; comments only.  */
/* TARGET_ASM_SELECT_SECTION for x86-64 ELF: under the medium code models,
   large-data decls are routed into ".ldata*" sections (or an ".lbss"-style
   BSS section via SECTION_BSS); everything else falls through to
   default_elf_select_section.  */
4594 /* Switch to the appropriate section for output of DECL.
4595 DECL is either a `VAR_DECL' node or a constant of some sort.
4596 RELOC indicates whether forming the initial value of DECL requires
4597 link-time relocations. */
/* Forward declaration of the hook implementation below.  */
4599 static section
* x86_64_elf_select_section (tree
, int, unsigned HOST_WIDE_INT
)
4603 x86_64_elf_select_section (tree decl
, int reloc
,
4604 unsigned HOST_WIDE_INT align
)
4606 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4607 && ix86_in_large_data_p (decl
))
4609 const char *sname
= NULL
;
4610 unsigned int flags
= SECTION_WRITE
;
4611 switch (categorize_decl_for_section (decl
, reloc
))
4616 case SECCAT_DATA_REL
:
4617 sname
= ".ldata.rel";
4619 case SECCAT_DATA_REL_LOCAL
:
4620 sname
= ".ldata.rel.local";
4622 case SECCAT_DATA_REL_RO
:
4623 sname
= ".ldata.rel.ro";
4625 case SECCAT_DATA_REL_RO_LOCAL
:
4626 sname
= ".ldata.rel.ro.local";
/* BSS-category decls additionally get the SECTION_BSS flag.  */
4630 flags
|= SECTION_BSS
;
4633 case SECCAT_RODATA_MERGE_STR
:
4634 case SECCAT_RODATA_MERGE_STR_INIT
:
4635 case SECCAT_RODATA_MERGE_CONST
:
4639 case SECCAT_SRODATA
:
4646 /* We don't split these for medium model. Place them into
4647 default sections and hope for best. */
4652 /* We might get called with string constants, but get_named_section
4653 doesn't like them as they are not DECLs. Also, we need to set
4654 flags in that case. */
4656 return get_section (sname
, flags
, NULL
);
4657 return get_named_section (decl
, sname
, reloc
);
4660 return default_elf_select_section (decl
, reloc
, align
);
/* NOTE(review): damaged extraction -- leading integers are original line
   numbers; several switch cases, `break`s, the `char *string` declaration
   and closing braces were dropped.  Code kept byte-identical; comments
   only.  */
/* TARGET_ASM_UNIQUE_SECTION for x86-64 ELF: for medium-model large-data
   decls, synthesizes a per-decl section name "<linkonce><prefix>.<name>"
   and stores it into DECL_SECTION_NAME; otherwise defers to
   default_unique_section.  */
4663 /* Build up a unique section name, expressed as a
4664 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4665 RELOC indicates whether the initial value of EXP requires
4666 link-time relocations. */
4668 static void ATTRIBUTE_UNUSED
4669 x86_64_elf_unique_section (tree decl
, int reloc
)
4671 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4672 && ix86_in_large_data_p (decl
))
4674 const char *prefix
= NULL
;
4675 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4676 bool one_only
= DECL_ONE_ONLY (decl
) && !HAVE_COMDAT_GROUP
;
4678 switch (categorize_decl_for_section (decl
, reloc
))
4681 case SECCAT_DATA_REL
:
4682 case SECCAT_DATA_REL_LOCAL
:
4683 case SECCAT_DATA_REL_RO
:
4684 case SECCAT_DATA_REL_RO_LOCAL
:
4685 prefix
= one_only
? ".ld" : ".ldata";
4688 prefix
= one_only
? ".lb" : ".lbss";
4691 case SECCAT_RODATA_MERGE_STR
:
4692 case SECCAT_RODATA_MERGE_STR_INIT
:
4693 case SECCAT_RODATA_MERGE_CONST
:
4694 prefix
= one_only
? ".lr" : ".lrodata";
4696 case SECCAT_SRODATA
:
4703 /* We don't split these for medium model. Place them into
4704 default sections and hope for best. */
4709 const char *name
, *linkonce
;
/* Use the assembler name with any target encoding stripped.  */
4712 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
4713 name
= targetm
.strip_name_encoding (name
);
4715 /* If we're using one_only, then there needs to be a .gnu.linkonce
4716 prefix to the section name. */
4717 linkonce
= one_only
? ".gnu.linkonce" : "";
4719 string
= ACONCAT ((linkonce
, prefix
, ".", name
, NULL
));
4721 DECL_SECTION_NAME (decl
) = build_string (strlen (string
), string
);
4725 default_unique_section (decl
, reloc
);
/* NOTE(review): damaged extraction -- leading integers are original line
   numbers; the align parameter line, the `else`, braces, and the matching
   #endif were dropped.  Code kept byte-identical; comments only.  */
/* Emits the assembler directive declaring an uninitialized common symbol:
   ".largecomm" for medium-model objects above ix86_section_threshold,
   otherwise the platform COMMON_ASM_OP, followed by "name,size,alignment"
   (alignment converted from bits to bytes).  */
4728 #ifdef COMMON_ASM_OP
4729 /* This says how to output assembler code to declare an
4730 uninitialized external linkage data object.
4732 For medium model x86-64 we need to use .largecomm opcode for
4735 x86_elf_aligned_common (FILE *file
,
4736 const char *name
, unsigned HOST_WIDE_INT size
,
4739 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4740 && size
> (unsigned int)ix86_section_threshold
)
4741 fputs (".largecomm\t", file
);
4743 fputs (COMMON_ASM_OP
, file
);
4744 assemble_name (file
, name
);
4745 fprintf (file
, "," HOST_WIDE_INT_PRINT_UNSIGNED
",%u\n",
4746 size
, align
/ BITS_PER_UNIT
);
/* NOTE(review): damaged extraction -- leading integers are original line
   numbers; the align parameter, the `else` before switch_to_section, the
   #else line, and braces were dropped.  Code kept byte-identical; comments
   only.  */
/* ASM_OUTPUT_ALIGNED_BSS helper: picks ".lbss" for medium-model objects
   above ix86_section_threshold (else the normal bss section), emits
   alignment, the object's label (via ASM_DECLARE_OBJECT_NAME when
   available), and reserves the space with ASM_OUTPUT_SKIP (minimum 1
   byte so zero-sized objects get distinct addresses).  */
4750 /* Utility function for targets to use in implementing
4751 ASM_OUTPUT_ALIGNED_BSS. */
4754 x86_output_aligned_bss (FILE *file
, tree decl ATTRIBUTE_UNUSED
,
4755 const char *name
, unsigned HOST_WIDE_INT size
,
4758 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4759 && size
> (unsigned int)ix86_section_threshold
)
4760 switch_to_section (get_named_section (decl
, ".lbss", 0));
4762 switch_to_section (bss_section
);
4763 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
4764 #ifdef ASM_DECLARE_OBJECT_NAME
4765 last_assemble_variable_decl
= decl
;
4766 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
4768 /* Standard thing is just output label for the object. */
4769 ASM_OUTPUT_LABEL (file
, name
);
4770 #endif /* ASM_DECLARE_OBJECT_NAME */
4771 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
/* NOTE(review): damaged extraction -- leading integers are original line
   numbers; the early `return false;` and braces were dropped.  Code kept
   byte-identical; comments only.  */
/* TARGET_STACK_PROBE predicate: suppressed when -fstack-check's static
   builtin checking already probes the stack, otherwise follows the
   TARGET_STACK_PROBE macro.  */
4774 /* Decide whether we must probe the stack before any space allocation
4775 on this target. It's essentially TARGET_STACK_PROBE except when
4776 -fstack-check causes the stack to be already probed differently. */
4779 ix86_target_stack_probe (void)
4781 /* Do not probe the stack twice if static stack checking is enabled. */
4782 if (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
4785 return TARGET_STACK_PROBE
;
/* NOTE(review): damaged extraction -- leading integers are original line
   numbers; numbering gaps mark dropped lines (the a/b rtx declarations,
   several `return false;`/`return true;` statements, the flag_pic / !decl
   branch conditions, and braces).  Code kept byte-identical; comments
   only.  */
/* TARGET_FUNCTION_OK_FOR_SIBCALL: rejects sibcalls that would require a
   live PIC register, misalign the outgoing stack, mismatch return-value
   locations (especially x87 stack regs), need a vzeroupper between callee
   and caller, cross from MS to SYSV ABI, or -- for indirect/DLLIMPORT
   calls -- leave no call-clobbered register free for the target address.  */
4788 /* Decide whether we can make a sibling call to a function. DECL is the
4789 declaration of the function being targeted by the call and EXP is the
4790 CALL_EXPR representing the call. */
4793 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
4795 tree type
, decl_or_type
;
4798 /* If we are generating position-independent code, we cannot sibcall
4799 optimize any indirect call, or a direct call to a global function,
4800 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
4804 && (!decl
|| !targetm
.binds_local_p (decl
)))
4807 /* If we need to align the outgoing stack, then sibcalling would
4808 unalign the stack, which may break the called function. */
4809 if (ix86_minimum_incoming_stack_boundary (true)
4810 < PREFERRED_STACK_BOUNDARY
)
/* With a DECL in hand, use the decl directly; otherwise dig the function
   type out of the CALL_EXPR's callee expression.  */
4815 decl_or_type
= decl
;
4816 type
= TREE_TYPE (decl
);
4820 /* We're looking at the CALL_EXPR, we need the type of the function. */
4821 type
= CALL_EXPR_FN (exp
); /* pointer expression */
4822 type
= TREE_TYPE (type
); /* pointer type */
4823 type
= TREE_TYPE (type
); /* function type */
4824 decl_or_type
= type
;
4827 /* Check that the return value locations are the same. Like
4828 if we are returning floats on the 80387 register stack, we cannot
4829 make a sibcall from a function that doesn't return a float to a
4830 function that does or, conversely, from a function that does return
4831 a float to a function that doesn't; the necessary stack adjustment
4832 would not be executed. This is also the place we notice
4833 differences in the return value ABI. Note that it is ok for one
4834 of the functions to have void return type as long as the return
4835 value of the other is passed in a register. */
4836 a
= ix86_function_value (TREE_TYPE (exp
), decl_or_type
, false);
4837 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
4839 if (STACK_REG_P (a
) || STACK_REG_P (b
))
4841 if (!rtx_equal_p (a
, b
))
4844 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
4846 /* Disable sibcall if we need to generate vzeroupper after
4848 if (TARGET_VZEROUPPER
4849 && cfun
->machine
->callee_return_avx256_p
4850 && !cfun
->machine
->caller_return_avx256_p
)
4853 else if (!rtx_equal_p (a
, b
))
4858 /* The SYSV ABI has more call-clobbered registers;
4859 disallow sibcalls from MS to SYSV. */
4860 if (cfun
->machine
->call_abi
== MS_ABI
4861 && ix86_function_type_abi (type
) == SYSV_ABI
)
4866 /* If this call is indirect, we'll need to be able to use a
4867 call-clobbered register for the address of the target function.
4868 Make sure that all such registers are not used for passing
4869 parameters. Note that DLLIMPORT functions are indirect. */
4871 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
&& DECL_DLLIMPORT_P (decl
)))
4873 if (ix86_function_regparm (type
, NULL
) >= 3)
4875 /* ??? Need to count the actual number of registers to be used,
4876 not the possible number of registers. Fix later. */
4882 /* Otherwise okay. That also includes certain types of indirect calls. */
/* NOTE(review): damaged extraction -- leading integers are original line
   numbers; numbering gaps mark dropped lines (remaining parameters, `name`
   arguments of several diagnostics, `return NULL_TREE;` statements, the
   64-bit early-bailout branch, and braces).  Code kept byte-identical;
   comments only.  */
/* Attribute handler for the x86 calling-convention attributes.  Verifies
   the attribute is on a function-ish node, checks the regparm count is an
   integer constant within REGPARM_MAX, and diagnoses every mutually
   incompatible combination (fastcall/stdcall/cdecl/thiscall/regparm);
   sseregparm combines with everything.  */
4886 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4887 and "sseregparm" calling convention attributes;
4888 arguments as in struct attribute_spec.handler. */
4891 ix86_handle_cconv_attribute (tree
*node
, tree name
,
4893 int flags ATTRIBUTE_UNUSED
,
4896 if (TREE_CODE (*node
) != FUNCTION_TYPE
4897 && TREE_CODE (*node
) != METHOD_TYPE
4898 && TREE_CODE (*node
) != FIELD_DECL
4899 && TREE_CODE (*node
) != TYPE_DECL
)
4901 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
4903 *no_add_attrs
= true;
4907 /* Can combine regparm with all attributes but fastcall, and thiscall. */
4908 if (is_attribute_p ("regparm", name
))
4912 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
4914 error ("fastcall and regparm attributes are not compatible");
4917 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
4919 error ("regparam and thiscall attributes are not compatible");
/* Validate the regparm(N) argument: integer constant, <= REGPARM_MAX.  */
4922 cst
= TREE_VALUE (args
);
4923 if (TREE_CODE (cst
) != INTEGER_CST
)
4925 warning (OPT_Wattributes
,
4926 "%qE attribute requires an integer constant argument",
4928 *no_add_attrs
= true;
4930 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
4932 warning (OPT_Wattributes
, "argument to %qE attribute larger than %d",
4934 *no_add_attrs
= true;
4942 /* Do not warn when emulating the MS ABI. */
4943 if ((TREE_CODE (*node
) != FUNCTION_TYPE
4944 && TREE_CODE (*node
) != METHOD_TYPE
)
4945 || ix86_function_type_abi (*node
) != MS_ABI
)
4946 warning (OPT_Wattributes
, "%qE attribute ignored",
4948 *no_add_attrs
= true;
4952 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4953 if (is_attribute_p ("fastcall", name
))
4955 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
4957 error ("fastcall and cdecl attributes are not compatible");
4959 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
4961 error ("fastcall and stdcall attributes are not compatible");
4963 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
4965 error ("fastcall and regparm attributes are not compatible");
4967 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
4969 error ("fastcall and thiscall attributes are not compatible");
4973 /* Can combine stdcall with fastcall (redundant), regparm and
4975 else if (is_attribute_p ("stdcall", name
))
4977 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
4979 error ("stdcall and cdecl attributes are not compatible");
4981 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
4983 error ("stdcall and fastcall attributes are not compatible");
4985 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
4987 error ("stdcall and thiscall attributes are not compatible");
4991 /* Can combine cdecl with regparm and sseregparm. */
4992 else if (is_attribute_p ("cdecl", name
))
4994 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
4996 error ("stdcall and cdecl attributes are not compatible");
4998 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5000 error ("fastcall and cdecl attributes are not compatible");
5002 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5004 error ("cdecl and thiscall attributes are not compatible");
5007 else if (is_attribute_p ("thiscall", name
))
5009 if (TREE_CODE (*node
) != METHOD_TYPE
&& pedantic
)
5010 warning (OPT_Wattributes
, "%qE attribute is used for none class-method",
5012 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5014 error ("stdcall and thiscall attributes are not compatible");
5016 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5018 error ("fastcall and thiscall attributes are not compatible");
5020 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5022 error ("cdecl and thiscall attributes are not compatible");
5026 /* Can combine sseregparm with all attributes. */
/* NOTE(review): damaged extraction -- leading integers are original line
   numbers; the TARGET_64BIT early-return condition, the attrs/is_stdarg
   declarations, a `return ret;`, braces, and parts of the final condition
   were dropped.  Code kept byte-identical; comments only.  */
/* Computes the IX86_CALLCVT_* bitmask for TYPE from its attributes:
   exactly one base convention (cdecl/stdcall/fastcall/thiscall) plus
   optional regparm/sseregparm bits (the latter only when neither thiscall
   nor fastcall is set).  Without an explicit attribute, -mrtd makes
   non-stdarg functions stdcall; MS-ABI methods default to thiscall,
   everything else to cdecl.  */
5031 /* This function determines from TYPE the calling-convention. */
5034 ix86_get_callcvt (const_tree type
)
5036 unsigned int ret
= 0;
5041 return IX86_CALLCVT_CDECL
;
5043 attrs
= TYPE_ATTRIBUTES (type
);
5044 if (attrs
!= NULL_TREE
)
5046 if (lookup_attribute ("cdecl", attrs
))
5047 ret
|= IX86_CALLCVT_CDECL
;
5048 else if (lookup_attribute ("stdcall", attrs
))
5049 ret
|= IX86_CALLCVT_STDCALL
;
5050 else if (lookup_attribute ("fastcall", attrs
))
5051 ret
|= IX86_CALLCVT_FASTCALL
;
5052 else if (lookup_attribute ("thiscall", attrs
))
5053 ret
|= IX86_CALLCVT_THISCALL
;
5055 /* Regparam isn't allowed for thiscall and fastcall. */
5056 if ((ret
& (IX86_CALLCVT_THISCALL
| IX86_CALLCVT_FASTCALL
)) == 0)
5058 if (lookup_attribute ("regparm", attrs
))
5059 ret
|= IX86_CALLCVT_REGPARM
;
5060 if (lookup_attribute ("sseregparm", attrs
))
5061 ret
|= IX86_CALLCVT_SSEREGPARM
;
/* A base convention was set explicitly; nothing more to infer.  */
5064 if (IX86_BASE_CALLCVT(ret
) != 0)
5068 is_stdarg
= stdarg_p (type
);
5069 if (TARGET_RTD
&& !is_stdarg
)
5070 return IX86_CALLCVT_STDCALL
| ret
;
5074 || TREE_CODE (type
) != METHOD_TYPE
5075 || ix86_function_type_abi (type
) != MS_ABI
)
5076 return IX86_CALLCVT_CDECL
| ret
;
5078 return IX86_CALLCVT_THISCALL
;
5081 /* Return 0 if the attributes for two types are incompatible, 1 if they
5082 are compatible, and 2 if they are nearly compatible (which causes a
5083 warning to be generated). */
5086 ix86_comp_type_attributes (const_tree type1
, const_tree type2
)
5088 unsigned int ccvt1
, ccvt2
;
5090 if (TREE_CODE (type1
) != FUNCTION_TYPE
5091 && TREE_CODE (type1
) != METHOD_TYPE
)
5094 ccvt1
= ix86_get_callcvt (type1
);
5095 ccvt2
= ix86_get_callcvt (type2
);
5098 if (ix86_function_regparm (type1
, NULL
)
5099 != ix86_function_regparm (type2
, NULL
))
5105 /* Return the regparm value for a function with the indicated TYPE and DECL.
5106 DECL may be NULL when calling function indirectly
5107 or considering a libcall. */
5110 ix86_function_regparm (const_tree type
, const_tree decl
)
5117 return (ix86_function_type_abi (type
) == SYSV_ABI
5118 ? X86_64_REGPARM_MAX
: X86_64_MS_REGPARM_MAX
);
5119 ccvt
= ix86_get_callcvt (type
);
5120 regparm
= ix86_regparm
;
5122 if ((ccvt
& IX86_CALLCVT_REGPARM
) != 0)
5124 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
5127 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
5131 else if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
5133 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
5136 /* Use register calling convention for local functions when possible. */
5138 && TREE_CODE (decl
) == FUNCTION_DECL
5140 && !(profile_flag
&& !flag_fentry
))
5142 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5143 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE (decl
));
5144 if (i
&& i
->local
&& i
->can_change_signature
)
5146 int local_regparm
, globals
= 0, regno
;
5148 /* Make sure no regparm register is taken by a
5149 fixed register variable. */
5150 for (local_regparm
= 0; local_regparm
< REGPARM_MAX
; local_regparm
++)
5151 if (fixed_regs
[local_regparm
])
5154 /* We don't want to use regparm(3) for nested functions as
5155 these use a static chain pointer in the third argument. */
5156 if (local_regparm
== 3 && DECL_STATIC_CHAIN (decl
))
5159 /* In 32-bit mode save a register for the split stack. */
5160 if (!TARGET_64BIT
&& local_regparm
== 3 && flag_split_stack
)
5163 /* Each fixed register usage increases register pressure,
5164 so less registers should be used for argument passing.
5165 This functionality can be overriden by an explicit
5167 for (regno
= 0; regno
<= DI_REG
; regno
++)
5168 if (fixed_regs
[regno
])
5172 = globals
< local_regparm
? local_regparm
- globals
: 0;
5174 if (local_regparm
> regparm
)
5175 regparm
= local_regparm
;
5182 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5183 DFmode (2) arguments in SSE registers for a function with the
5184 indicated TYPE and DECL. DECL may be NULL when calling function
5185 indirectly or considering a libcall. Otherwise return 0. */
5188 ix86_function_sseregparm (const_tree type
, const_tree decl
, bool warn
)
5190 gcc_assert (!TARGET_64BIT
);
5192 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5193 by the sseregparm attribute. */
5194 if (TARGET_SSEREGPARM
5195 || (type
&& lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
5202 error ("calling %qD with attribute sseregparm without "
5203 "SSE/SSE2 enabled", decl
);
5205 error ("calling %qT with attribute sseregparm without "
5206 "SSE/SSE2 enabled", type
);
5214 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5215 (and DFmode for SSE2) arguments in SSE registers. */
5216 if (decl
&& TARGET_SSE_MATH
&& optimize
5217 && !(profile_flag
&& !flag_fentry
))
5219 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5220 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE(decl
));
5221 if (i
&& i
->local
&& i
->can_change_signature
)
5222 return TARGET_SSE2
? 2 : 1;
5228 /* Return true if EAX is live at the start of the function. Used by
5229 ix86_expand_prologue to determine if we need special help before
5230 calling allocate_stack_worker. */
5233 ix86_eax_live_at_start_p (void)
5235 /* Cheat. Don't bother working forward from ix86_function_regparm
5236 to the function type to whether an actual argument is located in
5237 eax. Instead just look at cfg info, which is still close enough
5238 to correct at this point. This gives false positives for broken
5239 functions that might use uninitialized data that happens to be
5240 allocated in eax, but who cares? */
5241 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR
), 0);
5245 ix86_keep_aggregate_return_pointer (tree fntype
)
5251 attr
= lookup_attribute ("callee_pop_aggregate_return",
5252 TYPE_ATTRIBUTES (fntype
));
5254 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
))) == 0);
5256 /* For 32-bit MS-ABI the default is to keep aggregate
5258 if (ix86_function_type_abi (fntype
) == MS_ABI
)
5261 return KEEP_AGGREGATE_RETURN_POINTER
!= 0;
5264 /* Value is the number of bytes of arguments automatically
5265 popped when returning from a subroutine call.
5266 FUNDECL is the declaration node of the function (as a tree),
5267 FUNTYPE is the data type of the function (as a tree),
5268 or for a library call it is an identifier node for the subroutine name.
5269 SIZE is the number of bytes of arguments passed on the stack.
5271 On the 80386, the RTD insn may be used to pop them if the number
5272 of args is fixed, but if the number is variable then the caller
5273 must pop them all. RTD can't be used for library calls now
5274 because the library is compiled with the Unix compiler.
5275 Use of RTD is a selectable option, since it is incompatible with
5276 standard Unix calling sequences. If the option is not selected,
5277 the caller must always pop the args.
5279 The attribute stdcall is equivalent to RTD on a per module basis. */
5282 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
5286 /* None of the 64-bit ABIs pop arguments. */
5290 ccvt
= ix86_get_callcvt (funtype
);
5292 if ((ccvt
& (IX86_CALLCVT_STDCALL
| IX86_CALLCVT_FASTCALL
5293 | IX86_CALLCVT_THISCALL
)) != 0
5294 && ! stdarg_p (funtype
))
5297 /* Lose any fake structure return argument if it is passed on the stack. */
5298 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
5299 && !ix86_keep_aggregate_return_pointer (funtype
))
5301 int nregs
= ix86_function_regparm (funtype
, fundecl
);
5303 return GET_MODE_SIZE (Pmode
);
5309 /* Argument support functions. */
5311 /* Return true when register may be used to pass function parameters. */
5313 ix86_function_arg_regno_p (int regno
)
5316 const int *parm_regs
;
5321 return (regno
< REGPARM_MAX
5322 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
5324 return (regno
< REGPARM_MAX
5325 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
5326 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
5327 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
5328 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
5333 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
5338 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
5339 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
5343 /* TODO: The function should depend on current function ABI but
5344 builtins.c would need updating then. Therefore we use the
5347 /* RAX is used as hidden argument to va_arg functions. */
5348 if (ix86_abi
== SYSV_ABI
&& regno
== AX_REG
)
5351 if (ix86_abi
== MS_ABI
)
5352 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
5354 parm_regs
= x86_64_int_parameter_registers
;
5355 for (i
= 0; i
< (ix86_abi
== MS_ABI
5356 ? X86_64_MS_REGPARM_MAX
: X86_64_REGPARM_MAX
); i
++)
5357 if (regno
== parm_regs
[i
])
5362 /* Return if we do not know how to pass TYPE solely in registers. */
5365 ix86_must_pass_in_stack (enum machine_mode mode
, const_tree type
)
5367 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
5370 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5371 The layout_type routine is crafty and tries to trick us into passing
5372 currently unsupported vector types on the stack by using TImode. */
5373 return (!TARGET_64BIT
&& mode
== TImode
5374 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
5377 /* It returns the size, in bytes, of the area reserved for arguments passed
5378 in registers for the function represented by fndecl dependent to the used
5381 ix86_reg_parm_stack_space (const_tree fndecl
)
5383 enum calling_abi call_abi
= SYSV_ABI
;
5384 if (fndecl
!= NULL_TREE
&& TREE_CODE (fndecl
) == FUNCTION_DECL
)
5385 call_abi
= ix86_function_abi (fndecl
);
5387 call_abi
= ix86_function_type_abi (fndecl
);
5388 if (TARGET_64BIT
&& call_abi
== MS_ABI
)
5393 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
5396 ix86_function_type_abi (const_tree fntype
)
5398 if (fntype
!= NULL_TREE
&& TYPE_ATTRIBUTES (fntype
) != NULL_TREE
)
5400 enum calling_abi abi
= ix86_abi
;
5401 if (abi
== SYSV_ABI
)
5403 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype
)))
5406 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype
)))
5414 ix86_function_ms_hook_prologue (const_tree fn
)
5416 if (fn
&& lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn
)))
5418 if (decl_function_context (fn
) != NULL_TREE
)
5419 error_at (DECL_SOURCE_LOCATION (fn
),
5420 "ms_hook_prologue is not compatible with nested function");
5427 static enum calling_abi
5428 ix86_function_abi (const_tree fndecl
)
5432 return ix86_function_type_abi (TREE_TYPE (fndecl
));
5435 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
5438 ix86_cfun_abi (void)
5442 return cfun
->machine
->call_abi
;
5445 /* Write the extra assembler code needed to declare a function properly. */
5448 ix86_asm_output_function_label (FILE *asm_out_file
, const char *fname
,
5451 bool is_ms_hook
= ix86_function_ms_hook_prologue (decl
);
5455 int i
, filler_count
= (TARGET_64BIT
? 32 : 16);
5456 unsigned int filler_cc
= 0xcccccccc;
5458 for (i
= 0; i
< filler_count
; i
+= 4)
5459 fprintf (asm_out_file
, ASM_LONG
" %#x\n", filler_cc
);
5462 #ifdef SUBTARGET_ASM_UNWIND_INIT
5463 SUBTARGET_ASM_UNWIND_INIT (asm_out_file
);
5466 ASM_OUTPUT_LABEL (asm_out_file
, fname
);
5468 /* Output magic byte marker, if hot-patch attribute is set. */
5473 /* leaq [%rsp + 0], %rsp */
5474 asm_fprintf (asm_out_file
, ASM_BYTE
5475 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5479 /* movl.s %edi, %edi
5481 movl.s %esp, %ebp */
5482 asm_fprintf (asm_out_file
, ASM_BYTE
5483 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5489 extern void init_regs (void);
5491 /* Implementation of call abi switching target hook. Specific to FNDECL
5492 the specific call register sets are set. See also
5493 ix86_conditional_register_usage for more details. */
5495 ix86_call_abi_override (const_tree fndecl
)
5497 if (fndecl
== NULL_TREE
)
5498 cfun
->machine
->call_abi
= ix86_abi
;
5500 cfun
->machine
->call_abi
= ix86_function_type_abi (TREE_TYPE (fndecl
));
5503 /* 64-bit MS and SYSV ABI have different set of call used registers. Avoid
5504 expensive re-initialization of init_regs each time we switch function context
5505 since this is needed only during RTL expansion. */
5507 ix86_maybe_switch_abi (void)
5510 call_used_regs
[SI_REG
] == (cfun
->machine
->call_abi
== MS_ABI
))
5514 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5515 for a call to a function whose data type is FNTYPE.
5516 For a library call, FNTYPE is 0. */
5519 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
5520 tree fntype
, /* tree ptr for function decl */
5521 rtx libname
, /* SYMBOL_REF of library name or 0 */
5525 struct cgraph_local_info
*i
;
5528 memset (cum
, 0, sizeof (*cum
));
5530 /* Initialize for the current callee. */
5533 cfun
->machine
->callee_pass_avx256_p
= false;
5534 cfun
->machine
->callee_return_avx256_p
= false;
5539 i
= cgraph_local_info (fndecl
);
5540 cum
->call_abi
= ix86_function_abi (fndecl
);
5541 fnret_type
= TREE_TYPE (TREE_TYPE (fndecl
));
5546 cum
->call_abi
= ix86_function_type_abi (fntype
);
5548 fnret_type
= TREE_TYPE (fntype
);
5553 if (TARGET_VZEROUPPER
&& fnret_type
)
5555 rtx fnret_value
= ix86_function_value (fnret_type
, fntype
,
5557 if (function_pass_avx256_p (fnret_value
))
5559 /* The return value of this function uses 256bit AVX modes. */
5561 cfun
->machine
->callee_return_avx256_p
= true;
5563 cfun
->machine
->caller_return_avx256_p
= true;
5567 cum
->caller
= caller
;
5569 /* Set up the number of registers to use for passing arguments. */
5571 if (TARGET_64BIT
&& cum
->call_abi
== MS_ABI
&& !ACCUMULATE_OUTGOING_ARGS
)
5572 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5573 "or subtarget optimization implying it");
5574 cum
->nregs
= ix86_regparm
;
5577 cum
->nregs
= (cum
->call_abi
== SYSV_ABI
5578 ? X86_64_REGPARM_MAX
5579 : X86_64_MS_REGPARM_MAX
);
5583 cum
->sse_nregs
= SSE_REGPARM_MAX
;
5586 cum
->sse_nregs
= (cum
->call_abi
== SYSV_ABI
5587 ? X86_64_SSE_REGPARM_MAX
5588 : X86_64_MS_SSE_REGPARM_MAX
);
5592 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
5593 cum
->warn_avx
= true;
5594 cum
->warn_sse
= true;
5595 cum
->warn_mmx
= true;
5597 /* Because type might mismatch in between caller and callee, we need to
5598 use actual type of function for local calls.
5599 FIXME: cgraph_analyze can be told to actually record if function uses
5600 va_start so for local functions maybe_vaarg can be made aggressive
5602 FIXME: once typesytem is fixed, we won't need this code anymore. */
5603 if (i
&& i
->local
&& i
->can_change_signature
)
5604 fntype
= TREE_TYPE (fndecl
);
5605 cum
->maybe_vaarg
= (fntype
5606 ? (!prototype_p (fntype
) || stdarg_p (fntype
))
5611 /* If there are variable arguments, then we won't pass anything
5612 in registers in 32-bit mode. */
5613 if (stdarg_p (fntype
))
5624 /* Use ecx and edx registers if function has fastcall attribute,
5625 else look for regparm information. */
5628 unsigned int ccvt
= ix86_get_callcvt (fntype
);
5629 if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
5632 cum
->fastcall
= 1; /* Same first register as in fastcall. */
5634 else if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
5640 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
5643 /* Set up the number of SSE registers used for passing SFmode
5644 and DFmode arguments. Warn for mismatching ABI. */
5645 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
, true);
5649 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5650 But in the case of vector types, it is some vector mode.
5652 When we have only some of our vector isa extensions enabled, then there
5653 are some modes for which vector_mode_supported_p is false. For these
5654 modes, the generic vector support in gcc will choose some non-vector mode
5655 in order to implement the type. By computing the natural mode, we'll
5656 select the proper ABI location for the operand and not depend on whatever
5657 the middle-end decides to do with these vector types.
5659 The midde-end can't deal with the vector types > 16 bytes. In this
5660 case, we return the original mode and warn ABI change if CUM isn't
5663 static enum machine_mode
5664 type_natural_mode (const_tree type
, const CUMULATIVE_ARGS
*cum
)
5666 enum machine_mode mode
= TYPE_MODE (type
);
5668 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
5670 HOST_WIDE_INT size
= int_size_in_bytes (type
);
5671 if ((size
== 8 || size
== 16 || size
== 32)
5672 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5673 && TYPE_VECTOR_SUBPARTS (type
) > 1)
5675 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
5677 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
5678 mode
= MIN_MODE_VECTOR_FLOAT
;
5680 mode
= MIN_MODE_VECTOR_INT
;
5682 /* Get the mode which has this inner mode and number of units. */
5683 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
5684 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
5685 && GET_MODE_INNER (mode
) == innermode
)
5687 if (size
== 32 && !TARGET_AVX
)
5689 static bool warnedavx
;
5696 warning (0, "AVX vector argument without AVX "
5697 "enabled changes the ABI");
5699 return TYPE_MODE (type
);
5712 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5713 this may not agree with the mode that the type system has chosen for the
5714 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5715 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5718 gen_reg_or_parallel (enum machine_mode mode
, enum machine_mode orig_mode
,
5723 if (orig_mode
!= BLKmode
)
5724 tmp
= gen_rtx_REG (orig_mode
, regno
);
5727 tmp
= gen_rtx_REG (mode
, regno
);
5728 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
5729 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
5735 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
5736 of this code is to classify each 8bytes of incoming argument by the register
5737 class and assign registers accordingly. */
5739 /* Return the union class of CLASS1 and CLASS2.
5740 See the x86-64 PS ABI for details. */
5742 static enum x86_64_reg_class
5743 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
5745 /* Rule #1: If both classes are equal, this is the resulting class. */
5746 if (class1
== class2
)
5749 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5751 if (class1
== X86_64_NO_CLASS
)
5753 if (class2
== X86_64_NO_CLASS
)
5756 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5757 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
5758 return X86_64_MEMORY_CLASS
;
5760 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5761 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
5762 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
5763 return X86_64_INTEGERSI_CLASS
;
5764 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
5765 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
5766 return X86_64_INTEGER_CLASS
;
5768 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5770 if (class1
== X86_64_X87_CLASS
5771 || class1
== X86_64_X87UP_CLASS
5772 || class1
== X86_64_COMPLEX_X87_CLASS
5773 || class2
== X86_64_X87_CLASS
5774 || class2
== X86_64_X87UP_CLASS
5775 || class2
== X86_64_COMPLEX_X87_CLASS
)
5776 return X86_64_MEMORY_CLASS
;
5778 /* Rule #6: Otherwise class SSE is used. */
5779 return X86_64_SSE_CLASS
;
5782 /* Classify the argument of type TYPE and mode MODE.
5783 CLASSES will be filled by the register class used to pass each word
5784 of the operand. The number of words is returned. In case the parameter
5785 should be passed in memory, 0 is returned. As a special case for zero
5786 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5788 BIT_OFFSET is used internally for handling records and specifies offset
5789 of the offset in bits modulo 256 to avoid overflow cases.
5791 See the x86-64 PS ABI for details.
5795 classify_argument (enum machine_mode mode
, const_tree type
,
5796 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
5798 HOST_WIDE_INT bytes
=
5799 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
5800 int words
= (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
5802 /* Variable sized entities are always passed/returned in memory. */
5806 if (mode
!= VOIDmode
5807 && targetm
.calls
.must_pass_in_stack (mode
, type
))
5810 if (type
&& AGGREGATE_TYPE_P (type
))
5814 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
5816 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5820 for (i
= 0; i
< words
; i
++)
5821 classes
[i
] = X86_64_NO_CLASS
;
5823 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
5824 signalize memory class, so handle it as special case. */
5827 classes
[0] = X86_64_NO_CLASS
;
5831 /* Classify each field of record and merge classes. */
5832 switch (TREE_CODE (type
))
5835 /* And now merge the fields of structure. */
5836 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5838 if (TREE_CODE (field
) == FIELD_DECL
)
5842 if (TREE_TYPE (field
) == error_mark_node
)
5845 /* Bitfields are always classified as integer. Handle them
5846 early, since later code would consider them to be
5847 misaligned integers. */
5848 if (DECL_BIT_FIELD (field
))
5850 for (i
= (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
5851 i
< ((int_bit_position (field
) + (bit_offset
% 64))
5852 + tree_low_cst (DECL_SIZE (field
), 0)
5855 merge_classes (X86_64_INTEGER_CLASS
,
5862 type
= TREE_TYPE (field
);
5864 /* Flexible array member is ignored. */
5865 if (TYPE_MODE (type
) == BLKmode
5866 && TREE_CODE (type
) == ARRAY_TYPE
5867 && TYPE_SIZE (type
) == NULL_TREE
5868 && TYPE_DOMAIN (type
) != NULL_TREE
5869 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type
))
5874 if (!warned
&& warn_psabi
)
5877 inform (input_location
,
5878 "the ABI of passing struct with"
5879 " a flexible array member has"
5880 " changed in GCC 4.4");
5884 num
= classify_argument (TYPE_MODE (type
), type
,
5886 (int_bit_position (field
)
5887 + bit_offset
) % 256);
5890 pos
= (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
5891 for (i
= 0; i
< num
&& (i
+ pos
) < words
; i
++)
5893 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
5900 /* Arrays are handled as small records. */
5903 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
5904 TREE_TYPE (type
), subclasses
, bit_offset
);
5908 /* The partial classes are now full classes. */
5909 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
5910 subclasses
[0] = X86_64_SSE_CLASS
;
5911 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
5912 && !((bit_offset
% 64) == 0 && bytes
== 4))
5913 subclasses
[0] = X86_64_INTEGER_CLASS
;
5915 for (i
= 0; i
< words
; i
++)
5916 classes
[i
] = subclasses
[i
% num
];
5921 case QUAL_UNION_TYPE
:
5922 /* Unions are similar to RECORD_TYPE but offset is always 0.
5924 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5926 if (TREE_CODE (field
) == FIELD_DECL
)
5930 if (TREE_TYPE (field
) == error_mark_node
)
5933 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
5934 TREE_TYPE (field
), subclasses
,
5938 for (i
= 0; i
< num
; i
++)
5939 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
5950 /* When size > 16 bytes, if the first one isn't
5951 X86_64_SSE_CLASS or any other ones aren't
5952 X86_64_SSEUP_CLASS, everything should be passed in
5954 if (classes
[0] != X86_64_SSE_CLASS
)
5957 for (i
= 1; i
< words
; i
++)
5958 if (classes
[i
] != X86_64_SSEUP_CLASS
)
5962 /* Final merger cleanup. */
5963 for (i
= 0; i
< words
; i
++)
5965 /* If one class is MEMORY, everything should be passed in
5967 if (classes
[i
] == X86_64_MEMORY_CLASS
)
5970 /* The X86_64_SSEUP_CLASS should be always preceded by
5971 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5972 if (classes
[i
] == X86_64_SSEUP_CLASS
5973 && classes
[i
- 1] != X86_64_SSE_CLASS
5974 && classes
[i
- 1] != X86_64_SSEUP_CLASS
)
5976 /* The first one should never be X86_64_SSEUP_CLASS. */
5977 gcc_assert (i
!= 0);
5978 classes
[i
] = X86_64_SSE_CLASS
;
5981 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5982 everything should be passed in memory. */
5983 if (classes
[i
] == X86_64_X87UP_CLASS
5984 && (classes
[i
- 1] != X86_64_X87_CLASS
))
5988 /* The first one should never be X86_64_X87UP_CLASS. */
5989 gcc_assert (i
!= 0);
5990 if (!warned
&& warn_psabi
)
5993 inform (input_location
,
5994 "the ABI of passing union with long double"
5995 " has changed in GCC 4.4");
6003 /* Compute alignment needed. We align all types to natural boundaries with
6004 exception of XFmode that is aligned to 64bits. */
6005 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
6007 int mode_alignment
= GET_MODE_BITSIZE (mode
);
6010 mode_alignment
= 128;
6011 else if (mode
== XCmode
)
6012 mode_alignment
= 256;
6013 if (COMPLEX_MODE_P (mode
))
6014 mode_alignment
/= 2;
6015 /* Misaligned fields are always returned in memory. */
6016 if (bit_offset
% mode_alignment
)
6020 /* for V1xx modes, just use the base mode */
6021 if (VECTOR_MODE_P (mode
) && mode
!= V1DImode
&& mode
!= V1TImode
6022 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
6023 mode
= GET_MODE_INNER (mode
);
6025 /* Classification of atomic types. */
6030 classes
[0] = X86_64_SSE_CLASS
;
6033 classes
[0] = X86_64_SSE_CLASS
;
6034 classes
[1] = X86_64_SSEUP_CLASS
;
6044 int size
= (bit_offset
% 64)+ (int) GET_MODE_BITSIZE (mode
);
6048 classes
[0] = X86_64_INTEGERSI_CLASS
;
6051 else if (size
<= 64)
6053 classes
[0] = X86_64_INTEGER_CLASS
;
6056 else if (size
<= 64+32)
6058 classes
[0] = X86_64_INTEGER_CLASS
;
6059 classes
[1] = X86_64_INTEGERSI_CLASS
;
6062 else if (size
<= 64+64)
6064 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
6072 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
6076 /* OImode shouldn't be used directly. */
6081 if (!(bit_offset
% 64))
6082 classes
[0] = X86_64_SSESF_CLASS
;
6084 classes
[0] = X86_64_SSE_CLASS
;
6087 classes
[0] = X86_64_SSEDF_CLASS
;
6090 classes
[0] = X86_64_X87_CLASS
;
6091 classes
[1] = X86_64_X87UP_CLASS
;
6094 classes
[0] = X86_64_SSE_CLASS
;
6095 classes
[1] = X86_64_SSEUP_CLASS
;
6098 classes
[0] = X86_64_SSE_CLASS
;
6099 if (!(bit_offset
% 64))
6105 if (!warned
&& warn_psabi
)
6108 inform (input_location
,
6109 "the ABI of passing structure with complex float"
6110 " member has changed in GCC 4.4");
6112 classes
[1] = X86_64_SSESF_CLASS
;
6116 classes
[0] = X86_64_SSEDF_CLASS
;
6117 classes
[1] = X86_64_SSEDF_CLASS
;
6120 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
6123 /* This modes is larger than 16 bytes. */
6131 classes
[0] = X86_64_SSE_CLASS
;
6132 classes
[1] = X86_64_SSEUP_CLASS
;
6133 classes
[2] = X86_64_SSEUP_CLASS
;
6134 classes
[3] = X86_64_SSEUP_CLASS
;
6142 classes
[0] = X86_64_SSE_CLASS
;
6143 classes
[1] = X86_64_SSEUP_CLASS
;
6151 classes
[0] = X86_64_SSE_CLASS
;
6157 gcc_assert (VECTOR_MODE_P (mode
));
6162 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
6164 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
6165 classes
[0] = X86_64_INTEGERSI_CLASS
;
6167 classes
[0] = X86_64_INTEGER_CLASS
;
6168 classes
[1] = X86_64_INTEGER_CLASS
;
6169 return 1 + (bytes
> 8);
6173 /* Examine the argument and return set number of register required in each
6174 class. Return 0 iff parameter should be passed in memory. */
6176 examine_argument (enum machine_mode mode
, const_tree type
, int in_return
,
6177 int *int_nregs
, int *sse_nregs
)
6179 enum x86_64_reg_class regclass
[MAX_CLASSES
];
6180 int n
= classify_argument (mode
, type
, regclass
, 0);
6186 for (n
--; n
>= 0; n
--)
6187 switch (regclass
[n
])
6189 case X86_64_INTEGER_CLASS
:
6190 case X86_64_INTEGERSI_CLASS
:
6193 case X86_64_SSE_CLASS
:
6194 case X86_64_SSESF_CLASS
:
6195 case X86_64_SSEDF_CLASS
:
6198 case X86_64_NO_CLASS
:
6199 case X86_64_SSEUP_CLASS
:
6201 case X86_64_X87_CLASS
:
6202 case X86_64_X87UP_CLASS
:
6206 case X86_64_COMPLEX_X87_CLASS
:
6207 return in_return
? 2 : 0;
6208 case X86_64_MEMORY_CLASS
:
6214 /* Construct container for the argument used by GCC interface. See
6215 FUNCTION_ARG for the detailed description. */
6218 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
6219 const_tree type
, int in_return
, int nintregs
, int nsseregs
,
6220 const int *intreg
, int sse_regno
)
6222 /* The following variables hold the static issued_error state. */
6223 static bool issued_sse_arg_error
;
6224 static bool issued_sse_ret_error
;
6225 static bool issued_x87_ret_error
;
6227 enum machine_mode tmpmode
;
6229 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
6230 enum x86_64_reg_class regclass
[MAX_CLASSES
];
6234 int needed_sseregs
, needed_intregs
;
6235 rtx exp
[MAX_CLASSES
];
6238 n
= classify_argument (mode
, type
, regclass
, 0);
6241 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
6244 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
6247 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6248 some less clueful developer tries to use floating-point anyway. */
6249 if (needed_sseregs
&& !TARGET_SSE
)
6253 if (!issued_sse_ret_error
)
6255 error ("SSE register return with SSE disabled");
6256 issued_sse_ret_error
= true;
6259 else if (!issued_sse_arg_error
)
6261 error ("SSE register argument with SSE disabled");
6262 issued_sse_arg_error
= true;
6267 /* Likewise, error if the ABI requires us to return values in the
6268 x87 registers and the user specified -mno-80387. */
6269 if (!TARGET_80387
&& in_return
)
6270 for (i
= 0; i
< n
; i
++)
6271 if (regclass
[i
] == X86_64_X87_CLASS
6272 || regclass
[i
] == X86_64_X87UP_CLASS
6273 || regclass
[i
] == X86_64_COMPLEX_X87_CLASS
)
6275 if (!issued_x87_ret_error
)
6277 error ("x87 register return with x87 disabled");
6278 issued_x87_ret_error
= true;
6283 /* First construct simple cases. Avoid SCmode, since we want to use
6284 single register to pass this type. */
6285 if (n
== 1 && mode
!= SCmode
)
6286 switch (regclass
[0])
6288 case X86_64_INTEGER_CLASS
:
6289 case X86_64_INTEGERSI_CLASS
:
6290 return gen_rtx_REG (mode
, intreg
[0]);
6291 case X86_64_SSE_CLASS
:
6292 case X86_64_SSESF_CLASS
:
6293 case X86_64_SSEDF_CLASS
:
6294 if (mode
!= BLKmode
)
6295 return gen_reg_or_parallel (mode
, orig_mode
,
6296 SSE_REGNO (sse_regno
));
6298 case X86_64_X87_CLASS
:
6299 case X86_64_COMPLEX_X87_CLASS
:
6300 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
6301 case X86_64_NO_CLASS
:
6302 /* Zero sized array, struct or class. */
6307 if (n
== 2 && regclass
[0] == X86_64_SSE_CLASS
6308 && regclass
[1] == X86_64_SSEUP_CLASS
&& mode
!= BLKmode
)
6309 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
6311 && regclass
[0] == X86_64_SSE_CLASS
6312 && regclass
[1] == X86_64_SSEUP_CLASS
6313 && regclass
[2] == X86_64_SSEUP_CLASS
6314 && regclass
[3] == X86_64_SSEUP_CLASS
6316 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
6319 && regclass
[0] == X86_64_X87_CLASS
&& regclass
[1] == X86_64_X87UP_CLASS
)
6320 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
6321 if (n
== 2 && regclass
[0] == X86_64_INTEGER_CLASS
6322 && regclass
[1] == X86_64_INTEGER_CLASS
6323 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
6324 && intreg
[0] + 1 == intreg
[1])
6325 return gen_rtx_REG (mode
, intreg
[0]);
6327 /* Otherwise figure out the entries of the PARALLEL. */
6328 for (i
= 0; i
< n
; i
++)
6332 switch (regclass
[i
])
6334 case X86_64_NO_CLASS
:
6336 case X86_64_INTEGER_CLASS
:
6337 case X86_64_INTEGERSI_CLASS
:
6338 /* Merge TImodes on aligned occasions here too. */
6339 if (i
* 8 + 8 > bytes
)
6340 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
6341 else if (regclass
[i
] == X86_64_INTEGERSI_CLASS
)
6345 /* We've requested 24 bytes we don't have mode for. Use DImode. */
6346 if (tmpmode
== BLKmode
)
6348 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
6349 gen_rtx_REG (tmpmode
, *intreg
),
6353 case X86_64_SSESF_CLASS
:
6354 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
6355 gen_rtx_REG (SFmode
,
6356 SSE_REGNO (sse_regno
)),
6360 case X86_64_SSEDF_CLASS
:
6361 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
6362 gen_rtx_REG (DFmode
,
6363 SSE_REGNO (sse_regno
)),
6367 case X86_64_SSE_CLASS
:
6375 if (i
== 0 && regclass
[1] == X86_64_SSEUP_CLASS
)
6385 && regclass
[1] == X86_64_SSEUP_CLASS
6386 && regclass
[2] == X86_64_SSEUP_CLASS
6387 && regclass
[3] == X86_64_SSEUP_CLASS
);
6394 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
6395 gen_rtx_REG (tmpmode
,
6396 SSE_REGNO (sse_regno
)),
6405 /* Empty aligned struct, union or class. */
6409 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
6410 for (i
= 0; i
< nexps
; i
++)
6411 XVECEXP (ret
, 0, i
) = exp
[i
];
6415 /* Update the data in CUM to advance over an argument of mode MODE
6416 and data type TYPE. (TYPE is null for libcalls where that information
6417 may not be available.) */
6420 function_arg_advance_32 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6421 const_tree type
, HOST_WIDE_INT bytes
,
6422 HOST_WIDE_INT words
)
6438 cum
->words
+= words
;
6439 cum
->nregs
-= words
;
6440 cum
->regno
+= words
;
6442 if (cum
->nregs
<= 0)
6450 /* OImode shouldn't be used directly. */
6454 if (cum
->float_in_sse
< 2)
6457 if (cum
->float_in_sse
< 1)
6474 if (!type
|| !AGGREGATE_TYPE_P (type
))
6476 cum
->sse_words
+= words
;
6477 cum
->sse_nregs
-= 1;
6478 cum
->sse_regno
+= 1;
6479 if (cum
->sse_nregs
<= 0)
6493 if (!type
|| !AGGREGATE_TYPE_P (type
))
6495 cum
->mmx_words
+= words
;
6496 cum
->mmx_nregs
-= 1;
6497 cum
->mmx_regno
+= 1;
6498 if (cum
->mmx_nregs
<= 0)
/* NOTE(review): garbled extraction; lines elided per the embedded numbering.
   Advances CUM for the SysV x86-64 ABI: examine_argument () classifies the
   argument into int/SSE register counts, and on success the counters are
   consumed; otherwise (visible fall-through) the argument goes to the stack
   area via word-aligned cum->words.  Preserved verbatim; not compilable.  */
6509 function_arg_advance_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6510 const_tree type
, HOST_WIDE_INT words
, bool named
)
6512 int int_nregs
, sse_nregs
;
6514 /* Unnamed 256bit vector mode parameters are passed on stack. */
6515 if (!named
&& VALID_AVX256_REG_MODE (mode
))
6518 if (examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
)
6519 && sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
/* Argument fits in registers: consume the int and SSE counters.  */
6521 cum
->nregs
-= int_nregs
;
6522 cum
->sse_nregs
-= sse_nregs
;
6523 cum
->regno
+= int_nregs
;
6524 cum
->sse_regno
+= sse_nregs
;
/* Stack path: round cum->words up to the argument's boundary, then add.  */
6528 int align
= ix86_function_arg_boundary (mode
, type
) / BITS_PER_WORD
;
6529 cum
->words
= (cum
->words
+ align
- 1) & ~(align
- 1);
6530 cum
->words
+= words
;
/* NOTE(review): garbled extraction; lines elided.  MS x64 ABI advance:
   asserts the by-value size is 1/2/4/8 bytes (larger values are passed
   indirectly, per the visible comment) and bumps cum->words.  The register
   bookkeeping between lines 6539 and 6541 is missing from this view.  */
6535 function_arg_advance_ms_64 (CUMULATIVE_ARGS
*cum
, HOST_WIDE_INT bytes
,
6536 HOST_WIDE_INT words
)
6538 /* Otherwise, this should be passed indirect. */
6539 gcc_assert (bytes
== 1 || bytes
== 2 || bytes
== 4 || bytes
== 8);
6541 cum
->words
+= words
;
/* NOTE(review): garbled extraction; preserved verbatim.  Target-hook
   dispatcher: computes BYTES/WORDS for the argument, then delegates to the
   MS-64, SysV-64 or 32-bit advance helper depending on TARGET_64BIT and the
   effective call ABI (cum->call_abi when CUM is non-null, else ix86_abi).  */
6549 /* Update the data in CUM to advance over an argument of mode MODE and
6550 data type TYPE. (TYPE is null for libcalls where that information
6551 may not be available.) */
6554 ix86_function_arg_advance (cumulative_args_t cum_v
, enum machine_mode mode
,
6555 const_tree type
, bool named
)
6557 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
6558 HOST_WIDE_INT bytes
, words
;
/* BLKmode has no mode size; use the type's size instead.  */
6560 if (mode
== BLKmode
)
6561 bytes
= int_size_in_bytes (type
);
6563 bytes
= GET_MODE_SIZE (mode
);
6564 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
6567 mode
= type_natural_mode (type
, NULL
);
6569 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
6570 function_arg_advance_ms_64 (cum
, bytes
, words
);
6571 else if (TARGET_64BIT
)
6572 function_arg_advance_64 (cum
, mode
, type
, words
, named
);
6574 function_arg_advance_32 (cum
, mode
, type
, bytes
, words
);
/* NOTE(review): garbled extraction; many branch bodies elided (numbering
   jumps, e.g. 6598->6614, 6644->6651).  Chooses the register (or implies
   stack) for a 32-bit-ABI argument: integer regs with fastcall ECX/EDX
   special-casing, then SSE regs (with a one-shot ABI warning), then MMX regs
   (likewise).  Preserved verbatim; not compilable.  */
6577 /* Define where to put the arguments to a function.
6578 Value is zero to push the argument on the stack,
6579 or a hard register in which to store the argument.
6581 MODE is the argument's machine mode.
6582 TYPE is the data type of the argument (as a tree).
6583 This is null for libcalls where that information may
6585 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6586 the preceding args and about the function being called.
6587 NAMED is nonzero if this argument is a named parameter
6588 (otherwise it is an extra parameter matching an ellipsis). */
6591 function_arg_32 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6592 enum machine_mode orig_mode
, const_tree type
,
6593 HOST_WIDE_INT bytes
, HOST_WIDE_INT words
)
/* One-shot warning latches, shared across calls.  */
6595 static bool warnedsse
, warnedmmx
;
6597 /* Avoid the AL settings for the Unix64 ABI. */
6598 if (mode
== VOIDmode
)
6614 if (words
<= cum
->nregs
)
6616 int regno
= cum
->regno
;
6618 /* Fastcall allocates the first two DWORD (SImode) or
6619 smaller arguments to ECX and EDX if it isn't an
6625 || (type
&& AGGREGATE_TYPE_P (type
)))
6628 /* ECX not EAX is the first allocated register. */
6629 if (regno
== AX_REG
)
6632 return gen_rtx_REG (mode
, regno
);
6637 if (cum
->float_in_sse
< 2)
6640 if (cum
->float_in_sse
< 1)
6644 /* In 32bit, we pass TImode in xmm registers. */
6651 if (!type
|| !AGGREGATE_TYPE_P (type
))
6653 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
6656 warning (0, "SSE vector argument without SSE enabled "
6660 return gen_reg_or_parallel (mode
, orig_mode
,
6661 cum
->sse_regno
+ FIRST_SSE_REG
);
6666 /* OImode shouldn't be used directly. */
6675 if (!type
|| !AGGREGATE_TYPE_P (type
))
6678 return gen_reg_or_parallel (mode
, orig_mode
,
6679 cum
->sse_regno
+ FIRST_SSE_REG
);
6689 if (!type
|| !AGGREGATE_TYPE_P (type
))
6691 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
6694 warning (0, "MMX vector argument without MMX enabled "
6698 return gen_reg_or_parallel (mode
, orig_mode
,
6699 cum
->mmx_regno
+ FIRST_MMX_REG
);
/* NOTE(review): garbled extraction; the middle of the function (lines
   6717-6736 of the original) is missing, including the full ternary for the
   hidden-AL value and the construct_container trailing arguments.  SysV
   x86-64 argument placement: VOIDmode encodes the varargs SSE-register count
   in AL; otherwise delegates to construct_container ().  Not compilable.  */
6708 function_arg_64 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6709 enum machine_mode orig_mode
, const_tree type
, bool named
)
6711 /* Handle a hidden AL argument containing number of registers
6712 for varargs x86-64 functions. */
6713 if (mode
== VOIDmode
)
6714 return GEN_INT (cum
->maybe_vaarg
6715 ? (cum
->sse_nregs
< 0
6716 ? X86_64_SSE_REGPARM_MAX
6731 /* Unnamed 256bit vector mode parameters are passed on stack. */
6737 return construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
6739 &x86_64_int_parameter_registers
[cum
->regno
],
/* NOTE(review): garbled extraction; branch bodies elided.  MS x64 argument
   placement: -2 sentinel for VOIDmode (ABI-clobber marker, per the visible
   comment), NULL-on-stack when registers are exhausted, SF/DFmode in SSE
   regs, unnamed floats mirrored in both SSE and integer regs via a PARALLEL,
   and small BLKmode aggregates re-moded to SI/DImode.  Not compilable.  */
6744 function_arg_ms_64 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6745 enum machine_mode orig_mode
, bool named
,
6746 HOST_WIDE_INT bytes
)
6750 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
6751 We use value of -2 to specify that current function call is MSABI. */
6752 if (mode
== VOIDmode
)
6753 return GEN_INT (-2);
6755 /* If we've run out of registers, it goes on the stack. */
6756 if (cum
->nregs
== 0)
6759 regno
= x86_64_ms_abi_int_parameter_registers
[cum
->regno
];
6761 /* Only floating point modes are passed in anything but integer regs. */
6762 if (TARGET_SSE
&& (mode
== SFmode
|| mode
== DFmode
))
6765 regno
= cum
->regno
+ FIRST_SSE_REG
;
6770 /* Unnamed floating parameters are passed in both the
6771 SSE and integer registers. */
6772 t1
= gen_rtx_REG (mode
, cum
->regno
+ FIRST_SSE_REG
);
6773 t2
= gen_rtx_REG (mode
, regno
);
6774 t1
= gen_rtx_EXPR_LIST (VOIDmode
, t1
, const0_rtx
);
6775 t2
= gen_rtx_EXPR_LIST (VOIDmode
, t2
, const0_rtx
);
6776 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, t1
, t2
));
6779 /* Handle aggregated types passed in register. */
6780 if (orig_mode
== BLKmode
)
6782 if (bytes
> 0 && bytes
<= 8)
6783 mode
= (bytes
> 4 ? DImode
: SImode
);
6784 if (mode
== BLKmode
)
6788 return gen_reg_or_parallel (mode
, orig_mode
, regno
);
/* NOTE(review): garbled extraction; preserved verbatim.  Target-hook
   dispatcher mirroring ix86_function_arg_advance: computes sizes, maps
   vector types to their natural mode, delegates to the ABI-specific helper,
   and records 256-bit-AVX argument passing in cfun->machine for the
   vzeroupper machinery.  */
6791 /* Return where to put the arguments to a function.
6792 Return zero to push the argument on the stack, or a hard register in which to store the argument.
6794 MODE is the argument's machine mode. TYPE is the data type of the
6795 argument. It is null for libcalls where that information may not be
6796 available. CUM gives information about the preceding args and about
6797 the function being called. NAMED is nonzero if this argument is a
6798 named parameter (otherwise it is an extra parameter matching an
6802 ix86_function_arg (cumulative_args_t cum_v
, enum machine_mode omode
,
6803 const_tree type
, bool named
)
6805 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
6806 enum machine_mode mode
= omode
;
6807 HOST_WIDE_INT bytes
, words
;
6810 if (mode
== BLKmode
)
6811 bytes
= int_size_in_bytes (type
);
6813 bytes
= GET_MODE_SIZE (mode
);
6814 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
6816 /* To simplify the code below, represent vector types with a vector mode
6817 even if MMX/SSE are not active. */
6818 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
6819 mode
= type_natural_mode (type
, cum
);
6821 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
6822 arg
= function_arg_ms_64 (cum
, mode
, omode
, named
, bytes
);
6823 else if (TARGET_64BIT
)
6824 arg
= function_arg_64 (cum
, mode
, omode
, type
, named
);
6826 arg
= function_arg_32 (cum
, mode
, omode
, type
, bytes
, words
);
6828 if (TARGET_VZEROUPPER
&& function_pass_avx256_p (arg
))
6830 /* This argument uses 256bit AVX modes. */
/* NOTE(review): the condition selecting callee vs. caller here is elided
   (original lines 6831/6833 missing) -- only the assignments survive.  */
6832 cfun
->machine
->callee_pass_avx256_p
= true;
6834 cfun
->machine
->caller_pass_avx256_p
= true;
/* NOTE(review): garbled extraction; the switch body and returns are largely
   elided.  Implements the pass-by-reference hook: under the MS x64 ABI,
   arrays and aggregates whose size is not 1/2/4/8 bytes go by reference;
   on SysV 64-bit, variable-sized types (int_size_in_bytes == -1) do.  */
6840 /* A C expression that indicates when an argument must be passed by
6841 reference. If nonzero for an argument, a copy of that argument is
6842 made in memory and a pointer to the argument is passed instead of
6843 the argument itself. The pointer is passed in whatever way is
6844 appropriate for passing a pointer to that type. */
6847 ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED
,
6848 enum machine_mode mode ATTRIBUTE_UNUSED
,
6849 const_tree type
, bool named ATTRIBUTE_UNUSED
)
6851 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
6853 /* See Windows x64 Software Convention. */
6854 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
6856 int msize
= (int) GET_MODE_SIZE (mode
);
6859 /* Arrays are passed by reference. */
6860 if (TREE_CODE (type
) == ARRAY_TYPE
)
6863 if (AGGREGATE_TYPE_P (type
))
6865 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6866 are passed by reference. */
6867 msize
= int_size_in_bytes (type
);
6871 /* __m128 is passed by reference. */
6873 case 1: case 2: case 4: case 8:
6879 else if (TARGET_64BIT
&& type
&& int_size_in_bytes (type
) == -1)
/* NOTE(review): garbled extraction; case labels and returns elided.
   GCC-4.5-compatibility predicate: true when TYPE needed 128-bit alignment
   under the old 32-bit argument-passing rules; recurses through aggregate
   fields and array element types.  Preserved verbatim.  */
6885 /* Return true when TYPE should be 128bit aligned for 32bit argument
6886 passing ABI. XXX: This function is obsolete and is only used for
6887 checking psABI compatibility with previous versions of GCC. */
6890 ix86_compat_aligned_value_p (const_tree type
)
6892 enum machine_mode mode
= TYPE_MODE (type
);
6893 if (((TARGET_SSE
&& SSE_REG_MODE_P (mode
))
6897 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
6899 if (TYPE_ALIGN (type
) < 128)
6902 if (AGGREGATE_TYPE_P (type
))
6904 /* Walk the aggregates recursively. */
6905 switch (TREE_CODE (type
))
6909 case QUAL_UNION_TYPE
:
6913 /* Walk all the structure fields. */
6914 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6916 if (TREE_CODE (field
) == FIELD_DECL
6917 && ix86_compat_aligned_value_p (TREE_TYPE (field
)))
6924 /* Just for use if some languages passes arrays by value. */
6925 if (ix86_compat_aligned_value_p (TREE_TYPE (type
)))
/* NOTE(review): garbled extraction; returns and one branch head elided.
   GCC-4.5-compatibility alignment: on 32-bit, non-SSE scalar modes and
   non-128-bit-aligned aggregate types fall back to PARM_BOUNDARY; the
   result is clamped to BIGGEST_ALIGNMENT.  Preserved verbatim.  */
6936 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
6937 XXX: This function is obsolete and is only used for checking psABI
6938 compatibility with previous versions of GCC. */
6941 ix86_compat_function_arg_boundary (enum machine_mode mode
,
6942 const_tree type
, unsigned int align
)
6944 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6945 natural boundaries. */
6946 if (!TARGET_64BIT
&& mode
!= TDmode
&& mode
!= TFmode
)
6948 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6949 make an exception for SSE modes since these require 128bit
6952 The handling here differs from field_alignment. ICC aligns MMX
6953 arguments to 4 byte boundaries, while structure fields are aligned
6954 to 8 byte boundaries. */
6957 if (!(TARGET_SSE
&& SSE_REG_MODE_P (mode
)))
6958 align
= PARM_BOUNDARY
;
6962 if (!ix86_compat_aligned_value_p (type
))
6963 align
= PARM_BOUNDARY
;
6966 if (align
> BIGGEST_ALIGNMENT
)
6967 align
= BIGGEST_ALIGNMENT
;
/* NOTE(review): garbled extraction; part of the leading comment and several
   case/return lines elided.  Current (non-compat) recursive predicate: does
   TYPE contain a value requiring 128-bit alignment?  XF/XCmode are excluded
   up front; falls through to TYPE_ALIGN >= 128 for scalars.  */
6971 /* Return true when TYPE should be 128bit aligned for 32bit argument
6975 ix86_contains_aligned_value_p (const_tree type
)
6977 enum machine_mode mode
= TYPE_MODE (type
);
6979 if (mode
== XFmode
|| mode
== XCmode
)
6982 if (TYPE_ALIGN (type
) < 128)
6985 if (AGGREGATE_TYPE_P (type
))
6987 /* Walk the aggregates recursively. */
6988 switch (TREE_CODE (type
))
6992 case QUAL_UNION_TYPE
:
6996 /* Walk all the structure fields. */
6997 for (field
= TYPE_FIELDS (type
);
6999 field
= DECL_CHAIN (field
))
7001 if (TREE_CODE (field
) == FIELD_DECL
7002 && ix86_contains_aligned_value_p (TREE_TYPE (field
)))
7009 /* Just for use if some languages passes arrays by value. */
7010 if (ix86_contains_aligned_value_p (TREE_TYPE (type
)))
7019 return TYPE_ALIGN (type
) >= 128;
/* NOTE(review): garbled extraction; the conditions guarding the psABI-change
   warning (original ~7057-7067) are partly elided.  Computes the argument
   alignment boundary in bits: TYPE_ALIGN of the main variant (or mode
   alignment), floored at PARM_BOUNDARY, with 32-bit special cases, and emits
   the "ABI ... changed in GCC 4.6" note when the compat value differs.  */
7024 /* Gives the alignment boundary, in bits, of an argument with the
7025 specified mode and type. */
7028 ix86_function_arg_boundary (enum machine_mode mode
, const_tree type
)
7033 /* Since the main variant type is used for call, we convert it to
7034 the main variant type. */
7035 type
= TYPE_MAIN_VARIANT (type
);
7036 align
= TYPE_ALIGN (type
);
7039 align
= GET_MODE_ALIGNMENT (mode
);
7040 if (align
< PARM_BOUNDARY
)
7041 align
= PARM_BOUNDARY
;
7045 unsigned int saved_align
= align
;
7049 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7052 if (mode
== XFmode
|| mode
== XCmode
)
7053 align
= PARM_BOUNDARY
;
7055 else if (!ix86_contains_aligned_value_p (type
))
7056 align
= PARM_BOUNDARY
;
7059 align
= PARM_BOUNDARY
;
7064 && align
!= ix86_compat_function_arg_boundary (mode
, type
,
7068 inform (input_location
,
7069 "The ABI for passing parameters with %d-byte"
7070 " alignment has changed in GCC 4.6",
7071 align
/ BITS_PER_UNIT
);
/* NOTE(review): garbled extraction; the surrounding switch and most case
   labels/returns are elided.  Predicate for registers that may hold a
   function return value; the visible fragment handles FIRST_FLOAT_REG
   (x87 returns, disabled for the 64-bit MS ABI).  */
7078 /* Return true if N is a possible register number of function value. */
7081 ix86_function_value_regno_p (const unsigned int regno
)
7088 case FIRST_FLOAT_REG
:
7089 /* TODO: The function should depend on current function ABI but
7090 builtins.c would need updating then. Therefore we use the
7092 if (TARGET_64BIT
&& ix86_abi
== MS_ABI
)
7094 return TARGET_FLOAT_RETURNS_IN_80387
;
7100 if (TARGET_MACHO
|| TARGET_64BIT
)
/* NOTE(review): garbled extraction; the default %eax assignment body
   (after line 7140) is elided.  Picks the 32-bit return register: MM0 for
   8-byte vectors, XMM0 for TImode/16- and 32-byte vectors, st(0) for x87
   floats, %eax otherwise, with an SF/DFmode override to XMM0 when
   sseregparm/SSE-math applies.  Preserved verbatim.  */
7108 /* Define how to find the value returned by a function.
7109 VALTYPE is the data type of the value (as a tree).
7110 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7111 otherwise, FUNC is 0. */
7114 function_value_32 (enum machine_mode orig_mode
, enum machine_mode mode
,
7115 const_tree fntype
, const_tree fn
)
7119 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7120 we normally prevent this case when mmx is not available. However
7121 some ABIs may require the result to be returned like DImode. */
7122 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
7123 regno
= FIRST_MMX_REG
;
7125 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7126 we prevent this case when sse is not available. However some ABIs
7127 may require the result to be returned like integer TImode. */
7128 else if (mode
== TImode
7129 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
7130 regno
= FIRST_SSE_REG
;
7132 /* 32-byte vector modes in %ymm0. */
7133 else if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 32)
7134 regno
= FIRST_SSE_REG
;
7136 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7137 else if (X87_FLOAT_MODE_P (mode
) && TARGET_FLOAT_RETURNS_IN_80387
)
7138 regno
= FIRST_FLOAT_REG
;
7140 /* Most things go in %eax. */
7143 /* Override FP return register with %xmm0 for local functions when
7144 SSE math is enabled or for functions with sseregparm attribute. */
7145 if ((fn
|| fntype
) && (mode
== SFmode
|| mode
== DFmode
))
7147 int sse_level
= ix86_function_sseregparm (fntype
, fn
, false);
7148 if ((sse_level
>= 1 && mode
== SFmode
)
7149 || (sse_level
== 2 && mode
== DFmode
))
7150 regno
= FIRST_SSE_REG
;
7153 /* OImode shouldn't be used directly. */
7154 gcc_assert (mode
!= OImode
);
7156 return gen_rtx_REG (orig_mode
, regno
);
/* NOTE(review): garbled extraction; the signature's trailing parameter line,
   the libcall mode switch, and the Pmode-pointer return are elided.  SysV
   x86-64 return placement: libcalls get a fixed register by mode; pointers
   are returned in Pmode; everything else goes through construct_container,
   with %rax as the fallback for zero-sized structures.  Not compilable.  */
7160 function_value_64 (enum machine_mode orig_mode
, enum machine_mode mode
,
7165 /* Handle libcalls, which don't provide a type node. */
7166 if (valtype
== NULL
)
7180 regno
= FIRST_SSE_REG
;
7184 regno
= FIRST_FLOAT_REG
;
7192 return gen_rtx_REG (mode
, regno
);
7194 else if (POINTER_TYPE_P (valtype
))
7196 /* Pointers are always returned in Pmode. */
7200 ret
= construct_container (mode
, orig_mode
, valtype
, 1,
7201 X86_64_REGPARM_MAX
, X86_64_SSE_REGPARM_MAX
,
7202 x86_64_int_return_registers
, 0);
7204 /* For zero sized structures, construct_container returns NULL, but we
7205 need to keep rest of compiler happy by returning meaningful value. */
7207 ret
= gen_rtx_REG (orig_mode
, AX_REG
);
/* NOTE(review): garbled extraction; the case labels of the size switch are
   elided.  MS x64 return placement: defaults to %rax; 16-byte
   scalar-int/vector non-complex modes and SF/DFmode go to XMM0.  */
7213 function_value_ms_64 (enum machine_mode orig_mode
, enum machine_mode mode
)
7215 unsigned int regno
= AX_REG
;
7219 switch (GET_MODE_SIZE (mode
))
7222 if((SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
7223 && !COMPLEX_MODE_P (mode
))
7224 regno
= FIRST_SSE_REG
;
7228 if (mode
== SFmode
|| mode
== DFmode
)
7229 regno
= FIRST_SSE_REG
;
7235 return gen_rtx_REG (orig_mode
, regno
);
/* NOTE(review): garbled extraction; preserved verbatim.  Common worker for
   the value hooks: resolves FNTYPE_OR_DECL into FN/FNTYPE, then dispatches
   to the MS-64, SysV-64 or 32-bit return-value helper by target and ABI.  */
7239 ix86_function_value_1 (const_tree valtype
, const_tree fntype_or_decl
,
7240 enum machine_mode orig_mode
, enum machine_mode mode
)
7242 const_tree fn
, fntype
;
7245 if (fntype_or_decl
&& DECL_P (fntype_or_decl
))
7246 fn
= fntype_or_decl
;
7247 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
7249 if (TARGET_64BIT
&& ix86_function_type_abi (fntype
) == MS_ABI
)
7250 return function_value_ms_64 (orig_mode
, mode
);
7251 else if (TARGET_64BIT
)
7252 return function_value_64 (orig_mode
, mode
, valtype
);
7254 return function_value_32 (orig_mode
, mode
, fntype
, fn
);
/* NOTE(review): garbled extraction; preserved verbatim.  TARGET_FUNCTION_VALUE
   hook: takes VALTYPE's mode and natural mode and forwards to
   ix86_function_value_1.  */
7258 ix86_function_value (const_tree valtype
, const_tree fntype_or_decl
,
7259 bool outgoing ATTRIBUTE_UNUSED
)
7261 enum machine_mode mode
, orig_mode
;
7263 orig_mode
= TYPE_MODE (valtype
);
7264 mode
= type_natural_mode (valtype
, NULL
);
7265 return ix86_function_value_1 (valtype
, fntype_or_decl
, orig_mode
, mode
);
/* NOTE(review): garbled extraction; the final parameter line, the Pmode
   return for pointers, and the default call's last arguments are elided.
   Promotes pointer arguments/returns to Pmode (setting *punsignedp per
   POINTERS_EXTEND_UNSIGNED); defers to default_promote_function_mode
   otherwise.  Not compilable.  */
7268 /* Pointer function arguments and return values are promoted to Pmode. */
7270 static enum machine_mode
7271 ix86_promote_function_mode (const_tree type
, enum machine_mode mode
,
7272 int *punsignedp
, const_tree fntype
,
7275 if (type
!= NULL_TREE
&& POINTER_TYPE_P (type
))
7277 *punsignedp
= POINTERS_EXTEND_UNSIGNED
;
7280 return default_promote_function_mode (type
, mode
, punsignedp
, fntype
,
/* NOTE(review): garbled extraction; preserved verbatim.  Libcall return
   value: no type/decl available, so both tree arguments are NULL and
   orig_mode == mode.  */
7285 ix86_libcall_value (enum machine_mode mode
)
7287 return ix86_function_value_1 (NULL
, NULL
, mode
, mode
);
/* NOTE(review): garbled extraction; the size switch and several returns are
   elided.  32-bit return-in-memory decision: small MS-style aggregates stay
   in registers; vector/TImode results depend on MMX/SSE/AVX availability
   (visible MMX case: memory unless TARGET_MMX and not TARGET_VECT8_RETURNS).  */
7290 /* Return true iff type is returned in memory. */
7292 static bool ATTRIBUTE_UNUSED
7293 return_in_memory_32 (const_tree type
, enum machine_mode mode
)
7297 if (mode
== BLKmode
)
7300 size
= int_size_in_bytes (type
);
7302 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
7305 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
7307 /* User-created vectors small enough to fit in EAX. */
7311 /* MMX/3dNow values are returned in MM0,
7312 except when it doesn't exits or the ABI prescribes otherwise. */
7314 return !TARGET_MMX
|| TARGET_VECT8_RETURNS
;
7316 /* SSE values are returned in XMM0, except when it doesn't exist. */
7320 /* AVX values are returned in YMM0, except when it doesn't exist. */
7331 /* OImode shouldn't be used directly. */
7332 gcc_assert (mode
!= OImode
);
/* NOTE(review): garbled extraction; preserved verbatim.  SysV x86-64:
   return in memory exactly when examine_argument () cannot classify the
   value into registers (the register counts themselves are discarded).  */
7337 static bool ATTRIBUTE_UNUSED
7338 return_in_memory_64 (const_tree type
, enum machine_mode mode
)
7340 int needed_intregs
, needed_sseregs
;
7341 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
/* NOTE(review): garbled extraction; the return for the 16-byte xmm0 case is
   elided.  MS x64: 16-byte scalar-int/vector non-complex values return in
   xmm0; otherwise memory unless the size is exactly 1, 2, 4 or 8 bytes.  */
7344 static bool ATTRIBUTE_UNUSED
7345 return_in_memory_ms_64 (const_tree type
, enum machine_mode mode
)
7347 HOST_WIDE_INT size
= int_size_in_bytes (type
);
7349 /* __m128 is returned in xmm0. */
7350 if ((SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
7351 && !COMPLEX_MODE_P (mode
) && (GET_MODE_SIZE (mode
) == 16 || size
== 16))
7354 /* Otherwise, the size must be exactly in [1248]. */
7355 return size
!= 1 && size
!= 2 && size
!= 4 && size
!= 8;
/* NOTE(review): garbled extraction; the #else/#endif of the SUBTARGET
   override and the TARGET_64BIT guards are elided.  TARGET_RETURN_IN_MEMORY
   hook: optional subtarget override, otherwise dispatches on ABI to the
   ms_64 / 64 / 32 helpers using the type's natural mode.  */
7359 ix86_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
7361 #ifdef SUBTARGET_RETURN_IN_MEMORY
7362 return SUBTARGET_RETURN_IN_MEMORY (type
, fntype
);
7364 const enum machine_mode mode
= type_natural_mode (type
, NULL
);
7368 if (ix86_function_type_abi (fntype
) == MS_ABI
)
7369 return return_in_memory_ms_64 (type
, mode
);
7371 return return_in_memory_64 (type
, mode
);
7374 return return_in_memory_32 (type
, mode
);
/* NOTE(review): garbled extraction; the warning-latch assignments and the
   final return are elided.  STRUCT_VALUE_RTX hook used as the place to emit
   one-shot "SSE/MMX vector return without SSE/MMX enabled" warnings on
   32-bit targets (rationale in the preserved comment above).  */
7378 /* When returning SSE vector types, we have a choice of either
7379 (1) being abi incompatible with a -march switch, or
7380 (2) generating an error.
7381 Given no good solution, I think the safest thing is one warning.
7382 The user won't be able to use -Werror, but....
7384 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7385 called in response to actually generating a caller or callee that
7386 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7387 via aggregate_value_p for general type probing from tree-ssa. */
7390 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
7392 static bool warnedsse
, warnedmmx
;
7394 if (!TARGET_64BIT
&& type
)
7396 /* Look at the return type of the function, not the function type. */
7397 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
7399 if (!TARGET_SSE
&& !warnedsse
)
7402 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
7405 warning (0, "SSE vector return without SSE enabled "
7410 if (!TARGET_MMX
&& !warnedmmx
)
7412 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
7415 warning (0, "MMX vector return without MMX enabled "
/* NOTE(review): garbled extraction; the f_ovf/f_sav field-type arguments
   (lines 7451/7454) are elided.  Builds the per-ABI va_list type: a plain
   char* for 32-bit and MS ABI; for SysV-64 a one-element array of a record
   with gp_offset, fp_offset, overflow_arg_area, reg_save_area fields.  */
7425 /* Create the va_list data type. */
7427 /* Returns the calling convention specific va_list date type.
7428 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7431 ix86_build_builtin_va_list_abi (enum calling_abi abi
)
7433 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
7435 /* For i386 we use plain pointer to argument area. */
7436 if (!TARGET_64BIT
|| abi
== MS_ABI
)
7437 return build_pointer_type (char_type_node
);
7439 record
= lang_hooks
.types
.make_type (RECORD_TYPE
);
7440 type_decl
= build_decl (BUILTINS_LOCATION
,
7441 TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
7443 f_gpr
= build_decl (BUILTINS_LOCATION
,
7444 FIELD_DECL
, get_identifier ("gp_offset"),
7445 unsigned_type_node
);
7446 f_fpr
= build_decl (BUILTINS_LOCATION
,
7447 FIELD_DECL
, get_identifier ("fp_offset"),
7448 unsigned_type_node
);
7449 f_ovf
= build_decl (BUILTINS_LOCATION
,
7450 FIELD_DECL
, get_identifier ("overflow_arg_area"),
7452 f_sav
= build_decl (BUILTINS_LOCATION
,
7453 FIELD_DECL
, get_identifier ("reg_save_area"),
7456 va_list_gpr_counter_field
= f_gpr
;
7457 va_list_fpr_counter_field
= f_fpr
;
7459 DECL_FIELD_CONTEXT (f_gpr
) = record
;
7460 DECL_FIELD_CONTEXT (f_fpr
) = record
;
7461 DECL_FIELD_CONTEXT (f_ovf
) = record
;
7462 DECL_FIELD_CONTEXT (f_sav
) = record
;
7464 TYPE_STUB_DECL (record
) = type_decl
;
7465 TYPE_NAME (record
) = type_decl
;
7466 TYPE_FIELDS (record
) = f_gpr
;
7467 DECL_CHAIN (f_gpr
) = f_fpr
;
7468 DECL_CHAIN (f_fpr
) = f_ovf
;
7469 DECL_CHAIN (f_ovf
) = f_sav
;
7471 layout_type (record
);
7473 /* The correct type is an array type of one element. */
7474 return build_array_type (record
, build_index_type (size_zero_node
));
/* NOTE(review): garbled extraction; the branch structure selecting which `t`
   feeds each duplicated variant-copy sequence is elided (lines 7495-7498,
   7509-7512 missing).  Sets sysv_va_list_type_node and ms_va_list_type_node
   for 64-bit ABI cross-calls and returns the default va_list type.  */
7477 /* Setup the builtin va_list data type and for 64-bit the additional
7478 calling convention specific va_list data types. */
7481 ix86_build_builtin_va_list (void)
7483 tree ret
= ix86_build_builtin_va_list_abi (ix86_abi
);
7485 /* Initialize abi specific va_list builtin types. */
7489 if (ix86_abi
== MS_ABI
)
7491 t
= ix86_build_builtin_va_list_abi (SYSV_ABI
);
7492 if (TREE_CODE (t
) != RECORD_TYPE
)
7493 t
= build_variant_type_copy (t
);
7494 sysv_va_list_type_node
= t
;
7499 if (TREE_CODE (t
) != RECORD_TYPE
)
7500 t
= build_variant_type_copy (t
);
7501 sysv_va_list_type_node
= t
;
7503 if (ix86_abi
!= MS_ABI
)
7505 t
= ix86_build_builtin_va_list_abi (MS_ABI
);
7506 if (TREE_CODE (t
) != RECORD_TYPE
)
7507 t
= build_variant_type_copy (t
);
7508 ms_va_list_type_node
= t
;
7513 if (TREE_CODE (t
) != RECORD_TYPE
)
7514 t
= build_variant_type_copy (t
);
7515 ms_va_list_type_node
= t
;
/* NOTE(review): garbled extraction; local declarations, the smode selection
   and the branch/label emission after the cbranch are elided.  Emits the
   SysV-64 varargs register-save-area stores: sizes the GPR/FPR areas, spills
   the unused integer parameter registers, then conditionally (on AL != 0)
   spills the SSE registers.  Preserved verbatim; not compilable.  */
7522 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7525 setup_incoming_varargs_64 (CUMULATIVE_ARGS
*cum
)
7531 /* GPR size of varargs save area. */
7532 if (cfun
->va_list_gpr_size
)
7533 ix86_varargs_gpr_size
= X86_64_REGPARM_MAX
* UNITS_PER_WORD
;
7535 ix86_varargs_gpr_size
= 0;
7537 /* FPR size of varargs save area. We don't need it if we don't pass
7538 anything in SSE registers. */
7539 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
7540 ix86_varargs_fpr_size
= X86_64_SSE_REGPARM_MAX
* 16;
7542 ix86_varargs_fpr_size
= 0;
7544 if (! ix86_varargs_gpr_size
&& ! ix86_varargs_fpr_size
)
7547 save_area
= frame_pointer_rtx
;
7548 set
= get_varargs_alias_set ();
7550 max
= cum
->regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
7551 if (max
> X86_64_REGPARM_MAX
)
7552 max
= X86_64_REGPARM_MAX
;
/* Spill remaining integer parameter registers into the save area.  */
7554 for (i
= cum
->regno
; i
< max
; i
++)
7556 mem
= gen_rtx_MEM (Pmode
,
7557 plus_constant (save_area
, i
* UNITS_PER_WORD
));
7558 MEM_NOTRAP_P (mem
) = 1;
7559 set_mem_alias_set (mem
, set
);
7560 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
7561 x86_64_int_parameter_registers
[i
]));
7564 if (ix86_varargs_fpr_size
)
7566 enum machine_mode smode
;
7569 /* Now emit code to save SSE registers. The AX parameter contains number
7570 of SSE parameter registers used to call this function, though all we
7571 actually check here is the zero/non-zero status. */
7573 label
= gen_label_rtx ();
7574 test
= gen_rtx_EQ (VOIDmode
, gen_rtx_REG (QImode
, AX_REG
), const0_rtx
);
7575 emit_jump_insn (gen_cbranchqi4 (test
, XEXP (test
, 0), XEXP (test
, 1),
7578 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7579 we used movdqa (i.e. TImode) instead? Perhaps even better would
7580 be if we could determine the real mode of the data, via a hook
7581 into pass_stdarg. Ignore all that for now. */
7583 if (crtl
->stack_alignment_needed
< GET_MODE_ALIGNMENT (smode
))
7584 crtl
->stack_alignment_needed
= GET_MODE_ALIGNMENT (smode
);
7586 max
= cum
->sse_regno
+ cfun
->va_list_fpr_size
/ 16;
7587 if (max
> X86_64_SSE_REGPARM_MAX
)
7588 max
= X86_64_SSE_REGPARM_MAX
;
/* Spill remaining SSE parameter registers above the GPR area.  */
7590 for (i
= cum
->sse_regno
; i
< max
; ++i
)
7592 mem
= plus_constant (save_area
, i
* 16 + ix86_varargs_gpr_size
);
7593 mem
= gen_rtx_MEM (smode
, mem
);
7594 MEM_NOTRAP_P (mem
) = 1;
7595 set_mem_alias_set (mem
, set
);
7596 set_mem_align (mem
, GET_MODE_ALIGNMENT (smode
));
7598 emit_move_insn (mem
, gen_rtx_REG (smode
, SSE_REGNO (i
)));
/* NOTE(review): garbled extraction; the comment at 7611 is truncated and
   local declarations are elided.  MS x64 varargs setup: zeroes the
   SysV-style save-area sizes, then spills the remaining MS integer
   parameter registers to the incoming-arguments area.  */
7606 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS
*cum
)
7608 alias_set_type set
= get_varargs_alias_set ();
7611 /* Reset to zero, as there might be a sysv vaarg used
7613 ix86_varargs_gpr_size
= 0;
7614 ix86_varargs_fpr_size
= 0;
7616 for (i
= cum
->regno
; i
< X86_64_MS_REGPARM_MAX
; i
++)
7620 mem
= gen_rtx_MEM (Pmode
,
7621 plus_constant (virtual_incoming_args_rtx
,
7622 i
* UNITS_PER_WORD
));
7623 MEM_NOTRAP_P (mem
) = 1;
7624 set_mem_alias_set (mem
, set
);
7626 reg
= gen_rtx_REG (Pmode
, x86_64_ms_abi_int_parameter_registers
[i
]);
7627 emit_move_insn (mem
, reg
);
/* NOTE(review): garbled extraction; the no_rtl parameter line, the
   !TARGET_64BIT early return, and the next_cum initialization are elided.
   TARGET_SETUP_INCOMING_VARARGS hook: advances a copy of CUM past the last
   named argument (stdarg only) and dispatches to the MS or SysV worker.  */
7632 ix86_setup_incoming_varargs (cumulative_args_t cum_v
, enum machine_mode mode
,
7633 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
7636 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
7637 CUMULATIVE_ARGS next_cum
;
7640 /* This argument doesn't appear to be used anymore. Which is good,
7641 because the old code here didn't suppress rtl generation. */
7642 gcc_assert (!no_rtl
);
7647 fntype
= TREE_TYPE (current_function_decl
);
7649 /* For varargs, we do not want to skip the dummy va_dcl argument.
7650 For stdargs, we do want to skip the last named argument. */
7652 if (stdarg_p (fntype
))
7653 ix86_function_arg_advance (pack_cumulative_args (&next_cum
), mode
, type
,
7656 if (cum
->call_abi
== MS_ABI
)
7657 setup_incoming_varargs_ms_64 (&next_cum
);
7659 setup_incoming_varargs_64 (&next_cum
);
/* NOTE(review): garbled extraction; the 32-bit early return body is elided.
   True when TYPE canonicalizes to the MS char* va_list (or the default
   va_list under the MS ABI).  */
7662 /* Checks if TYPE is of kind va_list char *. */
7665 is_va_list_char_pointer (tree type
)
7669 /* For 32-bit it is always true. */
7672 canonic
= ix86_canonical_va_list_type (type
);
7673 return (canonic
== ms_va_list_type_node
7674 || (ix86_abi
== MS_ABI
&& canonic
== va_list_type_node
));
/* NOTE(review): garbled extraction; numerous lines elided (local decls,
   COMPONENT_REF field arguments, sequence capture for split-stack).  SysV-64
   va_start expansion: optionally materializes the split-stack varargs
   pointer, falls back to std_expand_builtin_va_start for char* va_lists,
   otherwise fills the gp_offset/fp_offset/overflow_arg_area/reg_save_area
   fields of the va_list record.  Preserved verbatim; not compilable.  */
7677 /* Implement va_start. */
7680 ix86_va_start (tree valist
, rtx nextarg
)
7682 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
7683 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
7684 tree gpr
, fpr
, ovf
, sav
, t
;
7688 if (flag_split_stack
7689 && cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
7691 unsigned int scratch_regno
;
7693 /* When we are splitting the stack, we can't refer to the stack
7694 arguments using internal_arg_pointer, because they may be on
7695 the old stack. The split stack prologue will arrange to
7696 leave a pointer to the old stack arguments in a scratch
7697 register, which we here copy to a pseudo-register. The split
7698 stack prologue can't set the pseudo-register directly because
7699 it (the prologue) runs before any registers have been saved. */
7701 scratch_regno
= split_stack_prologue_scratch_regno ();
7702 if (scratch_regno
!= INVALID_REGNUM
)
7706 reg
= gen_reg_rtx (Pmode
);
7707 cfun
->machine
->split_stack_varargs_pointer
= reg
;
7710 emit_move_insn (reg
, gen_rtx_REG (Pmode
, scratch_regno
));
/* Insert the captured copy sequence right after the function entry.  */
7714 push_topmost_sequence ();
7715 emit_insn_after (seq
, entry_of_function ());
7716 pop_topmost_sequence ();
7720 /* Only 64bit target needs something special. */
7721 if (!TARGET_64BIT
|| is_va_list_char_pointer (TREE_TYPE (valist
)))
7723 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
7724 std_expand_builtin_va_start (valist
, nextarg
);
7729 va_r
= expand_expr (valist
, NULL_RTX
, VOIDmode
, EXPAND_WRITE
);
7730 next
= expand_binop (ptr_mode
, add_optab
,
7731 cfun
->machine
->split_stack_varargs_pointer
,
7732 crtl
->args
.arg_offset_rtx
,
7733 NULL_RTX
, 0, OPTAB_LIB_WIDEN
);
7734 convert_move (va_r
, next
, 0);
/* SysV-64 record va_list: locate the four fields.  */
7739 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
7740 f_fpr
= DECL_CHAIN (f_gpr
);
7741 f_ovf
= DECL_CHAIN (f_fpr
);
7742 f_sav
= DECL_CHAIN (f_ovf
);
7744 valist
= build_simple_mem_ref (valist
);
7745 TREE_TYPE (valist
) = TREE_TYPE (sysv_va_list_type_node
);
7746 /* The following should be folded into the MEM_REF offset. */
7747 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), unshare_expr (valist
),
7749 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), unshare_expr (valist
),
7751 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), unshare_expr (valist
),
7753 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), unshare_expr (valist
),
7756 /* Count number of gp and fp argument registers used. */
7757 words
= crtl
->args
.info
.words
;
7758 n_gpr
= crtl
->args
.info
.regno
;
7759 n_fpr
= crtl
->args
.info
.sse_regno
;
7761 if (cfun
->va_list_gpr_size
)
/* gp_offset = n_gpr * 8 (bytes into the register save area).  */
7763 type
= TREE_TYPE (gpr
);
7764 t
= build2 (MODIFY_EXPR
, type
,
7765 gpr
, build_int_cst (type
, n_gpr
* 8));
7766 TREE_SIDE_EFFECTS (t
) = 1;
7767 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7770 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
/* fp_offset = n_fpr * 16 past the GPR area.  */
7772 type
= TREE_TYPE (fpr
);
7773 t
= build2 (MODIFY_EXPR
, type
, fpr
,
7774 build_int_cst (type
, n_fpr
* 16 + 8*X86_64_REGPARM_MAX
));
7775 TREE_SIDE_EFFECTS (t
) = 1;
7776 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7779 /* Find the overflow area. */
7780 type
= TREE_TYPE (ovf
);
7781 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
7782 ovf_rtx
= crtl
->args
.internal_arg_pointer
;
7784 ovf_rtx
= cfun
->machine
->split_stack_varargs_pointer
;
7785 t
= make_tree (type
, ovf_rtx
);
7787 t
= fold_build_pointer_plus_hwi (t
, words
* UNITS_PER_WORD
);
7788 t
= build2 (MODIFY_EXPR
, type
, ovf
, t
);
7789 TREE_SIDE_EFFECTS (t
) = 1;
7790 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7792 if (ix86_varargs_gpr_size
|| ix86_varargs_fpr_size
)
7794 /* Find the register save area.
7795 Prologue of the function save it right above stack frame. */
7796 type
= TREE_TYPE (sav
);
7797 t
= make_tree (type
, frame_pointer_rtx
);
7798 if (!ix86_varargs_gpr_size
)
7799 t
= fold_build_pointer_plus_hwi (t
, -8 * X86_64_REGPARM_MAX
);
7800 t
= build2 (MODIFY_EXPR
, type
, sav
, t
);
7801 TREE_SIDE_EFFECTS (t
) = 1;
7802 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7806 /* Implement va_arg. */
7809 ix86_gimplify_va_arg (tree valist
, tree type
, gimple_seq
*pre_p
,
7812 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
7813 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
7814 tree gpr
, fpr
, ovf
, sav
, t
;
7816 tree lab_false
, lab_over
= NULL_TREE
;
7821 enum machine_mode nat_mode
;
7822 unsigned int arg_boundary
;
7824 /* Only 64bit target needs something special. */
7825 if (!TARGET_64BIT
|| is_va_list_char_pointer (TREE_TYPE (valist
)))
7826 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
7828 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
7829 f_fpr
= DECL_CHAIN (f_gpr
);
7830 f_ovf
= DECL_CHAIN (f_fpr
);
7831 f_sav
= DECL_CHAIN (f_ovf
);
7833 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
),
7834 build_va_arg_indirect_ref (valist
), f_gpr
, NULL_TREE
);
7835 valist
= build_va_arg_indirect_ref (valist
);
7836 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
7837 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
7838 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
7840 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
7842 type
= build_pointer_type (type
);
7843 size
= int_size_in_bytes (type
);
7844 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
7846 nat_mode
= type_natural_mode (type
, NULL
);
7855 /* Unnamed 256bit vector mode parameters are passed on stack. */
7856 if (!TARGET_64BIT_MS_ABI
)
7863 container
= construct_container (nat_mode
, TYPE_MODE (type
),
7864 type
, 0, X86_64_REGPARM_MAX
,
7865 X86_64_SSE_REGPARM_MAX
, intreg
,
7870 /* Pull the value out of the saved registers. */
7872 addr
= create_tmp_var (ptr_type_node
, "addr");
7876 int needed_intregs
, needed_sseregs
;
7878 tree int_addr
, sse_addr
;
7880 lab_false
= create_artificial_label (UNKNOWN_LOCATION
);
7881 lab_over
= create_artificial_label (UNKNOWN_LOCATION
);
7883 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
7885 need_temp
= (!REG_P (container
)
7886 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
7887 || TYPE_ALIGN (type
) > 128));
7889 /* In case we are passing structure, verify that it is consecutive block
7890 on the register save area. If not we need to do moves. */
7891 if (!need_temp
&& !REG_P (container
))
7893 /* Verify that all registers are strictly consecutive */
7894 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
7898 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
7900 rtx slot
= XVECEXP (container
, 0, i
);
7901 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
7902 || INTVAL (XEXP (slot
, 1)) != i
* 16)
7910 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
7912 rtx slot
= XVECEXP (container
, 0, i
);
7913 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
7914 || INTVAL (XEXP (slot
, 1)) != i
* 8)
7926 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
7927 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
7930 /* First ensure that we fit completely in registers. */
7933 t
= build_int_cst (TREE_TYPE (gpr
),
7934 (X86_64_REGPARM_MAX
- needed_intregs
+ 1) * 8);
7935 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
7936 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
7937 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
7938 gimplify_and_add (t
, pre_p
);
7942 t
= build_int_cst (TREE_TYPE (fpr
),
7943 (X86_64_SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
7944 + X86_64_REGPARM_MAX
* 8);
7945 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
7946 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
7947 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
7948 gimplify_and_add (t
, pre_p
);
7951 /* Compute index to start of area used for integer regs. */
7954 /* int_addr = gpr + sav; */
7955 t
= fold_build_pointer_plus (sav
, gpr
);
7956 gimplify_assign (int_addr
, t
, pre_p
);
7960 /* sse_addr = fpr + sav; */
7961 t
= fold_build_pointer_plus (sav
, fpr
);
7962 gimplify_assign (sse_addr
, t
, pre_p
);
7966 int i
, prev_size
= 0;
7967 tree temp
= create_tmp_var (type
, "va_arg_tmp");
7970 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
7971 gimplify_assign (addr
, t
, pre_p
);
7973 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
7975 rtx slot
= XVECEXP (container
, 0, i
);
7976 rtx reg
= XEXP (slot
, 0);
7977 enum machine_mode mode
= GET_MODE (reg
);
7983 tree dest_addr
, dest
;
7984 int cur_size
= GET_MODE_SIZE (mode
);
7986 gcc_assert (prev_size
<= INTVAL (XEXP (slot
, 1)));
7987 prev_size
= INTVAL (XEXP (slot
, 1));
7988 if (prev_size
+ cur_size
> size
)
7990 cur_size
= size
- prev_size
;
7991 mode
= mode_for_size (cur_size
* BITS_PER_UNIT
, MODE_INT
, 1);
7992 if (mode
== BLKmode
)
7995 piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
7996 if (mode
== GET_MODE (reg
))
7997 addr_type
= build_pointer_type (piece_type
);
7999 addr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
8001 daddr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
8004 if (SSE_REGNO_P (REGNO (reg
)))
8006 src_addr
= sse_addr
;
8007 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
8011 src_addr
= int_addr
;
8012 src_offset
= REGNO (reg
) * 8;
8014 src_addr
= fold_convert (addr_type
, src_addr
);
8015 src_addr
= fold_build_pointer_plus_hwi (src_addr
, src_offset
);
8017 dest_addr
= fold_convert (daddr_type
, addr
);
8018 dest_addr
= fold_build_pointer_plus_hwi (dest_addr
, prev_size
);
8019 if (cur_size
== GET_MODE_SIZE (mode
))
8021 src
= build_va_arg_indirect_ref (src_addr
);
8022 dest
= build_va_arg_indirect_ref (dest_addr
);
8024 gimplify_assign (dest
, src
, pre_p
);
8029 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY
),
8030 3, dest_addr
, src_addr
,
8031 size_int (cur_size
));
8032 gimplify_and_add (copy
, pre_p
);
8034 prev_size
+= cur_size
;
8040 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
8041 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
8042 gimplify_assign (gpr
, t
, pre_p
);
8047 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
8048 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
8049 gimplify_assign (fpr
, t
, pre_p
);
8052 gimple_seq_add_stmt (pre_p
, gimple_build_goto (lab_over
));
8054 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_false
));
8057 /* ... otherwise out of the overflow area. */
8059 /* When we align parameter on stack for caller, if the parameter
8060 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8061 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
8062 here with caller. */
8063 arg_boundary
= ix86_function_arg_boundary (VOIDmode
, type
);
8064 if ((unsigned int) arg_boundary
> MAX_SUPPORTED_STACK_ALIGNMENT
)
8065 arg_boundary
= MAX_SUPPORTED_STACK_ALIGNMENT
;
8067 /* Care for on-stack alignment if needed. */
8068 if (arg_boundary
<= 64 || size
== 0)
8072 HOST_WIDE_INT align
= arg_boundary
/ 8;
8073 t
= fold_build_pointer_plus_hwi (ovf
, align
- 1);
8074 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
8075 build_int_cst (TREE_TYPE (t
), -align
));
8078 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
8079 gimplify_assign (addr
, t
, pre_p
);
8081 t
= fold_build_pointer_plus_hwi (t
, rsize
* UNITS_PER_WORD
);
8082 gimplify_assign (unshare_expr (ovf
), t
, pre_p
);
8085 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_over
));
8087 ptrtype
= build_pointer_type_for_mode (type
, ptr_mode
, true);
8088 addr
= fold_convert (ptrtype
, addr
);
8091 addr
= build_va_arg_indirect_ref (addr
);
8092 return build_va_arg_indirect_ref (addr
);
8095 /* Return true if OPNUM's MEM should be matched
8096 in movabs* patterns. */
8099 ix86_check_movabs (rtx insn
, int opnum
)
/* Fetch the insn's pattern; a PARALLEL wraps the SET of interest
   in its first element.  */
8103 set
= PATTERN (insn
);
8104 if (GET_CODE (set
) == PARALLEL
)
8105 set
= XVECEXP (set
, 0, 0);
8106 gcc_assert (GET_CODE (set
) == SET
);
/* OPNUM selects the destination (0) or source (1) operand of the SET.  */
8107 mem
= XEXP (set
, opnum
);
/* Strip any number of SUBREGs to reach the underlying MEM.  */
8108 while (GET_CODE (mem
) == SUBREG
)
8109 mem
= SUBREG_REG (mem
);
8110 gcc_assert (MEM_P (mem
));
/* Volatile memory is acceptable only when volatile_ok is set.  */
8111 return volatile_ok
|| !MEM_VOLATILE_P (mem
);
8114 /* Initialize the table of extra 80387 mathematical constants.  Builds
   ext_80387_constants_table from the decimal strings below and sets
   ext_80387_constants_init so the work is done only once. */
8117 init_ext_80387_constants (void)
/* Decimal values of the five constants loadable by dedicated x87
   load instructions (see the per-entry comments).  */
8119 static const char * cst
[5] =
8121 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8122 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8123 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8124 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8125 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8129 for (i
= 0; i
< 5; i
++)
8131 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
8132 /* Ensure each constant is rounded to XFmode precision. */
8133 real_convert (&ext_80387_constants_table
[i
],
8134 XFmode
, &ext_80387_constants_table
[i
]);
/* Mark the table as built so callers can skip re-initialization.  */
8137 ext_80387_constants_init
= 1;
8140 /* Return non-zero if the constant is something that
8141 can be loaded with a special instruction. */
8144 standard_80387_constant_p (rtx x
)
8146 enum machine_mode mode
= GET_MODE (x
);
/* Only x87 floating-point CONST_DOUBLEs are candidates.  */
8150 if (!(X87_FLOAT_MODE_P (mode
) && (GET_CODE (x
) == CONST_DOUBLE
)))
/* 0.0 and 1.0 have dedicated loads (fldz / fld1).  NOTE(review):
   the return statements for these branches were lost in this
   extract.  */
8153 if (x
== CONST0_RTX (mode
))
8155 if (x
== CONST1_RTX (mode
))
/* Extract the REAL_VALUE_TYPE for value-based comparisons below.  */
8158 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8160 /* For XFmode constants, try to find a special 80387 instruction when
8161 optimizing for size or on those CPUs that benefit from them. */
8163 && (optimize_function_for_size_p (cfun
) || TARGET_EXT_80387_CONSTANTS
))
/* Build the extended-constant table lazily.  */
8167 if (! ext_80387_constants_init
)
8168 init_ext_80387_constants ();
/* Match against the five extended constants.  */
8170 for (i
= 0; i
< 5; i
++)
8171 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
8175 /* Load of the constant -0.0 or -1.0 will be split as
8176 fldz;fchs or fld1;fchs sequence. */
8177 if (real_isnegzero (&r
))
8179 if (real_identical (&r
, &dconstm1
))
8185 /* Return the opcode of the special instruction to be used to load
   the constant X; dispatches on the classification returned by
   standard_80387_constant_p.  NOTE(review): the individual switch
   cases were lost in this extract. */
8189 standard_80387_constant_opcode (rtx x
)
8191 switch (standard_80387_constant_p (x
))
8215 /* Return the CONST_DOUBLE representing the 80387 constant that is
8216 loaded by the specified special instruction. The argument IDX
8217 matches the return value from standard_80387_constant_p. */
8220 standard_80387_constant_rtx (int idx
)
/* Make sure the extended-constant table has been built.  */
8224 if (! ext_80387_constants_init
)
8225 init_ext_80387_constants ();
/* NOTE(review): the mapping from IDX to table index I was lost in
   this extract.  */
8241 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
8245 /* Return 1 if X is all 0s and 2 if x is all 1s
8246 in supported SSE/AVX vector mode. */
8249 standard_sse_constant_p (rtx x
)
8251 enum machine_mode mode
= GET_MODE (x
);
/* All-zero constants are case 1.  NOTE(review): the return
   statements for these branches were lost in this extract.  */
8253 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
/* All-ones vectors are case 2.  */
8255 if (vector_all_ones_operand (x
, mode
))
8277 /* Return the opcode of the special instruction to be used to load
   the SSE/AVX constant X into the destination of INSN.  Dispatches
   on standard_sse_constant_p (1 = all zeros, 2 = all ones).
   NOTE(review): case labels were lost in this extract. */
8281 standard_sse_constant_opcode (rtx insn
, rtx x
)
8283 switch (standard_sse_constant_p (x
))
/* All-zero constant: pick an xor-with-self idiom appropriate for
   the insn's mode attribute.  */
8286 switch (get_attr_mode (insn
))
8289 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
)
8290 return "%vpxor\t%0, %d0";
8292 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
)
8293 return "%vxorpd\t%0, %d0";
8295 return "%vxorps\t%0, %d0";
/* AVX forms use the three-operand VEX encoding.  */
8298 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
)
8299 return "vpxor\t%x0, %x0, %x0";
8301 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
)
8302 return "vxorpd\t%x0, %x0, %x0";
8304 return "vxorps\t%x0, %x0, %x0";
/* All-ones constant: compare a register with itself for equality.  */
8312 return "vpcmpeqd\t%0, %0, %0";
8314 return "pcmpeqd\t%0, %0";
8322 /* Returns true if OP contains a symbol reference */
8325 symbolic_reference_mentioned_p (rtx op
)
/* A SYMBOL_REF or LABEL_REF at the top level is an immediate hit.  */
8330 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
/* Otherwise walk OP's operands recursively, guided by the rtx
   format string for its code.  */
8333 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
8334 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
/* Vector ('E') operands: recurse into each element.  */
8340 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
8341 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
/* Plain expression ('e') operands: recurse directly.  */
8345 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
8352 /* Return true if it is appropriate to emit `ret' instructions in the
8353 body of a function. Do this only if the epilogue is simple, needing a
8354 couple of insns. Prior to reloading, we can't tell how many registers
8355 must be saved, so return false then. Return false if there is no frame
8356 marker to de-allocate. */
8359 ix86_can_use_return_insn_p (void)
8361 struct ix86_frame frame
;
/* Before reload the save set is unknown; a frame pointer always
   needs an explicit epilogue.  */
8363 if (! reload_completed
|| frame_pointer_needed
)
8366 /* Don't allow more than 32k pop, since that's all we can do
8367 with one instruction. */
8368 if (crtl
->args
.pops_args
&& crtl
->args
.size
>= 32768)
/* Only a trivial frame — just the return address, no saved GP or
   SSE registers — can be unwound by a bare `ret'.  */
8371 ix86_compute_frame_layout (&frame
);
8372 return (frame
.stack_pointer_offset
== UNITS_PER_WORD
8373 && (frame
.nregs
+ frame
.nsseregs
) == 0);
8376 /* Value should be nonzero if functions must have frame pointers.
8377 Zero means the frame pointer need not be set up (and parms may
8378 be accessed via the stack pointer) in functions that seem suitable. */
8381 ix86_frame_pointer_required (void)
8383 /* If we accessed previous frames, then the generated code expects
8384 to be able to access the saved ebp value in our frame. */
8385 if (cfun
->machine
->accesses_prev_frame
)
8388 /* Several x86 os'es need a frame pointer for other reasons,
8389 usually pertaining to setjmp. */
8390 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
8393 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
8394 if (TARGET_32BIT_MS_ABI
&& cfun
->calls_setjmp
)
8397 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8398 turns off the frame pointer by default. Turn it back on now if
8399 we've not got a leaf function. */
8400 if (TARGET_OMIT_LEAF_FRAME_POINTER
8401 && (!current_function_is_leaf
8402 || ix86_current_function_calls_tls_descriptor
))
/* Profiling without -mfentry also needs the frame pointer.  */
8405 if (crtl
->profile
&& !flag_fentry
)
8411 /* Record that the current function accesses previous call frames. */
8414 ix86_setup_frame_addresses (void)
/* This flag is read by ix86_frame_pointer_required to force a frame
   pointer for the current function.  */
8416 cfun
->machine
->accesses_prev_frame
= 1;
8419 #ifndef USE_HIDDEN_LINKONCE
8420 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
8421 # define USE_HIDDEN_LINKONCE 1
8423 # define USE_HIDDEN_LINKONCE 0
8427 static int pic_labels_used
;
8429 /* Fills in the label name that should be used for a pc thunk for
8430 the given register. */
8433 get_pc_thunk_name (char name
[32], unsigned int regno
)
/* PC thunks are only used by 32-bit code.  */
8435 gcc_assert (!TARGET_64BIT
);
/* With hidden-linkonce support the thunk is named after the register;
   otherwise fall back to an internal "LPR" label numbered by REGNO.  */
8437 if (USE_HIDDEN_LINKONCE
)
8438 sprintf (name
, "__x86.get_pc_thunk.%s", reg_names
[regno
]);
8440 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
8444 /* This function generates code for -fpic that loads %ebx with
8445 the return address of the caller and then returns. */
8448 ix86_code_end (void)
8453 for (regno
= AX_REG
; regno
<= SP_REG
; regno
++)
8458 if (!(pic_labels_used
& (1 << regno
)))
8461 get_pc_thunk_name (name
, regno
);
8463 decl
= build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
8464 get_identifier (name
),
8465 build_function_type_list (void_type_node
, NULL_TREE
));
8466 DECL_RESULT (decl
) = build_decl (BUILTINS_LOCATION
, RESULT_DECL
,
8467 NULL_TREE
, void_type_node
);
8468 TREE_PUBLIC (decl
) = 1;
8469 TREE_STATIC (decl
) = 1;
8474 switch_to_section (darwin_sections
[text_coal_section
]);
8475 fputs ("\t.weak_definition\t", asm_out_file
);
8476 assemble_name (asm_out_file
, name
);
8477 fputs ("\n\t.private_extern\t", asm_out_file
);
8478 assemble_name (asm_out_file
, name
);
8479 putc ('\n', asm_out_file
);
8480 ASM_OUTPUT_LABEL (asm_out_file
, name
);
8481 DECL_WEAK (decl
) = 1;
8485 if (USE_HIDDEN_LINKONCE
)
8487 DECL_COMDAT_GROUP (decl
) = DECL_ASSEMBLER_NAME (decl
);
8489 targetm
.asm_out
.unique_section (decl
, 0);
8490 switch_to_section (get_named_section (decl
, NULL
, 0));
8492 targetm
.asm_out
.globalize_label (asm_out_file
, name
);
8493 fputs ("\t.hidden\t", asm_out_file
);
8494 assemble_name (asm_out_file
, name
);
8495 putc ('\n', asm_out_file
);
8496 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
8500 switch_to_section (text_section
);
8501 ASM_OUTPUT_LABEL (asm_out_file
, name
);
8504 DECL_INITIAL (decl
) = make_node (BLOCK
);
8505 current_function_decl
= decl
;
8506 init_function_start (decl
);
8507 first_function_block_is_cold
= false;
8508 /* Make sure unwind info is emitted for the thunk if needed. */
8509 final_start_function (emit_barrier (), asm_out_file
, 1);
8511 /* Pad stack IP move with 4 instructions (two NOPs count
8512 as one instruction). */
8513 if (TARGET_PAD_SHORT_FUNCTION
)
8518 fputs ("\tnop\n", asm_out_file
);
8521 xops
[0] = gen_rtx_REG (Pmode
, regno
);
8522 xops
[1] = gen_rtx_MEM (Pmode
, stack_pointer_rtx
);
8523 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops
);
8524 fputs ("\tret\n", asm_out_file
);
8525 final_end_function ();
8526 init_insn_lengths ();
8527 free_after_compilation (cfun
);
8529 current_function_decl
= NULL
;
8532 if (flag_split_stack
)
8533 file_end_indicate_split_stack ();
8536 /* Emit code for the SET_GOT patterns. */
8539 output_set_got (rtx dest
, rtx label ATTRIBUTE_UNUSED
)
8545 if (TARGET_VXWORKS_RTP
&& flag_pic
)
8547 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8548 xops
[2] = gen_rtx_MEM (Pmode
,
8549 gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
));
8550 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
8552 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8553 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8554 an unadorned address. */
8555 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
8556 SYMBOL_REF_FLAGS (xops
[2]) |= SYMBOL_FLAG_LOCAL
;
8557 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops
);
8561 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
8565 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
8567 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops
);
8570 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8571 is what will be referenced by the Mach-O PIC subsystem. */
8573 ASM_OUTPUT_LABEL (asm_out_file
, MACHOPIC_FUNCTION_BASE_NAME
);
8576 targetm
.asm_out
.internal_label (asm_out_file
, "L",
8577 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
8582 get_pc_thunk_name (name
, REGNO (dest
));
8583 pic_labels_used
|= 1 << REGNO (dest
);
8585 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
8586 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
8587 output_asm_insn ("call\t%X2", xops
);
8588 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8589 is what will be referenced by the Mach-O PIC subsystem. */
8592 ASM_OUTPUT_LABEL (asm_out_file
, MACHOPIC_FUNCTION_BASE_NAME
);
8594 targetm
.asm_out
.internal_label (asm_out_file
, "L",
8595 CODE_LABEL_NUMBER (label
));
8600 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops
);
8605 /* Generate a "push" pattern for input ARG.  Also updates the
   frame-state bookkeeping: a push moves the stack pointer down one
   word, and the CFA offset with it when the CFA is currently based
   on the stack pointer. */
8610 struct machine_function
*m
= cfun
->machine
;
8612 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
8613 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
8614 m
->fs
.sp_offset
+= UNITS_PER_WORD
;
/* Emit the store through a pre-decremented stack pointer.  */
8616 return gen_rtx_SET (VOIDmode
,
8618 gen_rtx_PRE_DEC (Pmode
,
8619 stack_pointer_rtx
)),
8623 /* Generate a "pop" pattern for input ARG: a load through a
   post-incremented stack pointer. */
8628 return gen_rtx_SET (VOIDmode
,
8631 gen_rtx_POST_INC (Pmode
,
8632 stack_pointer_rtx
)));
8635 /* Return >= 0 if there is an unused call-clobbered register available
8636 for the entire function. */
8639 ix86_select_alt_pic_regnum (void)
/* Only worth attempting in a leaf function that does not use TLS
   descriptors.  */
8641 if (current_function_is_leaf
8643 && !ix86_current_function_calls_tls_descriptor
)
8646 /* Can't use the same register for both PIC and DRAP. */
8648 drap
= REGNO (crtl
->drap_reg
);
/* Scan hard registers 2 down to 0 for one that is never live and is
   not the DRAP register.  */
8651 for (i
= 2; i
>= 0; --i
)
8652 if (i
!= drap
&& !df_regs_ever_live_p (i
))
8656 return INVALID_REGNUM
;
8659 /* Return TRUE if we need to save REGNO. */
8662 ix86_save_reg (unsigned int regno
, bool maybe_eh_return
)
/* The PIC register must be saved when it is live (or is needed for
   EH returns / constant-pool access) and no alternate register could
   be found to carry the PIC base instead.  */
8664 if (pic_offset_table_rtx
8665 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
8666 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
8668 || crtl
->calls_eh_return
8669 || crtl
->uses_const_pool
))
8670 return ix86_select_alt_pic_regnum () == INVALID_REGNUM
;
/* EH return data registers may need saving too.  NOTE(review): the
   loop header and matching logic here were lost in this extract.  */
8672 if (crtl
->calls_eh_return
&& maybe_eh_return
)
8677 unsigned test
= EH_RETURN_DATA_REGNO (i
);
8678 if (test
== INVALID_REGNUM
)
/* The DRAP register is always saved.  */
8685 if (crtl
->drap_reg
&& regno
== REGNO (crtl
->drap_reg
))
/* Otherwise save call-saved, non-fixed registers that are ever live,
   except the frame pointer while it is in use as such.  */
8688 return (df_regs_ever_live_p (regno
)
8689 && !call_used_regs
[regno
]
8690 && !fixed_regs
[regno
]
8691 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
8694 /* Return number of saved general purpose registers: the count of
   non-SSE hard registers that ix86_save_reg says must be saved. */
8697 ix86_nsaved_regs (void)
8702 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
8703 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
8708 /* Return number of saved SSE registers. */
8711 ix86_nsaved_sseregs (void)
/* Only the 64-bit MS ABI has call-saved SSE registers.  */
8716 if (!TARGET_64BIT_MS_ABI
)
8718 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
8719 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
8724 /* Given FROM and TO register numbers, say whether this elimination is
8725 allowed. If stack alignment is needed, we can only replace argument
8726 pointer with hard frame pointer, or replace frame pointer with stack
8727 pointer. Otherwise, frame pointer elimination is automatically
8728 handled and all other eliminations are valid. */
8731 ix86_can_eliminate (const int from
, const int to
)
/* With frame-pointer-based realignment only the two pairings named
   in the comment above are permitted.  */
8733 if (stack_realign_fp
)
8734 return ((from
== ARG_POINTER_REGNUM
8735 && to
== HARD_FRAME_POINTER_REGNUM
)
8736 || (from
== FRAME_POINTER_REGNUM
8737 && to
== STACK_POINTER_REGNUM
));
/* Eliminating to the stack pointer is valid only when no frame
   pointer is needed; everything else is always fine.  */
8739 return to
== STACK_POINTER_REGNUM
? !frame_pointer_needed
: true;
8742 /* Return the offset between two registers, one to be eliminated, and the other
8743 its replacement, at the start of a routine. */
8746 ix86_initial_elimination_offset (int from
, int to
)
8748 struct ix86_frame frame
;
8749 ix86_compute_frame_layout (&frame
);
/* Each supported (FROM, TO) pair reads its offset straight out of
   the computed frame layout.  */
8751 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
8752 return frame
.hard_frame_pointer_offset
;
8753 else if (from
== FRAME_POINTER_REGNUM
8754 && to
== HARD_FRAME_POINTER_REGNUM
)
8755 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
/* The remaining cases eliminate to the stack pointer.  */
8758 gcc_assert (to
== STACK_POINTER_REGNUM
);
8760 if (from
== ARG_POINTER_REGNUM
)
8761 return frame
.stack_pointer_offset
;
8763 gcc_assert (from
== FRAME_POINTER_REGNUM
);
8764 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
8768 /* In a dynamically-aligned function, we can't know the offset from
8769 stack pointer to frame pointer, so we must ensure that setjmp
8770 eliminates fp against the hard fp (%ebp) rather than trying to
8771 index from %esp up to the top of the frame across a gap that is
8772 of unknown (at compile-time) size. */
8774 ix86_builtin_setjmp_frame_value (void)
/* Hard FP when realigning via the frame pointer, otherwise the
   virtual stack-variables register.  */
8776 return stack_realign_fp
? hard_frame_pointer_rtx
: virtual_stack_vars_rtx
;
8779 /* When using -fsplit-stack, the allocation routines set a field in
8780 the TCB to the bottom of the stack plus this much space, measured
8783 #define SPLIT_STACK_AVAILABLE 256
8785 /* Fill structure ix86_frame about frame of currently computed function. */
8788 ix86_compute_frame_layout (struct ix86_frame
*frame
)
8790 unsigned int stack_alignment_needed
;
8791 HOST_WIDE_INT offset
;
8792 unsigned int preferred_alignment
;
8793 HOST_WIDE_INT size
= get_frame_size ();
8794 HOST_WIDE_INT to_allocate
;
8796 frame
->nregs
= ix86_nsaved_regs ();
8797 frame
->nsseregs
= ix86_nsaved_sseregs ();
8799 stack_alignment_needed
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
8800 preferred_alignment
= crtl
->preferred_stack_boundary
/ BITS_PER_UNIT
;
8802 /* 64-bit MS ABI seem to require stack alignment to be always 16 except for
8803 function prologues and leaf. */
8804 if ((TARGET_64BIT_MS_ABI
&& preferred_alignment
< 16)
8805 && (!current_function_is_leaf
|| cfun
->calls_alloca
!= 0
8806 || ix86_current_function_calls_tls_descriptor
))
8808 preferred_alignment
= 16;
8809 stack_alignment_needed
= 16;
8810 crtl
->preferred_stack_boundary
= 128;
8811 crtl
->stack_alignment_needed
= 128;
8814 gcc_assert (!size
|| stack_alignment_needed
);
8815 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
8816 gcc_assert (preferred_alignment
<= stack_alignment_needed
);
8818 /* For SEH we have to limit the amount of code movement into the prologue.
8819 At present we do this via a BLOCKAGE, at which point there's very little
8820 scheduling that can be done, which means that there's very little point
8821 in doing anything except PUSHs. */
8823 cfun
->machine
->use_fast_prologue_epilogue
= false;
8825 /* During reload iteration the amount of registers saved can change.
8826 Recompute the value as needed. Do not recompute when amount of registers
8827 didn't change as reload does multiple calls to the function and does not
8828 expect the decision to change within single iteration. */
8829 else if (!optimize_function_for_size_p (cfun
)
8830 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
8832 int count
= frame
->nregs
;
8833 struct cgraph_node
*node
= cgraph_get_node (current_function_decl
);
8835 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
8837 /* The fast prologue uses move instead of push to save registers. This
8838 is significantly longer, but also executes faster as modern hardware
8839 can execute the moves in parallel, but can't do that for push/pop.
8841 Be careful about choosing what prologue to emit: When function takes
8842 many instructions to execute we may use slow version as well as in
8843 case function is known to be outside hot spot (this is known with
8844 feedback only). Weight the size of function by number of registers
8845 to save as it is cheap to use one or two push instructions but very
8846 slow to use many of them. */
8848 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
8849 if (node
->frequency
< NODE_FREQUENCY_NORMAL
8850 || (flag_branch_probabilities
8851 && node
->frequency
< NODE_FREQUENCY_HOT
))
8852 cfun
->machine
->use_fast_prologue_epilogue
= false;
8854 cfun
->machine
->use_fast_prologue_epilogue
8855 = !expensive_function_p (count
);
8858 frame
->save_regs_using_mov
8859 = (TARGET_PROLOGUE_USING_MOVE
&& cfun
->machine
->use_fast_prologue_epilogue
8860 /* If static stack checking is enabled and done with probes,
8861 the registers need to be saved before allocating the frame. */
8862 && flag_stack_check
!= STATIC_BUILTIN_STACK_CHECK
);
8864 /* Skip return address. */
8865 offset
= UNITS_PER_WORD
;
8867 /* Skip pushed static chain. */
8868 if (ix86_static_chain_on_stack
)
8869 offset
+= UNITS_PER_WORD
;
8871 /* Skip saved base pointer. */
8872 if (frame_pointer_needed
)
8873 offset
+= UNITS_PER_WORD
;
8874 frame
->hfp_save_offset
= offset
;
8876 /* The traditional frame pointer location is at the top of the frame. */
8877 frame
->hard_frame_pointer_offset
= offset
;
8879 /* Register save area */
8880 offset
+= frame
->nregs
* UNITS_PER_WORD
;
8881 frame
->reg_save_offset
= offset
;
8883 /* Align and set SSE register save area. */
8884 if (frame
->nsseregs
)
8886 /* The only ABI that has saved SSE registers (Win64) also has a
8887 16-byte aligned default stack, and thus we don't need to be
8888 within the re-aligned local stack frame to save them. */
8889 gcc_assert (INCOMING_STACK_BOUNDARY
>= 128);
8890 offset
= (offset
+ 16 - 1) & -16;
8891 offset
+= frame
->nsseregs
* 16;
8893 frame
->sse_reg_save_offset
= offset
;
8895 /* The re-aligned stack starts here. Values before this point are not
8896 directly comparable with values below this point. In order to make
8897 sure that no value happens to be the same before and after, force
8898 the alignment computation below to add a non-zero value. */
8899 if (stack_realign_fp
)
8900 offset
= (offset
+ stack_alignment_needed
) & -stack_alignment_needed
;
8903 frame
->va_arg_size
= ix86_varargs_gpr_size
+ ix86_varargs_fpr_size
;
8904 offset
+= frame
->va_arg_size
;
8906 /* Align start of frame for local function. */
8907 if (stack_realign_fp
8908 || offset
!= frame
->sse_reg_save_offset
8910 || !current_function_is_leaf
8911 || cfun
->calls_alloca
8912 || ix86_current_function_calls_tls_descriptor
)
8913 offset
= (offset
+ stack_alignment_needed
- 1) & -stack_alignment_needed
;
8915 /* Frame pointer points here. */
8916 frame
->frame_pointer_offset
= offset
;
8920 /* Add outgoing arguments area. Can be skipped if we eliminated
8921 all the function calls as dead code.
8922 Skipping is however impossible when function calls alloca. Alloca
8923 expander assumes that last crtl->outgoing_args_size
8924 of stack frame are unused. */
8925 if (ACCUMULATE_OUTGOING_ARGS
8926 && (!current_function_is_leaf
|| cfun
->calls_alloca
8927 || ix86_current_function_calls_tls_descriptor
))
8929 offset
+= crtl
->outgoing_args_size
;
8930 frame
->outgoing_arguments_size
= crtl
->outgoing_args_size
;
8933 frame
->outgoing_arguments_size
= 0;
8935 /* Align stack boundary. Only needed if we're calling another function
8937 if (!current_function_is_leaf
|| cfun
->calls_alloca
8938 || ix86_current_function_calls_tls_descriptor
)
8939 offset
= (offset
+ preferred_alignment
- 1) & -preferred_alignment
;
8941 /* We've reached end of stack frame. */
8942 frame
->stack_pointer_offset
= offset
;
8944 /* Size prologue needs to allocate. */
8945 to_allocate
= offset
- frame
->sse_reg_save_offset
;
8947 if ((!to_allocate
&& frame
->nregs
<= 1)
8948 || (TARGET_64BIT
&& to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
8949 frame
->save_regs_using_mov
= false;
8951 if (ix86_using_red_zone ()
8952 && current_function_sp_is_unchanging
8953 && current_function_is_leaf
8954 && !ix86_current_function_calls_tls_descriptor
)
8956 frame
->red_zone_size
= to_allocate
;
8957 if (frame
->save_regs_using_mov
)
8958 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
8959 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
8960 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
8963 frame
->red_zone_size
= 0;
8964 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
8966 /* The SEH frame pointer location is near the bottom of the frame.
8967 This is enforced by the fact that the difference between the
8968 stack pointer and the frame pointer is limited to 240 bytes in
8969 the unwind data structure. */
8974 /* If we can leave the frame pointer where it is, do so. */
8975 diff
= frame
->stack_pointer_offset
- frame
->hard_frame_pointer_offset
;
8976 if (diff
> 240 || (diff
& 15) != 0)
8978 /* Ideally we'd determine what portion of the local stack frame
8979 (within the constraint of the lowest 240) is most heavily used.
8980 But without that complication, simply bias the frame pointer
8981 by 128 bytes so as to maximize the amount of the local stack
8982 frame that is addressable with 8-bit offsets. */
8983 frame
->hard_frame_pointer_offset
= frame
->stack_pointer_offset
- 128;
8988 /* This is semi-inlined memory_address_length, but simplified
8989 since we know that we're always dealing with reg+offset, and
8990 to avoid having to create and discard all that rtl.
   Returns the extra encoding length for a REGNO + OFFSET address.
   NOTE(review): some branch/return lines were lost in this extract. */
8993 choose_baseaddr_len (unsigned int regno
, HOST_WIDE_INT offset
)
8999 /* EBP and R13 cannot be encoded without an offset. */
9000 len
= (regno
== BP_REG
|| regno
== R13_REG
);
/* Offsets in [-128, 127] use the short one-byte displacement form.  */
9002 else if (IN_RANGE (offset
, -128, 127))
9005 /* ESP and R12 must be encoded with a SIB byte. */
9006 if (regno
== SP_REG
|| regno
== R12_REG
)
9012 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9013 The valid base registers are taken from CFUN->MACHINE->FS. */
9016 choose_baseaddr (HOST_WIDE_INT cfa_offset
)
9018 const struct machine_function
*m
= cfun
->machine
;
9019 rtx base_reg
= NULL
;
9020 HOST_WIDE_INT base_offset
= 0;
9022 if (m
->use_fast_prologue_epilogue
)
9024 /* Choose the base register most likely to allow the most scheduling
9025 opportunities. Generally FP is valid througout the function,
9026 while DRAP must be reloaded within the epilogue. But choose either
9027 over the SP due to increased encoding size. */
9031 base_reg
= hard_frame_pointer_rtx
;
9032 base_offset
= m
->fs
.fp_offset
- cfa_offset
;
9034 else if (m
->fs
.drap_valid
)
9036 base_reg
= crtl
->drap_reg
;
9037 base_offset
= 0 - cfa_offset
;
9039 else if (m
->fs
.sp_valid
)
9041 base_reg
= stack_pointer_rtx
;
9042 base_offset
= m
->fs
.sp_offset
- cfa_offset
;
9047 HOST_WIDE_INT toffset
;
9050 /* Choose the base register with the smallest address encoding.
9051 With a tie, choose FP > DRAP > SP. */
9054 base_reg
= stack_pointer_rtx
;
9055 base_offset
= m
->fs
.sp_offset
- cfa_offset
;
9056 len
= choose_baseaddr_len (STACK_POINTER_REGNUM
, base_offset
);
9058 if (m
->fs
.drap_valid
)
9060 toffset
= 0 - cfa_offset
;
9061 tlen
= choose_baseaddr_len (REGNO (crtl
->drap_reg
), toffset
);
9064 base_reg
= crtl
->drap_reg
;
9065 base_offset
= toffset
;
9071 toffset
= m
->fs
.fp_offset
- cfa_offset
;
9072 tlen
= choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM
, toffset
);
9075 base_reg
= hard_frame_pointer_rtx
;
9076 base_offset
= toffset
;
9081 gcc_assert (base_reg
!= NULL
);
9083 return plus_constant (base_reg
, base_offset
);
9086 /* Emit code to save registers in the prologue. */
9089 ix86_emit_save_regs (void)
9094 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
-- > 0; )
9095 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9097 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
9098 RTX_FRAME_RELATED_P (insn
) = 1;
9102 /* Emit a single register save at CFA - CFA_OFFSET. */
9105 ix86_emit_save_reg_using_mov (enum machine_mode mode
, unsigned int regno
,
9106 HOST_WIDE_INT cfa_offset
)
9108 struct machine_function
*m
= cfun
->machine
;
9109 rtx reg
= gen_rtx_REG (mode
, regno
);
9110 rtx mem
, addr
, base
, insn
;
9112 addr
= choose_baseaddr (cfa_offset
);
9113 mem
= gen_frame_mem (mode
, addr
);
9115 /* For SSE saves, we need to indicate the 128-bit alignment. */
9116 set_mem_align (mem
, GET_MODE_ALIGNMENT (mode
));
9118 insn
= emit_move_insn (mem
, reg
);
9119 RTX_FRAME_RELATED_P (insn
) = 1;
9122 if (GET_CODE (base
) == PLUS
)
9123 base
= XEXP (base
, 0);
9124 gcc_checking_assert (REG_P (base
));
9126 /* When saving registers into a re-aligned local stack frame, avoid
9127 any tricky guessing by dwarf2out. */
9128 if (m
->fs
.realigned
)
9130 gcc_checking_assert (stack_realign_drap
);
9132 if (regno
== REGNO (crtl
->drap_reg
))
9134 /* A bit of a hack. We force the DRAP register to be saved in
9135 the re-aligned stack frame, which provides us with a copy
9136 of the CFA that will last past the prologue. Install it. */
9137 gcc_checking_assert (cfun
->machine
->fs
.fp_valid
);
9138 addr
= plus_constant (hard_frame_pointer_rtx
,
9139 cfun
->machine
->fs
.fp_offset
- cfa_offset
);
9140 mem
= gen_rtx_MEM (mode
, addr
);
9141 add_reg_note (insn
, REG_CFA_DEF_CFA
, mem
);
9145 /* The frame pointer is a stable reference within the
9146 aligned frame. Use it. */
9147 gcc_checking_assert (cfun
->machine
->fs
.fp_valid
);
9148 addr
= plus_constant (hard_frame_pointer_rtx
,
9149 cfun
->machine
->fs
.fp_offset
- cfa_offset
);
9150 mem
= gen_rtx_MEM (mode
, addr
);
9151 add_reg_note (insn
, REG_CFA_EXPRESSION
,
9152 gen_rtx_SET (VOIDmode
, mem
, reg
));
9156 /* The memory may not be relative to the current CFA register,
9157 which means that we may need to generate a new pattern for
9158 use by the unwind info. */
9159 else if (base
!= m
->fs
.cfa_reg
)
9161 addr
= plus_constant (m
->fs
.cfa_reg
, m
->fs
.cfa_offset
- cfa_offset
);
9162 mem
= gen_rtx_MEM (mode
, addr
);
9163 add_reg_note (insn
, REG_CFA_OFFSET
, gen_rtx_SET (VOIDmode
, mem
, reg
));
9167 /* Emit code to save registers using MOV insns.
9168 First register is stored at CFA - CFA_OFFSET. */
9170 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset
)
9174 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9175 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9177 ix86_emit_save_reg_using_mov (Pmode
, regno
, cfa_offset
);
9178 cfa_offset
-= UNITS_PER_WORD
;
9182 /* Emit code to save SSE registers using MOV insns.
9183 First register is stored at CFA - CFA_OFFSET. */
9185 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset
)
9189 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9190 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9192 ix86_emit_save_reg_using_mov (V4SFmode
, regno
, cfa_offset
);
9197 static GTY(()) rtx queued_cfa_restores
;
9199 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
9200 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9201 Don't add the note if the previously saved value will be left untouched
9202 within stack red-zone till return, as unwinders can find the same value
9203 in the register and on the stack. */
9206 ix86_add_cfa_restore_note (rtx insn
, rtx reg
, HOST_WIDE_INT cfa_offset
)
9208 if (!crtl
->shrink_wrapped
9209 && cfa_offset
<= cfun
->machine
->fs
.red_zone_offset
)
9214 add_reg_note (insn
, REG_CFA_RESTORE
, reg
);
9215 RTX_FRAME_RELATED_P (insn
) = 1;
9219 = alloc_reg_note (REG_CFA_RESTORE
, reg
, queued_cfa_restores
);
9222 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9225 ix86_add_queued_cfa_restore_notes (rtx insn
)
9228 if (!queued_cfa_restores
)
9230 for (last
= queued_cfa_restores
; XEXP (last
, 1); last
= XEXP (last
, 1))
9232 XEXP (last
, 1) = REG_NOTES (insn
);
9233 REG_NOTES (insn
) = queued_cfa_restores
;
9234 queued_cfa_restores
= NULL_RTX
;
9235 RTX_FRAME_RELATED_P (insn
) = 1;
9238 /* Expand prologue or epilogue stack adjustment.
9239 The pattern exist to put a dependency on all ebp-based memory accesses.
9240 STYLE should be negative if instructions should be marked as frame related,
9241 zero if %r11 register is live and cannot be freely used and positive
9245 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
,
9246 int style
, bool set_cfa
)
9248 struct machine_function
*m
= cfun
->machine
;
9250 bool add_frame_related_expr
= false;
9253 insn
= gen_pro_epilogue_adjust_stack_si_add (dest
, src
, offset
);
9254 else if (x86_64_immediate_operand (offset
, DImode
))
9255 insn
= gen_pro_epilogue_adjust_stack_di_add (dest
, src
, offset
);
9259 /* r11 is used by indirect sibcall return as well, set before the
9260 epilogue and used after the epilogue. */
9262 tmp
= gen_rtx_REG (DImode
, R11_REG
);
9265 gcc_assert (src
!= hard_frame_pointer_rtx
9266 && dest
!= hard_frame_pointer_rtx
);
9267 tmp
= hard_frame_pointer_rtx
;
9269 insn
= emit_insn (gen_rtx_SET (DImode
, tmp
, offset
));
9271 add_frame_related_expr
= true;
9273 insn
= gen_pro_epilogue_adjust_stack_di_add (dest
, src
, tmp
);
9276 insn
= emit_insn (insn
);
9278 ix86_add_queued_cfa_restore_notes (insn
);
9284 gcc_assert (m
->fs
.cfa_reg
== src
);
9285 m
->fs
.cfa_offset
+= INTVAL (offset
);
9286 m
->fs
.cfa_reg
= dest
;
9288 r
= gen_rtx_PLUS (Pmode
, src
, offset
);
9289 r
= gen_rtx_SET (VOIDmode
, dest
, r
);
9290 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, r
);
9291 RTX_FRAME_RELATED_P (insn
) = 1;
9295 RTX_FRAME_RELATED_P (insn
) = 1;
9296 if (add_frame_related_expr
)
9298 rtx r
= gen_rtx_PLUS (Pmode
, src
, offset
);
9299 r
= gen_rtx_SET (VOIDmode
, dest
, r
);
9300 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, r
);
9304 if (dest
== stack_pointer_rtx
)
9306 HOST_WIDE_INT ooffset
= m
->fs
.sp_offset
;
9307 bool valid
= m
->fs
.sp_valid
;
9309 if (src
== hard_frame_pointer_rtx
)
9311 valid
= m
->fs
.fp_valid
;
9312 ooffset
= m
->fs
.fp_offset
;
9314 else if (src
== crtl
->drap_reg
)
9316 valid
= m
->fs
.drap_valid
;
9321 /* Else there are two possibilities: SP itself, which we set
9322 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
9323 taken care of this by hand along the eh_return path. */
9324 gcc_checking_assert (src
== stack_pointer_rtx
9325 || offset
== const0_rtx
);
9328 m
->fs
.sp_offset
= ooffset
- INTVAL (offset
);
9329 m
->fs
.sp_valid
= valid
;
9333 /* Find an available register to be used as dynamic realign argument
9334 pointer regsiter. Such a register will be written in prologue and
9335 used in begin of body, so it must not be
9336 1. parameter passing register.
9338 We reuse static-chain register if it is available. Otherwise, we
9339 use DI for i386 and R13 for x86-64. We chose R13 since it has
9342 Return: the regno of chosen register. */
9345 find_drap_reg (void)
9347 tree decl
= cfun
->decl
;
9351 /* Use R13 for nested function or function need static chain.
9352 Since function with tail call may use any caller-saved
9353 registers in epilogue, DRAP must not use caller-saved
9354 register in such case. */
9355 if (DECL_STATIC_CHAIN (decl
) || crtl
->tail_call_emit
)
9362 /* Use DI for nested function or function need static chain.
9363 Since function with tail call may use any caller-saved
9364 registers in epilogue, DRAP must not use caller-saved
9365 register in such case. */
9366 if (DECL_STATIC_CHAIN (decl
) || crtl
->tail_call_emit
)
9369 /* Reuse static chain register if it isn't used for parameter
9371 if (ix86_function_regparm (TREE_TYPE (decl
), decl
) <= 2)
9373 unsigned int ccvt
= ix86_get_callcvt (TREE_TYPE (decl
));
9374 if ((ccvt
& (IX86_CALLCVT_FASTCALL
| IX86_CALLCVT_THISCALL
)) == 0)
9381 /* Return minimum incoming stack alignment. */
9384 ix86_minimum_incoming_stack_boundary (bool sibcall
)
9386 unsigned int incoming_stack_boundary
;
9388 /* Prefer the one specified at command line. */
9389 if (ix86_user_incoming_stack_boundary
)
9390 incoming_stack_boundary
= ix86_user_incoming_stack_boundary
;
9391 /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
9392 if -mstackrealign is used, it isn't used for sibcall check and
9393 estimated stack alignment is 128bit. */
9396 && ix86_force_align_arg_pointer
9397 && crtl
->stack_alignment_estimated
== 128)
9398 incoming_stack_boundary
= MIN_STACK_BOUNDARY
;
9400 incoming_stack_boundary
= ix86_default_incoming_stack_boundary
;
9402 /* Incoming stack alignment can be changed on individual functions
9403 via force_align_arg_pointer attribute. We use the smallest
9404 incoming stack boundary. */
9405 if (incoming_stack_boundary
> MIN_STACK_BOUNDARY
9406 && lookup_attribute (ix86_force_align_arg_pointer_string
,
9407 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
9408 incoming_stack_boundary
= MIN_STACK_BOUNDARY
;
9410 /* The incoming stack frame has to be aligned at least at
9411 parm_stack_boundary. */
9412 if (incoming_stack_boundary
< crtl
->parm_stack_boundary
)
9413 incoming_stack_boundary
= crtl
->parm_stack_boundary
;
9415 /* Stack at entrance of main is aligned by runtime. We use the
9416 smallest incoming stack boundary. */
9417 if (incoming_stack_boundary
> MAIN_STACK_BOUNDARY
9418 && DECL_NAME (current_function_decl
)
9419 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
9420 && DECL_FILE_SCOPE_P (current_function_decl
))
9421 incoming_stack_boundary
= MAIN_STACK_BOUNDARY
;
9423 return incoming_stack_boundary
;
9426 /* Update incoming stack boundary and estimated stack alignment. */
9429 ix86_update_stack_boundary (void)
9431 ix86_incoming_stack_boundary
9432 = ix86_minimum_incoming_stack_boundary (false);
9434 /* x86_64 vararg needs 16byte stack alignment for register save
9438 && crtl
->stack_alignment_estimated
< 128)
9439 crtl
->stack_alignment_estimated
= 128;
9442 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9443 needed or an rtx for DRAP otherwise. */
9446 ix86_get_drap_rtx (void)
9448 if (ix86_force_drap
|| !ACCUMULATE_OUTGOING_ARGS
)
9449 crtl
->need_drap
= true;
9451 if (stack_realign_drap
)
9453 /* Assign DRAP to vDRAP and returns vDRAP */
9454 unsigned int regno
= find_drap_reg ();
9459 arg_ptr
= gen_rtx_REG (Pmode
, regno
);
9460 crtl
->drap_reg
= arg_ptr
;
9463 drap_vreg
= copy_to_reg (arg_ptr
);
9467 insn
= emit_insn_before (seq
, NEXT_INSN (entry_of_function ()));
9470 add_reg_note (insn
, REG_CFA_SET_VDRAP
, drap_vreg
);
9471 RTX_FRAME_RELATED_P (insn
) = 1;
9479 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9482 ix86_internal_arg_pointer (void)
9484 return virtual_incoming_args_rtx
;
9487 struct scratch_reg
{
9492 /* Return a short-lived scratch register for use on function entry.
9493 In 32-bit mode, it is valid only after the registers are saved
9494 in the prologue. This register must be released by means of
9495 release_scratch_register_on_entry once it is dead. */
9498 get_scratch_register_on_entry (struct scratch_reg
*sr
)
9506 /* We always use R11 in 64-bit mode. */
9511 tree decl
= current_function_decl
, fntype
= TREE_TYPE (decl
);
9513 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)) != NULL_TREE
;
9514 bool static_chain_p
= DECL_STATIC_CHAIN (decl
);
9515 int regparm
= ix86_function_regparm (fntype
, decl
);
9517 = crtl
->drap_reg
? REGNO (crtl
->drap_reg
) : INVALID_REGNUM
;
9519 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9520 for the static chain register. */
9521 if ((regparm
< 1 || (fastcall_p
&& !static_chain_p
))
9522 && drap_regno
!= AX_REG
)
9524 else if (regparm
< 2 && drap_regno
!= DX_REG
)
9526 /* ecx is the static chain register. */
9527 else if (regparm
< 3 && !fastcall_p
&& !static_chain_p
9528 && drap_regno
!= CX_REG
)
9530 else if (ix86_save_reg (BX_REG
, true))
9532 /* esi is the static chain register. */
9533 else if (!(regparm
== 3 && static_chain_p
)
9534 && ix86_save_reg (SI_REG
, true))
9536 else if (ix86_save_reg (DI_REG
, true))
9540 regno
= (drap_regno
== AX_REG
? DX_REG
: AX_REG
);
9545 sr
->reg
= gen_rtx_REG (Pmode
, regno
);
9548 rtx insn
= emit_insn (gen_push (sr
->reg
));
9549 RTX_FRAME_RELATED_P (insn
) = 1;
9553 /* Release a scratch register obtained from the preceding function. */
9556 release_scratch_register_on_entry (struct scratch_reg
*sr
)
9560 rtx x
, insn
= emit_insn (gen_pop (sr
->reg
));
9562 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9563 RTX_FRAME_RELATED_P (insn
) = 1;
9564 x
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (UNITS_PER_WORD
));
9565 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
9566 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
9570 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
9572 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9575 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size
)
9577 /* We skip the probe for the first interval + a small dope of 4 words and
9578 probe that many bytes past the specified size to maintain a protection
9579 area at the botton of the stack. */
9580 const int dope
= 4 * UNITS_PER_WORD
;
9581 rtx size_rtx
= GEN_INT (size
), last
;
9583 /* See if we have a constant small number of probes to generate. If so,
9584 that's the easy case. The run-time loop is made up of 11 insns in the
9585 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9586 for n # of intervals. */
9587 if (size
<= 5 * PROBE_INTERVAL
)
9589 HOST_WIDE_INT i
, adjust
;
9590 bool first_probe
= true;
9592 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9593 values of N from 1 until it exceeds SIZE. If only one probe is
9594 needed, this will not generate any code. Then adjust and probe
9595 to PROBE_INTERVAL + SIZE. */
9596 for (i
= PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
9600 adjust
= 2 * PROBE_INTERVAL
+ dope
;
9601 first_probe
= false;
9604 adjust
= PROBE_INTERVAL
;
9606 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9607 plus_constant (stack_pointer_rtx
, -adjust
)));
9608 emit_stack_probe (stack_pointer_rtx
);
9612 adjust
= size
+ PROBE_INTERVAL
+ dope
;
9614 adjust
= size
+ PROBE_INTERVAL
- i
;
9616 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9617 plus_constant (stack_pointer_rtx
, -adjust
)));
9618 emit_stack_probe (stack_pointer_rtx
);
9620 /* Adjust back to account for the additional first interval. */
9621 last
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9622 plus_constant (stack_pointer_rtx
,
9623 PROBE_INTERVAL
+ dope
)));
9626 /* Otherwise, do the same as above, but in a loop. Note that we must be
9627 extra careful with variables wrapping around because we might be at
9628 the very top (or the very bottom) of the address space and we have
9629 to be able to handle this case properly; in particular, we use an
9630 equality test for the loop condition. */
9633 HOST_WIDE_INT rounded_size
;
9634 struct scratch_reg sr
;
9636 get_scratch_register_on_entry (&sr
);
9639 /* Step 1: round SIZE to the previous multiple of the interval. */
9641 rounded_size
= size
& -PROBE_INTERVAL
;
9644 /* Step 2: compute initial and final value of the loop counter. */
9646 /* SP = SP_0 + PROBE_INTERVAL. */
9647 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9648 plus_constant (stack_pointer_rtx
,
9649 - (PROBE_INTERVAL
+ dope
))));
9651 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9652 emit_move_insn (sr
.reg
, GEN_INT (-rounded_size
));
9653 emit_insn (gen_rtx_SET (VOIDmode
, sr
.reg
,
9654 gen_rtx_PLUS (Pmode
, sr
.reg
,
9655 stack_pointer_rtx
)));
9660 while (SP != LAST_ADDR)
9662 SP = SP + PROBE_INTERVAL
9666 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9667 values of N from 1 until it is equal to ROUNDED_SIZE. */
9669 emit_insn (ix86_gen_adjust_stack_and_probe (sr
.reg
, sr
.reg
, size_rtx
));
9672 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9673 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9675 if (size
!= rounded_size
)
9677 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9678 plus_constant (stack_pointer_rtx
,
9679 rounded_size
- size
)));
9680 emit_stack_probe (stack_pointer_rtx
);
9683 /* Adjust back to account for the additional first interval. */
9684 last
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9685 plus_constant (stack_pointer_rtx
,
9686 PROBE_INTERVAL
+ dope
)));
9688 release_scratch_register_on_entry (&sr
);
9691 gcc_assert (cfun
->machine
->fs
.cfa_reg
!= stack_pointer_rtx
);
9693 /* Even if the stack pointer isn't the CFA register, we need to correctly
9694 describe the adjustments made to it, in particular differentiate the
9695 frame-related ones from the frame-unrelated ones. */
9698 rtx expr
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (2));
9699 XVECEXP (expr
, 0, 0)
9700 = gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9701 plus_constant (stack_pointer_rtx
, -size
));
9702 XVECEXP (expr
, 0, 1)
9703 = gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9704 plus_constant (stack_pointer_rtx
,
9705 PROBE_INTERVAL
+ dope
+ size
));
9706 add_reg_note (last
, REG_FRAME_RELATED_EXPR
, expr
);
9707 RTX_FRAME_RELATED_P (last
) = 1;
9709 cfun
->machine
->fs
.sp_offset
+= size
;
9712 /* Make sure nothing is scheduled before we are done. */
9713 emit_insn (gen_blockage ());
9716 /* Adjust the stack pointer up to REG while probing it. */
9719 output_adjust_stack_and_probe (rtx reg
)
9721 static int labelno
= 0;
9722 char loop_lab
[32], end_lab
[32];
9725 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
);
9726 ASM_GENERATE_INTERNAL_LABEL (end_lab
, "LPSRE", labelno
++);
9728 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
9730 /* Jump to END_LAB if SP == LAST_ADDR. */
9731 xops
[0] = stack_pointer_rtx
;
9733 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops
);
9734 fputs ("\tje\t", asm_out_file
);
9735 assemble_name_raw (asm_out_file
, end_lab
);
9736 fputc ('\n', asm_out_file
);
9738 /* SP = SP + PROBE_INTERVAL. */
9739 xops
[1] = GEN_INT (PROBE_INTERVAL
);
9740 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops
);
9743 xops
[1] = const0_rtx
;
9744 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops
);
9746 fprintf (asm_out_file
, "\tjmp\t");
9747 assemble_name_raw (asm_out_file
, loop_lab
);
9748 fputc ('\n', asm_out_file
);
9750 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, end_lab
);
9755 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9756 inclusive. These are offsets from the current stack pointer. */
9759 ix86_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
)
9761 /* See if we have a constant small number of probes to generate. If so,
9762 that's the easy case. The run-time loop is made up of 7 insns in the
9763 generic case while the compile-time loop is made up of n insns for n #
9765 if (size
<= 7 * PROBE_INTERVAL
)
9769 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9770 it exceeds SIZE. If only one probe is needed, this will not
9771 generate any code. Then probe at FIRST + SIZE. */
9772 for (i
= PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
9773 emit_stack_probe (plus_constant (stack_pointer_rtx
, -(first
+ i
)));
9775 emit_stack_probe (plus_constant (stack_pointer_rtx
, -(first
+ size
)));
9778 /* Otherwise, do the same as above, but in a loop. Note that we must be
9779 extra careful with variables wrapping around because we might be at
9780 the very top (or the very bottom) of the address space and we have
9781 to be able to handle this case properly; in particular, we use an
9782 equality test for the loop condition. */
9785 HOST_WIDE_INT rounded_size
, last
;
9786 struct scratch_reg sr
;
9788 get_scratch_register_on_entry (&sr
);
9791 /* Step 1: round SIZE to the previous multiple of the interval. */
9793 rounded_size
= size
& -PROBE_INTERVAL
;
9796 /* Step 2: compute initial and final value of the loop counter. */
9798 /* TEST_OFFSET = FIRST. */
9799 emit_move_insn (sr
.reg
, GEN_INT (-first
));
9801 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9802 last
= first
+ rounded_size
;
9807 while (TEST_ADDR != LAST_ADDR)
9809 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9813 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9814 until it is equal to ROUNDED_SIZE. */
9816 emit_insn (ix86_gen_probe_stack_range (sr
.reg
, sr
.reg
, GEN_INT (-last
)));
9819 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9820 that SIZE is equal to ROUNDED_SIZE. */
9822 if (size
!= rounded_size
)
9823 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode
,
9826 rounded_size
- size
));
9828 release_scratch_register_on_entry (&sr
);
9831 /* Make sure nothing is scheduled before we are done. */
9832 emit_insn (gen_blockage ());
9835 /* Probe a range of stack addresses from REG to END, inclusive. These are
9836 offsets from the current stack pointer. */
9839 output_probe_stack_range (rtx reg
, rtx end
)
9841 static int labelno
= 0;
9842 char loop_lab
[32], end_lab
[32];
9845 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
);
9846 ASM_GENERATE_INTERNAL_LABEL (end_lab
, "LPSRE", labelno
++);
9848 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
9850 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
9853 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops
);
9854 fputs ("\tje\t", asm_out_file
);
9855 assemble_name_raw (asm_out_file
, end_lab
);
9856 fputc ('\n', asm_out_file
);
9858 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
9859 xops
[1] = GEN_INT (PROBE_INTERVAL
);
9860 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops
);
9862 /* Probe at TEST_ADDR. */
9863 xops
[0] = stack_pointer_rtx
;
9865 xops
[2] = const0_rtx
;
9866 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops
);
9868 fprintf (asm_out_file
, "\tjmp\t");
9869 assemble_name_raw (asm_out_file
, loop_lab
);
9870 fputc ('\n', asm_out_file
);
9872 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, end_lab
);
9877 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
9878 to be generated in correct form. */
9880 ix86_finalize_stack_realign_flags (void)
9882 /* Check if stack realign is really needed after reload, and
9883 stores result in cfun */
9884 unsigned int incoming_stack_boundary
9885 = (crtl
->parm_stack_boundary
> ix86_incoming_stack_boundary
9886 ? crtl
->parm_stack_boundary
: ix86_incoming_stack_boundary
);
9887 unsigned int stack_realign
= (incoming_stack_boundary
9888 < (current_function_is_leaf
9889 ? crtl
->max_used_stack_slot_alignment
9890 : crtl
->stack_alignment_needed
));
9892 if (crtl
->stack_realign_finalized
)
9894 /* After stack_realign_needed is finalized, we can't no longer
9896 gcc_assert (crtl
->stack_realign_needed
== stack_realign
);
9900 crtl
->stack_realign_needed
= stack_realign
;
9901 crtl
->stack_realign_finalized
= true;
9905 /* Expand the prologue into a bunch of separate insns. */
9908 ix86_expand_prologue (void)
9910 struct machine_function
*m
= cfun
->machine
;
9913 struct ix86_frame frame
;
9914 HOST_WIDE_INT allocate
;
9915 bool int_registers_saved
;
9917 ix86_finalize_stack_realign_flags ();
9919 /* DRAP should not coexist with stack_realign_fp */
9920 gcc_assert (!(crtl
->drap_reg
&& stack_realign_fp
));
9922 memset (&m
->fs
, 0, sizeof (m
->fs
));
9924 /* Initialize CFA state for before the prologue. */
9925 m
->fs
.cfa_reg
= stack_pointer_rtx
;
9926 m
->fs
.cfa_offset
= INCOMING_FRAME_SP_OFFSET
;
9928 /* Track SP offset to the CFA. We continue tracking this after we've
9929 swapped the CFA register away from SP. In the case of re-alignment
9930 this is fudged; we're interested to offsets within the local frame. */
9931 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
9932 m
->fs
.sp_valid
= true;
9934 ix86_compute_frame_layout (&frame
);
9936 if (!TARGET_64BIT
&& ix86_function_ms_hook_prologue (current_function_decl
))
9938 /* We should have already generated an error for any use of
9939 ms_hook on a nested function. */
9940 gcc_checking_assert (!ix86_static_chain_on_stack
);
9942 /* Check if profiling is active and we shall use profiling before
9943 prologue variant. If so sorry. */
9944 if (crtl
->profile
&& flag_fentry
!= 0)
9945 sorry ("ms_hook_prologue attribute isn%'t compatible "
9946 "with -mfentry for 32-bit");
9948 /* In ix86_asm_output_function_label we emitted:
9949 8b ff movl.s %edi,%edi
9951 8b ec movl.s %esp,%ebp
9953 This matches the hookable function prologue in Win32 API
9954 functions in Microsoft Windows XP Service Pack 2 and newer.
9955 Wine uses this to enable Windows apps to hook the Win32 API
9956 functions provided by Wine.
9958 What that means is that we've already set up the frame pointer. */
9960 if (frame_pointer_needed
9961 && !(crtl
->drap_reg
&& crtl
->stack_realign_needed
))
9965 /* We've decided to use the frame pointer already set up.
9966 Describe this to the unwinder by pretending that both
9967 push and mov insns happen right here.
9969 Putting the unwind info here at the end of the ms_hook
9970 is done so that we can make absolutely certain we get
9971 the required byte sequence at the start of the function,
9972 rather than relying on an assembler that can produce
9973 the exact encoding required.
9975 However it does mean (in the unpatched case) that we have
9976 a 1 insn window where the asynchronous unwind info is
9977 incorrect. However, if we placed the unwind info at
9978 its correct location we would have incorrect unwind info
9979 in the patched case. Which is probably all moot since
9980 I don't expect Wine generates dwarf2 unwind info for the
9981 system libraries that use this feature. */
9983 insn
= emit_insn (gen_blockage ());
9985 push
= gen_push (hard_frame_pointer_rtx
);
9986 mov
= gen_rtx_SET (VOIDmode
, hard_frame_pointer_rtx
,
9988 RTX_FRAME_RELATED_P (push
) = 1;
9989 RTX_FRAME_RELATED_P (mov
) = 1;
9991 RTX_FRAME_RELATED_P (insn
) = 1;
9992 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
9993 gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, push
, mov
)));
9995 /* Note that gen_push incremented m->fs.cfa_offset, even
9996 though we didn't emit the push insn here. */
9997 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
9998 m
->fs
.fp_offset
= m
->fs
.cfa_offset
;
9999 m
->fs
.fp_valid
= true;
10003 /* The frame pointer is not needed so pop %ebp again.
10004 This leaves us with a pristine state. */
10005 emit_insn (gen_pop (hard_frame_pointer_rtx
));
10009 /* The first insn of a function that accepts its static chain on the
10010 stack is to push the register that would be filled in by a direct
10011 call. This insn will be skipped by the trampoline. */
10012 else if (ix86_static_chain_on_stack
)
10014 insn
= emit_insn (gen_push (ix86_static_chain (cfun
->decl
, false)));
10015 emit_insn (gen_blockage ());
10017 /* We don't want to interpret this push insn as a register save,
10018 only as a stack adjustment. The real copy of the register as
10019 a save will be done later, if needed. */
10020 t
= plus_constant (stack_pointer_rtx
, -UNITS_PER_WORD
);
10021 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
10022 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, t
);
10023 RTX_FRAME_RELATED_P (insn
) = 1;
10026 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
10027 of DRAP is needed and stack realignment is really needed after reload */
10028 if (stack_realign_drap
)
10030 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10032 /* Only need to push parameter pointer reg if it is caller saved. */
10033 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
10035 /* Push arg pointer reg */
10036 insn
= emit_insn (gen_push (crtl
->drap_reg
));
10037 RTX_FRAME_RELATED_P (insn
) = 1;
10040 /* Grab the argument pointer. */
10041 t
= plus_constant (stack_pointer_rtx
, m
->fs
.sp_offset
);
10042 insn
= emit_insn (gen_rtx_SET (VOIDmode
, crtl
->drap_reg
, t
));
10043 RTX_FRAME_RELATED_P (insn
) = 1;
10044 m
->fs
.cfa_reg
= crtl
->drap_reg
;
10045 m
->fs
.cfa_offset
= 0;
10047 /* Align the stack. */
10048 insn
= emit_insn (ix86_gen_andsp (stack_pointer_rtx
,
10050 GEN_INT (-align_bytes
)));
10051 RTX_FRAME_RELATED_P (insn
) = 1;
10053 /* Replicate the return address on the stack so that return
10054 address can be reached via (argp - 1) slot. This is needed
10055 to implement macro RETURN_ADDR_RTX and intrinsic function
10056 expand_builtin_return_addr etc. */
10057 t
= plus_constant (crtl
->drap_reg
, -UNITS_PER_WORD
);
10058 t
= gen_frame_mem (Pmode
, t
);
10059 insn
= emit_insn (gen_push (t
));
10060 RTX_FRAME_RELATED_P (insn
) = 1;
10062 /* For the purposes of frame and register save area addressing,
10063 we've started over with a new frame. */
10064 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
10065 m
->fs
.realigned
= true;
10068 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
10070 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10071 slower on all targets. Also sdb doesn't like it. */
10072 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
10073 RTX_FRAME_RELATED_P (insn
) = 1;
10075 if (m
->fs
.sp_offset
== frame
.hard_frame_pointer_offset
)
10077 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
10078 RTX_FRAME_RELATED_P (insn
) = 1;
10080 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10081 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10082 m
->fs
.fp_offset
= m
->fs
.sp_offset
;
10083 m
->fs
.fp_valid
= true;
10087 int_registers_saved
= (frame
.nregs
== 0);
10089 if (!int_registers_saved
)
10091 /* If saving registers via PUSH, do so now. */
10092 if (!frame
.save_regs_using_mov
)
10094 ix86_emit_save_regs ();
10095 int_registers_saved
= true;
10096 gcc_assert (m
->fs
.sp_offset
== frame
.reg_save_offset
);
10099 /* When using red zone we may start register saving before allocating
10100 the stack frame saving one cycle of the prologue. However, avoid
10101 doing this if we have to probe the stack; at least on x86_64 the
10102 stack probe can turn into a call that clobbers a red zone location. */
10103 else if (ix86_using_red_zone ()
10104 && (! TARGET_STACK_PROBE
10105 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
))
10107 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
10108 int_registers_saved
= true;
10112 if (stack_realign_fp
)
10114 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10115 gcc_assert (align_bytes
> MIN_STACK_BOUNDARY
/ BITS_PER_UNIT
);
10117 /* The computation of the size of the re-aligned stack frame means
10118 that we must allocate the size of the register save area before
10119 performing the actual alignment. Otherwise we cannot guarantee
10120 that there's enough storage above the realignment point. */
10121 if (m
->fs
.sp_offset
!= frame
.sse_reg_save_offset
)
10122 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10123 GEN_INT (m
->fs
.sp_offset
10124 - frame
.sse_reg_save_offset
),
10127 /* Align the stack. */
10128 insn
= emit_insn (ix86_gen_andsp (stack_pointer_rtx
,
10130 GEN_INT (-align_bytes
)));
10132 /* For the purposes of register save area addressing, the stack
10133 pointer is no longer valid. As for the value of sp_offset,
10134 see ix86_compute_frame_layout, which we need to match in order
10135 to pass verification of stack_pointer_offset at the end. */
10136 m
->fs
.sp_offset
= (m
->fs
.sp_offset
+ align_bytes
) & -align_bytes
;
10137 m
->fs
.sp_valid
= false;
10140 allocate
= frame
.stack_pointer_offset
- m
->fs
.sp_offset
;
10142 if (flag_stack_usage_info
)
10144 /* We start to count from ARG_POINTER. */
10145 HOST_WIDE_INT stack_size
= frame
.stack_pointer_offset
;
10147 /* If it was realigned, take into account the fake frame. */
10148 if (stack_realign_drap
)
10150 if (ix86_static_chain_on_stack
)
10151 stack_size
+= UNITS_PER_WORD
;
10153 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
10154 stack_size
+= UNITS_PER_WORD
;
10156 /* This over-estimates by 1 minimal-stack-alignment-unit but
10157 mitigates that by counting in the new return address slot. */
10158 current_function_dynamic_stack_size
10159 += crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10162 current_function_static_stack_size
= stack_size
;
10165 /* The stack has already been decremented by the instruction calling us
10166 so probe if the size is non-negative to preserve the protection area. */
10167 if (allocate
>= 0 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
10169 /* We expect the registers to be saved when probes are used. */
10170 gcc_assert (int_registers_saved
);
10172 if (STACK_CHECK_MOVING_SP
)
10174 ix86_adjust_stack_and_probe (allocate
);
10179 HOST_WIDE_INT size
= allocate
;
10181 if (TARGET_64BIT
&& size
>= (HOST_WIDE_INT
) 0x80000000)
10182 size
= 0x80000000 - STACK_CHECK_PROTECT
- 1;
10184 if (TARGET_STACK_PROBE
)
10185 ix86_emit_probe_stack_range (0, size
+ STACK_CHECK_PROTECT
);
10187 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT
, size
);
10193 else if (!ix86_target_stack_probe ()
10194 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
)
10196 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10197 GEN_INT (-allocate
), -1,
10198 m
->fs
.cfa_reg
== stack_pointer_rtx
);
10202 rtx eax
= gen_rtx_REG (Pmode
, AX_REG
);
10204 rtx (*adjust_stack_insn
)(rtx
, rtx
, rtx
);
10206 bool eax_live
= false;
10207 bool r10_live
= false;
10210 r10_live
= (DECL_STATIC_CHAIN (current_function_decl
) != 0);
10211 if (!TARGET_64BIT_MS_ABI
)
10212 eax_live
= ix86_eax_live_at_start_p ();
10216 emit_insn (gen_push (eax
));
10217 allocate
-= UNITS_PER_WORD
;
10221 r10
= gen_rtx_REG (Pmode
, R10_REG
);
10222 emit_insn (gen_push (r10
));
10223 allocate
-= UNITS_PER_WORD
;
10226 emit_move_insn (eax
, GEN_INT (allocate
));
10227 emit_insn (ix86_gen_allocate_stack_worker (eax
, eax
));
10229 /* Use the fact that AX still contains ALLOCATE. */
10230 adjust_stack_insn
= (TARGET_64BIT
10231 ? gen_pro_epilogue_adjust_stack_di_sub
10232 : gen_pro_epilogue_adjust_stack_si_sub
);
10234 insn
= emit_insn (adjust_stack_insn (stack_pointer_rtx
,
10235 stack_pointer_rtx
, eax
));
10237 /* Note that SEH directives need to continue tracking the stack
10238 pointer even after the frame pointer has been set up. */
10239 if (m
->fs
.cfa_reg
== stack_pointer_rtx
|| TARGET_SEH
)
10241 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10242 m
->fs
.cfa_offset
+= allocate
;
10244 RTX_FRAME_RELATED_P (insn
) = 1;
10245 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
10246 gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10247 plus_constant (stack_pointer_rtx
,
10250 m
->fs
.sp_offset
+= allocate
;
10252 if (r10_live
&& eax_live
)
10254 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
);
10255 emit_move_insn (r10
, gen_frame_mem (Pmode
, t
));
10256 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
- UNITS_PER_WORD
);
10257 emit_move_insn (eax
, gen_frame_mem (Pmode
, t
));
10259 else if (eax_live
|| r10_live
)
10261 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
);
10262 emit_move_insn ((eax_live
? eax
: r10
), gen_frame_mem (Pmode
, t
));
10265 gcc_assert (m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
10267 /* If we havn't already set up the frame pointer, do so now. */
10268 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
10270 insn
= ix86_gen_add3 (hard_frame_pointer_rtx
, stack_pointer_rtx
,
10271 GEN_INT (frame
.stack_pointer_offset
10272 - frame
.hard_frame_pointer_offset
));
10273 insn
= emit_insn (insn
);
10274 RTX_FRAME_RELATED_P (insn
) = 1;
10275 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, NULL
);
10277 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10278 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10279 m
->fs
.fp_offset
= frame
.hard_frame_pointer_offset
;
10280 m
->fs
.fp_valid
= true;
10283 if (!int_registers_saved
)
10284 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
10285 if (frame
.nsseregs
)
10286 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
10288 pic_reg_used
= false;
10289 if (pic_offset_table_rtx
10290 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
10293 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
10295 if (alt_pic_reg_used
!= INVALID_REGNUM
)
10296 SET_REGNO (pic_offset_table_rtx
, alt_pic_reg_used
);
10298 pic_reg_used
= true;
10305 if (ix86_cmodel
== CM_LARGE_PIC
)
10307 rtx tmp_reg
= gen_rtx_REG (DImode
, R11_REG
);
10308 rtx label
= gen_label_rtx ();
10309 emit_label (label
);
10310 LABEL_PRESERVE_P (label
) = 1;
10311 gcc_assert (REGNO (pic_offset_table_rtx
) != REGNO (tmp_reg
));
10312 insn
= emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx
, label
));
10313 insn
= emit_insn (gen_set_got_offset_rex64 (tmp_reg
, label
));
10314 insn
= emit_insn (gen_adddi3 (pic_offset_table_rtx
,
10315 pic_offset_table_rtx
, tmp_reg
));
10318 insn
= emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
10322 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
10323 RTX_FRAME_RELATED_P (insn
) = 1;
10324 add_reg_note (insn
, REG_CFA_FLUSH_QUEUE
, NULL_RTX
);
10328 /* In the pic_reg_used case, make sure that the got load isn't deleted
10329 when mcount needs it. Blockage to avoid call movement across mcount
10330 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10332 if (crtl
->profile
&& !flag_fentry
&& pic_reg_used
)
10333 emit_insn (gen_prologue_use (pic_offset_table_rtx
));
10335 if (crtl
->drap_reg
&& !crtl
->stack_realign_needed
)
10337 /* vDRAP is setup but after reload it turns out stack realign
10338 isn't necessary, here we will emit prologue to setup DRAP
10339 without stack realign adjustment */
10340 t
= choose_baseaddr (0);
10341 emit_insn (gen_rtx_SET (VOIDmode
, crtl
->drap_reg
, t
));
10344 /* Prevent instructions from being scheduled into register save push
10345 sequence when access to the redzone area is done through frame pointer.
10346 The offset between the frame pointer and the stack pointer is calculated
10347 relative to the value of the stack pointer at the end of the function
10348 prologue, and moving instructions that access redzone area via frame
10349 pointer inside push sequence violates this assumption. */
10350 if (frame_pointer_needed
&& frame
.red_zone_size
)
10351 emit_insn (gen_memory_blockage ());
10353 /* Emit cld instruction if stringops are used in the function. */
10354 if (TARGET_CLD
&& ix86_current_function_needs_cld
)
10355 emit_insn (gen_cld ());
10357 /* SEH requires that the prologue end within 256 bytes of the start of
10358 the function. Prevent instruction schedules that would extend that.
10359 Further, prevent alloca modifications to the stack pointer from being
10360 combined with prologue modifications. */
10362 emit_insn (gen_prologue_use (stack_pointer_rtx
));
10365 /* Emit code to restore REG using a POP insn. */
10368 ix86_emit_restore_reg_using_pop (rtx reg
)
10370 struct machine_function
*m
= cfun
->machine
;
10371 rtx insn
= emit_insn (gen_pop (reg
));
10373 ix86_add_cfa_restore_note (insn
, reg
, m
->fs
.sp_offset
);
10374 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
10376 if (m
->fs
.cfa_reg
== crtl
->drap_reg
10377 && REGNO (reg
) == REGNO (crtl
->drap_reg
))
10379 /* Previously we'd represented the CFA as an expression
10380 like *(%ebp - 8). We've just popped that value from
10381 the stack, which means we need to reset the CFA to
10382 the drap register. This will remain until we restore
10383 the stack pointer. */
10384 add_reg_note (insn
, REG_CFA_DEF_CFA
, reg
);
10385 RTX_FRAME_RELATED_P (insn
) = 1;
10387 /* This means that the DRAP register is valid for addressing too. */
10388 m
->fs
.drap_valid
= true;
10392 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10394 rtx x
= plus_constant (stack_pointer_rtx
, UNITS_PER_WORD
);
10395 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
10396 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, x
);
10397 RTX_FRAME_RELATED_P (insn
) = 1;
10399 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
10402 /* When the frame pointer is the CFA, and we pop it, we are
10403 swapping back to the stack pointer as the CFA. This happens
10404 for stack frames that don't allocate other data, so we assume
10405 the stack pointer is now pointing at the return address, i.e.
10406 the function entry state, which makes the offset be 1 word. */
10407 if (reg
== hard_frame_pointer_rtx
)
10409 m
->fs
.fp_valid
= false;
10410 if (m
->fs
.cfa_reg
== hard_frame_pointer_rtx
)
10412 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10413 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
10415 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10416 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
10417 GEN_INT (m
->fs
.cfa_offset
)));
10418 RTX_FRAME_RELATED_P (insn
) = 1;
10423 /* Emit code to restore saved registers using POP insns. */
10426 ix86_emit_restore_regs_using_pop (void)
10428 unsigned int regno
;
10430 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
10431 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, false))
10432 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode
, regno
));
10435 /* Emit code and notes for the LEAVE instruction. */
10438 ix86_emit_leave (void)
10440 struct machine_function
*m
= cfun
->machine
;
10441 rtx insn
= emit_insn (ix86_gen_leave ());
10443 ix86_add_queued_cfa_restore_notes (insn
);
10445 gcc_assert (m
->fs
.fp_valid
);
10446 m
->fs
.sp_valid
= true;
10447 m
->fs
.sp_offset
= m
->fs
.fp_offset
- UNITS_PER_WORD
;
10448 m
->fs
.fp_valid
= false;
10450 if (m
->fs
.cfa_reg
== hard_frame_pointer_rtx
)
10452 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10453 m
->fs
.cfa_offset
= m
->fs
.sp_offset
;
10455 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10456 plus_constant (stack_pointer_rtx
, m
->fs
.sp_offset
));
10457 RTX_FRAME_RELATED_P (insn
) = 1;
10458 ix86_add_cfa_restore_note (insn
, hard_frame_pointer_rtx
,
10463 /* Emit code to restore saved registers using MOV insns.
10464 First register is restored from CFA - CFA_OFFSET. */
10466 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset
,
10467 bool maybe_eh_return
)
10469 struct machine_function
*m
= cfun
->machine
;
10470 unsigned int regno
;
10472 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
10473 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, maybe_eh_return
))
10475 rtx reg
= gen_rtx_REG (Pmode
, regno
);
10478 mem
= choose_baseaddr (cfa_offset
);
10479 mem
= gen_frame_mem (Pmode
, mem
);
10480 insn
= emit_move_insn (reg
, mem
);
10482 if (m
->fs
.cfa_reg
== crtl
->drap_reg
&& regno
== REGNO (crtl
->drap_reg
))
10484 /* Previously we'd represented the CFA as an expression
10485 like *(%ebp - 8). We've just popped that value from
10486 the stack, which means we need to reset the CFA to
10487 the drap register. This will remain until we restore
10488 the stack pointer. */
10489 add_reg_note (insn
, REG_CFA_DEF_CFA
, reg
);
10490 RTX_FRAME_RELATED_P (insn
) = 1;
10492 /* This means that the DRAP register is valid for addressing. */
10493 m
->fs
.drap_valid
= true;
10496 ix86_add_cfa_restore_note (NULL_RTX
, reg
, cfa_offset
);
10498 cfa_offset
-= UNITS_PER_WORD
;
10502 /* Emit code to restore saved registers using MOV insns.
10503 First register is restored from CFA - CFA_OFFSET. */
10505 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset
,
10506 bool maybe_eh_return
)
10508 unsigned int regno
;
10510 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
10511 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, maybe_eh_return
))
10513 rtx reg
= gen_rtx_REG (V4SFmode
, regno
);
10516 mem
= choose_baseaddr (cfa_offset
);
10517 mem
= gen_rtx_MEM (V4SFmode
, mem
);
10518 set_mem_align (mem
, 128);
10519 emit_move_insn (reg
, mem
);
10521 ix86_add_cfa_restore_note (NULL_RTX
, reg
, cfa_offset
);
10527 /* Restore function stack, frame, and registers. */
10530 ix86_expand_epilogue (int style
)
10532 struct machine_function
*m
= cfun
->machine
;
10533 struct machine_frame_state frame_state_save
= m
->fs
;
10534 struct ix86_frame frame
;
10535 bool restore_regs_via_mov
;
10538 ix86_finalize_stack_realign_flags ();
10539 ix86_compute_frame_layout (&frame
);
10541 m
->fs
.sp_valid
= (!frame_pointer_needed
10542 || (current_function_sp_is_unchanging
10543 && !stack_realign_fp
));
10544 gcc_assert (!m
->fs
.sp_valid
10545 || m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
10547 /* The FP must be valid if the frame pointer is present. */
10548 gcc_assert (frame_pointer_needed
== m
->fs
.fp_valid
);
10549 gcc_assert (!m
->fs
.fp_valid
10550 || m
->fs
.fp_offset
== frame
.hard_frame_pointer_offset
);
10552 /* We must have *some* valid pointer to the stack frame. */
10553 gcc_assert (m
->fs
.sp_valid
|| m
->fs
.fp_valid
);
10555 /* The DRAP is never valid at this point. */
10556 gcc_assert (!m
->fs
.drap_valid
);
10558 /* See the comment about red zone and frame
10559 pointer usage in ix86_expand_prologue. */
10560 if (frame_pointer_needed
&& frame
.red_zone_size
)
10561 emit_insn (gen_memory_blockage ());
10563 using_drap
= crtl
->drap_reg
&& crtl
->stack_realign_needed
;
10564 gcc_assert (!using_drap
|| m
->fs
.cfa_reg
== crtl
->drap_reg
);
10566 /* Determine the CFA offset of the end of the red-zone. */
10567 m
->fs
.red_zone_offset
= 0;
10568 if (ix86_using_red_zone () && crtl
->args
.pops_args
< 65536)
10570 /* The red-zone begins below the return address. */
10571 m
->fs
.red_zone_offset
= RED_ZONE_SIZE
+ UNITS_PER_WORD
;
10573 /* When the register save area is in the aligned portion of
10574 the stack, determine the maximum runtime displacement that
10575 matches up with the aligned frame. */
10576 if (stack_realign_drap
)
10577 m
->fs
.red_zone_offset
-= (crtl
->stack_alignment_needed
/ BITS_PER_UNIT
10581 /* Special care must be taken for the normal return case of a function
10582 using eh_return: the eax and edx registers are marked as saved, but
10583 not restored along this path. Adjust the save location to match. */
10584 if (crtl
->calls_eh_return
&& style
!= 2)
10585 frame
.reg_save_offset
-= 2 * UNITS_PER_WORD
;
10587 /* EH_RETURN requires the use of moves to function properly. */
10588 if (crtl
->calls_eh_return
)
10589 restore_regs_via_mov
= true;
10590 /* SEH requires the use of pops to identify the epilogue. */
10591 else if (TARGET_SEH
)
10592 restore_regs_via_mov
= false;
10593 /* If we're only restoring one register and sp is not valid then
10594 using a move instruction to restore the register since it's
10595 less work than reloading sp and popping the register. */
10596 else if (!m
->fs
.sp_valid
&& frame
.nregs
<= 1)
10597 restore_regs_via_mov
= true;
10598 else if (TARGET_EPILOGUE_USING_MOVE
10599 && cfun
->machine
->use_fast_prologue_epilogue
10600 && (frame
.nregs
> 1
10601 || m
->fs
.sp_offset
!= frame
.reg_save_offset
))
10602 restore_regs_via_mov
= true;
10603 else if (frame_pointer_needed
10605 && m
->fs
.sp_offset
!= frame
.reg_save_offset
)
10606 restore_regs_via_mov
= true;
10607 else if (frame_pointer_needed
10608 && TARGET_USE_LEAVE
10609 && cfun
->machine
->use_fast_prologue_epilogue
10610 && frame
.nregs
== 1)
10611 restore_regs_via_mov
= true;
10613 restore_regs_via_mov
= false;
10615 if (restore_regs_via_mov
|| frame
.nsseregs
)
10617 /* Ensure that the entire register save area is addressable via
10618 the stack pointer, if we will restore via sp. */
10620 && m
->fs
.sp_offset
> 0x7fffffff
10621 && !(m
->fs
.fp_valid
|| m
->fs
.drap_valid
)
10622 && (frame
.nsseregs
+ frame
.nregs
) != 0)
10624 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10625 GEN_INT (m
->fs
.sp_offset
10626 - frame
.sse_reg_save_offset
),
10628 m
->fs
.cfa_reg
== stack_pointer_rtx
);
10632 /* If there are any SSE registers to restore, then we have to do it
10633 via moves, since there's obviously no pop for SSE regs. */
10634 if (frame
.nsseregs
)
10635 ix86_emit_restore_sse_regs_using_mov (frame
.sse_reg_save_offset
,
10638 if (restore_regs_via_mov
)
10643 ix86_emit_restore_regs_using_mov (frame
.reg_save_offset
, style
== 2);
10645 /* eh_return epilogues need %ecx added to the stack pointer. */
10648 rtx insn
, sa
= EH_RETURN_STACKADJ_RTX
;
10650 /* Stack align doesn't work with eh_return. */
10651 gcc_assert (!stack_realign_drap
);
10652 /* Neither does regparm nested functions. */
10653 gcc_assert (!ix86_static_chain_on_stack
);
10655 if (frame_pointer_needed
)
10657 t
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
10658 t
= plus_constant (t
, m
->fs
.fp_offset
- UNITS_PER_WORD
);
10659 emit_insn (gen_rtx_SET (VOIDmode
, sa
, t
));
10661 t
= gen_frame_mem (Pmode
, hard_frame_pointer_rtx
);
10662 insn
= emit_move_insn (hard_frame_pointer_rtx
, t
);
10664 /* Note that we use SA as a temporary CFA, as the return
10665 address is at the proper place relative to it. We
10666 pretend this happens at the FP restore insn because
10667 prior to this insn the FP would be stored at the wrong
10668 offset relative to SA, and after this insn we have no
10669 other reasonable register to use for the CFA. We don't
10670 bother resetting the CFA to the SP for the duration of
10671 the return insn. */
10672 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10673 plus_constant (sa
, UNITS_PER_WORD
));
10674 ix86_add_queued_cfa_restore_notes (insn
);
10675 add_reg_note (insn
, REG_CFA_RESTORE
, hard_frame_pointer_rtx
);
10676 RTX_FRAME_RELATED_P (insn
) = 1;
10678 m
->fs
.cfa_reg
= sa
;
10679 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
10680 m
->fs
.fp_valid
= false;
10682 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
10683 const0_rtx
, style
, false);
10687 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
10688 t
= plus_constant (t
, m
->fs
.sp_offset
- UNITS_PER_WORD
);
10689 insn
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
));
10690 ix86_add_queued_cfa_restore_notes (insn
);
10692 gcc_assert (m
->fs
.cfa_reg
== stack_pointer_rtx
);
10693 if (m
->fs
.cfa_offset
!= UNITS_PER_WORD
)
10695 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
10696 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10697 plus_constant (stack_pointer_rtx
,
10699 RTX_FRAME_RELATED_P (insn
) = 1;
10702 m
->fs
.sp_offset
= UNITS_PER_WORD
;
10703 m
->fs
.sp_valid
= true;
10708 /* SEH requires that the function end with (1) a stack adjustment
10709 if necessary, (2) a sequence of pops, and (3) a return or
10710 jump instruction. Prevent insns from the function body from
10711 being scheduled into this sequence. */
10714 /* Prevent a catch region from being adjacent to the standard
10715 epilogue sequence. Unfortuantely crtl->uses_eh_lsda nor
10716 several other flags that would be interesting to test are
10718 if (flag_non_call_exceptions
)
10719 emit_insn (gen_nops (const1_rtx
));
10721 emit_insn (gen_blockage ());
10724 /* First step is to deallocate the stack frame so that we can
10725 pop the registers. */
10726 if (!m
->fs
.sp_valid
)
10728 pro_epilogue_adjust_stack (stack_pointer_rtx
, hard_frame_pointer_rtx
,
10729 GEN_INT (m
->fs
.fp_offset
10730 - frame
.reg_save_offset
),
10733 else if (m
->fs
.sp_offset
!= frame
.reg_save_offset
)
10735 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10736 GEN_INT (m
->fs
.sp_offset
10737 - frame
.reg_save_offset
),
10739 m
->fs
.cfa_reg
== stack_pointer_rtx
);
10742 ix86_emit_restore_regs_using_pop ();
10745 /* If we used a stack pointer and haven't already got rid of it,
10747 if (m
->fs
.fp_valid
)
10749 /* If the stack pointer is valid and pointing at the frame
10750 pointer store address, then we only need a pop. */
10751 if (m
->fs
.sp_valid
&& m
->fs
.sp_offset
== frame
.hfp_save_offset
)
10752 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
10753 /* Leave results in shorter dependency chains on CPUs that are
10754 able to grok it fast. */
10755 else if (TARGET_USE_LEAVE
10756 || optimize_function_for_size_p (cfun
)
10757 || !cfun
->machine
->use_fast_prologue_epilogue
)
10758 ix86_emit_leave ();
10761 pro_epilogue_adjust_stack (stack_pointer_rtx
,
10762 hard_frame_pointer_rtx
,
10763 const0_rtx
, style
, !using_drap
);
10764 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
10770 int param_ptr_offset
= UNITS_PER_WORD
;
10773 gcc_assert (stack_realign_drap
);
10775 if (ix86_static_chain_on_stack
)
10776 param_ptr_offset
+= UNITS_PER_WORD
;
10777 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
10778 param_ptr_offset
+= UNITS_PER_WORD
;
10780 insn
= emit_insn (gen_rtx_SET
10781 (VOIDmode
, stack_pointer_rtx
,
10782 gen_rtx_PLUS (Pmode
,
10784 GEN_INT (-param_ptr_offset
))));
10785 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10786 m
->fs
.cfa_offset
= param_ptr_offset
;
10787 m
->fs
.sp_offset
= param_ptr_offset
;
10788 m
->fs
.realigned
= false;
10790 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10791 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
10792 GEN_INT (param_ptr_offset
)));
10793 RTX_FRAME_RELATED_P (insn
) = 1;
10795 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
10796 ix86_emit_restore_reg_using_pop (crtl
->drap_reg
);
10799 /* At this point the stack pointer must be valid, and we must have
10800 restored all of the registers. We may not have deallocated the
10801 entire stack frame. We've delayed this until now because it may
10802 be possible to merge the local stack deallocation with the
10803 deallocation forced by ix86_static_chain_on_stack. */
10804 gcc_assert (m
->fs
.sp_valid
);
10805 gcc_assert (!m
->fs
.fp_valid
);
10806 gcc_assert (!m
->fs
.realigned
);
10807 if (m
->fs
.sp_offset
!= UNITS_PER_WORD
)
10809 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10810 GEN_INT (m
->fs
.sp_offset
- UNITS_PER_WORD
),
10814 ix86_add_queued_cfa_restore_notes (get_last_insn ());
10816 /* Sibcall epilogues don't want a return instruction. */
10819 m
->fs
= frame_state_save
;
10823 /* Emit vzeroupper if needed. */
10824 if (TARGET_VZEROUPPER
10825 && !TREE_THIS_VOLATILE (cfun
->decl
)
10826 && !cfun
->machine
->caller_return_avx256_p
)
10827 emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256
)));
10829 if (crtl
->args
.pops_args
&& crtl
->args
.size
)
10831 rtx popc
= GEN_INT (crtl
->args
.pops_args
);
10833 /* i386 can only pop 64K bytes. If asked to pop more, pop return
10834 address, do explicit add, and jump indirectly to the caller. */
10836 if (crtl
->args
.pops_args
>= 65536)
10838 rtx ecx
= gen_rtx_REG (SImode
, CX_REG
);
10841 /* There is no "pascal" calling convention in any 64bit ABI. */
10842 gcc_assert (!TARGET_64BIT
);
10844 insn
= emit_insn (gen_pop (ecx
));
10845 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
10846 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
10848 add_reg_note (insn
, REG_CFA_ADJUST_CFA
,
10849 copy_rtx (XVECEXP (PATTERN (insn
), 0, 1)));
10850 add_reg_note (insn
, REG_CFA_REGISTER
,
10851 gen_rtx_SET (VOIDmode
, ecx
, pc_rtx
));
10852 RTX_FRAME_RELATED_P (insn
) = 1;
10854 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10856 emit_jump_insn (gen_simple_return_indirect_internal (ecx
));
10859 emit_jump_insn (gen_simple_return_pop_internal (popc
));
10862 emit_jump_insn (gen_simple_return_internal ());
10864 /* Restore the state back to the state from the prologue,
10865 so that it's correct for the next epilogue. */
10866 m
->fs
= frame_state_save
;
10869 /* Reset from the function's potential modifications. */
10872 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
10873 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
10875 if (pic_offset_table_rtx
)
10876 SET_REGNO (pic_offset_table_rtx
, REAL_PIC_OFFSET_TABLE_REGNUM
);
10878 /* Mach-O doesn't support labels at the end of objects, so if
10879 it looks like we might want one, insert a NOP. */
10881 rtx insn
= get_last_insn ();
10884 && NOTE_KIND (insn
) != NOTE_INSN_DELETED_LABEL
)
10885 insn
= PREV_INSN (insn
);
10889 && NOTE_KIND (insn
) == NOTE_INSN_DELETED_LABEL
)))
10890 fputs ("\tnop\n", file
);
10896 /* Return a scratch register to use in the split stack prologue. The
10897 split stack prologue is used for -fsplit-stack. It is the first
10898 instructions in the function, even before the regular prologue.
10899 The scratch register can be any caller-saved register which is not
10900 used for parameters or for the static chain. */
10902 static unsigned int
10903 split_stack_prologue_scratch_regno (void)
10912 is_fastcall
= (lookup_attribute ("fastcall",
10913 TYPE_ATTRIBUTES (TREE_TYPE (cfun
->decl
)))
10915 regparm
= ix86_function_regparm (TREE_TYPE (cfun
->decl
), cfun
->decl
);
10919 if (DECL_STATIC_CHAIN (cfun
->decl
))
10921 sorry ("-fsplit-stack does not support fastcall with "
10922 "nested function");
10923 return INVALID_REGNUM
;
10927 else if (regparm
< 3)
10929 if (!DECL_STATIC_CHAIN (cfun
->decl
))
10935 sorry ("-fsplit-stack does not support 2 register "
10936 " parameters for a nested function");
10937 return INVALID_REGNUM
;
10944 /* FIXME: We could make this work by pushing a register
10945 around the addition and comparison. */
10946 sorry ("-fsplit-stack does not support 3 register parameters");
10947 return INVALID_REGNUM
;
10952 /* A SYMBOL_REF for the function which allocates new stackspace for
10955 static GTY(()) rtx split_stack_fn
;
10957 /* A SYMBOL_REF for the more stack function when using the large
10960 static GTY(()) rtx split_stack_fn_large
;
10962 /* Handle -fsplit-stack. These are the first instructions in the
10963 function, even before the regular prologue. */
10966 ix86_expand_split_stack_prologue (void)
10968 struct ix86_frame frame
;
10969 HOST_WIDE_INT allocate
;
10970 unsigned HOST_WIDE_INT args_size
;
10971 rtx label
, limit
, current
, jump_insn
, allocate_rtx
, call_insn
, call_fusage
;
10972 rtx scratch_reg
= NULL_RTX
;
10973 rtx varargs_label
= NULL_RTX
;
10976 gcc_assert (flag_split_stack
&& reload_completed
);
10978 ix86_finalize_stack_realign_flags ();
10979 ix86_compute_frame_layout (&frame
);
10980 allocate
= frame
.stack_pointer_offset
- INCOMING_FRAME_SP_OFFSET
;
10982 /* This is the label we will branch to if we have enough stack
10983 space. We expect the basic block reordering pass to reverse this
10984 branch if optimizing, so that we branch in the unlikely case. */
10985 label
= gen_label_rtx ();
10987 /* We need to compare the stack pointer minus the frame size with
10988 the stack boundary in the TCB. The stack boundary always gives
10989 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
10990 can compare directly. Otherwise we need to do an addition. */
10992 limit
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
10993 UNSPEC_STACK_CHECK
);
10994 limit
= gen_rtx_CONST (Pmode
, limit
);
10995 limit
= gen_rtx_MEM (Pmode
, limit
);
10996 if (allocate
< SPLIT_STACK_AVAILABLE
)
10997 current
= stack_pointer_rtx
;
11000 unsigned int scratch_regno
;
11003 /* We need a scratch register to hold the stack pointer minus
11004 the required frame size. Since this is the very start of the
11005 function, the scratch register can be any caller-saved
11006 register which is not used for parameters. */
11007 offset
= GEN_INT (- allocate
);
11008 scratch_regno
= split_stack_prologue_scratch_regno ();
11009 if (scratch_regno
== INVALID_REGNUM
)
11011 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
11012 if (!TARGET_64BIT
|| x86_64_immediate_operand (offset
, Pmode
))
11014 /* We don't use ix86_gen_add3 in this case because it will
11015 want to split to lea, but when not optimizing the insn
11016 will not be split after this point. */
11017 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11018 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11023 emit_move_insn (scratch_reg
, offset
);
11024 emit_insn (gen_adddi3 (scratch_reg
, scratch_reg
,
11025 stack_pointer_rtx
));
11027 current
= scratch_reg
;
11030 ix86_expand_branch (GEU
, current
, limit
, label
);
11031 jump_insn
= get_last_insn ();
11032 JUMP_LABEL (jump_insn
) = label
;
11034 /* Mark the jump as very likely to be taken. */
11035 add_reg_note (jump_insn
, REG_BR_PROB
,
11036 GEN_INT (REG_BR_PROB_BASE
- REG_BR_PROB_BASE
/ 100));
11038 if (split_stack_fn
== NULL_RTX
)
11039 split_stack_fn
= gen_rtx_SYMBOL_REF (Pmode
, "__morestack");
11040 fn
= split_stack_fn
;
11042 /* Get more stack space. We pass in the desired stack space and the
11043 size of the arguments to copy to the new stack. In 32-bit mode
11044 we push the parameters; __morestack will return on a new stack
11045 anyhow. In 64-bit mode we pass the parameters in r10 and
11047 allocate_rtx
= GEN_INT (allocate
);
11048 args_size
= crtl
->args
.size
>= 0 ? crtl
->args
.size
: 0;
11049 call_fusage
= NULL_RTX
;
11054 reg10
= gen_rtx_REG (Pmode
, R10_REG
);
11055 reg11
= gen_rtx_REG (Pmode
, R11_REG
);
11057 /* If this function uses a static chain, it will be in %r10.
11058 Preserve it across the call to __morestack. */
11059 if (DECL_STATIC_CHAIN (cfun
->decl
))
11063 rax
= gen_rtx_REG (Pmode
, AX_REG
);
11064 emit_move_insn (rax
, reg10
);
11065 use_reg (&call_fusage
, rax
);
11068 if (ix86_cmodel
== CM_LARGE
|| ix86_cmodel
== CM_LARGE_PIC
)
11070 HOST_WIDE_INT argval
;
11072 /* When using the large model we need to load the address
11073 into a register, and we've run out of registers. So we
11074 switch to a different calling convention, and we call a
11075 different function: __morestack_large. We pass the
11076 argument size in the upper 32 bits of r10 and pass the
11077 frame size in the lower 32 bits. */
11078 gcc_assert ((allocate
& (HOST_WIDE_INT
) 0xffffffff) == allocate
);
11079 gcc_assert ((args_size
& 0xffffffff) == args_size
);
11081 if (split_stack_fn_large
== NULL_RTX
)
11082 split_stack_fn_large
=
11083 gen_rtx_SYMBOL_REF (Pmode
, "__morestack_large_model");
11085 if (ix86_cmodel
== CM_LARGE_PIC
)
11089 label
= gen_label_rtx ();
11090 emit_label (label
);
11091 LABEL_PRESERVE_P (label
) = 1;
11092 emit_insn (gen_set_rip_rex64 (reg10
, label
));
11093 emit_insn (gen_set_got_offset_rex64 (reg11
, label
));
11094 emit_insn (gen_adddi3 (reg10
, reg10
, reg11
));
11095 x
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, split_stack_fn_large
),
11097 x
= gen_rtx_CONST (Pmode
, x
);
11098 emit_move_insn (reg11
, x
);
11099 x
= gen_rtx_PLUS (Pmode
, reg10
, reg11
);
11100 x
= gen_const_mem (Pmode
, x
);
11101 emit_move_insn (reg11
, x
);
11104 emit_move_insn (reg11
, split_stack_fn_large
);
11108 argval
= ((args_size
<< 16) << 16) + allocate
;
11109 emit_move_insn (reg10
, GEN_INT (argval
));
11113 emit_move_insn (reg10
, allocate_rtx
);
11114 emit_move_insn (reg11
, GEN_INT (args_size
));
11115 use_reg (&call_fusage
, reg11
);
11118 use_reg (&call_fusage
, reg10
);
11122 emit_insn (gen_push (GEN_INT (args_size
)));
11123 emit_insn (gen_push (allocate_rtx
));
11125 call_insn
= ix86_expand_call (NULL_RTX
, gen_rtx_MEM (QImode
, fn
),
11126 GEN_INT (UNITS_PER_WORD
), constm1_rtx
,
11128 add_function_usage_to (call_insn
, call_fusage
);
11130 /* In order to make call/return prediction work right, we now need
11131 to execute a return instruction. See
11132 libgcc/config/i386/morestack.S for the details on how this works.
11134 For flow purposes gcc must not see this as a return
11135 instruction--we need control flow to continue at the subsequent
11136 label. Therefore, we use an unspec. */
11137 gcc_assert (crtl
->args
.pops_args
< 65536);
11138 emit_insn (gen_split_stack_return (GEN_INT (crtl
->args
.pops_args
)));
11140 /* If we are in 64-bit mode and this function uses a static chain,
11141 we saved %r10 in %rax before calling _morestack. */
11142 if (TARGET_64BIT
&& DECL_STATIC_CHAIN (cfun
->decl
))
11143 emit_move_insn (gen_rtx_REG (Pmode
, R10_REG
),
11144 gen_rtx_REG (Pmode
, AX_REG
));
11146 /* If this function calls va_start, we need to store a pointer to
11147 the arguments on the old stack, because they may not have been
11148 all copied to the new stack. At this point the old stack can be
11149 found at the frame pointer value used by __morestack, because
11150 __morestack has set that up before calling back to us. Here we
11151 store that pointer in a scratch register, and in
11152 ix86_expand_prologue we store the scratch register in a stack
11154 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11156 unsigned int scratch_regno
;
11160 scratch_regno
= split_stack_prologue_scratch_regno ();
11161 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
11162 frame_reg
= gen_rtx_REG (Pmode
, BP_REG
);
11166 return address within this function
11167 return address of caller of this function
11169 So we add three words to get to the stack arguments.
11173 return address within this function
11174 first argument to __morestack
11175 second argument to __morestack
11176 return address of caller of this function
11178 So we add five words to get to the stack arguments.
11180 words
= TARGET_64BIT
? 3 : 5;
11181 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11182 gen_rtx_PLUS (Pmode
, frame_reg
,
11183 GEN_INT (words
* UNITS_PER_WORD
))));
11185 varargs_label
= gen_label_rtx ();
11186 emit_jump_insn (gen_jump (varargs_label
));
11187 JUMP_LABEL (get_last_insn ()) = varargs_label
;
11192 emit_label (label
);
11193 LABEL_NUSES (label
) = 1;
11195 /* If this function calls va_start, we now have to set the scratch
11196 register for the case where we do not call __morestack. In this
11197 case we need to set it based on the stack pointer. */
11198 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11200 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11201 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11202 GEN_INT (UNITS_PER_WORD
))));
11204 emit_label (varargs_label
);
11205 LABEL_NUSES (varargs_label
) = 1;
11209 /* We may have to tell the dataflow pass that the split stack prologue
11210 is initializing a scratch register. */
11213 ix86_live_on_entry (bitmap regs
)
11215 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11217 gcc_assert (flag_split_stack
);
11218 bitmap_set_bit (regs
, split_stack_prologue_scratch_regno ());
11222 /* Determine if op is suitable SUBREG RTX for address. */
11225 ix86_address_subreg_operand (rtx op
)
11227 enum machine_mode mode
;
11232 mode
= GET_MODE (op
);
11234 if (GET_MODE_CLASS (mode
) != MODE_INT
)
11237 /* Don't allow SUBREGs that span more than a word. It can lead to spill
11238 failures when the register is one word out of a two word structure. */
11239 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
11242 /* Allow only SUBREGs of non-eliminable hard registers. */
11243 return register_no_elim_operand (op
, mode
);
11246 /* Extract the parts of an RTL expression that is a valid memory address
11247 for an instruction. Return 0 if the structure of the address is
11248 grossly off. Return -1 if the address contains ASHIFT, so it is not
11249 strictly valid, but still used for computing length of lea instruction. */
11252 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
11254 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
11255 rtx base_reg
, index_reg
;
11256 HOST_WIDE_INT scale
= 1;
11257 rtx scale_rtx
= NULL_RTX
;
11260 enum ix86_address_seg seg
= SEG_DEFAULT
;
11262 /* Allow zero-extended SImode addresses,
11263 they will be emitted with addr32 prefix. */
11264 if (TARGET_64BIT
&& GET_MODE (addr
) == DImode
)
11266 if (GET_CODE (addr
) == ZERO_EXTEND
11267 && GET_MODE (XEXP (addr
, 0)) == SImode
)
11268 addr
= XEXP (addr
, 0);
11269 else if (GET_CODE (addr
) == AND
11270 && const_32bit_mask (XEXP (addr
, 1), DImode
))
11272 addr
= XEXP (addr
, 0);
11274 /* Strip subreg. */
11275 if (GET_CODE (addr
) == SUBREG
11276 && GET_MODE (SUBREG_REG (addr
)) == SImode
)
11277 addr
= SUBREG_REG (addr
);
11283 else if (GET_CODE (addr
) == SUBREG
)
11285 if (ix86_address_subreg_operand (SUBREG_REG (addr
)))
11290 else if (GET_CODE (addr
) == PLUS
)
11292 rtx addends
[4], op
;
11300 addends
[n
++] = XEXP (op
, 1);
11303 while (GET_CODE (op
) == PLUS
);
11308 for (i
= n
; i
>= 0; --i
)
11311 switch (GET_CODE (op
))
11316 index
= XEXP (op
, 0);
11317 scale_rtx
= XEXP (op
, 1);
11323 index
= XEXP (op
, 0);
11324 tmp
= XEXP (op
, 1);
11325 if (!CONST_INT_P (tmp
))
11327 scale
= INTVAL (tmp
);
11328 if ((unsigned HOST_WIDE_INT
) scale
> 3)
11330 scale
= 1 << scale
;
11334 if (XINT (op
, 1) == UNSPEC_TP
11335 && TARGET_TLS_DIRECT_SEG_REFS
11336 && seg
== SEG_DEFAULT
)
11337 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
11343 if (!ix86_address_subreg_operand (SUBREG_REG (op
)))
11370 else if (GET_CODE (addr
) == MULT
)
11372 index
= XEXP (addr
, 0); /* index*scale */
11373 scale_rtx
= XEXP (addr
, 1);
11375 else if (GET_CODE (addr
) == ASHIFT
)
11377 /* We're called for lea too, which implements ashift on occasion. */
11378 index
= XEXP (addr
, 0);
11379 tmp
= XEXP (addr
, 1);
11380 if (!CONST_INT_P (tmp
))
11382 scale
= INTVAL (tmp
);
11383 if ((unsigned HOST_WIDE_INT
) scale
> 3)
11385 scale
= 1 << scale
;
11389 disp
= addr
; /* displacement */
11395 else if (GET_CODE (index
) == SUBREG
11396 && ix86_address_subreg_operand (SUBREG_REG (index
)))
11402 /* Extract the integral value of scale. */
11405 if (!CONST_INT_P (scale_rtx
))
11407 scale
= INTVAL (scale_rtx
);
11410 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
11411 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
11413 /* Avoid useless 0 displacement. */
11414 if (disp
== const0_rtx
&& (base
|| index
))
11417 /* Allow arg pointer and stack pointer as index if there is not scaling. */
11418 if (base_reg
&& index_reg
&& scale
== 1
11419 && (index_reg
== arg_pointer_rtx
11420 || index_reg
== frame_pointer_rtx
11421 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
11424 tmp
= base
, base
= index
, index
= tmp
;
11425 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
11428 /* Special case: %ebp cannot be encoded as a base without a displacement.
11432 && (base_reg
== hard_frame_pointer_rtx
11433 || base_reg
== frame_pointer_rtx
11434 || base_reg
== arg_pointer_rtx
11435 || (REG_P (base_reg
)
11436 && (REGNO (base_reg
) == HARD_FRAME_POINTER_REGNUM
11437 || REGNO (base_reg
) == R13_REG
))))
11440 /* Special case: on K6, [%esi] makes the instruction vector decoded.
11441 Avoid this by transforming to [%esi+0].
11442 Reload calls address legitimization without cfun defined, so we need
11443 to test cfun for being non-NULL. */
11444 if (TARGET_K6
&& cfun
&& optimize_function_for_speed_p (cfun
)
11445 && base_reg
&& !index_reg
&& !disp
11446 && REG_P (base_reg
) && REGNO (base_reg
) == SI_REG
)
11449 /* Special case: encode reg+reg instead of reg*2. */
11450 if (!base
&& index
&& scale
== 2)
11451 base
= index
, base_reg
= index_reg
, scale
= 1;
11453 /* Special case: scaling cannot be encoded without base or displacement. */
11454 if (!base
&& !disp
&& index
&& scale
!= 1)
11458 out
->index
= index
;
11460 out
->scale
= scale
;
11466 /* Return cost of the memory address x.
11467 For i386, it is better to use a complex address than let gcc copy
11468 the address into a reg and make a new pseudo. But not if the address
11469 requires to two regs - that would mean more pseudos with longer
11472 ix86_address_cost (rtx x
, bool speed ATTRIBUTE_UNUSED
)
11474 struct ix86_address parts
;
11476 int ok
= ix86_decompose_address (x
, &parts
);
11480 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
11481 parts
.base
= SUBREG_REG (parts
.base
);
11482 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
11483 parts
.index
= SUBREG_REG (parts
.index
);
11485 /* Attempt to minimize number of registers in the address. */
11487 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
11489 && (!REG_P (parts
.index
)
11490 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
11494 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
11496 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
11497 && parts
.base
!= parts
.index
)
11500 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
11501 since it's predecode logic can't detect the length of instructions
11502 and it degenerates to vector decoded. Increase cost of such
11503 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
11504 to split such addresses or even refuse such addresses at all.
11506 Following addressing modes are affected:
11511 The first and last case may be avoidable by explicitly coding the zero in
11512 memory address, but I don't have AMD-K6 machine handy to check this
11516 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
11517 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
11518 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
11524 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
11525 this is used for to form addresses to local data when -fPIC is in
11529 darwin_local_data_pic (rtx disp
)
11531 return (GET_CODE (disp
) == UNSPEC
11532 && XINT (disp
, 1) == UNSPEC_MACHOPIC_OFFSET
);
11535 /* Determine if a given RTX is a valid constant. We already know this
11536 satisfies CONSTANT_P. */
11539 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
11541 switch (GET_CODE (x
))
11546 if (GET_CODE (x
) == PLUS
)
11548 if (!CONST_INT_P (XEXP (x
, 1)))
11553 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
11556 /* Only some unspecs are valid as "constants". */
11557 if (GET_CODE (x
) == UNSPEC
)
11558 switch (XINT (x
, 1))
11561 case UNSPEC_GOTOFF
:
11562 case UNSPEC_PLTOFF
:
11563 return TARGET_64BIT
;
11565 case UNSPEC_NTPOFF
:
11566 x
= XVECEXP (x
, 0, 0);
11567 return (GET_CODE (x
) == SYMBOL_REF
11568 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
11569 case UNSPEC_DTPOFF
:
11570 x
= XVECEXP (x
, 0, 0);
11571 return (GET_CODE (x
) == SYMBOL_REF
11572 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
11577 /* We must have drilled down to a symbol. */
11578 if (GET_CODE (x
) == LABEL_REF
)
11580 if (GET_CODE (x
) != SYMBOL_REF
)
11585 /* TLS symbols are never valid. */
11586 if (SYMBOL_REF_TLS_MODEL (x
))
11589 /* DLLIMPORT symbols are never valid. */
11590 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11591 && SYMBOL_REF_DLLIMPORT_P (x
))
11595 /* mdynamic-no-pic */
11596 if (MACHO_DYNAMIC_NO_PIC_P
)
11597 return machopic_symbol_defined_p (x
);
11602 if (GET_MODE (x
) == TImode
11603 && x
!= CONST0_RTX (TImode
)
11609 if (!standard_sse_constant_p (x
))
11616 /* Otherwise we handle everything else in the move patterns. */
11620 /* Determine if it's legal to put X into the constant pool. This
11621 is not possible for the address of thread-local symbols, which
11622 is checked above. */
11625 ix86_cannot_force_const_mem (enum machine_mode mode
, rtx x
)
11627 /* We can always put integral constants and vectors in memory. */
11628 switch (GET_CODE (x
))
11638 return !ix86_legitimate_constant_p (mode
, x
);
11642 /* Nonzero if the constant value X is a legitimate general operand
11643 when generating PIC code. It is given that flag_pic is on and
11644 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11647 legitimate_pic_operand_p (rtx x
)
11651 switch (GET_CODE (x
))
11654 inner
= XEXP (x
, 0);
11655 if (GET_CODE (inner
) == PLUS
11656 && CONST_INT_P (XEXP (inner
, 1)))
11657 inner
= XEXP (inner
, 0);
11659 /* Only some unspecs are valid as "constants". */
11660 if (GET_CODE (inner
) == UNSPEC
)
11661 switch (XINT (inner
, 1))
11664 case UNSPEC_GOTOFF
:
11665 case UNSPEC_PLTOFF
:
11666 return TARGET_64BIT
;
11668 x
= XVECEXP (inner
, 0, 0);
11669 return (GET_CODE (x
) == SYMBOL_REF
11670 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
11671 case UNSPEC_MACHOPIC_OFFSET
:
11672 return legitimate_pic_address_disp_p (x
);
11680 return legitimate_pic_address_disp_p (x
);
11687 /* Determine if a given CONST RTX is a valid memory displacement
11691 legitimate_pic_address_disp_p (rtx disp
)
11695 /* In 64bit mode we can allow direct addresses of symbols and labels
11696 when they are not dynamic symbols. */
11699 rtx op0
= disp
, op1
;
11701 switch (GET_CODE (disp
))
11707 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
11709 op0
= XEXP (XEXP (disp
, 0), 0);
11710 op1
= XEXP (XEXP (disp
, 0), 1);
11711 if (!CONST_INT_P (op1
)
11712 || INTVAL (op1
) >= 16*1024*1024
11713 || INTVAL (op1
) < -16*1024*1024)
11715 if (GET_CODE (op0
) == LABEL_REF
)
11717 if (GET_CODE (op0
) != SYMBOL_REF
)
11722 /* TLS references should always be enclosed in UNSPEC. */
11723 if (SYMBOL_REF_TLS_MODEL (op0
))
11725 if (!SYMBOL_REF_FAR_ADDR_P (op0
) && SYMBOL_REF_LOCAL_P (op0
)
11726 && ix86_cmodel
!= CM_LARGE_PIC
)
11734 if (GET_CODE (disp
) != CONST
)
11736 disp
= XEXP (disp
, 0);
11740 /* We are unsafe to allow PLUS expressions. This limit allowed distance
11741 of GOT tables. We should not need these anyway. */
11742 if (GET_CODE (disp
) != UNSPEC
11743 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
11744 && XINT (disp
, 1) != UNSPEC_GOTOFF
11745 && XINT (disp
, 1) != UNSPEC_PCREL
11746 && XINT (disp
, 1) != UNSPEC_PLTOFF
))
11749 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
11750 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
11756 if (GET_CODE (disp
) == PLUS
)
11758 if (!CONST_INT_P (XEXP (disp
, 1)))
11760 disp
= XEXP (disp
, 0);
11764 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
11767 if (GET_CODE (disp
) != UNSPEC
)
11770 switch (XINT (disp
, 1))
11775 /* We need to check for both symbols and labels because VxWorks loads
11776 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
11778 return (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
11779 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
);
11780 case UNSPEC_GOTOFF
:
11781 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
11782 While ABI specify also 32bit relocation but we don't produce it in
11783 small PIC model at all. */
11784 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
11785 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
11787 return gotoff_operand (XVECEXP (disp
, 0, 0), Pmode
);
11789 case UNSPEC_GOTTPOFF
:
11790 case UNSPEC_GOTNTPOFF
:
11791 case UNSPEC_INDNTPOFF
:
11794 disp
= XVECEXP (disp
, 0, 0);
11795 return (GET_CODE (disp
) == SYMBOL_REF
11796 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
11797 case UNSPEC_NTPOFF
:
11798 disp
= XVECEXP (disp
, 0, 0);
11799 return (GET_CODE (disp
) == SYMBOL_REF
11800 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
11801 case UNSPEC_DTPOFF
:
11802 disp
= XVECEXP (disp
, 0, 0);
11803 return (GET_CODE (disp
) == SYMBOL_REF
11804 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
11810 /* Recognizes RTL expressions that are valid memory addresses for an
11811 instruction. The MODE argument is the machine mode for the MEM
11812 expression that wants to use this address.
11814 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
11815 convert common non-canonical forms to canonical form so that they will
11819 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED
,
11820 rtx addr
, bool strict
)
11822 struct ix86_address parts
;
11823 rtx base
, index
, disp
;
11824 HOST_WIDE_INT scale
;
11826 if (ix86_decompose_address (addr
, &parts
) <= 0)
11827 /* Decomposition failed. */
11831 index
= parts
.index
;
11833 scale
= parts
.scale
;
11835 /* Validate base register. */
11842 else if (GET_CODE (base
) == SUBREG
&& REG_P (SUBREG_REG (base
)))
11843 reg
= SUBREG_REG (base
);
11845 /* Base is not a register. */
11848 if (GET_MODE (base
) != SImode
&& GET_MODE (base
) != DImode
)
11851 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
11852 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
11853 /* Base is not valid. */
11857 /* Validate index register. */
11864 else if (GET_CODE (index
) == SUBREG
&& REG_P (SUBREG_REG (index
)))
11865 reg
= SUBREG_REG (index
);
11867 /* Index is not a register. */
11870 if (GET_MODE (index
) != SImode
&& GET_MODE (index
) != DImode
)
11873 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
11874 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
11875 /* Index is not valid. */
11879 /* Index and base should have the same mode. */
11881 && GET_MODE (base
) != GET_MODE (index
))
11884 /* Validate scale factor. */
11888 /* Scale without index. */
11891 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
11892 /* Scale is not a valid multiplier. */
11896 /* Validate displacement. */
11899 if (GET_CODE (disp
) == CONST
11900 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
11901 && XINT (XEXP (disp
, 0), 1) != UNSPEC_MACHOPIC_OFFSET
)
11902 switch (XINT (XEXP (disp
, 0), 1))
11904 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
11905 used. While ABI specify also 32bit relocations, we don't produce
11906 them at all and use IP relative instead. */
11908 case UNSPEC_GOTOFF
:
11909 gcc_assert (flag_pic
);
11911 goto is_legitimate_pic
;
11913 /* 64bit address unspec. */
11916 case UNSPEC_GOTPCREL
:
11918 gcc_assert (flag_pic
);
11919 goto is_legitimate_pic
;
11921 case UNSPEC_GOTTPOFF
:
11922 case UNSPEC_GOTNTPOFF
:
11923 case UNSPEC_INDNTPOFF
:
11924 case UNSPEC_NTPOFF
:
11925 case UNSPEC_DTPOFF
:
11928 case UNSPEC_STACK_CHECK
:
11929 gcc_assert (flag_split_stack
);
11933 /* Invalid address unspec. */
11937 else if (SYMBOLIC_CONST (disp
)
11941 && MACHOPIC_INDIRECT
11942 && !machopic_operand_p (disp
)
11948 if (TARGET_64BIT
&& (index
|| base
))
11950 /* foo@dtpoff(%rX) is ok. */
11951 if (GET_CODE (disp
) != CONST
11952 || GET_CODE (XEXP (disp
, 0)) != PLUS
11953 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
11954 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
11955 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
11956 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
11957 /* Non-constant pic memory reference. */
11960 else if ((!TARGET_MACHO
|| flag_pic
)
11961 && ! legitimate_pic_address_disp_p (disp
))
11962 /* Displacement is an invalid pic construct. */
11965 else if (MACHO_DYNAMIC_NO_PIC_P
11966 && !ix86_legitimate_constant_p (Pmode
, disp
))
11967 /* displacment must be referenced via non_lazy_pointer */
11971 /* This code used to verify that a symbolic pic displacement
11972 includes the pic_offset_table_rtx register.
11974 While this is good idea, unfortunately these constructs may
11975 be created by "adds using lea" optimization for incorrect
11984 This code is nonsensical, but results in addressing
11985 GOT table with pic_offset_table_rtx base. We can't
11986 just refuse it easily, since it gets matched by
11987 "addsi3" pattern, that later gets split to lea in the
11988 case output register differs from input. While this
11989 can be handled by separate addsi pattern for this case
11990 that never results in lea, this seems to be easier and
11991 correct fix for crash to disable this test. */
11993 else if (GET_CODE (disp
) != LABEL_REF
11994 && !CONST_INT_P (disp
)
11995 && (GET_CODE (disp
) != CONST
11996 || !ix86_legitimate_constant_p (Pmode
, disp
))
11997 && (GET_CODE (disp
) != SYMBOL_REF
11998 || !ix86_legitimate_constant_p (Pmode
, disp
)))
11999 /* Displacement is not constant. */
12001 else if (TARGET_64BIT
12002 && !x86_64_immediate_operand (disp
, VOIDmode
))
12003 /* Displacement is out of range. */
12007 /* Everything looks valid. */
12011 /* Determine if a given RTX is a valid constant address. */
12014 constant_address_p (rtx x
)
12016 return CONSTANT_P (x
) && ix86_legitimate_address_p (Pmode
, x
, 1);
12019 /* Return a unique alias set for the GOT. */
12021 static alias_set_type
12022 ix86_GOT_alias_set (void)
12024 static alias_set_type set
= -1;
12026 set
= new_alias_set ();
12030 /* Return a legitimate reference for ORIG (an address) using the
12031 register REG. If REG is 0, a new pseudo is generated.
12033 There are two types of references that must be handled:
12035 1. Global data references must load the address from the GOT, via
12036 the PIC reg. An insn is emitted to do this load, and the reg is
12039 2. Static data references, constant pool addresses, and code labels
12040 compute the address as an offset from the GOT, whose base is in
12041 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12042 differentiate them from global data objects. The returned
12043 address is the PIC reg + an unspec constant.
12045 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12046 reg also appears in the address. */
12049 legitimize_pic_address (rtx orig
, rtx reg
)
12052 rtx new_rtx
= orig
;
12056 if (TARGET_MACHO
&& !TARGET_64BIT
)
12059 reg
= gen_reg_rtx (Pmode
);
12060 /* Use the generic Mach-O PIC machinery. */
12061 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
12065 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
12067 else if (TARGET_64BIT
12068 && ix86_cmodel
!= CM_SMALL_PIC
12069 && gotoff_operand (addr
, Pmode
))
12072 /* This symbol may be referenced via a displacement from the PIC
12073 base address (@GOTOFF). */
12075 if (reload_in_progress
)
12076 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12077 if (GET_CODE (addr
) == CONST
)
12078 addr
= XEXP (addr
, 0);
12079 if (GET_CODE (addr
) == PLUS
)
12081 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
12083 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
12086 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
12087 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12089 tmpreg
= gen_reg_rtx (Pmode
);
12092 emit_move_insn (tmpreg
, new_rtx
);
12096 new_rtx
= expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
12097 tmpreg
, 1, OPTAB_DIRECT
);
12100 else new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
12102 else if (!TARGET_64BIT
&& gotoff_operand (addr
, Pmode
))
12104 /* This symbol may be referenced via a displacement from the PIC
12105 base address (@GOTOFF). */
12107 if (reload_in_progress
)
12108 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12109 if (GET_CODE (addr
) == CONST
)
12110 addr
= XEXP (addr
, 0);
12111 if (GET_CODE (addr
) == PLUS
)
12113 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
12115 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
12118 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
12119 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12120 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12124 emit_move_insn (reg
, new_rtx
);
12128 else if ((GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
12129 /* We can't use @GOTOFF for text labels on VxWorks;
12130 see gotoff_operand. */
12131 || (TARGET_VXWORKS_RTP
&& GET_CODE (addr
) == LABEL_REF
))
12133 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
12135 if (GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_DLLIMPORT_P (addr
))
12136 return legitimize_dllimport_symbol (addr
, true);
12137 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
12138 && GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
12139 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr
, 0), 0)))
12141 rtx t
= legitimize_dllimport_symbol (XEXP (XEXP (addr
, 0), 0), true);
12142 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (addr
, 0), 1));
12146 /* For x64 PE-COFF there is no GOT table. So we use address
12148 if (TARGET_64BIT
&& DEFAULT_ABI
== MS_ABI
)
12150 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_PCREL
);
12151 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12154 reg
= gen_reg_rtx (Pmode
);
12155 emit_move_insn (reg
, new_rtx
);
12158 else if (TARGET_64BIT
&& ix86_cmodel
!= CM_LARGE_PIC
)
12160 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
12161 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12162 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
12163 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
12166 reg
= gen_reg_rtx (Pmode
);
12167 /* Use directly gen_movsi, otherwise the address is loaded
12168 into register for CSE. We don't want to CSE this addresses,
12169 instead we CSE addresses from the GOT table, so skip this. */
12170 emit_insn (gen_movsi (reg
, new_rtx
));
12175 /* This symbol must be referenced via a load from the
12176 Global Offset Table (@GOT). */
12178 if (reload_in_progress
)
12179 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12180 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
12181 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12183 new_rtx
= force_reg (Pmode
, new_rtx
);
12184 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12185 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
12186 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
12189 reg
= gen_reg_rtx (Pmode
);
12190 emit_move_insn (reg
, new_rtx
);
12196 if (CONST_INT_P (addr
)
12197 && !x86_64_immediate_operand (addr
, VOIDmode
))
12201 emit_move_insn (reg
, addr
);
12205 new_rtx
= force_reg (Pmode
, addr
);
12207 else if (GET_CODE (addr
) == CONST
)
12209 addr
= XEXP (addr
, 0);
12211 /* We must match stuff we generate before. Assume the only
12212 unspecs that can get here are ours. Not that we could do
12213 anything with them anyway.... */
12214 if (GET_CODE (addr
) == UNSPEC
12215 || (GET_CODE (addr
) == PLUS
12216 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
12218 gcc_assert (GET_CODE (addr
) == PLUS
);
12220 if (GET_CODE (addr
) == PLUS
)
12222 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
12224 /* Check first to see if this is a constant offset from a @GOTOFF
12225 symbol reference. */
12226 if (gotoff_operand (op0
, Pmode
)
12227 && CONST_INT_P (op1
))
12231 if (reload_in_progress
)
12232 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12233 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
12235 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, op1
);
12236 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12237 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12241 emit_move_insn (reg
, new_rtx
);
12247 if (INTVAL (op1
) < -16*1024*1024
12248 || INTVAL (op1
) >= 16*1024*1024)
12250 if (!x86_64_immediate_operand (op1
, Pmode
))
12251 op1
= force_reg (Pmode
, op1
);
12252 new_rtx
= gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
12258 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
12259 new_rtx
= legitimize_pic_address (XEXP (addr
, 1),
12260 base
== reg
? NULL_RTX
: reg
);
12262 if (CONST_INT_P (new_rtx
))
12263 new_rtx
= plus_constant (base
, INTVAL (new_rtx
));
12266 if (GET_CODE (new_rtx
) == PLUS
&& CONSTANT_P (XEXP (new_rtx
, 1)))
12268 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new_rtx
, 0));
12269 new_rtx
= XEXP (new_rtx
, 1);
12271 new_rtx
= gen_rtx_PLUS (Pmode
, base
, new_rtx
);
12279 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12282 get_thread_pointer (bool to_reg
)
12284 rtx tp
= gen_rtx_UNSPEC (ptr_mode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
12286 if (GET_MODE (tp
) != Pmode
)
12287 tp
= convert_to_mode (Pmode
, tp
, 1);
12290 tp
= copy_addr_to_reg (tp
);
12295 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12297 static GTY(()) rtx ix86_tls_symbol
;
12300 ix86_tls_get_addr (void)
12302 if (!ix86_tls_symbol
)
12305 = ((TARGET_ANY_GNU_TLS
&& !TARGET_64BIT
)
12306 ? "___tls_get_addr" : "__tls_get_addr");
12308 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
, sym
);
12311 return ix86_tls_symbol
;
12314 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12316 static GTY(()) rtx ix86_tls_module_base_symbol
;
12319 ix86_tls_module_base (void)
12321 if (!ix86_tls_module_base_symbol
)
12323 ix86_tls_module_base_symbol
12324 = gen_rtx_SYMBOL_REF (Pmode
, "_TLS_MODULE_BASE_");
12326 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
12327 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
12330 return ix86_tls_module_base_symbol
;
12333 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12334 false if we expect this to be used for a memory address and true if
12335 we expect to load the address into a register. */
12338 legitimize_tls_address (rtx x
, enum tls_model model
, bool for_mov
)
12340 rtx dest
, base
, off
;
12341 rtx pic
= NULL_RTX
, tp
= NULL_RTX
;
12346 case TLS_MODEL_GLOBAL_DYNAMIC
:
12347 dest
= gen_reg_rtx (Pmode
);
12352 pic
= pic_offset_table_rtx
;
12355 pic
= gen_reg_rtx (Pmode
);
12356 emit_insn (gen_set_got (pic
));
12360 if (TARGET_GNU2_TLS
)
12363 emit_insn (gen_tls_dynamic_gnu2_64 (dest
, x
));
12365 emit_insn (gen_tls_dynamic_gnu2_32 (dest
, x
, pic
));
12367 tp
= get_thread_pointer (true);
12368 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
12370 set_unique_reg_note (get_last_insn (), REG_EQUAL
, x
);
12374 rtx caddr
= ix86_tls_get_addr ();
12378 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
), insns
;
12381 emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
, caddr
));
12382 insns
= get_insns ();
12385 RTL_CONST_CALL_P (insns
) = 1;
12386 emit_libcall_block (insns
, dest
, rax
, x
);
12389 emit_insn (gen_tls_global_dynamic_32 (dest
, x
, pic
, caddr
));
12393 case TLS_MODEL_LOCAL_DYNAMIC
:
12394 base
= gen_reg_rtx (Pmode
);
12399 pic
= pic_offset_table_rtx
;
12402 pic
= gen_reg_rtx (Pmode
);
12403 emit_insn (gen_set_got (pic
));
12407 if (TARGET_GNU2_TLS
)
12409 rtx tmp
= ix86_tls_module_base ();
12412 emit_insn (gen_tls_dynamic_gnu2_64 (base
, tmp
));
12414 emit_insn (gen_tls_dynamic_gnu2_32 (base
, tmp
, pic
));
12416 tp
= get_thread_pointer (true);
12417 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
12418 gen_rtx_MINUS (Pmode
, tmp
, tp
));
12422 rtx caddr
= ix86_tls_get_addr ();
12426 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
), insns
, eqv
;
12429 emit_call_insn (gen_tls_local_dynamic_base_64 (rax
, caddr
));
12430 insns
= get_insns ();
12433 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
12434 share the LD_BASE result with other LD model accesses. */
12435 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
12436 UNSPEC_TLS_LD_BASE
);
12438 RTL_CONST_CALL_P (insns
) = 1;
12439 emit_libcall_block (insns
, base
, rax
, eqv
);
12442 emit_insn (gen_tls_local_dynamic_base_32 (base
, pic
, caddr
));
12445 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
12446 off
= gen_rtx_CONST (Pmode
, off
);
12448 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
12450 if (TARGET_GNU2_TLS
)
12452 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
12454 set_unique_reg_note (get_last_insn (), REG_EQUAL
, x
);
12458 case TLS_MODEL_INITIAL_EXEC
:
12461 if (TARGET_SUN_TLS
)
12463 /* The Sun linker took the AMD64 TLS spec literally
12464 and can only handle %rax as destination of the
12465 initial executable code sequence. */
12467 dest
= gen_reg_rtx (Pmode
);
12468 emit_insn (gen_tls_initial_exec_64_sun (dest
, x
));
12473 type
= UNSPEC_GOTNTPOFF
;
12477 if (reload_in_progress
)
12478 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12479 pic
= pic_offset_table_rtx
;
12480 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
12482 else if (!TARGET_ANY_GNU_TLS
)
12484 pic
= gen_reg_rtx (Pmode
);
12485 emit_insn (gen_set_got (pic
));
12486 type
= UNSPEC_GOTTPOFF
;
12491 type
= UNSPEC_INDNTPOFF
;
12494 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
12495 off
= gen_rtx_CONST (Pmode
, off
);
12497 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
12498 off
= gen_const_mem (Pmode
, off
);
12499 set_mem_alias_set (off
, ix86_GOT_alias_set ());
12501 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
12503 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
12504 off
= force_reg (Pmode
, off
);
12505 return gen_rtx_PLUS (Pmode
, base
, off
);
12509 base
= get_thread_pointer (true);
12510 dest
= gen_reg_rtx (Pmode
);
12511 emit_insn (gen_subsi3 (dest
, base
, off
));
12515 case TLS_MODEL_LOCAL_EXEC
:
12516 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
12517 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
12518 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
12519 off
= gen_rtx_CONST (Pmode
, off
);
12521 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
12523 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
12524 return gen_rtx_PLUS (Pmode
, base
, off
);
12528 base
= get_thread_pointer (true);
12529 dest
= gen_reg_rtx (Pmode
);
12530 emit_insn (gen_subsi3 (dest
, base
, off
));
12535 gcc_unreachable ();
12541 /* Create or return the unique __imp_DECL dllimport symbol corresponding
12544 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map
)))
12545 htab_t dllimport_map
;
12548 get_dllimport_decl (tree decl
)
12550 struct tree_map
*h
, in
;
12553 const char *prefix
;
12554 size_t namelen
, prefixlen
;
12559 if (!dllimport_map
)
12560 dllimport_map
= htab_create_ggc (512, tree_map_hash
, tree_map_eq
, 0);
12562 in
.hash
= htab_hash_pointer (decl
);
12563 in
.base
.from
= decl
;
12564 loc
= htab_find_slot_with_hash (dllimport_map
, &in
, in
.hash
, INSERT
);
12565 h
= (struct tree_map
*) *loc
;
12569 *loc
= h
= ggc_alloc_tree_map ();
12571 h
->base
.from
= decl
;
12572 h
->to
= to
= build_decl (DECL_SOURCE_LOCATION (decl
),
12573 VAR_DECL
, NULL
, ptr_type_node
);
12574 DECL_ARTIFICIAL (to
) = 1;
12575 DECL_IGNORED_P (to
) = 1;
12576 DECL_EXTERNAL (to
) = 1;
12577 TREE_READONLY (to
) = 1;
12579 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
12580 name
= targetm
.strip_name_encoding (name
);
12581 prefix
= name
[0] == FASTCALL_PREFIX
|| user_label_prefix
[0] == 0
12582 ? "*__imp_" : "*__imp__";
12583 namelen
= strlen (name
);
12584 prefixlen
= strlen (prefix
);
12585 imp_name
= (char *) alloca (namelen
+ prefixlen
+ 1);
12586 memcpy (imp_name
, prefix
, prefixlen
);
12587 memcpy (imp_name
+ prefixlen
, name
, namelen
+ 1);
12589 name
= ggc_alloc_string (imp_name
, namelen
+ prefixlen
);
12590 rtl
= gen_rtx_SYMBOL_REF (Pmode
, name
);
12591 SET_SYMBOL_REF_DECL (rtl
, to
);
12592 SYMBOL_REF_FLAGS (rtl
) = SYMBOL_FLAG_LOCAL
;
12594 rtl
= gen_const_mem (Pmode
, rtl
);
12595 set_mem_alias_set (rtl
, ix86_GOT_alias_set ());
12597 SET_DECL_RTL (to
, rtl
);
12598 SET_DECL_ASSEMBLER_NAME (to
, get_identifier (name
));
12603 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
12604 true if we require the result be a register. */
12607 legitimize_dllimport_symbol (rtx symbol
, bool want_reg
)
12612 gcc_assert (SYMBOL_REF_DECL (symbol
));
12613 imp_decl
= get_dllimport_decl (SYMBOL_REF_DECL (symbol
));
12615 x
= DECL_RTL (imp_decl
);
12617 x
= force_reg (Pmode
, x
);
12621 /* Try machine-dependent ways of modifying an illegitimate address
12622 to be legitimate. If we find one, return the new, valid address.
12623 This macro is used in only one place: `memory_address' in explow.c.
12625 OLDX is the address as it was before break_out_memory_refs was called.
12626 In some cases it is useful to look at this to decide what needs to be done.
12628 It is always safe for this macro to do nothing. It exists to recognize
12629 opportunities to optimize the output.
12631 For the 80386, we handle X+REG by loading X into a register R and
12632 using R+REG. R will go in a general reg and indexing will be used.
12633 However, if REG is a broken-out memory address or multiplication,
12634 nothing needs to be done because REG can certainly go in a general reg.
12636 When -fpic is used, special handling is needed for symbolic references.
12637 See comments by legitimize_pic_address in i386.c for details. */
12640 ix86_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
12641 enum machine_mode mode
)
12646 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
12648 return legitimize_tls_address (x
, (enum tls_model
) log
, false);
12649 if (GET_CODE (x
) == CONST
12650 && GET_CODE (XEXP (x
, 0)) == PLUS
12651 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
12652 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
12654 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0),
12655 (enum tls_model
) log
, false);
12656 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
12659 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
12661 if (GET_CODE (x
) == SYMBOL_REF
&& SYMBOL_REF_DLLIMPORT_P (x
))
12662 return legitimize_dllimport_symbol (x
, true);
12663 if (GET_CODE (x
) == CONST
12664 && GET_CODE (XEXP (x
, 0)) == PLUS
12665 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
12666 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x
, 0), 0)))
12668 rtx t
= legitimize_dllimport_symbol (XEXP (XEXP (x
, 0), 0), true);
12669 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
12673 if (flag_pic
&& SYMBOLIC_CONST (x
))
12674 return legitimize_pic_address (x
, 0);
12677 if (MACHO_DYNAMIC_NO_PIC_P
&& SYMBOLIC_CONST (x
))
12678 return machopic_indirect_data_reference (x
, 0);
12681 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
12682 if (GET_CODE (x
) == ASHIFT
12683 && CONST_INT_P (XEXP (x
, 1))
12684 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
12687 log
= INTVAL (XEXP (x
, 1));
12688 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
12689 GEN_INT (1 << log
));
12692 if (GET_CODE (x
) == PLUS
)
12694 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12696 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
12697 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
12698 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
12701 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
12702 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
12703 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
12704 GEN_INT (1 << log
));
12707 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
12708 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
12709 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
12712 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
12713 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
12714 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
12715 GEN_INT (1 << log
));
12718 /* Put multiply first if it isn't already. */
12719 if (GET_CODE (XEXP (x
, 1)) == MULT
)
12721 rtx tmp
= XEXP (x
, 0);
12722 XEXP (x
, 0) = XEXP (x
, 1);
12727 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
12728 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12729 created by virtual register instantiation, register elimination, and
12730 similar optimizations. */
12731 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
12734 x
= gen_rtx_PLUS (Pmode
,
12735 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
12736 XEXP (XEXP (x
, 1), 0)),
12737 XEXP (XEXP (x
, 1), 1));
12741 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
12742 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
12743 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
12744 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
12745 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
12746 && CONSTANT_P (XEXP (x
, 1)))
12749 rtx other
= NULL_RTX
;
12751 if (CONST_INT_P (XEXP (x
, 1)))
12753 constant
= XEXP (x
, 1);
12754 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
12756 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
12758 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
12759 other
= XEXP (x
, 1);
12767 x
= gen_rtx_PLUS (Pmode
,
12768 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
12769 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
12770 plus_constant (other
, INTVAL (constant
)));
12774 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
12777 if (GET_CODE (XEXP (x
, 0)) == MULT
)
12780 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
12783 if (GET_CODE (XEXP (x
, 1)) == MULT
)
12786 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
12790 && REG_P (XEXP (x
, 1))
12791 && REG_P (XEXP (x
, 0)))
12794 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
12797 x
= legitimize_pic_address (x
, 0);
12800 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
12803 if (REG_P (XEXP (x
, 0)))
12805 rtx temp
= gen_reg_rtx (Pmode
);
12806 rtx val
= force_operand (XEXP (x
, 1), temp
);
12809 if (GET_MODE (val
) != Pmode
)
12810 val
= convert_to_mode (Pmode
, val
, 1);
12811 emit_move_insn (temp
, val
);
12814 XEXP (x
, 1) = temp
;
12818 else if (REG_P (XEXP (x
, 1)))
12820 rtx temp
= gen_reg_rtx (Pmode
);
12821 rtx val
= force_operand (XEXP (x
, 0), temp
);
12824 if (GET_MODE (val
) != Pmode
)
12825 val
= convert_to_mode (Pmode
, val
, 1);
12826 emit_move_insn (temp
, val
);
12829 XEXP (x
, 0) = temp
;
12837 /* Print an integer constant expression in assembler syntax. Addition
12838 and subtraction are the only arithmetic that may appear in these
12839 expressions. FILE is the stdio stream to write to, X is the rtx, and
12840 CODE is the operand print code from the output string. */
12843 output_pic_addr_const (FILE *file
, rtx x
, int code
)
12847 switch (GET_CODE (x
))
12850 gcc_assert (flag_pic
);
12855 if (TARGET_64BIT
|| ! TARGET_MACHO_BRANCH_ISLANDS
)
12856 output_addr_const (file
, x
);
12859 const char *name
= XSTR (x
, 0);
12861 /* Mark the decl as referenced so that cgraph will
12862 output the function. */
12863 if (SYMBOL_REF_DECL (x
))
12864 mark_decl_referenced (SYMBOL_REF_DECL (x
));
12867 if (MACHOPIC_INDIRECT
12868 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
12869 name
= machopic_indirection_name (x
, /*stub_p=*/true);
12871 assemble_name (file
, name
);
12873 if (!TARGET_MACHO
&& !(TARGET_64BIT
&& DEFAULT_ABI
== MS_ABI
)
12874 && code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
12875 fputs ("@PLT", file
);
12882 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
12883 assemble_name (asm_out_file
, buf
);
12887 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
12891 /* This used to output parentheses around the expression,
12892 but that does not work on the 386 (either ATT or BSD assembler). */
12893 output_pic_addr_const (file
, XEXP (x
, 0), code
);
12897 if (GET_MODE (x
) == VOIDmode
)
12899 /* We can use %d if the number is <32 bits and positive. */
12900 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
12901 fprintf (file
, "0x%lx%08lx",
12902 (unsigned long) CONST_DOUBLE_HIGH (x
),
12903 (unsigned long) CONST_DOUBLE_LOW (x
));
12905 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
12908 /* We can't handle floating point constants;
12909 TARGET_PRINT_OPERAND must handle them. */
12910 output_operand_lossage ("floating constant misused");
12914 /* Some assemblers need integer constants to appear first. */
12915 if (CONST_INT_P (XEXP (x
, 0)))
12917 output_pic_addr_const (file
, XEXP (x
, 0), code
);
12919 output_pic_addr_const (file
, XEXP (x
, 1), code
);
12923 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
12924 output_pic_addr_const (file
, XEXP (x
, 1), code
);
12926 output_pic_addr_const (file
, XEXP (x
, 0), code
);
12932 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
12933 output_pic_addr_const (file
, XEXP (x
, 0), code
);
12935 output_pic_addr_const (file
, XEXP (x
, 1), code
);
12937 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
12941 if (XINT (x
, 1) == UNSPEC_STACK_CHECK
)
12943 bool f
= i386_asm_output_addr_const_extra (file
, x
);
12948 gcc_assert (XVECLEN (x
, 0) == 1);
12949 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
12950 switch (XINT (x
, 1))
12953 fputs ("@GOT", file
);
12955 case UNSPEC_GOTOFF
:
12956 fputs ("@GOTOFF", file
);
12958 case UNSPEC_PLTOFF
:
12959 fputs ("@PLTOFF", file
);
12962 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
12963 "(%rip)" : "[rip]", file
);
12965 case UNSPEC_GOTPCREL
:
12966 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
12967 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file
);
12969 case UNSPEC_GOTTPOFF
:
12970 /* FIXME: This might be @TPOFF in Sun ld too. */
12971 fputs ("@gottpoff", file
);
12974 fputs ("@tpoff", file
);
12976 case UNSPEC_NTPOFF
:
12978 fputs ("@tpoff", file
);
12980 fputs ("@ntpoff", file
);
12982 case UNSPEC_DTPOFF
:
12983 fputs ("@dtpoff", file
);
12985 case UNSPEC_GOTNTPOFF
:
12987 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
12988 "@gottpoff(%rip)": "@gottpoff[rip]", file
);
12990 fputs ("@gotntpoff", file
);
12992 case UNSPEC_INDNTPOFF
:
12993 fputs ("@indntpoff", file
);
12996 case UNSPEC_MACHOPIC_OFFSET
:
12998 machopic_output_function_base_name (file
);
13002 output_operand_lossage ("invalid UNSPEC as operand");
13008 output_operand_lossage ("invalid expression as operand");
13012 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13013 We need to emit DTP-relative relocations. */
13015 static void ATTRIBUTE_UNUSED
13016 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
13018 fputs (ASM_LONG
, file
);
13019 output_addr_const (file
, x
);
13020 fputs ("@dtpoff", file
);
13026 fputs (", 0", file
);
13029 gcc_unreachable ();
13033 /* Return true if X is a representation of the PIC register. This copes
13034 with calls from ix86_find_base_term, where the register might have
13035 been replaced by a cselib value. */
13038 ix86_pic_register_p (rtx x
)
13040 if (GET_CODE (x
) == VALUE
&& CSELIB_VAL_PTR (x
))
13041 return (pic_offset_table_rtx
13042 && rtx_equal_for_cselib_p (x
, pic_offset_table_rtx
));
13044 return REG_P (x
) && REGNO (x
) == PIC_OFFSET_TABLE_REGNUM
;
13047 /* Helper function for ix86_delegitimize_address.
13048 Attempt to delegitimize TLS local-exec accesses. */
13051 ix86_delegitimize_tls_address (rtx orig_x
)
13053 rtx x
= orig_x
, unspec
;
13054 struct ix86_address addr
;
13056 if (!TARGET_TLS_DIRECT_SEG_REFS
)
13060 if (GET_CODE (x
) != PLUS
|| GET_MODE (x
) != Pmode
)
13062 if (ix86_decompose_address (x
, &addr
) == 0
13063 || addr
.seg
!= (TARGET_64BIT
? SEG_FS
: SEG_GS
)
13064 || addr
.disp
== NULL_RTX
13065 || GET_CODE (addr
.disp
) != CONST
)
13067 unspec
= XEXP (addr
.disp
, 0);
13068 if (GET_CODE (unspec
) == PLUS
&& CONST_INT_P (XEXP (unspec
, 1)))
13069 unspec
= XEXP (unspec
, 0);
13070 if (GET_CODE (unspec
) != UNSPEC
|| XINT (unspec
, 1) != UNSPEC_NTPOFF
)
13072 x
= XVECEXP (unspec
, 0, 0);
13073 gcc_assert (GET_CODE (x
) == SYMBOL_REF
);
13074 if (unspec
!= XEXP (addr
.disp
, 0))
13075 x
= gen_rtx_PLUS (Pmode
, x
, XEXP (XEXP (addr
.disp
, 0), 1));
13078 rtx idx
= addr
.index
;
13079 if (addr
.scale
!= 1)
13080 idx
= gen_rtx_MULT (Pmode
, idx
, GEN_INT (addr
.scale
));
13081 x
= gen_rtx_PLUS (Pmode
, idx
, x
);
13084 x
= gen_rtx_PLUS (Pmode
, addr
.base
, x
);
13085 if (MEM_P (orig_x
))
13086 x
= replace_equiv_address_nv (orig_x
, x
);
13090 /* In the name of slightly smaller debug output, and to cater to
13091 general assembler lossage, recognize PIC+GOTOFF and turn it back
13092 into a direct symbol reference.
13094 On Darwin, this is necessary to avoid a crash, because Darwin
13095 has a different PIC label for each routine but the DWARF debugging
13096 information is not associated with any particular routine, so it's
13097 necessary to remove references to the PIC label from RTL stored by
13098 the DWARF output code. */
13101 ix86_delegitimize_address (rtx x
)
13103 rtx orig_x
= delegitimize_mem_from_attrs (x
);
13104 /* addend is NULL or some rtx if x is something+GOTOFF where
13105 something doesn't include the PIC register. */
13106 rtx addend
= NULL_RTX
;
13107 /* reg_addend is NULL or a multiple of some register. */
13108 rtx reg_addend
= NULL_RTX
;
13109 /* const_addend is NULL or a const_int. */
13110 rtx const_addend
= NULL_RTX
;
13111 /* This is the result, or NULL. */
13112 rtx result
= NULL_RTX
;
13121 if (GET_CODE (x
) != CONST
13122 || GET_CODE (XEXP (x
, 0)) != UNSPEC
13123 || (XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
13124 && XINT (XEXP (x
, 0), 1) != UNSPEC_PCREL
)
13125 || !MEM_P (orig_x
))
13126 return ix86_delegitimize_tls_address (orig_x
);
13127 x
= XVECEXP (XEXP (x
, 0), 0, 0);
13128 if (GET_MODE (orig_x
) != GET_MODE (x
))
13130 x
= simplify_gen_subreg (GET_MODE (orig_x
), x
,
13138 if (GET_CODE (x
) != PLUS
13139 || GET_CODE (XEXP (x
, 1)) != CONST
)
13140 return ix86_delegitimize_tls_address (orig_x
);
13142 if (ix86_pic_register_p (XEXP (x
, 0)))
13143 /* %ebx + GOT/GOTOFF */
13145 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
13147 /* %ebx + %reg * scale + GOT/GOTOFF */
13148 reg_addend
= XEXP (x
, 0);
13149 if (ix86_pic_register_p (XEXP (reg_addend
, 0)))
13150 reg_addend
= XEXP (reg_addend
, 1);
13151 else if (ix86_pic_register_p (XEXP (reg_addend
, 1)))
13152 reg_addend
= XEXP (reg_addend
, 0);
13155 reg_addend
= NULL_RTX
;
13156 addend
= XEXP (x
, 0);
13160 addend
= XEXP (x
, 0);
13162 x
= XEXP (XEXP (x
, 1), 0);
13163 if (GET_CODE (x
) == PLUS
13164 && CONST_INT_P (XEXP (x
, 1)))
13166 const_addend
= XEXP (x
, 1);
13170 if (GET_CODE (x
) == UNSPEC
13171 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
) && !addend
)
13172 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))))
13173 result
= XVECEXP (x
, 0, 0);
13175 if (TARGET_MACHO
&& darwin_local_data_pic (x
)
13176 && !MEM_P (orig_x
))
13177 result
= XVECEXP (x
, 0, 0);
13180 return ix86_delegitimize_tls_address (orig_x
);
13183 result
= gen_rtx_CONST (Pmode
, gen_rtx_PLUS (Pmode
, result
, const_addend
));
13185 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
13188 /* If the rest of original X doesn't involve the PIC register, add
13189 addend and subtract pic_offset_table_rtx. This can happen e.g.
13191 leal (%ebx, %ecx, 4), %ecx
13193 movl foo@GOTOFF(%ecx), %edx
13194 in which case we return (%ecx - %ebx) + foo. */
13195 if (pic_offset_table_rtx
)
13196 result
= gen_rtx_PLUS (Pmode
, gen_rtx_MINUS (Pmode
, copy_rtx (addend
),
13197 pic_offset_table_rtx
),
13202 if (GET_MODE (orig_x
) != Pmode
&& MEM_P (orig_x
))
13204 result
= simplify_gen_subreg (GET_MODE (orig_x
), result
, Pmode
, 0);
13205 if (result
== NULL_RTX
)
13211 /* If X is a machine specific address (i.e. a symbol or label being
13212 referenced as a displacement from the GOT implemented using an
13213 UNSPEC), then return the base term. Otherwise return X. */
13216 ix86_find_base_term (rtx x
)
13222 if (GET_CODE (x
) != CONST
)
13224 term
= XEXP (x
, 0);
13225 if (GET_CODE (term
) == PLUS
13226 && (CONST_INT_P (XEXP (term
, 1))
13227 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
13228 term
= XEXP (term
, 0);
13229 if (GET_CODE (term
) != UNSPEC
13230 || (XINT (term
, 1) != UNSPEC_GOTPCREL
13231 && XINT (term
, 1) != UNSPEC_PCREL
))
13234 return XVECEXP (term
, 0, 0);
13237 return ix86_delegitimize_address (x
);
13241 put_condition_code (enum rtx_code code
, enum machine_mode mode
, int reverse
,
13242 int fp
, FILE *file
)
13244 const char *suffix
;
13246 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
13248 code
= ix86_fp_compare_code_to_integer (code
);
13252 code
= reverse_condition (code
);
13303 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
13307 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13308 Those same assemblers have the same but opposite lossage on cmov. */
13309 if (mode
== CCmode
)
13310 suffix
= fp
? "nbe" : "a";
13311 else if (mode
== CCCmode
)
13314 gcc_unreachable ();
13330 gcc_unreachable ();
13334 gcc_assert (mode
== CCmode
|| mode
== CCCmode
);
13351 gcc_unreachable ();
13355 /* ??? As above. */
13356 gcc_assert (mode
== CCmode
|| mode
== CCCmode
);
13357 suffix
= fp
? "nb" : "ae";
13360 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
13364 /* ??? As above. */
13365 if (mode
== CCmode
)
13367 else if (mode
== CCCmode
)
13368 suffix
= fp
? "nb" : "ae";
13370 gcc_unreachable ();
13373 suffix
= fp
? "u" : "p";
13376 suffix
= fp
? "nu" : "np";
13379 gcc_unreachable ();
13381 fputs (suffix
, file
);
13384 /* Print the name of register X to FILE based on its machine mode and number.
13385 If CODE is 'w', pretend the mode is HImode.
13386 If CODE is 'b', pretend the mode is QImode.
13387 If CODE is 'k', pretend the mode is SImode.
13388 If CODE is 'q', pretend the mode is DImode.
13389 If CODE is 'x', pretend the mode is V4SFmode.
13390 If CODE is 't', pretend the mode is V8SFmode.
13391 If CODE is 'h', pretend the reg is the 'high' byte register.
13392 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
13393 If CODE is 'd', duplicate the operand for AVX instruction.
13397 print_reg (rtx x
, int code
, FILE *file
)
13400 bool duplicated
= code
== 'd' && TARGET_AVX
;
13402 gcc_assert (x
== pc_rtx
13403 || (REGNO (x
) != ARG_POINTER_REGNUM
13404 && REGNO (x
) != FRAME_POINTER_REGNUM
13405 && REGNO (x
) != FLAGS_REG
13406 && REGNO (x
) != FPSR_REG
13407 && REGNO (x
) != FPCR_REG
));
13409 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13414 gcc_assert (TARGET_64BIT
);
13415 fputs ("rip", file
);
13419 if (code
== 'w' || MMX_REG_P (x
))
13421 else if (code
== 'b')
13423 else if (code
== 'k')
13425 else if (code
== 'q')
13427 else if (code
== 'y')
13429 else if (code
== 'h')
13431 else if (code
== 'x')
13433 else if (code
== 't')
13436 code
= GET_MODE_SIZE (GET_MODE (x
));
13438 /* Irritatingly, AMD extended registers use different naming convention
13439 from the normal registers. */
13440 if (REX_INT_REG_P (x
))
13442 gcc_assert (TARGET_64BIT
);
13446 error ("extended registers have no high halves");
13449 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
13452 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
13455 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
13458 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
13461 error ("unsupported operand size for extended register");
13471 if (STACK_TOP_P (x
))
13480 if (! ANY_FP_REG_P (x
))
13481 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
13486 reg
= hi_reg_name
[REGNO (x
)];
13489 if (REGNO (x
) >= ARRAY_SIZE (qi_reg_name
))
13491 reg
= qi_reg_name
[REGNO (x
)];
13494 if (REGNO (x
) >= ARRAY_SIZE (qi_high_reg_name
))
13496 reg
= qi_high_reg_name
[REGNO (x
)];
13501 gcc_assert (!duplicated
);
13503 fputs (hi_reg_name
[REGNO (x
)] + 1, file
);
13508 gcc_unreachable ();
13514 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13515 fprintf (file
, ", %%%s", reg
);
13517 fprintf (file
, ", %s", reg
);
13521 /* Locate some local-dynamic symbol still in use by this function
13522 so that we can print its name in some tls_local_dynamic_base
13526 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
13530 if (GET_CODE (x
) == SYMBOL_REF
13531 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
13533 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
13540 static const char *
13541 get_some_local_dynamic_name (void)
13545 if (cfun
->machine
->some_ld_name
)
13546 return cfun
->machine
->some_ld_name
;
13548 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
13549 if (NONDEBUG_INSN_P (insn
)
13550 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
13551 return cfun
->machine
->some_ld_name
;
13556 /* Meaning of CODE:
13557 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13558 C -- print opcode suffix for set/cmov insn.
13559 c -- like C, but print reversed condition
13560 F,f -- likewise, but for floating-point.
13561 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13563 R -- print the prefix for register names.
13564 z -- print the opcode suffix for the size of the current operand.
13565 Z -- likewise, with special suffixes for x87 instructions.
13566 * -- print a star (in certain assembler syntax)
13567 A -- print an absolute memory reference.
13568 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13569 s -- print a shift double count, followed by the assemblers argument
13571 b -- print the QImode name of the register for the indicated operand.
13572 %b0 would print %al if operands[0] is reg 0.
13573 w -- likewise, print the HImode name of the register.
13574 k -- likewise, print the SImode name of the register.
13575 q -- likewise, print the DImode name of the register.
13576 x -- likewise, print the V4SFmode name of the register.
13577 t -- likewise, print the V8SFmode name of the register.
13578 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13579 y -- print "st(0)" instead of "st" as a register.
13580 d -- print duplicated register operand for AVX instruction.
13581 D -- print condition for SSE cmp instruction.
13582 P -- if PIC, print an @PLT suffix.
13583 p -- print raw symbol name.
13584 X -- don't print any sort of PIC '@' suffix for a symbol.
13585 & -- print some in-use local-dynamic symbol name.
13586 H -- print a memory address offset by 8; used for sse high-parts
13587 Y -- print condition for XOP pcom* instruction.
13588 + -- print a branch hint as 'cs' or 'ds' prefix
13589 ; -- print a semicolon (after prefixes due to bug in older gas).
13590 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
13591 @ -- print a segment register of thread base pointer load
13595 ix86_print_operand (FILE *file
, rtx x
, int code
)
13602 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13608 const char *name
= get_some_local_dynamic_name ();
13610 output_operand_lossage ("'%%&' used without any "
13611 "local dynamic TLS references");
13613 assemble_name (file
, name
);
13618 switch (ASSEMBLER_DIALECT
)
13625 /* Intel syntax. For absolute addresses, registers should not
13626 be surrounded by braces. */
13630 ix86_print_operand (file
, x
, 0);
13637 gcc_unreachable ();
13640 ix86_print_operand (file
, x
, 0);
13645 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13650 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13655 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13660 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13665 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13670 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13675 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
13677 /* Opcodes don't get size suffixes if using Intel opcodes. */
13678 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
13681 switch (GET_MODE_SIZE (GET_MODE (x
)))
13700 output_operand_lossage
13701 ("invalid operand size for operand code '%c'", code
);
13706 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
13708 (0, "non-integer operand used with operand code '%c'", code
);
13712 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
13713 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
13716 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
13718 switch (GET_MODE_SIZE (GET_MODE (x
)))
13721 #ifdef HAVE_AS_IX86_FILDS
13731 #ifdef HAVE_AS_IX86_FILDQ
13734 fputs ("ll", file
);
13742 else if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
13744 /* 387 opcodes don't get size suffixes
13745 if the operands are registers. */
13746 if (STACK_REG_P (x
))
13749 switch (GET_MODE_SIZE (GET_MODE (x
)))
13770 output_operand_lossage
13771 ("invalid operand type used with operand code '%c'", code
);
13775 output_operand_lossage
13776 ("invalid operand size for operand code '%c'", code
);
13794 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
13796 ix86_print_operand (file
, x
, 0);
13797 fputs (", ", file
);
13802 /* Little bit of braindamage here. The SSE compare instructions
13803 does use completely different names for the comparisons that the
13804 fp conditional moves. */
13807 switch (GET_CODE (x
))
13810 fputs ("eq", file
);
13813 fputs ("eq_us", file
);
13816 fputs ("lt", file
);
13819 fputs ("nge", file
);
13822 fputs ("le", file
);
13825 fputs ("ngt", file
);
13828 fputs ("unord", file
);
13831 fputs ("neq", file
);
13834 fputs ("neq_oq", file
);
13837 fputs ("ge", file
);
13840 fputs ("nlt", file
);
13843 fputs ("gt", file
);
13846 fputs ("nle", file
);
13849 fputs ("ord", file
);
13852 output_operand_lossage ("operand is not a condition code, "
13853 "invalid operand code 'D'");
13859 switch (GET_CODE (x
))
13863 fputs ("eq", file
);
13867 fputs ("lt", file
);
13871 fputs ("le", file
);
13874 fputs ("unord", file
);
13878 fputs ("neq", file
);
13882 fputs ("nlt", file
);
13886 fputs ("nle", file
);
13889 fputs ("ord", file
);
13892 output_operand_lossage ("operand is not a condition code, "
13893 "invalid operand code 'D'");
13899 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13900 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13902 switch (GET_MODE (x
))
13904 case HImode
: putc ('w', file
); break;
13906 case SFmode
: putc ('l', file
); break;
13908 case DFmode
: putc ('q', file
); break;
13909 default: gcc_unreachable ();
13916 if (!COMPARISON_P (x
))
13918 output_operand_lossage ("operand is neither a constant nor a "
13919 "condition code, invalid operand code "
13923 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
13926 if (!COMPARISON_P (x
))
13928 output_operand_lossage ("operand is neither a constant nor a "
13929 "condition code, invalid operand code "
13933 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13934 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13937 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
13940 /* Like above, but reverse condition */
13942 /* Check to see if argument to %c is really a constant
13943 and not a condition code which needs to be reversed. */
13944 if (!COMPARISON_P (x
))
13946 output_operand_lossage ("operand is neither a constant nor a "
13947 "condition code, invalid operand "
13951 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
13954 if (!COMPARISON_P (x
))
13956 output_operand_lossage ("operand is neither a constant nor a "
13957 "condition code, invalid operand "
13961 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13962 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13965 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
13969 /* It doesn't actually matter what mode we use here, as we're
13970 only going to use this for printing. */
13971 x
= adjust_address_nv (x
, DImode
, 8);
13979 || optimize_function_for_size_p (cfun
) || !TARGET_BRANCH_PREDICTION_HINTS
)
13982 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
13985 int pred_val
= INTVAL (XEXP (x
, 0));
13987 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
13988 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
13990 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
13991 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
13993 /* Emit hints only in the case default branch prediction
13994 heuristics would fail. */
13995 if (taken
!= cputaken
)
13997 /* We use 3e (DS) prefix for taken branches and
13998 2e (CS) prefix for not taken branches. */
14000 fputs ("ds ; ", file
);
14002 fputs ("cs ; ", file
);
14010 switch (GET_CODE (x
))
14013 fputs ("neq", file
);
14016 fputs ("eq", file
);
14020 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "ge" : "unlt", file
);
14024 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "gt" : "unle", file
);
14028 fputs ("le", file
);
14032 fputs ("lt", file
);
14035 fputs ("unord", file
);
14038 fputs ("ord", file
);
14041 fputs ("ueq", file
);
14044 fputs ("nlt", file
);
14047 fputs ("nle", file
);
14050 fputs ("ule", file
);
14053 fputs ("ult", file
);
14056 fputs ("une", file
);
14059 output_operand_lossage ("operand is not a condition code, "
14060 "invalid operand code 'Y'");
14066 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14072 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14075 /* The kernel uses a different segment register for performance
14076 reasons; a system call would not have to trash the userspace
14077 segment register, which would be expensive. */
14078 if (TARGET_64BIT
&& ix86_cmodel
!= CM_KERNEL
)
14079 fputs ("fs", file
);
14081 fputs ("gs", file
);
14085 putc (TARGET_AVX2
? 'i' : 'f', file
);
14089 output_operand_lossage ("invalid operand code '%c'", code
);
14094 print_reg (x
, code
, file
);
14096 else if (MEM_P (x
))
14098 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
14099 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P'
14100 && GET_MODE (x
) != BLKmode
)
14103 switch (GET_MODE_SIZE (GET_MODE (x
)))
14105 case 1: size
= "BYTE"; break;
14106 case 2: size
= "WORD"; break;
14107 case 4: size
= "DWORD"; break;
14108 case 8: size
= "QWORD"; break;
14109 case 12: size
= "TBYTE"; break;
14111 if (GET_MODE (x
) == XFmode
)
14116 case 32: size
= "YMMWORD"; break;
14118 gcc_unreachable ();
14121 /* Check for explicit size override (codes 'b', 'w' and 'k') */
14124 else if (code
== 'w')
14126 else if (code
== 'k')
14129 fputs (size
, file
);
14130 fputs (" PTR ", file
);
14134 /* Avoid (%rip) for call operands. */
14135 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
14136 && !CONST_INT_P (x
))
14137 output_addr_const (file
, x
);
14138 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
14139 output_operand_lossage ("invalid constraints for operand");
14141 output_address (x
);
14144 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
14149 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
14150 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
14152 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14154 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14156 fprintf (file
, "0x%08llx", (unsigned long long) (int) l
);
14158 fprintf (file
, "0x%08x", (unsigned int) l
);
14161 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
14166 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
14167 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
14169 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14171 fprintf (file
, "0x%lx%08lx", l
[1] & 0xffffffff, l
[0] & 0xffffffff);
14174 /* These float cases don't actually occur as immediate operands. */
14175 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == XFmode
)
14179 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
14180 fputs (dstr
, file
);
14185 /* We have patterns that allow zero sets of memory, for instance.
14186 In 64-bit mode, we should probably support all 8-byte vectors,
14187 since we can in fact encode that into an immediate. */
14188 if (GET_CODE (x
) == CONST_VECTOR
)
14190 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
14194 if (code
!= 'P' && code
!= 'p')
14196 if (CONST_INT_P (x
) || GET_CODE (x
) == CONST_DOUBLE
)
14198 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14201 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
14202 || GET_CODE (x
) == LABEL_REF
)
14204 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14207 fputs ("OFFSET FLAT:", file
);
14210 if (CONST_INT_P (x
))
14211 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
14212 else if (flag_pic
|| MACHOPIC_INDIRECT
)
14213 output_pic_addr_const (file
, x
, code
);
14215 output_addr_const (file
, x
);
/* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P.  Return true if CODE
   is one of the punctuation characters that ix86_print_operand
   handles specially ('@', '*', '+', '&', ';' and '~').

   The extraction of this chunk dropped the function's return-type
   line and braces; this restores the well-formed definition without
   changing the predicate.  */

static bool
ix86_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '@' || code == '*' || code == '+'
	  || code == '&' || code == ';' || code == '~');
}
14226 /* Print a memory operand whose address is ADDR. */
14229 ix86_print_operand_address (FILE *file
, rtx addr
)
14231 struct ix86_address parts
;
14232 rtx base
, index
, disp
;
14237 if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_VSIBADDR
)
14239 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
14240 gcc_assert (parts
.index
== NULL_RTX
);
14241 parts
.index
= XVECEXP (addr
, 0, 1);
14242 parts
.scale
= INTVAL (XVECEXP (addr
, 0, 2));
14243 addr
= XVECEXP (addr
, 0, 0);
14247 ok
= ix86_decompose_address (addr
, &parts
);
14251 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
14253 rtx tmp
= SUBREG_REG (parts
.base
);
14254 parts
.base
= simplify_subreg (GET_MODE (parts
.base
),
14255 tmp
, GET_MODE (tmp
), 0);
14258 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
14260 rtx tmp
= SUBREG_REG (parts
.index
);
14261 parts
.index
= simplify_subreg (GET_MODE (parts
.index
),
14262 tmp
, GET_MODE (tmp
), 0);
14266 index
= parts
.index
;
14268 scale
= parts
.scale
;
14276 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14278 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
14281 gcc_unreachable ();
14284 /* Use one byte shorter RIP relative addressing for 64bit mode. */
14285 if (TARGET_64BIT
&& !base
&& !index
)
14289 if (GET_CODE (disp
) == CONST
14290 && GET_CODE (XEXP (disp
, 0)) == PLUS
14291 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
14292 symbol
= XEXP (XEXP (disp
, 0), 0);
14294 if (GET_CODE (symbol
) == LABEL_REF
14295 || (GET_CODE (symbol
) == SYMBOL_REF
14296 && SYMBOL_REF_TLS_MODEL (symbol
) == 0))
14299 if (!base
&& !index
)
14301 /* Displacement only requires special attention. */
14303 if (CONST_INT_P (disp
))
14305 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
14306 fputs ("ds:", file
);
14307 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
14310 output_pic_addr_const (file
, disp
, 0);
14312 output_addr_const (file
, disp
);
14318 /* Print SImode registers for zero-extended addresses to force
14319 addr32 prefix. Otherwise print DImode registers to avoid it. */
14321 code
= ((GET_CODE (addr
) == ZERO_EXTEND
14322 || GET_CODE (addr
) == AND
)
14326 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14331 output_pic_addr_const (file
, disp
, 0);
14332 else if (GET_CODE (disp
) == LABEL_REF
)
14333 output_asm_label (disp
);
14335 output_addr_const (file
, disp
);
14340 print_reg (base
, code
, file
);
14344 print_reg (index
, vsib
? 0 : code
, file
);
14345 if (scale
!= 1 || vsib
)
14346 fprintf (file
, ",%d", scale
);
14352 rtx offset
= NULL_RTX
;
14356 /* Pull out the offset of a symbol; print any symbol itself. */
14357 if (GET_CODE (disp
) == CONST
14358 && GET_CODE (XEXP (disp
, 0)) == PLUS
14359 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
14361 offset
= XEXP (XEXP (disp
, 0), 1);
14362 disp
= gen_rtx_CONST (VOIDmode
,
14363 XEXP (XEXP (disp
, 0), 0));
14367 output_pic_addr_const (file
, disp
, 0);
14368 else if (GET_CODE (disp
) == LABEL_REF
)
14369 output_asm_label (disp
);
14370 else if (CONST_INT_P (disp
))
14373 output_addr_const (file
, disp
);
14379 print_reg (base
, code
, file
);
14382 if (INTVAL (offset
) >= 0)
14384 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
14388 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
14395 print_reg (index
, vsib
? 0 : code
, file
);
14396 if (scale
!= 1 || vsib
)
14397 fprintf (file
, "*%d", scale
);
14404 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14407 i386_asm_output_addr_const_extra (FILE *file
, rtx x
)
14411 if (GET_CODE (x
) != UNSPEC
)
14414 op
= XVECEXP (x
, 0, 0);
14415 switch (XINT (x
, 1))
14417 case UNSPEC_GOTTPOFF
:
14418 output_addr_const (file
, op
);
14419 /* FIXME: This might be @TPOFF in Sun ld. */
14420 fputs ("@gottpoff", file
);
14423 output_addr_const (file
, op
);
14424 fputs ("@tpoff", file
);
14426 case UNSPEC_NTPOFF
:
14427 output_addr_const (file
, op
);
14429 fputs ("@tpoff", file
);
14431 fputs ("@ntpoff", file
);
14433 case UNSPEC_DTPOFF
:
14434 output_addr_const (file
, op
);
14435 fputs ("@dtpoff", file
);
14437 case UNSPEC_GOTNTPOFF
:
14438 output_addr_const (file
, op
);
14440 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
14441 "@gottpoff(%rip)" : "@gottpoff[rip]", file
);
14443 fputs ("@gotntpoff", file
);
14445 case UNSPEC_INDNTPOFF
:
14446 output_addr_const (file
, op
);
14447 fputs ("@indntpoff", file
);
14450 case UNSPEC_MACHOPIC_OFFSET
:
14451 output_addr_const (file
, op
);
14453 machopic_output_function_base_name (file
);
14457 case UNSPEC_STACK_CHECK
:
14461 gcc_assert (flag_split_stack
);
14463 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14464 offset
= TARGET_THREAD_SPLIT_STACK_OFFSET
;
14466 gcc_unreachable ();
14469 fprintf (file
, "%s:%d", TARGET_64BIT
? "%fs" : "%gs", offset
);
14480 /* Split one or more double-mode RTL references into pairs of half-mode
14481 references. The RTL can be REG, offsettable MEM, integer constant, or
14482 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
14483 split and "num" is its length. lo_half and hi_half are output arrays
14484 that parallel "operands". */
14487 split_double_mode (enum machine_mode mode
, rtx operands
[],
14488 int num
, rtx lo_half
[], rtx hi_half
[])
14490 enum machine_mode half_mode
;
14496 half_mode
= DImode
;
14499 half_mode
= SImode
;
14502 gcc_unreachable ();
14505 byte
= GET_MODE_SIZE (half_mode
);
14509 rtx op
= operands
[num
];
14511 /* simplify_subreg refuse to split volatile memory addresses,
14512 but we still have to handle it. */
14515 lo_half
[num
] = adjust_address (op
, half_mode
, 0);
14516 hi_half
[num
] = adjust_address (op
, half_mode
, byte
);
14520 lo_half
[num
] = simplify_gen_subreg (half_mode
, op
,
14521 GET_MODE (op
) == VOIDmode
14522 ? mode
: GET_MODE (op
), 0);
14523 hi_half
[num
] = simplify_gen_subreg (half_mode
, op
,
14524 GET_MODE (op
) == VOIDmode
14525 ? mode
: GET_MODE (op
), byte
);
14530 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14531 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14532 is the expression of the binary operation. The output may either be
14533 emitted here, or returned to the caller, like all output_* functions.
14535 There is no guarantee that the operands are the same mode, as they
14536 might be within FLOAT or FLOAT_EXTEND expressions. */
14538 #ifndef SYSV386_COMPAT
14539 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14540 wants to fix the assemblers because that causes incompatibility
14541 with gcc. No-one wants to fix gcc because that causes
14542 incompatibility with assemblers... You can use the option of
14543 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14544 #define SYSV386_COMPAT 1
14548 output_387_binary_op (rtx insn
, rtx
*operands
)
14550 static char buf
[40];
14553 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
14555 #ifdef ENABLE_CHECKING
14556 /* Even if we do not want to check the inputs, this documents input
14557 constraints. Which helps in understanding the following code. */
14558 if (STACK_REG_P (operands
[0])
14559 && ((REG_P (operands
[1])
14560 && REGNO (operands
[0]) == REGNO (operands
[1])
14561 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
14562 || (REG_P (operands
[2])
14563 && REGNO (operands
[0]) == REGNO (operands
[2])
14564 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
14565 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
14568 gcc_assert (is_sse
);
14571 switch (GET_CODE (operands
[3]))
14574 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
14575 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
14583 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
14584 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
14592 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
14593 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
14601 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
14602 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
14610 gcc_unreachable ();
14617 strcpy (buf
, ssep
);
14618 if (GET_MODE (operands
[0]) == SFmode
)
14619 strcat (buf
, "ss\t{%2, %1, %0|%0, %1, %2}");
14621 strcat (buf
, "sd\t{%2, %1, %0|%0, %1, %2}");
14625 strcpy (buf
, ssep
+ 1);
14626 if (GET_MODE (operands
[0]) == SFmode
)
14627 strcat (buf
, "ss\t{%2, %0|%0, %2}");
14629 strcat (buf
, "sd\t{%2, %0|%0, %2}");
14635 switch (GET_CODE (operands
[3]))
14639 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
14641 rtx temp
= operands
[2];
14642 operands
[2] = operands
[1];
14643 operands
[1] = temp
;
14646 /* know operands[0] == operands[1]. */
14648 if (MEM_P (operands
[2]))
14654 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
14656 if (STACK_TOP_P (operands
[0]))
14657 /* How is it that we are storing to a dead operand[2]?
14658 Well, presumably operands[1] is dead too. We can't
14659 store the result to st(0) as st(0) gets popped on this
14660 instruction. Instead store to operands[2] (which I
14661 think has to be st(1)). st(1) will be popped later.
14662 gcc <= 2.8.1 didn't have this check and generated
14663 assembly code that the Unixware assembler rejected. */
14664 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14666 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14670 if (STACK_TOP_P (operands
[0]))
14671 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14673 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14678 if (MEM_P (operands
[1]))
14684 if (MEM_P (operands
[2]))
14690 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
14693 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
14694 derived assemblers, confusingly reverse the direction of
14695 the operation for fsub{r} and fdiv{r} when the
14696 destination register is not st(0). The Intel assembler
14697 doesn't have this brain damage. Read !SYSV386_COMPAT to
14698 figure out what the hardware really does. */
14699 if (STACK_TOP_P (operands
[0]))
14700 p
= "{p\t%0, %2|rp\t%2, %0}";
14702 p
= "{rp\t%2, %0|p\t%0, %2}";
14704 if (STACK_TOP_P (operands
[0]))
14705 /* As above for fmul/fadd, we can't store to st(0). */
14706 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14708 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14713 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
14716 if (STACK_TOP_P (operands
[0]))
14717 p
= "{rp\t%0, %1|p\t%1, %0}";
14719 p
= "{p\t%1, %0|rp\t%0, %1}";
14721 if (STACK_TOP_P (operands
[0]))
14722 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
14724 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
14729 if (STACK_TOP_P (operands
[0]))
14731 if (STACK_TOP_P (operands
[1]))
14732 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14734 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
14737 else if (STACK_TOP_P (operands
[1]))
14740 p
= "{\t%1, %0|r\t%0, %1}";
14742 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
14748 p
= "{r\t%2, %0|\t%0, %2}";
14750 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14756 gcc_unreachable ();
14763 /* Return needed mode for entity in optimize_mode_switching pass. */
14766 ix86_mode_needed (int entity
, rtx insn
)
14768 enum attr_i387_cw mode
;
14770 /* The mode UNINITIALIZED is used to store control word after a
14771 function call or ASM pattern. The mode ANY specify that function
14772 has no requirements on the control word and make no changes in the
14773 bits we are interested in. */
14776 || (NONJUMP_INSN_P (insn
)
14777 && (asm_noperands (PATTERN (insn
)) >= 0
14778 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
14779 return I387_CW_UNINITIALIZED
;
14781 if (recog_memoized (insn
) < 0)
14782 return I387_CW_ANY
;
14784 mode
= get_attr_i387_cw (insn
);
14789 if (mode
== I387_CW_TRUNC
)
14794 if (mode
== I387_CW_FLOOR
)
14799 if (mode
== I387_CW_CEIL
)
14804 if (mode
== I387_CW_MASK_PM
)
14809 gcc_unreachable ();
14812 return I387_CW_ANY
;
14815 /* Output code to initialize control word copies used by trunc?f?i and
14816 rounding patterns. CURRENT_MODE is set to current control word,
14817 while NEW_MODE is set to new control word. */
14820 emit_i387_cw_initialization (int mode
)
14822 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
14825 enum ix86_stack_slot slot
;
14827 rtx reg
= gen_reg_rtx (HImode
);
14829 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
14830 emit_move_insn (reg
, copy_rtx (stored_mode
));
14832 if (TARGET_64BIT
|| TARGET_PARTIAL_REG_STALL
14833 || optimize_function_for_size_p (cfun
))
14837 case I387_CW_TRUNC
:
14838 /* round toward zero (truncate) */
14839 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
14840 slot
= SLOT_CW_TRUNC
;
14843 case I387_CW_FLOOR
:
14844 /* round down toward -oo */
14845 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
14846 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
14847 slot
= SLOT_CW_FLOOR
;
14851 /* round up toward +oo */
14852 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
14853 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
14854 slot
= SLOT_CW_CEIL
;
14857 case I387_CW_MASK_PM
:
14858 /* mask precision exception for nearbyint() */
14859 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
14860 slot
= SLOT_CW_MASK_PM
;
14864 gcc_unreachable ();
14871 case I387_CW_TRUNC
:
14872 /* round toward zero (truncate) */
14873 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
14874 slot
= SLOT_CW_TRUNC
;
14877 case I387_CW_FLOOR
:
14878 /* round down toward -oo */
14879 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
14880 slot
= SLOT_CW_FLOOR
;
14884 /* round up toward +oo */
14885 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
14886 slot
= SLOT_CW_CEIL
;
14889 case I387_CW_MASK_PM
:
14890 /* mask precision exception for nearbyint() */
14891 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
14892 slot
= SLOT_CW_MASK_PM
;
14896 gcc_unreachable ();
14900 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
14902 new_mode
= assign_386_stack_local (HImode
, slot
);
14903 emit_move_insn (new_mode
, reg
);
14906 /* Output code for INSN to convert a float to a signed int. OPERANDS
14907 are the insn operands. The output may be [HSD]Imode and the input
14908 operand may be [SDX]Fmode. */
14911 output_fix_trunc (rtx insn
, rtx
*operands
, bool fisttp
)
14913 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
14914 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
14915 int round_mode
= get_attr_i387_cw (insn
);
14917 /* Jump through a hoop or two for DImode, since the hardware has no
14918 non-popping instruction. We used to do this a different way, but
14919 that was somewhat fragile and broke with post-reload splitters. */
14920 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
14921 output_asm_insn ("fld\t%y1", operands
);
14923 gcc_assert (STACK_TOP_P (operands
[1]));
14924 gcc_assert (MEM_P (operands
[0]));
14925 gcc_assert (GET_MODE (operands
[1]) != TFmode
);
14928 output_asm_insn ("fisttp%Z0\t%0", operands
);
14931 if (round_mode
!= I387_CW_ANY
)
14932 output_asm_insn ("fldcw\t%3", operands
);
14933 if (stack_top_dies
|| dimode_p
)
14934 output_asm_insn ("fistp%Z0\t%0", operands
);
14936 output_asm_insn ("fist%Z0\t%0", operands
);
14937 if (round_mode
!= I387_CW_ANY
)
14938 output_asm_insn ("fldcw\t%2", operands
);
14944 /* Output code for x87 ffreep insn. The OPNO argument, which may only
14945 have the values zero or one, indicates the ffreep insn's operand
14946 from the OPERANDS array. */
14948 static const char *
14949 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
14951 if (TARGET_USE_FFREEP
)
14952 #ifdef HAVE_AS_IX86_FFREEP
14953 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
14956 static char retval
[32];
14957 int regno
= REGNO (operands
[opno
]);
14959 gcc_assert (FP_REGNO_P (regno
));
14961 regno
-= FIRST_STACK_REG
;
14963 snprintf (retval
, sizeof (retval
), ASM_SHORT
"0xc%ddf", regno
);
14968 return opno
? "fstp\t%y1" : "fstp\t%y0";
14972 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
14973 should be used. UNORDERED_P is true when fucom should be used. */
14976 output_fp_compare (rtx insn
, rtx
*operands
, bool eflags_p
, bool unordered_p
)
14978 int stack_top_dies
;
14979 rtx cmp_op0
, cmp_op1
;
14980 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
14984 cmp_op0
= operands
[0];
14985 cmp_op1
= operands
[1];
14989 cmp_op0
= operands
[1];
14990 cmp_op1
= operands
[2];
14995 if (GET_MODE (operands
[0]) == SFmode
)
14997 return "%vucomiss\t{%1, %0|%0, %1}";
14999 return "%vcomiss\t{%1, %0|%0, %1}";
15002 return "%vucomisd\t{%1, %0|%0, %1}";
15004 return "%vcomisd\t{%1, %0|%0, %1}";
15007 gcc_assert (STACK_TOP_P (cmp_op0
));
15009 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
15011 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
15013 if (stack_top_dies
)
15015 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
15016 return output_387_ffreep (operands
, 1);
15019 return "ftst\n\tfnstsw\t%0";
15022 if (STACK_REG_P (cmp_op1
)
15024 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
15025 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
15027 /* If both the top of the 387 stack dies, and the other operand
15028 is also a stack register that dies, then this must be a
15029 `fcompp' float compare */
15033 /* There is no double popping fcomi variant. Fortunately,
15034 eflags is immune from the fstp's cc clobbering. */
15036 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
15038 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
15039 return output_387_ffreep (operands
, 0);
15044 return "fucompp\n\tfnstsw\t%0";
15046 return "fcompp\n\tfnstsw\t%0";
15051 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
15053 static const char * const alt
[16] =
15055 "fcom%Z2\t%y2\n\tfnstsw\t%0",
15056 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
15057 "fucom%Z2\t%y2\n\tfnstsw\t%0",
15058 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
15060 "ficom%Z2\t%y2\n\tfnstsw\t%0",
15061 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
15065 "fcomi\t{%y1, %0|%0, %y1}",
15066 "fcomip\t{%y1, %0|%0, %y1}",
15067 "fucomi\t{%y1, %0|%0, %y1}",
15068 "fucomip\t{%y1, %0|%0, %y1}",
15079 mask
= eflags_p
<< 3;
15080 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
15081 mask
|= unordered_p
<< 1;
15082 mask
|= stack_top_dies
;
15084 gcc_assert (mask
< 16);
15093 ix86_output_addr_vec_elt (FILE *file
, int value
)
15095 const char *directive
= ASM_LONG
;
15099 directive
= ASM_QUAD
;
15101 gcc_assert (!TARGET_64BIT
);
15104 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
15108 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
15110 const char *directive
= ASM_LONG
;
15113 if (TARGET_64BIT
&& CASE_VECTOR_MODE
== DImode
)
15114 directive
= ASM_QUAD
;
15116 gcc_assert (!TARGET_64BIT
);
15118 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15119 if (TARGET_64BIT
|| TARGET_VXWORKS_RTP
)
15120 fprintf (file
, "%s%s%d-%s%d\n",
15121 directive
, LPREFIX
, value
, LPREFIX
, rel
);
15122 else if (HAVE_AS_GOTOFF_IN_DATA
)
15123 fprintf (file
, ASM_LONG
"%s%d@GOTOFF\n", LPREFIX
, value
);
15125 else if (TARGET_MACHO
)
15127 fprintf (file
, ASM_LONG
"%s%d-", LPREFIX
, value
);
15128 machopic_output_function_base_name (file
);
15133 asm_fprintf (file
, ASM_LONG
"%U%s+[.-%s%d]\n",
15134 GOT_SYMBOL_NAME
, LPREFIX
, value
);
15137 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
15141 ix86_expand_clear (rtx dest
)
15145 /* We play register width games, which are only valid after reload. */
15146 gcc_assert (reload_completed
);
15148 /* Avoid HImode and its attendant prefix byte. */
15149 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
15150 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
15151 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
15153 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
15154 if (!TARGET_USE_MOV0
|| optimize_insn_for_speed_p ())
15156 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
15157 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
15163 /* X is an unchanging MEM. If it is a constant pool reference, return
15164 the constant pool rtx, else NULL. */
15167 maybe_get_pool_constant (rtx x
)
15169 x
= ix86_delegitimize_address (XEXP (x
, 0));
15171 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
15172 return get_pool_constant (x
);
15178 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
15181 enum tls_model model
;
15186 if (GET_CODE (op1
) == SYMBOL_REF
)
15188 model
= SYMBOL_REF_TLS_MODEL (op1
);
15191 op1
= legitimize_tls_address (op1
, model
, true);
15192 op1
= force_operand (op1
, op0
);
15195 if (GET_MODE (op1
) != mode
)
15196 op1
= convert_to_mode (mode
, op1
, 1);
15198 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15199 && SYMBOL_REF_DLLIMPORT_P (op1
))
15200 op1
= legitimize_dllimport_symbol (op1
, false);
15202 else if (GET_CODE (op1
) == CONST
15203 && GET_CODE (XEXP (op1
, 0)) == PLUS
15204 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
15206 rtx addend
= XEXP (XEXP (op1
, 0), 1);
15207 rtx symbol
= XEXP (XEXP (op1
, 0), 0);
15210 model
= SYMBOL_REF_TLS_MODEL (symbol
);
15212 tmp
= legitimize_tls_address (symbol
, model
, true);
15213 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15214 && SYMBOL_REF_DLLIMPORT_P (symbol
))
15215 tmp
= legitimize_dllimport_symbol (symbol
, true);
15219 tmp
= force_operand (tmp
, NULL
);
15220 tmp
= expand_simple_binop (Pmode
, PLUS
, tmp
, addend
,
15221 op0
, 1, OPTAB_DIRECT
);
15224 if (GET_MODE (tmp
) != mode
)
15225 op1
= convert_to_mode (mode
, tmp
, 1);
15229 if ((flag_pic
|| MACHOPIC_INDIRECT
)
15230 && symbolic_operand (op1
, mode
))
15232 if (TARGET_MACHO
&& !TARGET_64BIT
)
15235 /* dynamic-no-pic */
15236 if (MACHOPIC_INDIRECT
)
15238 rtx temp
= ((reload_in_progress
15239 || ((op0
&& REG_P (op0
))
15241 ? op0
: gen_reg_rtx (Pmode
));
15242 op1
= machopic_indirect_data_reference (op1
, temp
);
15244 op1
= machopic_legitimize_pic_address (op1
, mode
,
15245 temp
== op1
? 0 : temp
);
15247 if (op0
!= op1
&& GET_CODE (op0
) != MEM
)
15249 rtx insn
= gen_rtx_SET (VOIDmode
, op0
, op1
);
15253 if (GET_CODE (op0
) == MEM
)
15254 op1
= force_reg (Pmode
, op1
);
15258 if (GET_CODE (temp
) != REG
)
15259 temp
= gen_reg_rtx (Pmode
);
15260 temp
= legitimize_pic_address (op1
, temp
);
15265 /* dynamic-no-pic */
15271 op1
= force_reg (mode
, op1
);
15272 else if (!(TARGET_64BIT
&& x86_64_movabs_operand (op1
, DImode
)))
15274 rtx reg
= can_create_pseudo_p () ? NULL_RTX
: op0
;
15275 op1
= legitimize_pic_address (op1
, reg
);
15278 if (GET_MODE (op1
) != mode
)
15279 op1
= convert_to_mode (mode
, op1
, 1);
15286 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
15287 || !push_operand (op0
, mode
))
15289 op1
= force_reg (mode
, op1
);
15291 if (push_operand (op0
, mode
)
15292 && ! general_no_elim_operand (op1
, mode
))
15293 op1
= copy_to_mode_reg (mode
, op1
);
15295 /* Force large constants in 64bit compilation into register
15296 to get them CSEed. */
15297 if (can_create_pseudo_p ()
15298 && (mode
== DImode
) && TARGET_64BIT
15299 && immediate_operand (op1
, mode
)
15300 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
15301 && !register_operand (op0
, mode
)
15303 op1
= copy_to_mode_reg (mode
, op1
);
15305 if (can_create_pseudo_p ()
15306 && FLOAT_MODE_P (mode
)
15307 && GET_CODE (op1
) == CONST_DOUBLE
)
15309 /* If we are loading a floating point constant to a register,
15310 force the value to memory now, since we'll get better code
15311 out the back end. */
15313 op1
= validize_mem (force_const_mem (mode
, op1
));
15314 if (!register_operand (op0
, mode
))
15316 rtx temp
= gen_reg_rtx (mode
);
15317 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
15318 emit_move_insn (op0
, temp
);
15324 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
15328 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
15330 rtx op0
= operands
[0], op1
= operands
[1];
15331 unsigned int align
= GET_MODE_ALIGNMENT (mode
);
15333 /* Force constants other than zero into memory. We do not know how
15334 the instructions used to build constants modify the upper 64 bits
15335 of the register, once we have that information we may be able
15336 to handle some of them more efficiently. */
15337 if (can_create_pseudo_p ()
15338 && register_operand (op0
, mode
)
15339 && (CONSTANT_P (op1
)
15340 || (GET_CODE (op1
) == SUBREG
15341 && CONSTANT_P (SUBREG_REG (op1
))))
15342 && !standard_sse_constant_p (op1
))
15343 op1
= validize_mem (force_const_mem (mode
, op1
));
15345 /* We need to check memory alignment for SSE mode since attribute
15346 can make operands unaligned. */
15347 if (can_create_pseudo_p ()
15348 && SSE_REG_MODE_P (mode
)
15349 && ((MEM_P (op0
) && (MEM_ALIGN (op0
) < align
))
15350 || (MEM_P (op1
) && (MEM_ALIGN (op1
) < align
))))
15354 /* ix86_expand_vector_move_misalign() does not like constants ... */
15355 if (CONSTANT_P (op1
)
15356 || (GET_CODE (op1
) == SUBREG
15357 && CONSTANT_P (SUBREG_REG (op1
))))
15358 op1
= validize_mem (force_const_mem (mode
, op1
));
15360 /* ... nor both arguments in memory. */
15361 if (!register_operand (op0
, mode
)
15362 && !register_operand (op1
, mode
))
15363 op1
= force_reg (mode
, op1
);
15365 tmp
[0] = op0
; tmp
[1] = op1
;
15366 ix86_expand_vector_move_misalign (mode
, tmp
);
15370 /* Make operand1 a register if it isn't already. */
15371 if (can_create_pseudo_p ()
15372 && !register_operand (op0
, mode
)
15373 && !register_operand (op1
, mode
))
15375 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
15379 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
15382 /* Split 32-byte AVX unaligned load and store if needed. */
15385 ix86_avx256_split_vector_move_misalign (rtx op0
, rtx op1
)
15388 rtx (*extract
) (rtx
, rtx
, rtx
);
15389 rtx (*move_unaligned
) (rtx
, rtx
);
15390 enum machine_mode mode
;
15392 switch (GET_MODE (op0
))
15395 gcc_unreachable ();
15397 extract
= gen_avx_vextractf128v32qi
;
15398 move_unaligned
= gen_avx_movdqu256
;
15402 extract
= gen_avx_vextractf128v8sf
;
15403 move_unaligned
= gen_avx_movups256
;
15407 extract
= gen_avx_vextractf128v4df
;
15408 move_unaligned
= gen_avx_movupd256
;
15413 if (MEM_P (op1
) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD
)
15415 rtx r
= gen_reg_rtx (mode
);
15416 m
= adjust_address (op1
, mode
, 0);
15417 emit_move_insn (r
, m
);
15418 m
= adjust_address (op1
, mode
, 16);
15419 r
= gen_rtx_VEC_CONCAT (GET_MODE (op0
), r
, m
);
15420 emit_move_insn (op0
, r
);
15422 else if (MEM_P (op0
) && TARGET_AVX256_SPLIT_UNALIGNED_STORE
)
15424 m
= adjust_address (op0
, mode
, 0);
15425 emit_insn (extract (m
, op1
, const0_rtx
));
15426 m
= adjust_address (op0
, mode
, 16);
15427 emit_insn (extract (m
, op1
, const1_rtx
));
15430 emit_insn (move_unaligned (op0
, op1
));
15433 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
15434 straight to ix86_expand_vector_move. */
15435 /* Code generation for scalar reg-reg moves of single and double precision data:
15436 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
15440 if (x86_sse_partial_reg_dependency == true)
15445 Code generation for scalar loads of double precision data:
15446 if (x86_sse_split_regs == true)
15447 movlpd mem, reg (gas syntax)
15451 Code generation for unaligned packed loads of single precision data
15452 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
15453 if (x86_sse_unaligned_move_optimal)
15456 if (x86_sse_partial_reg_dependency == true)
15468 Code generation for unaligned packed loads of double precision data
15469 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
15470 if (x86_sse_unaligned_move_optimal)
15473 if (x86_sse_split_regs == true)
15486 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
15495 switch (GET_MODE_CLASS (mode
))
15497 case MODE_VECTOR_INT
:
15499 switch (GET_MODE_SIZE (mode
))
15502 /* If we're optimizing for size, movups is the smallest. */
15503 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
)
15505 op0
= gen_lowpart (V4SFmode
, op0
);
15506 op1
= gen_lowpart (V4SFmode
, op1
);
15507 emit_insn (gen_sse_movups (op0
, op1
));
15510 op0
= gen_lowpart (V16QImode
, op0
);
15511 op1
= gen_lowpart (V16QImode
, op1
);
15512 emit_insn (gen_sse2_movdqu (op0
, op1
));
15515 op0
= gen_lowpart (V32QImode
, op0
);
15516 op1
= gen_lowpart (V32QImode
, op1
);
15517 ix86_avx256_split_vector_move_misalign (op0
, op1
);
15520 gcc_unreachable ();
15523 case MODE_VECTOR_FLOAT
:
15524 op0
= gen_lowpart (mode
, op0
);
15525 op1
= gen_lowpart (mode
, op1
);
15530 emit_insn (gen_sse_movups (op0
, op1
));
15533 ix86_avx256_split_vector_move_misalign (op0
, op1
);
15536 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
)
15538 op0
= gen_lowpart (V4SFmode
, op0
);
15539 op1
= gen_lowpart (V4SFmode
, op1
);
15540 emit_insn (gen_sse_movups (op0
, op1
));
15543 emit_insn (gen_sse2_movupd (op0
, op1
));
15546 ix86_avx256_split_vector_move_misalign (op0
, op1
);
15549 gcc_unreachable ();
15554 gcc_unreachable ();
15562 /* If we're optimizing for size, movups is the smallest. */
15563 if (optimize_insn_for_size_p ()
15564 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
)
15566 op0
= gen_lowpart (V4SFmode
, op0
);
15567 op1
= gen_lowpart (V4SFmode
, op1
);
15568 emit_insn (gen_sse_movups (op0
, op1
));
15572 /* ??? If we have typed data, then it would appear that using
15573 movdqu is the only way to get unaligned data loaded with
15575 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
15577 op0
= gen_lowpart (V16QImode
, op0
);
15578 op1
= gen_lowpart (V16QImode
, op1
);
15579 emit_insn (gen_sse2_movdqu (op0
, op1
));
15583 if (TARGET_SSE2
&& mode
== V2DFmode
)
15587 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
)
15589 op0
= gen_lowpart (V2DFmode
, op0
);
15590 op1
= gen_lowpart (V2DFmode
, op1
);
15591 emit_insn (gen_sse2_movupd (op0
, op1
));
15595 /* When SSE registers are split into halves, we can avoid
15596 writing to the top half twice. */
15597 if (TARGET_SSE_SPLIT_REGS
)
15599 emit_clobber (op0
);
15604 /* ??? Not sure about the best option for the Intel chips.
15605 The following would seem to satisfy; the register is
15606 entirely cleared, breaking the dependency chain. We
15607 then store to the upper half, with a dependency depth
15608 of one. A rumor has it that Intel recommends two movsd
15609 followed by an unpacklpd, but this is unconfirmed. And
15610 given that the dependency depth of the unpacklpd would
15611 still be one, I'm not sure why this would be better. */
15612 zero
= CONST0_RTX (V2DFmode
);
15615 m
= adjust_address (op1
, DFmode
, 0);
15616 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
15617 m
= adjust_address (op1
, DFmode
, 8);
15618 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
15622 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
)
15624 op0
= gen_lowpart (V4SFmode
, op0
);
15625 op1
= gen_lowpart (V4SFmode
, op1
);
15626 emit_insn (gen_sse_movups (op0
, op1
));
15630 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
15631 emit_move_insn (op0
, CONST0_RTX (mode
));
15633 emit_clobber (op0
);
15635 if (mode
!= V4SFmode
)
15636 op0
= gen_lowpart (V4SFmode
, op0
);
15637 m
= adjust_address (op1
, V2SFmode
, 0);
15638 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
15639 m
= adjust_address (op1
, V2SFmode
, 8);
15640 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
15643 else if (MEM_P (op0
))
15645 /* If we're optimizing for size, movups is the smallest. */
15646 if (optimize_insn_for_size_p ()
15647 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
)
15649 op0
= gen_lowpart (V4SFmode
, op0
);
15650 op1
= gen_lowpart (V4SFmode
, op1
);
15651 emit_insn (gen_sse_movups (op0
, op1
));
15655 /* ??? Similar to above, only less clear because of quote
15656 typeless stores unquote. */
15657 if (TARGET_SSE2
&& !TARGET_SSE_TYPELESS_STORES
15658 && GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
15660 op0
= gen_lowpart (V16QImode
, op0
);
15661 op1
= gen_lowpart (V16QImode
, op1
);
15662 emit_insn (gen_sse2_movdqu (op0
, op1
));
15666 if (TARGET_SSE2
&& mode
== V2DFmode
)
15668 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL
)
15670 op0
= gen_lowpart (V2DFmode
, op0
);
15671 op1
= gen_lowpart (V2DFmode
, op1
);
15672 emit_insn (gen_sse2_movupd (op0
, op1
));
15676 m
= adjust_address (op0
, DFmode
, 0);
15677 emit_insn (gen_sse2_storelpd (m
, op1
));
15678 m
= adjust_address (op0
, DFmode
, 8);
15679 emit_insn (gen_sse2_storehpd (m
, op1
));
15684 if (mode
!= V4SFmode
)
15685 op1
= gen_lowpart (V4SFmode
, op1
);
15687 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL
)
15689 op0
= gen_lowpart (V4SFmode
, op0
);
15690 emit_insn (gen_sse_movups (op0
, op1
));
15694 m
= adjust_address (op0
, V2SFmode
, 0);
15695 emit_insn (gen_sse_storelps (m
, op1
));
15696 m
= adjust_address (op0
, V2SFmode
, 8);
15697 emit_insn (gen_sse_storehps (m
, op1
));
15702 gcc_unreachable ();
15705 /* Expand a push in MODE. This is some mode for which we do not support
15706 proper push instructions, at least from the registers that we expect
15707 the value to live in. */
15710 ix86_expand_push (enum machine_mode mode
, rtx x
)
15714 tmp
= expand_simple_binop (Pmode
, PLUS
, stack_pointer_rtx
,
15715 GEN_INT (-GET_MODE_SIZE (mode
)),
15716 stack_pointer_rtx
, 1, OPTAB_DIRECT
);
15717 if (tmp
!= stack_pointer_rtx
)
15718 emit_move_insn (stack_pointer_rtx
, tmp
);
15720 tmp
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
15722 /* When we push an operand onto stack, it has to be aligned at least
15723 at the function argument boundary. However since we don't have
15724 the argument type, we can't determine the actual argument
15726 emit_move_insn (tmp
, x
);
15729 /* Helper function of ix86_fixup_binary_operands to canonicalize
15730 operand order. Returns true if the operands should be swapped. */
15733 ix86_swap_binary_operands_p (enum rtx_code code
, enum machine_mode mode
,
15736 rtx dst
= operands
[0];
15737 rtx src1
= operands
[1];
15738 rtx src2
= operands
[2];
15740 /* If the operation is not commutative, we can't do anything. */
15741 if (GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
15744 /* Highest priority is that src1 should match dst. */
15745 if (rtx_equal_p (dst
, src1
))
15747 if (rtx_equal_p (dst
, src2
))
15750 /* Next highest priority is that immediate constants come second. */
15751 if (immediate_operand (src2
, mode
))
15753 if (immediate_operand (src1
, mode
))
15756 /* Lowest priority is that memory references should come second. */
15766 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
15767 destination to use for the operation. If different from the true
15768 destination in operands[0], a copy operation will be required. */
15771 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
15774 rtx dst
= operands
[0];
15775 rtx src1
= operands
[1];
15776 rtx src2
= operands
[2];
15778 /* Canonicalize operand order. */
15779 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
15783 /* It is invalid to swap operands of different modes. */
15784 gcc_assert (GET_MODE (src1
) == GET_MODE (src2
));
15791 /* Both source operands cannot be in memory. */
15792 if (MEM_P (src1
) && MEM_P (src2
))
15794 /* Optimization: Only read from memory once. */
15795 if (rtx_equal_p (src1
, src2
))
15797 src2
= force_reg (mode
, src2
);
15801 src2
= force_reg (mode
, src2
);
15804 /* If the destination is memory, and we do not have matching source
15805 operands, do things in registers. */
15806 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
15807 dst
= gen_reg_rtx (mode
);
15809 /* Source 1 cannot be a constant. */
15810 if (CONSTANT_P (src1
))
15811 src1
= force_reg (mode
, src1
);
15813 /* Source 1 cannot be a non-matching memory. */
15814 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
15815 src1
= force_reg (mode
, src1
);
15817 /* Improve address combine. */
15819 && GET_MODE_CLASS (mode
) == MODE_INT
15821 src2
= force_reg (mode
, src2
);
15823 operands
[1] = src1
;
15824 operands
[2] = src2
;
15828 /* Similarly, but assume that the destination has already been
15829 set up properly. */
15832 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
15833 enum machine_mode mode
, rtx operands
[])
15835 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
15836 gcc_assert (dst
== operands
[0]);
15839 /* Attempt to expand a binary operator. Make the expansion closer to the
15840 actual machine, then just general_operand, which will allow 3 separate
15841 memory references (one output, two input) in a single insn. */
15844 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
15847 rtx src1
, src2
, dst
, op
, clob
;
15849 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
15850 src1
= operands
[1];
15851 src2
= operands
[2];
15853 /* Emit the instruction. */
15855 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
15856 if (reload_in_progress
)
15858 /* Reload doesn't know about the flags register, and doesn't know that
15859 it doesn't want to clobber it. We can only do this with PLUS. */
15860 gcc_assert (code
== PLUS
);
15863 else if (reload_completed
15865 && !rtx_equal_p (dst
, src1
))
15867 /* This is going to be an LEA; avoid splitting it later. */
15872 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
15873 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
15876 /* Fix up the destination if needed. */
15877 if (dst
!= operands
[0])
15878 emit_move_insn (operands
[0], dst
);
15881 /* Return TRUE or FALSE depending on whether the binary operator meets the
15882 appropriate constraints. */
15885 ix86_binary_operator_ok (enum rtx_code code
, enum machine_mode mode
,
15888 rtx dst
= operands
[0];
15889 rtx src1
= operands
[1];
15890 rtx src2
= operands
[2];
15892 /* Both source operands cannot be in memory. */
15893 if (MEM_P (src1
) && MEM_P (src2
))
15896 /* Canonicalize operand order for commutative operators. */
15897 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
15904 /* If the destination is memory, we must have a matching source operand. */
15905 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
15908 /* Source 1 cannot be a constant. */
15909 if (CONSTANT_P (src1
))
15912 /* Source 1 cannot be a non-matching memory. */
15913 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
15914 /* Support "andhi/andsi/anddi" as a zero-extending move. */
15915 return (code
== AND
15918 || (TARGET_64BIT
&& mode
== DImode
))
15919 && satisfies_constraint_L (src2
));
15924 /* Attempt to expand a unary operator. Make the expansion closer to the
15925 actual machine, then just general_operand, which will allow 2 separate
15926 memory references (one output, one input) in a single insn. */
15929 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
15932 int matching_memory
;
15933 rtx src
, dst
, op
, clob
;
15938 /* If the destination is memory, and we do not have matching source
15939 operands, do things in registers. */
15940 matching_memory
= 0;
15943 if (rtx_equal_p (dst
, src
))
15944 matching_memory
= 1;
15946 dst
= gen_reg_rtx (mode
);
15949 /* When source operand is memory, destination must match. */
15950 if (MEM_P (src
) && !matching_memory
)
15951 src
= force_reg (mode
, src
);
15953 /* Emit the instruction. */
15955 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
15956 if (reload_in_progress
|| code
== NOT
)
15958 /* Reload doesn't know about the flags register, and doesn't know that
15959 it doesn't want to clobber it. */
15960 gcc_assert (code
== NOT
);
15965 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
15966 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
15969 /* Fix up the destination if needed. */
15970 if (dst
!= operands
[0])
15971 emit_move_insn (operands
[0], dst
);
15974 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
15975 divisor are within the range [0-255]. */
15978 ix86_split_idivmod (enum machine_mode mode
, rtx operands
[],
15981 rtx end_label
, qimode_label
;
15982 rtx insn
, div
, mod
;
15983 rtx scratch
, tmp0
, tmp1
, tmp2
;
15984 rtx (*gen_divmod4_1
) (rtx
, rtx
, rtx
, rtx
);
15985 rtx (*gen_zero_extend
) (rtx
, rtx
);
15986 rtx (*gen_test_ccno_1
) (rtx
, rtx
);
15991 gen_divmod4_1
= signed_p
? gen_divmodsi4_1
: gen_udivmodsi4_1
;
15992 gen_test_ccno_1
= gen_testsi_ccno_1
;
15993 gen_zero_extend
= gen_zero_extendqisi2
;
15996 gen_divmod4_1
= signed_p
? gen_divmoddi4_1
: gen_udivmoddi4_1
;
15997 gen_test_ccno_1
= gen_testdi_ccno_1
;
15998 gen_zero_extend
= gen_zero_extendqidi2
;
16001 gcc_unreachable ();
16004 end_label
= gen_label_rtx ();
16005 qimode_label
= gen_label_rtx ();
16007 scratch
= gen_reg_rtx (mode
);
16009 /* Use 8bit unsigned divimod if dividend and divisor are within
16010 the range [0-255]. */
16011 emit_move_insn (scratch
, operands
[2]);
16012 scratch
= expand_simple_binop (mode
, IOR
, scratch
, operands
[3],
16013 scratch
, 1, OPTAB_DIRECT
);
16014 emit_insn (gen_test_ccno_1 (scratch
, GEN_INT (-0x100)));
16015 tmp0
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
16016 tmp0
= gen_rtx_EQ (VOIDmode
, tmp0
, const0_rtx
);
16017 tmp0
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp0
,
16018 gen_rtx_LABEL_REF (VOIDmode
, qimode_label
),
16020 insn
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp0
));
16021 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
16022 JUMP_LABEL (insn
) = qimode_label
;
16024 /* Generate original signed/unsigned divimod. */
16025 div
= gen_divmod4_1 (operands
[0], operands
[1],
16026 operands
[2], operands
[3]);
16029 /* Branch to the end. */
16030 emit_jump_insn (gen_jump (end_label
));
16033 /* Generate 8bit unsigned divide. */
16034 emit_label (qimode_label
);
16035 /* Don't use operands[0] for result of 8bit divide since not all
16036 registers support QImode ZERO_EXTRACT. */
16037 tmp0
= simplify_gen_subreg (HImode
, scratch
, mode
, 0);
16038 tmp1
= simplify_gen_subreg (HImode
, operands
[2], mode
, 0);
16039 tmp2
= simplify_gen_subreg (QImode
, operands
[3], mode
, 0);
16040 emit_insn (gen_udivmodhiqi3 (tmp0
, tmp1
, tmp2
));
16044 div
= gen_rtx_DIV (SImode
, operands
[2], operands
[3]);
16045 mod
= gen_rtx_MOD (SImode
, operands
[2], operands
[3]);
16049 div
= gen_rtx_UDIV (SImode
, operands
[2], operands
[3]);
16050 mod
= gen_rtx_UMOD (SImode
, operands
[2], operands
[3]);
16053 /* Extract remainder from AH. */
16054 tmp1
= gen_rtx_ZERO_EXTRACT (mode
, tmp0
, GEN_INT (8), GEN_INT (8));
16055 if (REG_P (operands
[1]))
16056 insn
= emit_move_insn (operands
[1], tmp1
);
16059 /* Need a new scratch register since the old one has result
16061 scratch
= gen_reg_rtx (mode
);
16062 emit_move_insn (scratch
, tmp1
);
16063 insn
= emit_move_insn (operands
[1], scratch
);
16065 set_unique_reg_note (insn
, REG_EQUAL
, mod
);
16067 /* Zero extend quotient from AL. */
16068 tmp1
= gen_lowpart (QImode
, tmp0
);
16069 insn
= emit_insn (gen_zero_extend (operands
[0], tmp1
));
16070 set_unique_reg_note (insn
, REG_EQUAL
, div
);
16072 emit_label (end_label
);
16075 #define LEA_MAX_STALL (3)
16076 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
16078 /* Increase given DISTANCE in half-cycles according to
16079 dependencies between PREV and NEXT instructions.
16080 Add 1 half-cycle if there is no dependency and
16081 go to next cycle if there is some dependecy. */
16083 static unsigned int
16084 increase_distance (rtx prev
, rtx next
, unsigned int distance
)
16089 if (!prev
|| !next
)
16090 return distance
+ (distance
& 1) + 2;
16092 if (!DF_INSN_USES (next
) || !DF_INSN_DEFS (prev
))
16093 return distance
+ 1;
16095 for (use_rec
= DF_INSN_USES (next
); *use_rec
; use_rec
++)
16096 for (def_rec
= DF_INSN_DEFS (prev
); *def_rec
; def_rec
++)
16097 if (!DF_REF_IS_ARTIFICIAL (*def_rec
)
16098 && DF_REF_REGNO (*use_rec
) == DF_REF_REGNO (*def_rec
))
16099 return distance
+ (distance
& 1) + 2;
16101 return distance
+ 1;
16104 /* Function checks if instruction INSN defines register number
16105 REGNO1 or REGNO2. */
16108 insn_defines_reg (unsigned int regno1
, unsigned int regno2
,
16113 for (def_rec
= DF_INSN_DEFS (insn
); *def_rec
; def_rec
++)
16114 if (DF_REF_REG_DEF_P (*def_rec
)
16115 && !DF_REF_IS_ARTIFICIAL (*def_rec
)
16116 && (regno1
== DF_REF_REGNO (*def_rec
)
16117 || regno2
== DF_REF_REGNO (*def_rec
)))
16125 /* Function checks if instruction INSN uses register number
16126 REGNO as a part of address expression. */
16129 insn_uses_reg_mem (unsigned int regno
, rtx insn
)
16133 for (use_rec
= DF_INSN_USES (insn
); *use_rec
; use_rec
++)
16134 if (DF_REF_REG_MEM_P (*use_rec
) && regno
== DF_REF_REGNO (*use_rec
))
16140 /* Search backward for non-agu definition of register number REGNO1
16141 or register number REGNO2 in basic block starting from instruction
16142 START up to head of basic block or instruction INSN.
16144 Function puts true value into *FOUND var if definition was found
16145 and false otherwise.
16147 Distance in half-cycles between START and found instruction or head
16148 of BB is added to DISTANCE and returned. */
16151 distance_non_agu_define_in_bb (unsigned int regno1
, unsigned int regno2
,
16152 rtx insn
, int distance
,
16153 rtx start
, bool *found
)
16155 basic_block bb
= start
? BLOCK_FOR_INSN (start
) : NULL
;
16158 enum attr_type insn_type
;
16164 && distance
< LEA_SEARCH_THRESHOLD
)
16166 if (NONDEBUG_INSN_P (prev
) && NONJUMP_INSN_P (prev
))
16168 distance
= increase_distance (prev
, next
, distance
);
16169 if (insn_defines_reg (regno1
, regno2
, prev
))
16171 insn_type
= get_attr_type (prev
);
16172 if (insn_type
!= TYPE_LEA
)
16181 if (prev
== BB_HEAD (bb
))
16184 prev
= PREV_INSN (prev
);
16190 /* Search backward for non-agu definition of register number REGNO1
16191 or register number REGNO2 in INSN's basic block until
16192 1. Pass LEA_SEARCH_THRESHOLD instructions, or
16193 2. Reach neighbour BBs boundary, or
16194 3. Reach agu definition.
16195 Returns the distance between the non-agu definition point and INSN.
16196 If no definition point, returns -1. */
16199 distance_non_agu_define (unsigned int regno1
, unsigned int regno2
,
16202 basic_block bb
= BLOCK_FOR_INSN (insn
);
16204 bool found
= false;
16206 if (insn
!= BB_HEAD (bb
))
16207 distance
= distance_non_agu_define_in_bb (regno1
, regno2
, insn
,
16208 distance
, PREV_INSN (insn
),
16211 if (!found
&& distance
< LEA_SEARCH_THRESHOLD
)
16215 bool simple_loop
= false;
16217 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
16220 simple_loop
= true;
16225 distance
= distance_non_agu_define_in_bb (regno1
, regno2
,
16227 BB_END (bb
), &found
);
16230 int shortest_dist
= -1;
16231 bool found_in_bb
= false;
16233 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
16236 = distance_non_agu_define_in_bb (regno1
, regno2
,
16242 if (shortest_dist
< 0)
16243 shortest_dist
= bb_dist
;
16244 else if (bb_dist
> 0)
16245 shortest_dist
= MIN (bb_dist
, shortest_dist
);
16251 distance
= shortest_dist
;
16255 /* get_attr_type may modify recog data. We want to make sure
16256 that recog data is valid for instruction INSN, on which
16257 distance_non_agu_define is called. INSN is unchanged here. */
16258 extract_insn_cached (insn
);
16263 return distance
>> 1;
16266 /* Return the distance in half-cycles between INSN and the next
16267 insn that uses register number REGNO in memory address added
16268 to DISTANCE. Return -1 if REGNO0 is set.
16270 Put true value into *FOUND if register usage was found and
16272 Put true value into *REDEFINED if register redefinition was
16273 found and false otherwise. */
16276 distance_agu_use_in_bb (unsigned int regno
,
16277 rtx insn
, int distance
, rtx start
,
16278 bool *found
, bool *redefined
)
16280 basic_block bb
= start
? BLOCK_FOR_INSN (start
) : NULL
;
16285 *redefined
= false;
16289 && distance
< LEA_SEARCH_THRESHOLD
)
16291 if (NONDEBUG_INSN_P (next
) && NONJUMP_INSN_P (next
))
16293 distance
= increase_distance(prev
, next
, distance
);
16294 if (insn_uses_reg_mem (regno
, next
))
16296 /* Return DISTANCE if OP0 is used in memory
16297 address in NEXT. */
16302 if (insn_defines_reg (regno
, INVALID_REGNUM
, next
))
16304 /* Return -1 if OP0 is set in NEXT. */
16312 if (next
== BB_END (bb
))
16315 next
= NEXT_INSN (next
);
16321 /* Return the distance between INSN and the next insn that uses
16322 register number REGNO0 in memory address. Return -1 if no such
16323 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
16326 distance_agu_use (unsigned int regno0
, rtx insn
)
16328 basic_block bb
= BLOCK_FOR_INSN (insn
);
16330 bool found
= false;
16331 bool redefined
= false;
16333 if (insn
!= BB_END (bb
))
16334 distance
= distance_agu_use_in_bb (regno0
, insn
, distance
,
16336 &found
, &redefined
);
16338 if (!found
&& !redefined
&& distance
< LEA_SEARCH_THRESHOLD
)
16342 bool simple_loop
= false;
16344 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
16347 simple_loop
= true;
16352 distance
= distance_agu_use_in_bb (regno0
, insn
,
16353 distance
, BB_HEAD (bb
),
16354 &found
, &redefined
);
16357 int shortest_dist
= -1;
16358 bool found_in_bb
= false;
16359 bool redefined_in_bb
= false;
16361 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
16364 = distance_agu_use_in_bb (regno0
, insn
,
16365 distance
, BB_HEAD (e
->dest
),
16366 &found_in_bb
, &redefined_in_bb
);
16369 if (shortest_dist
< 0)
16370 shortest_dist
= bb_dist
;
16371 else if (bb_dist
> 0)
16372 shortest_dist
= MIN (bb_dist
, shortest_dist
);
16378 distance
= shortest_dist
;
16382 if (!found
|| redefined
)
16385 return distance
>> 1;
16388 /* Define this macro to tune LEA priority vs ADD, it take effect when
16389 there is a dilemma of choicing LEA or ADD
16390 Negative value: ADD is more preferred than LEA
16392 Positive value: LEA is more preferred than ADD*/
16393 #define IX86_LEA_PRIORITY 0
16395 /* Return true if usage of lea INSN has performance advantage
16396 over a sequence of instructions. Instructions sequence has
16397 SPLIT_COST cycles higher latency than lea latency. */
16400 ix86_lea_outperforms (rtx insn
, unsigned int regno0
, unsigned int regno1
,
16401 unsigned int regno2
, unsigned int split_cost
)
16403 int dist_define
, dist_use
;
16405 dist_define
= distance_non_agu_define (regno1
, regno2
, insn
);
16406 dist_use
= distance_agu_use (regno0
, insn
);
16408 if (dist_define
< 0 || dist_define
>= LEA_MAX_STALL
)
16410 /* If there is no non AGU operand definition, no AGU
16411 operand usage and split cost is 0 then both lea
16412 and non lea variants have same priority. Currently
16413 we prefer lea for 64 bit code and non lea on 32 bit
16415 if (dist_use
< 0 && split_cost
== 0)
16416 return TARGET_64BIT
|| IX86_LEA_PRIORITY
;
16421 /* With longer definitions distance lea is more preferable.
16422 Here we change it to take into account splitting cost and
16424 dist_define
+= split_cost
+ IX86_LEA_PRIORITY
;
16426 /* If there is no use in memory addess then we just check
16427 that split cost does not exceed AGU stall. */
16429 return dist_define
>= LEA_MAX_STALL
;
16431 /* If this insn has both backward non-agu dependence and forward
16432 agu dependence, the one with short distance takes effect. */
16433 return dist_define
>= dist_use
;
16436 /* Return true if it is legal to clobber flags by INSN and
16437 false otherwise. */
16440 ix86_ok_to_clobber_flags (rtx insn
)
16442 basic_block bb
= BLOCK_FOR_INSN (insn
);
16448 if (NONDEBUG_INSN_P (insn
))
16450 for (use
= DF_INSN_USES (insn
); *use
; use
++)
16451 if (DF_REF_REG_USE_P (*use
) && DF_REF_REGNO (*use
) == FLAGS_REG
)
16454 if (insn_defines_reg (FLAGS_REG
, INVALID_REGNUM
, insn
))
16458 if (insn
== BB_END (bb
))
16461 insn
= NEXT_INSN (insn
);
16464 live
= df_get_live_out(bb
);
16465 return !REGNO_REG_SET_P (live
, FLAGS_REG
);
16468 /* Return true if we need to split op0 = op1 + op2 into a sequence of
16469 move and add to avoid AGU stalls. */
16472 ix86_avoid_lea_for_add (rtx insn
, rtx operands
[])
16474 unsigned int regno0
= true_regnum (operands
[0]);
16475 unsigned int regno1
= true_regnum (operands
[1]);
16476 unsigned int regno2
= true_regnum (operands
[2]);
16478 /* Check if we need to optimize. */
16479 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
16482 /* Check it is correct to split here. */
16483 if (!ix86_ok_to_clobber_flags(insn
))
16486 /* We need to split only adds with non destructive
16487 destination operand. */
16488 if (regno0
== regno1
|| regno0
== regno2
)
16491 return !ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, 1);
16494 /* Return true if we need to split lea into a sequence of
16495 instructions to avoid AGU stalls. */
16498 ix86_avoid_lea_for_addr (rtx insn
, rtx operands
[])
16500 unsigned int regno0
= true_regnum (operands
[0]) ;
16501 unsigned int regno1
= -1;
16502 unsigned int regno2
= -1;
16503 unsigned int split_cost
= 0;
16504 struct ix86_address parts
;
16507 /* Check we need to optimize. */
16508 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
16511 /* Check it is correct to split here. */
16512 if (!ix86_ok_to_clobber_flags(insn
))
16515 ok
= ix86_decompose_address (operands
[1], &parts
);
16518 /* We should not split into add if non legitimate pic
16519 operand is used as displacement. */
16520 if (parts
.disp
&& flag_pic
&& !LEGITIMATE_PIC_OPERAND_P (parts
.disp
))
16524 regno1
= true_regnum (parts
.base
);
16526 regno2
= true_regnum (parts
.index
);
16528 /* Compute how many cycles we will add to execution time
16529 if split lea into a sequence of instructions. */
16530 if (parts
.base
|| parts
.index
)
16532 /* Have to use mov instruction if non desctructive
16533 destination form is used. */
16534 if (regno1
!= regno0
&& regno2
!= regno0
)
16537 /* Have to add index to base if both exist. */
16538 if (parts
.base
&& parts
.index
)
16541 /* Have to use shift and adds if scale is 2 or greater. */
16542 if (parts
.scale
> 1)
16544 if (regno0
!= regno1
)
16546 else if (regno2
== regno0
)
16549 split_cost
+= parts
.scale
;
16552 /* Have to use add instruction with immediate if
16553 disp is non zero. */
16554 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
16557 /* Subtract the price of lea. */
16561 return !ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, split_cost
);
16564 /* Emit x86 binary operand CODE in mode MODE, where the first operand
16565 matches destination. RTX includes clobber of FLAGS_REG. */
16568 ix86_emit_binop (enum rtx_code code
, enum machine_mode mode
,
16573 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, dst
, src
));
16574 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
16576 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
16579 /* Split lea instructions into a sequence of instructions
16580 which are executed on ALU to avoid AGU stalls.
16581 It is assumed that it is allowed to clobber flags register
16582 at lea position. */
16585 ix86_split_lea_for_addr (rtx operands
[], enum machine_mode mode
)
16587 unsigned int regno0
= true_regnum (operands
[0]) ;
16588 unsigned int regno1
= INVALID_REGNUM
;
16589 unsigned int regno2
= INVALID_REGNUM
;
16590 struct ix86_address parts
;
16594 ok
= ix86_decompose_address (operands
[1], &parts
);
16599 if (GET_MODE (parts
.base
) != mode
)
16600 parts
.base
= gen_rtx_SUBREG (mode
, parts
.base
, 0);
16601 regno1
= true_regnum (parts
.base
);
16606 if (GET_MODE (parts
.index
) != mode
)
16607 parts
.index
= gen_rtx_SUBREG (mode
, parts
.index
, 0);
16608 regno2
= true_regnum (parts
.index
);
16611 if (parts
.scale
> 1)
16613 /* Case r1 = r1 + ... */
16614 if (regno1
== regno0
)
16616 /* If we have a case r1 = r1 + C * r1 then we
16617 should use multiplication which is very
16618 expensive. Assume cost model is wrong if we
16619 have such case here. */
16620 gcc_assert (regno2
!= regno0
);
16622 for (adds
= parts
.scale
; adds
> 0; adds
--)
16623 ix86_emit_binop (PLUS
, mode
, operands
[0], parts
.index
);
16627 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
16628 if (regno0
!= regno2
)
16629 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], parts
.index
));
16631 /* Use shift for scaling. */
16632 ix86_emit_binop (ASHIFT
, mode
, operands
[0],
16633 GEN_INT (exact_log2 (parts
.scale
)));
16636 ix86_emit_binop (PLUS
, mode
, operands
[0], parts
.base
);
16638 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
16639 ix86_emit_binop (PLUS
, mode
, operands
[0], parts
.disp
);
16642 else if (!parts
.base
&& !parts
.index
)
16644 gcc_assert(parts
.disp
);
16645 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], parts
.disp
));
16651 if (regno0
!= regno2
)
16652 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], parts
.index
));
16654 else if (!parts
.index
)
16656 if (regno0
!= regno1
)
16657 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], parts
.base
));
16661 if (regno0
== regno1
)
16663 else if (regno0
== regno2
)
16667 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], parts
.base
));
16671 ix86_emit_binop (PLUS
, mode
, operands
[0], tmp
);
16674 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
16675 ix86_emit_binop (PLUS
, mode
, operands
[0], parts
.disp
);
16679 /* Return true if it is ok to optimize an ADD operation to LEA
16680 operation to avoid flag register consumation. For most processors,
16681 ADD is faster than LEA. For the processors like ATOM, if the
16682 destination register of LEA holds an actual address which will be
16683 used soon, LEA is better and otherwise ADD is better. */
16686 ix86_lea_for_add_ok (rtx insn
, rtx operands
[])
16688 unsigned int regno0
= true_regnum (operands
[0]);
16689 unsigned int regno1
= true_regnum (operands
[1]);
16690 unsigned int regno2
= true_regnum (operands
[2]);
16692 /* If a = b + c, (a!=b && a!=c), must use lea form. */
16693 if (regno0
!= regno1
&& regno0
!= regno2
)
16696 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
16699 return ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, 0);
16702 /* Return true if destination reg of SET_BODY is shift count of
16706 ix86_dep_by_shift_count_body (const_rtx set_body
, const_rtx use_body
)
16712 /* Retrieve destination of SET_BODY. */
16713 switch (GET_CODE (set_body
))
16716 set_dest
= SET_DEST (set_body
);
16717 if (!set_dest
|| !REG_P (set_dest
))
16721 for (i
= XVECLEN (set_body
, 0) - 1; i
>= 0; i
--)
16722 if (ix86_dep_by_shift_count_body (XVECEXP (set_body
, 0, i
),
16730 /* Retrieve shift count of USE_BODY. */
16731 switch (GET_CODE (use_body
))
16734 shift_rtx
= XEXP (use_body
, 1);
16737 for (i
= XVECLEN (use_body
, 0) - 1; i
>= 0; i
--)
16738 if (ix86_dep_by_shift_count_body (set_body
,
16739 XVECEXP (use_body
, 0, i
)))
16747 && (GET_CODE (shift_rtx
) == ASHIFT
16748 || GET_CODE (shift_rtx
) == LSHIFTRT
16749 || GET_CODE (shift_rtx
) == ASHIFTRT
16750 || GET_CODE (shift_rtx
) == ROTATE
16751 || GET_CODE (shift_rtx
) == ROTATERT
))
16753 rtx shift_count
= XEXP (shift_rtx
, 1);
16755 /* Return true if shift count is dest of SET_BODY. */
16756 if (REG_P (shift_count
)
16757 && true_regnum (set_dest
) == true_regnum (shift_count
))
16764 /* Return true if destination reg of SET_INSN is shift count of
16768 ix86_dep_by_shift_count (const_rtx set_insn
, const_rtx use_insn
)
16770 return ix86_dep_by_shift_count_body (PATTERN (set_insn
),
16771 PATTERN (use_insn
));
16774 /* Return TRUE or FALSE depending on whether the unary operator meets the
16775 appropriate constraints. */
16778 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
16779 enum machine_mode mode ATTRIBUTE_UNUSED
,
16780 rtx operands
[2] ATTRIBUTE_UNUSED
)
16782 /* If one of operands is memory, source and destination must match. */
16783 if ((MEM_P (operands
[0])
16784 || MEM_P (operands
[1]))
16785 && ! rtx_equal_p (operands
[0], operands
[1]))
16790 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
16791 are ok, keeping in mind the possible movddup alternative. */
16794 ix86_vec_interleave_v2df_operator_ok (rtx operands
[3], bool high
)
16796 if (MEM_P (operands
[0]))
16797 return rtx_equal_p (operands
[0], operands
[1 + high
]);
16798 if (MEM_P (operands
[1]) && MEM_P (operands
[2]))
16799 return TARGET_SSE3
&& rtx_equal_p (operands
[1], operands
[2]);
16803 /* Post-reload splitter for converting an SF or DFmode value in an
16804 SSE register into an unsigned SImode. */
16807 ix86_split_convert_uns_si_sse (rtx operands
[])
16809 enum machine_mode vecmode
;
16810 rtx value
, large
, zero_or_two31
, input
, two31
, x
;
16812 large
= operands
[1];
16813 zero_or_two31
= operands
[2];
16814 input
= operands
[3];
16815 two31
= operands
[4];
16816 vecmode
= GET_MODE (large
);
16817 value
= gen_rtx_REG (vecmode
, REGNO (operands
[0]));
16819 /* Load up the value into the low element. We must ensure that the other
16820 elements are valid floats -- zero is the easiest such value. */
16823 if (vecmode
== V4SFmode
)
16824 emit_insn (gen_vec_setv4sf_0 (value
, CONST0_RTX (V4SFmode
), input
));
16826 emit_insn (gen_sse2_loadlpd (value
, CONST0_RTX (V2DFmode
), input
));
16830 input
= gen_rtx_REG (vecmode
, REGNO (input
));
16831 emit_move_insn (value
, CONST0_RTX (vecmode
));
16832 if (vecmode
== V4SFmode
)
16833 emit_insn (gen_sse_movss (value
, value
, input
));
16835 emit_insn (gen_sse2_movsd (value
, value
, input
));
16838 emit_move_insn (large
, two31
);
16839 emit_move_insn (zero_or_two31
, MEM_P (two31
) ? large
: two31
);
16841 x
= gen_rtx_fmt_ee (LE
, vecmode
, large
, value
);
16842 emit_insn (gen_rtx_SET (VOIDmode
, large
, x
));
16844 x
= gen_rtx_AND (vecmode
, zero_or_two31
, large
);
16845 emit_insn (gen_rtx_SET (VOIDmode
, zero_or_two31
, x
));
16847 x
= gen_rtx_MINUS (vecmode
, value
, zero_or_two31
);
16848 emit_insn (gen_rtx_SET (VOIDmode
, value
, x
));
16850 large
= gen_rtx_REG (V4SImode
, REGNO (large
));
16851 emit_insn (gen_ashlv4si3 (large
, large
, GEN_INT (31)));
16853 x
= gen_rtx_REG (V4SImode
, REGNO (value
));
16854 if (vecmode
== V4SFmode
)
16855 emit_insn (gen_sse2_cvttps2dq (x
, value
));
16857 emit_insn (gen_sse2_cvttpd2dq (x
, value
));
16860 emit_insn (gen_xorv4si3 (value
, value
, large
));
16863 /* Convert an unsigned DImode value into a DFmode, using only SSE.
16864 Expects the 64-bit DImode to be supplied in a pair of integral
16865 registers. Requires SSE2; will use SSE3 if available. For x86_32,
16866 -mfpmath=sse, !optimize_size only. */
16869 ix86_expand_convert_uns_didf_sse (rtx target
, rtx input
)
16871 REAL_VALUE_TYPE bias_lo_rvt
, bias_hi_rvt
;
16872 rtx int_xmm
, fp_xmm
;
16873 rtx biases
, exponents
;
16876 int_xmm
= gen_reg_rtx (V4SImode
);
16877 if (TARGET_INTER_UNIT_MOVES
)
16878 emit_insn (gen_movdi_to_sse (int_xmm
, input
));
16879 else if (TARGET_SSE_SPLIT_REGS
)
16881 emit_clobber (int_xmm
);
16882 emit_move_insn (gen_lowpart (DImode
, int_xmm
), input
);
16886 x
= gen_reg_rtx (V2DImode
);
16887 ix86_expand_vector_init_one_nonzero (false, V2DImode
, x
, input
, 0);
16888 emit_move_insn (int_xmm
, gen_lowpart (V4SImode
, x
));
16891 x
= gen_rtx_CONST_VECTOR (V4SImode
,
16892 gen_rtvec (4, GEN_INT (0x43300000UL
),
16893 GEN_INT (0x45300000UL
),
16894 const0_rtx
, const0_rtx
));
16895 exponents
= validize_mem (force_const_mem (V4SImode
, x
));
16897 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
16898 emit_insn (gen_vec_interleave_lowv4si (int_xmm
, int_xmm
, exponents
));
16900 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
16901 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
16902 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
16903 (0x1.0p84 + double(fp_value_hi_xmm)).
16904 Note these exponents differ by 32. */
16906 fp_xmm
= copy_to_mode_reg (V2DFmode
, gen_lowpart (V2DFmode
, int_xmm
));
16908 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
16909 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
16910 real_ldexp (&bias_lo_rvt
, &dconst1
, 52);
16911 real_ldexp (&bias_hi_rvt
, &dconst1
, 84);
16912 biases
= const_double_from_real_value (bias_lo_rvt
, DFmode
);
16913 x
= const_double_from_real_value (bias_hi_rvt
, DFmode
);
16914 biases
= gen_rtx_CONST_VECTOR (V2DFmode
, gen_rtvec (2, biases
, x
));
16915 biases
= validize_mem (force_const_mem (V2DFmode
, biases
));
16916 emit_insn (gen_subv2df3 (fp_xmm
, fp_xmm
, biases
));
16918 /* Add the upper and lower DFmode values together. */
16920 emit_insn (gen_sse3_haddv2df3 (fp_xmm
, fp_xmm
, fp_xmm
));
16923 x
= copy_to_mode_reg (V2DFmode
, fp_xmm
);
16924 emit_insn (gen_vec_interleave_highv2df (fp_xmm
, fp_xmm
, fp_xmm
));
16925 emit_insn (gen_addv2df3 (fp_xmm
, fp_xmm
, x
));
16928 ix86_expand_vector_extract (false, target
, fp_xmm
, 0);
16931 /* Not used, but eases macroization of patterns. */
16933 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED
,
16934 rtx input ATTRIBUTE_UNUSED
)
16936 gcc_unreachable ();
16939 /* Convert an unsigned SImode value into a DFmode. Only currently used
16940 for SSE, but applicable anywhere. */
16943 ix86_expand_convert_uns_sidf_sse (rtx target
, rtx input
)
16945 REAL_VALUE_TYPE TWO31r
;
16948 x
= expand_simple_binop (SImode
, PLUS
, input
, GEN_INT (-2147483647 - 1),
16949 NULL
, 1, OPTAB_DIRECT
);
16951 fp
= gen_reg_rtx (DFmode
);
16952 emit_insn (gen_floatsidf2 (fp
, x
));
16954 real_ldexp (&TWO31r
, &dconst1
, 31);
16955 x
= const_double_from_real_value (TWO31r
, DFmode
);
16957 x
= expand_simple_binop (DFmode
, PLUS
, fp
, x
, target
, 0, OPTAB_DIRECT
);
16959 emit_move_insn (target
, x
);
16962 /* Convert a signed DImode value into a DFmode. Only used for SSE in
16963 32-bit mode; otherwise we have a direct convert instruction. */
16966 ix86_expand_convert_sign_didf_sse (rtx target
, rtx input
)
16968 REAL_VALUE_TYPE TWO32r
;
16969 rtx fp_lo
, fp_hi
, x
;
16971 fp_lo
= gen_reg_rtx (DFmode
);
16972 fp_hi
= gen_reg_rtx (DFmode
);
16974 emit_insn (gen_floatsidf2 (fp_hi
, gen_highpart (SImode
, input
)));
16976 real_ldexp (&TWO32r
, &dconst1
, 32);
16977 x
= const_double_from_real_value (TWO32r
, DFmode
);
16978 fp_hi
= expand_simple_binop (DFmode
, MULT
, fp_hi
, x
, fp_hi
, 0, OPTAB_DIRECT
);
16980 ix86_expand_convert_uns_sidf_sse (fp_lo
, gen_lowpart (SImode
, input
));
16982 x
= expand_simple_binop (DFmode
, PLUS
, fp_hi
, fp_lo
, target
,
16985 emit_move_insn (target
, x
);
16988 /* Convert an unsigned SImode value into a SFmode, using only SSE.
16989 For x86_32, -mfpmath=sse, !optimize_size only. */
16991 ix86_expand_convert_uns_sisf_sse (rtx target
, rtx input
)
16993 REAL_VALUE_TYPE ONE16r
;
16994 rtx fp_hi
, fp_lo
, int_hi
, int_lo
, x
;
16996 real_ldexp (&ONE16r
, &dconst1
, 16);
16997 x
= const_double_from_real_value (ONE16r
, SFmode
);
16998 int_lo
= expand_simple_binop (SImode
, AND
, input
, GEN_INT(0xffff),
16999 NULL
, 0, OPTAB_DIRECT
);
17000 int_hi
= expand_simple_binop (SImode
, LSHIFTRT
, input
, GEN_INT(16),
17001 NULL
, 0, OPTAB_DIRECT
);
17002 fp_hi
= gen_reg_rtx (SFmode
);
17003 fp_lo
= gen_reg_rtx (SFmode
);
17004 emit_insn (gen_floatsisf2 (fp_hi
, int_hi
));
17005 emit_insn (gen_floatsisf2 (fp_lo
, int_lo
));
17006 fp_hi
= expand_simple_binop (SFmode
, MULT
, fp_hi
, x
, fp_hi
,
17008 fp_hi
= expand_simple_binop (SFmode
, PLUS
, fp_hi
, fp_lo
, target
,
17010 if (!rtx_equal_p (target
, fp_hi
))
17011 emit_move_insn (target
, fp_hi
);
17014 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
17015 then replicate the value for all elements of the vector
17019 ix86_build_const_vector (enum machine_mode mode
, bool vect
, rtx value
)
17023 enum machine_mode scalar_mode
;
17040 n_elt
= GET_MODE_NUNITS (mode
);
17041 v
= rtvec_alloc (n_elt
);
17042 scalar_mode
= GET_MODE_INNER (mode
);
17044 RTVEC_ELT (v
, 0) = value
;
17046 for (i
= 1; i
< n_elt
; ++i
)
17047 RTVEC_ELT (v
, i
) = vect
? value
: CONST0_RTX (scalar_mode
);
17049 return gen_rtx_CONST_VECTOR (mode
, v
);
17052 gcc_unreachable ();
17056 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
17057 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
17058 for an SSE register. If VECT is true, then replicate the mask for
17059 all elements of the vector register. If INVERT is true, then create
17060 a mask excluding the sign bit. */
17063 ix86_build_signbit_mask (enum machine_mode mode
, bool vect
, bool invert
)
17065 enum machine_mode vec_mode
, imode
;
17066 HOST_WIDE_INT hi
, lo
;
17071 /* Find the sign bit, sign extended to 2*HWI. */
17079 mode
= GET_MODE_INNER (mode
);
17081 lo
= 0x80000000, hi
= lo
< 0;
17089 mode
= GET_MODE_INNER (mode
);
17091 if (HOST_BITS_PER_WIDE_INT
>= 64)
17092 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
17094 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
17099 vec_mode
= VOIDmode
;
17100 if (HOST_BITS_PER_WIDE_INT
>= 64)
17103 lo
= 0, hi
= (HOST_WIDE_INT
)1 << shift
;
17110 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
17114 lo
= ~lo
, hi
= ~hi
;
17120 mask
= immed_double_const (lo
, hi
, imode
);
17122 vec
= gen_rtvec (2, v
, mask
);
17123 v
= gen_rtx_CONST_VECTOR (V2DImode
, vec
);
17124 v
= copy_to_mode_reg (mode
, gen_lowpart (mode
, v
));
17131 gcc_unreachable ();
17135 lo
= ~lo
, hi
= ~hi
;
17137 /* Force this value into the low part of a fp vector constant. */
17138 mask
= immed_double_const (lo
, hi
, imode
);
17139 mask
= gen_lowpart (mode
, mask
);
17141 if (vec_mode
== VOIDmode
)
17142 return force_reg (mode
, mask
);
17144 v
= ix86_build_const_vector (vec_mode
, vect
, mask
);
17145 return force_reg (vec_mode
, v
);
17148 /* Generate code for floating point ABS or NEG. */
17151 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
17154 rtx mask
, set
, dst
, src
;
17155 bool use_sse
= false;
17156 bool vector_mode
= VECTOR_MODE_P (mode
);
17157 enum machine_mode vmode
= mode
;
17161 else if (mode
== TFmode
)
17163 else if (TARGET_SSE_MATH
)
17165 use_sse
= SSE_FLOAT_MODE_P (mode
);
17166 if (mode
== SFmode
)
17168 else if (mode
== DFmode
)
17172 /* NEG and ABS performed with SSE use bitwise mask operations.
17173 Create the appropriate mask now. */
17175 mask
= ix86_build_signbit_mask (vmode
, vector_mode
, code
== ABS
);
17182 set
= gen_rtx_fmt_e (code
, mode
, src
);
17183 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
17190 use
= gen_rtx_USE (VOIDmode
, mask
);
17192 par
= gen_rtvec (2, set
, use
);
17195 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
17196 par
= gen_rtvec (3, set
, use
, clob
);
17198 emit_insn (gen_rtx_PARALLEL (VOIDmode
, par
));
17204 /* Expand a copysign operation. Special case operand 0 being a constant. */
17207 ix86_expand_copysign (rtx operands
[])
17209 enum machine_mode mode
, vmode
;
17210 rtx dest
, op0
, op1
, mask
, nmask
;
17212 dest
= operands
[0];
17216 mode
= GET_MODE (dest
);
17218 if (mode
== SFmode
)
17220 else if (mode
== DFmode
)
17225 if (GET_CODE (op0
) == CONST_DOUBLE
)
17227 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
);
17229 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
17230 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
17232 if (mode
== SFmode
|| mode
== DFmode
)
17234 if (op0
== CONST0_RTX (mode
))
17235 op0
= CONST0_RTX (vmode
);
17238 rtx v
= ix86_build_const_vector (vmode
, false, op0
);
17240 op0
= force_reg (vmode
, v
);
17243 else if (op0
!= CONST0_RTX (mode
))
17244 op0
= force_reg (mode
, op0
);
17246 mask
= ix86_build_signbit_mask (vmode
, 0, 0);
17248 if (mode
== SFmode
)
17249 copysign_insn
= gen_copysignsf3_const
;
17250 else if (mode
== DFmode
)
17251 copysign_insn
= gen_copysigndf3_const
;
17253 copysign_insn
= gen_copysigntf3_const
;
17255 emit_insn (copysign_insn (dest
, op0
, op1
, mask
));
17259 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
17261 nmask
= ix86_build_signbit_mask (vmode
, 0, 1);
17262 mask
= ix86_build_signbit_mask (vmode
, 0, 0);
17264 if (mode
== SFmode
)
17265 copysign_insn
= gen_copysignsf3_var
;
17266 else if (mode
== DFmode
)
17267 copysign_insn
= gen_copysigndf3_var
;
17269 copysign_insn
= gen_copysigntf3_var
;
17271 emit_insn (copysign_insn (dest
, NULL_RTX
, op0
, op1
, nmask
, mask
));
17275 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
17276 be a constant, and so has already been expanded into a vector constant. */
17279 ix86_split_copysign_const (rtx operands
[])
17281 enum machine_mode mode
, vmode
;
17282 rtx dest
, op0
, mask
, x
;
17284 dest
= operands
[0];
17286 mask
= operands
[3];
17288 mode
= GET_MODE (dest
);
17289 vmode
= GET_MODE (mask
);
17291 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
17292 x
= gen_rtx_AND (vmode
, dest
, mask
);
17293 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
17295 if (op0
!= CONST0_RTX (vmode
))
17297 x
= gen_rtx_IOR (vmode
, dest
, op0
);
17298 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
17302 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
17303 so we have to do two masks. */
17306 ix86_split_copysign_var (rtx operands
[])
17308 enum machine_mode mode
, vmode
;
17309 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
17311 dest
= operands
[0];
17312 scratch
= operands
[1];
17315 nmask
= operands
[4];
17316 mask
= operands
[5];
17318 mode
= GET_MODE (dest
);
17319 vmode
= GET_MODE (mask
);
17321 if (rtx_equal_p (op0
, op1
))
17323 /* Shouldn't happen often (it's useless, obviously), but when it does
17324 we'd generate incorrect code if we continue below. */
17325 emit_move_insn (dest
, op0
);
17329 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
17331 gcc_assert (REGNO (op1
) == REGNO (scratch
));
17333 x
= gen_rtx_AND (vmode
, scratch
, mask
);
17334 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
17337 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
17338 x
= gen_rtx_NOT (vmode
, dest
);
17339 x
= gen_rtx_AND (vmode
, x
, op0
);
17340 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
17344 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
17346 x
= gen_rtx_AND (vmode
, scratch
, mask
);
17348 else /* alternative 2,4 */
17350 gcc_assert (REGNO (mask
) == REGNO (scratch
));
17351 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
17352 x
= gen_rtx_AND (vmode
, scratch
, op1
);
17354 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
17356 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
17358 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
17359 x
= gen_rtx_AND (vmode
, dest
, nmask
);
17361 else /* alternative 3,4 */
17363 gcc_assert (REGNO (nmask
) == REGNO (dest
));
17365 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
17366 x
= gen_rtx_AND (vmode
, dest
, op0
);
17368 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
17371 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
17372 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
17375 /* Return TRUE or FALSE depending on whether the first SET in INSN
17376 has source and destination with matching CC modes, and that the
17377 CC mode is at least as constrained as REQ_MODE. */
17380 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
17383 enum machine_mode set_mode
;
17385 set
= PATTERN (insn
);
17386 if (GET_CODE (set
) == PARALLEL
)
17387 set
= XVECEXP (set
, 0, 0);
17388 gcc_assert (GET_CODE (set
) == SET
);
17389 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
17391 set_mode
= GET_MODE (SET_DEST (set
));
17395 if (req_mode
!= CCNOmode
17396 && (req_mode
!= CCmode
17397 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
17401 if (req_mode
== CCGCmode
)
17405 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
17409 if (req_mode
== CCZmode
)
17419 if (set_mode
!= req_mode
)
17424 gcc_unreachable ();
17427 return GET_MODE (SET_SRC (set
)) == set_mode
;
17430 /* Generate insn patterns to do an integer compare of OPERANDS. */
17433 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
17435 enum machine_mode cmpmode
;
17438 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
17439 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
17441 /* This is very simple, but making the interface the same as in the
17442 FP case makes the rest of the code easier. */
17443 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
17444 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
17446 /* Return the test that should be put into the flags user, i.e.
17447 the bcc, scc, or cmov instruction. */
17448 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
17451 /* Figure out whether to use ordered or unordered fp comparisons.
17452 Return the appropriate mode to use. */
17455 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
17457 /* ??? In order to make all comparisons reversible, we do all comparisons
17458 non-trapping when compiling for IEEE. Once gcc is able to distinguish
17459 all forms trapping and nontrapping comparisons, we can make inequality
17460 comparisons trapping again, since it results in better code when using
17461 FCOM based compares. */
17462 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
17466 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
17468 enum machine_mode mode
= GET_MODE (op0
);
17470 if (SCALAR_FLOAT_MODE_P (mode
))
17472 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
17473 return ix86_fp_compare_mode (code
);
17478 /* Only zero flag is needed. */
17479 case EQ
: /* ZF=0 */
17480 case NE
: /* ZF!=0 */
17482 /* Codes needing carry flag. */
17483 case GEU
: /* CF=0 */
17484 case LTU
: /* CF=1 */
17485 /* Detect overflow checks. They need just the carry flag. */
17486 if (GET_CODE (op0
) == PLUS
17487 && rtx_equal_p (op1
, XEXP (op0
, 0)))
17491 case GTU
: /* CF=0 & ZF=0 */
17492 case LEU
: /* CF=1 | ZF=1 */
17493 /* Detect overflow checks. They need just the carry flag. */
17494 if (GET_CODE (op0
) == MINUS
17495 && rtx_equal_p (op1
, XEXP (op0
, 0)))
17499 /* Codes possibly doable only with sign flag when
17500 comparing against zero. */
17501 case GE
: /* SF=OF or SF=0 */
17502 case LT
: /* SF<>OF or SF=1 */
17503 if (op1
== const0_rtx
)
17506 /* For other cases Carry flag is not required. */
17508 /* Codes doable only with sign flag when comparing
17509 against zero, but we miss jump instruction for it
17510 so we need to use relational tests against overflow
17511 that thus needs to be zero. */
17512 case GT
: /* ZF=0 & SF=OF */
17513 case LE
: /* ZF=1 | SF<>OF */
17514 if (op1
== const0_rtx
)
17518 /* strcmp pattern do (use flags) and combine may ask us for proper
17523 gcc_unreachable ();
17527 /* Return the fixed registers used for condition codes. */
17530 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
17537 /* If two condition code modes are compatible, return a condition code
17538 mode which is compatible with both. Otherwise, return
17541 static enum machine_mode
17542 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
17547 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
17550 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
17551 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
17557 gcc_unreachable ();
17587 /* These are only compatible with themselves, which we already
17594 /* Return a comparison we can do and that it is equivalent to
17595 swap_condition (code) apart possibly from orderedness.
17596 But, never change orderedness if TARGET_IEEE_FP, returning
17597 UNKNOWN in that case if necessary. */
17599 static enum rtx_code
17600 ix86_fp_swap_condition (enum rtx_code code
)
17604 case GT
: /* GTU - CF=0 & ZF=0 */
17605 return TARGET_IEEE_FP
? UNKNOWN
: UNLT
;
17606 case GE
: /* GEU - CF=0 */
17607 return TARGET_IEEE_FP
? UNKNOWN
: UNLE
;
17608 case UNLT
: /* LTU - CF=1 */
17609 return TARGET_IEEE_FP
? UNKNOWN
: GT
;
17610 case UNLE
: /* LEU - CF=1 | ZF=1 */
17611 return TARGET_IEEE_FP
? UNKNOWN
: GE
;
17613 return swap_condition (code
);
17617 /* Return cost of comparison CODE using the best strategy for performance.
17618 All following functions do use number of instructions as a cost metrics.
17619 In future this should be tweaked to compute bytes for optimize_size and
17620 take into account performance of various instructions on various CPUs. */
17623 ix86_fp_comparison_cost (enum rtx_code code
)
17627 /* The cost of code using bit-twiddling on %ah. */
17644 arith_cost
= TARGET_IEEE_FP
? 5 : 4;
17648 arith_cost
= TARGET_IEEE_FP
? 6 : 4;
17651 gcc_unreachable ();
17654 switch (ix86_fp_comparison_strategy (code
))
17656 case IX86_FPCMP_COMI
:
17657 return arith_cost
> 4 ? 3 : 2;
17658 case IX86_FPCMP_SAHF
:
17659 return arith_cost
> 4 ? 4 : 3;
17665 /* Return strategy to use for floating-point. We assume that fcomi is always
17666 preferrable where available, since that is also true when looking at size
17667 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
17669 enum ix86_fpcmp_strategy
17670 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED
)
17672 /* Do fcomi/sahf based test when profitable. */
17675 return IX86_FPCMP_COMI
;
17677 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_function_for_size_p (cfun
)))
17678 return IX86_FPCMP_SAHF
;
17680 return IX86_FPCMP_ARITH
;
17683 /* Swap, force into registers, or otherwise massage the two operands
17684 to a fp comparison. The operands are updated in place; the new
17685 comparison code is returned. */
17687 static enum rtx_code
17688 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
17690 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
17691 rtx op0
= *pop0
, op1
= *pop1
;
17692 enum machine_mode op_mode
= GET_MODE (op0
);
17693 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
17695 /* All of the unordered compare instructions only work on registers.
17696 The same is true of the fcomi compare instructions. The XFmode
17697 compare instructions require registers except when comparing
17698 against zero or when converting operand 1 from fixed point to
17702 && (fpcmp_mode
== CCFPUmode
17703 || (op_mode
== XFmode
17704 && ! (standard_80387_constant_p (op0
) == 1
17705 || standard_80387_constant_p (op1
) == 1)
17706 && GET_CODE (op1
) != FLOAT
)
17707 || ix86_fp_comparison_strategy (code
) == IX86_FPCMP_COMI
))
17709 op0
= force_reg (op_mode
, op0
);
17710 op1
= force_reg (op_mode
, op1
);
17714 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
17715 things around if they appear profitable, otherwise force op0
17716 into a register. */
17718 if (standard_80387_constant_p (op0
) == 0
17720 && ! (standard_80387_constant_p (op1
) == 0
17723 enum rtx_code new_code
= ix86_fp_swap_condition (code
);
17724 if (new_code
!= UNKNOWN
)
17727 tmp
= op0
, op0
= op1
, op1
= tmp
;
17733 op0
= force_reg (op_mode
, op0
);
17735 if (CONSTANT_P (op1
))
17737 int tmp
= standard_80387_constant_p (op1
);
17739 op1
= validize_mem (force_const_mem (op_mode
, op1
));
17743 op1
= force_reg (op_mode
, op1
);
17746 op1
= force_reg (op_mode
, op1
);
17750 /* Try to rearrange the comparison to make it cheaper. */
17751 if (ix86_fp_comparison_cost (code
)
17752 > ix86_fp_comparison_cost (swap_condition (code
))
17753 && (REG_P (op1
) || can_create_pseudo_p ()))
17756 tmp
= op0
, op0
= op1
, op1
= tmp
;
17757 code
= swap_condition (code
);
17759 op0
= force_reg (op_mode
, op0
);
17767 /* Convert comparison codes we use to represent FP comparison to integer
17768 code that will result in proper branch. Return UNKNOWN if no such code
17772 ix86_fp_compare_code_to_integer (enum rtx_code code
)
17801 /* Generate insn patterns to do a floating point compare of OPERANDS. */
17804 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
)
17806 enum machine_mode fpcmp_mode
, intcmp_mode
;
17809 fpcmp_mode
= ix86_fp_compare_mode (code
);
17810 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
17812 /* Do fcomi/sahf based test when profitable. */
17813 switch (ix86_fp_comparison_strategy (code
))
17815 case IX86_FPCMP_COMI
:
17816 intcmp_mode
= fpcmp_mode
;
17817 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
17818 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
17823 case IX86_FPCMP_SAHF
:
17824 intcmp_mode
= fpcmp_mode
;
17825 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
17826 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
17830 scratch
= gen_reg_rtx (HImode
);
17831 tmp2
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
17832 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, tmp2
)));
17835 case IX86_FPCMP_ARITH
:
17836 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
17837 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
17838 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
17840 scratch
= gen_reg_rtx (HImode
);
17841 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
17843 /* In the unordered case, we have to check C2 for NaN's, which
17844 doesn't happen to work out to anything nice combination-wise.
17845 So do some bit twiddling on the value we've got in AH to come
17846 up with an appropriate set of condition codes. */
17848 intcmp_mode
= CCNOmode
;
17853 if (code
== GT
|| !TARGET_IEEE_FP
)
17855 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
17860 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
17861 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
17862 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
17863 intcmp_mode
= CCmode
;
17869 if (code
== LT
&& TARGET_IEEE_FP
)
17871 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
17872 emit_insn (gen_cmpqi_ext_3 (scratch
, const1_rtx
));
17873 intcmp_mode
= CCmode
;
17878 emit_insn (gen_testqi_ext_ccno_0 (scratch
, const1_rtx
));
17884 if (code
== GE
|| !TARGET_IEEE_FP
)
17886 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
17891 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
17892 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
, const1_rtx
));
17898 if (code
== LE
&& TARGET_IEEE_FP
)
17900 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
17901 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
17902 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
17903 intcmp_mode
= CCmode
;
17908 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
17914 if (code
== EQ
&& TARGET_IEEE_FP
)
17916 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
17917 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
17918 intcmp_mode
= CCmode
;
17923 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
17929 if (code
== NE
&& TARGET_IEEE_FP
)
17931 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
17932 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
17938 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
17944 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
17948 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
17953 gcc_unreachable ();
17961 /* Return the test that should be put into the flags user, i.e.
17962 the bcc, scc, or cmov instruction. */
17963 return gen_rtx_fmt_ee (code
, VOIDmode
,
17964 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
17969 ix86_expand_compare (enum rtx_code code
, rtx op0
, rtx op1
)
17973 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_CC
)
17974 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, op0
, op1
);
17976 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
17978 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0
)));
17979 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
);
17982 ret
= ix86_expand_int_compare (code
, op0
, op1
);
17988 ix86_expand_branch (enum rtx_code code
, rtx op0
, rtx op1
, rtx label
)
17990 enum machine_mode mode
= GET_MODE (op0
);
18002 tmp
= ix86_expand_compare (code
, op0
, op1
);
18003 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
18004 gen_rtx_LABEL_REF (VOIDmode
, label
),
18006 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
18013 /* Expand DImode branch into multiple compare+branch. */
18015 rtx lo
[2], hi
[2], label2
;
18016 enum rtx_code code1
, code2
, code3
;
18017 enum machine_mode submode
;
18019 if (CONSTANT_P (op0
) && !CONSTANT_P (op1
))
18021 tmp
= op0
, op0
= op1
, op1
= tmp
;
18022 code
= swap_condition (code
);
18025 split_double_mode (mode
, &op0
, 1, lo
+0, hi
+0);
18026 split_double_mode (mode
, &op1
, 1, lo
+1, hi
+1);
18028 submode
= mode
== DImode
? SImode
: DImode
;
18030 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
18031 avoid two branches. This costs one extra insn, so disable when
18032 optimizing for size. */
18034 if ((code
== EQ
|| code
== NE
)
18035 && (!optimize_insn_for_size_p ()
18036 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
18041 if (hi
[1] != const0_rtx
)
18042 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
18043 NULL_RTX
, 0, OPTAB_WIDEN
);
18046 if (lo
[1] != const0_rtx
)
18047 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
18048 NULL_RTX
, 0, OPTAB_WIDEN
);
18050 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
18051 NULL_RTX
, 0, OPTAB_WIDEN
);
18053 ix86_expand_branch (code
, tmp
, const0_rtx
, label
);
18057 /* Otherwise, if we are doing less-than or greater-or-equal-than,
18058 op1 is a constant and the low word is zero, then we can just
18059 examine the high word. Similarly for low word -1 and
18060 less-or-equal-than or greater-than. */
18062 if (CONST_INT_P (hi
[1]))
18065 case LT
: case LTU
: case GE
: case GEU
:
18066 if (lo
[1] == const0_rtx
)
18068 ix86_expand_branch (code
, hi
[0], hi
[1], label
);
18072 case LE
: case LEU
: case GT
: case GTU
:
18073 if (lo
[1] == constm1_rtx
)
18075 ix86_expand_branch (code
, hi
[0], hi
[1], label
);
18083 /* Otherwise, we need two or three jumps. */
18085 label2
= gen_label_rtx ();
18088 code2
= swap_condition (code
);
18089 code3
= unsigned_condition (code
);
18093 case LT
: case GT
: case LTU
: case GTU
:
18096 case LE
: code1
= LT
; code2
= GT
; break;
18097 case GE
: code1
= GT
; code2
= LT
; break;
18098 case LEU
: code1
= LTU
; code2
= GTU
; break;
18099 case GEU
: code1
= GTU
; code2
= LTU
; break;
18101 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
18102 case NE
: code2
= UNKNOWN
; break;
18105 gcc_unreachable ();
18110 * if (hi(a) < hi(b)) goto true;
18111 * if (hi(a) > hi(b)) goto false;
18112 * if (lo(a) < lo(b)) goto true;
18116 if (code1
!= UNKNOWN
)
18117 ix86_expand_branch (code1
, hi
[0], hi
[1], label
);
18118 if (code2
!= UNKNOWN
)
18119 ix86_expand_branch (code2
, hi
[0], hi
[1], label2
);
18121 ix86_expand_branch (code3
, lo
[0], lo
[1], label
);
18123 if (code2
!= UNKNOWN
)
18124 emit_label (label2
);
18129 gcc_assert (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_CC
);
18134 /* Split branch based on floating point condition. */
18136 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
18137 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
18142 if (target2
!= pc_rtx
)
18145 code
= reverse_condition_maybe_unordered (code
);
18150 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
18153 /* Remove pushed operand from stack. */
18155 ix86_free_from_memory (GET_MODE (pushed
));
18157 i
= emit_jump_insn (gen_rtx_SET
18159 gen_rtx_IF_THEN_ELSE (VOIDmode
,
18160 condition
, target1
, target2
)));
18161 if (split_branch_probability
>= 0)
18162 add_reg_note (i
, REG_BR_PROB
, GEN_INT (split_branch_probability
));
18166 ix86_expand_setcc (rtx dest
, enum rtx_code code
, rtx op0
, rtx op1
)
18170 gcc_assert (GET_MODE (dest
) == QImode
);
18172 ret
= ix86_expand_compare (code
, op0
, op1
);
18173 PUT_MODE (ret
, QImode
);
18174 emit_insn (gen_rtx_SET (VOIDmode
, dest
, ret
));
18177 /* Expand comparison setting or clearing carry flag. Return true when
18178 successful and set pop for the operation. */
18180 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
18182 enum machine_mode mode
=
18183 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
18185 /* Do not handle double-mode compares that go through special path. */
18186 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
18189 if (SCALAR_FLOAT_MODE_P (mode
))
18191 rtx compare_op
, compare_seq
;
18193 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
18195 /* Shortcut: following common codes never translate
18196 into carry flag compares. */
18197 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
18198 || code
== ORDERED
|| code
== UNORDERED
)
18201 /* These comparisons require zero flag; swap operands so they won't. */
18202 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
18203 && !TARGET_IEEE_FP
)
18208 code
= swap_condition (code
);
18211 /* Try to expand the comparison and verify that we end up with
18212 carry flag based comparison. This fails to be true only when
18213 we decide to expand comparison using arithmetic that is not
18214 too common scenario. */
18216 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
);
18217 compare_seq
= get_insns ();
18220 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
18221 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
18222 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
18224 code
= GET_CODE (compare_op
);
18226 if (code
!= LTU
&& code
!= GEU
)
18229 emit_insn (compare_seq
);
18234 if (!INTEGRAL_MODE_P (mode
))
18243 /* Convert a==0 into (unsigned)a<1. */
18246 if (op1
!= const0_rtx
)
18249 code
= (code
== EQ
? LTU
: GEU
);
18252 /* Convert a>b into b<a or a>=b-1. */
18255 if (CONST_INT_P (op1
))
18257 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
18258 /* Bail out on overflow. We still can swap operands but that
18259 would force loading of the constant into register. */
18260 if (op1
== const0_rtx
18261 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
18263 code
= (code
== GTU
? GEU
: LTU
);
18270 code
= (code
== GTU
? LTU
: GEU
);
18274 /* Convert a>=0 into (unsigned)a<0x80000000. */
18277 if (mode
== DImode
|| op1
!= const0_rtx
)
18279 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
18280 code
= (code
== LT
? GEU
: LTU
);
18284 if (mode
== DImode
|| op1
!= constm1_rtx
)
18286 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
18287 code
= (code
== LE
? GEU
: LTU
);
18293 /* Swapping operands may cause constant to appear as first operand. */
18294 if (!nonimmediate_operand (op0
, VOIDmode
))
18296 if (!can_create_pseudo_p ())
18298 op0
= force_reg (mode
, op0
);
18300 *pop
= ix86_expand_compare (code
, op0
, op1
);
18301 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
18306 ix86_expand_int_movcc (rtx operands
[])
18308 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
18309 rtx compare_seq
, compare_op
;
18310 enum machine_mode mode
= GET_MODE (operands
[0]);
18311 bool sign_bit_compare_p
= false;
18312 rtx op0
= XEXP (operands
[1], 0);
18313 rtx op1
= XEXP (operands
[1], 1);
18316 compare_op
= ix86_expand_compare (code
, op0
, op1
);
18317 compare_seq
= get_insns ();
18320 compare_code
= GET_CODE (compare_op
);
18322 if ((op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
18323 || (op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
18324 sign_bit_compare_p
= true;
18326 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
18327 HImode insns, we'd be swallowed in word prefix ops. */
18329 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
18330 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
18331 && CONST_INT_P (operands
[2])
18332 && CONST_INT_P (operands
[3]))
18334 rtx out
= operands
[0];
18335 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
18336 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
18337 HOST_WIDE_INT diff
;
18340 /* Sign bit compares are better done using shifts than we do by using
18342 if (sign_bit_compare_p
18343 || ix86_expand_carry_flag_compare (code
, op0
, op1
, &compare_op
))
18345 /* Detect overlap between destination and compare sources. */
18348 if (!sign_bit_compare_p
)
18351 bool fpcmp
= false;
18353 compare_code
= GET_CODE (compare_op
);
18355 flags
= XEXP (compare_op
, 0);
18357 if (GET_MODE (flags
) == CCFPmode
18358 || GET_MODE (flags
) == CCFPUmode
)
18362 = ix86_fp_compare_code_to_integer (compare_code
);
18365 /* To simplify rest of code, restrict to the GEU case. */
18366 if (compare_code
== LTU
)
18368 HOST_WIDE_INT tmp
= ct
;
18371 compare_code
= reverse_condition (compare_code
);
18372 code
= reverse_condition (code
);
18377 PUT_CODE (compare_op
,
18378 reverse_condition_maybe_unordered
18379 (GET_CODE (compare_op
)));
18381 PUT_CODE (compare_op
,
18382 reverse_condition (GET_CODE (compare_op
)));
18386 if (reg_overlap_mentioned_p (out
, op0
)
18387 || reg_overlap_mentioned_p (out
, op1
))
18388 tmp
= gen_reg_rtx (mode
);
18390 if (mode
== DImode
)
18391 emit_insn (gen_x86_movdicc_0_m1 (tmp
, flags
, compare_op
));
18393 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
),
18394 flags
, compare_op
));
18398 if (code
== GT
|| code
== GE
)
18399 code
= reverse_condition (code
);
18402 HOST_WIDE_INT tmp
= ct
;
18407 tmp
= emit_store_flag (tmp
, code
, op0
, op1
, VOIDmode
, 0, -1);
18420 tmp
= expand_simple_binop (mode
, PLUS
,
18422 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
18433 tmp
= expand_simple_binop (mode
, IOR
,
18435 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
18437 else if (diff
== -1 && ct
)
18447 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
18449 tmp
= expand_simple_binop (mode
, PLUS
,
18450 copy_rtx (tmp
), GEN_INT (cf
),
18451 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
18459 * andl cf - ct, dest
18469 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
18472 tmp
= expand_simple_binop (mode
, AND
,
18474 gen_int_mode (cf
- ct
, mode
),
18475 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
18477 tmp
= expand_simple_binop (mode
, PLUS
,
18478 copy_rtx (tmp
), GEN_INT (ct
),
18479 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
18482 if (!rtx_equal_p (tmp
, out
))
18483 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
18490 enum machine_mode cmp_mode
= GET_MODE (op0
);
18493 tmp
= ct
, ct
= cf
, cf
= tmp
;
18496 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
18498 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
18500 /* We may be reversing unordered compare to normal compare, that
18501 is not valid in general (we may convert non-trapping condition
18502 to trapping one), however on i386 we currently emit all
18503 comparisons unordered. */
18504 compare_code
= reverse_condition_maybe_unordered (compare_code
);
18505 code
= reverse_condition_maybe_unordered (code
);
18509 compare_code
= reverse_condition (compare_code
);
18510 code
= reverse_condition (code
);
18514 compare_code
= UNKNOWN
;
18515 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
18516 && CONST_INT_P (op1
))
18518 if (op1
== const0_rtx
18519 && (code
== LT
|| code
== GE
))
18520 compare_code
= code
;
18521 else if (op1
== constm1_rtx
)
18525 else if (code
== GT
)
18530 /* Optimize dest = (op0 < 0) ? -1 : cf. */
18531 if (compare_code
!= UNKNOWN
18532 && GET_MODE (op0
) == GET_MODE (out
)
18533 && (cf
== -1 || ct
== -1))
18535 /* If lea code below could be used, only optimize
18536 if it results in a 2 insn sequence. */
18538 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
18539 || diff
== 3 || diff
== 5 || diff
== 9)
18540 || (compare_code
== LT
&& ct
== -1)
18541 || (compare_code
== GE
&& cf
== -1))
18544 * notl op1 (if necessary)
18552 code
= reverse_condition (code
);
18555 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, -1);
18557 out
= expand_simple_binop (mode
, IOR
,
18559 out
, 1, OPTAB_DIRECT
);
18560 if (out
!= operands
[0])
18561 emit_move_insn (operands
[0], out
);
18568 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
18569 || diff
== 3 || diff
== 5 || diff
== 9)
18570 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
18572 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
18578 * lea cf(dest*(ct-cf)),dest
18582 * This also catches the degenerate setcc-only case.
18588 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, 1);
18591 /* On x86_64 the lea instruction operates on Pmode, so we need
18592 to get arithmetics done in proper mode to match. */
18594 tmp
= copy_rtx (out
);
18598 out1
= copy_rtx (out
);
18599 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
18603 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
18609 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
18612 if (!rtx_equal_p (tmp
, out
))
18615 out
= force_operand (tmp
, copy_rtx (out
));
18617 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
18619 if (!rtx_equal_p (out
, operands
[0]))
18620 emit_move_insn (operands
[0], copy_rtx (out
));
18626 * General case: Jumpful:
18627 * xorl dest,dest cmpl op1, op2
18628 * cmpl op1, op2 movl ct, dest
18629 * setcc dest jcc 1f
18630 * decl dest movl cf, dest
18631 * andl (cf-ct),dest 1:
18634 * Size 20. Size 14.
18636 * This is reasonably steep, but branch mispredict costs are
18637 * high on modern cpus, so consider failing only if optimizing
18641 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
18642 && BRANCH_COST (optimize_insn_for_speed_p (),
18647 enum machine_mode cmp_mode
= GET_MODE (op0
);
18652 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
18654 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
18656 /* We may be reversing unordered compare to normal compare,
18657 that is not valid in general (we may convert non-trapping
18658 condition to trapping one), however on i386 we currently
18659 emit all comparisons unordered. */
18660 code
= reverse_condition_maybe_unordered (code
);
18664 code
= reverse_condition (code
);
18665 if (compare_code
!= UNKNOWN
)
18666 compare_code
= reverse_condition (compare_code
);
18670 if (compare_code
!= UNKNOWN
)
18672 /* notl op1 (if needed)
18677 For x < 0 (resp. x <= -1) there will be no notl,
18678 so if possible swap the constants to get rid of the
18680 True/false will be -1/0 while code below (store flag
18681 followed by decrement) is 0/-1, so the constants need
18682 to be exchanged once more. */
18684 if (compare_code
== GE
|| !cf
)
18686 code
= reverse_condition (code
);
18691 HOST_WIDE_INT tmp
= cf
;
18696 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, -1);
18700 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, 1);
18702 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
),
18704 copy_rtx (out
), 1, OPTAB_DIRECT
);
18707 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
18708 gen_int_mode (cf
- ct
, mode
),
18709 copy_rtx (out
), 1, OPTAB_DIRECT
);
18711 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
18712 copy_rtx (out
), 1, OPTAB_DIRECT
);
18713 if (!rtx_equal_p (out
, operands
[0]))
18714 emit_move_insn (operands
[0], copy_rtx (out
));
18720 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
18722 /* Try a few things more with specific constants and a variable. */
18725 rtx var
, orig_out
, out
, tmp
;
18727 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
18730 /* If one of the two operands is an interesting constant, load a
18731 constant with the above and mask it in with a logical operation. */
18733 if (CONST_INT_P (operands
[2]))
18736 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
18737 operands
[3] = constm1_rtx
, op
= and_optab
;
18738 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
18739 operands
[3] = const0_rtx
, op
= ior_optab
;
18743 else if (CONST_INT_P (operands
[3]))
18746 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
18747 operands
[2] = constm1_rtx
, op
= and_optab
;
18748 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
18749 operands
[2] = const0_rtx
, op
= ior_optab
;
18756 orig_out
= operands
[0];
18757 tmp
= gen_reg_rtx (mode
);
18760 /* Recurse to get the constant loaded. */
18761 if (ix86_expand_int_movcc (operands
) == 0)
18764 /* Mask in the interesting variable. */
18765 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
18767 if (!rtx_equal_p (out
, orig_out
))
18768 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
18774 * For comparison with above,
18784 if (! nonimmediate_operand (operands
[2], mode
))
18785 operands
[2] = force_reg (mode
, operands
[2]);
18786 if (! nonimmediate_operand (operands
[3], mode
))
18787 operands
[3] = force_reg (mode
, operands
[3]);
18789 if (! register_operand (operands
[2], VOIDmode
)
18791 || ! register_operand (operands
[3], VOIDmode
)))
18792 operands
[2] = force_reg (mode
, operands
[2]);
18795 && ! register_operand (operands
[3], VOIDmode
))
18796 operands
[3] = force_reg (mode
, operands
[3]);
18798 emit_insn (compare_seq
);
18799 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
18800 gen_rtx_IF_THEN_ELSE (mode
,
18801 compare_op
, operands
[2],
18806 /* Swap, force into registers, or otherwise massage the two operands
18807 to an sse comparison with a mask result. Thus we differ a bit from
18808 ix86_prepare_fp_compare_args which expects to produce a flags result.
18810 The DEST operand exists to help determine whether to commute commutative
18811 operators. The POP0/POP1 operands are updated in place. The new
18812 comparison code is returned, or UNKNOWN if not implementable. */
18814 static enum rtx_code
18815 ix86_prepare_sse_fp_compare_args (rtx dest
, enum rtx_code code
,
18816 rtx
*pop0
, rtx
*pop1
)
18824 /* AVX supports all the needed comparisons. */
18827 /* We have no LTGT as an operator. We could implement it with
18828 NE & ORDERED, but this requires an extra temporary. It's
18829 not clear that it's worth it. */
18836 /* These are supported directly. */
18843 /* AVX has 3 operand comparisons, no need to swap anything. */
18846 /* For commutative operators, try to canonicalize the destination
18847 operand to be first in the comparison - this helps reload to
18848 avoid extra moves. */
18849 if (!dest
|| !rtx_equal_p (dest
, *pop1
))
18857 /* These are not supported directly before AVX, and furthermore
18858 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
18859 comparison operands to transform into something that is
18864 code
= swap_condition (code
);
18868 gcc_unreachable ();
18874 /* Detect conditional moves that exactly match min/max operational
18875 semantics. Note that this is IEEE safe, as long as we don't
18876 interchange the operands.
18878 Returns FALSE if this conditional move doesn't match a MIN/MAX,
18879 and TRUE if the operation is successful and instructions are emitted. */
18882 ix86_expand_sse_fp_minmax (rtx dest
, enum rtx_code code
, rtx cmp_op0
,
18883 rtx cmp_op1
, rtx if_true
, rtx if_false
)
18885 enum machine_mode mode
;
18891 else if (code
== UNGE
)
18894 if_true
= if_false
;
18900 if (rtx_equal_p (cmp_op0
, if_true
) && rtx_equal_p (cmp_op1
, if_false
))
18902 else if (rtx_equal_p (cmp_op1
, if_true
) && rtx_equal_p (cmp_op0
, if_false
))
18907 mode
= GET_MODE (dest
);
18909 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
18910 but MODE may be a vector mode and thus not appropriate. */
18911 if (!flag_finite_math_only
|| !flag_unsafe_math_optimizations
)
18913 int u
= is_min
? UNSPEC_IEEE_MIN
: UNSPEC_IEEE_MAX
;
18916 if_true
= force_reg (mode
, if_true
);
18917 v
= gen_rtvec (2, if_true
, if_false
);
18918 tmp
= gen_rtx_UNSPEC (mode
, v
, u
);
18922 code
= is_min
? SMIN
: SMAX
;
18923 tmp
= gen_rtx_fmt_ee (code
, mode
, if_true
, if_false
);
18926 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
18930 /* Expand an sse vector comparison. Return the register with the result. */
18933 ix86_expand_sse_cmp (rtx dest
, enum rtx_code code
, rtx cmp_op0
, rtx cmp_op1
,
18934 rtx op_true
, rtx op_false
)
18936 enum machine_mode mode
= GET_MODE (dest
);
18937 enum machine_mode cmp_mode
= GET_MODE (cmp_op0
);
18940 cmp_op0
= force_reg (cmp_mode
, cmp_op0
);
18941 if (!nonimmediate_operand (cmp_op1
, cmp_mode
))
18942 cmp_op1
= force_reg (cmp_mode
, cmp_op1
);
18945 || reg_overlap_mentioned_p (dest
, op_true
)
18946 || reg_overlap_mentioned_p (dest
, op_false
))
18947 dest
= gen_reg_rtx (mode
);
18949 x
= gen_rtx_fmt_ee (code
, cmp_mode
, cmp_op0
, cmp_op1
);
18950 if (cmp_mode
!= mode
)
18952 x
= force_reg (cmp_mode
, x
);
18953 convert_move (dest
, x
, false);
18956 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18961 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
18962 operations. This is used for both scalar and vector conditional moves. */
18965 ix86_expand_sse_movcc (rtx dest
, rtx cmp
, rtx op_true
, rtx op_false
)
18967 enum machine_mode mode
= GET_MODE (dest
);
18970 if (vector_all_ones_operand (op_true
, mode
)
18971 && rtx_equal_p (op_false
, CONST0_RTX (mode
)))
18973 emit_insn (gen_rtx_SET (VOIDmode
, dest
, cmp
));
18975 else if (op_false
== CONST0_RTX (mode
))
18977 op_true
= force_reg (mode
, op_true
);
18978 x
= gen_rtx_AND (mode
, cmp
, op_true
);
18979 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18981 else if (op_true
== CONST0_RTX (mode
))
18983 op_false
= force_reg (mode
, op_false
);
18984 x
= gen_rtx_NOT (mode
, cmp
);
18985 x
= gen_rtx_AND (mode
, x
, op_false
);
18986 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18988 else if (INTEGRAL_MODE_P (mode
) && op_true
== CONSTM1_RTX (mode
))
18990 op_false
= force_reg (mode
, op_false
);
18991 x
= gen_rtx_IOR (mode
, cmp
, op_false
);
18992 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18994 else if (TARGET_XOP
)
18996 op_true
= force_reg (mode
, op_true
);
18998 if (!nonimmediate_operand (op_false
, mode
))
18999 op_false
= force_reg (mode
, op_false
);
19001 emit_insn (gen_rtx_SET (mode
, dest
,
19002 gen_rtx_IF_THEN_ELSE (mode
, cmp
,
19008 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
) = NULL
;
19010 if (!nonimmediate_operand (op_true
, mode
))
19011 op_true
= force_reg (mode
, op_true
);
19013 op_false
= force_reg (mode
, op_false
);
19019 gen
= gen_sse4_1_blendvps
;
19023 gen
= gen_sse4_1_blendvpd
;
19031 gen
= gen_sse4_1_pblendvb
;
19032 dest
= gen_lowpart (V16QImode
, dest
);
19033 op_false
= gen_lowpart (V16QImode
, op_false
);
19034 op_true
= gen_lowpart (V16QImode
, op_true
);
19035 cmp
= gen_lowpart (V16QImode
, cmp
);
19040 gen
= gen_avx_blendvps256
;
19044 gen
= gen_avx_blendvpd256
;
19052 gen
= gen_avx2_pblendvb
;
19053 dest
= gen_lowpart (V32QImode
, dest
);
19054 op_false
= gen_lowpart (V32QImode
, op_false
);
19055 op_true
= gen_lowpart (V32QImode
, op_true
);
19056 cmp
= gen_lowpart (V32QImode
, cmp
);
19064 emit_insn (gen (dest
, op_false
, op_true
, cmp
));
19067 op_true
= force_reg (mode
, op_true
);
19069 t2
= gen_reg_rtx (mode
);
19071 t3
= gen_reg_rtx (mode
);
19075 x
= gen_rtx_AND (mode
, op_true
, cmp
);
19076 emit_insn (gen_rtx_SET (VOIDmode
, t2
, x
));
19078 x
= gen_rtx_NOT (mode
, cmp
);
19079 x
= gen_rtx_AND (mode
, x
, op_false
);
19080 emit_insn (gen_rtx_SET (VOIDmode
, t3
, x
));
19082 x
= gen_rtx_IOR (mode
, t3
, t2
);
19083 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
19088 /* Expand a floating-point conditional move. Return true if successful. */
19091 ix86_expand_fp_movcc (rtx operands
[])
19093 enum machine_mode mode
= GET_MODE (operands
[0]);
19094 enum rtx_code code
= GET_CODE (operands
[1]);
19095 rtx tmp
, compare_op
;
19096 rtx op0
= XEXP (operands
[1], 0);
19097 rtx op1
= XEXP (operands
[1], 1);
19099 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
19101 enum machine_mode cmode
;
19103 /* Since we've no cmove for sse registers, don't force bad register
19104 allocation just to gain access to it. Deny movcc when the
19105 comparison mode doesn't match the move mode. */
19106 cmode
= GET_MODE (op0
);
19107 if (cmode
== VOIDmode
)
19108 cmode
= GET_MODE (op1
);
19112 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
, &op0
, &op1
);
19113 if (code
== UNKNOWN
)
19116 if (ix86_expand_sse_fp_minmax (operands
[0], code
, op0
, op1
,
19117 operands
[2], operands
[3]))
19120 tmp
= ix86_expand_sse_cmp (operands
[0], code
, op0
, op1
,
19121 operands
[2], operands
[3]);
19122 ix86_expand_sse_movcc (operands
[0], tmp
, operands
[2], operands
[3]);
19126 /* The floating point conditional move instructions don't directly
19127 support conditions resulting from a signed integer comparison. */
19129 compare_op
= ix86_expand_compare (code
, op0
, op1
);
19130 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
19132 tmp
= gen_reg_rtx (QImode
);
19133 ix86_expand_setcc (tmp
, code
, op0
, op1
);
19135 compare_op
= ix86_expand_compare (NE
, tmp
, const0_rtx
);
19138 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
19139 gen_rtx_IF_THEN_ELSE (mode
, compare_op
,
19140 operands
[2], operands
[3])));
19145 /* Expand a floating-point vector conditional move; a vcond operation
19146 rather than a movcc operation. */
19149 ix86_expand_fp_vcond (rtx operands
[])
19151 enum rtx_code code
= GET_CODE (operands
[3]);
19154 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
19155 &operands
[4], &operands
[5]);
19156 if (code
== UNKNOWN
)
19159 switch (GET_CODE (operands
[3]))
19162 temp
= ix86_expand_sse_cmp (operands
[0], ORDERED
, operands
[4],
19163 operands
[5], operands
[0], operands
[0]);
19164 cmp
= ix86_expand_sse_cmp (operands
[0], NE
, operands
[4],
19165 operands
[5], operands
[1], operands
[2]);
19169 temp
= ix86_expand_sse_cmp (operands
[0], UNORDERED
, operands
[4],
19170 operands
[5], operands
[0], operands
[0]);
19171 cmp
= ix86_expand_sse_cmp (operands
[0], EQ
, operands
[4],
19172 operands
[5], operands
[1], operands
[2]);
19176 gcc_unreachable ();
19178 cmp
= expand_simple_binop (GET_MODE (cmp
), code
, temp
, cmp
, cmp
, 1,
19180 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
19184 if (ix86_expand_sse_fp_minmax (operands
[0], code
, operands
[4],
19185 operands
[5], operands
[1], operands
[2]))
19188 cmp
= ix86_expand_sse_cmp (operands
[0], code
, operands
[4], operands
[5],
19189 operands
[1], operands
[2]);
19190 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
19194 /* Expand a signed/unsigned integral vector conditional move. */
19197 ix86_expand_int_vcond (rtx operands
[])
19199 enum machine_mode data_mode
= GET_MODE (operands
[0]);
19200 enum machine_mode mode
= GET_MODE (operands
[4]);
19201 enum rtx_code code
= GET_CODE (operands
[3]);
19202 bool negate
= false;
19205 cop0
= operands
[4];
19206 cop1
= operands
[5];
19208 /* XOP supports all of the comparisons on all vector int types. */
19211 /* Canonicalize the comparison to EQ, GT, GTU. */
19222 code
= reverse_condition (code
);
19228 code
= reverse_condition (code
);
19234 code
= swap_condition (code
);
19235 x
= cop0
, cop0
= cop1
, cop1
= x
;
19239 gcc_unreachable ();
19242 /* Only SSE4.1/SSE4.2 supports V2DImode. */
19243 if (mode
== V2DImode
)
19248 /* SSE4.1 supports EQ. */
19249 if (!TARGET_SSE4_1
)
19255 /* SSE4.2 supports GT/GTU. */
19256 if (!TARGET_SSE4_2
)
19261 gcc_unreachable ();
19265 /* Unsigned parallel compare is not supported by the hardware.
19266 Play some tricks to turn this into a signed comparison
19270 cop0
= force_reg (mode
, cop0
);
19280 rtx (*gen_sub3
) (rtx
, rtx
, rtx
);
19284 case V8SImode
: gen_sub3
= gen_subv8si3
; break;
19285 case V4DImode
: gen_sub3
= gen_subv4di3
; break;
19286 case V4SImode
: gen_sub3
= gen_subv4si3
; break;
19287 case V2DImode
: gen_sub3
= gen_subv2di3
; break;
19289 gcc_unreachable ();
19291 /* Subtract (-(INT MAX) - 1) from both operands to make
19293 mask
= ix86_build_signbit_mask (mode
, true, false);
19294 t1
= gen_reg_rtx (mode
);
19295 emit_insn (gen_sub3 (t1
, cop0
, mask
));
19297 t2
= gen_reg_rtx (mode
);
19298 emit_insn (gen_sub3 (t2
, cop1
, mask
));
19310 /* Perform a parallel unsigned saturating subtraction. */
19311 x
= gen_reg_rtx (mode
);
19312 emit_insn (gen_rtx_SET (VOIDmode
, x
,
19313 gen_rtx_US_MINUS (mode
, cop0
, cop1
)));
19316 cop1
= CONST0_RTX (mode
);
19322 gcc_unreachable ();
19327 /* Allow the comparison to be done in one mode, but the movcc to
19328 happen in another mode. */
19329 if (data_mode
== mode
)
19331 x
= ix86_expand_sse_cmp (operands
[0], code
, cop0
, cop1
,
19332 operands
[1+negate
], operands
[2-negate
]);
19336 gcc_assert (GET_MODE_SIZE (data_mode
) == GET_MODE_SIZE (mode
));
19337 x
= ix86_expand_sse_cmp (gen_lowpart (mode
, operands
[0]),
19339 operands
[1+negate
], operands
[2-negate
]);
19340 x
= gen_lowpart (data_mode
, x
);
19343 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
19344 operands
[2-negate
]);
19348 /* Expand a variable vector permutation. */
19351 ix86_expand_vec_perm (rtx operands
[])
19353 rtx target
= operands
[0];
19354 rtx op0
= operands
[1];
19355 rtx op1
= operands
[2];
19356 rtx mask
= operands
[3];
19357 rtx t1
, t2
, t3
, t4
, vt
, vt2
, vec
[32];
19358 enum machine_mode mode
= GET_MODE (op0
);
19359 enum machine_mode maskmode
= GET_MODE (mask
);
19361 bool one_operand_shuffle
= rtx_equal_p (op0
, op1
);
19363 /* Number of elements in the vector. */
19364 w
= GET_MODE_NUNITS (mode
);
19365 e
= GET_MODE_UNIT_SIZE (mode
);
19366 gcc_assert (w
<= 32);
19370 if (mode
== V4DImode
|| mode
== V4DFmode
|| mode
== V16HImode
)
19372 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
19373 an constant shuffle operand. With a tiny bit of effort we can
19374 use VPERMD instead. A re-interpretation stall for V4DFmode is
19375 unfortunate but there's no avoiding it.
19376 Similarly for V16HImode we don't have instructions for variable
19377 shuffling, while for V32QImode we can use after preparing suitable
19378 masks vpshufb; vpshufb; vpermq; vpor. */
19380 if (mode
== V16HImode
)
19382 maskmode
= mode
= V32QImode
;
19388 maskmode
= mode
= V8SImode
;
19392 t1
= gen_reg_rtx (maskmode
);
19394 /* Replicate the low bits of the V4DImode mask into V8SImode:
19396 t1 = { A A B B C C D D }. */
19397 for (i
= 0; i
< w
/ 2; ++i
)
19398 vec
[i
*2 + 1] = vec
[i
*2] = GEN_INT (i
* 2);
19399 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
19400 vt
= force_reg (maskmode
, vt
);
19401 mask
= gen_lowpart (maskmode
, mask
);
19402 if (maskmode
== V8SImode
)
19403 emit_insn (gen_avx2_permvarv8si (t1
, vt
, mask
));
19405 emit_insn (gen_avx2_pshufbv32qi3 (t1
, mask
, vt
));
19407 /* Multiply the shuffle indicies by two. */
19408 t1
= expand_simple_binop (maskmode
, PLUS
, t1
, t1
, t1
, 1,
19411 /* Add one to the odd shuffle indicies:
19412 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
19413 for (i
= 0; i
< w
/ 2; ++i
)
19415 vec
[i
* 2] = const0_rtx
;
19416 vec
[i
* 2 + 1] = const1_rtx
;
19418 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
19419 vt
= force_const_mem (maskmode
, vt
);
19420 t1
= expand_simple_binop (maskmode
, PLUS
, t1
, vt
, t1
, 1,
19423 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
19424 operands
[3] = mask
= t1
;
19425 target
= gen_lowpart (mode
, target
);
19426 op0
= gen_lowpart (mode
, op0
);
19427 op1
= gen_lowpart (mode
, op1
);
19433 /* The VPERMD and VPERMPS instructions already properly ignore
19434 the high bits of the shuffle elements. No need for us to
19435 perform an AND ourselves. */
19436 if (one_operand_shuffle
)
19437 emit_insn (gen_avx2_permvarv8si (target
, mask
, op0
));
19440 t1
= gen_reg_rtx (V8SImode
);
19441 t2
= gen_reg_rtx (V8SImode
);
19442 emit_insn (gen_avx2_permvarv8si (t1
, mask
, op0
));
19443 emit_insn (gen_avx2_permvarv8si (t2
, mask
, op1
));
19449 mask
= gen_lowpart (V8SFmode
, mask
);
19450 if (one_operand_shuffle
)
19451 emit_insn (gen_avx2_permvarv8sf (target
, mask
, op0
));
19454 t1
= gen_reg_rtx (V8SFmode
);
19455 t2
= gen_reg_rtx (V8SFmode
);
19456 emit_insn (gen_avx2_permvarv8sf (t1
, mask
, op0
));
19457 emit_insn (gen_avx2_permvarv8sf (t2
, mask
, op1
));
19463 /* By combining the two 128-bit input vectors into one 256-bit
19464 input vector, we can use VPERMD and VPERMPS for the full
19465 two-operand shuffle. */
19466 t1
= gen_reg_rtx (V8SImode
);
19467 t2
= gen_reg_rtx (V8SImode
);
19468 emit_insn (gen_avx_vec_concatv8si (t1
, op0
, op1
));
19469 emit_insn (gen_avx_vec_concatv8si (t2
, mask
, mask
));
19470 emit_insn (gen_avx2_permvarv8si (t1
, t2
, t1
));
19471 emit_insn (gen_avx_vextractf128v8si (target
, t1
, const0_rtx
));
19475 t1
= gen_reg_rtx (V8SFmode
);
19476 t2
= gen_reg_rtx (V8SFmode
);
19477 mask
= gen_lowpart (V4SFmode
, mask
);
19478 emit_insn (gen_avx_vec_concatv8sf (t1
, op0
, op1
));
19479 emit_insn (gen_avx_vec_concatv8sf (t2
, mask
, mask
));
19480 emit_insn (gen_avx2_permvarv8sf (t1
, t2
, t1
));
19481 emit_insn (gen_avx_vextractf128v8sf (target
, t1
, const0_rtx
));
19485 t1
= gen_reg_rtx (V32QImode
);
19486 t2
= gen_reg_rtx (V32QImode
);
19487 t3
= gen_reg_rtx (V32QImode
);
19488 vt2
= GEN_INT (128);
19489 for (i
= 0; i
< 32; i
++)
19491 vt
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, vec
));
19492 vt
= force_reg (V32QImode
, vt
);
19493 for (i
= 0; i
< 32; i
++)
19494 vec
[i
] = i
< 16 ? vt2
: const0_rtx
;
19495 vt2
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, vec
));
19496 vt2
= force_reg (V32QImode
, vt2
);
19497 /* From mask create two adjusted masks, which contain the same
19498 bits as mask in the low 7 bits of each vector element.
19499 The first mask will have the most significant bit clear
19500 if it requests element from the same 128-bit lane
19501 and MSB set if it requests element from the other 128-bit lane.
19502 The second mask will have the opposite values of the MSB,
19503 and additionally will have its 128-bit lanes swapped.
19504 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
19505 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
19506 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
19507 stands for other 12 bytes. */
19508 /* The bit whether element is from the same lane or the other
19509 lane is bit 4, so shift it up by 3 to the MSB position. */
19510 emit_insn (gen_ashlv4di3 (gen_lowpart (V4DImode
, t1
),
19511 gen_lowpart (V4DImode
, mask
),
19513 /* Clear MSB bits from the mask just in case it had them set. */
19514 emit_insn (gen_avx2_andnotv32qi3 (t2
, vt
, mask
));
19515 /* After this t1 will have MSB set for elements from other lane. */
19516 emit_insn (gen_xorv32qi3 (t1
, t1
, vt2
));
19517 /* Clear bits other than MSB. */
19518 emit_insn (gen_andv32qi3 (t1
, t1
, vt
));
19519 /* Or in the lower bits from mask into t3. */
19520 emit_insn (gen_iorv32qi3 (t3
, t1
, t2
));
19521 /* And invert MSB bits in t1, so MSB is set for elements from the same
19523 emit_insn (gen_xorv32qi3 (t1
, t1
, vt
));
19524 /* Swap 128-bit lanes in t3. */
19525 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t3
),
19526 gen_lowpart (V4DImode
, t3
),
19527 const2_rtx
, GEN_INT (3),
19528 const0_rtx
, const1_rtx
));
19529 /* And or in the lower bits from mask into t1. */
19530 emit_insn (gen_iorv32qi3 (t1
, t1
, t2
));
19531 if (one_operand_shuffle
)
19533 /* Each of these shuffles will put 0s in places where
19534 element from the other 128-bit lane is needed, otherwise
19535 will shuffle in the requested value. */
19536 emit_insn (gen_avx2_pshufbv32qi3 (t3
, op0
, t3
));
19537 emit_insn (gen_avx2_pshufbv32qi3 (t1
, op0
, t1
));
19538 /* For t3 the 128-bit lanes are swapped again. */
19539 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t3
),
19540 gen_lowpart (V4DImode
, t3
),
19541 const2_rtx
, GEN_INT (3),
19542 const0_rtx
, const1_rtx
));
19543 /* And oring both together leads to the result. */
19544 emit_insn (gen_iorv32qi3 (target
, t1
, t3
));
19548 t4
= gen_reg_rtx (V32QImode
);
19549 /* Similarly to the above one_operand_shuffle code,
19550 just for repeated twice for each operand. merge_two:
19551 code will merge the two results together. */
19552 emit_insn (gen_avx2_pshufbv32qi3 (t4
, op0
, t3
));
19553 emit_insn (gen_avx2_pshufbv32qi3 (t3
, op1
, t3
));
19554 emit_insn (gen_avx2_pshufbv32qi3 (t2
, op0
, t1
));
19555 emit_insn (gen_avx2_pshufbv32qi3 (t1
, op1
, t1
));
19556 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t4
),
19557 gen_lowpart (V4DImode
, t4
),
19558 const2_rtx
, GEN_INT (3),
19559 const0_rtx
, const1_rtx
));
19560 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t3
),
19561 gen_lowpart (V4DImode
, t3
),
19562 const2_rtx
, GEN_INT (3),
19563 const0_rtx
, const1_rtx
));
19564 emit_insn (gen_iorv32qi3 (t4
, t2
, t4
));
19565 emit_insn (gen_iorv32qi3 (t3
, t1
, t3
));
19571 gcc_assert (GET_MODE_SIZE (mode
) <= 16);
19578 /* The XOP VPPERM insn supports three inputs. By ignoring the
19579 one_operand_shuffle special case, we avoid creating another
19580 set of constant vectors in memory. */
19581 one_operand_shuffle
= false;
19583 /* mask = mask & {2*w-1, ...} */
19584 vt
= GEN_INT (2*w
- 1);
19588 /* mask = mask & {w-1, ...} */
19589 vt
= GEN_INT (w
- 1);
19592 for (i
= 0; i
< w
; i
++)
19594 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
19595 mask
= expand_simple_binop (maskmode
, AND
, mask
, vt
,
19596 NULL_RTX
, 0, OPTAB_DIRECT
);
19598 /* For non-QImode operations, convert the word permutation control
19599 into a byte permutation control. */
19600 if (mode
!= V16QImode
)
19602 mask
= expand_simple_binop (maskmode
, ASHIFT
, mask
,
19603 GEN_INT (exact_log2 (e
)),
19604 NULL_RTX
, 0, OPTAB_DIRECT
);
19606 /* Convert mask to vector of chars. */
19607 mask
= force_reg (V16QImode
, gen_lowpart (V16QImode
, mask
));
19609 /* Replicate each of the input bytes into byte positions:
19610 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
19611 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
19612 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
19613 for (i
= 0; i
< 16; ++i
)
19614 vec
[i
] = GEN_INT (i
/e
* e
);
19615 vt
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, vec
));
19616 vt
= force_const_mem (V16QImode
, vt
);
19618 emit_insn (gen_xop_pperm (mask
, mask
, mask
, vt
));
19620 emit_insn (gen_ssse3_pshufbv16qi3 (mask
, mask
, vt
));
19622 /* Convert it into the byte positions by doing
19623 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
19624 for (i
= 0; i
< 16; ++i
)
19625 vec
[i
] = GEN_INT (i
% e
);
19626 vt
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, vec
));
19627 vt
= force_const_mem (V16QImode
, vt
);
19628 emit_insn (gen_addv16qi3 (mask
, mask
, vt
));
19631 /* The actual shuffle operations all operate on V16QImode. */
19632 op0
= gen_lowpart (V16QImode
, op0
);
19633 op1
= gen_lowpart (V16QImode
, op1
);
19634 target
= gen_lowpart (V16QImode
, target
);
19638 emit_insn (gen_xop_pperm (target
, op0
, op1
, mask
));
19640 else if (one_operand_shuffle
)
19642 emit_insn (gen_ssse3_pshufbv16qi3 (target
, op0
, mask
));
19649 /* Shuffle the two input vectors independently. */
19650 t1
= gen_reg_rtx (V16QImode
);
19651 t2
= gen_reg_rtx (V16QImode
);
19652 emit_insn (gen_ssse3_pshufbv16qi3 (t1
, op0
, mask
));
19653 emit_insn (gen_ssse3_pshufbv16qi3 (t2
, op1
, mask
));
19656 /* Then merge them together. The key is whether any given control
19657 element contained a bit set that indicates the second word. */
19658 mask
= operands
[3];
19660 if (maskmode
== V2DImode
&& !TARGET_SSE4_1
)
19662 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
19663 more shuffle to convert the V2DI input mask into a V4SI
19664 input mask. At which point the masking that expand_int_vcond
19665 will work as desired. */
19666 rtx t3
= gen_reg_rtx (V4SImode
);
19667 emit_insn (gen_sse2_pshufd_1 (t3
, gen_lowpart (V4SImode
, mask
),
19668 const0_rtx
, const0_rtx
,
19669 const2_rtx
, const2_rtx
));
19671 maskmode
= V4SImode
;
19675 for (i
= 0; i
< w
; i
++)
19677 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
19678 vt
= force_reg (maskmode
, vt
);
19679 mask
= expand_simple_binop (maskmode
, AND
, mask
, vt
,
19680 NULL_RTX
, 0, OPTAB_DIRECT
);
19682 xops
[0] = gen_lowpart (mode
, operands
[0]);
19683 xops
[1] = gen_lowpart (mode
, t2
);
19684 xops
[2] = gen_lowpart (mode
, t1
);
19685 xops
[3] = gen_rtx_EQ (maskmode
, mask
, vt
);
19688 ok
= ix86_expand_int_vcond (xops
);
19693 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
19694 true if we should do zero extension, else sign extension. HIGH_P is
19695 true if we want the N/2 high elements, else the low elements. */
19698 ix86_expand_sse_unpack (rtx operands
[2], bool unsigned_p
, bool high_p
)
19700 enum machine_mode imode
= GET_MODE (operands
[1]);
19705 rtx (*unpack
)(rtx
, rtx
);
19706 rtx (*extract
)(rtx
, rtx
) = NULL
;
19707 enum machine_mode halfmode
= BLKmode
;
19713 unpack
= gen_avx2_zero_extendv16qiv16hi2
;
19715 unpack
= gen_avx2_sign_extendv16qiv16hi2
;
19716 halfmode
= V16QImode
;
19718 = high_p
? gen_vec_extract_hi_v32qi
: gen_vec_extract_lo_v32qi
;
19722 unpack
= gen_avx2_zero_extendv8hiv8si2
;
19724 unpack
= gen_avx2_sign_extendv8hiv8si2
;
19725 halfmode
= V8HImode
;
19727 = high_p
? gen_vec_extract_hi_v16hi
: gen_vec_extract_lo_v16hi
;
19731 unpack
= gen_avx2_zero_extendv4siv4di2
;
19733 unpack
= gen_avx2_sign_extendv4siv4di2
;
19734 halfmode
= V4SImode
;
19736 = high_p
? gen_vec_extract_hi_v8si
: gen_vec_extract_lo_v8si
;
19740 unpack
= gen_sse4_1_zero_extendv8qiv8hi2
;
19742 unpack
= gen_sse4_1_sign_extendv8qiv8hi2
;
19746 unpack
= gen_sse4_1_zero_extendv4hiv4si2
;
19748 unpack
= gen_sse4_1_sign_extendv4hiv4si2
;
19752 unpack
= gen_sse4_1_zero_extendv2siv2di2
;
19754 unpack
= gen_sse4_1_sign_extendv2siv2di2
;
19757 gcc_unreachable ();
19760 if (GET_MODE_SIZE (imode
) == 32)
19762 tmp
= gen_reg_rtx (halfmode
);
19763 emit_insn (extract (tmp
, operands
[1]));
19767 /* Shift higher 8 bytes to lower 8 bytes. */
19768 tmp
= gen_reg_rtx (imode
);
19769 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode
, tmp
),
19770 gen_lowpart (V1TImode
, operands
[1]),
19776 emit_insn (unpack (operands
[0], tmp
));
19780 rtx (*unpack
)(rtx
, rtx
, rtx
);
19786 unpack
= gen_vec_interleave_highv16qi
;
19788 unpack
= gen_vec_interleave_lowv16qi
;
19792 unpack
= gen_vec_interleave_highv8hi
;
19794 unpack
= gen_vec_interleave_lowv8hi
;
19798 unpack
= gen_vec_interleave_highv4si
;
19800 unpack
= gen_vec_interleave_lowv4si
;
19803 gcc_unreachable ();
19806 dest
= gen_lowpart (imode
, operands
[0]);
19809 tmp
= force_reg (imode
, CONST0_RTX (imode
));
19811 tmp
= ix86_expand_sse_cmp (gen_reg_rtx (imode
), GT
, CONST0_RTX (imode
),
19812 operands
[1], pc_rtx
, pc_rtx
);
19814 emit_insn (unpack (dest
, operands
[1], tmp
));
19818 /* Expand conditional increment or decrement using adb/sbb instructions.
19819 The default case using setcc followed by the conditional move can be
19820 done by generic code. */
/* NOTE(review): this region is a lossy extraction of the original file:
   the return type line, braces, and several interior lines (including the
   switch heads that pick among the QI/HI/SI/DImode carry insns) are
   missing, and each original line is split across physical lines.  Only
   comments are added here; every original token is left untouched.  */
19822 ix86_expand_int_addcc (rtx operands
[])
/* operands[1] is the comparison; pull out its code and both arms.  */
19824 enum rtx_code code
= GET_CODE (operands
[1]);
/* insn will point at the adc/sbb generator chosen below.  */
19826 rtx (*insn
)(rtx
, rtx
, rtx
, rtx
, rtx
);
19828 rtx val
= const0_rtx
;
19829 bool fpcmp
= false;
19830 enum machine_mode mode
;
19831 rtx op0
= XEXP (operands
[1], 0);
19832 rtx op1
= XEXP (operands
[1], 1);
/* Only +/-1 adjustments can be done with adc/sbb; anything else is
   rejected (the bail-out branch itself is among the missing lines).  */
19834 if (operands
[3] != const1_rtx
19835 && operands
[3] != constm1_rtx
)
/* The comparison must be expressible as a carry-flag test.  */
19837 if (!ix86_expand_carry_flag_compare (code
, op0
, op1
, &compare_op
))
19839 code
= GET_CODE (compare_op
);
19841 flags
= XEXP (compare_op
, 0);
/* Floating-point compares have their code mapped onto the integer flag
   conditions; presumably fpcmp is set in the missing lines -- verify
   against the full source.  */
19843 if (GET_MODE (flags
) == CCFPmode
19844 || GET_MODE (flags
) == CCFPUmode
)
19847 code
= ix86_fp_compare_code_to_integer (code
);
/* Reverse the condition (unordered-safe variant for FP compares).  */
19854 PUT_CODE (compare_op
,
19855 reverse_condition_maybe_unordered
19856 (GET_CODE (compare_op
)));
19858 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
19861 mode
= GET_MODE (operands
[0]);
19863 /* Construct either adc or sbb insn. */
19864 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
/* sbb path: the (missing) switch on MODE selects the operand width.  */
19869 insn
= gen_subqi3_carry
;
19872 insn
= gen_subhi3_carry
;
19875 insn
= gen_subsi3_carry
;
19878 insn
= gen_subdi3_carry
;
19881 gcc_unreachable ();
/* adc path, again selected by a (missing) switch on MODE.  */
19889 insn
= gen_addqi3_carry
;
19892 insn
= gen_addhi3_carry
;
19895 insn
= gen_addsi3_carry
;
19898 insn
= gen_adddi3_carry
;
19901 gcc_unreachable ();
/* Emit the chosen carry-using add/sub:
   operands[0] := operands[2] OP val, consuming the carry set by
   COMPARE_OP on FLAGS.  */
19904 emit_insn (insn (operands
[0], operands
[2], val
, flags
, compare_op
));
19910 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
19911 but works for floating pointer parameters and nonoffsetable memories.
19912 For pushes, it returns just stack offsets; the values will be saved
19913 in the right order. Maximally three parts are generated. */
19916 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
19921 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
19923 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
19925 gcc_assert (!REG_P (operand
) || !MMX_REGNO_P (REGNO (operand
)));
19926 gcc_assert (size
>= 2 && size
<= 4);
19928 /* Optimize constant pool reference to immediates. This is used by fp
19929 moves, that force all constants to memory to allow combining. */
19930 if (MEM_P (operand
) && MEM_READONLY_P (operand
))
19932 rtx tmp
= maybe_get_pool_constant (operand
);
19937 if (MEM_P (operand
) && !offsettable_memref_p (operand
))
19939 /* The only non-offsetable memories we handle are pushes. */
19940 int ok
= push_operand (operand
, VOIDmode
);
19944 operand
= copy_rtx (operand
);
19945 PUT_MODE (operand
, Pmode
);
19946 parts
[0] = parts
[1] = parts
[2] = parts
[3] = operand
;
19950 if (GET_CODE (operand
) == CONST_VECTOR
)
19952 enum machine_mode imode
= int_mode_for_mode (mode
);
19953 /* Caution: if we looked through a constant pool memory above,
19954 the operand may actually have a different mode now. That's
19955 ok, since we want to pun this all the way back to an integer. */
19956 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
19957 gcc_assert (operand
!= NULL
);
19963 if (mode
== DImode
)
19964 split_double_mode (mode
, &operand
, 1, &parts
[0], &parts
[1]);
19969 if (REG_P (operand
))
19971 gcc_assert (reload_completed
);
19972 for (i
= 0; i
< size
; i
++)
19973 parts
[i
] = gen_rtx_REG (SImode
, REGNO (operand
) + i
);
19975 else if (offsettable_memref_p (operand
))
19977 operand
= adjust_address (operand
, SImode
, 0);
19978 parts
[0] = operand
;
19979 for (i
= 1; i
< size
; i
++)
19980 parts
[i
] = adjust_address (operand
, SImode
, 4 * i
);
19982 else if (GET_CODE (operand
) == CONST_DOUBLE
)
19987 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
19991 real_to_target (l
, &r
, mode
);
19992 parts
[3] = gen_int_mode (l
[3], SImode
);
19993 parts
[2] = gen_int_mode (l
[2], SImode
);
19996 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
19997 parts
[2] = gen_int_mode (l
[2], SImode
);
20000 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
20003 gcc_unreachable ();
20005 parts
[1] = gen_int_mode (l
[1], SImode
);
20006 parts
[0] = gen_int_mode (l
[0], SImode
);
20009 gcc_unreachable ();
20014 if (mode
== TImode
)
20015 split_double_mode (mode
, &operand
, 1, &parts
[0], &parts
[1]);
20016 if (mode
== XFmode
|| mode
== TFmode
)
20018 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
20019 if (REG_P (operand
))
20021 gcc_assert (reload_completed
);
20022 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
20023 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
20025 else if (offsettable_memref_p (operand
))
20027 operand
= adjust_address (operand
, DImode
, 0);
20028 parts
[0] = operand
;
20029 parts
[1] = adjust_address (operand
, upper_mode
, 8);
20031 else if (GET_CODE (operand
) == CONST_DOUBLE
)
20036 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
20037 real_to_target (l
, &r
, mode
);
20039 /* Do not use shift by 32 to avoid warning on 32bit systems. */
20040 if (HOST_BITS_PER_WIDE_INT
>= 64)
20043 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
20044 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
20047 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
20049 if (upper_mode
== SImode
)
20050 parts
[1] = gen_int_mode (l
[2], SImode
);
20051 else if (HOST_BITS_PER_WIDE_INT
>= 64)
20054 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
20055 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
20058 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
20061 gcc_unreachable ();
20068 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
20069 Return false when normal moves are needed; true when all required
20070 insns have been emitted. Operands 2-4 contain the input values
20071 int the correct order; operands 5-7 contain the output values. */
20074 ix86_split_long_move (rtx operands
[])
20079 int collisions
= 0;
20080 enum machine_mode mode
= GET_MODE (operands
[0]);
20081 bool collisionparts
[4];
20083 /* The DFmode expanders may ask us to move double.
20084 For 64bit target this is single move. By hiding the fact
20085 here we simplify i386.md splitters. */
20086 if (TARGET_64BIT
&& GET_MODE_SIZE (GET_MODE (operands
[0])) == 8)
20088 /* Optimize constant pool reference to immediates. This is used by
20089 fp moves, that force all constants to memory to allow combining. */
20091 if (MEM_P (operands
[1])
20092 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
20093 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
20094 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
20095 if (push_operand (operands
[0], VOIDmode
))
20097 operands
[0] = copy_rtx (operands
[0]);
20098 PUT_MODE (operands
[0], Pmode
);
20101 operands
[0] = gen_lowpart (DImode
, operands
[0]);
20102 operands
[1] = gen_lowpart (DImode
, operands
[1]);
20103 emit_move_insn (operands
[0], operands
[1]);
20107 /* The only non-offsettable memory we handle is push. */
20108 if (push_operand (operands
[0], VOIDmode
))
20111 gcc_assert (!MEM_P (operands
[0])
20112 || offsettable_memref_p (operands
[0]));
20114 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
20115 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
20117 /* When emitting push, take care for source operands on the stack. */
20118 if (push
&& MEM_P (operands
[1])
20119 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
20121 rtx src_base
= XEXP (part
[1][nparts
- 1], 0);
20123 /* Compensate for the stack decrement by 4. */
20124 if (!TARGET_64BIT
&& nparts
== 3
20125 && mode
== XFmode
&& TARGET_128BIT_LONG_DOUBLE
)
20126 src_base
= plus_constant (src_base
, 4);
20128 /* src_base refers to the stack pointer and is
20129 automatically decreased by emitted push. */
20130 for (i
= 0; i
< nparts
; i
++)
20131 part
[1][i
] = change_address (part
[1][i
],
20132 GET_MODE (part
[1][i
]), src_base
);
20135 /* We need to do copy in the right order in case an address register
20136 of the source overlaps the destination. */
20137 if (REG_P (part
[0][0]) && MEM_P (part
[1][0]))
20141 for (i
= 0; i
< nparts
; i
++)
20144 = reg_overlap_mentioned_p (part
[0][i
], XEXP (part
[1][0], 0));
20145 if (collisionparts
[i
])
20149 /* Collision in the middle part can be handled by reordering. */
20150 if (collisions
== 1 && nparts
== 3 && collisionparts
[1])
20152 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
20153 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
20155 else if (collisions
== 1
20157 && (collisionparts
[1] || collisionparts
[2]))
20159 if (collisionparts
[1])
20161 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
20162 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
20166 tmp
= part
[0][2]; part
[0][2] = part
[0][3]; part
[0][3] = tmp
;
20167 tmp
= part
[1][2]; part
[1][2] = part
[1][3]; part
[1][3] = tmp
;
20171 /* If there are more collisions, we can't handle it by reordering.
20172 Do an lea to the last part and use only one colliding move. */
20173 else if (collisions
> 1)
20179 base
= part
[0][nparts
- 1];
20181 /* Handle the case when the last part isn't valid for lea.
20182 Happens in 64-bit mode storing the 12-byte XFmode. */
20183 if (GET_MODE (base
) != Pmode
)
20184 base
= gen_rtx_REG (Pmode
, REGNO (base
));
20186 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
20187 part
[1][0] = replace_equiv_address (part
[1][0], base
);
20188 for (i
= 1; i
< nparts
; i
++)
20190 tmp
= plus_constant (base
, UNITS_PER_WORD
* i
);
20191 part
[1][i
] = replace_equiv_address (part
[1][i
], tmp
);
20202 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
20203 emit_insn (gen_addsi3 (stack_pointer_rtx
,
20204 stack_pointer_rtx
, GEN_INT (-4)));
20205 emit_move_insn (part
[0][2], part
[1][2]);
20207 else if (nparts
== 4)
20209 emit_move_insn (part
[0][3], part
[1][3]);
20210 emit_move_insn (part
[0][2], part
[1][2]);
20215 /* In 64bit mode we don't have 32bit push available. In case this is
20216 register, it is OK - we will just use larger counterpart. We also
20217 retype memory - these comes from attempt to avoid REX prefix on
20218 moving of second half of TFmode value. */
20219 if (GET_MODE (part
[1][1]) == SImode
)
20221 switch (GET_CODE (part
[1][1]))
20224 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
20228 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
20232 gcc_unreachable ();
20235 if (GET_MODE (part
[1][0]) == SImode
)
20236 part
[1][0] = part
[1][1];
20239 emit_move_insn (part
[0][1], part
[1][1]);
20240 emit_move_insn (part
[0][0], part
[1][0]);
20244 /* Choose correct order to not overwrite the source before it is copied. */
20245 if ((REG_P (part
[0][0])
20246 && REG_P (part
[1][1])
20247 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
20249 && REGNO (part
[0][0]) == REGNO (part
[1][2]))
20251 && REGNO (part
[0][0]) == REGNO (part
[1][3]))))
20253 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
20255 for (i
= 0, j
= nparts
- 1; i
< nparts
; i
++, j
--)
20257 operands
[2 + i
] = part
[0][j
];
20258 operands
[6 + i
] = part
[1][j
];
20263 for (i
= 0; i
< nparts
; i
++)
20265 operands
[2 + i
] = part
[0][i
];
20266 operands
[6 + i
] = part
[1][i
];
20270 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
20271 if (optimize_insn_for_size_p ())
20273 for (j
= 0; j
< nparts
- 1; j
++)
20274 if (CONST_INT_P (operands
[6 + j
])
20275 && operands
[6 + j
] != const0_rtx
20276 && REG_P (operands
[2 + j
]))
20277 for (i
= j
; i
< nparts
- 1; i
++)
20278 if (CONST_INT_P (operands
[7 + i
])
20279 && INTVAL (operands
[7 + i
]) == INTVAL (operands
[6 + j
]))
20280 operands
[7 + i
] = operands
[2 + j
];
20283 for (i
= 0; i
< nparts
; i
++)
20284 emit_move_insn (operands
[2 + i
], operands
[6 + i
]);
20289 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
20290 left shift by a constant, either using a single shift or
20291 a sequence of add instructions. */
/* NOTE(review): lossy extraction -- the return type, braces and the first
   clause of the controlling condition (before the '||') are missing, and
   original lines are split across physical lines.  Comments only.
   MODE here is the double-word mode being split, so the emitted insns
   operate on the half mode: mode == DImode selects the SImode adds.  */
20294 ix86_expand_ashl_const (rtx operand
, int count
, enum machine_mode mode
)
20296 rtx (*insn
)(rtx
, rtx
, rtx
);
/* Use repeated OPERAND += OPERAND (doubling) when COUNT adds cost no
   more than one constant shift and we are not optimizing for size.  */
20299 || (count
* ix86_cost
->add
<= ix86_cost
->shift_const
20300 && !optimize_insn_for_size_p ()))
20302 insn
= mode
== DImode
? gen_addsi3
: gen_adddi3
;
20303 while (count
-- > 0)
20304 emit_insn (insn (operand
, operand
, operand
));
/* Otherwise emit a single left shift by COUNT.  */
20308 insn
= mode
== DImode
? gen_ashlsi3
: gen_ashldi3
;
20309 emit_insn (insn (operand
, operand
, GEN_INT (count
)));
20314 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
20316 rtx (*gen_ashl3
)(rtx
, rtx
, rtx
);
20317 rtx (*gen_shld
)(rtx
, rtx
, rtx
);
20318 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
20320 rtx low
[2], high
[2];
20323 if (CONST_INT_P (operands
[2]))
20325 split_double_mode (mode
, operands
, 2, low
, high
);
20326 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
20328 if (count
>= half_width
)
20330 emit_move_insn (high
[0], low
[1]);
20331 emit_move_insn (low
[0], const0_rtx
);
20333 if (count
> half_width
)
20334 ix86_expand_ashl_const (high
[0], count
- half_width
, mode
);
20338 gen_shld
= mode
== DImode
? gen_x86_shld
: gen_x86_64_shld
;
20340 if (!rtx_equal_p (operands
[0], operands
[1]))
20341 emit_move_insn (operands
[0], operands
[1]);
20343 emit_insn (gen_shld (high
[0], low
[0], GEN_INT (count
)));
20344 ix86_expand_ashl_const (low
[0], count
, mode
);
20349 split_double_mode (mode
, operands
, 1, low
, high
);
20351 gen_ashl3
= mode
== DImode
? gen_ashlsi3
: gen_ashldi3
;
20353 if (operands
[1] == const1_rtx
)
20355 /* Assuming we've chosen a QImode capable registers, then 1 << N
20356 can be done with two 32/64-bit shifts, no branches, no cmoves. */
20357 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
20359 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
20361 ix86_expand_clear (low
[0]);
20362 ix86_expand_clear (high
[0]);
20363 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (half_width
)));
20365 d
= gen_lowpart (QImode
, low
[0]);
20366 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
20367 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
20368 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
20370 d
= gen_lowpart (QImode
, high
[0]);
20371 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
20372 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
20373 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
20376 /* Otherwise, we can get the same results by manually performing
20377 a bit extract operation on bit 5/6, and then performing the two
20378 shifts. The two methods of getting 0/1 into low/high are exactly
20379 the same size. Avoiding the shift in the bit extract case helps
20380 pentium4 a bit; no one else seems to care much either way. */
20383 enum machine_mode half_mode
;
20384 rtx (*gen_lshr3
)(rtx
, rtx
, rtx
);
20385 rtx (*gen_and3
)(rtx
, rtx
, rtx
);
20386 rtx (*gen_xor3
)(rtx
, rtx
, rtx
);
20387 HOST_WIDE_INT bits
;
20390 if (mode
== DImode
)
20392 half_mode
= SImode
;
20393 gen_lshr3
= gen_lshrsi3
;
20394 gen_and3
= gen_andsi3
;
20395 gen_xor3
= gen_xorsi3
;
20400 half_mode
= DImode
;
20401 gen_lshr3
= gen_lshrdi3
;
20402 gen_and3
= gen_anddi3
;
20403 gen_xor3
= gen_xordi3
;
20407 if (TARGET_PARTIAL_REG_STALL
&& !optimize_insn_for_size_p ())
20408 x
= gen_rtx_ZERO_EXTEND (half_mode
, operands
[2]);
20410 x
= gen_lowpart (half_mode
, operands
[2]);
20411 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
20413 emit_insn (gen_lshr3 (high
[0], high
[0], GEN_INT (bits
)));
20414 emit_insn (gen_and3 (high
[0], high
[0], const1_rtx
));
20415 emit_move_insn (low
[0], high
[0]);
20416 emit_insn (gen_xor3 (low
[0], low
[0], const1_rtx
));
20419 emit_insn (gen_ashl3 (low
[0], low
[0], operands
[2]));
20420 emit_insn (gen_ashl3 (high
[0], high
[0], operands
[2]));
20424 if (operands
[1] == constm1_rtx
)
20426 /* For -1 << N, we can avoid the shld instruction, because we
20427 know that we're shifting 0...31/63 ones into a -1. */
20428 emit_move_insn (low
[0], constm1_rtx
);
20429 if (optimize_insn_for_size_p ())
20430 emit_move_insn (high
[0], low
[0]);
20432 emit_move_insn (high
[0], constm1_rtx
);
20436 gen_shld
= mode
== DImode
? gen_x86_shld
: gen_x86_64_shld
;
20438 if (!rtx_equal_p (operands
[0], operands
[1]))
20439 emit_move_insn (operands
[0], operands
[1]);
20441 split_double_mode (mode
, operands
, 1, low
, high
);
20442 emit_insn (gen_shld (high
[0], low
[0], operands
[2]));
20445 emit_insn (gen_ashl3 (low
[0], low
[0], operands
[2]));
20447 if (TARGET_CMOVE
&& scratch
)
20449 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
20450 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
20452 ix86_expand_clear (scratch
);
20453 emit_insn (gen_x86_shift_adj_1 (high
[0], low
[0], operands
[2], scratch
));
20457 rtx (*gen_x86_shift_adj_2
)(rtx
, rtx
, rtx
)
20458 = mode
== DImode
? gen_x86_shiftsi_adj_2
: gen_x86_shiftdi_adj_2
;
20460 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
20465 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
20467 rtx (*gen_ashr3
)(rtx
, rtx
, rtx
)
20468 = mode
== DImode
? gen_ashrsi3
: gen_ashrdi3
;
20469 rtx (*gen_shrd
)(rtx
, rtx
, rtx
);
20470 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
20472 rtx low
[2], high
[2];
20475 if (CONST_INT_P (operands
[2]))
20477 split_double_mode (mode
, operands
, 2, low
, high
);
20478 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
20480 if (count
== GET_MODE_BITSIZE (mode
) - 1)
20482 emit_move_insn (high
[0], high
[1]);
20483 emit_insn (gen_ashr3 (high
[0], high
[0],
20484 GEN_INT (half_width
- 1)));
20485 emit_move_insn (low
[0], high
[0]);
20488 else if (count
>= half_width
)
20490 emit_move_insn (low
[0], high
[1]);
20491 emit_move_insn (high
[0], low
[0]);
20492 emit_insn (gen_ashr3 (high
[0], high
[0],
20493 GEN_INT (half_width
- 1)));
20495 if (count
> half_width
)
20496 emit_insn (gen_ashr3 (low
[0], low
[0],
20497 GEN_INT (count
- half_width
)));
20501 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
20503 if (!rtx_equal_p (operands
[0], operands
[1]))
20504 emit_move_insn (operands
[0], operands
[1]);
20506 emit_insn (gen_shrd (low
[0], high
[0], GEN_INT (count
)));
20507 emit_insn (gen_ashr3 (high
[0], high
[0], GEN_INT (count
)));
20512 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
20514 if (!rtx_equal_p (operands
[0], operands
[1]))
20515 emit_move_insn (operands
[0], operands
[1]);
20517 split_double_mode (mode
, operands
, 1, low
, high
);
20519 emit_insn (gen_shrd (low
[0], high
[0], operands
[2]));
20520 emit_insn (gen_ashr3 (high
[0], high
[0], operands
[2]));
20522 if (TARGET_CMOVE
&& scratch
)
20524 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
20525 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
20527 emit_move_insn (scratch
, high
[0]);
20528 emit_insn (gen_ashr3 (scratch
, scratch
,
20529 GEN_INT (half_width
- 1)));
20530 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
20535 rtx (*gen_x86_shift_adj_3
)(rtx
, rtx
, rtx
)
20536 = mode
== DImode
? gen_x86_shiftsi_adj_3
: gen_x86_shiftdi_adj_3
;
20538 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
20544 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
20546 rtx (*gen_lshr3
)(rtx
, rtx
, rtx
)
20547 = mode
== DImode
? gen_lshrsi3
: gen_lshrdi3
;
20548 rtx (*gen_shrd
)(rtx
, rtx
, rtx
);
20549 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
20551 rtx low
[2], high
[2];
20554 if (CONST_INT_P (operands
[2]))
20556 split_double_mode (mode
, operands
, 2, low
, high
);
20557 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
20559 if (count
>= half_width
)
20561 emit_move_insn (low
[0], high
[1]);
20562 ix86_expand_clear (high
[0]);
20564 if (count
> half_width
)
20565 emit_insn (gen_lshr3 (low
[0], low
[0],
20566 GEN_INT (count
- half_width
)));
20570 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
20572 if (!rtx_equal_p (operands
[0], operands
[1]))
20573 emit_move_insn (operands
[0], operands
[1]);
20575 emit_insn (gen_shrd (low
[0], high
[0], GEN_INT (count
)));
20576 emit_insn (gen_lshr3 (high
[0], high
[0], GEN_INT (count
)));
20581 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
20583 if (!rtx_equal_p (operands
[0], operands
[1]))
20584 emit_move_insn (operands
[0], operands
[1]);
20586 split_double_mode (mode
, operands
, 1, low
, high
);
20588 emit_insn (gen_shrd (low
[0], high
[0], operands
[2]));
20589 emit_insn (gen_lshr3 (high
[0], high
[0], operands
[2]));
20591 if (TARGET_CMOVE
&& scratch
)
20593 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
20594 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
20596 ix86_expand_clear (scratch
);
20597 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
20602 rtx (*gen_x86_shift_adj_2
)(rtx
, rtx
, rtx
)
20603 = mode
== DImode
? gen_x86_shiftsi_adj_2
: gen_x86_shiftdi_adj_2
;
20605 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
20610 /* Predict just emitted jump instruction to be taken with probability PROB. */
/* NOTE(review): lossy extraction -- the return type and braces of the
   original definition are missing, and original lines are split across
   physical lines.  Comments only; code tokens unchanged.  */
20612 predict_jump (int prob
)
/* Grab the insn just emitted; it must be the jump we are annotating.  */
20614 rtx insn
= get_last_insn ();
20615 gcc_assert (JUMP_P (insn
));
/* Attach a branch-probability note carrying PROB to the jump.  */
20616 add_reg_note (insn
, REG_BR_PROB
, GEN_INT (prob
));
20619 /* Helper function for the string operations below. Dest VARIABLE whether
20620 it is aligned to VALUE bytes. If true, jump to the label. */
/* NOTE(review): lossy extraction -- the return type, braces, the tail
   arguments of emit_cmp_and_jump_insns, the return of LABEL and the
   branch selecting between the two predict_jump calls are missing.
   Comments only; code tokens unchanged.  */
20622 ix86_expand_aligntest (rtx variable
, int value
, bool epilogue
)
/* Fresh label to branch to, and a scratch register of VARIABLE's mode
   for the masked value.  */
20624 rtx label
= gen_label_rtx ();
20625 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
/* tmpcount = VARIABLE & VALUE, using the AND insn of matching width.  */
20626 if (GET_MODE (variable
) == DImode
)
20627 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
20629 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
/* Compare the masked value against zero and jump on equality.  */
20630 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
/* One of these two branch predictions is emitted -- presumably chosen
   by EPILOGUE; the selecting condition is among the missing lines.  */
20633 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
20635 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
20639 /* Adjust COUNTER by the VALUE. */
/* NOTE(review): lossy extraction -- return type and braces missing;
   original lines split across physical lines.  Comments only.  */
20641 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
/* Pick the add expander matching COUNTREG's width.  */
20643 rtx (*gen_add
)(rtx
, rtx
, rtx
)
20644 = GET_MODE (countreg
) == DImode
? gen_adddi3
: gen_addsi3
;
/* COUNTREG -= VALUE, expressed as an add of the negated constant.  */
20646 emit_insn (gen_add (countreg
, countreg
, GEN_INT (-value
)));
20649 /* Zero extend possibly SImode EXP to Pmode register. */
/* NOTE(review): lossy extraction -- return type, braces, the local
   declaration of R and the final 'return r;' are missing.  Comments
   only; code tokens unchanged.  */
20651 ix86_zero_extend_to_Pmode (rtx exp
)
/* Constants (VOIDmode) are simply forced into a Pmode register.  */
20654 if (GET_MODE (exp
) == VOIDmode
)
20655 return force_reg (Pmode
, exp
);
/* Already Pmode: just copy into a fresh register.  */
20656 if (GET_MODE (exp
) == Pmode
)
20657 return copy_to_mode_reg (Pmode
, exp
);
/* Otherwise zero-extend SImode -> DImode into a new Pmode register;
   the return of R is among the missing lines.  */
20658 r
= gen_reg_rtx (Pmode
);
20659 emit_insn (gen_zero_extendsidi2 (r
, exp
));
20663 /* Divide COUNTREG by SCALE. */
/* NOTE(review): lossy extraction -- return type, braces, local
   declarations and the final return of SC are missing.  Comments only;
   code tokens unchanged.  */
20665 scale_counter (rtx countreg
, int scale
)
/* Constant counts are divided at compile time.  (Any scale == 1 early
   exit would be among the missing lines.)  */
20671 if (CONST_INT_P (countreg
))
20672 return GEN_INT (INTVAL (countreg
) / scale
);
20673 gcc_assert (REG_P (countreg
));
/* Runtime counts: logical shift right by log2(SCALE) -- the use of
   exact_log2 indicates SCALE is expected to be a power of two here.  */
20675 sc
= expand_simple_binop (GET_MODE (countreg
), LSHIFTRT
, countreg
,
20676 GEN_INT (exact_log2 (scale
)),
20677 NULL
, 1, OPTAB_DIRECT
);
20681 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
20682 DImode for constant loop counts. */
/* NOTE(review): lossy extraction -- braces and the return statements of
   the last three branches are missing.  Comments only; code tokens
   unchanged.  */
20684 static enum machine_mode
20685 counter_mode (rtx count_exp
)
/* A count that already carries a mode keeps it.  */
20687 if (GET_MODE (count_exp
) != VOIDmode
)
20688 return GET_MODE (count_exp
);
/* Non-constant VOIDmode count: the (missing) return's mode cannot be
   confirmed from this chunk -- verify against the full source.  */
20689 if (!CONST_INT_P (count_exp
))
/* Constants that do not fit in 32 bits need the wider mode on 64-bit
   targets; the returns themselves are among the missing lines.  */
20691 if (TARGET_64BIT
&& (INTVAL (count_exp
) & ~0xffffffff))
20696 /* When SRCPTR is non-NULL, output simple loop to move memory
20697 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
20698 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
20699 equivalent loop to set memory by VALUE (supposed to be in MODE).
20701 The size is rounded down to whole number of chunk size moved at once.
20702 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
20706 expand_set_or_movmem_via_loop (rtx destmem
, rtx srcmem
,
20707 rtx destptr
, rtx srcptr
, rtx value
,
20708 rtx count
, enum machine_mode mode
, int unroll
,
20711 rtx out_label
, top_label
, iter
, tmp
;
20712 enum machine_mode iter_mode
= counter_mode (count
);
20713 rtx piece_size
= GEN_INT (GET_MODE_SIZE (mode
) * unroll
);
20714 rtx piece_size_mask
= GEN_INT (~((GET_MODE_SIZE (mode
) * unroll
) - 1));
20720 top_label
= gen_label_rtx ();
20721 out_label
= gen_label_rtx ();
20722 iter
= gen_reg_rtx (iter_mode
);
20724 size
= expand_simple_binop (iter_mode
, AND
, count
, piece_size_mask
,
20725 NULL
, 1, OPTAB_DIRECT
);
20726 /* Those two should combine. */
20727 if (piece_size
== const1_rtx
)
20729 emit_cmp_and_jump_insns (size
, const0_rtx
, EQ
, NULL_RTX
, iter_mode
,
20731 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
20733 emit_move_insn (iter
, const0_rtx
);
20735 emit_label (top_label
);
20737 tmp
= convert_modes (Pmode
, iter_mode
, iter
, true);
20738 x_addr
= gen_rtx_PLUS (Pmode
, destptr
, tmp
);
20739 destmem
= change_address (destmem
, mode
, x_addr
);
20743 y_addr
= gen_rtx_PLUS (Pmode
, srcptr
, copy_rtx (tmp
));
20744 srcmem
= change_address (srcmem
, mode
, y_addr
);
20746 /* When unrolling for chips that reorder memory reads and writes,
20747 we can save registers by using single temporary.
20748 Also using 4 temporaries is overkill in 32bit mode. */
20749 if (!TARGET_64BIT
&& 0)
20751 for (i
= 0; i
< unroll
; i
++)
20756 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
20758 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
20760 emit_move_insn (destmem
, srcmem
);
20766 gcc_assert (unroll
<= 4);
20767 for (i
= 0; i
< unroll
; i
++)
20769 tmpreg
[i
] = gen_reg_rtx (mode
);
20773 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
20775 emit_move_insn (tmpreg
[i
], srcmem
);
20777 for (i
= 0; i
< unroll
; i
++)
20782 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
20784 emit_move_insn (destmem
, tmpreg
[i
]);
20789 for (i
= 0; i
< unroll
; i
++)
20793 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
20794 emit_move_insn (destmem
, value
);
20797 tmp
= expand_simple_binop (iter_mode
, PLUS
, iter
, piece_size
, iter
,
20798 true, OPTAB_LIB_WIDEN
);
20800 emit_move_insn (iter
, tmp
);
20802 emit_cmp_and_jump_insns (iter
, size
, LT
, NULL_RTX
, iter_mode
,
20804 if (expected_size
!= -1)
20806 expected_size
/= GET_MODE_SIZE (mode
) * unroll
;
20807 if (expected_size
== 0)
20809 else if (expected_size
> REG_BR_PROB_BASE
)
20810 predict_jump (REG_BR_PROB_BASE
- 1);
20812 predict_jump (REG_BR_PROB_BASE
- (REG_BR_PROB_BASE
+ expected_size
/ 2) / expected_size
);
20815 predict_jump (REG_BR_PROB_BASE
* 80 / 100);
20816 iter
= ix86_zero_extend_to_Pmode (iter
);
20817 tmp
= expand_simple_binop (Pmode
, PLUS
, destptr
, iter
, destptr
,
20818 true, OPTAB_LIB_WIDEN
);
20819 if (tmp
!= destptr
)
20820 emit_move_insn (destptr
, tmp
);
20823 tmp
= expand_simple_binop (Pmode
, PLUS
, srcptr
, iter
, srcptr
,
20824 true, OPTAB_LIB_WIDEN
);
20826 emit_move_insn (srcptr
, tmp
);
20828 emit_label (out_label
);
20831 /* Output "rep; mov" instruction.
20832 Arguments have same meaning as for previous function */
20834 expand_movmem_via_rep_mov (rtx destmem
, rtx srcmem
,
20835 rtx destptr
, rtx srcptr
,
20837 enum machine_mode mode
)
20842 HOST_WIDE_INT rounded_count
;
20844 /* If the size is known, it is shorter to use rep movs. */
20845 if (mode
== QImode
&& CONST_INT_P (count
)
20846 && !(INTVAL (count
) & 3))
20849 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
20850 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
20851 if (srcptr
!= XEXP (srcmem
, 0) || GET_MODE (srcmem
) != BLKmode
)
20852 srcmem
= adjust_automodify_address_nv (srcmem
, BLKmode
, srcptr
, 0);
20853 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
20854 if (mode
!= QImode
)
20856 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
20857 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
20858 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
20859 srcexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
20860 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
20861 srcexp
= gen_rtx_PLUS (Pmode
, srcexp
, srcptr
);
20865 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
20866 srcexp
= gen_rtx_PLUS (Pmode
, srcptr
, countreg
);
20868 if (CONST_INT_P (count
))
20870 rounded_count
= (INTVAL (count
)
20871 & ~((HOST_WIDE_INT
) GET_MODE_SIZE (mode
) - 1));
20872 destmem
= shallow_copy_rtx (destmem
);
20873 srcmem
= shallow_copy_rtx (srcmem
);
20874 set_mem_size (destmem
, rounded_count
);
20875 set_mem_size (srcmem
, rounded_count
);
20879 if (MEM_SIZE_KNOWN_P (destmem
))
20880 clear_mem_size (destmem
);
20881 if (MEM_SIZE_KNOWN_P (srcmem
))
20882 clear_mem_size (srcmem
);
20884 emit_insn (gen_rep_mov (destptr
, destmem
, srcptr
, srcmem
, countreg
,
20888 /* Output "rep; stos" instruction.
20889 Arguments have same meaning as for previous function */
20891 expand_setmem_via_rep_stos (rtx destmem
, rtx destptr
, rtx value
,
20892 rtx count
, enum machine_mode mode
,
20897 HOST_WIDE_INT rounded_count
;
20899 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
20900 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
20901 value
= force_reg (mode
, gen_lowpart (mode
, value
));
20902 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
20903 if (mode
!= QImode
)
20905 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
20906 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
20907 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
20910 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
20911 if (orig_value
== const0_rtx
&& CONST_INT_P (count
))
20913 rounded_count
= (INTVAL (count
)
20914 & ~((HOST_WIDE_INT
) GET_MODE_SIZE (mode
) - 1));
20915 destmem
= shallow_copy_rtx (destmem
);
20916 set_mem_size (destmem
, rounded_count
);
20918 else if (MEM_SIZE_KNOWN_P (destmem
))
20919 clear_mem_size (destmem
);
20920 emit_insn (gen_rep_stos (destptr
, countreg
, destmem
, value
, destexp
));
20924 emit_strmov (rtx destmem
, rtx srcmem
,
20925 rtx destptr
, rtx srcptr
, enum machine_mode mode
, int offset
)
20927 rtx src
= adjust_automodify_address_nv (srcmem
, mode
, srcptr
, offset
);
20928 rtx dest
= adjust_automodify_address_nv (destmem
, mode
, destptr
, offset
);
20929 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
20932 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
20934 expand_movmem_epilogue (rtx destmem
, rtx srcmem
,
20935 rtx destptr
, rtx srcptr
, rtx count
, int max_size
)
20938 if (CONST_INT_P (count
))
20940 HOST_WIDE_INT countval
= INTVAL (count
);
20943 if ((countval
& 0x10) && max_size
> 16)
20947 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
20948 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
+ 8);
20951 gcc_unreachable ();
20954 if ((countval
& 0x08) && max_size
> 8)
20957 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
20960 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
20961 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
+ 4);
20965 if ((countval
& 0x04) && max_size
> 4)
20967 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
20970 if ((countval
& 0x02) && max_size
> 2)
20972 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, HImode
, offset
);
20975 if ((countval
& 0x01) && max_size
> 1)
20977 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, QImode
, offset
);
20984 count
= expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
20985 count
, 1, OPTAB_DIRECT
);
20986 expand_set_or_movmem_via_loop (destmem
, srcmem
, destptr
, srcptr
, NULL
,
20987 count
, QImode
, 1, 4);
20991 /* When there are stringops, we can cheaply increase dest and src pointers.
20992 Otherwise we save code size by maintaining offset (zero is readily
20993 available from preceding rep operation) and using x86 addressing modes.
20995 if (TARGET_SINGLE_STRINGOP
)
20999 rtx label
= ix86_expand_aligntest (count
, 4, true);
21000 src
= change_address (srcmem
, SImode
, srcptr
);
21001 dest
= change_address (destmem
, SImode
, destptr
);
21002 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
21003 emit_label (label
);
21004 LABEL_NUSES (label
) = 1;
21008 rtx label
= ix86_expand_aligntest (count
, 2, true);
21009 src
= change_address (srcmem
, HImode
, srcptr
);
21010 dest
= change_address (destmem
, HImode
, destptr
);
21011 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
21012 emit_label (label
);
21013 LABEL_NUSES (label
) = 1;
21017 rtx label
= ix86_expand_aligntest (count
, 1, true);
21018 src
= change_address (srcmem
, QImode
, srcptr
);
21019 dest
= change_address (destmem
, QImode
, destptr
);
21020 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
21021 emit_label (label
);
21022 LABEL_NUSES (label
) = 1;
21027 rtx offset
= force_reg (Pmode
, const0_rtx
);
21032 rtx label
= ix86_expand_aligntest (count
, 4, true);
21033 src
= change_address (srcmem
, SImode
, srcptr
);
21034 dest
= change_address (destmem
, SImode
, destptr
);
21035 emit_move_insn (dest
, src
);
21036 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (4), NULL
,
21037 true, OPTAB_LIB_WIDEN
);
21039 emit_move_insn (offset
, tmp
);
21040 emit_label (label
);
21041 LABEL_NUSES (label
) = 1;
21045 rtx label
= ix86_expand_aligntest (count
, 2, true);
21046 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
21047 src
= change_address (srcmem
, HImode
, tmp
);
21048 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
21049 dest
= change_address (destmem
, HImode
, tmp
);
21050 emit_move_insn (dest
, src
);
21051 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (2), tmp
,
21052 true, OPTAB_LIB_WIDEN
);
21054 emit_move_insn (offset
, tmp
);
21055 emit_label (label
);
21056 LABEL_NUSES (label
) = 1;
21060 rtx label
= ix86_expand_aligntest (count
, 1, true);
21061 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
21062 src
= change_address (srcmem
, QImode
, tmp
);
21063 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
21064 dest
= change_address (destmem
, QImode
, tmp
);
21065 emit_move_insn (dest
, src
);
21066 emit_label (label
);
21067 LABEL_NUSES (label
) = 1;
21072 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
21074 expand_setmem_epilogue_via_loop (rtx destmem
, rtx destptr
, rtx value
,
21075 rtx count
, int max_size
)
21078 expand_simple_binop (counter_mode (count
), AND
, count
,
21079 GEN_INT (max_size
- 1), count
, 1, OPTAB_DIRECT
);
21080 expand_set_or_movmem_via_loop (destmem
, NULL
, destptr
, NULL
,
21081 gen_lowpart (QImode
, value
), count
, QImode
,
21085 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
21087 expand_setmem_epilogue (rtx destmem
, rtx destptr
, rtx value
, rtx count
, int max_size
)
21091 if (CONST_INT_P (count
))
21093 HOST_WIDE_INT countval
= INTVAL (count
);
21096 if ((countval
& 0x10) && max_size
> 16)
21100 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
21101 emit_insn (gen_strset (destptr
, dest
, value
));
21102 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
+ 8);
21103 emit_insn (gen_strset (destptr
, dest
, value
));
21106 gcc_unreachable ();
21109 if ((countval
& 0x08) && max_size
> 8)
21113 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
21114 emit_insn (gen_strset (destptr
, dest
, value
));
21118 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
21119 emit_insn (gen_strset (destptr
, dest
, value
));
21120 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
+ 4);
21121 emit_insn (gen_strset (destptr
, dest
, value
));
21125 if ((countval
& 0x04) && max_size
> 4)
21127 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
21128 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
21131 if ((countval
& 0x02) && max_size
> 2)
21133 dest
= adjust_automodify_address_nv (destmem
, HImode
, destptr
, offset
);
21134 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
21137 if ((countval
& 0x01) && max_size
> 1)
21139 dest
= adjust_automodify_address_nv (destmem
, QImode
, destptr
, offset
);
21140 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
21147 expand_setmem_epilogue_via_loop (destmem
, destptr
, value
, count
, max_size
);
21152 rtx label
= ix86_expand_aligntest (count
, 16, true);
21155 dest
= change_address (destmem
, DImode
, destptr
);
21156 emit_insn (gen_strset (destptr
, dest
, value
));
21157 emit_insn (gen_strset (destptr
, dest
, value
));
21161 dest
= change_address (destmem
, SImode
, destptr
);
21162 emit_insn (gen_strset (destptr
, dest
, value
));
21163 emit_insn (gen_strset (destptr
, dest
, value
));
21164 emit_insn (gen_strset (destptr
, dest
, value
));
21165 emit_insn (gen_strset (destptr
, dest
, value
));
21167 emit_label (label
);
21168 LABEL_NUSES (label
) = 1;
21172 rtx label
= ix86_expand_aligntest (count
, 8, true);
21175 dest
= change_address (destmem
, DImode
, destptr
);
21176 emit_insn (gen_strset (destptr
, dest
, value
));
21180 dest
= change_address (destmem
, SImode
, destptr
);
21181 emit_insn (gen_strset (destptr
, dest
, value
));
21182 emit_insn (gen_strset (destptr
, dest
, value
));
21184 emit_label (label
);
21185 LABEL_NUSES (label
) = 1;
21189 rtx label
= ix86_expand_aligntest (count
, 4, true);
21190 dest
= change_address (destmem
, SImode
, destptr
);
21191 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
21192 emit_label (label
);
21193 LABEL_NUSES (label
) = 1;
21197 rtx label
= ix86_expand_aligntest (count
, 2, true);
21198 dest
= change_address (destmem
, HImode
, destptr
);
21199 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
21200 emit_label (label
);
21201 LABEL_NUSES (label
) = 1;
21205 rtx label
= ix86_expand_aligntest (count
, 1, true);
21206 dest
= change_address (destmem
, QImode
, destptr
);
21207 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
21208 emit_label (label
);
21209 LABEL_NUSES (label
) = 1;
21213 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
21214 DESIRED_ALIGNMENT. */
21216 expand_movmem_prologue (rtx destmem
, rtx srcmem
,
21217 rtx destptr
, rtx srcptr
, rtx count
,
21218 int align
, int desired_alignment
)
21220 if (align
<= 1 && desired_alignment
> 1)
21222 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
21223 srcmem
= change_address (srcmem
, QImode
, srcptr
);
21224 destmem
= change_address (destmem
, QImode
, destptr
);
21225 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
21226 ix86_adjust_counter (count
, 1);
21227 emit_label (label
);
21228 LABEL_NUSES (label
) = 1;
21230 if (align
<= 2 && desired_alignment
> 2)
21232 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
21233 srcmem
= change_address (srcmem
, HImode
, srcptr
);
21234 destmem
= change_address (destmem
, HImode
, destptr
);
21235 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
21236 ix86_adjust_counter (count
, 2);
21237 emit_label (label
);
21238 LABEL_NUSES (label
) = 1;
21240 if (align
<= 4 && desired_alignment
> 4)
21242 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
21243 srcmem
= change_address (srcmem
, SImode
, srcptr
);
21244 destmem
= change_address (destmem
, SImode
, destptr
);
21245 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
21246 ix86_adjust_counter (count
, 4);
21247 emit_label (label
);
21248 LABEL_NUSES (label
) = 1;
21250 gcc_assert (desired_alignment
<= 8);
21253 /* Copy enough from DST to SRC to align DST known to DESIRED_ALIGN.
21254 ALIGN_BYTES is how many bytes need to be copied. */
21256 expand_constant_movmem_prologue (rtx dst
, rtx
*srcp
, rtx destreg
, rtx srcreg
,
21257 int desired_align
, int align_bytes
)
21260 rtx orig_dst
= dst
;
21261 rtx orig_src
= src
;
21263 int src_align_bytes
= get_mem_align_offset (src
, desired_align
* BITS_PER_UNIT
);
21264 if (src_align_bytes
>= 0)
21265 src_align_bytes
= desired_align
- src_align_bytes
;
21266 if (align_bytes
& 1)
21268 dst
= adjust_automodify_address_nv (dst
, QImode
, destreg
, 0);
21269 src
= adjust_automodify_address_nv (src
, QImode
, srcreg
, 0);
21271 emit_insn (gen_strmov (destreg
, dst
, srcreg
, src
));
21273 if (align_bytes
& 2)
21275 dst
= adjust_automodify_address_nv (dst
, HImode
, destreg
, off
);
21276 src
= adjust_automodify_address_nv (src
, HImode
, srcreg
, off
);
21277 if (MEM_ALIGN (dst
) < 2 * BITS_PER_UNIT
)
21278 set_mem_align (dst
, 2 * BITS_PER_UNIT
);
21279 if (src_align_bytes
>= 0
21280 && (src_align_bytes
& 1) == (align_bytes
& 1)
21281 && MEM_ALIGN (src
) < 2 * BITS_PER_UNIT
)
21282 set_mem_align (src
, 2 * BITS_PER_UNIT
);
21284 emit_insn (gen_strmov (destreg
, dst
, srcreg
, src
));
21286 if (align_bytes
& 4)
21288 dst
= adjust_automodify_address_nv (dst
, SImode
, destreg
, off
);
21289 src
= adjust_automodify_address_nv (src
, SImode
, srcreg
, off
);
21290 if (MEM_ALIGN (dst
) < 4 * BITS_PER_UNIT
)
21291 set_mem_align (dst
, 4 * BITS_PER_UNIT
);
21292 if (src_align_bytes
>= 0)
21294 unsigned int src_align
= 0;
21295 if ((src_align_bytes
& 3) == (align_bytes
& 3))
21297 else if ((src_align_bytes
& 1) == (align_bytes
& 1))
21299 if (MEM_ALIGN (src
) < src_align
* BITS_PER_UNIT
)
21300 set_mem_align (src
, src_align
* BITS_PER_UNIT
);
21303 emit_insn (gen_strmov (destreg
, dst
, srcreg
, src
));
21305 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
, off
);
21306 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
, off
);
21307 if (MEM_ALIGN (dst
) < (unsigned int) desired_align
* BITS_PER_UNIT
)
21308 set_mem_align (dst
, desired_align
* BITS_PER_UNIT
);
21309 if (src_align_bytes
>= 0)
21311 unsigned int src_align
= 0;
21312 if ((src_align_bytes
& 7) == (align_bytes
& 7))
21314 else if ((src_align_bytes
& 3) == (align_bytes
& 3))
21316 else if ((src_align_bytes
& 1) == (align_bytes
& 1))
21318 if (src_align
> (unsigned int) desired_align
)
21319 src_align
= desired_align
;
21320 if (MEM_ALIGN (src
) < src_align
* BITS_PER_UNIT
)
21321 set_mem_align (src
, src_align
* BITS_PER_UNIT
);
21323 if (MEM_SIZE_KNOWN_P (orig_dst
))
21324 set_mem_size (dst
, MEM_SIZE (orig_dst
) - align_bytes
);
21325 if (MEM_SIZE_KNOWN_P (orig_src
))
21326 set_mem_size (src
, MEM_SIZE (orig_src
) - align_bytes
);
21331 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
21332 DESIRED_ALIGNMENT. */
21334 expand_setmem_prologue (rtx destmem
, rtx destptr
, rtx value
, rtx count
,
21335 int align
, int desired_alignment
)
21337 if (align
<= 1 && desired_alignment
> 1)
21339 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
21340 destmem
= change_address (destmem
, QImode
, destptr
);
21341 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (QImode
, value
)));
21342 ix86_adjust_counter (count
, 1);
21343 emit_label (label
);
21344 LABEL_NUSES (label
) = 1;
21346 if (align
<= 2 && desired_alignment
> 2)
21348 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
21349 destmem
= change_address (destmem
, HImode
, destptr
);
21350 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (HImode
, value
)));
21351 ix86_adjust_counter (count
, 2);
21352 emit_label (label
);
21353 LABEL_NUSES (label
) = 1;
21355 if (align
<= 4 && desired_alignment
> 4)
21357 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
21358 destmem
= change_address (destmem
, SImode
, destptr
);
21359 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (SImode
, value
)));
21360 ix86_adjust_counter (count
, 4);
21361 emit_label (label
);
21362 LABEL_NUSES (label
) = 1;
21364 gcc_assert (desired_alignment
<= 8);
21367 /* Set enough from DST to align DST known to by aligned by ALIGN to
21368 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
21370 expand_constant_setmem_prologue (rtx dst
, rtx destreg
, rtx value
,
21371 int desired_align
, int align_bytes
)
21374 rtx orig_dst
= dst
;
21375 if (align_bytes
& 1)
21377 dst
= adjust_automodify_address_nv (dst
, QImode
, destreg
, 0);
21379 emit_insn (gen_strset (destreg
, dst
,
21380 gen_lowpart (QImode
, value
)));
21382 if (align_bytes
& 2)
21384 dst
= adjust_automodify_address_nv (dst
, HImode
, destreg
, off
);
21385 if (MEM_ALIGN (dst
) < 2 * BITS_PER_UNIT
)
21386 set_mem_align (dst
, 2 * BITS_PER_UNIT
);
21388 emit_insn (gen_strset (destreg
, dst
,
21389 gen_lowpart (HImode
, value
)));
21391 if (align_bytes
& 4)
21393 dst
= adjust_automodify_address_nv (dst
, SImode
, destreg
, off
);
21394 if (MEM_ALIGN (dst
) < 4 * BITS_PER_UNIT
)
21395 set_mem_align (dst
, 4 * BITS_PER_UNIT
);
21397 emit_insn (gen_strset (destreg
, dst
,
21398 gen_lowpart (SImode
, value
)));
21400 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
, off
);
21401 if (MEM_ALIGN (dst
) < (unsigned int) desired_align
* BITS_PER_UNIT
)
21402 set_mem_align (dst
, desired_align
* BITS_PER_UNIT
);
21403 if (MEM_SIZE_KNOWN_P (orig_dst
))
21404 set_mem_size (dst
, MEM_SIZE (orig_dst
) - align_bytes
);
21408 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
21409 static enum stringop_alg
21410 decide_alg (HOST_WIDE_INT count
, HOST_WIDE_INT expected_size
, bool memset
,
21411 int *dynamic_check
)
21413 const struct stringop_algs
* algs
;
21414 bool optimize_for_speed
;
21415 /* Algorithms using the rep prefix want at least edi and ecx;
21416 additionally, memset wants eax and memcpy wants esi. Don't
21417 consider such algorithms if the user has appropriated those
21418 registers for their own purposes. */
21419 bool rep_prefix_usable
= !(fixed_regs
[CX_REG
] || fixed_regs
[DI_REG
]
21421 ? fixed_regs
[AX_REG
] : fixed_regs
[SI_REG
]));
21423 #define ALG_USABLE_P(alg) (rep_prefix_usable \
21424 || (alg != rep_prefix_1_byte \
21425 && alg != rep_prefix_4_byte \
21426 && alg != rep_prefix_8_byte))
21427 const struct processor_costs
*cost
;
21429 /* Even if the string operation call is cold, we still might spend a lot
21430 of time processing large blocks. */
21431 if (optimize_function_for_size_p (cfun
)
21432 || (optimize_insn_for_size_p ()
21433 && expected_size
!= -1 && expected_size
< 256))
21434 optimize_for_speed
= false;
21436 optimize_for_speed
= true;
21438 cost
= optimize_for_speed
? ix86_cost
: &ix86_size_cost
;
21440 *dynamic_check
= -1;
21442 algs
= &cost
->memset
[TARGET_64BIT
!= 0];
21444 algs
= &cost
->memcpy
[TARGET_64BIT
!= 0];
21445 if (ix86_stringop_alg
!= no_stringop
&& ALG_USABLE_P (ix86_stringop_alg
))
21446 return ix86_stringop_alg
;
21447 /* rep; movq or rep; movl is the smallest variant. */
21448 else if (!optimize_for_speed
)
21450 if (!count
|| (count
& 3))
21451 return rep_prefix_usable
? rep_prefix_1_byte
: loop_1_byte
;
21453 return rep_prefix_usable
? rep_prefix_4_byte
: loop
;
21455 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
21457 else if (expected_size
!= -1 && expected_size
< 4)
21458 return loop_1_byte
;
21459 else if (expected_size
!= -1)
21462 enum stringop_alg alg
= libcall
;
21463 for (i
= 0; i
< MAX_STRINGOP_ALGS
; i
++)
21465 /* We get here if the algorithms that were not libcall-based
21466 were rep-prefix based and we are unable to use rep prefixes
21467 based on global register usage. Break out of the loop and
21468 use the heuristic below. */
21469 if (algs
->size
[i
].max
== 0)
21471 if (algs
->size
[i
].max
>= expected_size
|| algs
->size
[i
].max
== -1)
21473 enum stringop_alg candidate
= algs
->size
[i
].alg
;
21475 if (candidate
!= libcall
&& ALG_USABLE_P (candidate
))
21477 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
21478 last non-libcall inline algorithm. */
21479 if (TARGET_INLINE_ALL_STRINGOPS
)
21481 /* When the current size is best to be copied by a libcall,
21482 but we are still forced to inline, run the heuristic below
21483 that will pick code for medium sized blocks. */
21484 if (alg
!= libcall
)
21488 else if (ALG_USABLE_P (candidate
))
21492 gcc_assert (TARGET_INLINE_ALL_STRINGOPS
|| !rep_prefix_usable
);
21494 /* When asked to inline the call anyway, try to pick meaningful choice.
21495 We look for maximal size of block that is faster to copy by hand and
21496 take blocks of at most of that size guessing that average size will
21497 be roughly half of the block.
21499 If this turns out to be bad, we might simply specify the preferred
21500 choice in ix86_costs. */
21501 if ((TARGET_INLINE_ALL_STRINGOPS
|| TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
21502 && (algs
->unknown_size
== libcall
|| !ALG_USABLE_P (algs
->unknown_size
)))
21505 enum stringop_alg alg
;
21507 bool any_alg_usable_p
= true;
21509 for (i
= 0; i
< MAX_STRINGOP_ALGS
; i
++)
21511 enum stringop_alg candidate
= algs
->size
[i
].alg
;
21512 any_alg_usable_p
= any_alg_usable_p
&& ALG_USABLE_P (candidate
);
21514 if (candidate
!= libcall
&& candidate
21515 && ALG_USABLE_P (candidate
))
21516 max
= algs
->size
[i
].max
;
21518 /* If there aren't any usable algorithms, then recursing on
21519 smaller sizes isn't going to find anything. Just return the
21520 simple byte-at-a-time copy loop. */
21521 if (!any_alg_usable_p
)
21523 /* Pick something reasonable. */
21524 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
21525 *dynamic_check
= 128;
21526 return loop_1_byte
;
21530 alg
= decide_alg (count
, max
/ 2, memset
, dynamic_check
);
21531 gcc_assert (*dynamic_check
== -1);
21532 gcc_assert (alg
!= libcall
);
21533 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
21534 *dynamic_check
= max
;
21537 return ALG_USABLE_P (algs
->unknown_size
) ? algs
->unknown_size
: libcall
;
21538 #undef ALG_USABLE_P
21541 /* Decide on alignment. We know that the operand is already aligned to ALIGN
21542 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
21544 decide_alignment (int align
,
21545 enum stringop_alg alg
,
21548 int desired_align
= 0;
21552 gcc_unreachable ();
21554 case unrolled_loop
:
21555 desired_align
= GET_MODE_SIZE (Pmode
);
21557 case rep_prefix_8_byte
:
21560 case rep_prefix_4_byte
:
21561 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
21562 copying whole cacheline at once. */
21563 if (TARGET_PENTIUMPRO
)
21568 case rep_prefix_1_byte
:
21569 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
21570 copying whole cacheline at once. */
21571 if (TARGET_PENTIUMPRO
)
21585 if (desired_align
< align
)
21586 desired_align
= align
;
21587 if (expected_size
!= -1 && expected_size
< 4)
21588 desired_align
= align
;
21589 return desired_align
;
21592 /* Return the smallest power of 2 greater than VAL. */
21594 smallest_pow2_greater_than (int val
)
21602 /* Expand string move (memcpy) operation. Use i386 string operations
21603 when profitable. expand_setmem contains similar code. The code
21604 depends upon architecture, block size and alignment, but always has
21605 the same overall structure:
21607 1) Prologue guard: Conditional that jumps up to epilogues for small
21608 blocks that can be handled by epilogue alone. This is faster
21609 but also needed for correctness, since prologue assume the block
21610 is larger than the desired alignment.
21612 Optional dynamic check for size and libcall for large
21613 blocks is emitted here too, with -minline-stringops-dynamically.
21615 2) Prologue: copy first few bytes in order to get destination
21616 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
21617 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
21618 copied. We emit either a jump tree on power of two sized
21619 blocks, or a byte loop.
21621 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
21622 with specified algorithm.
21624 4) Epilogue: code copying tail of the block that is too small to be
21625 handled by main body (or up to size guarded by prologue guard). */
21628 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
,
21629 rtx expected_align_exp
, rtx expected_size_exp
)
21635 rtx jump_around_label
= NULL
;
21636 HOST_WIDE_INT align
= 1;
21637 unsigned HOST_WIDE_INT count
= 0;
21638 HOST_WIDE_INT expected_size
= -1;
21639 int size_needed
= 0, epilogue_size_needed
;
21640 int desired_align
= 0, align_bytes
= 0;
21641 enum stringop_alg alg
;
21643 bool need_zero_guard
= false;
21645 if (CONST_INT_P (align_exp
))
21646 align
= INTVAL (align_exp
);
21647 /* i386 can do misaligned access on reasonably increased cost. */
21648 if (CONST_INT_P (expected_align_exp
)
21649 && INTVAL (expected_align_exp
) > align
)
21650 align
= INTVAL (expected_align_exp
);
21651 /* ALIGN is the minimum of destination and source alignment, but we care here
21652 just about destination alignment. */
21653 else if (MEM_ALIGN (dst
) > (unsigned HOST_WIDE_INT
) align
* BITS_PER_UNIT
)
21654 align
= MEM_ALIGN (dst
) / BITS_PER_UNIT
;
21656 if (CONST_INT_P (count_exp
))
21657 count
= expected_size
= INTVAL (count_exp
);
21658 if (CONST_INT_P (expected_size_exp
) && count
== 0)
21659 expected_size
= INTVAL (expected_size_exp
);
21661 /* Make sure we don't need to care about overflow later on. */
21662 if (count
> ((unsigned HOST_WIDE_INT
) 1 << 30))
21665 /* Step 0: Decide on preferred algorithm, desired alignment and
21666 size of chunks to be copied by main loop. */
21668 alg
= decide_alg (count
, expected_size
, false, &dynamic_check
);
21669 desired_align
= decide_alignment (align
, alg
, expected_size
);
21671 if (!TARGET_ALIGN_STRINGOPS
)
21672 align
= desired_align
;
21674 if (alg
== libcall
)
21676 gcc_assert (alg
!= no_stringop
);
21678 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
21679 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
21680 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
21685 gcc_unreachable ();
21687 need_zero_guard
= true;
21688 size_needed
= GET_MODE_SIZE (Pmode
);
21690 case unrolled_loop
:
21691 need_zero_guard
= true;
21692 size_needed
= GET_MODE_SIZE (Pmode
) * (TARGET_64BIT
? 4 : 2);
21694 case rep_prefix_8_byte
:
21697 case rep_prefix_4_byte
:
21700 case rep_prefix_1_byte
:
21704 need_zero_guard
= true;
21709 epilogue_size_needed
= size_needed
;
21711 /* Step 1: Prologue guard. */
21713 /* Alignment code needs count to be in register. */
21714 if (CONST_INT_P (count_exp
) && desired_align
> align
)
21716 if (INTVAL (count_exp
) > desired_align
21717 && INTVAL (count_exp
) > size_needed
)
21720 = get_mem_align_offset (dst
, desired_align
* BITS_PER_UNIT
);
21721 if (align_bytes
<= 0)
21724 align_bytes
= desired_align
- align_bytes
;
21726 if (align_bytes
== 0)
21727 count_exp
= force_reg (counter_mode (count_exp
), count_exp
);
21729 gcc_assert (desired_align
>= 1 && align
>= 1);
21731 /* Ensure that alignment prologue won't copy past end of block. */
21732 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
21734 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
21735 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
21736 Make sure it is power of 2. */
21737 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
21741 if (count
< (unsigned HOST_WIDE_INT
)epilogue_size_needed
)
21743 /* If main algorithm works on QImode, no epilogue is needed.
21744 For small sizes just don't align anything. */
21745 if (size_needed
== 1)
21746 desired_align
= align
;
21753 label
= gen_label_rtx ();
21754 emit_cmp_and_jump_insns (count_exp
,
21755 GEN_INT (epilogue_size_needed
),
21756 LTU
, 0, counter_mode (count_exp
), 1, label
);
21757 if (expected_size
== -1 || expected_size
< epilogue_size_needed
)
21758 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
21760 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
21764 /* Emit code to decide on runtime whether library call or inline should be
21766 if (dynamic_check
!= -1)
21768 if (CONST_INT_P (count_exp
))
21770 if (UINTVAL (count_exp
) >= (unsigned HOST_WIDE_INT
)dynamic_check
)
21772 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
21773 count_exp
= const0_rtx
;
21779 rtx hot_label
= gen_label_rtx ();
21780 jump_around_label
= gen_label_rtx ();
21781 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
21782 LEU
, 0, GET_MODE (count_exp
), 1, hot_label
);
21783 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
21784 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
21785 emit_jump (jump_around_label
);
21786 emit_label (hot_label
);
21790 /* Step 2: Alignment prologue. */
21792 if (desired_align
> align
)
21794 if (align_bytes
== 0)
21796 /* Except for the first move in epilogue, we no longer know
21797 constant offset in aliasing info. It don't seems to worth
21798 the pain to maintain it for the first move, so throw away
21800 src
= change_address (src
, BLKmode
, srcreg
);
21801 dst
= change_address (dst
, BLKmode
, destreg
);
21802 expand_movmem_prologue (dst
, src
, destreg
, srcreg
, count_exp
, align
,
21807 /* If we know how many bytes need to be stored before dst is
21808 sufficiently aligned, maintain aliasing info accurately. */
21809 dst
= expand_constant_movmem_prologue (dst
, &src
, destreg
, srcreg
,
21810 desired_align
, align_bytes
);
21811 count_exp
= plus_constant (count_exp
, -align_bytes
);
21812 count
-= align_bytes
;
21814 if (need_zero_guard
21815 && (count
< (unsigned HOST_WIDE_INT
) size_needed
21816 || (align_bytes
== 0
21817 && count
< ((unsigned HOST_WIDE_INT
) size_needed
21818 + desired_align
- align
))))
21820 /* It is possible that we copied enough so the main loop will not
21822 gcc_assert (size_needed
> 1);
21823 if (label
== NULL_RTX
)
21824 label
= gen_label_rtx ();
21825 emit_cmp_and_jump_insns (count_exp
,
21826 GEN_INT (size_needed
),
21827 LTU
, 0, counter_mode (count_exp
), 1, label
);
21828 if (expected_size
== -1
21829 || expected_size
< (desired_align
- align
) / 2 + size_needed
)
21830 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
21832 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
21835 if (label
&& size_needed
== 1)
21837 emit_label (label
);
21838 LABEL_NUSES (label
) = 1;
21840 epilogue_size_needed
= 1;
21842 else if (label
== NULL_RTX
)
21843 epilogue_size_needed
= size_needed
;
21845 /* Step 3: Main loop. */
21851 gcc_unreachable ();
21853 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
21854 count_exp
, QImode
, 1, expected_size
);
21857 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
21858 count_exp
, Pmode
, 1, expected_size
);
21860 case unrolled_loop
:
21861 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
21862 registers for 4 temporaries anyway. */
21863 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
21864 count_exp
, Pmode
, TARGET_64BIT
? 4 : 2,
21867 case rep_prefix_8_byte
:
21868 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
21871 case rep_prefix_4_byte
:
21872 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
21875 case rep_prefix_1_byte
:
21876 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
21880 /* Adjust properly the offset of src and dest memory for aliasing. */
21881 if (CONST_INT_P (count_exp
))
21883 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
,
21884 (count
/ size_needed
) * size_needed
);
21885 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
21886 (count
/ size_needed
) * size_needed
);
21890 src
= change_address (src
, BLKmode
, srcreg
);
21891 dst
= change_address (dst
, BLKmode
, destreg
);
21894 /* Step 4: Epilogue to copy the remaining bytes. */
21898 /* When the main loop is done, COUNT_EXP might hold original count,
21899 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
21900 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
21901 bytes. Compensate if needed. */
21903 if (size_needed
< epilogue_size_needed
)
21906 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
21907 GEN_INT (size_needed
- 1), count_exp
, 1,
21909 if (tmp
!= count_exp
)
21910 emit_move_insn (count_exp
, tmp
);
21912 emit_label (label
);
21913 LABEL_NUSES (label
) = 1;
21916 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
21917 expand_movmem_epilogue (dst
, src
, destreg
, srcreg
, count_exp
,
21918 epilogue_size_needed
);
21919 if (jump_around_label
)
21920 emit_label (jump_around_label
);
21924 /* Helper function for memcpy. For QImode value 0xXY produce
21925 0xXYXYXYXY of wide specified by MODE. This is essentially
21926 a * 0x10101010, but we can do slightly better than
21927 synth_mult by unwinding the sequence by hand on CPUs with
21930 promote_duplicated_reg (enum machine_mode mode
, rtx val
)
21932 enum machine_mode valmode
= GET_MODE (val
);
21934 int nops
= mode
== DImode
? 3 : 2;
21936 gcc_assert (mode
== SImode
|| mode
== DImode
);
21937 if (val
== const0_rtx
)
21938 return copy_to_mode_reg (mode
, const0_rtx
);
21939 if (CONST_INT_P (val
))
21941 HOST_WIDE_INT v
= INTVAL (val
) & 255;
21945 if (mode
== DImode
)
21946 v
|= (v
<< 16) << 16;
21947 return copy_to_mode_reg (mode
, gen_int_mode (v
, mode
));
21950 if (valmode
== VOIDmode
)
21952 if (valmode
!= QImode
)
21953 val
= gen_lowpart (QImode
, val
);
21954 if (mode
== QImode
)
21956 if (!TARGET_PARTIAL_REG_STALL
)
21958 if (ix86_cost
->mult_init
[mode
== DImode
? 3 : 2]
21959 + ix86_cost
->mult_bit
* (mode
== DImode
? 8 : 4)
21960 <= (ix86_cost
->shift_const
+ ix86_cost
->add
) * nops
21961 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL
== 0)))
21963 rtx reg
= convert_modes (mode
, QImode
, val
, true);
21964 tmp
= promote_duplicated_reg (mode
, const1_rtx
);
21965 return expand_simple_binop (mode
, MULT
, reg
, tmp
, NULL
, 1,
21970 rtx reg
= convert_modes (mode
, QImode
, val
, true);
21972 if (!TARGET_PARTIAL_REG_STALL
)
21973 if (mode
== SImode
)
21974 emit_insn (gen_movsi_insv_1 (reg
, reg
));
21976 emit_insn (gen_movdi_insv_1 (reg
, reg
));
21979 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (8),
21980 NULL
, 1, OPTAB_DIRECT
);
21982 expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
21984 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (16),
21985 NULL
, 1, OPTAB_DIRECT
);
21986 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
21987 if (mode
== SImode
)
21989 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (32),
21990 NULL
, 1, OPTAB_DIRECT
);
21991 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
21996 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
21997 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
21998 alignment from ALIGN to DESIRED_ALIGN. */
22000 promote_duplicated_reg_to_size (rtx val
, int size_needed
, int desired_align
, int align
)
22005 && (size_needed
> 4 || (desired_align
> align
&& desired_align
> 4)))
22006 promoted_val
= promote_duplicated_reg (DImode
, val
);
22007 else if (size_needed
> 2 || (desired_align
> align
&& desired_align
> 2))
22008 promoted_val
= promote_duplicated_reg (SImode
, val
);
22009 else if (size_needed
> 1 || (desired_align
> align
&& desired_align
> 1))
22010 promoted_val
= promote_duplicated_reg (HImode
, val
);
22012 promoted_val
= val
;
22014 return promoted_val
;
22017 /* Expand string clear operation (bzero). Use i386 string operations when
22018 profitable. See expand_movmem comment for explanation of individual
22019 steps performed. */
22021 ix86_expand_setmem (rtx dst
, rtx count_exp
, rtx val_exp
, rtx align_exp
,
22022 rtx expected_align_exp
, rtx expected_size_exp
)
22027 rtx jump_around_label
= NULL
;
22028 HOST_WIDE_INT align
= 1;
22029 unsigned HOST_WIDE_INT count
= 0;
22030 HOST_WIDE_INT expected_size
= -1;
22031 int size_needed
= 0, epilogue_size_needed
;
22032 int desired_align
= 0, align_bytes
= 0;
22033 enum stringop_alg alg
;
22034 rtx promoted_val
= NULL
;
22035 bool force_loopy_epilogue
= false;
22037 bool need_zero_guard
= false;
22039 if (CONST_INT_P (align_exp
))
22040 align
= INTVAL (align_exp
);
22041 /* i386 can do misaligned access on reasonably increased cost. */
22042 if (CONST_INT_P (expected_align_exp
)
22043 && INTVAL (expected_align_exp
) > align
)
22044 align
= INTVAL (expected_align_exp
);
22045 if (CONST_INT_P (count_exp
))
22046 count
= expected_size
= INTVAL (count_exp
);
22047 if (CONST_INT_P (expected_size_exp
) && count
== 0)
22048 expected_size
= INTVAL (expected_size_exp
);
22050 /* Make sure we don't need to care about overflow later on. */
22051 if (count
> ((unsigned HOST_WIDE_INT
) 1 << 30))
22054 /* Step 0: Decide on preferred algorithm, desired alignment and
22055 size of chunks to be copied by main loop. */
22057 alg
= decide_alg (count
, expected_size
, true, &dynamic_check
);
22058 desired_align
= decide_alignment (align
, alg
, expected_size
);
22060 if (!TARGET_ALIGN_STRINGOPS
)
22061 align
= desired_align
;
22063 if (alg
== libcall
)
22065 gcc_assert (alg
!= no_stringop
);
22067 count_exp
= copy_to_mode_reg (counter_mode (count_exp
), count_exp
);
22068 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
22073 gcc_unreachable ();
22075 need_zero_guard
= true;
22076 size_needed
= GET_MODE_SIZE (Pmode
);
22078 case unrolled_loop
:
22079 need_zero_guard
= true;
22080 size_needed
= GET_MODE_SIZE (Pmode
) * 4;
22082 case rep_prefix_8_byte
:
22085 case rep_prefix_4_byte
:
22088 case rep_prefix_1_byte
:
22092 need_zero_guard
= true;
22096 epilogue_size_needed
= size_needed
;
22098 /* Step 1: Prologue guard. */
22100 /* Alignment code needs count to be in register. */
22101 if (CONST_INT_P (count_exp
) && desired_align
> align
)
22103 if (INTVAL (count_exp
) > desired_align
22104 && INTVAL (count_exp
) > size_needed
)
22107 = get_mem_align_offset (dst
, desired_align
* BITS_PER_UNIT
);
22108 if (align_bytes
<= 0)
22111 align_bytes
= desired_align
- align_bytes
;
22113 if (align_bytes
== 0)
22115 enum machine_mode mode
= SImode
;
22116 if (TARGET_64BIT
&& (count
& ~0xffffffff))
22118 count_exp
= force_reg (mode
, count_exp
);
22121 /* Do the cheap promotion to allow better CSE across the
22122 main loop and epilogue (ie one load of the big constant in the
22123 front of all code. */
22124 if (CONST_INT_P (val_exp
))
22125 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
22126 desired_align
, align
);
22127 /* Ensure that alignment prologue won't copy past end of block. */
22128 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
22130 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
22131 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
22132 Make sure it is power of 2. */
22133 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
22135 /* To improve performance of small blocks, we jump around the VAL
22136 promoting mode. This mean that if the promoted VAL is not constant,
22137 we might not use it in the epilogue and have to use byte
22139 if (epilogue_size_needed
> 2 && !promoted_val
)
22140 force_loopy_epilogue
= true;
22143 if (count
< (unsigned HOST_WIDE_INT
)epilogue_size_needed
)
22145 /* If main algorithm works on QImode, no epilogue is needed.
22146 For small sizes just don't align anything. */
22147 if (size_needed
== 1)
22148 desired_align
= align
;
22155 label
= gen_label_rtx ();
22156 emit_cmp_and_jump_insns (count_exp
,
22157 GEN_INT (epilogue_size_needed
),
22158 LTU
, 0, counter_mode (count_exp
), 1, label
);
22159 if (expected_size
== -1 || expected_size
<= epilogue_size_needed
)
22160 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
22162 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
22165 if (dynamic_check
!= -1)
22167 rtx hot_label
= gen_label_rtx ();
22168 jump_around_label
= gen_label_rtx ();
22169 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
22170 LEU
, 0, counter_mode (count_exp
), 1, hot_label
);
22171 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
22172 set_storage_via_libcall (dst
, count_exp
, val_exp
, false);
22173 emit_jump (jump_around_label
);
22174 emit_label (hot_label
);
22177 /* Step 2: Alignment prologue. */
22179 /* Do the expensive promotion once we branched off the small blocks. */
22181 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
22182 desired_align
, align
);
22183 gcc_assert (desired_align
>= 1 && align
>= 1);
22185 if (desired_align
> align
)
22187 if (align_bytes
== 0)
22189 /* Except for the first move in epilogue, we no longer know
22190 constant offset in aliasing info. It don't seems to worth
22191 the pain to maintain it for the first move, so throw away
22193 dst
= change_address (dst
, BLKmode
, destreg
);
22194 expand_setmem_prologue (dst
, destreg
, promoted_val
, count_exp
, align
,
22199 /* If we know how many bytes need to be stored before dst is
22200 sufficiently aligned, maintain aliasing info accurately. */
22201 dst
= expand_constant_setmem_prologue (dst
, destreg
, promoted_val
,
22202 desired_align
, align_bytes
);
22203 count_exp
= plus_constant (count_exp
, -align_bytes
);
22204 count
-= align_bytes
;
22206 if (need_zero_guard
22207 && (count
< (unsigned HOST_WIDE_INT
) size_needed
22208 || (align_bytes
== 0
22209 && count
< ((unsigned HOST_WIDE_INT
) size_needed
22210 + desired_align
- align
))))
22212 /* It is possible that we copied enough so the main loop will not
22214 gcc_assert (size_needed
> 1);
22215 if (label
== NULL_RTX
)
22216 label
= gen_label_rtx ();
22217 emit_cmp_and_jump_insns (count_exp
,
22218 GEN_INT (size_needed
),
22219 LTU
, 0, counter_mode (count_exp
), 1, label
);
22220 if (expected_size
== -1
22221 || expected_size
< (desired_align
- align
) / 2 + size_needed
)
22222 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
22224 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
22227 if (label
&& size_needed
== 1)
22229 emit_label (label
);
22230 LABEL_NUSES (label
) = 1;
22232 promoted_val
= val_exp
;
22233 epilogue_size_needed
= 1;
22235 else if (label
== NULL_RTX
)
22236 epilogue_size_needed
= size_needed
;
22238 /* Step 3: Main loop. */
22244 gcc_unreachable ();
22246 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
22247 count_exp
, QImode
, 1, expected_size
);
22250 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
22251 count_exp
, Pmode
, 1, expected_size
);
22253 case unrolled_loop
:
22254 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
22255 count_exp
, Pmode
, 4, expected_size
);
22257 case rep_prefix_8_byte
:
22258 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
22261 case rep_prefix_4_byte
:
22262 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
22265 case rep_prefix_1_byte
:
22266 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
22270 /* Adjust properly the offset of src and dest memory for aliasing. */
22271 if (CONST_INT_P (count_exp
))
22272 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
22273 (count
/ size_needed
) * size_needed
);
22275 dst
= change_address (dst
, BLKmode
, destreg
);
22277 /* Step 4: Epilogue to copy the remaining bytes. */
22281 /* When the main loop is done, COUNT_EXP might hold original count,
22282 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
22283 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
22284 bytes. Compensate if needed. */
22286 if (size_needed
< epilogue_size_needed
)
22289 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
22290 GEN_INT (size_needed
- 1), count_exp
, 1,
22292 if (tmp
!= count_exp
)
22293 emit_move_insn (count_exp
, tmp
);
22295 emit_label (label
);
22296 LABEL_NUSES (label
) = 1;
22299 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
22301 if (force_loopy_epilogue
)
22302 expand_setmem_epilogue_via_loop (dst
, destreg
, val_exp
, count_exp
,
22303 epilogue_size_needed
);
22305 expand_setmem_epilogue (dst
, destreg
, promoted_val
, count_exp
,
22306 epilogue_size_needed
);
22308 if (jump_around_label
)
22309 emit_label (jump_around_label
);
22313 /* Expand the appropriate insns for doing strlen if not just doing
22316 out = result, initialized with the start address
22317 align_rtx = alignment of the address.
22318 scratch = scratch register, initialized with the startaddress when
22319 not aligned, otherwise undefined
22321 This is just the body. It needs the initializations mentioned above and
22322 some address computing at the end. These things are done in i386.md. */
22325 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
22329 rtx align_2_label
= NULL_RTX
;
22330 rtx align_3_label
= NULL_RTX
;
22331 rtx align_4_label
= gen_label_rtx ();
22332 rtx end_0_label
= gen_label_rtx ();
22334 rtx tmpreg
= gen_reg_rtx (SImode
);
22335 rtx scratch
= gen_reg_rtx (SImode
);
22339 if (CONST_INT_P (align_rtx
))
22340 align
= INTVAL (align_rtx
);
22342 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
22344 /* Is there a known alignment and is it less than 4? */
22347 rtx scratch1
= gen_reg_rtx (Pmode
);
22348 emit_move_insn (scratch1
, out
);
22349 /* Is there a known alignment and is it not 2? */
22352 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
22353 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
22355 /* Leave just the 3 lower bits. */
22356 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
22357 NULL_RTX
, 0, OPTAB_WIDEN
);
22359 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
22360 Pmode
, 1, align_4_label
);
22361 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
22362 Pmode
, 1, align_2_label
);
22363 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
22364 Pmode
, 1, align_3_label
);
22368 /* Since the alignment is 2, we have to check 2 or 0 bytes;
22369 check if is aligned to 4 - byte. */
22371 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
22372 NULL_RTX
, 0, OPTAB_WIDEN
);
22374 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
22375 Pmode
, 1, align_4_label
);
22378 mem
= change_address (src
, QImode
, out
);
22380 /* Now compare the bytes. */
22382 /* Compare the first n unaligned byte on a byte per byte basis. */
22383 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
22384 QImode
, 1, end_0_label
);
22386 /* Increment the address. */
22387 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
22389 /* Not needed with an alignment of 2 */
22392 emit_label (align_2_label
);
22394 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
22397 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
22399 emit_label (align_3_label
);
22402 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
22405 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
22408 /* Generate loop to check 4 bytes at a time. It is not a good idea to
22409 align this loop. It gives only huge programs, but does not help to
22411 emit_label (align_4_label
);
22413 mem
= change_address (src
, SImode
, out
);
22414 emit_move_insn (scratch
, mem
);
22415 emit_insn (ix86_gen_add3 (out
, out
, GEN_INT (4)));
22417 /* This formula yields a nonzero result iff one of the bytes is zero.
22418 This saves three branches inside loop and many cycles. */
22420 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
22421 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
22422 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
22423 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
22424 gen_int_mode (0x80808080, SImode
)));
22425 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
22430 rtx reg
= gen_reg_rtx (SImode
);
22431 rtx reg2
= gen_reg_rtx (Pmode
);
22432 emit_move_insn (reg
, tmpreg
);
22433 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
22435 /* If zero is not in the first two bytes, move two bytes forward. */
22436 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
22437 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
22438 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
22439 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
22440 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
22443 /* Emit lea manually to avoid clobbering of flags. */
22444 emit_insn (gen_rtx_SET (SImode
, reg2
,
22445 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
22447 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
22448 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
22449 emit_insn (gen_rtx_SET (VOIDmode
, out
,
22450 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
22456 rtx end_2_label
= gen_label_rtx ();
22457 /* Is zero in the first two bytes? */
22459 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
22460 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
22461 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
22462 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
22463 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
22465 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
22466 JUMP_LABEL (tmp
) = end_2_label
;
22468 /* Not in the first two. Move two bytes forward. */
22469 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
22470 emit_insn (ix86_gen_add3 (out
, out
, const2_rtx
));
22472 emit_label (end_2_label
);
22476 /* Avoid branch in fixing the byte. */
22477 tmpreg
= gen_lowpart (QImode
, tmpreg
);
22478 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
22479 tmp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
22480 cmp
= gen_rtx_LTU (VOIDmode
, tmp
, const0_rtx
);
22481 emit_insn (ix86_gen_sub3_carry (out
, out
, GEN_INT (3), tmp
, cmp
));
22483 emit_label (end_0_label
);
22486 /* Expand strlen. */
22489 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
22491 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
22493 /* The generic case of strlen expander is long. Avoid it's
22494 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
22496 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
22497 && !TARGET_INLINE_ALL_STRINGOPS
22498 && !optimize_insn_for_size_p ()
22499 && (!CONST_INT_P (align
) || INTVAL (align
) < 4))
22502 addr
= force_reg (Pmode
, XEXP (src
, 0));
22503 scratch1
= gen_reg_rtx (Pmode
);
22505 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
22506 && !optimize_insn_for_size_p ())
22508 /* Well it seems that some optimizer does not combine a call like
22509 foo(strlen(bar), strlen(bar));
22510 when the move and the subtraction is done here. It does calculate
22511 the length just once when these instructions are done inside of
22512 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
22513 often used and I use one fewer register for the lifetime of
22514 output_strlen_unroll() this is better. */
22516 emit_move_insn (out
, addr
);
22518 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
22520 /* strlensi_unroll_1 returns the address of the zero at the end of
22521 the string, like memchr(), so compute the length by subtracting
22522 the start address. */
22523 emit_insn (ix86_gen_sub3 (out
, out
, addr
));
22529 /* Can't use this if the user has appropriated eax, ecx, or edi. */
22530 if (fixed_regs
[AX_REG
] || fixed_regs
[CX_REG
] || fixed_regs
[DI_REG
])
22533 scratch2
= gen_reg_rtx (Pmode
);
22534 scratch3
= gen_reg_rtx (Pmode
);
22535 scratch4
= force_reg (Pmode
, constm1_rtx
);
22537 emit_move_insn (scratch3
, addr
);
22538 eoschar
= force_reg (QImode
, eoschar
);
22540 src
= replace_equiv_address_nv (src
, scratch3
);
22542 /* If .md starts supporting :P, this can be done in .md. */
22543 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
22544 scratch4
), UNSPEC_SCAS
);
22545 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
22546 emit_insn (ix86_gen_one_cmpl2 (scratch2
, scratch1
));
22547 emit_insn (ix86_gen_add3 (out
, scratch2
, constm1_rtx
));
22552 /* For given symbol (function) construct code to compute address of it's PLT
22553 entry in large x86-64 PIC model. */
22555 construct_plt_address (rtx symbol
)
22557 rtx tmp
= gen_reg_rtx (Pmode
);
22558 rtx unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, symbol
), UNSPEC_PLTOFF
);
22560 gcc_assert (GET_CODE (symbol
) == SYMBOL_REF
);
22561 gcc_assert (ix86_cmodel
== CM_LARGE_PIC
);
22563 emit_move_insn (tmp
, gen_rtx_CONST (Pmode
, unspec
));
22564 emit_insn (gen_adddi3 (tmp
, tmp
, pic_offset_table_rtx
));
22569 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
22571 rtx pop
, bool sibcall
)
22573 /* We need to represent that SI and DI registers are clobbered
22575 static int clobbered_registers
[] = {
22576 XMM6_REG
, XMM7_REG
, XMM8_REG
,
22577 XMM9_REG
, XMM10_REG
, XMM11_REG
,
22578 XMM12_REG
, XMM13_REG
, XMM14_REG
,
22579 XMM15_REG
, SI_REG
, DI_REG
22581 rtx vec
[ARRAY_SIZE (clobbered_registers
) + 3];
22582 rtx use
= NULL
, call
;
22583 unsigned int vec_len
;
22585 if (pop
== const0_rtx
)
22587 gcc_assert (!TARGET_64BIT
|| !pop
);
22589 if (TARGET_MACHO
&& !TARGET_64BIT
)
22592 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
22593 fnaddr
= machopic_indirect_call_target (fnaddr
);
22598 /* Static functions and indirect calls don't need the pic register. */
22599 if (flag_pic
&& (!TARGET_64BIT
|| ix86_cmodel
== CM_LARGE_PIC
)
22600 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
22601 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
22602 use_reg (&use
, pic_offset_table_rtx
);
22605 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
22607 rtx al
= gen_rtx_REG (QImode
, AX_REG
);
22608 emit_move_insn (al
, callarg2
);
22609 use_reg (&use
, al
);
22612 if (ix86_cmodel
== CM_LARGE_PIC
22614 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
22615 && !local_symbolic_operand (XEXP (fnaddr
, 0), VOIDmode
))
22616 fnaddr
= gen_rtx_MEM (QImode
, construct_plt_address (XEXP (fnaddr
, 0)));
22618 ? !sibcall_insn_operand (XEXP (fnaddr
, 0), Pmode
)
22619 : !call_insn_operand (XEXP (fnaddr
, 0), Pmode
))
22621 fnaddr
= XEXP (fnaddr
, 0);
22622 if (GET_MODE (fnaddr
) != Pmode
)
22623 fnaddr
= convert_to_mode (Pmode
, fnaddr
, 1);
22624 fnaddr
= gen_rtx_MEM (QImode
, copy_to_mode_reg (Pmode
, fnaddr
));
22628 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
22630 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
22631 vec
[vec_len
++] = call
;
22635 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
22636 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
22637 vec
[vec_len
++] = pop
;
22640 if (TARGET_64BIT_MS_ABI
22641 && (!callarg2
|| INTVAL (callarg2
) != -2))
22645 vec
[vec_len
++] = gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, const0_rtx
),
22646 UNSPEC_MS_TO_SYSV_CALL
);
22648 for (i
= 0; i
< ARRAY_SIZE (clobbered_registers
); i
++)
22650 = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers
[i
])
22652 gen_rtx_REG (SSE_REGNO_P (clobbered_registers
[i
])
22654 clobbered_registers
[i
]));
22657 /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
22658 if (TARGET_VZEROUPPER
)
22661 if (cfun
->machine
->callee_pass_avx256_p
)
22663 if (cfun
->machine
->callee_return_avx256_p
)
22664 avx256
= callee_return_pass_avx256
;
22666 avx256
= callee_pass_avx256
;
22668 else if (cfun
->machine
->callee_return_avx256_p
)
22669 avx256
= callee_return_avx256
;
22671 avx256
= call_no_avx256
;
22673 if (reload_completed
)
22674 emit_insn (gen_avx_vzeroupper (GEN_INT (avx256
)));
22676 vec
[vec_len
++] = gen_rtx_UNSPEC (VOIDmode
,
22677 gen_rtvec (1, GEN_INT (avx256
)),
22678 UNSPEC_CALL_NEEDS_VZEROUPPER
);
22682 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (vec_len
, vec
));
22683 call
= emit_call_insn (call
);
22685 CALL_INSN_FUNCTION_USAGE (call
) = use
;
22691 ix86_split_call_vzeroupper (rtx insn
, rtx vzeroupper
)
22693 rtx pat
= PATTERN (insn
);
22694 rtvec vec
= XVEC (pat
, 0);
22695 int len
= GET_NUM_ELEM (vec
) - 1;
22697 /* Strip off the last entry of the parallel. */
22698 gcc_assert (GET_CODE (RTVEC_ELT (vec
, len
)) == UNSPEC
);
22699 gcc_assert (XINT (RTVEC_ELT (vec
, len
), 1) == UNSPEC_CALL_NEEDS_VZEROUPPER
);
22701 pat
= RTVEC_ELT (vec
, 0);
22703 pat
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (len
, &RTVEC_ELT (vec
, 0)));
22705 emit_insn (gen_avx_vzeroupper (vzeroupper
));
22706 emit_call_insn (pat
);
22709 /* Output the assembly for a call instruction. */
22712 ix86_output_call_insn (rtx insn
, rtx call_op
)
22714 bool direct_p
= constant_call_address_operand (call_op
, Pmode
);
22715 bool seh_nop_p
= false;
22718 if (SIBLING_CALL_P (insn
))
22722 /* SEH epilogue detection requires the indirect branch case
22723 to include REX.W. */
22724 else if (TARGET_SEH
)
22725 xasm
= "rex.W jmp %A0";
22729 output_asm_insn (xasm
, &call_op
);
22733 /* SEH unwinding can require an extra nop to be emitted in several
22734 circumstances. Determine if we have one of those. */
22739 for (i
= NEXT_INSN (insn
); i
; i
= NEXT_INSN (i
))
22741 /* If we get to another real insn, we don't need the nop. */
22745 /* If we get to the epilogue note, prevent a catch region from
22746 being adjacent to the standard epilogue sequence. If non-
22747 call-exceptions, we'll have done this during epilogue emission. */
22748 if (NOTE_P (i
) && NOTE_KIND (i
) == NOTE_INSN_EPILOGUE_BEG
22749 && !flag_non_call_exceptions
22750 && !can_throw_internal (insn
))
22757 /* If we didn't find a real insn following the call, prevent the
22758 unwinder from looking into the next function. */
22764 xasm
= "call\t%P0";
22766 xasm
= "call\t%A0";
22768 output_asm_insn (xasm
, &call_op
);
22776 /* Clear stack slot assignments remembered from previous functions.
22777 This is called from INIT_EXPANDERS once before RTL is emitted for each
22780 static struct machine_function
*
22781 ix86_init_machine_status (void)
22783 struct machine_function
*f
;
22785 f
= ggc_alloc_cleared_machine_function ();
22786 f
->use_fast_prologue_epilogue_nregs
= -1;
22787 f
->tls_descriptor_call_expanded_p
= 0;
22788 f
->call_abi
= ix86_abi
;
22793 /* Return a MEM corresponding to a stack slot with mode MODE.
22794 Allocate a new slot if necessary.
22796 The RTL for a function can have several slots available: N is
22797 which slot to use. */
22800 assign_386_stack_local (enum machine_mode mode
, enum ix86_stack_slot n
)
22802 struct stack_local_entry
*s
;
22804 gcc_assert (n
< MAX_386_STACK_LOCALS
);
22806 /* Virtual slot is valid only before vregs are instantiated. */
22807 gcc_assert ((n
== SLOT_VIRTUAL
) == !virtuals_instantiated
);
22809 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
22810 if (s
->mode
== mode
&& s
->n
== n
)
22811 return validize_mem (copy_rtx (s
->rtl
));
22813 s
= ggc_alloc_stack_local_entry ();
22816 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
22818 s
->next
= ix86_stack_locals
;
22819 ix86_stack_locals
= s
;
22820 return validize_mem (s
->rtl
);
22823 /* Calculate the length of the memory address in the instruction encoding.
22824 Includes addr32 prefix, does not include the one-byte modrm, opcode,
22825 or other prefixes. */
22828 memory_address_length (rtx addr
)
22830 struct ix86_address parts
;
22831 rtx base
, index
, disp
;
22835 if (GET_CODE (addr
) == PRE_DEC
22836 || GET_CODE (addr
) == POST_INC
22837 || GET_CODE (addr
) == PRE_MODIFY
22838 || GET_CODE (addr
) == POST_MODIFY
)
22841 ok
= ix86_decompose_address (addr
, &parts
);
22844 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
22845 parts
.base
= SUBREG_REG (parts
.base
);
22846 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
22847 parts
.index
= SUBREG_REG (parts
.index
);
22850 index
= parts
.index
;
22853 /* Add length of addr32 prefix. */
22854 len
= (GET_CODE (addr
) == ZERO_EXTEND
22855 || GET_CODE (addr
) == AND
);
22858 - esp as the base always wants an index,
22859 - ebp as the base always wants a displacement,
22860 - r12 as the base always wants an index,
22861 - r13 as the base always wants a displacement. */
22863 /* Register Indirect. */
22864 if (base
&& !index
&& !disp
)
22866 /* esp (for its index) and ebp (for its displacement) need
22867 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
22870 && (addr
== arg_pointer_rtx
22871 || addr
== frame_pointer_rtx
22872 || REGNO (addr
) == SP_REG
22873 || REGNO (addr
) == BP_REG
22874 || REGNO (addr
) == R12_REG
22875 || REGNO (addr
) == R13_REG
))
22879 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
22880 is not disp32, but disp32(%rip), so for disp32
22881 SIB byte is needed, unless print_operand_address
22882 optimizes it into disp32(%rip) or (%rip) is implied
22884 else if (disp
&& !base
&& !index
)
22891 if (GET_CODE (disp
) == CONST
)
22892 symbol
= XEXP (disp
, 0);
22893 if (GET_CODE (symbol
) == PLUS
22894 && CONST_INT_P (XEXP (symbol
, 1)))
22895 symbol
= XEXP (symbol
, 0);
22897 if (GET_CODE (symbol
) != LABEL_REF
22898 && (GET_CODE (symbol
) != SYMBOL_REF
22899 || SYMBOL_REF_TLS_MODEL (symbol
) != 0)
22900 && (GET_CODE (symbol
) != UNSPEC
22901 || (XINT (symbol
, 1) != UNSPEC_GOTPCREL
22902 && XINT (symbol
, 1) != UNSPEC_PCREL
22903 && XINT (symbol
, 1) != UNSPEC_GOTNTPOFF
)))
22910 /* Find the length of the displacement constant. */
22913 if (base
&& satisfies_constraint_K (disp
))
22918 /* ebp always wants a displacement. Similarly r13. */
22919 else if (base
&& REG_P (base
)
22920 && (REGNO (base
) == BP_REG
|| REGNO (base
) == R13_REG
))
22923 /* An index requires the two-byte modrm form.... */
22925 /* ...like esp (or r12), which always wants an index. */
22926 || base
== arg_pointer_rtx
22927 || base
== frame_pointer_rtx
22928 || (base
&& REG_P (base
)
22929 && (REGNO (base
) == SP_REG
|| REGNO (base
) == R12_REG
)))
22946 /* Compute default value for "length_immediate" attribute. When SHORTFORM
22947 is set, expect that insn have 8bit immediate alternative. */
22949 ix86_attr_length_immediate_default (rtx insn
, bool shortform
)
22953 extract_insn_cached (insn
);
22954 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
22955 if (CONSTANT_P (recog_data
.operand
[i
]))
22957 enum attr_mode mode
= get_attr_mode (insn
);
22960 if (shortform
&& CONST_INT_P (recog_data
.operand
[i
]))
22962 HOST_WIDE_INT ival
= INTVAL (recog_data
.operand
[i
]);
22969 ival
= trunc_int_for_mode (ival
, HImode
);
22972 ival
= trunc_int_for_mode (ival
, SImode
);
22977 if (IN_RANGE (ival
, -128, 127))
22994 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
22999 fatal_insn ("unknown insn mode", insn
);
23004 /* Compute default value for "length_address" attribute. */
23006 ix86_attr_length_address_default (rtx insn
)
23010 if (get_attr_type (insn
) == TYPE_LEA
)
23012 rtx set
= PATTERN (insn
), addr
;
23014 if (GET_CODE (set
) == PARALLEL
)
23015 set
= XVECEXP (set
, 0, 0);
23017 gcc_assert (GET_CODE (set
) == SET
);
23019 addr
= SET_SRC (set
);
23020 if (TARGET_64BIT
&& get_attr_mode (insn
) == MODE_SI
)
23022 if (GET_CODE (addr
) == ZERO_EXTEND
)
23023 addr
= XEXP (addr
, 0);
23024 if (GET_CODE (addr
) == SUBREG
)
23025 addr
= SUBREG_REG (addr
);
23028 return memory_address_length (addr
);
23031 extract_insn_cached (insn
);
23032 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
23033 if (MEM_P (recog_data
.operand
[i
]))
23035 constrain_operands_cached (reload_completed
);
23036 if (which_alternative
!= -1)
23038 const char *constraints
= recog_data
.constraints
[i
];
23039 int alt
= which_alternative
;
23041 while (*constraints
== '=' || *constraints
== '+')
23044 while (*constraints
++ != ',')
23046 /* Skip ignored operands. */
23047 if (*constraints
== 'X')
23050 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
23055 /* Compute default value for "length_vex" attribute. It includes
23056 2 or 3 byte VEX prefix and 1 opcode byte. */
/* NOTE(review): damaged extraction -- the function header, braces and the
   return statements are missing (embedded line numbers jump 23065 ->
   23068 -> 23072 and 23079 -> 23084).  */
23059 ix86_attr_length_vex_default (rtx insn
, bool has_0f_opcode
, bool has_vex_w
)
23063 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
23064 byte VEX prefix. */
23065 if (!has_0f_opcode
|| has_vex_w
)
23068 /* We can always use 2 byte VEX prefix in 32bit. */
/* Scan the operands for conditions that force the 3-byte VEX form.  */
23072 extract_insn_cached (insn
);
23074 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
23075 if (REG_P (recog_data
.operand
[i
]))
23077 /* REX.W bit uses 3 byte VEX prefix. */
23078 if (GET_MODE (recog_data
.operand
[i
]) == DImode
23079 && GENERAL_REG_P (recog_data
.operand
[i
]))
23084 /* REX.X or REX.B bits use 3 byte VEX prefix. */
23085 if (MEM_P (recog_data
.operand
[i
])
23086 && x86_extended_reg_mentioned_p (recog_data
.operand
[i
]))
23093 /* Return the maximum number of instructions a cpu can issue. */
/* NOTE(review): damaged extraction -- the "static int" header, the switch
   on the tuning target, every "return N;" between the case-label groups
   and the default case are missing (embedded line numbers jump 23096 ->
   23100, 23101 -> 23105, 23111 -> 23113, and stop at 23119).  Only the
   case labels survive; do not infer issue rates from this fragment.  */
23096 ix86_issue_rate (void)
/* Group 1 (lost return value).  */
23100 case PROCESSOR_PENTIUM
:
23101 case PROCESSOR_ATOM
:
/* Group 2 (lost return value).  */
23105 case PROCESSOR_PENTIUMPRO
:
23106 case PROCESSOR_PENTIUM4
:
23107 case PROCESSOR_CORE2_32
:
23108 case PROCESSOR_CORE2_64
:
23109 case PROCESSOR_COREI7_32
:
23110 case PROCESSOR_COREI7_64
:
23111 case PROCESSOR_ATHLON
:
/* Group 3 (lost return value).  */
23113 case PROCESSOR_AMDFAM10
:
23114 case PROCESSOR_NOCONA
:
23115 case PROCESSOR_GENERIC32
:
23116 case PROCESSOR_GENERIC64
:
23117 case PROCESSOR_BDVER1
:
23118 case PROCESSOR_BDVER2
:
23119 case PROCESSOR_BTVER1
:
23127 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
23128 by DEP_INSN and nothing set by DEP_INSN. */
23131 ix86_flags_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
23135 /* Simplify the test for uninteresting insns. */
23136 if (insn_type
!= TYPE_SETCC
23137 && insn_type
!= TYPE_ICMOV
23138 && insn_type
!= TYPE_FCMOV
23139 && insn_type
!= TYPE_IBR
)
23142 if ((set
= single_set (dep_insn
)) != 0)
23144 set
= SET_DEST (set
);
23147 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
23148 && XVECLEN (PATTERN (dep_insn
), 0) == 2
23149 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
23150 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
23152 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
23153 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
23158 if (!REG_P (set
) || REGNO (set
) != FLAGS_REG
)
23161 /* This test is true if the dependent insn reads the flags but
23162 not any other potentially set register. */
23163 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
23166 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
23172 /* Return true iff USE_INSN has a memory address with operands set by
   SET_INSN.  NOTE(review): the rest of the original comment, the function
   header and the final return were dropped by the extraction (embedded
   line numbers jump 23172 -> 23176 and stop at 23184).  */
23176 ix86_agi_dependent (rtx set_insn
, rtx use_insn
)
/* Pull USE_INSN's operands into recog_data (cached extraction).  */
23179 extract_insn_cached (use_insn
);
/* Scan the operands last-to-first for a memory operand.  */
23180 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
23181 if (MEM_P (recog_data
.operand
[i
]))
/* A dependency exists iff SET_INSN modifies the MEM's address.  */
23183 rtx addr
= XEXP (recog_data
.operand
[i
], 0);
23184 return modified_in_p (addr
, set_insn
) != 0;
23190 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
23192 enum attr_type insn_type
, dep_insn_type
;
23193 enum attr_memory memory
;
23195 int dep_insn_code_number
;
23197 /* Anti and output dependencies have zero cost on all CPUs. */
23198 if (REG_NOTE_KIND (link
) != 0)
23201 dep_insn_code_number
= recog_memoized (dep_insn
);
23203 /* If we can't recognize the insns, we can't really do anything. */
23204 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
23207 insn_type
= get_attr_type (insn
);
23208 dep_insn_type
= get_attr_type (dep_insn
);
23212 case PROCESSOR_PENTIUM
:
23213 /* Address Generation Interlock adds a cycle of latency. */
23214 if (insn_type
== TYPE_LEA
)
23216 rtx addr
= PATTERN (insn
);
23218 if (GET_CODE (addr
) == PARALLEL
)
23219 addr
= XVECEXP (addr
, 0, 0);
23221 gcc_assert (GET_CODE (addr
) == SET
);
23223 addr
= SET_SRC (addr
);
23224 if (modified_in_p (addr
, dep_insn
))
23227 else if (ix86_agi_dependent (dep_insn
, insn
))
23230 /* ??? Compares pair with jump/setcc. */
23231 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
23234 /* Floating point stores require value to be ready one cycle earlier. */
23235 if (insn_type
== TYPE_FMOV
23236 && get_attr_memory (insn
) == MEMORY_STORE
23237 && !ix86_agi_dependent (dep_insn
, insn
))
23241 case PROCESSOR_PENTIUMPRO
:
23242 memory
= get_attr_memory (insn
);
23244 /* INT->FP conversion is expensive. */
23245 if (get_attr_fp_int_src (dep_insn
))
23248 /* There is one cycle extra latency between an FP op and a store. */
23249 if (insn_type
== TYPE_FMOV
23250 && (set
= single_set (dep_insn
)) != NULL_RTX
23251 && (set2
= single_set (insn
)) != NULL_RTX
23252 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
23253 && MEM_P (SET_DEST (set2
)))
23256 /* Show ability of reorder buffer to hide latency of load by executing
23257 in parallel with previous instruction in case
23258 previous instruction is not needed to compute the address. */
23259 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
23260 && !ix86_agi_dependent (dep_insn
, insn
))
23262 /* Claim moves to take one cycle, as core can issue one load
23263 at time and the next load can start cycle later. */
23264 if (dep_insn_type
== TYPE_IMOV
23265 || dep_insn_type
== TYPE_FMOV
)
23273 memory
= get_attr_memory (insn
);
23275 /* The esp dependency is resolved before the instruction is really
23277 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
23278 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
23281 /* INT->FP conversion is expensive. */
23282 if (get_attr_fp_int_src (dep_insn
))
23285 /* Show ability of reorder buffer to hide latency of load by executing
23286 in parallel with previous instruction in case
23287 previous instruction is not needed to compute the address. */
23288 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
23289 && !ix86_agi_dependent (dep_insn
, insn
))
23291 /* Claim moves to take one cycle, as core can issue one load
23292 at time and the next load can start cycle later. */
23293 if (dep_insn_type
== TYPE_IMOV
23294 || dep_insn_type
== TYPE_FMOV
)
23303 case PROCESSOR_ATHLON
:
23305 case PROCESSOR_AMDFAM10
:
23306 case PROCESSOR_BDVER1
:
23307 case PROCESSOR_BDVER2
:
23308 case PROCESSOR_BTVER1
:
23309 case PROCESSOR_ATOM
:
23310 case PROCESSOR_GENERIC32
:
23311 case PROCESSOR_GENERIC64
:
23312 memory
= get_attr_memory (insn
);
23314 /* Show ability of reorder buffer to hide latency of load by executing
23315 in parallel with previous instruction in case
23316 previous instruction is not needed to compute the address. */
23317 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
23318 && !ix86_agi_dependent (dep_insn
, insn
))
23320 enum attr_unit unit
= get_attr_unit (insn
);
23323 /* Because of the difference between the length of integer and
23324 floating unit pipeline preparation stages, the memory operands
23325 for floating point are cheaper.
23327 ??? For Athlon it the difference is most probably 2. */
23328 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
23331 loadcost
= TARGET_ATHLON
? 2 : 0;
23333 if (cost
>= loadcost
)
23346 /* How many alternative schedules to try. This should be as wide as the
23347 scheduling freedom in the DFA, but no wider. Making this value too
23348 large results extra work for the scheduler. */
/* NOTE(review): damaged extraction -- the function header, the switch
   statement, the default case and the return values for the PENTIUM and
   PENTIUMPRO groups are missing (embedded line numbers jump 23355 ->
   23358 -> 23362).  Only the Core 2 / Core i7 group's return survives.  */
23351 ia32_multipass_dfa_lookahead (void)
23355 case PROCESSOR_PENTIUM
:
23358 case PROCESSOR_PENTIUMPRO
:
23362 case PROCESSOR_CORE2_32
:
23363 case PROCESSOR_CORE2_64
:
23364 case PROCESSOR_COREI7_32
:
23365 case PROCESSOR_COREI7_64
:
23366 /* Generally, we want haifa-sched:max_issue() to look ahead as far
23367 as many instructions can be executed on a cycle, i.e.,
23368 issue_rate. I wonder why tuning for many CPUs does not do this. */
23369 return ix86_issue_rate ();
23378 /* Model decoder of Core 2/i7.
23379 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
23380 track the instruction fetch block boundaries and make sure that long
23381 (9+ bytes) instructions are assigned to D0. */
23383 /* Maximum length of an insn that can be handled by
23384 a secondary decoder unit. '8' for Core 2/i7. */
23385 static int core2i7_secondary_decoder_max_insn_size
;
23387 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
23388 '16' for Core 2/i7. */
23389 static int core2i7_ifetch_block_size
;
23391 /* Maximum number of instructions decoder can handle per cycle.
23392 '6' for Core 2/i7. */
23393 static int core2i7_ifetch_block_max_insns
;
23395 typedef struct ix86_first_cycle_multipass_data_
*
23396 ix86_first_cycle_multipass_data_t
;
23397 typedef const struct ix86_first_cycle_multipass_data_
*
23398 const_ix86_first_cycle_multipass_data_t
;
23400 /* A variable to store target state across calls to max_issue within
23402 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data
,
23403 *ix86_first_cycle_multipass_data
= &_ix86_first_cycle_multipass_data
;
23405 /* Initialize DATA. */
/* NOTE(review): header/braces dropped by the extraction.  Resets the
   per-round decoder-model state: ifetch-block accounting and the
   ready_try change bitmap.  */
23407 core2i7_first_cycle_multipass_init (void *_data
)
23409 ix86_first_cycle_multipass_data_t data
23410 = (ix86_first_cycle_multipass_data_t
) _data
;
/* No bytes consumed and no insns issued in the current ifetch block.  */
23412 data
->ifetch_block_len
= 0;
23413 data
->ifetch_block_n_insns
= 0;
/* The ready_try change bitmap is allocated lazily elsewhere.  */
23414 data
->ready_try_change
= NULL
;
23415 data
->ready_try_change_size
= 0;
23418 /* Advancing the cycle; reset ifetch block counts. */
/* NOTE(review): header/braces dropped by the extraction.  */
23420 core2i7_dfa_post_advance_cycle (void)
23422 ix86_first_cycle_multipass_data_t data
= ix86_first_cycle_multipass_data
;
/* Sanity check: never issue more insns than the decoder allows.  */
23424 gcc_assert (data
->ifetch_block_n_insns
<= core2i7_ifetch_block_max_insns
);
/* New cycle, new ifetch block.  */
23426 data
->ifetch_block_len
= 0;
23427 data
->ifetch_block_n_insns
= 0;
23430 static int min_insn_size (rtx
);
23432 /* Filter out insns from ready_try that the core will not be able to issue
23433 on current cycle due to decoder. */
23435 core2i7_first_cycle_multipass_filter_ready_try
23436 (const_ix86_first_cycle_multipass_data_t data
,
23437 char *ready_try
, int n_ready
, bool first_cycle_insn_p
)
23444 if (ready_try
[n_ready
])
23447 insn
= get_ready_element (n_ready
);
23448 insn_size
= min_insn_size (insn
);
23450 if (/* If this is a too long an insn for a secondary decoder ... */
23451 (!first_cycle_insn_p
23452 && insn_size
> core2i7_secondary_decoder_max_insn_size
)
23453 /* ... or it would not fit into the ifetch block ... */
23454 || data
->ifetch_block_len
+ insn_size
> core2i7_ifetch_block_size
23455 /* ... or the decoder is full already ... */
23456 || data
->ifetch_block_n_insns
+ 1 > core2i7_ifetch_block_max_insns
)
23457 /* ... mask the insn out. */
23459 ready_try
[n_ready
] = 1;
23461 if (data
->ready_try_change
)
23462 SET_BIT (data
->ready_try_change
, n_ready
);
23467 /* Prepare for a new round of multipass lookahead scheduling. */
23469 core2i7_first_cycle_multipass_begin (void *_data
, char *ready_try
, int n_ready
,
23470 bool first_cycle_insn_p
)
23472 ix86_first_cycle_multipass_data_t data
23473 = (ix86_first_cycle_multipass_data_t
) _data
;
23474 const_ix86_first_cycle_multipass_data_t prev_data
23475 = ix86_first_cycle_multipass_data
;
23477 /* Restore the state from the end of the previous round. */
23478 data
->ifetch_block_len
= prev_data
->ifetch_block_len
;
23479 data
->ifetch_block_n_insns
= prev_data
->ifetch_block_n_insns
;
23481 /* Filter instructions that cannot be issued on current cycle due to
23482 decoder restrictions. */
23483 core2i7_first_cycle_multipass_filter_ready_try (data
, ready_try
, n_ready
,
23484 first_cycle_insn_p
);
23487 /* INSN is being issued in current solution. Account for its impact on
23488 the decoder model. */
23490 core2i7_first_cycle_multipass_issue (void *_data
, char *ready_try
, int n_ready
,
23491 rtx insn
, const void *_prev_data
)
23493 ix86_first_cycle_multipass_data_t data
23494 = (ix86_first_cycle_multipass_data_t
) _data
;
23495 const_ix86_first_cycle_multipass_data_t prev_data
23496 = (const_ix86_first_cycle_multipass_data_t
) _prev_data
;
23498 int insn_size
= min_insn_size (insn
);
23500 data
->ifetch_block_len
= prev_data
->ifetch_block_len
+ insn_size
;
23501 data
->ifetch_block_n_insns
= prev_data
->ifetch_block_n_insns
+ 1;
23502 gcc_assert (data
->ifetch_block_len
<= core2i7_ifetch_block_size
23503 && data
->ifetch_block_n_insns
<= core2i7_ifetch_block_max_insns
);
23505 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
23506 if (!data
->ready_try_change
)
23508 data
->ready_try_change
= sbitmap_alloc (n_ready
);
23509 data
->ready_try_change_size
= n_ready
;
23511 else if (data
->ready_try_change_size
< n_ready
)
23513 data
->ready_try_change
= sbitmap_resize (data
->ready_try_change
,
23515 data
->ready_try_change_size
= n_ready
;
23517 sbitmap_zero (data
->ready_try_change
);
23519 /* Filter out insns from ready_try that the core will not be able to issue
23520 on current cycle due to decoder. */
23521 core2i7_first_cycle_multipass_filter_ready_try (data
, ready_try
, n_ready
,
23525 /* Revert the effect on ready_try. */
23527 core2i7_first_cycle_multipass_backtrack (const void *_data
,
23529 int n_ready ATTRIBUTE_UNUSED
)
23531 const_ix86_first_cycle_multipass_data_t data
23532 = (const_ix86_first_cycle_multipass_data_t
) _data
;
23533 unsigned int i
= 0;
23534 sbitmap_iterator sbi
;
23536 gcc_assert (sbitmap_last_set_bit (data
->ready_try_change
) < n_ready
);
23537 EXECUTE_IF_SET_IN_SBITMAP (data
->ready_try_change
, 0, i
, sbi
)
23543 /* Save the result of multipass lookahead scheduling for the next round. */
/* NOTE(review): header/braces dropped by the extraction.  Copies the
   chosen solution's ifetch-block accounting into the global per-round
   state (ix86_first_cycle_multipass_data).  */
23545 core2i7_first_cycle_multipass_end (const void *_data
)
23547 const_ix86_first_cycle_multipass_data_t data
23548 = (const_ix86_first_cycle_multipass_data_t
) _data
;
23549 ix86_first_cycle_multipass_data_t next_data
23550 = ix86_first_cycle_multipass_data
;
23554 next_data
->ifetch_block_len
= data
->ifetch_block_len
;
23555 next_data
->ifetch_block_n_insns
= data
->ifetch_block_n_insns
;
23559 /* Deallocate target data. */
/* NOTE(review): header/braces dropped by the extraction.  */
23561 core2i7_first_cycle_multipass_fini (void *_data
)
23563 ix86_first_cycle_multipass_data_t data
23564 = (ix86_first_cycle_multipass_data_t
) _data
;
/* Free the lazily-allocated ready_try change bitmap and clear the
   pointer/size so a later init/fini pair stays safe.  */
23566 if (data
->ready_try_change
)
23568 sbitmap_free (data
->ready_try_change
);
23569 data
->ready_try_change
= NULL
;
23570 data
->ready_try_change_size
= 0;
23574 /* Prepare for scheduling pass. */
23576 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED
,
23577 int verbose ATTRIBUTE_UNUSED
,
23578 int max_uid ATTRIBUTE_UNUSED
)
23580 /* Install scheduling hooks for current CPU. Some of these hooks are used
23581 in time-critical parts of the scheduler, so we only set them up when
23582 they are actually used. */
23585 case PROCESSOR_CORE2_32
:
23586 case PROCESSOR_CORE2_64
:
23587 case PROCESSOR_COREI7_32
:
23588 case PROCESSOR_COREI7_64
:
23589 targetm
.sched
.dfa_post_advance_cycle
23590 = core2i7_dfa_post_advance_cycle
;
23591 targetm
.sched
.first_cycle_multipass_init
23592 = core2i7_first_cycle_multipass_init
;
23593 targetm
.sched
.first_cycle_multipass_begin
23594 = core2i7_first_cycle_multipass_begin
;
23595 targetm
.sched
.first_cycle_multipass_issue
23596 = core2i7_first_cycle_multipass_issue
;
23597 targetm
.sched
.first_cycle_multipass_backtrack
23598 = core2i7_first_cycle_multipass_backtrack
;
23599 targetm
.sched
.first_cycle_multipass_end
23600 = core2i7_first_cycle_multipass_end
;
23601 targetm
.sched
.first_cycle_multipass_fini
23602 = core2i7_first_cycle_multipass_fini
;
23604 /* Set decoder parameters. */
23605 core2i7_secondary_decoder_max_insn_size
= 8;
23606 core2i7_ifetch_block_size
= 16;
23607 core2i7_ifetch_block_max_insns
= 6;
23611 targetm
.sched
.dfa_post_advance_cycle
= NULL
;
23612 targetm
.sched
.first_cycle_multipass_init
= NULL
;
23613 targetm
.sched
.first_cycle_multipass_begin
= NULL
;
23614 targetm
.sched
.first_cycle_multipass_issue
= NULL
;
23615 targetm
.sched
.first_cycle_multipass_backtrack
= NULL
;
23616 targetm
.sched
.first_cycle_multipass_end
= NULL
;
23617 targetm
.sched
.first_cycle_multipass_fini
= NULL
;
23623 /* Compute the alignment given to a constant that is being placed in memory.
23624 EXP is the constant and ALIGN is the alignment that the object would
23626 The value of this function is used instead of that alignment to align
   the object.  NOTE(review): one comment line (23625), the function
   header and the return statements were dropped by the extraction.  */
23630 ix86_constant_alignment (tree exp
, int align
)
/* FP/vector/integer constants: the DFmode and ALIGN_MODE_128 cases each
   had a lost return -- presumably 64 and 128 bits respectively; TODO
   confirm against upstream.  */
23632 if (TREE_CODE (exp
) == REAL_CST
|| TREE_CODE (exp
) == VECTOR_CST
23633 || TREE_CODE (exp
) == INTEGER_CST
)
23635 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
23637 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
/* Long-enough string constants get word alignment unless optimizing
   for size.  */
23640 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
23641 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
23642 return BITS_PER_WORD
;
23647 /* Compute the alignment for a static variable.
23648 TYPE is the data type, and ALIGN is the alignment that
23649 the object would ordinarily have. The value of this function is used
23650 instead of that alignment to align the object. */
23653 ix86_data_alignment (tree type
, int align
)
23655 int max_align
= optimize_size
? BITS_PER_WORD
: MIN (256, MAX_OFILE_ALIGNMENT
);
23657 if (AGGREGATE_TYPE_P (type
)
23658 && TYPE_SIZE (type
)
23659 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
23660 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= (unsigned) max_align
23661 || TREE_INT_CST_HIGH (TYPE_SIZE (type
)))
23662 && align
< max_align
)
23665 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
23666 to 16byte boundary. */
23669 if (AGGREGATE_TYPE_P (type
)
23670 && TYPE_SIZE (type
)
23671 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
23672 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
23673 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
23677 if (TREE_CODE (type
) == ARRAY_TYPE
)
23679 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
23681 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
23684 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
23687 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
23689 if ((TYPE_MODE (type
) == XCmode
23690 || TYPE_MODE (type
) == TCmode
) && align
< 128)
23693 else if ((TREE_CODE (type
) == RECORD_TYPE
23694 || TREE_CODE (type
) == UNION_TYPE
23695 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
23696 && TYPE_FIELDS (type
))
23698 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
23700 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
23703 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
23704 || TREE_CODE (type
) == INTEGER_TYPE
)
23706 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
23708 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
23715 /* Compute the alignment for a local variable or a stack slot. EXP is
23716 the data type or decl itself, MODE is the widest mode available and
23717 ALIGN is the alignment that the object would ordinarily have. The
23718 value of this macro is used instead of that alignment to align the
23722 ix86_local_alignment (tree exp
, enum machine_mode mode
,
23723 unsigned int align
)
23727 if (exp
&& DECL_P (exp
))
23729 type
= TREE_TYPE (exp
);
23738 /* Don't do dynamic stack realignment for long long objects with
23739 -mpreferred-stack-boundary=2. */
23742 && ix86_preferred_stack_boundary
< 64
23743 && (mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
23744 && (!type
|| !TYPE_USER_ALIGN (type
))
23745 && (!decl
|| !DECL_USER_ALIGN (decl
)))
23748 /* If TYPE is NULL, we are allocating a stack slot for caller-save
23749 register in MODE. We will return the largest alignment of XF
23753 if (mode
== XFmode
&& align
< GET_MODE_ALIGNMENT (DFmode
))
23754 align
= GET_MODE_ALIGNMENT (DFmode
);
23758 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
23759 to 16byte boundary. Exact wording is:
23761 An array uses the same alignment as its elements, except that a local or
23762 global array variable of length at least 16 bytes or
23763 a C99 variable-length array variable always has alignment of at least 16 bytes.
23765 This was added to allow use of aligned SSE instructions at arrays. This
23766 rule is meant for static storage (where compiler can not do the analysis
23767 by itself). We follow it for automatic variables only when convenient.
23768 We fully control everything in the function compiled and functions from
23769 other unit can not rely on the alignment.
23771 Exclude va_list type. It is the common case of local array where
23772 we can not benefit from the alignment. */
23773 if (TARGET_64BIT
&& optimize_function_for_speed_p (cfun
)
23776 if (AGGREGATE_TYPE_P (type
)
23777 && (va_list_type_node
== NULL_TREE
23778 || (TYPE_MAIN_VARIANT (type
)
23779 != TYPE_MAIN_VARIANT (va_list_type_node
)))
23780 && TYPE_SIZE (type
)
23781 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
23782 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
23783 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
23786 if (TREE_CODE (type
) == ARRAY_TYPE
)
23788 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
23790 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
23793 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
23795 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
23797 if ((TYPE_MODE (type
) == XCmode
23798 || TYPE_MODE (type
) == TCmode
) && align
< 128)
23801 else if ((TREE_CODE (type
) == RECORD_TYPE
23802 || TREE_CODE (type
) == UNION_TYPE
23803 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
23804 && TYPE_FIELDS (type
))
23806 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
23808 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
23811 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
23812 || TREE_CODE (type
) == INTEGER_TYPE
)
23815 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
23817 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
23823 /* Compute the minimum required alignment for dynamic stack realignment
23824 purposes for a local variable, parameter or a stack slot. EXP is
23825 the data type or decl itself, MODE is its mode and ALIGN is the
23826 alignment that the object would ordinarily have. */
/* NOTE(review): damaged extraction -- the function header, braces, the
   declarations/assignments of TYPE and DECL, and the return statements
   are missing (embedded line numbers jump 23830 -> 23834 -> 23836 ->
   23845).  DECL is referenced below but never visibly set here.  */
23829 ix86_minimum_alignment (tree exp
, enum machine_mode mode
,
23830 unsigned int align
)
/* When EXP is a decl, decisions are made on its type.  */
23834 if (exp
&& DECL_P (exp
))
23836 type
= TREE_TYPE (exp
);
/* Only the 32-bit, ALIGN == 64, preferred-stack-boundary < 64 case needs
   the special handling below.  */
23845 if (TARGET_64BIT
|| align
!= 64 || ix86_preferred_stack_boundary
>= 64)
23848 /* Don't do dynamic stack realignment for long long objects with
23849 -mpreferred-stack-boundary=2. */
/* DImode objects without user-specified alignment: presumably the lost
   code lowers the required alignment -- TODO confirm against upstream.  */
23850 if ((mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
23851 && (!type
|| !TYPE_USER_ALIGN (type
))
23852 && (!decl
|| !DECL_USER_ALIGN (decl
)))
23858 /* Find a location for the static chain incoming to a nested function.
23859 This is a register, unless all free registers are used by arguments. */
23862 ix86_static_chain (const_tree fndecl
, bool incoming_p
)
23866 if (!DECL_STATIC_CHAIN (fndecl
))
23871 /* We always use R10 in 64-bit mode. */
23879 /* By default in 32-bit mode we use ECX to pass the static chain. */
23882 fntype
= TREE_TYPE (fndecl
);
23883 ccvt
= ix86_get_callcvt (fntype
);
23884 if ((ccvt
& (IX86_CALLCVT_FASTCALL
| IX86_CALLCVT_THISCALL
)) != 0)
23886 /* Fastcall functions use ecx/edx for arguments, which leaves
23887 us with EAX for the static chain.
23888 Thiscall functions use ecx for arguments, which also
23889 leaves us with EAX for the static chain. */
23892 else if (ix86_function_regparm (fntype
, fndecl
) == 3)
23894 /* For regparm 3, we have no free call-clobbered registers in
23895 which to store the static chain. In order to implement this,
23896 we have the trampoline push the static chain to the stack.
23897 However, we can't push a value below the return address when
23898 we call the nested function directly, so we have to use an
23899 alternate entry point. For this we use ESI, and have the
23900 alternate entry point push ESI, so that things appear the
23901 same once we're executing the nested function. */
23904 if (fndecl
== current_function_decl
)
23905 ix86_static_chain_on_stack
= true;
23906 return gen_frame_mem (SImode
,
23907 plus_constant (arg_pointer_rtx
, -8));
23913 return gen_rtx_REG (Pmode
, regno
);
23916 /* Emit RTL insns to initialize the variable parts of a trampoline.
23917 FNDECL is the decl of the target address; M_TRAMP is a MEM for
23918 the trampoline, and CHAIN_VALUE is an RTX for the static chain
23919 to be passed to the target function. */
23922 ix86_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
23928 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
23934 /* Load the function address to r11. Try to load address using
23935 the shorter movl instead of movabs. We may want to support
23936 movq for kernel mode, but kernel does not use trampolines at
23938 if (x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
23940 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
23942 mem
= adjust_address (m_tramp
, HImode
, offset
);
23943 emit_move_insn (mem
, gen_int_mode (0xbb41, HImode
));
23945 mem
= adjust_address (m_tramp
, SImode
, offset
+ 2);
23946 emit_move_insn (mem
, gen_lowpart (SImode
, fnaddr
));
23951 mem
= adjust_address (m_tramp
, HImode
, offset
);
23952 emit_move_insn (mem
, gen_int_mode (0xbb49, HImode
));
23954 mem
= adjust_address (m_tramp
, DImode
, offset
+ 2);
23955 emit_move_insn (mem
, fnaddr
);
23959 /* Load static chain using movabs to r10. Use the
23960 shorter movl instead of movabs for x32. */
23972 mem
= adjust_address (m_tramp
, HImode
, offset
);
23973 emit_move_insn (mem
, gen_int_mode (opcode
, HImode
));
23975 mem
= adjust_address (m_tramp
, ptr_mode
, offset
+ 2);
23976 emit_move_insn (mem
, chain_value
);
23979 /* Jump to r11; the last (unused) byte is a nop, only there to
23980 pad the write out to a single 32-bit store. */
23981 mem
= adjust_address (m_tramp
, SImode
, offset
);
23982 emit_move_insn (mem
, gen_int_mode (0x90e3ff49, SImode
));
23989 /* Depending on the static chain location, either load a register
23990 with a constant, or push the constant to the stack. All of the
23991 instructions are the same size. */
23992 chain
= ix86_static_chain (fndecl
, true);
23995 switch (REGNO (chain
))
23998 opcode
= 0xb8; break;
24000 opcode
= 0xb9; break;
24002 gcc_unreachable ();
24008 mem
= adjust_address (m_tramp
, QImode
, offset
);
24009 emit_move_insn (mem
, gen_int_mode (opcode
, QImode
));
24011 mem
= adjust_address (m_tramp
, SImode
, offset
+ 1);
24012 emit_move_insn (mem
, chain_value
);
24015 mem
= adjust_address (m_tramp
, QImode
, offset
);
24016 emit_move_insn (mem
, gen_int_mode (0xe9, QImode
));
24018 mem
= adjust_address (m_tramp
, SImode
, offset
+ 1);
24020 /* Compute offset from the end of the jmp to the target function.
24021 In the case in which the trampoline stores the static chain on
24022 the stack, we need to skip the first insn which pushes the
24023 (call-saved) register static chain; this push is 1 byte. */
24025 disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
24026 plus_constant (XEXP (m_tramp
, 0),
24027 offset
- (MEM_P (chain
) ? 1 : 0)),
24028 NULL_RTX
, 1, OPTAB_DIRECT
);
24029 emit_move_insn (mem
, disp
);
24032 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
24034 #ifdef HAVE_ENABLE_EXECUTE_STACK
24035 #ifdef CHECK_EXECUTE_STACK_ENABLED
24036 if (CHECK_EXECUTE_STACK_ENABLED
)
24038 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
24039 LCT_NORMAL
, VOIDmode
, 1, XEXP (m_tramp
, 0), Pmode
);
24043 /* The following file contains several enumerations and data structures
24044 built from the definitions in i386-builtin-types.def. */
24046 #include "i386-builtin-types.inc"
24048 /* Table for the ix86 builtin non-function types. */
24049 static GTY(()) tree ix86_builtin_type_tab
[(int) IX86_BT_LAST_CPTR
+ 1];
24051 /* Retrieve an element from the above table, building some of
24052 the types lazily. */
24055 ix86_get_builtin_type (enum ix86_builtin_type tcode
)
24057 unsigned int index
;
24060 gcc_assert ((unsigned)tcode
< ARRAY_SIZE(ix86_builtin_type_tab
));
24062 type
= ix86_builtin_type_tab
[(int) tcode
];
24066 gcc_assert (tcode
> IX86_BT_LAST_PRIM
);
24067 if (tcode
<= IX86_BT_LAST_VECT
)
24069 enum machine_mode mode
;
24071 index
= tcode
- IX86_BT_LAST_PRIM
- 1;
24072 itype
= ix86_get_builtin_type (ix86_builtin_type_vect_base
[index
]);
24073 mode
= ix86_builtin_type_vect_mode
[index
];
24075 type
= build_vector_type_for_mode (itype
, mode
);
24081 index
= tcode
- IX86_BT_LAST_VECT
- 1;
24082 if (tcode
<= IX86_BT_LAST_PTR
)
24083 quals
= TYPE_UNQUALIFIED
;
24085 quals
= TYPE_QUAL_CONST
;
24087 itype
= ix86_get_builtin_type (ix86_builtin_type_ptr_base
[index
]);
24088 if (quals
!= TYPE_UNQUALIFIED
)
24089 itype
= build_qualified_type (itype
, quals
);
24091 type
= build_pointer_type (itype
);
24094 ix86_builtin_type_tab
[(int) tcode
] = type
;
24098 /* Table for the ix86 builtin function types. */
24099 static GTY(()) tree ix86_builtin_func_type_tab
[(int) IX86_BT_LAST_ALIAS
+ 1];
24101 /* Retrieve an element from the above table, building some of
24102 the types lazily. */
24105 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode
)
24109 gcc_assert ((unsigned)tcode
< ARRAY_SIZE (ix86_builtin_func_type_tab
));
24111 type
= ix86_builtin_func_type_tab
[(int) tcode
];
24115 if (tcode
<= IX86_BT_LAST_FUNC
)
24117 unsigned start
= ix86_builtin_func_start
[(int) tcode
];
24118 unsigned after
= ix86_builtin_func_start
[(int) tcode
+ 1];
24119 tree rtype
, atype
, args
= void_list_node
;
24122 rtype
= ix86_get_builtin_type (ix86_builtin_func_args
[start
]);
24123 for (i
= after
- 1; i
> start
; --i
)
24125 atype
= ix86_get_builtin_type (ix86_builtin_func_args
[i
]);
24126 args
= tree_cons (NULL
, atype
, args
);
24129 type
= build_function_type (rtype
, args
);
24133 unsigned index
= tcode
- IX86_BT_LAST_FUNC
- 1;
24134 enum ix86_builtin_func_type icode
;
24136 icode
= ix86_builtin_func_alias_base
[index
];
24137 type
= ix86_get_builtin_func_type (icode
);
24140 ix86_builtin_func_type_tab
[(int) tcode
] = type
;
24145 /* Codes for all the SSE/MMX builtins. */
24148 IX86_BUILTIN_ADDPS
,
24149 IX86_BUILTIN_ADDSS
,
24150 IX86_BUILTIN_DIVPS
,
24151 IX86_BUILTIN_DIVSS
,
24152 IX86_BUILTIN_MULPS
,
24153 IX86_BUILTIN_MULSS
,
24154 IX86_BUILTIN_SUBPS
,
24155 IX86_BUILTIN_SUBSS
,
24157 IX86_BUILTIN_CMPEQPS
,
24158 IX86_BUILTIN_CMPLTPS
,
24159 IX86_BUILTIN_CMPLEPS
,
24160 IX86_BUILTIN_CMPGTPS
,
24161 IX86_BUILTIN_CMPGEPS
,
24162 IX86_BUILTIN_CMPNEQPS
,
24163 IX86_BUILTIN_CMPNLTPS
,
24164 IX86_BUILTIN_CMPNLEPS
,
24165 IX86_BUILTIN_CMPNGTPS
,
24166 IX86_BUILTIN_CMPNGEPS
,
24167 IX86_BUILTIN_CMPORDPS
,
24168 IX86_BUILTIN_CMPUNORDPS
,
24169 IX86_BUILTIN_CMPEQSS
,
24170 IX86_BUILTIN_CMPLTSS
,
24171 IX86_BUILTIN_CMPLESS
,
24172 IX86_BUILTIN_CMPNEQSS
,
24173 IX86_BUILTIN_CMPNLTSS
,
24174 IX86_BUILTIN_CMPNLESS
,
24175 IX86_BUILTIN_CMPNGTSS
,
24176 IX86_BUILTIN_CMPNGESS
,
24177 IX86_BUILTIN_CMPORDSS
,
24178 IX86_BUILTIN_CMPUNORDSS
,
24180 IX86_BUILTIN_COMIEQSS
,
24181 IX86_BUILTIN_COMILTSS
,
24182 IX86_BUILTIN_COMILESS
,
24183 IX86_BUILTIN_COMIGTSS
,
24184 IX86_BUILTIN_COMIGESS
,
24185 IX86_BUILTIN_COMINEQSS
,
24186 IX86_BUILTIN_UCOMIEQSS
,
24187 IX86_BUILTIN_UCOMILTSS
,
24188 IX86_BUILTIN_UCOMILESS
,
24189 IX86_BUILTIN_UCOMIGTSS
,
24190 IX86_BUILTIN_UCOMIGESS
,
24191 IX86_BUILTIN_UCOMINEQSS
,
24193 IX86_BUILTIN_CVTPI2PS
,
24194 IX86_BUILTIN_CVTPS2PI
,
24195 IX86_BUILTIN_CVTSI2SS
,
24196 IX86_BUILTIN_CVTSI642SS
,
24197 IX86_BUILTIN_CVTSS2SI
,
24198 IX86_BUILTIN_CVTSS2SI64
,
24199 IX86_BUILTIN_CVTTPS2PI
,
24200 IX86_BUILTIN_CVTTSS2SI
,
24201 IX86_BUILTIN_CVTTSS2SI64
,
24203 IX86_BUILTIN_MAXPS
,
24204 IX86_BUILTIN_MAXSS
,
24205 IX86_BUILTIN_MINPS
,
24206 IX86_BUILTIN_MINSS
,
24208 IX86_BUILTIN_LOADUPS
,
24209 IX86_BUILTIN_STOREUPS
,
24210 IX86_BUILTIN_MOVSS
,
24212 IX86_BUILTIN_MOVHLPS
,
24213 IX86_BUILTIN_MOVLHPS
,
24214 IX86_BUILTIN_LOADHPS
,
24215 IX86_BUILTIN_LOADLPS
,
24216 IX86_BUILTIN_STOREHPS
,
24217 IX86_BUILTIN_STORELPS
,
24219 IX86_BUILTIN_MASKMOVQ
,
24220 IX86_BUILTIN_MOVMSKPS
,
24221 IX86_BUILTIN_PMOVMSKB
,
24223 IX86_BUILTIN_MOVNTPS
,
24224 IX86_BUILTIN_MOVNTQ
,
24226 IX86_BUILTIN_LOADDQU
,
24227 IX86_BUILTIN_STOREDQU
,
24229 IX86_BUILTIN_PACKSSWB
,
24230 IX86_BUILTIN_PACKSSDW
,
24231 IX86_BUILTIN_PACKUSWB
,
24233 IX86_BUILTIN_PADDB
,
24234 IX86_BUILTIN_PADDW
,
24235 IX86_BUILTIN_PADDD
,
24236 IX86_BUILTIN_PADDQ
,
24237 IX86_BUILTIN_PADDSB
,
24238 IX86_BUILTIN_PADDSW
,
24239 IX86_BUILTIN_PADDUSB
,
24240 IX86_BUILTIN_PADDUSW
,
24241 IX86_BUILTIN_PSUBB
,
24242 IX86_BUILTIN_PSUBW
,
24243 IX86_BUILTIN_PSUBD
,
24244 IX86_BUILTIN_PSUBQ
,
24245 IX86_BUILTIN_PSUBSB
,
24246 IX86_BUILTIN_PSUBSW
,
24247 IX86_BUILTIN_PSUBUSB
,
24248 IX86_BUILTIN_PSUBUSW
,
24251 IX86_BUILTIN_PANDN
,
24255 IX86_BUILTIN_PAVGB
,
24256 IX86_BUILTIN_PAVGW
,
24258 IX86_BUILTIN_PCMPEQB
,
24259 IX86_BUILTIN_PCMPEQW
,
24260 IX86_BUILTIN_PCMPEQD
,
24261 IX86_BUILTIN_PCMPGTB
,
24262 IX86_BUILTIN_PCMPGTW
,
24263 IX86_BUILTIN_PCMPGTD
,
24265 IX86_BUILTIN_PMADDWD
,
24267 IX86_BUILTIN_PMAXSW
,
24268 IX86_BUILTIN_PMAXUB
,
24269 IX86_BUILTIN_PMINSW
,
24270 IX86_BUILTIN_PMINUB
,
24272 IX86_BUILTIN_PMULHUW
,
24273 IX86_BUILTIN_PMULHW
,
24274 IX86_BUILTIN_PMULLW
,
24276 IX86_BUILTIN_PSADBW
,
24277 IX86_BUILTIN_PSHUFW
,
24279 IX86_BUILTIN_PSLLW
,
24280 IX86_BUILTIN_PSLLD
,
24281 IX86_BUILTIN_PSLLQ
,
24282 IX86_BUILTIN_PSRAW
,
24283 IX86_BUILTIN_PSRAD
,
24284 IX86_BUILTIN_PSRLW
,
24285 IX86_BUILTIN_PSRLD
,
24286 IX86_BUILTIN_PSRLQ
,
24287 IX86_BUILTIN_PSLLWI
,
24288 IX86_BUILTIN_PSLLDI
,
24289 IX86_BUILTIN_PSLLQI
,
24290 IX86_BUILTIN_PSRAWI
,
24291 IX86_BUILTIN_PSRADI
,
24292 IX86_BUILTIN_PSRLWI
,
24293 IX86_BUILTIN_PSRLDI
,
24294 IX86_BUILTIN_PSRLQI
,
24296 IX86_BUILTIN_PUNPCKHBW
,
24297 IX86_BUILTIN_PUNPCKHWD
,
24298 IX86_BUILTIN_PUNPCKHDQ
,
24299 IX86_BUILTIN_PUNPCKLBW
,
24300 IX86_BUILTIN_PUNPCKLWD
,
24301 IX86_BUILTIN_PUNPCKLDQ
,
24303 IX86_BUILTIN_SHUFPS
,
24305 IX86_BUILTIN_RCPPS
,
24306 IX86_BUILTIN_RCPSS
,
24307 IX86_BUILTIN_RSQRTPS
,
24308 IX86_BUILTIN_RSQRTPS_NR
,
24309 IX86_BUILTIN_RSQRTSS
,
24310 IX86_BUILTIN_RSQRTF
,
24311 IX86_BUILTIN_SQRTPS
,
24312 IX86_BUILTIN_SQRTPS_NR
,
24313 IX86_BUILTIN_SQRTSS
,
24315 IX86_BUILTIN_UNPCKHPS
,
24316 IX86_BUILTIN_UNPCKLPS
,
24318 IX86_BUILTIN_ANDPS
,
24319 IX86_BUILTIN_ANDNPS
,
24321 IX86_BUILTIN_XORPS
,
24324 IX86_BUILTIN_LDMXCSR
,
24325 IX86_BUILTIN_STMXCSR
,
24326 IX86_BUILTIN_SFENCE
,
24328 /* 3DNow! Original */
24329 IX86_BUILTIN_FEMMS
,
24330 IX86_BUILTIN_PAVGUSB
,
24331 IX86_BUILTIN_PF2ID
,
24332 IX86_BUILTIN_PFACC
,
24333 IX86_BUILTIN_PFADD
,
24334 IX86_BUILTIN_PFCMPEQ
,
24335 IX86_BUILTIN_PFCMPGE
,
24336 IX86_BUILTIN_PFCMPGT
,
24337 IX86_BUILTIN_PFMAX
,
24338 IX86_BUILTIN_PFMIN
,
24339 IX86_BUILTIN_PFMUL
,
24340 IX86_BUILTIN_PFRCP
,
24341 IX86_BUILTIN_PFRCPIT1
,
24342 IX86_BUILTIN_PFRCPIT2
,
24343 IX86_BUILTIN_PFRSQIT1
,
24344 IX86_BUILTIN_PFRSQRT
,
24345 IX86_BUILTIN_PFSUB
,
24346 IX86_BUILTIN_PFSUBR
,
24347 IX86_BUILTIN_PI2FD
,
24348 IX86_BUILTIN_PMULHRW
,
24350 /* 3DNow! Athlon Extensions */
24351 IX86_BUILTIN_PF2IW
,
24352 IX86_BUILTIN_PFNACC
,
24353 IX86_BUILTIN_PFPNACC
,
24354 IX86_BUILTIN_PI2FW
,
24355 IX86_BUILTIN_PSWAPDSI
,
24356 IX86_BUILTIN_PSWAPDSF
,
24359 IX86_BUILTIN_ADDPD
,
24360 IX86_BUILTIN_ADDSD
,
24361 IX86_BUILTIN_DIVPD
,
24362 IX86_BUILTIN_DIVSD
,
24363 IX86_BUILTIN_MULPD
,
24364 IX86_BUILTIN_MULSD
,
24365 IX86_BUILTIN_SUBPD
,
24366 IX86_BUILTIN_SUBSD
,
24368 IX86_BUILTIN_CMPEQPD
,
24369 IX86_BUILTIN_CMPLTPD
,
24370 IX86_BUILTIN_CMPLEPD
,
24371 IX86_BUILTIN_CMPGTPD
,
24372 IX86_BUILTIN_CMPGEPD
,
24373 IX86_BUILTIN_CMPNEQPD
,
24374 IX86_BUILTIN_CMPNLTPD
,
24375 IX86_BUILTIN_CMPNLEPD
,
24376 IX86_BUILTIN_CMPNGTPD
,
24377 IX86_BUILTIN_CMPNGEPD
,
24378 IX86_BUILTIN_CMPORDPD
,
24379 IX86_BUILTIN_CMPUNORDPD
,
24380 IX86_BUILTIN_CMPEQSD
,
24381 IX86_BUILTIN_CMPLTSD
,
24382 IX86_BUILTIN_CMPLESD
,
24383 IX86_BUILTIN_CMPNEQSD
,
24384 IX86_BUILTIN_CMPNLTSD
,
24385 IX86_BUILTIN_CMPNLESD
,
24386 IX86_BUILTIN_CMPORDSD
,
24387 IX86_BUILTIN_CMPUNORDSD
,
24389 IX86_BUILTIN_COMIEQSD
,
24390 IX86_BUILTIN_COMILTSD
,
24391 IX86_BUILTIN_COMILESD
,
24392 IX86_BUILTIN_COMIGTSD
,
24393 IX86_BUILTIN_COMIGESD
,
24394 IX86_BUILTIN_COMINEQSD
,
24395 IX86_BUILTIN_UCOMIEQSD
,
24396 IX86_BUILTIN_UCOMILTSD
,
24397 IX86_BUILTIN_UCOMILESD
,
24398 IX86_BUILTIN_UCOMIGTSD
,
24399 IX86_BUILTIN_UCOMIGESD
,
24400 IX86_BUILTIN_UCOMINEQSD
,
24402 IX86_BUILTIN_MAXPD
,
24403 IX86_BUILTIN_MAXSD
,
24404 IX86_BUILTIN_MINPD
,
24405 IX86_BUILTIN_MINSD
,
24407 IX86_BUILTIN_ANDPD
,
24408 IX86_BUILTIN_ANDNPD
,
24410 IX86_BUILTIN_XORPD
,
24412 IX86_BUILTIN_SQRTPD
,
24413 IX86_BUILTIN_SQRTSD
,
24415 IX86_BUILTIN_UNPCKHPD
,
24416 IX86_BUILTIN_UNPCKLPD
,
24418 IX86_BUILTIN_SHUFPD
,
24420 IX86_BUILTIN_LOADUPD
,
24421 IX86_BUILTIN_STOREUPD
,
24422 IX86_BUILTIN_MOVSD
,
24424 IX86_BUILTIN_LOADHPD
,
24425 IX86_BUILTIN_LOADLPD
,
24427 IX86_BUILTIN_CVTDQ2PD
,
24428 IX86_BUILTIN_CVTDQ2PS
,
24430 IX86_BUILTIN_CVTPD2DQ
,
24431 IX86_BUILTIN_CVTPD2PI
,
24432 IX86_BUILTIN_CVTPD2PS
,
24433 IX86_BUILTIN_CVTTPD2DQ
,
24434 IX86_BUILTIN_CVTTPD2PI
,
24436 IX86_BUILTIN_CVTPI2PD
,
24437 IX86_BUILTIN_CVTSI2SD
,
24438 IX86_BUILTIN_CVTSI642SD
,
24440 IX86_BUILTIN_CVTSD2SI
,
24441 IX86_BUILTIN_CVTSD2SI64
,
24442 IX86_BUILTIN_CVTSD2SS
,
24443 IX86_BUILTIN_CVTSS2SD
,
24444 IX86_BUILTIN_CVTTSD2SI
,
24445 IX86_BUILTIN_CVTTSD2SI64
,
24447 IX86_BUILTIN_CVTPS2DQ
,
24448 IX86_BUILTIN_CVTPS2PD
,
24449 IX86_BUILTIN_CVTTPS2DQ
,
24451 IX86_BUILTIN_MOVNTI
,
24452 IX86_BUILTIN_MOVNTPD
,
24453 IX86_BUILTIN_MOVNTDQ
,
24455 IX86_BUILTIN_MOVQ128
,
24458 IX86_BUILTIN_MASKMOVDQU
,
24459 IX86_BUILTIN_MOVMSKPD
,
24460 IX86_BUILTIN_PMOVMSKB128
,
24462 IX86_BUILTIN_PACKSSWB128
,
24463 IX86_BUILTIN_PACKSSDW128
,
24464 IX86_BUILTIN_PACKUSWB128
,
24466 IX86_BUILTIN_PADDB128
,
24467 IX86_BUILTIN_PADDW128
,
24468 IX86_BUILTIN_PADDD128
,
24469 IX86_BUILTIN_PADDQ128
,
24470 IX86_BUILTIN_PADDSB128
,
24471 IX86_BUILTIN_PADDSW128
,
24472 IX86_BUILTIN_PADDUSB128
,
24473 IX86_BUILTIN_PADDUSW128
,
24474 IX86_BUILTIN_PSUBB128
,
24475 IX86_BUILTIN_PSUBW128
,
24476 IX86_BUILTIN_PSUBD128
,
24477 IX86_BUILTIN_PSUBQ128
,
24478 IX86_BUILTIN_PSUBSB128
,
24479 IX86_BUILTIN_PSUBSW128
,
24480 IX86_BUILTIN_PSUBUSB128
,
24481 IX86_BUILTIN_PSUBUSW128
,
24483 IX86_BUILTIN_PAND128
,
24484 IX86_BUILTIN_PANDN128
,
24485 IX86_BUILTIN_POR128
,
24486 IX86_BUILTIN_PXOR128
,
24488 IX86_BUILTIN_PAVGB128
,
24489 IX86_BUILTIN_PAVGW128
,
24491 IX86_BUILTIN_PCMPEQB128
,
24492 IX86_BUILTIN_PCMPEQW128
,
24493 IX86_BUILTIN_PCMPEQD128
,
24494 IX86_BUILTIN_PCMPGTB128
,
24495 IX86_BUILTIN_PCMPGTW128
,
24496 IX86_BUILTIN_PCMPGTD128
,
24498 IX86_BUILTIN_PMADDWD128
,
24500 IX86_BUILTIN_PMAXSW128
,
24501 IX86_BUILTIN_PMAXUB128
,
24502 IX86_BUILTIN_PMINSW128
,
24503 IX86_BUILTIN_PMINUB128
,
24505 IX86_BUILTIN_PMULUDQ
,
24506 IX86_BUILTIN_PMULUDQ128
,
24507 IX86_BUILTIN_PMULHUW128
,
24508 IX86_BUILTIN_PMULHW128
,
24509 IX86_BUILTIN_PMULLW128
,
24511 IX86_BUILTIN_PSADBW128
,
24512 IX86_BUILTIN_PSHUFHW
,
24513 IX86_BUILTIN_PSHUFLW
,
24514 IX86_BUILTIN_PSHUFD
,
24516 IX86_BUILTIN_PSLLDQI128
,
24517 IX86_BUILTIN_PSLLWI128
,
24518 IX86_BUILTIN_PSLLDI128
,
24519 IX86_BUILTIN_PSLLQI128
,
24520 IX86_BUILTIN_PSRAWI128
,
24521 IX86_BUILTIN_PSRADI128
,
24522 IX86_BUILTIN_PSRLDQI128
,
24523 IX86_BUILTIN_PSRLWI128
,
24524 IX86_BUILTIN_PSRLDI128
,
24525 IX86_BUILTIN_PSRLQI128
,
24527 IX86_BUILTIN_PSLLDQ128
,
24528 IX86_BUILTIN_PSLLW128
,
24529 IX86_BUILTIN_PSLLD128
,
24530 IX86_BUILTIN_PSLLQ128
,
24531 IX86_BUILTIN_PSRAW128
,
24532 IX86_BUILTIN_PSRAD128
,
24533 IX86_BUILTIN_PSRLW128
,
24534 IX86_BUILTIN_PSRLD128
,
24535 IX86_BUILTIN_PSRLQ128
,
24537 IX86_BUILTIN_PUNPCKHBW128
,
24538 IX86_BUILTIN_PUNPCKHWD128
,
24539 IX86_BUILTIN_PUNPCKHDQ128
,
24540 IX86_BUILTIN_PUNPCKHQDQ128
,
24541 IX86_BUILTIN_PUNPCKLBW128
,
24542 IX86_BUILTIN_PUNPCKLWD128
,
24543 IX86_BUILTIN_PUNPCKLDQ128
,
24544 IX86_BUILTIN_PUNPCKLQDQ128
,
24546 IX86_BUILTIN_CLFLUSH
,
24547 IX86_BUILTIN_MFENCE
,
24548 IX86_BUILTIN_LFENCE
,
24549 IX86_BUILTIN_PAUSE
,
24551 IX86_BUILTIN_BSRSI
,
24552 IX86_BUILTIN_BSRDI
,
24553 IX86_BUILTIN_RDPMC
,
24554 IX86_BUILTIN_RDTSC
,
24555 IX86_BUILTIN_RDTSCP
,
24556 IX86_BUILTIN_ROLQI
,
24557 IX86_BUILTIN_ROLHI
,
24558 IX86_BUILTIN_RORQI
,
24559 IX86_BUILTIN_RORHI
,
24562 IX86_BUILTIN_ADDSUBPS
,
24563 IX86_BUILTIN_HADDPS
,
24564 IX86_BUILTIN_HSUBPS
,
24565 IX86_BUILTIN_MOVSHDUP
,
24566 IX86_BUILTIN_MOVSLDUP
,
24567 IX86_BUILTIN_ADDSUBPD
,
24568 IX86_BUILTIN_HADDPD
,
24569 IX86_BUILTIN_HSUBPD
,
24570 IX86_BUILTIN_LDDQU
,
24572 IX86_BUILTIN_MONITOR
,
24573 IX86_BUILTIN_MWAIT
,
24576 IX86_BUILTIN_PHADDW
,
24577 IX86_BUILTIN_PHADDD
,
24578 IX86_BUILTIN_PHADDSW
,
24579 IX86_BUILTIN_PHSUBW
,
24580 IX86_BUILTIN_PHSUBD
,
24581 IX86_BUILTIN_PHSUBSW
,
24582 IX86_BUILTIN_PMADDUBSW
,
24583 IX86_BUILTIN_PMULHRSW
,
24584 IX86_BUILTIN_PSHUFB
,
24585 IX86_BUILTIN_PSIGNB
,
24586 IX86_BUILTIN_PSIGNW
,
24587 IX86_BUILTIN_PSIGND
,
24588 IX86_BUILTIN_PALIGNR
,
24589 IX86_BUILTIN_PABSB
,
24590 IX86_BUILTIN_PABSW
,
24591 IX86_BUILTIN_PABSD
,
24593 IX86_BUILTIN_PHADDW128
,
24594 IX86_BUILTIN_PHADDD128
,
24595 IX86_BUILTIN_PHADDSW128
,
24596 IX86_BUILTIN_PHSUBW128
,
24597 IX86_BUILTIN_PHSUBD128
,
24598 IX86_BUILTIN_PHSUBSW128
,
24599 IX86_BUILTIN_PMADDUBSW128
,
24600 IX86_BUILTIN_PMULHRSW128
,
24601 IX86_BUILTIN_PSHUFB128
,
24602 IX86_BUILTIN_PSIGNB128
,
24603 IX86_BUILTIN_PSIGNW128
,
24604 IX86_BUILTIN_PSIGND128
,
24605 IX86_BUILTIN_PALIGNR128
,
24606 IX86_BUILTIN_PABSB128
,
24607 IX86_BUILTIN_PABSW128
,
24608 IX86_BUILTIN_PABSD128
,
24610 /* AMDFAM10 - SSE4A New Instructions. */
24611 IX86_BUILTIN_MOVNTSD
,
24612 IX86_BUILTIN_MOVNTSS
,
24613 IX86_BUILTIN_EXTRQI
,
24614 IX86_BUILTIN_EXTRQ
,
24615 IX86_BUILTIN_INSERTQI
,
24616 IX86_BUILTIN_INSERTQ
,
24619 IX86_BUILTIN_BLENDPD
,
24620 IX86_BUILTIN_BLENDPS
,
24621 IX86_BUILTIN_BLENDVPD
,
24622 IX86_BUILTIN_BLENDVPS
,
24623 IX86_BUILTIN_PBLENDVB128
,
24624 IX86_BUILTIN_PBLENDW128
,
24629 IX86_BUILTIN_INSERTPS128
,
24631 IX86_BUILTIN_MOVNTDQA
,
24632 IX86_BUILTIN_MPSADBW128
,
24633 IX86_BUILTIN_PACKUSDW128
,
24634 IX86_BUILTIN_PCMPEQQ
,
24635 IX86_BUILTIN_PHMINPOSUW128
,
24637 IX86_BUILTIN_PMAXSB128
,
24638 IX86_BUILTIN_PMAXSD128
,
24639 IX86_BUILTIN_PMAXUD128
,
24640 IX86_BUILTIN_PMAXUW128
,
24642 IX86_BUILTIN_PMINSB128
,
24643 IX86_BUILTIN_PMINSD128
,
24644 IX86_BUILTIN_PMINUD128
,
24645 IX86_BUILTIN_PMINUW128
,
24647 IX86_BUILTIN_PMOVSXBW128
,
24648 IX86_BUILTIN_PMOVSXBD128
,
24649 IX86_BUILTIN_PMOVSXBQ128
,
24650 IX86_BUILTIN_PMOVSXWD128
,
24651 IX86_BUILTIN_PMOVSXWQ128
,
24652 IX86_BUILTIN_PMOVSXDQ128
,
24654 IX86_BUILTIN_PMOVZXBW128
,
24655 IX86_BUILTIN_PMOVZXBD128
,
24656 IX86_BUILTIN_PMOVZXBQ128
,
24657 IX86_BUILTIN_PMOVZXWD128
,
24658 IX86_BUILTIN_PMOVZXWQ128
,
24659 IX86_BUILTIN_PMOVZXDQ128
,
24661 IX86_BUILTIN_PMULDQ128
,
24662 IX86_BUILTIN_PMULLD128
,
24664 IX86_BUILTIN_ROUNDPD
,
24665 IX86_BUILTIN_ROUNDPS
,
24666 IX86_BUILTIN_ROUNDSD
,
24667 IX86_BUILTIN_ROUNDSS
,
24669 IX86_BUILTIN_FLOORPD
,
24670 IX86_BUILTIN_CEILPD
,
24671 IX86_BUILTIN_TRUNCPD
,
24672 IX86_BUILTIN_RINTPD
,
24673 IX86_BUILTIN_ROUNDPD_AZ
,
24674 IX86_BUILTIN_FLOORPS
,
24675 IX86_BUILTIN_CEILPS
,
24676 IX86_BUILTIN_TRUNCPS
,
24677 IX86_BUILTIN_RINTPS
,
24678 IX86_BUILTIN_ROUNDPS_AZ
,
24680 IX86_BUILTIN_PTESTZ
,
24681 IX86_BUILTIN_PTESTC
,
24682 IX86_BUILTIN_PTESTNZC
,
24684 IX86_BUILTIN_VEC_INIT_V2SI
,
24685 IX86_BUILTIN_VEC_INIT_V4HI
,
24686 IX86_BUILTIN_VEC_INIT_V8QI
,
24687 IX86_BUILTIN_VEC_EXT_V2DF
,
24688 IX86_BUILTIN_VEC_EXT_V2DI
,
24689 IX86_BUILTIN_VEC_EXT_V4SF
,
24690 IX86_BUILTIN_VEC_EXT_V4SI
,
24691 IX86_BUILTIN_VEC_EXT_V8HI
,
24692 IX86_BUILTIN_VEC_EXT_V2SI
,
24693 IX86_BUILTIN_VEC_EXT_V4HI
,
24694 IX86_BUILTIN_VEC_EXT_V16QI
,
24695 IX86_BUILTIN_VEC_SET_V2DI
,
24696 IX86_BUILTIN_VEC_SET_V4SF
,
24697 IX86_BUILTIN_VEC_SET_V4SI
,
24698 IX86_BUILTIN_VEC_SET_V8HI
,
24699 IX86_BUILTIN_VEC_SET_V4HI
,
24700 IX86_BUILTIN_VEC_SET_V16QI
,
24702 IX86_BUILTIN_VEC_PACK_SFIX
,
24705 IX86_BUILTIN_CRC32QI
,
24706 IX86_BUILTIN_CRC32HI
,
24707 IX86_BUILTIN_CRC32SI
,
24708 IX86_BUILTIN_CRC32DI
,
24710 IX86_BUILTIN_PCMPESTRI128
,
24711 IX86_BUILTIN_PCMPESTRM128
,
24712 IX86_BUILTIN_PCMPESTRA128
,
24713 IX86_BUILTIN_PCMPESTRC128
,
24714 IX86_BUILTIN_PCMPESTRO128
,
24715 IX86_BUILTIN_PCMPESTRS128
,
24716 IX86_BUILTIN_PCMPESTRZ128
,
24717 IX86_BUILTIN_PCMPISTRI128
,
24718 IX86_BUILTIN_PCMPISTRM128
,
24719 IX86_BUILTIN_PCMPISTRA128
,
24720 IX86_BUILTIN_PCMPISTRC128
,
24721 IX86_BUILTIN_PCMPISTRO128
,
24722 IX86_BUILTIN_PCMPISTRS128
,
24723 IX86_BUILTIN_PCMPISTRZ128
,
24725 IX86_BUILTIN_PCMPGTQ
,
24727 /* AES instructions */
24728 IX86_BUILTIN_AESENC128
,
24729 IX86_BUILTIN_AESENCLAST128
,
24730 IX86_BUILTIN_AESDEC128
,
24731 IX86_BUILTIN_AESDECLAST128
,
24732 IX86_BUILTIN_AESIMC128
,
24733 IX86_BUILTIN_AESKEYGENASSIST128
,
24735 /* PCLMUL instruction */
24736 IX86_BUILTIN_PCLMULQDQ128
,
24739 IX86_BUILTIN_ADDPD256
,
24740 IX86_BUILTIN_ADDPS256
,
24741 IX86_BUILTIN_ADDSUBPD256
,
24742 IX86_BUILTIN_ADDSUBPS256
,
24743 IX86_BUILTIN_ANDPD256
,
24744 IX86_BUILTIN_ANDPS256
,
24745 IX86_BUILTIN_ANDNPD256
,
24746 IX86_BUILTIN_ANDNPS256
,
24747 IX86_BUILTIN_BLENDPD256
,
24748 IX86_BUILTIN_BLENDPS256
,
24749 IX86_BUILTIN_BLENDVPD256
,
24750 IX86_BUILTIN_BLENDVPS256
,
24751 IX86_BUILTIN_DIVPD256
,
24752 IX86_BUILTIN_DIVPS256
,
24753 IX86_BUILTIN_DPPS256
,
24754 IX86_BUILTIN_HADDPD256
,
24755 IX86_BUILTIN_HADDPS256
,
24756 IX86_BUILTIN_HSUBPD256
,
24757 IX86_BUILTIN_HSUBPS256
,
24758 IX86_BUILTIN_MAXPD256
,
24759 IX86_BUILTIN_MAXPS256
,
24760 IX86_BUILTIN_MINPD256
,
24761 IX86_BUILTIN_MINPS256
,
24762 IX86_BUILTIN_MULPD256
,
24763 IX86_BUILTIN_MULPS256
,
24764 IX86_BUILTIN_ORPD256
,
24765 IX86_BUILTIN_ORPS256
,
24766 IX86_BUILTIN_SHUFPD256
,
24767 IX86_BUILTIN_SHUFPS256
,
24768 IX86_BUILTIN_SUBPD256
,
24769 IX86_BUILTIN_SUBPS256
,
24770 IX86_BUILTIN_XORPD256
,
24771 IX86_BUILTIN_XORPS256
,
24772 IX86_BUILTIN_CMPSD
,
24773 IX86_BUILTIN_CMPSS
,
24774 IX86_BUILTIN_CMPPD
,
24775 IX86_BUILTIN_CMPPS
,
24776 IX86_BUILTIN_CMPPD256
,
24777 IX86_BUILTIN_CMPPS256
,
24778 IX86_BUILTIN_CVTDQ2PD256
,
24779 IX86_BUILTIN_CVTDQ2PS256
,
24780 IX86_BUILTIN_CVTPD2PS256
,
24781 IX86_BUILTIN_CVTPS2DQ256
,
24782 IX86_BUILTIN_CVTPS2PD256
,
24783 IX86_BUILTIN_CVTTPD2DQ256
,
24784 IX86_BUILTIN_CVTPD2DQ256
,
24785 IX86_BUILTIN_CVTTPS2DQ256
,
24786 IX86_BUILTIN_EXTRACTF128PD256
,
24787 IX86_BUILTIN_EXTRACTF128PS256
,
24788 IX86_BUILTIN_EXTRACTF128SI256
,
24789 IX86_BUILTIN_VZEROALL
,
24790 IX86_BUILTIN_VZEROUPPER
,
24791 IX86_BUILTIN_VPERMILVARPD
,
24792 IX86_BUILTIN_VPERMILVARPS
,
24793 IX86_BUILTIN_VPERMILVARPD256
,
24794 IX86_BUILTIN_VPERMILVARPS256
,
24795 IX86_BUILTIN_VPERMILPD
,
24796 IX86_BUILTIN_VPERMILPS
,
24797 IX86_BUILTIN_VPERMILPD256
,
24798 IX86_BUILTIN_VPERMILPS256
,
24799 IX86_BUILTIN_VPERMIL2PD
,
24800 IX86_BUILTIN_VPERMIL2PS
,
24801 IX86_BUILTIN_VPERMIL2PD256
,
24802 IX86_BUILTIN_VPERMIL2PS256
,
24803 IX86_BUILTIN_VPERM2F128PD256
,
24804 IX86_BUILTIN_VPERM2F128PS256
,
24805 IX86_BUILTIN_VPERM2F128SI256
,
24806 IX86_BUILTIN_VBROADCASTSS
,
24807 IX86_BUILTIN_VBROADCASTSD256
,
24808 IX86_BUILTIN_VBROADCASTSS256
,
24809 IX86_BUILTIN_VBROADCASTPD256
,
24810 IX86_BUILTIN_VBROADCASTPS256
,
24811 IX86_BUILTIN_VINSERTF128PD256
,
24812 IX86_BUILTIN_VINSERTF128PS256
,
24813 IX86_BUILTIN_VINSERTF128SI256
,
24814 IX86_BUILTIN_LOADUPD256
,
24815 IX86_BUILTIN_LOADUPS256
,
24816 IX86_BUILTIN_STOREUPD256
,
24817 IX86_BUILTIN_STOREUPS256
,
24818 IX86_BUILTIN_LDDQU256
,
24819 IX86_BUILTIN_MOVNTDQ256
,
24820 IX86_BUILTIN_MOVNTPD256
,
24821 IX86_BUILTIN_MOVNTPS256
,
24822 IX86_BUILTIN_LOADDQU256
,
24823 IX86_BUILTIN_STOREDQU256
,
24824 IX86_BUILTIN_MASKLOADPD
,
24825 IX86_BUILTIN_MASKLOADPS
,
24826 IX86_BUILTIN_MASKSTOREPD
,
24827 IX86_BUILTIN_MASKSTOREPS
,
24828 IX86_BUILTIN_MASKLOADPD256
,
24829 IX86_BUILTIN_MASKLOADPS256
,
24830 IX86_BUILTIN_MASKSTOREPD256
,
24831 IX86_BUILTIN_MASKSTOREPS256
,
24832 IX86_BUILTIN_MOVSHDUP256
,
24833 IX86_BUILTIN_MOVSLDUP256
,
24834 IX86_BUILTIN_MOVDDUP256
,
24836 IX86_BUILTIN_SQRTPD256
,
24837 IX86_BUILTIN_SQRTPS256
,
24838 IX86_BUILTIN_SQRTPS_NR256
,
24839 IX86_BUILTIN_RSQRTPS256
,
24840 IX86_BUILTIN_RSQRTPS_NR256
,
24842 IX86_BUILTIN_RCPPS256
,
24844 IX86_BUILTIN_ROUNDPD256
,
24845 IX86_BUILTIN_ROUNDPS256
,
24847 IX86_BUILTIN_FLOORPD256
,
24848 IX86_BUILTIN_CEILPD256
,
24849 IX86_BUILTIN_TRUNCPD256
,
24850 IX86_BUILTIN_RINTPD256
,
24851 IX86_BUILTIN_ROUNDPD_AZ256
,
24852 IX86_BUILTIN_FLOORPS256
,
24853 IX86_BUILTIN_CEILPS256
,
24854 IX86_BUILTIN_TRUNCPS256
,
24855 IX86_BUILTIN_RINTPS256
,
24856 IX86_BUILTIN_ROUNDPS_AZ256
,
24858 IX86_BUILTIN_UNPCKHPD256
,
24859 IX86_BUILTIN_UNPCKLPD256
,
24860 IX86_BUILTIN_UNPCKHPS256
,
24861 IX86_BUILTIN_UNPCKLPS256
,
24863 IX86_BUILTIN_SI256_SI
,
24864 IX86_BUILTIN_PS256_PS
,
24865 IX86_BUILTIN_PD256_PD
,
24866 IX86_BUILTIN_SI_SI256
,
24867 IX86_BUILTIN_PS_PS256
,
24868 IX86_BUILTIN_PD_PD256
,
24870 IX86_BUILTIN_VTESTZPD
,
24871 IX86_BUILTIN_VTESTCPD
,
24872 IX86_BUILTIN_VTESTNZCPD
,
24873 IX86_BUILTIN_VTESTZPS
,
24874 IX86_BUILTIN_VTESTCPS
,
24875 IX86_BUILTIN_VTESTNZCPS
,
24876 IX86_BUILTIN_VTESTZPD256
,
24877 IX86_BUILTIN_VTESTCPD256
,
24878 IX86_BUILTIN_VTESTNZCPD256
,
24879 IX86_BUILTIN_VTESTZPS256
,
24880 IX86_BUILTIN_VTESTCPS256
,
24881 IX86_BUILTIN_VTESTNZCPS256
,
24882 IX86_BUILTIN_PTESTZ256
,
24883 IX86_BUILTIN_PTESTC256
,
24884 IX86_BUILTIN_PTESTNZC256
,
24886 IX86_BUILTIN_MOVMSKPD256
,
24887 IX86_BUILTIN_MOVMSKPS256
,
24890 IX86_BUILTIN_MPSADBW256
,
24891 IX86_BUILTIN_PABSB256
,
24892 IX86_BUILTIN_PABSW256
,
24893 IX86_BUILTIN_PABSD256
,
24894 IX86_BUILTIN_PACKSSDW256
,
24895 IX86_BUILTIN_PACKSSWB256
,
24896 IX86_BUILTIN_PACKUSDW256
,
24897 IX86_BUILTIN_PACKUSWB256
,
24898 IX86_BUILTIN_PADDB256
,
24899 IX86_BUILTIN_PADDW256
,
24900 IX86_BUILTIN_PADDD256
,
24901 IX86_BUILTIN_PADDQ256
,
24902 IX86_BUILTIN_PADDSB256
,
24903 IX86_BUILTIN_PADDSW256
,
24904 IX86_BUILTIN_PADDUSB256
,
24905 IX86_BUILTIN_PADDUSW256
,
24906 IX86_BUILTIN_PALIGNR256
,
24907 IX86_BUILTIN_AND256I
,
24908 IX86_BUILTIN_ANDNOT256I
,
24909 IX86_BUILTIN_PAVGB256
,
24910 IX86_BUILTIN_PAVGW256
,
24911 IX86_BUILTIN_PBLENDVB256
,
24912 IX86_BUILTIN_PBLENDVW256
,
24913 IX86_BUILTIN_PCMPEQB256
,
24914 IX86_BUILTIN_PCMPEQW256
,
24915 IX86_BUILTIN_PCMPEQD256
,
24916 IX86_BUILTIN_PCMPEQQ256
,
24917 IX86_BUILTIN_PCMPGTB256
,
24918 IX86_BUILTIN_PCMPGTW256
,
24919 IX86_BUILTIN_PCMPGTD256
,
24920 IX86_BUILTIN_PCMPGTQ256
,
24921 IX86_BUILTIN_PHADDW256
,
24922 IX86_BUILTIN_PHADDD256
,
24923 IX86_BUILTIN_PHADDSW256
,
24924 IX86_BUILTIN_PHSUBW256
,
24925 IX86_BUILTIN_PHSUBD256
,
24926 IX86_BUILTIN_PHSUBSW256
,
24927 IX86_BUILTIN_PMADDUBSW256
,
24928 IX86_BUILTIN_PMADDWD256
,
24929 IX86_BUILTIN_PMAXSB256
,
24930 IX86_BUILTIN_PMAXSW256
,
24931 IX86_BUILTIN_PMAXSD256
,
24932 IX86_BUILTIN_PMAXUB256
,
24933 IX86_BUILTIN_PMAXUW256
,
24934 IX86_BUILTIN_PMAXUD256
,
24935 IX86_BUILTIN_PMINSB256
,
24936 IX86_BUILTIN_PMINSW256
,
24937 IX86_BUILTIN_PMINSD256
,
24938 IX86_BUILTIN_PMINUB256
,
24939 IX86_BUILTIN_PMINUW256
,
24940 IX86_BUILTIN_PMINUD256
,
24941 IX86_BUILTIN_PMOVMSKB256
,
24942 IX86_BUILTIN_PMOVSXBW256
,
24943 IX86_BUILTIN_PMOVSXBD256
,
24944 IX86_BUILTIN_PMOVSXBQ256
,
24945 IX86_BUILTIN_PMOVSXWD256
,
24946 IX86_BUILTIN_PMOVSXWQ256
,
24947 IX86_BUILTIN_PMOVSXDQ256
,
24948 IX86_BUILTIN_PMOVZXBW256
,
24949 IX86_BUILTIN_PMOVZXBD256
,
24950 IX86_BUILTIN_PMOVZXBQ256
,
24951 IX86_BUILTIN_PMOVZXWD256
,
24952 IX86_BUILTIN_PMOVZXWQ256
,
24953 IX86_BUILTIN_PMOVZXDQ256
,
24954 IX86_BUILTIN_PMULDQ256
,
24955 IX86_BUILTIN_PMULHRSW256
,
24956 IX86_BUILTIN_PMULHUW256
,
24957 IX86_BUILTIN_PMULHW256
,
24958 IX86_BUILTIN_PMULLW256
,
24959 IX86_BUILTIN_PMULLD256
,
24960 IX86_BUILTIN_PMULUDQ256
,
24961 IX86_BUILTIN_POR256
,
24962 IX86_BUILTIN_PSADBW256
,
24963 IX86_BUILTIN_PSHUFB256
,
24964 IX86_BUILTIN_PSHUFD256
,
24965 IX86_BUILTIN_PSHUFHW256
,
24966 IX86_BUILTIN_PSHUFLW256
,
24967 IX86_BUILTIN_PSIGNB256
,
24968 IX86_BUILTIN_PSIGNW256
,
24969 IX86_BUILTIN_PSIGND256
,
24970 IX86_BUILTIN_PSLLDQI256
,
24971 IX86_BUILTIN_PSLLWI256
,
24972 IX86_BUILTIN_PSLLW256
,
24973 IX86_BUILTIN_PSLLDI256
,
24974 IX86_BUILTIN_PSLLD256
,
24975 IX86_BUILTIN_PSLLQI256
,
24976 IX86_BUILTIN_PSLLQ256
,
24977 IX86_BUILTIN_PSRAWI256
,
24978 IX86_BUILTIN_PSRAW256
,
24979 IX86_BUILTIN_PSRADI256
,
24980 IX86_BUILTIN_PSRAD256
,
24981 IX86_BUILTIN_PSRLDQI256
,
24982 IX86_BUILTIN_PSRLWI256
,
24983 IX86_BUILTIN_PSRLW256
,
24984 IX86_BUILTIN_PSRLDI256
,
24985 IX86_BUILTIN_PSRLD256
,
24986 IX86_BUILTIN_PSRLQI256
,
24987 IX86_BUILTIN_PSRLQ256
,
24988 IX86_BUILTIN_PSUBB256
,
24989 IX86_BUILTIN_PSUBW256
,
24990 IX86_BUILTIN_PSUBD256
,
24991 IX86_BUILTIN_PSUBQ256
,
24992 IX86_BUILTIN_PSUBSB256
,
24993 IX86_BUILTIN_PSUBSW256
,
24994 IX86_BUILTIN_PSUBUSB256
,
24995 IX86_BUILTIN_PSUBUSW256
,
24996 IX86_BUILTIN_PUNPCKHBW256
,
24997 IX86_BUILTIN_PUNPCKHWD256
,
24998 IX86_BUILTIN_PUNPCKHDQ256
,
24999 IX86_BUILTIN_PUNPCKHQDQ256
,
25000 IX86_BUILTIN_PUNPCKLBW256
,
25001 IX86_BUILTIN_PUNPCKLWD256
,
25002 IX86_BUILTIN_PUNPCKLDQ256
,
25003 IX86_BUILTIN_PUNPCKLQDQ256
,
25004 IX86_BUILTIN_PXOR256
,
25005 IX86_BUILTIN_MOVNTDQA256
,
25006 IX86_BUILTIN_VBROADCASTSS_PS
,
25007 IX86_BUILTIN_VBROADCASTSS_PS256
,
25008 IX86_BUILTIN_VBROADCASTSD_PD256
,
25009 IX86_BUILTIN_VBROADCASTSI256
,
25010 IX86_BUILTIN_PBLENDD256
,
25011 IX86_BUILTIN_PBLENDD128
,
25012 IX86_BUILTIN_PBROADCASTB256
,
25013 IX86_BUILTIN_PBROADCASTW256
,
25014 IX86_BUILTIN_PBROADCASTD256
,
25015 IX86_BUILTIN_PBROADCASTQ256
,
25016 IX86_BUILTIN_PBROADCASTB128
,
25017 IX86_BUILTIN_PBROADCASTW128
,
25018 IX86_BUILTIN_PBROADCASTD128
,
25019 IX86_BUILTIN_PBROADCASTQ128
,
25020 IX86_BUILTIN_VPERMVARSI256
,
25021 IX86_BUILTIN_VPERMDF256
,
25022 IX86_BUILTIN_VPERMVARSF256
,
25023 IX86_BUILTIN_VPERMDI256
,
25024 IX86_BUILTIN_VPERMTI256
,
25025 IX86_BUILTIN_VEXTRACT128I256
,
25026 IX86_BUILTIN_VINSERT128I256
,
25027 IX86_BUILTIN_MASKLOADD
,
25028 IX86_BUILTIN_MASKLOADQ
,
25029 IX86_BUILTIN_MASKLOADD256
,
25030 IX86_BUILTIN_MASKLOADQ256
,
25031 IX86_BUILTIN_MASKSTORED
,
25032 IX86_BUILTIN_MASKSTOREQ
,
25033 IX86_BUILTIN_MASKSTORED256
,
25034 IX86_BUILTIN_MASKSTOREQ256
,
25035 IX86_BUILTIN_PSLLVV4DI
,
25036 IX86_BUILTIN_PSLLVV2DI
,
25037 IX86_BUILTIN_PSLLVV8SI
,
25038 IX86_BUILTIN_PSLLVV4SI
,
25039 IX86_BUILTIN_PSRAVV8SI
,
25040 IX86_BUILTIN_PSRAVV4SI
,
25041 IX86_BUILTIN_PSRLVV4DI
,
25042 IX86_BUILTIN_PSRLVV2DI
,
25043 IX86_BUILTIN_PSRLVV8SI
,
25044 IX86_BUILTIN_PSRLVV4SI
,
25046 IX86_BUILTIN_GATHERSIV2DF
,
25047 IX86_BUILTIN_GATHERSIV4DF
,
25048 IX86_BUILTIN_GATHERDIV2DF
,
25049 IX86_BUILTIN_GATHERDIV4DF
,
25050 IX86_BUILTIN_GATHERSIV4SF
,
25051 IX86_BUILTIN_GATHERSIV8SF
,
25052 IX86_BUILTIN_GATHERDIV4SF
,
25053 IX86_BUILTIN_GATHERDIV8SF
,
25054 IX86_BUILTIN_GATHERSIV2DI
,
25055 IX86_BUILTIN_GATHERSIV4DI
,
25056 IX86_BUILTIN_GATHERDIV2DI
,
25057 IX86_BUILTIN_GATHERDIV4DI
,
25058 IX86_BUILTIN_GATHERSIV4SI
,
25059 IX86_BUILTIN_GATHERSIV8SI
,
25060 IX86_BUILTIN_GATHERDIV4SI
,
25061 IX86_BUILTIN_GATHERDIV8SI
,
25063 /* TFmode support builtins. */
25065 IX86_BUILTIN_HUGE_VALQ
,
25066 IX86_BUILTIN_FABSQ
,
25067 IX86_BUILTIN_COPYSIGNQ
,
25069 /* Vectorizer support builtins. */
25070 IX86_BUILTIN_CPYSGNPS
,
25071 IX86_BUILTIN_CPYSGNPD
,
25072 IX86_BUILTIN_CPYSGNPS256
,
25073 IX86_BUILTIN_CPYSGNPD256
,
25075 IX86_BUILTIN_CVTUDQ2PS
,
25077 /* FMA4 instructions. */
25078 IX86_BUILTIN_VFMADDSS
,
25079 IX86_BUILTIN_VFMADDSD
,
25080 IX86_BUILTIN_VFMADDPS
,
25081 IX86_BUILTIN_VFMADDPD
,
25082 IX86_BUILTIN_VFMADDPS256
,
25083 IX86_BUILTIN_VFMADDPD256
,
25084 IX86_BUILTIN_VFMADDSUBPS
,
25085 IX86_BUILTIN_VFMADDSUBPD
,
25086 IX86_BUILTIN_VFMADDSUBPS256
,
25087 IX86_BUILTIN_VFMADDSUBPD256
,
25089 /* FMA3 instructions. */
25090 IX86_BUILTIN_VFMADDSS3
,
25091 IX86_BUILTIN_VFMADDSD3
,
25093 /* XOP instructions. */
25094 IX86_BUILTIN_VPCMOV
,
25095 IX86_BUILTIN_VPCMOV_V2DI
,
25096 IX86_BUILTIN_VPCMOV_V4SI
,
25097 IX86_BUILTIN_VPCMOV_V8HI
,
25098 IX86_BUILTIN_VPCMOV_V16QI
,
25099 IX86_BUILTIN_VPCMOV_V4SF
,
25100 IX86_BUILTIN_VPCMOV_V2DF
,
25101 IX86_BUILTIN_VPCMOV256
,
25102 IX86_BUILTIN_VPCMOV_V4DI256
,
25103 IX86_BUILTIN_VPCMOV_V8SI256
,
25104 IX86_BUILTIN_VPCMOV_V16HI256
,
25105 IX86_BUILTIN_VPCMOV_V32QI256
,
25106 IX86_BUILTIN_VPCMOV_V8SF256
,
25107 IX86_BUILTIN_VPCMOV_V4DF256
,
25109 IX86_BUILTIN_VPPERM
,
25111 IX86_BUILTIN_VPMACSSWW
,
25112 IX86_BUILTIN_VPMACSWW
,
25113 IX86_BUILTIN_VPMACSSWD
,
25114 IX86_BUILTIN_VPMACSWD
,
25115 IX86_BUILTIN_VPMACSSDD
,
25116 IX86_BUILTIN_VPMACSDD
,
25117 IX86_BUILTIN_VPMACSSDQL
,
25118 IX86_BUILTIN_VPMACSSDQH
,
25119 IX86_BUILTIN_VPMACSDQL
,
25120 IX86_BUILTIN_VPMACSDQH
,
25121 IX86_BUILTIN_VPMADCSSWD
,
25122 IX86_BUILTIN_VPMADCSWD
,
25124 IX86_BUILTIN_VPHADDBW
,
25125 IX86_BUILTIN_VPHADDBD
,
25126 IX86_BUILTIN_VPHADDBQ
,
25127 IX86_BUILTIN_VPHADDWD
,
25128 IX86_BUILTIN_VPHADDWQ
,
25129 IX86_BUILTIN_VPHADDDQ
,
25130 IX86_BUILTIN_VPHADDUBW
,
25131 IX86_BUILTIN_VPHADDUBD
,
25132 IX86_BUILTIN_VPHADDUBQ
,
25133 IX86_BUILTIN_VPHADDUWD
,
25134 IX86_BUILTIN_VPHADDUWQ
,
25135 IX86_BUILTIN_VPHADDUDQ
,
25136 IX86_BUILTIN_VPHSUBBW
,
25137 IX86_BUILTIN_VPHSUBWD
,
25138 IX86_BUILTIN_VPHSUBDQ
,
25140 IX86_BUILTIN_VPROTB
,
25141 IX86_BUILTIN_VPROTW
,
25142 IX86_BUILTIN_VPROTD
,
25143 IX86_BUILTIN_VPROTQ
,
25144 IX86_BUILTIN_VPROTB_IMM
,
25145 IX86_BUILTIN_VPROTW_IMM
,
25146 IX86_BUILTIN_VPROTD_IMM
,
25147 IX86_BUILTIN_VPROTQ_IMM
,
25149 IX86_BUILTIN_VPSHLB
,
25150 IX86_BUILTIN_VPSHLW
,
25151 IX86_BUILTIN_VPSHLD
,
25152 IX86_BUILTIN_VPSHLQ
,
25153 IX86_BUILTIN_VPSHAB
,
25154 IX86_BUILTIN_VPSHAW
,
25155 IX86_BUILTIN_VPSHAD
,
25156 IX86_BUILTIN_VPSHAQ
,
25158 IX86_BUILTIN_VFRCZSS
,
25159 IX86_BUILTIN_VFRCZSD
,
25160 IX86_BUILTIN_VFRCZPS
,
25161 IX86_BUILTIN_VFRCZPD
,
25162 IX86_BUILTIN_VFRCZPS256
,
25163 IX86_BUILTIN_VFRCZPD256
,
25165 IX86_BUILTIN_VPCOMEQUB
,
25166 IX86_BUILTIN_VPCOMNEUB
,
25167 IX86_BUILTIN_VPCOMLTUB
,
25168 IX86_BUILTIN_VPCOMLEUB
,
25169 IX86_BUILTIN_VPCOMGTUB
,
25170 IX86_BUILTIN_VPCOMGEUB
,
25171 IX86_BUILTIN_VPCOMFALSEUB
,
25172 IX86_BUILTIN_VPCOMTRUEUB
,
25174 IX86_BUILTIN_VPCOMEQUW
,
25175 IX86_BUILTIN_VPCOMNEUW
,
25176 IX86_BUILTIN_VPCOMLTUW
,
25177 IX86_BUILTIN_VPCOMLEUW
,
25178 IX86_BUILTIN_VPCOMGTUW
,
25179 IX86_BUILTIN_VPCOMGEUW
,
25180 IX86_BUILTIN_VPCOMFALSEUW
,
25181 IX86_BUILTIN_VPCOMTRUEUW
,
25183 IX86_BUILTIN_VPCOMEQUD
,
25184 IX86_BUILTIN_VPCOMNEUD
,
25185 IX86_BUILTIN_VPCOMLTUD
,
25186 IX86_BUILTIN_VPCOMLEUD
,
25187 IX86_BUILTIN_VPCOMGTUD
,
25188 IX86_BUILTIN_VPCOMGEUD
,
25189 IX86_BUILTIN_VPCOMFALSEUD
,
25190 IX86_BUILTIN_VPCOMTRUEUD
,
25192 IX86_BUILTIN_VPCOMEQUQ
,
25193 IX86_BUILTIN_VPCOMNEUQ
,
25194 IX86_BUILTIN_VPCOMLTUQ
,
25195 IX86_BUILTIN_VPCOMLEUQ
,
25196 IX86_BUILTIN_VPCOMGTUQ
,
25197 IX86_BUILTIN_VPCOMGEUQ
,
25198 IX86_BUILTIN_VPCOMFALSEUQ
,
25199 IX86_BUILTIN_VPCOMTRUEUQ
,
25201 IX86_BUILTIN_VPCOMEQB
,
25202 IX86_BUILTIN_VPCOMNEB
,
25203 IX86_BUILTIN_VPCOMLTB
,
25204 IX86_BUILTIN_VPCOMLEB
,
25205 IX86_BUILTIN_VPCOMGTB
,
25206 IX86_BUILTIN_VPCOMGEB
,
25207 IX86_BUILTIN_VPCOMFALSEB
,
25208 IX86_BUILTIN_VPCOMTRUEB
,
25210 IX86_BUILTIN_VPCOMEQW
,
25211 IX86_BUILTIN_VPCOMNEW
,
25212 IX86_BUILTIN_VPCOMLTW
,
25213 IX86_BUILTIN_VPCOMLEW
,
25214 IX86_BUILTIN_VPCOMGTW
,
25215 IX86_BUILTIN_VPCOMGEW
,
25216 IX86_BUILTIN_VPCOMFALSEW
,
25217 IX86_BUILTIN_VPCOMTRUEW
,
25219 IX86_BUILTIN_VPCOMEQD
,
25220 IX86_BUILTIN_VPCOMNED
,
25221 IX86_BUILTIN_VPCOMLTD
,
25222 IX86_BUILTIN_VPCOMLED
,
25223 IX86_BUILTIN_VPCOMGTD
,
25224 IX86_BUILTIN_VPCOMGED
,
25225 IX86_BUILTIN_VPCOMFALSED
,
25226 IX86_BUILTIN_VPCOMTRUED
,
25228 IX86_BUILTIN_VPCOMEQQ
,
25229 IX86_BUILTIN_VPCOMNEQ
,
25230 IX86_BUILTIN_VPCOMLTQ
,
25231 IX86_BUILTIN_VPCOMLEQ
,
25232 IX86_BUILTIN_VPCOMGTQ
,
25233 IX86_BUILTIN_VPCOMGEQ
,
25234 IX86_BUILTIN_VPCOMFALSEQ
,
25235 IX86_BUILTIN_VPCOMTRUEQ
,
25237 /* LWP instructions. */
25238 IX86_BUILTIN_LLWPCB
,
25239 IX86_BUILTIN_SLWPCB
,
25240 IX86_BUILTIN_LWPVAL32
,
25241 IX86_BUILTIN_LWPVAL64
,
25242 IX86_BUILTIN_LWPINS32
,
25243 IX86_BUILTIN_LWPINS64
,
25247 /* BMI instructions. */
25248 IX86_BUILTIN_BEXTR32
,
25249 IX86_BUILTIN_BEXTR64
,
25252 /* TBM instructions. */
25253 IX86_BUILTIN_BEXTRI32
,
25254 IX86_BUILTIN_BEXTRI64
,
25256 /* BMI2 instructions. */
25257 IX86_BUILTIN_BZHI32
,
25258 IX86_BUILTIN_BZHI64
,
25259 IX86_BUILTIN_PDEP32
,
25260 IX86_BUILTIN_PDEP64
,
25261 IX86_BUILTIN_PEXT32
,
25262 IX86_BUILTIN_PEXT64
,
25264 /* FSGSBASE instructions. */
25265 IX86_BUILTIN_RDFSBASE32
,
25266 IX86_BUILTIN_RDFSBASE64
,
25267 IX86_BUILTIN_RDGSBASE32
,
25268 IX86_BUILTIN_RDGSBASE64
,
25269 IX86_BUILTIN_WRFSBASE32
,
25270 IX86_BUILTIN_WRFSBASE64
,
25271 IX86_BUILTIN_WRGSBASE32
,
25272 IX86_BUILTIN_WRGSBASE64
,
25274 /* RDRND instructions. */
25275 IX86_BUILTIN_RDRAND16_STEP
,
25276 IX86_BUILTIN_RDRAND32_STEP
,
25277 IX86_BUILTIN_RDRAND64_STEP
,
25279 /* F16C instructions. */
25280 IX86_BUILTIN_CVTPH2PS
,
25281 IX86_BUILTIN_CVTPH2PS256
,
25282 IX86_BUILTIN_CVTPS2PH
,
25283 IX86_BUILTIN_CVTPS2PH256
,
25285 /* CFString built-in for darwin */
25286 IX86_BUILTIN_CFSTRING
,
25291 /* Table for the ix86 builtin decls. */
25292 static GTY(()) tree ix86_builtins
[(int) IX86_BUILTIN_MAX
];
25294 /* Table of all of the builtin functions that are possible with different ISA's
25295 but are waiting to be built until a function is declared to use that
25297 struct builtin_isa
{
25298 const char *name
; /* function name */
25299 enum ix86_builtin_func_type tcode
; /* type to use in the declaration */
25300 HOST_WIDE_INT isa
; /* isa_flags this builtin is defined for */
25301 bool const_p
; /* true if the declaration is constant */
25302 bool set_and_not_built_p
;
25305 static struct builtin_isa ix86_builtins_isa
[(int) IX86_BUILTIN_MAX
];
25308 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
25309 of which isa_flags to use in the ix86_builtins_isa array. Stores the
25310 function decl in the ix86_builtins array. Returns the function decl or
25311 NULL_TREE, if the builtin was not added.
25313 If the front end has a special hook for builtin functions, delay adding
25314 builtin functions that aren't in the current ISA until the ISA is changed
25315 with function specific optimization. Doing so, can save about 300K for the
25316 default compiler. When the builtin is expanded, check at that time whether
25319 If the front end doesn't have a special hook, record all builtins, even if
25320 it isn't an instruction set in the current ISA in case the user uses
25321 function specific options for a different ISA, so that we don't get scope
25322 errors if a builtin is added in the middle of a function scope. */
25325 def_builtin (HOST_WIDE_INT mask
, const char *name
,
25326 enum ix86_builtin_func_type tcode
,
25327 enum ix86_builtins code
)
25329 tree decl
= NULL_TREE
;
25331 if (!(mask
& OPTION_MASK_ISA_64BIT
) || TARGET_64BIT
)
25333 ix86_builtins_isa
[(int) code
].isa
= mask
;
25335 mask
&= ~OPTION_MASK_ISA_64BIT
;
25337 || (mask
& ix86_isa_flags
) != 0
25338 || (lang_hooks
.builtin_function
25339 == lang_hooks
.builtin_function_ext_scope
))
25342 tree type
= ix86_get_builtin_func_type (tcode
);
25343 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
25345 ix86_builtins
[(int) code
] = decl
;
25346 ix86_builtins_isa
[(int) code
].set_and_not_built_p
= false;
25350 ix86_builtins
[(int) code
] = NULL_TREE
;
25351 ix86_builtins_isa
[(int) code
].tcode
= tcode
;
25352 ix86_builtins_isa
[(int) code
].name
= name
;
25353 ix86_builtins_isa
[(int) code
].const_p
= false;
25354 ix86_builtins_isa
[(int) code
].set_and_not_built_p
= true;
25361 /* Like def_builtin, but also marks the function decl "const". */
25364 def_builtin_const (HOST_WIDE_INT mask
, const char *name
,
25365 enum ix86_builtin_func_type tcode
, enum ix86_builtins code
)
25367 tree decl
= def_builtin (mask
, name
, tcode
, code
);
/* Decl built immediately: make it READONLY ("const") right away.  */
25369 TREE_READONLY (decl
) = 1;
/* Deferred decl: record const-ness so ix86_add_new_builtins can
   apply TREE_READONLY when the decl is eventually built.  */
25371 ix86_builtins_isa
[(int) code
].const_p
= true;
25376 /* Add any new builtin functions for a given ISA that may not have been
25377 declared. This saves a bit of space compared to adding all of the
25378 declarations to the tree, even if we didn't use them. */
25381 ix86_add_new_builtins (HOST_WIDE_INT isa
)
/* Walk every builtin slot and materialize the ones that were deferred
   by def_builtin and whose required ISA bits are now in ISA.  */
25385 for (i
= 0; i
< (int)IX86_BUILTIN_MAX
; i
++)
/* Only deferred builtins (set_and_not_built_p) whose ISA mask
   intersects the newly enabled ISA are built here.  */
25387 if ((ix86_builtins_isa
[i
].isa
& isa
) != 0
25388 && ix86_builtins_isa
[i
].set_and_not_built_p
)
25392 /* Don't define the builtin again. */
25393 ix86_builtins_isa
[i
].set_and_not_built_p
= false;
/* Rebuild the function type and create the decl at extended
   scope, using the name/tcode recorded by def_builtin.  */
25395 type
= ix86_get_builtin_func_type (ix86_builtins_isa
[i
].tcode
);
25396 decl
= add_builtin_function_ext_scope (ix86_builtins_isa
[i
].name
,
25397 type
, i
, BUILT_IN_MD
, NULL
,
25400 ix86_builtins
[i
] = decl
;
/* Honor const-ness recorded by def_builtin_const.  */
25401 if (ix86_builtins_isa
[i
].const_p
)
25402 TREE_READONLY (decl
) = 1;
25407 /* Bits for builtin_description.flag. */
25409 /* Set when we don't support the comparison natively, and should
25410 swap_comparison in order to support it. */
25411 #define BUILTIN_DESC_SWAP_OPERANDS 1
/* One row of the bdesc_* tables below: everything needed to register
   and expand a single ia32 builtin.  */
25413 struct builtin_description
/* OPTION_MASK_ISA_* bits the builtin requires (see def_builtin).  */
25415 const HOST_WIDE_INT mask
;
/* Insn pattern used to expand the builtin.  */
25416 const enum insn_code icode
;
/* User-visible name, e.g. "__builtin_ia32_...".  */
25417 const char *const name
;
/* Enumeration index into ix86_builtins.  */
25418 const enum ix86_builtins code
;
/* RTX comparison code for compare builtins, UNKNOWN otherwise.  */
25419 const enum rtx_code comparison
;
25423 static const struct builtin_description bdesc_comi
[] =
25425 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
25426 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
25427 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
25428 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
25429 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
25430 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
25431 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
25432 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
25433 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
25434 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
25435 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
25436 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
25437 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
25438 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
25439 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
25440 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
25441 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
25442 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
25443 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
25444 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
25445 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
25446 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
25447 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
25448 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
25451 static const struct builtin_description bdesc_pcmpestr
[] =
25454 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128
, UNKNOWN
, 0 },
25455 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128
, UNKNOWN
, 0 },
25456 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128
, UNKNOWN
, (int) CCAmode
},
25457 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128
, UNKNOWN
, (int) CCCmode
},
25458 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128
, UNKNOWN
, (int) CCOmode
},
25459 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128
, UNKNOWN
, (int) CCSmode
},
25460 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128
, UNKNOWN
, (int) CCZmode
},
25463 static const struct builtin_description bdesc_pcmpistr
[] =
25466 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128
, UNKNOWN
, 0 },
25467 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128
, UNKNOWN
, 0 },
25468 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128
, UNKNOWN
, (int) CCAmode
},
25469 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128
, UNKNOWN
, (int) CCCmode
},
25470 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128
, UNKNOWN
, (int) CCOmode
},
25471 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128
, UNKNOWN
, (int) CCSmode
},
25472 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128
, UNKNOWN
, (int) CCZmode
},
25475 /* Special builtins with variable number of arguments. */
25476 static const struct builtin_description bdesc_special_args
[] =
25478 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rdtsc
, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
25479 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rdtscp
, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP
, UNKNOWN
, (int) UINT64_FTYPE_PUNSIGNED
},
25480 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_pause
, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
25483 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_emms
, "__builtin_ia32_emms", IX86_BUILTIN_EMMS
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
25486 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_femms
, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
25489 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movups
, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
25490 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movntv4sf
, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
25491 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movups
, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS
, UNKNOWN
, (int) V4SF_FTYPE_PCFLOAT
},
25493 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadhps_exp
, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_PCV2SF
},
25494 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadlps_exp
, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_PCV2SF
},
25495 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storehps
, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS
, UNKNOWN
, (int) VOID_FTYPE_PV2SF_V4SF
},
25496 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storelps
, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS
, UNKNOWN
, (int) VOID_FTYPE_PV2SF_V4SF
},
25498 /* SSE or 3DNow!A */
25499 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_sse_sfence
, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
25500 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_sse_movntdi
, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ
, UNKNOWN
, (int) VOID_FTYPE_PULONGLONG_ULONGLONG
},
25503 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_lfence
, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
25504 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_mfence
, 0, IX86_BUILTIN_MFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
25505 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movupd
, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
25506 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movdqu
, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU
, UNKNOWN
, (int) VOID_FTYPE_PCHAR_V16QI
},
25507 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntv2df
, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
25508 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntv2di
, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ
, UNKNOWN
, (int) VOID_FTYPE_PV2DI_V2DI
},
25509 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntsi
, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI
, UNKNOWN
, (int) VOID_FTYPE_PINT_INT
},
25510 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movupd
, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD
, UNKNOWN
, (int) V2DF_FTYPE_PCDOUBLE
},
25511 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movdqu
, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU
, UNKNOWN
, (int) V16QI_FTYPE_PCCHAR
},
25513 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadhpd_exp
, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_PCDOUBLE
},
25514 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadlpd_exp
, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_PCDOUBLE
},
25517 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_lddqu
, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU
, UNKNOWN
, (int) V16QI_FTYPE_PCCHAR
},
25520 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_movntdqa
, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA
, UNKNOWN
, (int) V2DI_FTYPE_PV2DI
},
25523 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_vmmovntv2df
, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
25524 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_vmmovntv4sf
, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
25527 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vzeroall
, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
25528 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vzeroupper
, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
25530 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv4sf
, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS
, UNKNOWN
, (int) V4SF_FTYPE_PCFLOAT
},
25531 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv4df
, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256
, UNKNOWN
, (int) V4DF_FTYPE_PCDOUBLE
},
25532 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv8sf
, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256
, UNKNOWN
, (int) V8SF_FTYPE_PCFLOAT
},
25533 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vbroadcastf128_v4df
, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCV2DF
},
25534 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vbroadcastf128_v8sf
, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCV4SF
},
25536 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movupd256
, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCDOUBLE
},
25537 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movups256
, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCFLOAT
},
25538 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movupd256
, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V4DF
},
25539 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movups256
, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V8SF
},
25540 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movdqu256
, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256
, UNKNOWN
, (int) V32QI_FTYPE_PCCHAR
},
25541 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movdqu256
, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256
, UNKNOWN
, (int) VOID_FTYPE_PCHAR_V32QI
},
25542 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_lddqu256
, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256
, UNKNOWN
, (int) V32QI_FTYPE_PCCHAR
},
25544 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv4di
, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256
, UNKNOWN
, (int) VOID_FTYPE_PV4DI_V4DI
},
25545 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv4df
, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V4DF
},
25546 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv8sf
, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V8SF
},
25548 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadpd
, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD
, UNKNOWN
, (int) V2DF_FTYPE_PCV2DF_V2DI
},
25549 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadps
, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS
, UNKNOWN
, (int) V4SF_FTYPE_PCV4SF_V4SI
},
25550 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadpd256
, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCV4DF_V4DI
},
25551 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadps256
, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCV8SF_V8SI
},
25552 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstorepd
, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD
, UNKNOWN
, (int) VOID_FTYPE_PV2DF_V2DI_V2DF
},
25553 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstoreps
, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS
, UNKNOWN
, (int) VOID_FTYPE_PV4SF_V4SI_V4SF
},
25554 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstorepd256
, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256
, UNKNOWN
, (int) VOID_FTYPE_PV4DF_V4DI_V4DF
},
25555 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstoreps256
, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256
, UNKNOWN
, (int) VOID_FTYPE_PV8SF_V8SI_V8SF
},
25558 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_movntdqa
, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256
, UNKNOWN
, (int) V4DI_FTYPE_PV4DI
},
25559 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadd
, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD
, UNKNOWN
, (int) V4SI_FTYPE_PCV4SI_V4SI
},
25560 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadq
, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ
, UNKNOWN
, (int) V2DI_FTYPE_PCV2DI_V2DI
},
25561 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadd256
, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256
, UNKNOWN
, (int) V8SI_FTYPE_PCV8SI_V8SI
},
25562 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadq256
, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256
, UNKNOWN
, (int) V4DI_FTYPE_PCV4DI_V4DI
},
25563 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstored
, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED
, UNKNOWN
, (int) VOID_FTYPE_PV4SI_V4SI_V4SI
},
25564 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstoreq
, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ
, UNKNOWN
, (int) VOID_FTYPE_PV2DI_V2DI_V2DI
},
25565 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstored256
, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256
, UNKNOWN
, (int) VOID_FTYPE_PV8SI_V8SI_V8SI
},
25566 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstoreq256
, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256
, UNKNOWN
, (int) VOID_FTYPE_PV4DI_V4DI_V4DI
},
25568 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_llwpcb
, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
25569 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_slwpcb
, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB
, UNKNOWN
, (int) PVOID_FTYPE_VOID
},
25570 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpvalsi3
, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32
, UNKNOWN
, (int) VOID_FTYPE_UINT_UINT_UINT
},
25571 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpvaldi3
, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64
, UNKNOWN
, (int) VOID_FTYPE_UINT64_UINT_UINT
},
25572 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpinssi3
, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32
, UNKNOWN
, (int) UCHAR_FTYPE_UINT_UINT_UINT
},
25573 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpinsdi3
, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64
, UNKNOWN
, (int) UCHAR_FTYPE_UINT64_UINT_UINT
},
25576 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdfsbasesi
, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
25577 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdfsbasedi
, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
25578 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdgsbasesi
, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
25579 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdgsbasedi
, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
25580 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrfsbasesi
, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32
, UNKNOWN
, (int) VOID_FTYPE_UNSIGNED
},
25581 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrfsbasedi
, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64
, UNKNOWN
, (int) VOID_FTYPE_UINT64
},
25582 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrgsbasesi
, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32
, UNKNOWN
, (int) VOID_FTYPE_UNSIGNED
},
25583 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrgsbasedi
, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64
, UNKNOWN
, (int) VOID_FTYPE_UINT64
},
25586 /* Builtins with variable number of arguments. */
25587 static const struct builtin_description bdesc_args
[] =
25589 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_bsr
, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI
, UNKNOWN
, (int) INT_FTYPE_INT
},
25590 { OPTION_MASK_ISA_64BIT
, CODE_FOR_bsr_rex64
, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI
, UNKNOWN
, (int) INT64_FTYPE_INT64
},
25591 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rdpmc
, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC
, UNKNOWN
, (int) UINT64_FTYPE_INT
},
25592 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotlqi3
, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI
, UNKNOWN
, (int) UINT8_FTYPE_UINT8_INT
},
25593 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotlhi3
, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI
, UNKNOWN
, (int) UINT16_FTYPE_UINT16_INT
},
25594 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotrqi3
, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI
, UNKNOWN
, (int) UINT8_FTYPE_UINT8_INT
},
25595 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotrhi3
, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI
, UNKNOWN
, (int) UINT16_FTYPE_UINT16_INT
},
25598 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
25599 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
25600 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
25601 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
25602 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
25603 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
25605 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
25606 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
25607 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
25608 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
25609 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
25610 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
25611 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
25612 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
25614 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
25615 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
25617 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
25618 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andnotv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
25619 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
25620 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
25622 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
25623 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
25624 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
25625 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
25626 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
25627 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
25629 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
25630 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
25631 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
25632 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
25633 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
25634 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
25636 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packsswb
, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB
, UNKNOWN
, (int) V8QI_FTYPE_V4HI_V4HI
},
25637 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packssdw
, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW
, UNKNOWN
, (int) V4HI_FTYPE_V2SI_V2SI
},
25638 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packuswb
, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB
, UNKNOWN
, (int) V8QI_FTYPE_V4HI_V4HI
},
25640 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_pmaddwd
, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD
, UNKNOWN
, (int) V2SI_FTYPE_V4HI_V4HI
},
25642 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
25643 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
25644 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv1di3
, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_SI_COUNT
},
25645 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
25646 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
25647 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv1di3
, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_COUNT
},
25649 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
25650 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
25651 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv1di3
, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_SI_COUNT
},
25652 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
25653 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
25654 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv1di3
, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_COUNT
},
25656 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
25657 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
25658 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
25659 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
25662 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_pf2id
, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID
, UNKNOWN
, (int) V2SI_FTYPE_V2SF
},
25663 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_floatv2si2
, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD
, UNKNOWN
, (int) V2SF_FTYPE_V2SI
},
25664 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpv2sf2
, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
25665 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rsqrtv2sf2
, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
25667 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
25668 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_haddv2sf3
, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
25669 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_addv2sf3
, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
25670 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_eqv2sf3
, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
25671 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_gev2sf3
, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
25672 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_gtv2sf3
, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
25673 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_smaxv2sf3
, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
25674 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_sminv2sf3
, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
25675 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_mulv2sf3
, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
25676 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpit1v2sf3
, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
25677 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpit2v2sf3
, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
25678 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rsqit1v2sf3
, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
25679 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_subv2sf3
, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
25680 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_subrv2sf3
, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
25681 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_pmulhrwv4hi3
, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
25684 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pf2iw
, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW
, UNKNOWN
, (int) V2SI_FTYPE_V2SF
},
25685 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pi2fw
, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW
, UNKNOWN
, (int) V2SF_FTYPE_V2SI
},
25686 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pswapdv2si2
, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI
},
25687 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pswapdv2sf2
, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
25688 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_hsubv2sf3
, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
25689 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_addsubv2sf3
, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
25692 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movmskps
, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
25693 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_sqrtv4sf2
, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
25694 { OPTION_MASK_ISA_SSE
, CODE_FOR_sqrtv4sf2
, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
25695 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rsqrtv4sf2
, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
25696 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtv4sf2
, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
25697 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rcpv4sf2
, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
25698 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtps2pi
, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI
, UNKNOWN
, (int) V2SI_FTYPE_V4SF
},
25699 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtss2si
, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
25700 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtss2siq
, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF
},
25701 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttps2pi
, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI
, UNKNOWN
, (int) V2SI_FTYPE_V4SF
},
25702 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttss2si
, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
25703 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvttss2siq
, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF
},
25705 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_shufps
, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
25707 { OPTION_MASK_ISA_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
25708 { OPTION_MASK_ISA_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
25709 { OPTION_MASK_ISA_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
25710 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
25711 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
25712 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
25713 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
25714 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
25716 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, (int) V4SF_FTYPE_V4SF_V4SF
},
25717 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF
},
25718 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF
},
25719 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
25720 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
25721 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
25722 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, (int) V4SF_FTYPE_V4SF_V4SF
},
25723 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF
},
25724 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF
},
25725 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
25726 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
25727 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
25728 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, (int) V4SF_FTYPE_V4SF_V4SF
},
25729 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF
},
25730 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF
},
25731 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
25732 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, (int) V4SF_FTYPE_V4SF_V4SF
},
25733 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF
},
25734 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF
},
25735 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
25736 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
25737 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, ORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
25739 { OPTION_MASK_ISA_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
25740 { OPTION_MASK_ISA_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
25741 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
25742 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
25744 { OPTION_MASK_ISA_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
25745 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_andnotv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
25746 { OPTION_MASK_ISA_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
25747 { OPTION_MASK_ISA_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
25749 { OPTION_MASK_ISA_SSE
, CODE_FOR_copysignv4sf3
, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
25751 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
25752 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movhlps_exp
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
25753 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movlhps_exp
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
25754 { OPTION_MASK_ISA_SSE
, CODE_FOR_vec_interleave_highv4sf
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
25755 { OPTION_MASK_ISA_SSE
, CODE_FOR_vec_interleave_lowv4sf
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
25757 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtpi2ps
, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2SI
},
25758 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtsi2ss
, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_SI
},
25759 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtsi2ssq
, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS
, UNKNOWN
, V4SF_FTYPE_V4SF_DI
},
25761 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtsf2
, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF
, UNKNOWN
, (int) FLOAT_FTYPE_FLOAT
},
25763 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsqrtv4sf2
, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
25764 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrsqrtv4sf2
, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
25765 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrcpv4sf2
, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
25767 /* SSE MMX or 3Dnow!A */
25768 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
25769 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
25770 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
25772 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
25773 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
25774 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
25775 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
25777 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_psadbw
, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW
, UNKNOWN
, (int) V1DI_FTYPE_V8QI_V8QI
},
25778 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB
, UNKNOWN
, (int) INT_FTYPE_V8QI
},
25780 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pshufw
, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_INT
},
25783 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_shufpd
, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
25785 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movmskpd
, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
25786 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmovmskb
, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128
, UNKNOWN
, (int) INT_FTYPE_V16QI
},
25787 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sqrtv2df2
, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF
},
25788 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtdq2pd
, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SI
},
25789 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtdq2ps
, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SI
},
25790 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtudq2ps
, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SI
},
25792 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2dq
, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
25793 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2pi
, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
25794 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2ps
, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS
, UNKNOWN
, (int) V4SF_FTYPE_V2DF
},
25795 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2dq
, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
25796 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2pi
, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
25798 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpi2pd
, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD
, UNKNOWN
, (int) V2DF_FTYPE_V2SI
},
25800 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2si
, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
25801 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttsd2si
, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
25802 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsd2siq
, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF
},
25803 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvttsd2siq
, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF
},
25805 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2dq
, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
25806 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2pd
, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SF
},
25807 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttps2dq
, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
25809 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
25810 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
25811 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
25812 { OPTION_MASK_ISA_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
25813 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
25814 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
25815 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
25816 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
25818 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, (int) V2DF_FTYPE_V2DF_V2DF
},
25819 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF
},
25820 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF
},
25821 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
25822 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
25823 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
25824 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, (int) V2DF_FTYPE_V2DF_V2DF
},
25825 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF
},
25826 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF
},
25827 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
25828 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
25829 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
25830 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, (int) V2DF_FTYPE_V2DF_V2DF
},
25831 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF
},
25832 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF
},
25833 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
25834 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, (int) V2DF_FTYPE_V2DF_V2DF
},
25835 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF
},
25836 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF
},
25837 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
25839 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
25840 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
25841 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
25842 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
25844 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
25845 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_andnotv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
25846 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
25847 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
25849 { OPTION_MASK_ISA_SSE2
, CODE_FOR_copysignv2df3
, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
25851 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
25852 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv2df
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
25853 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv2df
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
25855 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_pack_sfix_v2df
, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V2DF_V2DF
},
25857 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
25858 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
25859 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
25860 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
25861 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
25862 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
25863 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
25864 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
25866 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
25867 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
25868 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
25869 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
25870 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
25871 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
25872 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
25873 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
25875 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
25876 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, UNKNOWN
,(int) V8HI_FTYPE_V8HI_V8HI
},
25878 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
25879 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_andnotv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
25880 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
25881 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
25883 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
25884 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
25886 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
25887 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
25888 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
25889 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
25890 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
25891 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
25893 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
25894 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
25895 { OPTION_MASK_ISA_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
25896 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
25898 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv16qi
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
25899 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv8hi
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
25900 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv4si
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
25901 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv2di
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
25902 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv16qi
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
25903 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv8hi
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
25904 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv4si
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
25905 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv2di
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
25907 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, UNKNOWN
, (int) V16QI_FTYPE_V8HI_V8HI
},
25908 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, UNKNOWN
, (int) V8HI_FTYPE_V4SI_V4SI
},
25909 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, UNKNOWN
, (int) V16QI_FTYPE_V8HI_V8HI
},
25911 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
25912 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_psadbw
, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI_V16QI
},
25914 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_umulv1siv1di3
, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ
, UNKNOWN
, (int) V1DI_FTYPE_V2SI_V2SI
},
25915 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI_V4SI
},
25917 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmaddwd
, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI_V8HI
},
25919 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsi2sd
, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_SI
},
25920 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_DI
},
25921 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2ss
, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2DF
},
25922 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtss2sd
, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V4SF
},
25924 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ashlv1ti3
, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT_CONVERT
},
25925 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
25926 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
25927 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
25928 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
25929 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
25930 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
25932 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_lshrv1ti3
, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT_CONVERT
},
25933 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
25934 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
25935 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
25936 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
25937 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
25938 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
25940 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
25941 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
25942 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
25943 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
25945 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufd
, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_INT
},
25946 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshuflw
, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
25947 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufhw
, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
25949 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsqrtv2df2
, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_VEC_MERGE
},
25951 { OPTION_MASK_ISA_SSE2
, CODE_FOR_abstf2
, 0, IX86_BUILTIN_FABSQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128
},
25952 { OPTION_MASK_ISA_SSE2
, CODE_FOR_copysigntf3
, 0, IX86_BUILTIN_COPYSIGNQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128
},
25954 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse2_movq128
, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
25957 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mmx_addv1di3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI
},
25958 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mmx_subv1di3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI
},
25961 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_movshdup
, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
25962 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_movsldup
, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
25964 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
25965 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
25966 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
25967 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
25968 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
25969 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
25972 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv16qi2
, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI
},
25973 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8qi2
, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI
},
25974 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8hi2
, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
25975 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4hi2
, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI
},
25976 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4si2
, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI
},
25977 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv2si2
, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI
},
25979 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv8hi3
, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
25980 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv4hi3
, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
25981 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv4si3
, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
25982 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv2si3
, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
25983 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv8hi3
, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
25984 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv4hi3
, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
25985 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv8hi3
, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
25986 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv4hi3
, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
25987 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv4si3
, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
25988 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv2si3
, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
25989 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv8hi3
, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
25990 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv4hi3
, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
25991 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubsw128
, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI_V16QI
},
25992 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubsw
, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V8QI_V8QI
},
25993 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv8hi3
, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
25994 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv4hi3
, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
25995 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv16qi3
, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
25996 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv8qi3
, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
25997 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv16qi3
, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
25998 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8qi3
, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
25999 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8hi3
, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26000 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4hi3
, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26001 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4si3
, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26002 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv2si3
, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26005 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_palignrti
, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT
},
26006 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_palignrdi
, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT
},
26009 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendpd
, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
26010 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendps
, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
26011 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendvpd
, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_V2DF
},
26012 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendvps
, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_V4SF
},
26013 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_dppd
, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
26014 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_dpps
, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
26015 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_insertps
, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
26016 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_mpsadbw
, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI_INT
},
26017 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_pblendvb
, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI_V16QI
},
26018 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_pblendw
, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_INT
},
26020 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv8qiv8hi2
, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI
},
26021 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv4qiv4si2
, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128
, UNKNOWN
, (int) V4SI_FTYPE_V16QI
},
26022 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2qiv2di2
, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI
},
26023 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv4hiv4si2
, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI
},
26024 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2hiv2di2
, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128
, UNKNOWN
, (int) V2DI_FTYPE_V8HI
},
26025 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2siv2di2
, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI
},
26026 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv8qiv8hi2
, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI
},
26027 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4qiv4si2
, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128
, UNKNOWN
, (int) V4SI_FTYPE_V16QI
},
26028 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2qiv2di2
, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI
},
26029 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4hiv4si2
, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI
},
26030 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2hiv2di2
, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128
, UNKNOWN
, (int) V2DI_FTYPE_V8HI
},
26031 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2siv2di2
, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI
},
26032 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_phminposuw
, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
26034 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_packusdw
, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128
, UNKNOWN
, (int) V8HI_FTYPE_V4SI_V4SI
},
26035 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_eqv2di3
, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26036 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_smaxv16qi3
, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26037 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_smaxv4si3
, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26038 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_umaxv4si3
, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26039 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_umaxv8hi3
, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26040 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sminv16qi3
, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26041 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sminv4si3
, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26042 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_uminv4si3
, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26043 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_uminv8hi3
, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26044 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_mulv2siv2di3
, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI_V4SI
},
26045 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_mulv4si3
, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26048 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_INT
},
26049 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_INT
},
26050 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundsd
, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
26051 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundss
, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
26053 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD
, (enum rtx_code
) ROUND_FLOOR
, (int) V2DF_FTYPE_V2DF_ROUND
},
26054 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD
, (enum rtx_code
) ROUND_CEIL
, (int) V2DF_FTYPE_V2DF_ROUND
},
26055 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD
, (enum rtx_code
) ROUND_TRUNC
, (int) V2DF_FTYPE_V2DF_ROUND
},
26056 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD
, (enum rtx_code
) ROUND_MXCSR
, (int) V2DF_FTYPE_V2DF_ROUND
},
26058 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv2df2
, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ
, UNKNOWN
, (int) V2DF_FTYPE_V2DF
},
26060 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS
, (enum rtx_code
) ROUND_FLOOR
, (int) V4SF_FTYPE_V4SF_ROUND
},
26061 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS
, (enum rtx_code
) ROUND_CEIL
, (int) V4SF_FTYPE_V4SF_ROUND
},
26062 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS
, (enum rtx_code
) ROUND_TRUNC
, (int) V4SF_FTYPE_V4SF_ROUND
},
26063 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS
, (enum rtx_code
) ROUND_MXCSR
, (int) V4SF_FTYPE_V4SF_ROUND
},
26065 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv4sf2
, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26067 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ
, EQ
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
26068 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC
, LTU
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
26069 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC
, GTU
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
26072 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_gtv2di3
, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26073 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32qi
, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI
, UNKNOWN
, (int) UINT_FTYPE_UINT_UCHAR
},
26074 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32hi
, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI
, UNKNOWN
, (int) UINT_FTYPE_UINT_USHORT
},
26075 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32si
, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
26076 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse4_2_crc32di
, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
26079 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_extrqi
, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_UINT_UINT
},
26080 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_extrq
, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V16QI
},
26081 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_insertqi
, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT
},
26082 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_insertq
, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26085 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aeskeygenassist
, 0, IX86_BUILTIN_AESKEYGENASSIST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT
},
26086 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesimc
, 0, IX86_BUILTIN_AESIMC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
26088 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesenc
, 0, IX86_BUILTIN_AESENC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26089 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesenclast
, 0, IX86_BUILTIN_AESENCLAST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26090 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesdec
, 0, IX86_BUILTIN_AESDEC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26091 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesdeclast
, 0, IX86_BUILTIN_AESDECLAST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26094 { OPTION_MASK_ISA_SSE2
, CODE_FOR_pclmulqdq
, 0, IX86_BUILTIN_PCLMULQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_INT
},
26097 { OPTION_MASK_ISA_AVX
, CODE_FOR_addv4df3
, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
26098 { OPTION_MASK_ISA_AVX
, CODE_FOR_addv8sf3
, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
26099 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_addsubv4df3
, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
26100 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_addsubv8sf3
, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
26101 { OPTION_MASK_ISA_AVX
, CODE_FOR_andv4df3
, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
26102 { OPTION_MASK_ISA_AVX
, CODE_FOR_andv8sf3
, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
26103 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_andnotv4df3
, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
26104 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_andnotv8sf3
, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
26105 { OPTION_MASK_ISA_AVX
, CODE_FOR_divv4df3
, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
26106 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_divv8sf3
, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
26107 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_haddv4df3
, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
26108 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_hsubv8sf3
, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
26109 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_hsubv4df3
, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
26110 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_haddv8sf3
, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
26111 { OPTION_MASK_ISA_AVX
, CODE_FOR_smaxv4df3
, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
26112 { OPTION_MASK_ISA_AVX
, CODE_FOR_smaxv8sf3
, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
26113 { OPTION_MASK_ISA_AVX
, CODE_FOR_sminv4df3
, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
26114 { OPTION_MASK_ISA_AVX
, CODE_FOR_sminv8sf3
, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
26115 { OPTION_MASK_ISA_AVX
, CODE_FOR_mulv4df3
, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
26116 { OPTION_MASK_ISA_AVX
, CODE_FOR_mulv8sf3
, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
26117 { OPTION_MASK_ISA_AVX
, CODE_FOR_iorv4df3
, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
26118 { OPTION_MASK_ISA_AVX
, CODE_FOR_iorv8sf3
, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
26119 { OPTION_MASK_ISA_AVX
, CODE_FOR_subv4df3
, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
26120 { OPTION_MASK_ISA_AVX
, CODE_FOR_subv8sf3
, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
26121 { OPTION_MASK_ISA_AVX
, CODE_FOR_xorv4df3
, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
26122 { OPTION_MASK_ISA_AVX
, CODE_FOR_xorv8sf3
, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
26124 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv2df3
, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DI
},
26125 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv4sf3
, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SI
},
26126 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv4df3
, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DI
},
26127 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv8sf3
, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SI
},
26129 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendpd256
, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
26130 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendps256
, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
26131 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendvpd256
, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_V4DF
},
26132 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendvps256
, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_V8SF
},
26133 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_dpps256
, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
26134 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_shufpd256
, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
26135 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_shufps256
, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
26136 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vmcmpv2df3
, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
26137 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vmcmpv4sf3
, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
26138 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv2df3
, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
26139 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv4sf3
, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
26140 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv4df3
, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
26141 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv8sf3
, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
26142 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v4df
, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256
, UNKNOWN
, (int) V2DF_FTYPE_V4DF_INT
},
26143 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v8sf
, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256
, UNKNOWN
, (int) V4SF_FTYPE_V8SF_INT
},
26144 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v8si
, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256
, UNKNOWN
, (int) V4SI_FTYPE_V8SI_INT
},
26145 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtdq2pd256
, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4SI
},
26146 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtdq2ps256
, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SI
},
26147 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtpd2ps256
, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256
, UNKNOWN
, (int) V4SF_FTYPE_V4DF
},
26148 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtps2dq256
, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
26149 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtps2pd256
, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4SF
},
26150 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvttpd2dq256
, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256
, UNKNOWN
, (int) V4SI_FTYPE_V4DF
},
26151 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtpd2dq256
, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256
, UNKNOWN
, (int) V4SI_FTYPE_V4DF
},
26152 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvttps2dq256
, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
26153 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v4df3
, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
26154 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v8sf3
, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
26155 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v8si3
, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI_INT
},
26156 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv2df
, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_INT
},
26157 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv4sf
, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_INT
},
26158 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv4df
, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
26159 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv8sf
, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_INT
},
26160 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v4df
, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V2DF_INT
},
26161 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v8sf
, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V4SF_INT
},
26162 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v8si
, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_INT
},
26164 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movshdup256
, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
26165 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movsldup256
, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
26166 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movddup256
, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
26168 { OPTION_MASK_ISA_AVX
, CODE_FOR_sqrtv4df2
, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
26169 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_sqrtv8sf2
, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
26170 { OPTION_MASK_ISA_AVX
, CODE_FOR_sqrtv8sf2
, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
26171 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_rsqrtv8sf2
, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
26172 { OPTION_MASK_ISA_AVX
, CODE_FOR_rsqrtv8sf2
, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
26174 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_rcpv8sf2
, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
26176 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
26177 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_INT
},
26179 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256
, (enum rtx_code
) ROUND_FLOOR
, (int) V4DF_FTYPE_V4DF_ROUND
},
26180 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256
, (enum rtx_code
) ROUND_CEIL
, (int) V4DF_FTYPE_V4DF_ROUND
},
26181 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256
, (enum rtx_code
) ROUND_TRUNC
, (int) V4DF_FTYPE_V4DF_ROUND
},
26182 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256
, (enum rtx_code
) ROUND_MXCSR
, (int) V4DF_FTYPE_V4DF_ROUND
},
26184 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv4df2
, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
26186 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SF_FTYPE_V8SF_ROUND
},
26187 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SF_FTYPE_V8SF_ROUND
},
26188 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256
, (enum rtx_code
) ROUND_TRUNC
, (int) V8SF_FTYPE_V8SF_ROUND
},
26189 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256
, (enum rtx_code
) ROUND_MXCSR
, (int) V8SF_FTYPE_V8SF_ROUND
},
26191 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv8sf2
, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
26193 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpckhpd256
, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
26194 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpcklpd256
, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
26195 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpckhps256
, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
26196 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpcklps256
, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
26198 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_si256_si
, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI
, UNKNOWN
, (int) V8SI_FTYPE_V4SI
},
26199 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ps256_ps
, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS
, UNKNOWN
, (int) V8SF_FTYPE_V4SF
},
26200 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_pd256_pd
, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD
, UNKNOWN
, (int) V4DF_FTYPE_V2DF
},
26201 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v8si
, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256
, UNKNOWN
, (int) V4SI_FTYPE_V8SI
},
26202 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v8sf
, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256
, UNKNOWN
, (int) V4SF_FTYPE_V8SF
},
26203 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v4df
, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256
, UNKNOWN
, (int) V2DF_FTYPE_V4DF
},
26205 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD
, EQ
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
26206 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD
, LTU
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
26207 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD
, GTU
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
26208 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS
, EQ
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
26209 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS
, LTU
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
26210 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS
, GTU
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
26211 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256
, EQ
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
26212 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256
, LTU
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
26213 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256
, GTU
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
26214 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256
, EQ
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
26215 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256
, LTU
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
26216 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256
, GTU
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
26217 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256
, EQ
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
26218 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256
, LTU
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
26219 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256
, GTU
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
26221 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movmskpd256
, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256
, UNKNOWN
, (int) INT_FTYPE_V4DF
},
26222 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movmskps256
, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256
, UNKNOWN
, (int) INT_FTYPE_V8SF
},
26224 { OPTION_MASK_ISA_AVX
, CODE_FOR_copysignv8sf3
, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
26225 { OPTION_MASK_ISA_AVX
, CODE_FOR_copysignv4df3
, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
26228 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_mpsadbw
, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI_INT
},
26229 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv32qi2
, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI
},
26230 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv16hi2
, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI
},
26231 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv8si2
, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI
},
26232 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packssdw
, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256
, UNKNOWN
, (int) V16HI_FTYPE_V8SI_V8SI
},
26233 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packsswb
, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256
, UNKNOWN
, (int) V32QI_FTYPE_V16HI_V16HI
},
26234 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packusdw
, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256
, UNKNOWN
, (int) V16HI_FTYPE_V8SI_V8SI
},
26235 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packuswb
, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256
, UNKNOWN
, (int) V32QI_FTYPE_V16HI_V16HI
},
26236 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv32qi3
, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26237 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv16hi3
, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26238 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv8si3
, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26239 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv4di3
, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26240 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ssaddv32qi3
, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26241 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ssaddv16hi3
, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26242 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_usaddv32qi3
, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26243 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_usaddv16hi3
, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26244 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_palignrv2ti
, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT
},
26245 { OPTION_MASK_ISA_AVX2
, CODE_FOR_andv4di3
, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26246 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_andnotv4di3
, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26247 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_uavgv32qi3
, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26248 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_uavgv16hi3
, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26249 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblendvb
, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI_V32QI
},
26250 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblendw
, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI_INT
},
26251 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv32qi3
, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26252 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv16hi3
, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26253 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv8si3
, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26254 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv4di3
, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26255 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv32qi3
, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26256 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv16hi3
, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26257 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv8si3
, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26258 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv4di3
, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26259 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phaddwv16hi3
, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26260 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phadddv8si3
, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26261 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phaddswv16hi3
, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26262 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubwv16hi3
, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26263 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubdv8si3
, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26264 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubswv16hi3
, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26265 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmaddubsw256
, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V32QI_V32QI
},
26266 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmaddwd
, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256
, UNKNOWN
, (int) V8SI_FTYPE_V16HI_V16HI
},
26267 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv32qi3
, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26268 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv16hi3
, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26269 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv8si3
, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26270 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv32qi3
, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26271 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv16hi3
, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26272 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv8si3
, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26273 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv32qi3
, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26274 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv16hi3
, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26275 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv8si3
, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26276 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv32qi3
, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26277 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv16hi3
, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26278 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv8si3
, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26279 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmovmskb
, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256
, UNKNOWN
, (int) INT_FTYPE_V32QI
},
26280 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv16qiv16hi2
, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16QI
},
26281 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv8qiv8si2
, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256
, UNKNOWN
, (int) V8SI_FTYPE_V16QI
},
26282 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4qiv4di2
, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V16QI
},
26283 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv8hiv8si2
, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256
, UNKNOWN
, (int) V8SI_FTYPE_V8HI
},
26284 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4hiv4di2
, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8HI
},
26285 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4siv4di2
, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4SI
},
26286 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv16qiv16hi2
, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16QI
},
26287 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv8qiv8si2
, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256
, UNKNOWN
, (int) V8SI_FTYPE_V16QI
},
26288 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4qiv4di2
, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V16QI
},
26289 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv8hiv8si2
, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256
, UNKNOWN
, (int) V8SI_FTYPE_V8HI
},
26290 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4hiv4di2
, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8HI
},
26291 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4siv4di2
, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4SI
},
26292 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_mulv4siv4di3
, "__builtin_ia32_pmuldq256" , IX86_BUILTIN_PMULDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8SI_V8SI
},
26293 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_umulhrswv16hi3
, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26294 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umulv16hi3_highpart
, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26295 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smulv16hi3_highpart
, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26296 { OPTION_MASK_ISA_AVX2
, CODE_FOR_mulv16hi3
, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26297 { OPTION_MASK_ISA_AVX2
, CODE_FOR_mulv8si3
, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26298 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_umulv4siv4di3
, "__builtin_ia32_pmuludq256" , IX86_BUILTIN_PMULUDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8SI_V8SI
},
26299 { OPTION_MASK_ISA_AVX2
, CODE_FOR_iorv4di3
, "__builtin_ia32_por256", IX86_BUILTIN_POR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26300 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psadbw
, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256
, UNKNOWN
, (int) V16HI_FTYPE_V32QI_V32QI
},
26301 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufbv32qi3
, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26302 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufdv3
, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_INT
},
26303 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufhwv3
, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_INT
},
26304 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshuflwv3
, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_INT
},
26305 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv32qi3
, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26306 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv16hi3
, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26307 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv8si3
, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26308 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlv2ti3
, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_CONVERT
},
26309 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv16hi3
, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
26310 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv16hi3
, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
26311 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv8si3
, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
26312 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv8si3
, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
26313 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv4di3
, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_COUNT
},
26314 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv4di3
, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_COUNT
},
26315 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv16hi3
, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
26316 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv16hi3
, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
26317 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv8si3
, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
26318 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv8si3
, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
26319 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrv2ti3
, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_CONVERT
},
26320 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv16hi3
, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
26321 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv16hi3
, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
26322 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv8si3
, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
26323 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv8si3
, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
26324 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv4di3
, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_COUNT
},
26325 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv4di3
, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_COUNT
},
26326 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv32qi3
, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26327 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv16hi3
, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26328 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv8si3
, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26329 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv4di3
, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26330 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sssubv32qi3
, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26331 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sssubv16hi3
, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26332 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ussubv32qi3
, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26333 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ussubv16hi3
, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26334 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv32qi
, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26335 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv16hi
, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26336 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv8si
, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26337 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv4di
, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26338 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv32qi
, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26339 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv16hi
, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26340 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv8si
, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26341 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv4di
, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26342 { OPTION_MASK_ISA_AVX2
, CODE_FOR_xorv4di3
, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26343 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv4sf
, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26344 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv8sf
, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256
, UNKNOWN
, (int) V8SF_FTYPE_V4SF
},
26345 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv4df
, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256
, UNKNOWN
, (int) V4DF_FTYPE_V2DF
},
26346 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vbroadcasti128_v4di
, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256
, UNKNOWN
, (int) V4DI_FTYPE_V2DI
},
26347 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblenddv4si
, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_INT
},
26348 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblenddv8si
, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI_INT
},
26349 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv32qi
, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256
, UNKNOWN
, (int) V32QI_FTYPE_V16QI
},
26350 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv16hi
, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256
, UNKNOWN
, (int) V16HI_FTYPE_V8HI
},
26351 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv8si
, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256
, UNKNOWN
, (int) V8SI_FTYPE_V4SI
},
26352 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv4di
, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256
, UNKNOWN
, (int) V4DI_FTYPE_V2DI
},
26353 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv16qi
, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI
},
26354 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv8hi
, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
26355 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv4si
, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI
},
26356 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv2di
, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
26357 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permvarv8si
, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26358 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv4df
, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
26359 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permvarv8sf
, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
26360 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv4di
, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT
},
26361 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv2ti
, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI_INT
},
26362 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_extracti128
, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256
, UNKNOWN
, (int) V2DI_FTYPE_V4DI_INT
},
26363 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_inserti128
, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_INT
},
26364 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshlvv4di
, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26365 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshlvv2di
, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26366 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshlvv8si
, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26367 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshlvv4si
, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26368 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashrvv8si
, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26369 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashrvv4si
, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26370 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv4di
, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26371 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv2di
, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26372 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv8si
, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26373 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv4si
, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26375 { OPTION_MASK_ISA_LZCNT
, CODE_FOR_clzhi2_lzcnt
, "__builtin_clzs", IX86_BUILTIN_CLZS
, UNKNOWN
, (int) UINT16_FTYPE_UINT16
},
26378 { OPTION_MASK_ISA_BMI
, CODE_FOR_bmi_bextr_si
, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
26379 { OPTION_MASK_ISA_BMI
, CODE_FOR_bmi_bextr_di
, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
26380 { OPTION_MASK_ISA_BMI
, CODE_FOR_ctzhi2
, "__builtin_ctzs", IX86_BUILTIN_CTZS
, UNKNOWN
, (int) UINT16_FTYPE_UINT16
},
26383 { OPTION_MASK_ISA_TBM
, CODE_FOR_tbm_bextri_si
, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
26384 { OPTION_MASK_ISA_TBM
, CODE_FOR_tbm_bextri_di
, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
26387 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtph2ps
, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS
, UNKNOWN
, (int) V4SF_FTYPE_V8HI
},
26388 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtph2ps256
, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8HI
},
26389 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtps2ph
, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH
, UNKNOWN
, (int) V8HI_FTYPE_V4SF_INT
},
26390 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtps2ph256
, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256
, UNKNOWN
, (int) V8HI_FTYPE_V8SF_INT
},
26393 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_bzhi_si3
, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
26394 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_bzhi_di3
, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
26395 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pdep_si3
, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
26396 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pdep_di3
, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
26397 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pext_si3
, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
26398 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pext_di3
, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
/* FMA4 and XOP.  */
/* Shorthand aliases mapping the MULTI_ARG_* names used in the
   bdesc_multi_arg table below onto the ix86_builtin_func_type
   enumerators.  The suffix encodes operand count and vector mode
   (e.g. _3_SF = three V4SF operands; the "2" variants are the
   256-bit AVX-width modes; _CMP/_TF carry an extra comparison or
   condition operand; _IMM takes an immediate count).  */
#define MULTI_ARG_4_DF2_DI_I	V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1	V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I	V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1	V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF		V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF		V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2		V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2		V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI		V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI		V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI	V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI		V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI	V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI		V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2		V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2		V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2		V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2		V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF		V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF		V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI		V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI		V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI		V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI		V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM	V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM	V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM	V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM	V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP	V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP	V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP	V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP	V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF	V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF	V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF	V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF	V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF	V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF	V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF		V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF		V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2		V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2		V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI		V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI		V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI		V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI		V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI	V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI	V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI	V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI	V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI	V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI	V8HI_FTYPE_V16QI
26455 static const struct builtin_description bdesc_multi_arg
[] =
26457 { OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_vmfmadd_v4sf
,
26458 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS
,
26459 UNKNOWN
, (int)MULTI_ARG_3_SF
},
26460 { OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_vmfmadd_v2df
,
26461 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD
,
26462 UNKNOWN
, (int)MULTI_ARG_3_DF
},
26464 { OPTION_MASK_ISA_FMA
, CODE_FOR_fmai_vmfmadd_v4sf
,
26465 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3
,
26466 UNKNOWN
, (int)MULTI_ARG_3_SF
},
26467 { OPTION_MASK_ISA_FMA
, CODE_FOR_fmai_vmfmadd_v2df
,
26468 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3
,
26469 UNKNOWN
, (int)MULTI_ARG_3_DF
},
26471 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v4sf
,
26472 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS
,
26473 UNKNOWN
, (int)MULTI_ARG_3_SF
},
26474 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v2df
,
26475 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD
,
26476 UNKNOWN
, (int)MULTI_ARG_3_DF
},
26477 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v8sf
,
26478 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256
,
26479 UNKNOWN
, (int)MULTI_ARG_3_SF2
},
26480 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v4df
,
26481 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256
,
26482 UNKNOWN
, (int)MULTI_ARG_3_DF2
},
26484 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v4sf
,
26485 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS
,
26486 UNKNOWN
, (int)MULTI_ARG_3_SF
},
26487 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v2df
,
26488 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD
,
26489 UNKNOWN
, (int)MULTI_ARG_3_DF
},
26490 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v8sf
,
26491 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256
,
26492 UNKNOWN
, (int)MULTI_ARG_3_SF2
},
26493 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v4df
,
26494 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256
,
26495 UNKNOWN
, (int)MULTI_ARG_3_DF2
},
26497 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2di
, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV
, UNKNOWN
, (int)MULTI_ARG_3_DI
},
26498 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2di
, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI
, UNKNOWN
, (int)MULTI_ARG_3_DI
},
26499 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4si
, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
26500 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8hi
, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
26501 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v16qi
, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI
,UNKNOWN
, (int)MULTI_ARG_3_QI
},
26502 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2df
, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF
, UNKNOWN
, (int)MULTI_ARG_3_DF
},
26503 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4sf
, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF
, UNKNOWN
, (int)MULTI_ARG_3_SF
},
26505 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4di256
, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256
, UNKNOWN
, (int)MULTI_ARG_3_DI2
},
26506 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4di256
, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256
, UNKNOWN
, (int)MULTI_ARG_3_DI2
},
26507 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8si256
, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256
, UNKNOWN
, (int)MULTI_ARG_3_SI2
},
26508 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v16hi256
, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256
, UNKNOWN
, (int)MULTI_ARG_3_HI2
},
26509 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v32qi256
, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256
, UNKNOWN
, (int)MULTI_ARG_3_QI2
},
26510 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4df256
, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256
, UNKNOWN
, (int)MULTI_ARG_3_DF2
},
26511 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8sf256
, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256
, UNKNOWN
, (int)MULTI_ARG_3_SF2
},
26513 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pperm
, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM
, UNKNOWN
, (int)MULTI_ARG_3_QI
},
26515 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssww
, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
26516 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsww
, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
26517 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsswd
, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
26518 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacswd
, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
26519 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdd
, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
26520 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdd
, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
26521 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdql
, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
26522 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdqh
, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
26523 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdql
, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
26524 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdqh
, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
26525 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmadcsswd
, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
26526 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmadcswd
, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
26528 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv2di3
, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
26529 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv4si3
, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
26530 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv8hi3
, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
26531 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv16qi3
, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
26532 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv2di3
, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM
, UNKNOWN
, (int)MULTI_ARG_2_DI_IMM
},
26533 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv4si3
, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM
, UNKNOWN
, (int)MULTI_ARG_2_SI_IMM
},
26534 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv8hi3
, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM
, UNKNOWN
, (int)MULTI_ARG_2_HI_IMM
},
26535 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv16qi3
, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM
, UNKNOWN
, (int)MULTI_ARG_2_QI_IMM
},
26536 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_ashlv2di3
, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
26537 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_ashlv4si3
, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
26538 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_ashlv8hi3
, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
26539 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_ashlv16qi3
, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
26540 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_lshlv2di3
, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
26541 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_lshlv4si3
, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
26542 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_lshlv8hi3
, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
26543 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_lshlv16qi3
, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
26545 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vmfrczv4sf2
, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS
, UNKNOWN
, (int)MULTI_ARG_2_SF
},
26546 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vmfrczv2df2
, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD
, UNKNOWN
, (int)MULTI_ARG_2_DF
},
26547 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv4sf2
, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS
, UNKNOWN
, (int)MULTI_ARG_1_SF
},
26548 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv2df2
, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD
, UNKNOWN
, (int)MULTI_ARG_1_DF
},
26549 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv8sf2
, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256
, UNKNOWN
, (int)MULTI_ARG_1_SF2
},
26550 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv4df2
, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256
, UNKNOWN
, (int)MULTI_ARG_1_DF2
},
26552 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbw
, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
26553 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbd
, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD
, UNKNOWN
, (int)MULTI_ARG_1_QI_SI
},
26554 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbq
, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ
, UNKNOWN
, (int)MULTI_ARG_1_QI_DI
},
26555 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddwd
, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
26556 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddwq
, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ
, UNKNOWN
, (int)MULTI_ARG_1_HI_DI
},
26557 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadddq
, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
26558 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubw
, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
26559 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubd
, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD
, UNKNOWN
, (int)MULTI_ARG_1_QI_SI
},
26560 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubq
, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ
, UNKNOWN
, (int)MULTI_ARG_1_QI_DI
},
26561 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadduwd
, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
26562 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadduwq
, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ
, UNKNOWN
, (int)MULTI_ARG_1_HI_DI
},
26563 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddudq
, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
26564 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubbw
, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
26565 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubwd
, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
26566 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubdq
, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
26568 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB
, EQ
, (int)MULTI_ARG_2_QI_CMP
},
26569 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
26570 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
26571 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB
, LT
, (int)MULTI_ARG_2_QI_CMP
},
26572 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB
, LE
, (int)MULTI_ARG_2_QI_CMP
},
26573 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB
, GT
, (int)MULTI_ARG_2_QI_CMP
},
26574 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB
, GE
, (int)MULTI_ARG_2_QI_CMP
},
26576 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW
, EQ
, (int)MULTI_ARG_2_HI_CMP
},
26577 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
26578 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
26579 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW
, LT
, (int)MULTI_ARG_2_HI_CMP
},
26580 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW
, LE
, (int)MULTI_ARG_2_HI_CMP
},
26581 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW
, GT
, (int)MULTI_ARG_2_HI_CMP
},
26582 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW
, GE
, (int)MULTI_ARG_2_HI_CMP
},
26584 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD
, EQ
, (int)MULTI_ARG_2_SI_CMP
},
26585 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED
, NE
, (int)MULTI_ARG_2_SI_CMP
},
26586 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED
, NE
, (int)MULTI_ARG_2_SI_CMP
},
26587 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD
, LT
, (int)MULTI_ARG_2_SI_CMP
},
26588 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED
, LE
, (int)MULTI_ARG_2_SI_CMP
},
26589 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD
, GT
, (int)MULTI_ARG_2_SI_CMP
},
26590 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED
, GE
, (int)MULTI_ARG_2_SI_CMP
},
26592 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ
, EQ
, (int)MULTI_ARG_2_DI_CMP
},
26593 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
26594 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
26595 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ
, LT
, (int)MULTI_ARG_2_DI_CMP
},
26596 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ
, LE
, (int)MULTI_ARG_2_DI_CMP
},
26597 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ
, GT
, (int)MULTI_ARG_2_DI_CMP
},
26598 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ
, GE
, (int)MULTI_ARG_2_DI_CMP
},
26600 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB
, EQ
, (int)MULTI_ARG_2_QI_CMP
},
26601 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
26602 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
26603 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB
, LTU
, (int)MULTI_ARG_2_QI_CMP
},
26604 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB
, LEU
, (int)MULTI_ARG_2_QI_CMP
},
26605 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB
, GTU
, (int)MULTI_ARG_2_QI_CMP
},
26606 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB
, GEU
, (int)MULTI_ARG_2_QI_CMP
},
26608 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW
, EQ
, (int)MULTI_ARG_2_HI_CMP
},
26609 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
26610 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
26611 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW
, LTU
, (int)MULTI_ARG_2_HI_CMP
},
26612 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW
, LEU
, (int)MULTI_ARG_2_HI_CMP
},
26613 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW
, GTU
, (int)MULTI_ARG_2_HI_CMP
},
26614 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW
, GEU
, (int)MULTI_ARG_2_HI_CMP
},
26616 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD
, EQ
, (int)MULTI_ARG_2_SI_CMP
},
26617 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD
, NE
, (int)MULTI_ARG_2_SI_CMP
},
26618 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD
, NE
, (int)MULTI_ARG_2_SI_CMP
},
26619 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD
, LTU
, (int)MULTI_ARG_2_SI_CMP
},
26620 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD
, LEU
, (int)MULTI_ARG_2_SI_CMP
},
26621 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD
, GTU
, (int)MULTI_ARG_2_SI_CMP
},
26622 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD
, GEU
, (int)MULTI_ARG_2_SI_CMP
},
26624 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ
, EQ
, (int)MULTI_ARG_2_DI_CMP
},
26625 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
26626 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
26627 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ
, LTU
, (int)MULTI_ARG_2_DI_CMP
},
26628 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ
, LEU
, (int)MULTI_ARG_2_DI_CMP
},
26629 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ
, GTU
, (int)MULTI_ARG_2_DI_CMP
},
26630 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ
, GEU
, (int)MULTI_ARG_2_DI_CMP
},
26632 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_QI_TF
},
26633 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_HI_TF
},
26634 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_SI_TF
},
26635 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_DI_TF
},
26636 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_QI_TF
},
26637 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_HI_TF
},
26638 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_SI_TF
},
26639 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_DI_TF
},
26641 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_QI_TF
},
26642 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_HI_TF
},
26643 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_SI_TF
},
26644 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_DI_TF
},
26645 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_QI_TF
},
26646 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_HI_TF
},
26647 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_SI_TF
},
26648 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_DI_TF
},
26650 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v2df3
, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD
, UNKNOWN
, (int)MULTI_ARG_4_DF2_DI_I
},
26651 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v4sf3
, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS
, UNKNOWN
, (int)MULTI_ARG_4_SF2_SI_I
},
26652 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v4df3
, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256
, UNKNOWN
, (int)MULTI_ARG_4_DF2_DI_I1
},
26653 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v8sf3
, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256
, UNKNOWN
, (int)MULTI_ARG_4_SF2_SI_I1
},
26657 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
26658 in the current target ISA to allow the user to compile particular modules
26659 with different target specific options that differ from the command line
26662 ix86_init_mmx_sse_builtins (void)
26664 const struct builtin_description
* d
;
26665 enum ix86_builtin_func_type ftype
;
26668 /* Add all special builtins with variable number of operands. */
26669 for (i
= 0, d
= bdesc_special_args
;
26670 i
< ARRAY_SIZE (bdesc_special_args
);
26676 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
26677 def_builtin (d
->mask
, d
->name
, ftype
, d
->code
);
26680 /* Add all builtins with variable number of operands. */
26681 for (i
= 0, d
= bdesc_args
;
26682 i
< ARRAY_SIZE (bdesc_args
);
26688 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
26689 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
26692 /* pcmpestr[im] insns. */
26693 for (i
= 0, d
= bdesc_pcmpestr
;
26694 i
< ARRAY_SIZE (bdesc_pcmpestr
);
26697 if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
26698 ftype
= V16QI_FTYPE_V16QI_INT_V16QI_INT_INT
;
26700 ftype
= INT_FTYPE_V16QI_INT_V16QI_INT_INT
;
26701 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
26704 /* pcmpistr[im] insns. */
26705 for (i
= 0, d
= bdesc_pcmpistr
;
26706 i
< ARRAY_SIZE (bdesc_pcmpistr
);
26709 if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
26710 ftype
= V16QI_FTYPE_V16QI_V16QI_INT
;
26712 ftype
= INT_FTYPE_V16QI_V16QI_INT
;
26713 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
26716 /* comi/ucomi insns. */
26717 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
26719 if (d
->mask
== OPTION_MASK_ISA_SSE2
)
26720 ftype
= INT_FTYPE_V2DF_V2DF
;
26722 ftype
= INT_FTYPE_V4SF_V4SF
;
26723 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
26727 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_ldmxcsr",
26728 VOID_FTYPE_UNSIGNED
, IX86_BUILTIN_LDMXCSR
);
26729 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_stmxcsr",
26730 UNSIGNED_FTYPE_VOID
, IX86_BUILTIN_STMXCSR
);
26732 /* SSE or 3DNow!A */
26733 def_builtin (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
26734 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR
,
26735 IX86_BUILTIN_MASKMOVQ
);
26738 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_maskmovdqu",
26739 VOID_FTYPE_V16QI_V16QI_PCHAR
, IX86_BUILTIN_MASKMOVDQU
);
26741 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_clflush",
26742 VOID_FTYPE_PCVOID
, IX86_BUILTIN_CLFLUSH
);
26743 x86_mfence
= def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_mfence",
26744 VOID_FTYPE_VOID
, IX86_BUILTIN_MFENCE
);
26747 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_monitor",
26748 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED
, IX86_BUILTIN_MONITOR
);
26749 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_mwait",
26750 VOID_FTYPE_UNSIGNED_UNSIGNED
, IX86_BUILTIN_MWAIT
);
26753 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesenc128",
26754 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESENC128
);
26755 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesenclast128",
26756 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESENCLAST128
);
26757 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesdec128",
26758 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESDEC128
);
26759 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesdeclast128",
26760 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESDECLAST128
);
26761 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesimc128",
26762 V2DI_FTYPE_V2DI
, IX86_BUILTIN_AESIMC128
);
26763 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aeskeygenassist128",
26764 V2DI_FTYPE_V2DI_INT
, IX86_BUILTIN_AESKEYGENASSIST128
);
26767 def_builtin_const (OPTION_MASK_ISA_PCLMUL
, "__builtin_ia32_pclmulqdq128",
26768 V2DI_FTYPE_V2DI_V2DI_INT
, IX86_BUILTIN_PCLMULQDQ128
);
26771 def_builtin (OPTION_MASK_ISA_RDRND
, "__builtin_ia32_rdrand16_step",
26772 INT_FTYPE_PUSHORT
, IX86_BUILTIN_RDRAND16_STEP
);
26773 def_builtin (OPTION_MASK_ISA_RDRND
, "__builtin_ia32_rdrand32_step",
26774 INT_FTYPE_PUNSIGNED
, IX86_BUILTIN_RDRAND32_STEP
);
26775 def_builtin (OPTION_MASK_ISA_RDRND
| OPTION_MASK_ISA_64BIT
,
26776 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG
,
26777 IX86_BUILTIN_RDRAND64_STEP
);
26780 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv2df",
26781 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT
,
26782 IX86_BUILTIN_GATHERSIV2DF
);
26784 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4df",
26785 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT
,
26786 IX86_BUILTIN_GATHERSIV4DF
);
26788 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv2df",
26789 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT
,
26790 IX86_BUILTIN_GATHERDIV2DF
);
26792 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4df",
26793 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT
,
26794 IX86_BUILTIN_GATHERDIV4DF
);
26796 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4sf",
26797 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT
,
26798 IX86_BUILTIN_GATHERSIV4SF
);
26800 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv8sf",
26801 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT
,
26802 IX86_BUILTIN_GATHERSIV8SF
);
26804 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4sf",
26805 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT
,
26806 IX86_BUILTIN_GATHERDIV4SF
);
26808 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4sf256",
26809 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT
,
26810 IX86_BUILTIN_GATHERDIV8SF
);
26812 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv2di",
26813 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT
,
26814 IX86_BUILTIN_GATHERSIV2DI
);
26816 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4di",
26817 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT
,
26818 IX86_BUILTIN_GATHERSIV4DI
);
26820 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv2di",
26821 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT
,
26822 IX86_BUILTIN_GATHERDIV2DI
);
26824 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4di",
26825 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT
,
26826 IX86_BUILTIN_GATHERDIV4DI
);
26828 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4si",
26829 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT
,
26830 IX86_BUILTIN_GATHERSIV4SI
);
26832 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv8si",
26833 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT
,
26834 IX86_BUILTIN_GATHERSIV8SI
);
26836 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4si",
26837 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT
,
26838 IX86_BUILTIN_GATHERDIV4SI
);
26840 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4si256",
26841 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT
,
26842 IX86_BUILTIN_GATHERDIV8SI
);
26844 /* MMX access to the vec_init patterns. */
26845 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v2si",
26846 V2SI_FTYPE_INT_INT
, IX86_BUILTIN_VEC_INIT_V2SI
);
26848 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v4hi",
26849 V4HI_FTYPE_HI_HI_HI_HI
,
26850 IX86_BUILTIN_VEC_INIT_V4HI
);
26852 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v8qi",
26853 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI
,
26854 IX86_BUILTIN_VEC_INIT_V8QI
);
26856 /* Access to the vec_extract patterns. */
26857 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2df",
26858 DOUBLE_FTYPE_V2DF_INT
, IX86_BUILTIN_VEC_EXT_V2DF
);
26859 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2di",
26860 DI_FTYPE_V2DI_INT
, IX86_BUILTIN_VEC_EXT_V2DI
);
26861 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_vec_ext_v4sf",
26862 FLOAT_FTYPE_V4SF_INT
, IX86_BUILTIN_VEC_EXT_V4SF
);
26863 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v4si",
26864 SI_FTYPE_V4SI_INT
, IX86_BUILTIN_VEC_EXT_V4SI
);
26865 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v8hi",
26866 HI_FTYPE_V8HI_INT
, IX86_BUILTIN_VEC_EXT_V8HI
);
26868 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
26869 "__builtin_ia32_vec_ext_v4hi",
26870 HI_FTYPE_V4HI_INT
, IX86_BUILTIN_VEC_EXT_V4HI
);
26872 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_ext_v2si",
26873 SI_FTYPE_V2SI_INT
, IX86_BUILTIN_VEC_EXT_V2SI
);
26875 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v16qi",
26876 QI_FTYPE_V16QI_INT
, IX86_BUILTIN_VEC_EXT_V16QI
);
26878 /* Access to the vec_set patterns. */
26879 def_builtin_const (OPTION_MASK_ISA_SSE4_1
| OPTION_MASK_ISA_64BIT
,
26880 "__builtin_ia32_vec_set_v2di",
26881 V2DI_FTYPE_V2DI_DI_INT
, IX86_BUILTIN_VEC_SET_V2DI
);
26883 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4sf",
26884 V4SF_FTYPE_V4SF_FLOAT_INT
, IX86_BUILTIN_VEC_SET_V4SF
);
26886 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4si",
26887 V4SI_FTYPE_V4SI_SI_INT
, IX86_BUILTIN_VEC_SET_V4SI
);
26889 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_set_v8hi",
26890 V8HI_FTYPE_V8HI_HI_INT
, IX86_BUILTIN_VEC_SET_V8HI
);
26892 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
26893 "__builtin_ia32_vec_set_v4hi",
26894 V4HI_FTYPE_V4HI_HI_INT
, IX86_BUILTIN_VEC_SET_V4HI
);
26896 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v16qi",
26897 V16QI_FTYPE_V16QI_QI_INT
, IX86_BUILTIN_VEC_SET_V16QI
);
26899 /* Add FMA4 multi-arg argument instructions */
26900 for (i
= 0, d
= bdesc_multi_arg
; i
< ARRAY_SIZE (bdesc_multi_arg
); i
++, d
++)
26905 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
26906 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
/* NOTE(review): garbled extraction — original line numbers are embedded and
   some lines (braces, the conditional guard around the attribute setup,
   trailing NULL_TREE arguments at 26937/26940) are elided.  Registers the
   __builtin_ms_va_* and __builtin_sysv_va_* varargs builtins so either ABI's
   va_list can be used regardless of the default calling convention.  */
26910 /* Internal method for ix86_init_builtins. */
26913 ix86_init_builtins_va_builtins_abi (void)
/* Reference types for the two va_list flavors, the six function types, and
   the attribute lists that force ms_abi / sysv_abi on each builtin.  */
26915 tree ms_va_ref
, sysv_va_ref
;
26916 tree fnvoid_va_end_ms
, fnvoid_va_end_sysv
;
26917 tree fnvoid_va_start_ms
, fnvoid_va_start_sysv
;
26918 tree fnvoid_va_copy_ms
, fnvoid_va_copy_sysv
;
26919 tree fnattr_ms
= NULL_TREE
, fnattr_sysv
= NULL_TREE
;
26923 fnattr_ms
= build_tree_list (get_identifier ("ms_abi"), NULL_TREE
);
26924 fnattr_sysv
= build_tree_list (get_identifier ("sysv_abi"), NULL_TREE
);
26925 ms_va_ref
= build_reference_type (ms_va_list_type_node
);
26927 build_pointer_type (TREE_TYPE (sysv_va_list_type_node
));
/* Build the function types: va_end takes a fixed va_list ref, va_start is
   varargs, va_copy takes two va_list operands.  */
26930 build_function_type_list (void_type_node
, ms_va_ref
, NULL_TREE
);
26931 fnvoid_va_start_ms
=
26932 build_varargs_function_type_list (void_type_node
, ms_va_ref
, NULL_TREE
);
26933 fnvoid_va_end_sysv
=
26934 build_function_type_list (void_type_node
, sysv_va_ref
, NULL_TREE
);
26935 fnvoid_va_start_sysv
=
26936 build_varargs_function_type_list (void_type_node
, sysv_va_ref
,
26938 fnvoid_va_copy_ms
=
26939 build_function_type_list (void_type_node
, ms_va_ref
, ms_va_list_type_node
,
26941 fnvoid_va_copy_sysv
=
26942 build_function_type_list (void_type_node
, sysv_va_ref
,
26943 sysv_va_ref
, NULL_TREE
);
/* Register the six builtins, tagging each with its ABI attribute.  */
26945 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms
,
26946 BUILT_IN_VA_START
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
26947 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms
,
26948 BUILT_IN_VA_END
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
26949 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms
,
26950 BUILT_IN_VA_COPY
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
26951 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv
,
26952 BUILT_IN_VA_START
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
26953 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv
,
26954 BUILT_IN_VA_END
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
26955 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv
,
26956 BUILT_IN_VA_COPY
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
/* Register the x86-specific scalar float types (__float80, __float128) with
   the language hooks, then define the primitive builtin types generated by
   i386-builtin-types.awk.  NOTE(review): garbled extraction — braces and the
   function's return-type line are elided; line numbers are embedded text.  */
26960 ix86_init_builtin_types (void)
26962 tree float128_type_node
, float80_type_node
;
26964 /* The __float80 type. */
26965 float80_type_node
= long_double_type_node
;
/* If long double is not already the 80-bit XFmode type, build a distinct
   80-bit REAL_TYPE for __float80.  */
26966 if (TYPE_MODE (float80_type_node
) != XFmode
)
26968 /* The __float80 type. */
26969 float80_type_node
= make_node (REAL_TYPE
);
26971 TYPE_PRECISION (float80_type_node
) = 80;
26972 layout_type (float80_type_node
);
26974 lang_hooks
.types
.register_builtin_type (float80_type_node
, "__float80");
26976 /* The __float128 type. */
26977 float128_type_node
= make_node (REAL_TYPE
);
26978 TYPE_PRECISION (float128_type_node
) = 128;
26979 layout_type (float128_type_node
);
26980 lang_hooks
.types
.register_builtin_type (float128_type_node
, "__float128");
26982 /* This macro is built by i386-builtin-types.awk. */
26983 DEFINE_BUILTIN_PRIMITIVE_TYPES
;
/* Top-level TARGET_INIT_BUILTINS hook body: sets up builtin types, the TFmode
   (__float128) support builtins backed by libgcc, the MMX/SSE builtin tables,
   and (conditionally, per the elided guard) the ms/sysv va_list builtins.
   NOTE(review): garbled extraction — declarations of `i` and `t`, braces, and
   the conditionals around 27016 are elided; embedded numbers are listing
   artifacts.  */
26987 ix86_init_builtins (void)
26991 ix86_init_builtin_types ();
26993 /* TFmode support builtins. */
26994 def_builtin_const (0, "__builtin_infq",
26995 FLOAT128_FTYPE_VOID
, IX86_BUILTIN_INFQ
);
26996 def_builtin_const (0, "__builtin_huge_valq",
26997 FLOAT128_FTYPE_VOID
, IX86_BUILTIN_HUGE_VALQ
);
26999 /* We will expand them to normal call if SSE2 isn't available since
27000 they are used by libgcc. */
/* __builtin_fabsq -> __fabstf2; marked TREE_READONLY (pure function) and
   cached in the ix86_builtins decl table.  */
27001 t
= ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128
);
27002 t
= add_builtin_function ("__builtin_fabsq", t
, IX86_BUILTIN_FABSQ
,
27003 BUILT_IN_MD
, "__fabstf2", NULL_TREE
);
27004 TREE_READONLY (t
) = 1;
27005 ix86_builtins
[(int) IX86_BUILTIN_FABSQ
] = t
;
/* __builtin_copysignq -> __copysigntf3, likewise.  */
27007 t
= ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128
);
27008 t
= add_builtin_function ("__builtin_copysignq", t
, IX86_BUILTIN_COPYSIGNQ
,
27009 BUILT_IN_MD
, "__copysigntf3", NULL_TREE
);
27010 TREE_READONLY (t
) = 1;
27011 ix86_builtins
[(int) IX86_BUILTIN_COPYSIGNQ
] = t
;
27013 ix86_init_mmx_sse_builtins ();
27016 ix86_init_builtins_va_builtins_abi ();
/* Subtargets may add their own builtins.  */
27018 #ifdef SUBTARGET_INIT_BUILTINS
27019 SUBTARGET_INIT_BUILTINS
;
/* TARGET_BUILTIN_DECL hook: map a builtin code to its decl, or
   error_mark_node for an out-of-range code.  NOTE(review): garbled
   extraction — the return-type line and braces are elided.  */
27023 /* Return the ix86 builtin for CODE. */
27026 ix86_builtin_decl (unsigned code
, bool initialize_p ATTRIBUTE_UNUSED
)
27028 if (code
>= IX86_BUILTIN_MAX
)
27029 return error_mark_node
;
27031 return ix86_builtins
[code
];
27034 /* Errors in the source file can cause expand_expr to return const0_rtx
27035 where we expect a vector. To avoid crashing, use one of the vector
27036 clear instructions. */
27038 safe_vector_operand (rtx x
, enum machine_mode mode
)
27040 if (x
== const0_rtx
)
27041 x
= CONST0_RTX (mode
);
27045 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
27048 ix86_expand_binop_builtin (enum insn_code icode
, tree exp
, rtx target
)
27051 tree arg0
= CALL_EXPR_ARG (exp
, 0);
27052 tree arg1
= CALL_EXPR_ARG (exp
, 1);
27053 rtx op0
= expand_normal (arg0
);
27054 rtx op1
= expand_normal (arg1
);
27055 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
27056 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
27057 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
27059 if (VECTOR_MODE_P (mode0
))
27060 op0
= safe_vector_operand (op0
, mode0
);
27061 if (VECTOR_MODE_P (mode1
))
27062 op1
= safe_vector_operand (op1
, mode1
);
27064 if (optimize
|| !target
27065 || GET_MODE (target
) != tmode
27066 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
27067 target
= gen_reg_rtx (tmode
);
27069 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
27071 rtx x
= gen_reg_rtx (V4SImode
);
27072 emit_insn (gen_sse2_loadd (x
, op1
));
27073 op1
= gen_lowpart (TImode
, x
);
27076 if (!insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
27077 op0
= copy_to_mode_reg (mode0
, op0
);
27078 if (!insn_data
[icode
].operand
[2].predicate (op1
, mode1
))
27079 op1
= copy_to_mode_reg (mode1
, op1
);
27081 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
27090 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
27093 ix86_expand_multi_arg_builtin (enum insn_code icode
, tree exp
, rtx target
,
27094 enum ix86_builtin_func_type m_type
,
27095 enum rtx_code sub_code
)
27100 bool comparison_p
= false;
27102 bool last_arg_constant
= false;
27103 int num_memory
= 0;
27106 enum machine_mode mode
;
27109 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
27113 case MULTI_ARG_4_DF2_DI_I
:
27114 case MULTI_ARG_4_DF2_DI_I1
:
27115 case MULTI_ARG_4_SF2_SI_I
:
27116 case MULTI_ARG_4_SF2_SI_I1
:
27118 last_arg_constant
= true;
27121 case MULTI_ARG_3_SF
:
27122 case MULTI_ARG_3_DF
:
27123 case MULTI_ARG_3_SF2
:
27124 case MULTI_ARG_3_DF2
:
27125 case MULTI_ARG_3_DI
:
27126 case MULTI_ARG_3_SI
:
27127 case MULTI_ARG_3_SI_DI
:
27128 case MULTI_ARG_3_HI
:
27129 case MULTI_ARG_3_HI_SI
:
27130 case MULTI_ARG_3_QI
:
27131 case MULTI_ARG_3_DI2
:
27132 case MULTI_ARG_3_SI2
:
27133 case MULTI_ARG_3_HI2
:
27134 case MULTI_ARG_3_QI2
:
27138 case MULTI_ARG_2_SF
:
27139 case MULTI_ARG_2_DF
:
27140 case MULTI_ARG_2_DI
:
27141 case MULTI_ARG_2_SI
:
27142 case MULTI_ARG_2_HI
:
27143 case MULTI_ARG_2_QI
:
27147 case MULTI_ARG_2_DI_IMM
:
27148 case MULTI_ARG_2_SI_IMM
:
27149 case MULTI_ARG_2_HI_IMM
:
27150 case MULTI_ARG_2_QI_IMM
:
27152 last_arg_constant
= true;
27155 case MULTI_ARG_1_SF
:
27156 case MULTI_ARG_1_DF
:
27157 case MULTI_ARG_1_SF2
:
27158 case MULTI_ARG_1_DF2
:
27159 case MULTI_ARG_1_DI
:
27160 case MULTI_ARG_1_SI
:
27161 case MULTI_ARG_1_HI
:
27162 case MULTI_ARG_1_QI
:
27163 case MULTI_ARG_1_SI_DI
:
27164 case MULTI_ARG_1_HI_DI
:
27165 case MULTI_ARG_1_HI_SI
:
27166 case MULTI_ARG_1_QI_DI
:
27167 case MULTI_ARG_1_QI_SI
:
27168 case MULTI_ARG_1_QI_HI
:
27172 case MULTI_ARG_2_DI_CMP
:
27173 case MULTI_ARG_2_SI_CMP
:
27174 case MULTI_ARG_2_HI_CMP
:
27175 case MULTI_ARG_2_QI_CMP
:
27177 comparison_p
= true;
27180 case MULTI_ARG_2_SF_TF
:
27181 case MULTI_ARG_2_DF_TF
:
27182 case MULTI_ARG_2_DI_TF
:
27183 case MULTI_ARG_2_SI_TF
:
27184 case MULTI_ARG_2_HI_TF
:
27185 case MULTI_ARG_2_QI_TF
:
27191 gcc_unreachable ();
27194 if (optimize
|| !target
27195 || GET_MODE (target
) != tmode
27196 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
27197 target
= gen_reg_rtx (tmode
);
27199 gcc_assert (nargs
<= 4);
27201 for (i
= 0; i
< nargs
; i
++)
27203 tree arg
= CALL_EXPR_ARG (exp
, i
);
27204 rtx op
= expand_normal (arg
);
27205 int adjust
= (comparison_p
) ? 1 : 0;
27206 enum machine_mode mode
= insn_data
[icode
].operand
[i
+adjust
+1].mode
;
27208 if (last_arg_constant
&& i
== nargs
- 1)
27210 if (!insn_data
[icode
].operand
[i
+ 1].predicate (op
, mode
))
27212 enum insn_code new_icode
= icode
;
27215 case CODE_FOR_xop_vpermil2v2df3
:
27216 case CODE_FOR_xop_vpermil2v4sf3
:
27217 case CODE_FOR_xop_vpermil2v4df3
:
27218 case CODE_FOR_xop_vpermil2v8sf3
:
27219 error ("the last argument must be a 2-bit immediate");
27220 return gen_reg_rtx (tmode
);
27221 case CODE_FOR_xop_rotlv2di3
:
27222 new_icode
= CODE_FOR_rotlv2di3
;
27224 case CODE_FOR_xop_rotlv4si3
:
27225 new_icode
= CODE_FOR_rotlv4si3
;
27227 case CODE_FOR_xop_rotlv8hi3
:
27228 new_icode
= CODE_FOR_rotlv8hi3
;
27230 case CODE_FOR_xop_rotlv16qi3
:
27231 new_icode
= CODE_FOR_rotlv16qi3
;
27233 if (CONST_INT_P (op
))
27235 int mask
= GET_MODE_BITSIZE (GET_MODE_INNER (tmode
)) - 1;
27236 op
= GEN_INT (INTVAL (op
) & mask
);
27237 gcc_checking_assert
27238 (insn_data
[icode
].operand
[i
+ 1].predicate (op
, mode
));
27242 gcc_checking_assert
27244 && insn_data
[new_icode
].operand
[0].mode
== tmode
27245 && insn_data
[new_icode
].operand
[1].mode
== tmode
27246 && insn_data
[new_icode
].operand
[2].mode
== mode
27247 && insn_data
[new_icode
].operand
[0].predicate
27248 == insn_data
[icode
].operand
[0].predicate
27249 && insn_data
[new_icode
].operand
[1].predicate
27250 == insn_data
[icode
].operand
[1].predicate
);
27256 gcc_unreachable ();
27263 if (VECTOR_MODE_P (mode
))
27264 op
= safe_vector_operand (op
, mode
);
27266 /* If we aren't optimizing, only allow one memory operand to be
27268 if (memory_operand (op
, mode
))
27271 gcc_assert (GET_MODE (op
) == mode
|| GET_MODE (op
) == VOIDmode
);
27274 || !insn_data
[icode
].operand
[i
+adjust
+1].predicate (op
, mode
)
27276 op
= force_reg (mode
, op
);
27280 args
[i
].mode
= mode
;
27286 pat
= GEN_FCN (icode
) (target
, args
[0].op
);
27291 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
,
27292 GEN_INT ((int)sub_code
));
27293 else if (! comparison_p
)
27294 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
);
27297 rtx cmp_op
= gen_rtx_fmt_ee (sub_code
, GET_MODE (target
),
27301 pat
= GEN_FCN (icode
) (target
, cmp_op
, args
[0].op
, args
[1].op
);
27306 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
);
27310 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
, args
[3].op
);
27314 gcc_unreachable ();
27324 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
27325 insns with vec_merge. */
27328 ix86_expand_unop_vec_merge_builtin (enum insn_code icode
, tree exp
,
27332 tree arg0
= CALL_EXPR_ARG (exp
, 0);
27333 rtx op1
, op0
= expand_normal (arg0
);
27334 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
27335 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
27337 if (optimize
|| !target
27338 || GET_MODE (target
) != tmode
27339 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
27340 target
= gen_reg_rtx (tmode
);
27342 if (VECTOR_MODE_P (mode0
))
27343 op0
= safe_vector_operand (op0
, mode0
);
27345 if ((optimize
&& !register_operand (op0
, mode0
))
27346 || !insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
27347 op0
= copy_to_mode_reg (mode0
, op0
);
27350 if (!insn_data
[icode
].operand
[2].predicate (op1
, mode0
))
27351 op1
= copy_to_mode_reg (mode0
, op1
);
27353 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
27360 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
27363 ix86_expand_sse_compare (const struct builtin_description
*d
,
27364 tree exp
, rtx target
, bool swap
)
27367 tree arg0
= CALL_EXPR_ARG (exp
, 0);
27368 tree arg1
= CALL_EXPR_ARG (exp
, 1);
27369 rtx op0
= expand_normal (arg0
);
27370 rtx op1
= expand_normal (arg1
);
27372 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
27373 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
27374 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
27375 enum rtx_code comparison
= d
->comparison
;
27377 if (VECTOR_MODE_P (mode0
))
27378 op0
= safe_vector_operand (op0
, mode0
);
27379 if (VECTOR_MODE_P (mode1
))
27380 op1
= safe_vector_operand (op1
, mode1
);
27382 /* Swap operands if we have a comparison that isn't available in
27386 rtx tmp
= gen_reg_rtx (mode1
);
27387 emit_move_insn (tmp
, op1
);
27392 if (optimize
|| !target
27393 || GET_MODE (target
) != tmode
27394 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
27395 target
= gen_reg_rtx (tmode
);
27397 if ((optimize
&& !register_operand (op0
, mode0
))
27398 || !insn_data
[d
->icode
].operand
[1].predicate (op0
, mode0
))
27399 op0
= copy_to_mode_reg (mode0
, op0
);
27400 if ((optimize
&& !register_operand (op1
, mode1
))
27401 || !insn_data
[d
->icode
].operand
[2].predicate (op1
, mode1
))
27402 op1
= copy_to_mode_reg (mode1
, op1
);
27404 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
27405 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
27412 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
27415 ix86_expand_sse_comi (const struct builtin_description
*d
, tree exp
,
27419 tree arg0
= CALL_EXPR_ARG (exp
, 0);
27420 tree arg1
= CALL_EXPR_ARG (exp
, 1);
27421 rtx op0
= expand_normal (arg0
);
27422 rtx op1
= expand_normal (arg1
);
27423 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
27424 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
27425 enum rtx_code comparison
= d
->comparison
;
27427 if (VECTOR_MODE_P (mode0
))
27428 op0
= safe_vector_operand (op0
, mode0
);
27429 if (VECTOR_MODE_P (mode1
))
27430 op1
= safe_vector_operand (op1
, mode1
);
27432 /* Swap operands if we have a comparison that isn't available in
27434 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
27441 target
= gen_reg_rtx (SImode
);
27442 emit_move_insn (target
, const0_rtx
);
27443 target
= gen_rtx_SUBREG (QImode
, target
, 0);
27445 if ((optimize
&& !register_operand (op0
, mode0
))
27446 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
27447 op0
= copy_to_mode_reg (mode0
, op0
);
27448 if ((optimize
&& !register_operand (op1
, mode1
))
27449 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
27450 op1
= copy_to_mode_reg (mode1
, op1
);
27452 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
27456 emit_insn (gen_rtx_SET (VOIDmode
,
27457 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
27458 gen_rtx_fmt_ee (comparison
, QImode
,
27462 return SUBREG_REG (target
);
27465 /* Subroutine of ix86_expand_args_builtin to take care of round insns. */
27468 ix86_expand_sse_round (const struct builtin_description
*d
, tree exp
,
27472 tree arg0
= CALL_EXPR_ARG (exp
, 0);
27473 rtx op1
, op0
= expand_normal (arg0
);
27474 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
27475 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
27477 if (optimize
|| target
== 0
27478 || GET_MODE (target
) != tmode
27479 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
27480 target
= gen_reg_rtx (tmode
);
27482 if (VECTOR_MODE_P (mode0
))
27483 op0
= safe_vector_operand (op0
, mode0
);
27485 if ((optimize
&& !register_operand (op0
, mode0
))
27486 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
27487 op0
= copy_to_mode_reg (mode0
, op0
);
27489 op1
= GEN_INT (d
->comparison
);
27491 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
);
27498 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
27501 ix86_expand_sse_ptest (const struct builtin_description
*d
, tree exp
,
27505 tree arg0
= CALL_EXPR_ARG (exp
, 0);
27506 tree arg1
= CALL_EXPR_ARG (exp
, 1);
27507 rtx op0
= expand_normal (arg0
);
27508 rtx op1
= expand_normal (arg1
);
27509 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
27510 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
27511 enum rtx_code comparison
= d
->comparison
;
27513 if (VECTOR_MODE_P (mode0
))
27514 op0
= safe_vector_operand (op0
, mode0
);
27515 if (VECTOR_MODE_P (mode1
))
27516 op1
= safe_vector_operand (op1
, mode1
);
27518 target
= gen_reg_rtx (SImode
);
27519 emit_move_insn (target
, const0_rtx
);
27520 target
= gen_rtx_SUBREG (QImode
, target
, 0);
27522 if ((optimize
&& !register_operand (op0
, mode0
))
27523 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
27524 op0
= copy_to_mode_reg (mode0
, op0
);
27525 if ((optimize
&& !register_operand (op1
, mode1
))
27526 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
27527 op1
= copy_to_mode_reg (mode1
, op1
);
27529 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
27533 emit_insn (gen_rtx_SET (VOIDmode
,
27534 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
27535 gen_rtx_fmt_ee (comparison
, QImode
,
27539 return SUBREG_REG (target
);
27542 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
27545 ix86_expand_sse_pcmpestr (const struct builtin_description
*d
,
27546 tree exp
, rtx target
)
27549 tree arg0
= CALL_EXPR_ARG (exp
, 0);
27550 tree arg1
= CALL_EXPR_ARG (exp
, 1);
27551 tree arg2
= CALL_EXPR_ARG (exp
, 2);
27552 tree arg3
= CALL_EXPR_ARG (exp
, 3);
27553 tree arg4
= CALL_EXPR_ARG (exp
, 4);
27554 rtx scratch0
, scratch1
;
27555 rtx op0
= expand_normal (arg0
);
27556 rtx op1
= expand_normal (arg1
);
27557 rtx op2
= expand_normal (arg2
);
27558 rtx op3
= expand_normal (arg3
);
27559 rtx op4
= expand_normal (arg4
);
27560 enum machine_mode tmode0
, tmode1
, modev2
, modei3
, modev4
, modei5
, modeimm
;
27562 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
27563 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
27564 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
27565 modei3
= insn_data
[d
->icode
].operand
[3].mode
;
27566 modev4
= insn_data
[d
->icode
].operand
[4].mode
;
27567 modei5
= insn_data
[d
->icode
].operand
[5].mode
;
27568 modeimm
= insn_data
[d
->icode
].operand
[6].mode
;
27570 if (VECTOR_MODE_P (modev2
))
27571 op0
= safe_vector_operand (op0
, modev2
);
27572 if (VECTOR_MODE_P (modev4
))
27573 op2
= safe_vector_operand (op2
, modev4
);
27575 if (!insn_data
[d
->icode
].operand
[2].predicate (op0
, modev2
))
27576 op0
= copy_to_mode_reg (modev2
, op0
);
27577 if (!insn_data
[d
->icode
].operand
[3].predicate (op1
, modei3
))
27578 op1
= copy_to_mode_reg (modei3
, op1
);
27579 if ((optimize
&& !register_operand (op2
, modev4
))
27580 || !insn_data
[d
->icode
].operand
[4].predicate (op2
, modev4
))
27581 op2
= copy_to_mode_reg (modev4
, op2
);
27582 if (!insn_data
[d
->icode
].operand
[5].predicate (op3
, modei5
))
27583 op3
= copy_to_mode_reg (modei5
, op3
);
27585 if (!insn_data
[d
->icode
].operand
[6].predicate (op4
, modeimm
))
27587 error ("the fifth argument must be an 8-bit immediate");
27591 if (d
->code
== IX86_BUILTIN_PCMPESTRI128
)
27593 if (optimize
|| !target
27594 || GET_MODE (target
) != tmode0
27595 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode0
))
27596 target
= gen_reg_rtx (tmode0
);
27598 scratch1
= gen_reg_rtx (tmode1
);
27600 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
, op3
, op4
);
27602 else if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
27604 if (optimize
|| !target
27605 || GET_MODE (target
) != tmode1
27606 || !insn_data
[d
->icode
].operand
[1].predicate (target
, tmode1
))
27607 target
= gen_reg_rtx (tmode1
);
27609 scratch0
= gen_reg_rtx (tmode0
);
27611 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
, op3
, op4
);
27615 gcc_assert (d
->flag
);
27617 scratch0
= gen_reg_rtx (tmode0
);
27618 scratch1
= gen_reg_rtx (tmode1
);
27620 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
, op3
, op4
);
27630 target
= gen_reg_rtx (SImode
);
27631 emit_move_insn (target
, const0_rtx
);
27632 target
= gen_rtx_SUBREG (QImode
, target
, 0);
27635 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
27636 gen_rtx_fmt_ee (EQ
, QImode
,
27637 gen_rtx_REG ((enum machine_mode
) d
->flag
,
27640 return SUBREG_REG (target
);
27647 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
27650 ix86_expand_sse_pcmpistr (const struct builtin_description
*d
,
27651 tree exp
, rtx target
)
27654 tree arg0
= CALL_EXPR_ARG (exp
, 0);
27655 tree arg1
= CALL_EXPR_ARG (exp
, 1);
27656 tree arg2
= CALL_EXPR_ARG (exp
, 2);
27657 rtx scratch0
, scratch1
;
27658 rtx op0
= expand_normal (arg0
);
27659 rtx op1
= expand_normal (arg1
);
27660 rtx op2
= expand_normal (arg2
);
27661 enum machine_mode tmode0
, tmode1
, modev2
, modev3
, modeimm
;
27663 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
27664 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
27665 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
27666 modev3
= insn_data
[d
->icode
].operand
[3].mode
;
27667 modeimm
= insn_data
[d
->icode
].operand
[4].mode
;
27669 if (VECTOR_MODE_P (modev2
))
27670 op0
= safe_vector_operand (op0
, modev2
);
27671 if (VECTOR_MODE_P (modev3
))
27672 op1
= safe_vector_operand (op1
, modev3
);
27674 if (!insn_data
[d
->icode
].operand
[2].predicate (op0
, modev2
))
27675 op0
= copy_to_mode_reg (modev2
, op0
);
27676 if ((optimize
&& !register_operand (op1
, modev3
))
27677 || !insn_data
[d
->icode
].operand
[3].predicate (op1
, modev3
))
27678 op1
= copy_to_mode_reg (modev3
, op1
);
27680 if (!insn_data
[d
->icode
].operand
[4].predicate (op2
, modeimm
))
27682 error ("the third argument must be an 8-bit immediate");
27686 if (d
->code
== IX86_BUILTIN_PCMPISTRI128
)
27688 if (optimize
|| !target
27689 || GET_MODE (target
) != tmode0
27690 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode0
))
27691 target
= gen_reg_rtx (tmode0
);
27693 scratch1
= gen_reg_rtx (tmode1
);
27695 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
);
27697 else if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
27699 if (optimize
|| !target
27700 || GET_MODE (target
) != tmode1
27701 || !insn_data
[d
->icode
].operand
[1].predicate (target
, tmode1
))
27702 target
= gen_reg_rtx (tmode1
);
27704 scratch0
= gen_reg_rtx (tmode0
);
27706 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
);
27710 gcc_assert (d
->flag
);
27712 scratch0
= gen_reg_rtx (tmode0
);
27713 scratch1
= gen_reg_rtx (tmode1
);
27715 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
);
27725 target
= gen_reg_rtx (SImode
);
27726 emit_move_insn (target
, const0_rtx
);
27727 target
= gen_rtx_SUBREG (QImode
, target
, 0);
27730 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
27731 gen_rtx_fmt_ee (EQ
, QImode
,
27732 gen_rtx_REG ((enum machine_mode
) d
->flag
,
27735 return SUBREG_REG (target
);
27741 /* Subroutine of ix86_expand_builtin to take care of insns with
27742 variable number of operands. */
27745 ix86_expand_args_builtin (const struct builtin_description
*d
,
27746 tree exp
, rtx target
)
27748 rtx pat
, real_target
;
27749 unsigned int i
, nargs
;
27750 unsigned int nargs_constant
= 0;
27751 int num_memory
= 0;
27755 enum machine_mode mode
;
27757 bool last_arg_count
= false;
27758 enum insn_code icode
= d
->icode
;
27759 const struct insn_data_d
*insn_p
= &insn_data
[icode
];
27760 enum machine_mode tmode
= insn_p
->operand
[0].mode
;
27761 enum machine_mode rmode
= VOIDmode
;
27763 enum rtx_code comparison
= d
->comparison
;
27765 switch ((enum ix86_builtin_func_type
) d
->flag
)
27767 case V2DF_FTYPE_V2DF_ROUND
:
27768 case V4DF_FTYPE_V4DF_ROUND
:
27769 case V4SF_FTYPE_V4SF_ROUND
:
27770 case V8SF_FTYPE_V8SF_ROUND
:
27771 return ix86_expand_sse_round (d
, exp
, target
);
27772 case INT_FTYPE_V8SF_V8SF_PTEST
:
27773 case INT_FTYPE_V4DI_V4DI_PTEST
:
27774 case INT_FTYPE_V4DF_V4DF_PTEST
:
27775 case INT_FTYPE_V4SF_V4SF_PTEST
:
27776 case INT_FTYPE_V2DI_V2DI_PTEST
:
27777 case INT_FTYPE_V2DF_V2DF_PTEST
:
27778 return ix86_expand_sse_ptest (d
, exp
, target
);
27779 case FLOAT128_FTYPE_FLOAT128
:
27780 case FLOAT_FTYPE_FLOAT
:
27781 case INT_FTYPE_INT
:
27782 case UINT64_FTYPE_INT
:
27783 case UINT16_FTYPE_UINT16
:
27784 case INT64_FTYPE_INT64
:
27785 case INT64_FTYPE_V4SF
:
27786 case INT64_FTYPE_V2DF
:
27787 case INT_FTYPE_V16QI
:
27788 case INT_FTYPE_V8QI
:
27789 case INT_FTYPE_V8SF
:
27790 case INT_FTYPE_V4DF
:
27791 case INT_FTYPE_V4SF
:
27792 case INT_FTYPE_V2DF
:
27793 case INT_FTYPE_V32QI
:
27794 case V16QI_FTYPE_V16QI
:
27795 case V8SI_FTYPE_V8SF
:
27796 case V8SI_FTYPE_V4SI
:
27797 case V8HI_FTYPE_V8HI
:
27798 case V8HI_FTYPE_V16QI
:
27799 case V8QI_FTYPE_V8QI
:
27800 case V8SF_FTYPE_V8SF
:
27801 case V8SF_FTYPE_V8SI
:
27802 case V8SF_FTYPE_V4SF
:
27803 case V8SF_FTYPE_V8HI
:
27804 case V4SI_FTYPE_V4SI
:
27805 case V4SI_FTYPE_V16QI
:
27806 case V4SI_FTYPE_V4SF
:
27807 case V4SI_FTYPE_V8SI
:
27808 case V4SI_FTYPE_V8HI
:
27809 case V4SI_FTYPE_V4DF
:
27810 case V4SI_FTYPE_V2DF
:
27811 case V4HI_FTYPE_V4HI
:
27812 case V4DF_FTYPE_V4DF
:
27813 case V4DF_FTYPE_V4SI
:
27814 case V4DF_FTYPE_V4SF
:
27815 case V4DF_FTYPE_V2DF
:
27816 case V4SF_FTYPE_V4SF
:
27817 case V4SF_FTYPE_V4SI
:
27818 case V4SF_FTYPE_V8SF
:
27819 case V4SF_FTYPE_V4DF
:
27820 case V4SF_FTYPE_V8HI
:
27821 case V4SF_FTYPE_V2DF
:
27822 case V2DI_FTYPE_V2DI
:
27823 case V2DI_FTYPE_V16QI
:
27824 case V2DI_FTYPE_V8HI
:
27825 case V2DI_FTYPE_V4SI
:
27826 case V2DF_FTYPE_V2DF
:
27827 case V2DF_FTYPE_V4SI
:
27828 case V2DF_FTYPE_V4DF
:
27829 case V2DF_FTYPE_V4SF
:
27830 case V2DF_FTYPE_V2SI
:
27831 case V2SI_FTYPE_V2SI
:
27832 case V2SI_FTYPE_V4SF
:
27833 case V2SI_FTYPE_V2SF
:
27834 case V2SI_FTYPE_V2DF
:
27835 case V2SF_FTYPE_V2SF
:
27836 case V2SF_FTYPE_V2SI
:
27837 case V32QI_FTYPE_V32QI
:
27838 case V32QI_FTYPE_V16QI
:
27839 case V16HI_FTYPE_V16HI
:
27840 case V16HI_FTYPE_V8HI
:
27841 case V8SI_FTYPE_V8SI
:
27842 case V16HI_FTYPE_V16QI
:
27843 case V8SI_FTYPE_V16QI
:
27844 case V4DI_FTYPE_V16QI
:
27845 case V8SI_FTYPE_V8HI
:
27846 case V4DI_FTYPE_V8HI
:
27847 case V4DI_FTYPE_V4SI
:
27848 case V4DI_FTYPE_V2DI
:
27851 case V4SF_FTYPE_V4SF_VEC_MERGE
:
27852 case V2DF_FTYPE_V2DF_VEC_MERGE
:
27853 return ix86_expand_unop_vec_merge_builtin (icode
, exp
, target
);
27854 case FLOAT128_FTYPE_FLOAT128_FLOAT128
:
27855 case V16QI_FTYPE_V16QI_V16QI
:
27856 case V16QI_FTYPE_V8HI_V8HI
:
27857 case V8QI_FTYPE_V8QI_V8QI
:
27858 case V8QI_FTYPE_V4HI_V4HI
:
27859 case V8HI_FTYPE_V8HI_V8HI
:
27860 case V8HI_FTYPE_V16QI_V16QI
:
27861 case V8HI_FTYPE_V4SI_V4SI
:
27862 case V8SF_FTYPE_V8SF_V8SF
:
27863 case V8SF_FTYPE_V8SF_V8SI
:
27864 case V4SI_FTYPE_V4SI_V4SI
:
27865 case V4SI_FTYPE_V8HI_V8HI
:
27866 case V4SI_FTYPE_V4SF_V4SF
:
27867 case V4SI_FTYPE_V2DF_V2DF
:
27868 case V4HI_FTYPE_V4HI_V4HI
:
27869 case V4HI_FTYPE_V8QI_V8QI
:
27870 case V4HI_FTYPE_V2SI_V2SI
:
27871 case V4DF_FTYPE_V4DF_V4DF
:
27872 case V4DF_FTYPE_V4DF_V4DI
:
27873 case V4SF_FTYPE_V4SF_V4SF
:
27874 case V4SF_FTYPE_V4SF_V4SI
:
27875 case V4SF_FTYPE_V4SF_V2SI
:
27876 case V4SF_FTYPE_V4SF_V2DF
:
27877 case V4SF_FTYPE_V4SF_DI
:
27878 case V4SF_FTYPE_V4SF_SI
:
27879 case V2DI_FTYPE_V2DI_V2DI
:
27880 case V2DI_FTYPE_V16QI_V16QI
:
27881 case V2DI_FTYPE_V4SI_V4SI
:
27882 case V2DI_FTYPE_V2DI_V16QI
:
27883 case V2DI_FTYPE_V2DF_V2DF
:
27884 case V2SI_FTYPE_V2SI_V2SI
:
27885 case V2SI_FTYPE_V4HI_V4HI
:
27886 case V2SI_FTYPE_V2SF_V2SF
:
27887 case V2DF_FTYPE_V2DF_V2DF
:
27888 case V2DF_FTYPE_V2DF_V4SF
:
27889 case V2DF_FTYPE_V2DF_V2DI
:
27890 case V2DF_FTYPE_V2DF_DI
:
27891 case V2DF_FTYPE_V2DF_SI
:
27892 case V2SF_FTYPE_V2SF_V2SF
:
27893 case V1DI_FTYPE_V1DI_V1DI
:
27894 case V1DI_FTYPE_V8QI_V8QI
:
27895 case V1DI_FTYPE_V2SI_V2SI
:
27896 case V32QI_FTYPE_V16HI_V16HI
:
27897 case V16HI_FTYPE_V8SI_V8SI
:
27898 case V32QI_FTYPE_V32QI_V32QI
:
27899 case V16HI_FTYPE_V32QI_V32QI
:
27900 case V16HI_FTYPE_V16HI_V16HI
:
27901 case V8SI_FTYPE_V8SI_V8SI
:
27902 case V8SI_FTYPE_V16HI_V16HI
:
27903 case V4DI_FTYPE_V4DI_V4DI
:
27904 case V4DI_FTYPE_V8SI_V8SI
:
27905 if (comparison
== UNKNOWN
)
27906 return ix86_expand_binop_builtin (icode
, exp
, target
);
27909 case V4SF_FTYPE_V4SF_V4SF_SWAP
:
27910 case V2DF_FTYPE_V2DF_V2DF_SWAP
:
27911 gcc_assert (comparison
!= UNKNOWN
);
27915 case V16HI_FTYPE_V16HI_V8HI_COUNT
:
27916 case V16HI_FTYPE_V16HI_SI_COUNT
:
27917 case V8SI_FTYPE_V8SI_V4SI_COUNT
:
27918 case V8SI_FTYPE_V8SI_SI_COUNT
:
27919 case V4DI_FTYPE_V4DI_V2DI_COUNT
:
27920 case V4DI_FTYPE_V4DI_INT_COUNT
:
27921 case V8HI_FTYPE_V8HI_V8HI_COUNT
:
27922 case V8HI_FTYPE_V8HI_SI_COUNT
:
27923 case V4SI_FTYPE_V4SI_V4SI_COUNT
:
27924 case V4SI_FTYPE_V4SI_SI_COUNT
:
27925 case V4HI_FTYPE_V4HI_V4HI_COUNT
:
27926 case V4HI_FTYPE_V4HI_SI_COUNT
:
27927 case V2DI_FTYPE_V2DI_V2DI_COUNT
:
27928 case V2DI_FTYPE_V2DI_SI_COUNT
:
27929 case V2SI_FTYPE_V2SI_V2SI_COUNT
:
27930 case V2SI_FTYPE_V2SI_SI_COUNT
:
27931 case V1DI_FTYPE_V1DI_V1DI_COUNT
:
27932 case V1DI_FTYPE_V1DI_SI_COUNT
:
27934 last_arg_count
= true;
27936 case UINT64_FTYPE_UINT64_UINT64
:
27937 case UINT_FTYPE_UINT_UINT
:
27938 case UINT_FTYPE_UINT_USHORT
:
27939 case UINT_FTYPE_UINT_UCHAR
:
27940 case UINT16_FTYPE_UINT16_INT
:
27941 case UINT8_FTYPE_UINT8_INT
:
27944 case V2DI_FTYPE_V2DI_INT_CONVERT
:
27947 nargs_constant
= 1;
27949 case V4DI_FTYPE_V4DI_INT_CONVERT
:
27952 nargs_constant
= 1;
27954 case V8HI_FTYPE_V8HI_INT
:
27955 case V8HI_FTYPE_V8SF_INT
:
27956 case V8HI_FTYPE_V4SF_INT
:
27957 case V8SF_FTYPE_V8SF_INT
:
27958 case V4SI_FTYPE_V4SI_INT
:
27959 case V4SI_FTYPE_V8SI_INT
:
27960 case V4HI_FTYPE_V4HI_INT
:
27961 case V4DF_FTYPE_V4DF_INT
:
27962 case V4SF_FTYPE_V4SF_INT
:
27963 case V4SF_FTYPE_V8SF_INT
:
27964 case V2DI_FTYPE_V2DI_INT
:
27965 case V2DF_FTYPE_V2DF_INT
:
27966 case V2DF_FTYPE_V4DF_INT
:
27967 case V16HI_FTYPE_V16HI_INT
:
27968 case V8SI_FTYPE_V8SI_INT
:
27969 case V4DI_FTYPE_V4DI_INT
:
27970 case V2DI_FTYPE_V4DI_INT
:
27972 nargs_constant
= 1;
27974 case V16QI_FTYPE_V16QI_V16QI_V16QI
:
27975 case V8SF_FTYPE_V8SF_V8SF_V8SF
:
27976 case V4DF_FTYPE_V4DF_V4DF_V4DF
:
27977 case V4SF_FTYPE_V4SF_V4SF_V4SF
:
27978 case V2DF_FTYPE_V2DF_V2DF_V2DF
:
27979 case V32QI_FTYPE_V32QI_V32QI_V32QI
:
27982 case V32QI_FTYPE_V32QI_V32QI_INT
:
27983 case V16HI_FTYPE_V16HI_V16HI_INT
:
27984 case V16QI_FTYPE_V16QI_V16QI_INT
:
27985 case V4DI_FTYPE_V4DI_V4DI_INT
:
27986 case V8HI_FTYPE_V8HI_V8HI_INT
:
27987 case V8SI_FTYPE_V8SI_V8SI_INT
:
27988 case V8SI_FTYPE_V8SI_V4SI_INT
:
27989 case V8SF_FTYPE_V8SF_V8SF_INT
:
27990 case V8SF_FTYPE_V8SF_V4SF_INT
:
27991 case V4SI_FTYPE_V4SI_V4SI_INT
:
27992 case V4DF_FTYPE_V4DF_V4DF_INT
:
27993 case V4DF_FTYPE_V4DF_V2DF_INT
:
27994 case V4SF_FTYPE_V4SF_V4SF_INT
:
27995 case V2DI_FTYPE_V2DI_V2DI_INT
:
27996 case V4DI_FTYPE_V4DI_V2DI_INT
:
27997 case V2DF_FTYPE_V2DF_V2DF_INT
:
27999 nargs_constant
= 1;
28001 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT
:
28004 nargs_constant
= 1;
28006 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT
:
28009 nargs_constant
= 1;
28011 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT
:
28014 nargs_constant
= 1;
28016 case V2DI_FTYPE_V2DI_UINT_UINT
:
28018 nargs_constant
= 2;
28020 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT
:
28021 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT
:
28022 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT
:
28023 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT
:
28025 nargs_constant
= 1;
28027 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT
:
28029 nargs_constant
= 2;
28032 gcc_unreachable ();
28035 gcc_assert (nargs
<= ARRAY_SIZE (args
));
28037 if (comparison
!= UNKNOWN
)
28039 gcc_assert (nargs
== 2);
28040 return ix86_expand_sse_compare (d
, exp
, target
, swap
);
28043 if (rmode
== VOIDmode
|| rmode
== tmode
)
28047 || GET_MODE (target
) != tmode
28048 || !insn_p
->operand
[0].predicate (target
, tmode
))
28049 target
= gen_reg_rtx (tmode
);
28050 real_target
= target
;
28054 target
= gen_reg_rtx (rmode
);
28055 real_target
= simplify_gen_subreg (tmode
, target
, rmode
, 0);
28058 for (i
= 0; i
< nargs
; i
++)
28060 tree arg
= CALL_EXPR_ARG (exp
, i
);
28061 rtx op
= expand_normal (arg
);
28062 enum machine_mode mode
= insn_p
->operand
[i
+ 1].mode
;
28063 bool match
= insn_p
->operand
[i
+ 1].predicate (op
, mode
);
28065 if (last_arg_count
&& (i
+ 1) == nargs
)
28067 /* SIMD shift insns take either an 8-bit immediate or
28068 register as count. But builtin functions take int as
28069 count. If count doesn't match, we put it in register. */
28072 op
= simplify_gen_subreg (SImode
, op
, GET_MODE (op
), 0);
28073 if (!insn_p
->operand
[i
+ 1].predicate (op
, mode
))
28074 op
= copy_to_reg (op
);
28077 else if ((nargs
- i
) <= nargs_constant
)
28082 case CODE_FOR_avx2_inserti128
:
28083 case CODE_FOR_avx2_extracti128
:
28084 error ("the last argument must be an 1-bit immediate");
28087 case CODE_FOR_sse4_1_roundpd
:
28088 case CODE_FOR_sse4_1_roundps
:
28089 case CODE_FOR_sse4_1_roundsd
:
28090 case CODE_FOR_sse4_1_roundss
:
28091 case CODE_FOR_sse4_1_blendps
:
28092 case CODE_FOR_avx_blendpd256
:
28093 case CODE_FOR_avx_vpermilv4df
:
28094 case CODE_FOR_avx_roundpd256
:
28095 case CODE_FOR_avx_roundps256
:
28096 error ("the last argument must be a 4-bit immediate");
28099 case CODE_FOR_sse4_1_blendpd
:
28100 case CODE_FOR_avx_vpermilv2df
:
28101 case CODE_FOR_xop_vpermil2v2df3
:
28102 case CODE_FOR_xop_vpermil2v4sf3
:
28103 case CODE_FOR_xop_vpermil2v4df3
:
28104 case CODE_FOR_xop_vpermil2v8sf3
:
28105 error ("the last argument must be a 2-bit immediate");
28108 case CODE_FOR_avx_vextractf128v4df
:
28109 case CODE_FOR_avx_vextractf128v8sf
:
28110 case CODE_FOR_avx_vextractf128v8si
:
28111 case CODE_FOR_avx_vinsertf128v4df
:
28112 case CODE_FOR_avx_vinsertf128v8sf
:
28113 case CODE_FOR_avx_vinsertf128v8si
:
28114 error ("the last argument must be a 1-bit immediate");
28117 case CODE_FOR_avx_vmcmpv2df3
:
28118 case CODE_FOR_avx_vmcmpv4sf3
:
28119 case CODE_FOR_avx_cmpv2df3
:
28120 case CODE_FOR_avx_cmpv4sf3
:
28121 case CODE_FOR_avx_cmpv4df3
:
28122 case CODE_FOR_avx_cmpv8sf3
:
28123 error ("the last argument must be a 5-bit immediate");
28127 switch (nargs_constant
)
28130 if ((nargs
- i
) == nargs_constant
)
28132 error ("the next to last argument must be an 8-bit immediate");
28136 error ("the last argument must be an 8-bit immediate");
28139 gcc_unreachable ();
28146 if (VECTOR_MODE_P (mode
))
28147 op
= safe_vector_operand (op
, mode
);
28149 /* If we aren't optimizing, only allow one memory operand to
28151 if (memory_operand (op
, mode
))
28154 if (GET_MODE (op
) == mode
|| GET_MODE (op
) == VOIDmode
)
28156 if (optimize
|| !match
|| num_memory
> 1)
28157 op
= copy_to_mode_reg (mode
, op
);
28161 op
= copy_to_reg (op
);
28162 op
= simplify_gen_subreg (mode
, op
, GET_MODE (op
), 0);
28167 args
[i
].mode
= mode
;
28173 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
);
28176 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
);
28179 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
,
28183 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
,
28184 args
[2].op
, args
[3].op
);
28187 gcc_unreachable ();
28197 /* Subroutine of ix86_expand_builtin to take care of special insns
28198 with variable number of operands. */
28201 ix86_expand_special_args_builtin (const struct builtin_description
*d
,
28202 tree exp
, rtx target
)
28206 unsigned int i
, nargs
, arg_adjust
, memory
;
28210 enum machine_mode mode
;
28212 enum insn_code icode
= d
->icode
;
28213 bool last_arg_constant
= false;
28214 const struct insn_data_d
*insn_p
= &insn_data
[icode
];
28215 enum machine_mode tmode
= insn_p
->operand
[0].mode
;
28216 enum { load
, store
} klass
;
28218 switch ((enum ix86_builtin_func_type
) d
->flag
)
28220 case VOID_FTYPE_VOID
:
28221 if (icode
== CODE_FOR_avx_vzeroupper
)
28222 target
= GEN_INT (vzeroupper_intrinsic
);
28223 emit_insn (GEN_FCN (icode
) (target
));
28225 case VOID_FTYPE_UINT64
:
28226 case VOID_FTYPE_UNSIGNED
:
28231 case UINT64_FTYPE_VOID
:
28232 case UNSIGNED_FTYPE_VOID
:
28237 case UINT64_FTYPE_PUNSIGNED
:
28238 case V2DI_FTYPE_PV2DI
:
28239 case V4DI_FTYPE_PV4DI
:
28240 case V32QI_FTYPE_PCCHAR
:
28241 case V16QI_FTYPE_PCCHAR
:
28242 case V8SF_FTYPE_PCV4SF
:
28243 case V8SF_FTYPE_PCFLOAT
:
28244 case V4SF_FTYPE_PCFLOAT
:
28245 case V4DF_FTYPE_PCV2DF
:
28246 case V4DF_FTYPE_PCDOUBLE
:
28247 case V2DF_FTYPE_PCDOUBLE
:
28248 case VOID_FTYPE_PVOID
:
28253 case VOID_FTYPE_PV2SF_V4SF
:
28254 case VOID_FTYPE_PV4DI_V4DI
:
28255 case VOID_FTYPE_PV2DI_V2DI
:
28256 case VOID_FTYPE_PCHAR_V32QI
:
28257 case VOID_FTYPE_PCHAR_V16QI
:
28258 case VOID_FTYPE_PFLOAT_V8SF
:
28259 case VOID_FTYPE_PFLOAT_V4SF
:
28260 case VOID_FTYPE_PDOUBLE_V4DF
:
28261 case VOID_FTYPE_PDOUBLE_V2DF
:
28262 case VOID_FTYPE_PULONGLONG_ULONGLONG
:
28263 case VOID_FTYPE_PINT_INT
:
28266 /* Reserve memory operand for target. */
28267 memory
= ARRAY_SIZE (args
);
28269 case V4SF_FTYPE_V4SF_PCV2SF
:
28270 case V2DF_FTYPE_V2DF_PCDOUBLE
:
28275 case V8SF_FTYPE_PCV8SF_V8SI
:
28276 case V4DF_FTYPE_PCV4DF_V4DI
:
28277 case V4SF_FTYPE_PCV4SF_V4SI
:
28278 case V2DF_FTYPE_PCV2DF_V2DI
:
28279 case V8SI_FTYPE_PCV8SI_V8SI
:
28280 case V4DI_FTYPE_PCV4DI_V4DI
:
28281 case V4SI_FTYPE_PCV4SI_V4SI
:
28282 case V2DI_FTYPE_PCV2DI_V2DI
:
28287 case VOID_FTYPE_PV8SF_V8SI_V8SF
:
28288 case VOID_FTYPE_PV4DF_V4DI_V4DF
:
28289 case VOID_FTYPE_PV4SF_V4SI_V4SF
:
28290 case VOID_FTYPE_PV2DF_V2DI_V2DF
:
28291 case VOID_FTYPE_PV8SI_V8SI_V8SI
:
28292 case VOID_FTYPE_PV4DI_V4DI_V4DI
:
28293 case VOID_FTYPE_PV4SI_V4SI_V4SI
:
28294 case VOID_FTYPE_PV2DI_V2DI_V2DI
:
28297 /* Reserve memory operand for target. */
28298 memory
= ARRAY_SIZE (args
);
28300 case VOID_FTYPE_UINT_UINT_UINT
:
28301 case VOID_FTYPE_UINT64_UINT_UINT
:
28302 case UCHAR_FTYPE_UINT_UINT_UINT
:
28303 case UCHAR_FTYPE_UINT64_UINT_UINT
:
28306 memory
= ARRAY_SIZE (args
);
28307 last_arg_constant
= true;
28310 gcc_unreachable ();
28313 gcc_assert (nargs
<= ARRAY_SIZE (args
));
28315 if (klass
== store
)
28317 arg
= CALL_EXPR_ARG (exp
, 0);
28318 op
= expand_normal (arg
);
28319 gcc_assert (target
== 0);
28322 if (GET_MODE (op
) != Pmode
)
28323 op
= convert_to_mode (Pmode
, op
, 1);
28324 target
= gen_rtx_MEM (tmode
, force_reg (Pmode
, op
));
28327 target
= force_reg (tmode
, op
);
28335 || GET_MODE (target
) != tmode
28336 || !insn_p
->operand
[0].predicate (target
, tmode
))
28337 target
= gen_reg_rtx (tmode
);
28340 for (i
= 0; i
< nargs
; i
++)
28342 enum machine_mode mode
= insn_p
->operand
[i
+ 1].mode
;
28345 arg
= CALL_EXPR_ARG (exp
, i
+ arg_adjust
);
28346 op
= expand_normal (arg
);
28347 match
= insn_p
->operand
[i
+ 1].predicate (op
, mode
);
28349 if (last_arg_constant
&& (i
+ 1) == nargs
)
28353 if (icode
== CODE_FOR_lwp_lwpvalsi3
28354 || icode
== CODE_FOR_lwp_lwpinssi3
28355 || icode
== CODE_FOR_lwp_lwpvaldi3
28356 || icode
== CODE_FOR_lwp_lwpinsdi3
)
28357 error ("the last argument must be a 32-bit immediate");
28359 error ("the last argument must be an 8-bit immediate");
28367 /* This must be the memory operand. */
28368 if (GET_MODE (op
) != Pmode
)
28369 op
= convert_to_mode (Pmode
, op
, 1);
28370 op
= gen_rtx_MEM (mode
, force_reg (Pmode
, op
));
28371 gcc_assert (GET_MODE (op
) == mode
28372 || GET_MODE (op
) == VOIDmode
);
28376 /* This must be register. */
28377 if (VECTOR_MODE_P (mode
))
28378 op
= safe_vector_operand (op
, mode
);
28380 gcc_assert (GET_MODE (op
) == mode
28381 || GET_MODE (op
) == VOIDmode
);
28382 op
= copy_to_mode_reg (mode
, op
);
28387 args
[i
].mode
= mode
;
28393 pat
= GEN_FCN (icode
) (target
);
28396 pat
= GEN_FCN (icode
) (target
, args
[0].op
);
28399 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
);
28402 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
);
28405 gcc_unreachable ();
28411 return klass
== store
? 0 : target
;
28414 /* Return the integer constant in ARG. Constrain it to be in the range
28415 of the subparts of VEC_TYPE; issue an error if not. */
28418 get_element_number (tree vec_type
, tree arg
)
28420 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
28422 if (!host_integerp (arg
, 1)
28423 || (elt
= tree_low_cst (arg
, 1), elt
> max
))
28425 error ("selector must be an integer constant in the range 0..%wi", max
);
28432 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
28433 ix86_expand_vector_init. We DO have language-level syntax for this, in
28434 the form of (type){ init-list }. Except that since we can't place emms
28435 instructions from inside the compiler, we can't allow the use of MMX
28436 registers unless the user explicitly asks for it. So we do *not* define
28437 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
28438 we have builtins invoked by mmintrin.h that gives us license to emit
28439 these sorts of instructions. */
28442 ix86_expand_vec_init_builtin (tree type
, tree exp
, rtx target
)
28444 enum machine_mode tmode
= TYPE_MODE (type
);
28445 enum machine_mode inner_mode
= GET_MODE_INNER (tmode
);
28446 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
28447 rtvec v
= rtvec_alloc (n_elt
);
28449 gcc_assert (VECTOR_MODE_P (tmode
));
28450 gcc_assert (call_expr_nargs (exp
) == n_elt
);
28452 for (i
= 0; i
< n_elt
; ++i
)
28454 rtx x
= expand_normal (CALL_EXPR_ARG (exp
, i
));
28455 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
28458 if (!target
|| !register_operand (target
, tmode
))
28459 target
= gen_reg_rtx (tmode
);
28461 ix86_expand_vector_init (true, target
, gen_rtx_PARALLEL (tmode
, v
));
28465 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
28466 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
28467 had a language-level syntax for referencing vector elements. */
28470 ix86_expand_vec_ext_builtin (tree exp
, rtx target
)
28472 enum machine_mode tmode
, mode0
;
28477 arg0
= CALL_EXPR_ARG (exp
, 0);
28478 arg1
= CALL_EXPR_ARG (exp
, 1);
28480 op0
= expand_normal (arg0
);
28481 elt
= get_element_number (TREE_TYPE (arg0
), arg1
);
28483 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
28484 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
28485 gcc_assert (VECTOR_MODE_P (mode0
));
28487 op0
= force_reg (mode0
, op0
);
28489 if (optimize
|| !target
|| !register_operand (target
, tmode
))
28490 target
= gen_reg_rtx (tmode
);
28492 ix86_expand_vector_extract (true, target
, op0
, elt
);
28497 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
28498 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
28499 a language-level syntax for referencing vector elements. */
28502 ix86_expand_vec_set_builtin (tree exp
)
28504 enum machine_mode tmode
, mode1
;
28505 tree arg0
, arg1
, arg2
;
28507 rtx op0
, op1
, target
;
28509 arg0
= CALL_EXPR_ARG (exp
, 0);
28510 arg1
= CALL_EXPR_ARG (exp
, 1);
28511 arg2
= CALL_EXPR_ARG (exp
, 2);
28513 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
28514 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
28515 gcc_assert (VECTOR_MODE_P (tmode
));
28517 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, EXPAND_NORMAL
);
28518 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, EXPAND_NORMAL
);
28519 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
28521 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
28522 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
28524 op0
= force_reg (tmode
, op0
);
28525 op1
= force_reg (mode1
, op1
);
28527 /* OP0 is the source of these builtin functions and shouldn't be
28528 modified. Create a copy, use it and return it as target. */
28529 target
= gen_reg_rtx (tmode
);
28530 emit_move_insn (target
, op0
);
28531 ix86_expand_vector_set (true, target
, op1
, elt
);
28536 /* Expand an expression EXP that calls a built-in function,
28537 with result going to TARGET if that's convenient
28538 (and in mode MODE if that's convenient).
28539 SUBTARGET may be used as the target for computing one of EXP's operands.
28540 IGNORE is nonzero if the value is to be ignored. */
28543 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
28544 enum machine_mode mode ATTRIBUTE_UNUSED
,
28545 int ignore ATTRIBUTE_UNUSED
)
28547 const struct builtin_description
*d
;
28549 enum insn_code icode
;
28550 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
28551 tree arg0
, arg1
, arg2
, arg3
, arg4
;
28552 rtx op0
, op1
, op2
, op3
, op4
, pat
;
28553 enum machine_mode mode0
, mode1
, mode2
, mode3
, mode4
;
28554 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
28556 /* Determine whether the builtin function is available under the current ISA.
28557 Originally the builtin was not created if it wasn't applicable to the
28558 current ISA based on the command line switches. With function specific
28559 options, we need to check in the context of the function making the call
28560 whether it is supported. */
28561 if (ix86_builtins_isa
[fcode
].isa
28562 && !(ix86_builtins_isa
[fcode
].isa
& ix86_isa_flags
))
28564 char *opts
= ix86_target_string (ix86_builtins_isa
[fcode
].isa
, 0, NULL
,
28565 NULL
, (enum fpmath_unit
) 0, false);
28568 error ("%qE needs unknown isa option", fndecl
);
28571 gcc_assert (opts
!= NULL
);
28572 error ("%qE needs isa option %s", fndecl
, opts
);
28580 case IX86_BUILTIN_MASKMOVQ
:
28581 case IX86_BUILTIN_MASKMOVDQU
:
28582 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
28583 ? CODE_FOR_mmx_maskmovq
28584 : CODE_FOR_sse2_maskmovdqu
);
28585 /* Note the arg order is different from the operand order. */
28586 arg1
= CALL_EXPR_ARG (exp
, 0);
28587 arg2
= CALL_EXPR_ARG (exp
, 1);
28588 arg0
= CALL_EXPR_ARG (exp
, 2);
28589 op0
= expand_normal (arg0
);
28590 op1
= expand_normal (arg1
);
28591 op2
= expand_normal (arg2
);
28592 mode0
= insn_data
[icode
].operand
[0].mode
;
28593 mode1
= insn_data
[icode
].operand
[1].mode
;
28594 mode2
= insn_data
[icode
].operand
[2].mode
;
28596 if (GET_MODE (op0
) != Pmode
)
28597 op0
= convert_to_mode (Pmode
, op0
, 1);
28598 op0
= gen_rtx_MEM (mode1
, force_reg (Pmode
, op0
));
28600 if (!insn_data
[icode
].operand
[0].predicate (op0
, mode0
))
28601 op0
= copy_to_mode_reg (mode0
, op0
);
28602 if (!insn_data
[icode
].operand
[1].predicate (op1
, mode1
))
28603 op1
= copy_to_mode_reg (mode1
, op1
);
28604 if (!insn_data
[icode
].operand
[2].predicate (op2
, mode2
))
28605 op2
= copy_to_mode_reg (mode2
, op2
);
28606 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
28612 case IX86_BUILTIN_LDMXCSR
:
28613 op0
= expand_normal (CALL_EXPR_ARG (exp
, 0));
28614 target
= assign_386_stack_local (SImode
, SLOT_VIRTUAL
);
28615 emit_move_insn (target
, op0
);
28616 emit_insn (gen_sse_ldmxcsr (target
));
28619 case IX86_BUILTIN_STMXCSR
:
28620 target
= assign_386_stack_local (SImode
, SLOT_VIRTUAL
);
28621 emit_insn (gen_sse_stmxcsr (target
));
28622 return copy_to_mode_reg (SImode
, target
);
28624 case IX86_BUILTIN_CLFLUSH
:
28625 arg0
= CALL_EXPR_ARG (exp
, 0);
28626 op0
= expand_normal (arg0
);
28627 icode
= CODE_FOR_sse2_clflush
;
28628 if (!insn_data
[icode
].operand
[0].predicate (op0
, Pmode
))
28630 if (GET_MODE (op0
) != Pmode
)
28631 op0
= convert_to_mode (Pmode
, op0
, 1);
28632 op0
= force_reg (Pmode
, op0
);
28635 emit_insn (gen_sse2_clflush (op0
));
28638 case IX86_BUILTIN_MONITOR
:
28639 arg0
= CALL_EXPR_ARG (exp
, 0);
28640 arg1
= CALL_EXPR_ARG (exp
, 1);
28641 arg2
= CALL_EXPR_ARG (exp
, 2);
28642 op0
= expand_normal (arg0
);
28643 op1
= expand_normal (arg1
);
28644 op2
= expand_normal (arg2
);
28647 if (GET_MODE (op0
) != Pmode
)
28648 op0
= convert_to_mode (Pmode
, op0
, 1);
28649 op0
= force_reg (Pmode
, op0
);
28652 op1
= copy_to_mode_reg (SImode
, op1
);
28654 op2
= copy_to_mode_reg (SImode
, op2
);
28655 emit_insn (ix86_gen_monitor (op0
, op1
, op2
));
28658 case IX86_BUILTIN_MWAIT
:
28659 arg0
= CALL_EXPR_ARG (exp
, 0);
28660 arg1
= CALL_EXPR_ARG (exp
, 1);
28661 op0
= expand_normal (arg0
);
28662 op1
= expand_normal (arg1
);
28664 op0
= copy_to_mode_reg (SImode
, op0
);
28666 op1
= copy_to_mode_reg (SImode
, op1
);
28667 emit_insn (gen_sse3_mwait (op0
, op1
));
28670 case IX86_BUILTIN_VEC_INIT_V2SI
:
28671 case IX86_BUILTIN_VEC_INIT_V4HI
:
28672 case IX86_BUILTIN_VEC_INIT_V8QI
:
28673 return ix86_expand_vec_init_builtin (TREE_TYPE (exp
), exp
, target
);
28675 case IX86_BUILTIN_VEC_EXT_V2DF
:
28676 case IX86_BUILTIN_VEC_EXT_V2DI
:
28677 case IX86_BUILTIN_VEC_EXT_V4SF
:
28678 case IX86_BUILTIN_VEC_EXT_V4SI
:
28679 case IX86_BUILTIN_VEC_EXT_V8HI
:
28680 case IX86_BUILTIN_VEC_EXT_V2SI
:
28681 case IX86_BUILTIN_VEC_EXT_V4HI
:
28682 case IX86_BUILTIN_VEC_EXT_V16QI
:
28683 return ix86_expand_vec_ext_builtin (exp
, target
);
28685 case IX86_BUILTIN_VEC_SET_V2DI
:
28686 case IX86_BUILTIN_VEC_SET_V4SF
:
28687 case IX86_BUILTIN_VEC_SET_V4SI
:
28688 case IX86_BUILTIN_VEC_SET_V8HI
:
28689 case IX86_BUILTIN_VEC_SET_V4HI
:
28690 case IX86_BUILTIN_VEC_SET_V16QI
:
28691 return ix86_expand_vec_set_builtin (exp
);
28693 case IX86_BUILTIN_INFQ
:
28694 case IX86_BUILTIN_HUGE_VALQ
:
28696 REAL_VALUE_TYPE inf
;
28700 tmp
= CONST_DOUBLE_FROM_REAL_VALUE (inf
, mode
);
28702 tmp
= validize_mem (force_const_mem (mode
, tmp
));
28705 target
= gen_reg_rtx (mode
);
28707 emit_move_insn (target
, tmp
);
28711 case IX86_BUILTIN_LLWPCB
:
28712 arg0
= CALL_EXPR_ARG (exp
, 0);
28713 op0
= expand_normal (arg0
);
28714 icode
= CODE_FOR_lwp_llwpcb
;
28715 if (!insn_data
[icode
].operand
[0].predicate (op0
, Pmode
))
28717 if (GET_MODE (op0
) != Pmode
)
28718 op0
= convert_to_mode (Pmode
, op0
, 1);
28719 op0
= force_reg (Pmode
, op0
);
28721 emit_insn (gen_lwp_llwpcb (op0
));
28724 case IX86_BUILTIN_SLWPCB
:
28725 icode
= CODE_FOR_lwp_slwpcb
;
28727 || !insn_data
[icode
].operand
[0].predicate (target
, Pmode
))
28728 target
= gen_reg_rtx (Pmode
);
28729 emit_insn (gen_lwp_slwpcb (target
));
28732 case IX86_BUILTIN_BEXTRI32
:
28733 case IX86_BUILTIN_BEXTRI64
:
28734 arg0
= CALL_EXPR_ARG (exp
, 0);
28735 arg1
= CALL_EXPR_ARG (exp
, 1);
28736 op0
= expand_normal (arg0
);
28737 op1
= expand_normal (arg1
);
28738 icode
= (fcode
== IX86_BUILTIN_BEXTRI32
28739 ? CODE_FOR_tbm_bextri_si
28740 : CODE_FOR_tbm_bextri_di
);
28741 if (!CONST_INT_P (op1
))
28743 error ("last argument must be an immediate");
28748 unsigned char length
= (INTVAL (op1
) >> 8) & 0xFF;
28749 unsigned char lsb_index
= INTVAL (op1
) & 0xFF;
28750 op1
= GEN_INT (length
);
28751 op2
= GEN_INT (lsb_index
);
28752 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
28758 case IX86_BUILTIN_RDRAND16_STEP
:
28759 icode
= CODE_FOR_rdrandhi_1
;
28763 case IX86_BUILTIN_RDRAND32_STEP
:
28764 icode
= CODE_FOR_rdrandsi_1
;
28768 case IX86_BUILTIN_RDRAND64_STEP
:
28769 icode
= CODE_FOR_rdranddi_1
;
28773 op0
= gen_reg_rtx (mode0
);
28774 emit_insn (GEN_FCN (icode
) (op0
));
28776 arg0
= CALL_EXPR_ARG (exp
, 0);
28777 op1
= expand_normal (arg0
);
28778 if (!address_operand (op1
, VOIDmode
))
28780 op1
= convert_memory_address (Pmode
, op1
);
28781 op1
= copy_addr_to_reg (op1
);
28783 emit_move_insn (gen_rtx_MEM (mode0
, op1
), op0
);
28785 op1
= gen_reg_rtx (SImode
);
28786 emit_move_insn (op1
, CONST1_RTX (SImode
));
28788 /* Emit SImode conditional move. */
28789 if (mode0
== HImode
)
28791 op2
= gen_reg_rtx (SImode
);
28792 emit_insn (gen_zero_extendhisi2 (op2
, op0
));
28794 else if (mode0
== SImode
)
28797 op2
= gen_rtx_SUBREG (SImode
, op0
, 0);
28800 target
= gen_reg_rtx (SImode
);
28802 pat
= gen_rtx_GEU (VOIDmode
, gen_rtx_REG (CCCmode
, FLAGS_REG
),
28804 emit_insn (gen_rtx_SET (VOIDmode
, target
,
28805 gen_rtx_IF_THEN_ELSE (SImode
, pat
, op2
, op1
)));
28808 case IX86_BUILTIN_GATHERSIV2DF
:
28809 icode
= CODE_FOR_avx2_gathersiv2df
;
28811 case IX86_BUILTIN_GATHERSIV4DF
:
28812 icode
= CODE_FOR_avx2_gathersiv4df
;
28814 case IX86_BUILTIN_GATHERDIV2DF
:
28815 icode
= CODE_FOR_avx2_gatherdiv2df
;
28817 case IX86_BUILTIN_GATHERDIV4DF
:
28818 icode
= CODE_FOR_avx2_gatherdiv4df
;
28820 case IX86_BUILTIN_GATHERSIV4SF
:
28821 icode
= CODE_FOR_avx2_gathersiv4sf
;
28823 case IX86_BUILTIN_GATHERSIV8SF
:
28824 icode
= CODE_FOR_avx2_gathersiv8sf
;
28826 case IX86_BUILTIN_GATHERDIV4SF
:
28827 icode
= CODE_FOR_avx2_gatherdiv4sf
;
28829 case IX86_BUILTIN_GATHERDIV8SF
:
28830 icode
= CODE_FOR_avx2_gatherdiv4sf256
;
28832 case IX86_BUILTIN_GATHERSIV2DI
:
28833 icode
= CODE_FOR_avx2_gathersiv2di
;
28835 case IX86_BUILTIN_GATHERSIV4DI
:
28836 icode
= CODE_FOR_avx2_gathersiv4di
;
28838 case IX86_BUILTIN_GATHERDIV2DI
:
28839 icode
= CODE_FOR_avx2_gatherdiv2di
;
28841 case IX86_BUILTIN_GATHERDIV4DI
:
28842 icode
= CODE_FOR_avx2_gatherdiv4di
;
28844 case IX86_BUILTIN_GATHERSIV4SI
:
28845 icode
= CODE_FOR_avx2_gathersiv4si
;
28847 case IX86_BUILTIN_GATHERSIV8SI
:
28848 icode
= CODE_FOR_avx2_gathersiv8si
;
28850 case IX86_BUILTIN_GATHERDIV4SI
:
28851 icode
= CODE_FOR_avx2_gatherdiv4si
;
28853 case IX86_BUILTIN_GATHERDIV8SI
:
28854 icode
= CODE_FOR_avx2_gatherdiv4si256
;
28857 arg0
= CALL_EXPR_ARG (exp
, 0);
28858 arg1
= CALL_EXPR_ARG (exp
, 1);
28859 arg2
= CALL_EXPR_ARG (exp
, 2);
28860 arg3
= CALL_EXPR_ARG (exp
, 3);
28861 arg4
= CALL_EXPR_ARG (exp
, 4);
28862 op0
= expand_normal (arg0
);
28863 op1
= expand_normal (arg1
);
28864 op2
= expand_normal (arg2
);
28865 op3
= expand_normal (arg3
);
28866 op4
= expand_normal (arg4
);
28867 /* Note the arg order is different from the operand order. */
28868 mode0
= insn_data
[icode
].operand
[1].mode
;
28869 mode2
= insn_data
[icode
].operand
[3].mode
;
28870 mode3
= insn_data
[icode
].operand
[4].mode
;
28871 mode4
= insn_data
[icode
].operand
[5].mode
;
28873 if (target
== NULL_RTX
)
28874 target
= gen_reg_rtx (insn_data
[icode
].operand
[0].mode
);
28876 /* Force memory operand only with base register here. But we
28877 don't want to do it on memory operand for other builtin
28879 if (GET_MODE (op1
) != Pmode
)
28880 op1
= convert_to_mode (Pmode
, op1
, 1);
28881 op1
= force_reg (Pmode
, op1
);
28883 if (!insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
28884 op0
= copy_to_mode_reg (mode0
, op0
);
28885 if (!insn_data
[icode
].operand
[2].predicate (op1
, Pmode
))
28886 op1
= copy_to_mode_reg (Pmode
, op1
);
28887 if (!insn_data
[icode
].operand
[3].predicate (op2
, mode2
))
28888 op2
= copy_to_mode_reg (mode2
, op2
);
28889 if (!insn_data
[icode
].operand
[4].predicate (op3
, mode3
))
28890 op3
= copy_to_mode_reg (mode3
, op3
);
28891 if (!insn_data
[icode
].operand
[5].predicate (op4
, mode4
))
28893 error ("last argument must be scale 1, 2, 4, 8");
28896 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
, op3
, op4
);
28906 for (i
= 0, d
= bdesc_special_args
;
28907 i
< ARRAY_SIZE (bdesc_special_args
);
28909 if (d
->code
== fcode
)
28910 return ix86_expand_special_args_builtin (d
, exp
, target
);
28912 for (i
= 0, d
= bdesc_args
;
28913 i
< ARRAY_SIZE (bdesc_args
);
28915 if (d
->code
== fcode
)
28918 case IX86_BUILTIN_FABSQ
:
28919 case IX86_BUILTIN_COPYSIGNQ
:
28921 /* Emit a normal call if SSE2 isn't available. */
28922 return expand_call (exp
, target
, ignore
);
28924 return ix86_expand_args_builtin (d
, exp
, target
);
28927 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
28928 if (d
->code
== fcode
)
28929 return ix86_expand_sse_comi (d
, exp
, target
);
28931 for (i
= 0, d
= bdesc_pcmpestr
;
28932 i
< ARRAY_SIZE (bdesc_pcmpestr
);
28934 if (d
->code
== fcode
)
28935 return ix86_expand_sse_pcmpestr (d
, exp
, target
);
28937 for (i
= 0, d
= bdesc_pcmpistr
;
28938 i
< ARRAY_SIZE (bdesc_pcmpistr
);
28940 if (d
->code
== fcode
)
28941 return ix86_expand_sse_pcmpistr (d
, exp
, target
);
28943 for (i
= 0, d
= bdesc_multi_arg
; i
< ARRAY_SIZE (bdesc_multi_arg
); i
++, d
++)
28944 if (d
->code
== fcode
)
28945 return ix86_expand_multi_arg_builtin (d
->icode
, exp
, target
,
28946 (enum ix86_builtin_func_type
)
28947 d
->flag
, d
->comparison
);
28949 gcc_unreachable ();
28952 /* Returns a function decl for a vectorized version of the builtin function
28953 with builtin function code FN and the result vector type TYPE, or NULL_TREE
28954 if it is not available. */
28957 ix86_builtin_vectorized_function (tree fndecl
, tree type_out
,
28960 enum machine_mode in_mode
, out_mode
;
28962 enum built_in_function fn
= DECL_FUNCTION_CODE (fndecl
);
28964 if (TREE_CODE (type_out
) != VECTOR_TYPE
28965 || TREE_CODE (type_in
) != VECTOR_TYPE
28966 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
28969 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
28970 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
28971 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
28972 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
28976 case BUILT_IN_SQRT
:
28977 if (out_mode
== DFmode
&& in_mode
== DFmode
)
28979 if (out_n
== 2 && in_n
== 2)
28980 return ix86_builtins
[IX86_BUILTIN_SQRTPD
];
28981 else if (out_n
== 4 && in_n
== 4)
28982 return ix86_builtins
[IX86_BUILTIN_SQRTPD256
];
28986 case BUILT_IN_SQRTF
:
28987 if (out_mode
== SFmode
&& in_mode
== SFmode
)
28989 if (out_n
== 4 && in_n
== 4)
28990 return ix86_builtins
[IX86_BUILTIN_SQRTPS_NR
];
28991 else if (out_n
== 8 && in_n
== 8)
28992 return ix86_builtins
[IX86_BUILTIN_SQRTPS_NR256
];
28996 case BUILT_IN_LRINT
:
28997 if (out_mode
== SImode
&& out_n
== 4
28998 && in_mode
== DFmode
&& in_n
== 2)
28999 return ix86_builtins
[IX86_BUILTIN_VEC_PACK_SFIX
];
29002 case BUILT_IN_LRINTF
:
29003 if (out_mode
== SImode
&& in_mode
== SFmode
)
29005 if (out_n
== 4 && in_n
== 4)
29006 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ
];
29007 else if (out_n
== 8 && in_n
== 8)
29008 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ256
];
29012 case BUILT_IN_COPYSIGN
:
29013 if (out_mode
== DFmode
&& in_mode
== DFmode
)
29015 if (out_n
== 2 && in_n
== 2)
29016 return ix86_builtins
[IX86_BUILTIN_CPYSGNPD
];
29017 else if (out_n
== 4 && in_n
== 4)
29018 return ix86_builtins
[IX86_BUILTIN_CPYSGNPD256
];
29022 case BUILT_IN_COPYSIGNF
:
29023 if (out_mode
== SFmode
&& in_mode
== SFmode
)
29025 if (out_n
== 4 && in_n
== 4)
29026 return ix86_builtins
[IX86_BUILTIN_CPYSGNPS
];
29027 else if (out_n
== 8 && in_n
== 8)
29028 return ix86_builtins
[IX86_BUILTIN_CPYSGNPS256
];
29032 case BUILT_IN_FLOOR
:
29033 /* The round insn does not trap on denormals. */
29034 if (flag_trapping_math
|| !TARGET_ROUND
)
29037 if (out_mode
== DFmode
&& in_mode
== DFmode
)
29039 if (out_n
== 2 && in_n
== 2)
29040 return ix86_builtins
[IX86_BUILTIN_FLOORPD
];
29041 else if (out_n
== 4 && in_n
== 4)
29042 return ix86_builtins
[IX86_BUILTIN_FLOORPD256
];
29046 case BUILT_IN_FLOORF
:
29047 /* The round insn does not trap on denormals. */
29048 if (flag_trapping_math
|| !TARGET_ROUND
)
29051 if (out_mode
== SFmode
&& in_mode
== SFmode
)
29053 if (out_n
== 4 && in_n
== 4)
29054 return ix86_builtins
[IX86_BUILTIN_FLOORPS
];
29055 else if (out_n
== 8 && in_n
== 8)
29056 return ix86_builtins
[IX86_BUILTIN_FLOORPS256
];
29060 case BUILT_IN_CEIL
:
29061 /* The round insn does not trap on denormals. */
29062 if (flag_trapping_math
|| !TARGET_ROUND
)
29065 if (out_mode
== DFmode
&& in_mode
== DFmode
)
29067 if (out_n
== 2 && in_n
== 2)
29068 return ix86_builtins
[IX86_BUILTIN_CEILPD
];
29069 else if (out_n
== 4 && in_n
== 4)
29070 return ix86_builtins
[IX86_BUILTIN_CEILPD256
];
29074 case BUILT_IN_CEILF
:
29075 /* The round insn does not trap on denormals. */
29076 if (flag_trapping_math
|| !TARGET_ROUND
)
29079 if (out_mode
== SFmode
&& in_mode
== SFmode
)
29081 if (out_n
== 4 && in_n
== 4)
29082 return ix86_builtins
[IX86_BUILTIN_CEILPS
];
29083 else if (out_n
== 8 && in_n
== 8)
29084 return ix86_builtins
[IX86_BUILTIN_CEILPS256
];
29088 case BUILT_IN_TRUNC
:
29089 /* The round insn does not trap on denormals. */
29090 if (flag_trapping_math
|| !TARGET_ROUND
)
29093 if (out_mode
== DFmode
&& in_mode
== DFmode
)
29095 if (out_n
== 2 && in_n
== 2)
29096 return ix86_builtins
[IX86_BUILTIN_TRUNCPD
];
29097 else if (out_n
== 4 && in_n
== 4)
29098 return ix86_builtins
[IX86_BUILTIN_TRUNCPD256
];
29102 case BUILT_IN_TRUNCF
:
29103 /* The round insn does not trap on denormals. */
29104 if (flag_trapping_math
|| !TARGET_ROUND
)
29107 if (out_mode
== SFmode
&& in_mode
== SFmode
)
29109 if (out_n
== 4 && in_n
== 4)
29110 return ix86_builtins
[IX86_BUILTIN_TRUNCPS
];
29111 else if (out_n
== 8 && in_n
== 8)
29112 return ix86_builtins
[IX86_BUILTIN_TRUNCPS256
];
29116 case BUILT_IN_RINT
:
29117 /* The round insn does not trap on denormals. */
29118 if (flag_trapping_math
|| !TARGET_ROUND
)
29121 if (out_mode
== DFmode
&& in_mode
== DFmode
)
29123 if (out_n
== 2 && in_n
== 2)
29124 return ix86_builtins
[IX86_BUILTIN_RINTPD
];
29125 else if (out_n
== 4 && in_n
== 4)
29126 return ix86_builtins
[IX86_BUILTIN_RINTPD256
];
29130 case BUILT_IN_RINTF
:
29131 /* The round insn does not trap on denormals. */
29132 if (flag_trapping_math
|| !TARGET_ROUND
)
29135 if (out_mode
== SFmode
&& in_mode
== SFmode
)
29137 if (out_n
== 4 && in_n
== 4)
29138 return ix86_builtins
[IX86_BUILTIN_RINTPS
];
29139 else if (out_n
== 8 && in_n
== 8)
29140 return ix86_builtins
[IX86_BUILTIN_RINTPS256
];
29144 case BUILT_IN_ROUND
:
29145 /* The round insn does not trap on denormals. */
29146 if (flag_trapping_math
|| !TARGET_ROUND
)
29149 if (out_mode
== DFmode
&& in_mode
== DFmode
)
29151 if (out_n
== 2 && in_n
== 2)
29152 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ
];
29153 else if (out_n
== 4 && in_n
== 4)
29154 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ256
];
29158 case BUILT_IN_ROUNDF
:
29159 /* The round insn does not trap on denormals. */
29160 if (flag_trapping_math
|| !TARGET_ROUND
)
29163 if (out_mode
== SFmode
&& in_mode
== SFmode
)
29165 if (out_n
== 4 && in_n
== 4)
29166 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ
];
29167 else if (out_n
== 8 && in_n
== 8)
29168 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ256
];
29173 if (out_mode
== DFmode
&& in_mode
== DFmode
)
29175 if (out_n
== 2 && in_n
== 2)
29176 return ix86_builtins
[IX86_BUILTIN_VFMADDPD
];
29177 if (out_n
== 4 && in_n
== 4)
29178 return ix86_builtins
[IX86_BUILTIN_VFMADDPD256
];
29182 case BUILT_IN_FMAF
:
29183 if (out_mode
== SFmode
&& in_mode
== SFmode
)
29185 if (out_n
== 4 && in_n
== 4)
29186 return ix86_builtins
[IX86_BUILTIN_VFMADDPS
];
29187 if (out_n
== 8 && in_n
== 8)
29188 return ix86_builtins
[IX86_BUILTIN_VFMADDPS256
];
29196 /* Dispatch to a handler for a vectorization library. */
29197 if (ix86_veclib_handler
)
29198 return ix86_veclib_handler ((enum built_in_function
) fn
, type_out
,
29204 /* Handler for an SVML-style interface to
29205 a library with vectorized intrinsics. */
29208 ix86_veclibabi_svml (enum built_in_function fn
, tree type_out
, tree type_in
)
29211 tree fntype
, new_fndecl
, args
;
29214 enum machine_mode el_mode
, in_mode
;
29217 /* The SVML is suitable for unsafe math only. */
29218 if (!flag_unsafe_math_optimizations
)
29221 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
29222 n
= TYPE_VECTOR_SUBPARTS (type_out
);
29223 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
29224 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
29225 if (el_mode
!= in_mode
29233 case BUILT_IN_LOG10
:
29235 case BUILT_IN_TANH
:
29237 case BUILT_IN_ATAN
:
29238 case BUILT_IN_ATAN2
:
29239 case BUILT_IN_ATANH
:
29240 case BUILT_IN_CBRT
:
29241 case BUILT_IN_SINH
:
29243 case BUILT_IN_ASINH
:
29244 case BUILT_IN_ASIN
:
29245 case BUILT_IN_COSH
:
29247 case BUILT_IN_ACOSH
:
29248 case BUILT_IN_ACOS
:
29249 if (el_mode
!= DFmode
|| n
!= 2)
29253 case BUILT_IN_EXPF
:
29254 case BUILT_IN_LOGF
:
29255 case BUILT_IN_LOG10F
:
29256 case BUILT_IN_POWF
:
29257 case BUILT_IN_TANHF
:
29258 case BUILT_IN_TANF
:
29259 case BUILT_IN_ATANF
:
29260 case BUILT_IN_ATAN2F
:
29261 case BUILT_IN_ATANHF
:
29262 case BUILT_IN_CBRTF
:
29263 case BUILT_IN_SINHF
:
29264 case BUILT_IN_SINF
:
29265 case BUILT_IN_ASINHF
:
29266 case BUILT_IN_ASINF
:
29267 case BUILT_IN_COSHF
:
29268 case BUILT_IN_COSF
:
29269 case BUILT_IN_ACOSHF
:
29270 case BUILT_IN_ACOSF
:
29271 if (el_mode
!= SFmode
|| n
!= 4)
29279 bname
= IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn
)));
29281 if (fn
== BUILT_IN_LOGF
)
29282 strcpy (name
, "vmlsLn4");
29283 else if (fn
== BUILT_IN_LOG
)
29284 strcpy (name
, "vmldLn2");
29287 sprintf (name
, "vmls%s", bname
+10);
29288 name
[strlen (name
)-1] = '4';
29291 sprintf (name
, "vmld%s2", bname
+10);
29293 /* Convert to uppercase. */
29297 for (args
= DECL_ARGUMENTS (builtin_decl_implicit (fn
));
29299 args
= TREE_CHAIN (args
))
29303 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
29305 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
29307 /* Build a function declaration for the vectorized function. */
29308 new_fndecl
= build_decl (BUILTINS_LOCATION
,
29309 FUNCTION_DECL
, get_identifier (name
), fntype
);
29310 TREE_PUBLIC (new_fndecl
) = 1;
29311 DECL_EXTERNAL (new_fndecl
) = 1;
29312 DECL_IS_NOVOPS (new_fndecl
) = 1;
29313 TREE_READONLY (new_fndecl
) = 1;
29318 /* Handler for an ACML-style interface to
29319 a library with vectorized intrinsics. */
29322 ix86_veclibabi_acml (enum built_in_function fn
, tree type_out
, tree type_in
)
29324 char name
[20] = "__vr.._";
29325 tree fntype
, new_fndecl
, args
;
29328 enum machine_mode el_mode
, in_mode
;
29331 /* The ACML is 64bits only and suitable for unsafe math only as
29332 it does not correctly support parts of IEEE with the required
29333 precision such as denormals. */
29335 || !flag_unsafe_math_optimizations
)
29338 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
29339 n
= TYPE_VECTOR_SUBPARTS (type_out
);
29340 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
29341 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
29342 if (el_mode
!= in_mode
29352 case BUILT_IN_LOG2
:
29353 case BUILT_IN_LOG10
:
29356 if (el_mode
!= DFmode
29361 case BUILT_IN_SINF
:
29362 case BUILT_IN_COSF
:
29363 case BUILT_IN_EXPF
:
29364 case BUILT_IN_POWF
:
29365 case BUILT_IN_LOGF
:
29366 case BUILT_IN_LOG2F
:
29367 case BUILT_IN_LOG10F
:
29370 if (el_mode
!= SFmode
29379 bname
= IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn
)));
29380 sprintf (name
+ 7, "%s", bname
+10);
29383 for (args
= DECL_ARGUMENTS (builtin_decl_implicit (fn
));
29385 args
= TREE_CHAIN (args
))
29389 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
29391 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
29393 /* Build a function declaration for the vectorized function. */
29394 new_fndecl
= build_decl (BUILTINS_LOCATION
,
29395 FUNCTION_DECL
, get_identifier (name
), fntype
);
29396 TREE_PUBLIC (new_fndecl
) = 1;
29397 DECL_EXTERNAL (new_fndecl
) = 1;
29398 DECL_IS_NOVOPS (new_fndecl
) = 1;
29399 TREE_READONLY (new_fndecl
) = 1;
29405 /* Returns a decl of a function that implements conversion of an integer vector
29406 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
29407 are the types involved when converting according to CODE.
29408 Return NULL_TREE if it is not available. */
29411 ix86_vectorize_builtin_conversion (unsigned int code
,
29412 tree dest_type
, tree src_type
)
29420 switch (TYPE_MODE (src_type
))
29423 switch (TYPE_MODE (dest_type
))
29426 return (TYPE_UNSIGNED (src_type
)
29427 ? ix86_builtins
[IX86_BUILTIN_CVTUDQ2PS
]
29428 : ix86_builtins
[IX86_BUILTIN_CVTDQ2PS
]);
29430 return (TYPE_UNSIGNED (src_type
)
29432 : ix86_builtins
[IX86_BUILTIN_CVTDQ2PD256
]);
29438 switch (TYPE_MODE (dest_type
))
29441 return (TYPE_UNSIGNED (src_type
)
29443 : ix86_builtins
[IX86_BUILTIN_CVTDQ2PS256
]);
29452 case FIX_TRUNC_EXPR
:
29453 switch (TYPE_MODE (dest_type
))
29456 switch (TYPE_MODE (src_type
))
29459 return (TYPE_UNSIGNED (dest_type
)
29461 : ix86_builtins
[IX86_BUILTIN_CVTTPS2DQ
]);
29463 return (TYPE_UNSIGNED (dest_type
)
29465 : ix86_builtins
[IX86_BUILTIN_CVTTPD2DQ256
]);
29472 switch (TYPE_MODE (src_type
))
29475 return (TYPE_UNSIGNED (dest_type
)
29477 : ix86_builtins
[IX86_BUILTIN_CVTTPS2DQ256
]);
29494 /* Returns a code for a target-specific builtin that implements
29495 reciprocal of the function, or NULL_TREE if not available. */
29498 ix86_builtin_reciprocal (unsigned int fn
, bool md_fn
,
29499 bool sqrt ATTRIBUTE_UNUSED
)
29501 if (! (TARGET_SSE_MATH
&& !optimize_insn_for_size_p ()
29502 && flag_finite_math_only
&& !flag_trapping_math
29503 && flag_unsafe_math_optimizations
))
29507 /* Machine dependent builtins. */
29510 /* Vectorized version of sqrt to rsqrt conversion. */
29511 case IX86_BUILTIN_SQRTPS_NR
:
29512 return ix86_builtins
[IX86_BUILTIN_RSQRTPS_NR
];
29514 case IX86_BUILTIN_SQRTPS_NR256
:
29515 return ix86_builtins
[IX86_BUILTIN_RSQRTPS_NR256
];
29521 /* Normal builtins. */
29524 /* Sqrt to rsqrt conversion. */
29525 case BUILT_IN_SQRTF
:
29526 return ix86_builtins
[IX86_BUILTIN_RSQRTF
];
29533 /* Helper for avx_vpermilps256_operand et al. This is also used by
29534 the expansion functions to turn the parallel back into a mask.
29535 The return value is 0 for no match and the imm8+1 for a match. */
29538 avx_vpermilp_parallel (rtx par
, enum machine_mode mode
)
29540 unsigned i
, nelt
= GET_MODE_NUNITS (mode
);
29542 unsigned char ipar
[8];
29544 if (XVECLEN (par
, 0) != (int) nelt
)
29547 /* Validate that all of the elements are constants, and not totally
29548 out of range. Copy the data into an integral array to make the
29549 subsequent checks easier. */
29550 for (i
= 0; i
< nelt
; ++i
)
29552 rtx er
= XVECEXP (par
, 0, i
);
29553 unsigned HOST_WIDE_INT ei
;
29555 if (!CONST_INT_P (er
))
29566 /* In the 256-bit DFmode case, we can only move elements within
29568 for (i
= 0; i
< 2; ++i
)
29572 mask
|= ipar
[i
] << i
;
29574 for (i
= 2; i
< 4; ++i
)
29578 mask
|= (ipar
[i
] - 2) << i
;
29583 /* In the 256-bit SFmode case, we have full freedom of movement
29584 within the low 128-bit lane, but the high 128-bit lane must
29585 mirror the exact same pattern. */
29586 for (i
= 0; i
< 4; ++i
)
29587 if (ipar
[i
] + 4 != ipar
[i
+ 4])
29594 /* In the 128-bit case, we've full freedom in the placement of
29595 the elements from the source operand. */
29596 for (i
= 0; i
< nelt
; ++i
)
29597 mask
|= ipar
[i
] << (i
* (nelt
/ 2));
29601 gcc_unreachable ();
29604 /* Make sure success has a non-zero value by adding one. */
29608 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
29609 the expansion functions to turn the parallel back into a mask.
29610 The return value is 0 for no match and the imm8+1 for a match. */
29613 avx_vperm2f128_parallel (rtx par
, enum machine_mode mode
)
29615 unsigned i
, nelt
= GET_MODE_NUNITS (mode
), nelt2
= nelt
/ 2;
29617 unsigned char ipar
[8];
29619 if (XVECLEN (par
, 0) != (int) nelt
)
29622 /* Validate that all of the elements are constants, and not totally
29623 out of range. Copy the data into an integral array to make the
29624 subsequent checks easier. */
29625 for (i
= 0; i
< nelt
; ++i
)
29627 rtx er
= XVECEXP (par
, 0, i
);
29628 unsigned HOST_WIDE_INT ei
;
29630 if (!CONST_INT_P (er
))
29633 if (ei
>= 2 * nelt
)
29638 /* Validate that the halves of the permute are halves. */
29639 for (i
= 0; i
< nelt2
- 1; ++i
)
29640 if (ipar
[i
] + 1 != ipar
[i
+ 1])
29642 for (i
= nelt2
; i
< nelt
- 1; ++i
)
29643 if (ipar
[i
] + 1 != ipar
[i
+ 1])
29646 /* Reconstruct the mask. */
29647 for (i
= 0; i
< 2; ++i
)
29649 unsigned e
= ipar
[i
* nelt2
];
29653 mask
|= e
<< (i
* 4);
29656 /* Make sure success has a non-zero value by adding one. */
29661 /* Store OPERAND to the memory after reload is completed. This means
29662 that we can't easily use assign_stack_local. */
29664 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
29668 gcc_assert (reload_completed
);
29669 if (ix86_using_red_zone ())
29671 result
= gen_rtx_MEM (mode
,
29672 gen_rtx_PLUS (Pmode
,
29674 GEN_INT (-RED_ZONE_SIZE
)));
29675 emit_move_insn (result
, operand
);
29677 else if (TARGET_64BIT
)
29683 operand
= gen_lowpart (DImode
, operand
);
29687 gen_rtx_SET (VOIDmode
,
29688 gen_rtx_MEM (DImode
,
29689 gen_rtx_PRE_DEC (DImode
,
29690 stack_pointer_rtx
)),
29694 gcc_unreachable ();
29696 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
29705 split_double_mode (mode
, &operand
, 1, operands
, operands
+ 1);
29707 gen_rtx_SET (VOIDmode
,
29708 gen_rtx_MEM (SImode
,
29709 gen_rtx_PRE_DEC (Pmode
,
29710 stack_pointer_rtx
)),
29713 gen_rtx_SET (VOIDmode
,
29714 gen_rtx_MEM (SImode
,
29715 gen_rtx_PRE_DEC (Pmode
,
29716 stack_pointer_rtx
)),
29721 /* Store HImodes as SImodes. */
29722 operand
= gen_lowpart (SImode
, operand
);
29726 gen_rtx_SET (VOIDmode
,
29727 gen_rtx_MEM (GET_MODE (operand
),
29728 gen_rtx_PRE_DEC (SImode
,
29729 stack_pointer_rtx
)),
29733 gcc_unreachable ();
29735 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
29740 /* Free operand from the memory. */
29742 ix86_free_from_memory (enum machine_mode mode
)
29744 if (!ix86_using_red_zone ())
29748 if (mode
== DImode
|| TARGET_64BIT
)
29752 /* Use LEA to deallocate stack space. In peephole2 it will be converted
29753 to pop or add instruction if registers are available. */
29754 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
29755 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
29760 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
29762 Put float CONST_DOUBLE in the constant pool instead of fp regs.
29763 QImode must go into class Q_REGS.
29764 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
29765 movdf to do mem-to-mem moves through integer regs. */
29768 ix86_preferred_reload_class (rtx x
, reg_class_t regclass
)
29770 enum machine_mode mode
= GET_MODE (x
);
29772 /* We're only allowed to return a subclass of CLASS. Many of the
29773 following checks fail for NO_REGS, so eliminate that early. */
29774 if (regclass
== NO_REGS
)
29777 /* All classes can load zeros. */
29778 if (x
== CONST0_RTX (mode
))
29781 /* Force constants into memory if we are loading a (nonzero) constant into
29782 an MMX or SSE register. This is because there are no MMX/SSE instructions
29783 to load from a constant. */
29785 && (MAYBE_MMX_CLASS_P (regclass
) || MAYBE_SSE_CLASS_P (regclass
)))
29788 /* Prefer SSE regs only, if we can use them for math. */
29789 if (TARGET_SSE_MATH
&& !TARGET_MIX_SSE_I387
&& SSE_FLOAT_MODE_P (mode
))
29790 return SSE_CLASS_P (regclass
) ? regclass
: NO_REGS
;
29792 /* Floating-point constants need more complex checks. */
29793 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
29795 /* General regs can load everything. */
29796 if (reg_class_subset_p (regclass
, GENERAL_REGS
))
29799 /* Floats can load 0 and 1 plus some others. Note that we eliminated
29800 zero above. We only want to wind up preferring 80387 registers if
29801 we plan on doing computation with them. */
29803 && standard_80387_constant_p (x
) > 0)
29805 /* Limit class to non-sse. */
29806 if (regclass
== FLOAT_SSE_REGS
)
29808 if (regclass
== FP_TOP_SSE_REGS
)
29810 if (regclass
== FP_SECOND_SSE_REGS
)
29811 return FP_SECOND_REG
;
29812 if (regclass
== FLOAT_INT_REGS
|| regclass
== FLOAT_REGS
)
29819 /* Generally when we see PLUS here, it's the function invariant
29820 (plus soft-fp const_int). Which can only be computed into general
29822 if (GET_CODE (x
) == PLUS
)
29823 return reg_class_subset_p (regclass
, GENERAL_REGS
) ? regclass
: NO_REGS
;
29825 /* QImode constants are easy to load, but non-constant QImode data
29826 must go into Q_REGS. */
29827 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
29829 if (reg_class_subset_p (regclass
, Q_REGS
))
29831 if (reg_class_subset_p (Q_REGS
, regclass
))
29839 /* Discourage putting floating-point values in SSE registers unless
29840 SSE math is being used, and likewise for the 387 registers. */
29842 ix86_preferred_output_reload_class (rtx x
, reg_class_t regclass
)
29844 enum machine_mode mode
= GET_MODE (x
);
29846 /* Restrict the output reload class to the register bank that we are doing
29847 math on. If we would like not to return a subset of CLASS, reject this
29848 alternative: if reload cannot do this, it will still use its choice. */
29849 mode
= GET_MODE (x
);
29850 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
29851 return MAYBE_SSE_CLASS_P (regclass
) ? SSE_REGS
: NO_REGS
;
29853 if (X87_FLOAT_MODE_P (mode
))
29855 if (regclass
== FP_TOP_SSE_REGS
)
29857 else if (regclass
== FP_SECOND_SSE_REGS
)
29858 return FP_SECOND_REG
;
29860 return FLOAT_CLASS_P (regclass
) ? regclass
: NO_REGS
;
29867 ix86_secondary_reload (bool in_p
, rtx x
, reg_class_t rclass
,
29868 enum machine_mode mode
, secondary_reload_info
*sri
)
29870 /* Double-word spills from general registers to non-offsettable memory
29871 references (zero-extended addresses) require special handling. */
29874 && GET_MODE_SIZE (mode
) > UNITS_PER_WORD
29875 && rclass
== GENERAL_REGS
29876 && !offsettable_memref_p (x
))
29879 ? CODE_FOR_reload_noff_load
29880 : CODE_FOR_reload_noff_store
);
29881 /* Add the cost of moving address to a temporary. */
29882 sri
->extra_cost
= 1;
29887 /* QImode spills from non-QI registers require
29888 intermediate register on 32bit targets. */
29890 && !in_p
&& mode
== QImode
29891 && (rclass
== GENERAL_REGS
29892 || rclass
== LEGACY_REGS
29893 || rclass
== INDEX_REGS
))
29902 if (regno
>= FIRST_PSEUDO_REGISTER
|| GET_CODE (x
) == SUBREG
)
29903 regno
= true_regnum (x
);
29905 /* Return Q_REGS if the operand is in memory. */
29910 /* This condition handles corner case where an expression involving
29911 pointers gets vectorized. We're trying to use the address of a
29912 stack slot as a vector initializer.
29914 (set (reg:V2DI 74 [ vect_cst_.2 ])
29915 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
29917 Eventually frame gets turned into sp+offset like this:
29919 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
29920 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
29921 (const_int 392 [0x188]))))
29923 That later gets turned into:
29925 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
29926 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
29927 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
29929 We'll have the following reload recorded:
29931 Reload 0: reload_in (DI) =
29932 (plus:DI (reg/f:DI 7 sp)
29933 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
29934 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
29935 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
29936 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
29937 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
29938 reload_reg_rtx: (reg:V2DI 22 xmm1)
29940 Which isn't going to work since SSE instructions can't handle scalar
29941 additions. Returning GENERAL_REGS forces the addition into integer
29942 register and reload can handle subsequent reloads without problems. */
29944 if (in_p
&& GET_CODE (x
) == PLUS
29945 && SSE_CLASS_P (rclass
)
29946 && SCALAR_INT_MODE_P (mode
))
29947 return GENERAL_REGS
;
29952 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
29955 ix86_class_likely_spilled_p (reg_class_t rclass
)
29966 case SSE_FIRST_REG
:
29968 case FP_SECOND_REG
:
29978 /* If we are copying between general and FP registers, we need a memory
29979 location. The same is true for SSE and MMX registers.
29981 To optimize register_move_cost performance, allow inline variant.
29983 The macro can't work reliably when one of the CLASSES is class containing
29984 registers from multiple units (SSE, MMX, integer). We avoid this by never
29985 combining those units in single alternative in the machine description.
29986 Ensure that this constraint holds to avoid unexpected surprises.
29988 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
29989 enforce these sanity checks. */
29992 inline_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
29993 enum machine_mode mode
, int strict
)
29995 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
29996 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
29997 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
29998 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
29999 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
30000 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
30002 gcc_assert (!strict
);
30006 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
30009 /* ??? This is a lie. We do have moves between mmx/general, and for
30010 mmx/sse2. But by saying we need secondary memory we discourage the
30011 register allocator from using the mmx registers unless needed. */
30012 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
30015 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
30017 /* SSE1 doesn't have any direct moves from other classes. */
30021 /* If the target says that inter-unit moves are more expensive
30022 than moving through memory, then don't generate them. */
30023 if (!TARGET_INTER_UNIT_MOVES
)
30026 /* Between SSE and general, we have moves no larger than word size. */
30027 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
30035 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
30036 enum machine_mode mode
, int strict
)
30038 return inline_secondary_memory_needed (class1
, class2
, mode
, strict
);
30041 /* Implement the TARGET_CLASS_MAX_NREGS hook.
30043 On the 80386, this is the size of MODE in words,
30044 except in the FP regs, where a single reg is always enough. */
30046 static unsigned char
30047 ix86_class_max_nregs (reg_class_t rclass
, enum machine_mode mode
)
30049 if (MAYBE_INTEGER_CLASS_P (rclass
))
30051 if (mode
== XFmode
)
30052 return (TARGET_64BIT
? 2 : 3);
30053 else if (mode
== XCmode
)
30054 return (TARGET_64BIT
? 4 : 6);
30056 return ((GET_MODE_SIZE (mode
) + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
);
30060 if (COMPLEX_MODE_P (mode
))
30067 /* Return true if the registers in CLASS cannot represent the change from
30068 modes FROM to TO. */
30071 ix86_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
30072 enum reg_class regclass
)
30077 /* x87 registers can't do subreg at all, as all values are reformatted
30078 to extended precision. */
30079 if (MAYBE_FLOAT_CLASS_P (regclass
))
30082 if (MAYBE_SSE_CLASS_P (regclass
) || MAYBE_MMX_CLASS_P (regclass
))
30084 /* Vector registers do not support QI or HImode loads. If we don't
30085 disallow a change to these modes, reload will assume it's ok to
30086 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
30087 the vec_dupv4hi pattern. */
30088 if (GET_MODE_SIZE (from
) < 4)
30091 /* Vector registers do not support subreg with nonzero offsets, which
30092 are otherwise valid for integer registers. Since we can't see
30093 whether we have a nonzero offset from here, prohibit all
30094 nonparadoxical subregs changing size. */
30095 if (GET_MODE_SIZE (to
) < GET_MODE_SIZE (from
))
30102 /* Return the cost of moving data of mode M between a
30103 register and memory. A value of 2 is the default; this cost is
30104 relative to those in `REGISTER_MOVE_COST'.
30106 This function is used extensively by register_move_cost that is used to
30107 build tables at startup. Make it inline in this case.
30108 When IN is 2, return maximum of in and out move cost.
30110 If moving between registers and memory is more expensive than
30111 between two registers, you should define this macro to express the
30114 Model also increased moving costs of QImode registers in non
30118 inline_memory_move_cost (enum machine_mode mode
, enum reg_class regclass
,
30122 if (FLOAT_CLASS_P (regclass
))
30140 return MAX (ix86_cost
->fp_load
[index
], ix86_cost
->fp_store
[index
]);
30141 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
30143 if (SSE_CLASS_P (regclass
))
30146 switch (GET_MODE_SIZE (mode
))
30161 return MAX (ix86_cost
->sse_load
[index
], ix86_cost
->sse_store
[index
]);
30162 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
30164 if (MMX_CLASS_P (regclass
))
30167 switch (GET_MODE_SIZE (mode
))
30179 return MAX (ix86_cost
->mmx_load
[index
], ix86_cost
->mmx_store
[index
]);
30180 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
30182 switch (GET_MODE_SIZE (mode
))
30185 if (Q_CLASS_P (regclass
) || TARGET_64BIT
)
30188 return ix86_cost
->int_store
[0];
30189 if (TARGET_PARTIAL_REG_DEPENDENCY
30190 && optimize_function_for_speed_p (cfun
))
30191 cost
= ix86_cost
->movzbl_load
;
30193 cost
= ix86_cost
->int_load
[0];
30195 return MAX (cost
, ix86_cost
->int_store
[0]);
30201 return MAX (ix86_cost
->movzbl_load
, ix86_cost
->int_store
[0] + 4);
30203 return ix86_cost
->movzbl_load
;
30205 return ix86_cost
->int_store
[0] + 4;
30210 return MAX (ix86_cost
->int_load
[1], ix86_cost
->int_store
[1]);
30211 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
30213 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
30214 if (mode
== TFmode
)
30217 cost
= MAX (ix86_cost
->int_load
[2] , ix86_cost
->int_store
[2]);
30219 cost
= ix86_cost
->int_load
[2];
30221 cost
= ix86_cost
->int_store
[2];
30222 return (cost
* (((int) GET_MODE_SIZE (mode
)
30223 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
30228 ix86_memory_move_cost (enum machine_mode mode
, reg_class_t regclass
,
30231 return inline_memory_move_cost (mode
, (enum reg_class
) regclass
, in
? 1 : 0);
30235 /* Return the cost of moving data from a register in class CLASS1 to
30236 one in class CLASS2.
30238 It is not required that the cost always equal 2 when FROM is the same as TO;
30239 on some machines it is expensive to move between registers if they are not
30240 general registers. */
30243 ix86_register_move_cost (enum machine_mode mode
, reg_class_t class1_i
,
30244 reg_class_t class2_i
)
30246 enum reg_class class1
= (enum reg_class
) class1_i
;
30247 enum reg_class class2
= (enum reg_class
) class2_i
;
30249 /* In case we require secondary memory, compute cost of the store followed
30250 by load. In order to avoid bad register allocation choices, we need
30251 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
30253 if (inline_secondary_memory_needed (class1
, class2
, mode
, 0))
30257 cost
+= inline_memory_move_cost (mode
, class1
, 2);
30258 cost
+= inline_memory_move_cost (mode
, class2
, 2);
30260 /* In case of copying from general_purpose_register we may emit multiple
30261 stores followed by single load causing memory size mismatch stall.
30262 Count this as arbitrarily high cost of 20. */
30263 if (targetm
.class_max_nregs (class1
, mode
)
30264 > targetm
.class_max_nregs (class2
, mode
))
30267 /* In the case of FP/MMX moves, the registers actually overlap, and we
30268 have to switch modes in order to treat them differently. */
30269 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
30270 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
30276 /* Moves between SSE/MMX and integer unit are expensive. */
30277 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
30278 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
30280 /* ??? By keeping returned value relatively high, we limit the number
30281 of moves between integer and MMX/SSE registers for all targets.
30282 Additionally, high value prevents problem with x86_modes_tieable_p(),
30283 where integer modes in MMX/SSE registers are not tieable
30284 because of missing QImode and HImode moves to, from or between
30285 MMX/SSE registers. */
30286 return MAX (8, ix86_cost
->mmxsse_to_integer
);
30288 if (MAYBE_FLOAT_CLASS_P (class1
))
30289 return ix86_cost
->fp_move
;
30290 if (MAYBE_SSE_CLASS_P (class1
))
30291 return ix86_cost
->sse_move
;
30292 if (MAYBE_MMX_CLASS_P (class1
))
30293 return ix86_cost
->mmx_move
;
30297 /* Return TRUE if hard register REGNO can hold a value of machine-mode
30301 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
30303 /* Flags and only flags can only hold CCmode values. */
30304 if (CC_REGNO_P (regno
))
30305 return GET_MODE_CLASS (mode
) == MODE_CC
;
30306 if (GET_MODE_CLASS (mode
) == MODE_CC
30307 || GET_MODE_CLASS (mode
) == MODE_RANDOM
30308 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
30310 if (FP_REGNO_P (regno
))
30311 return VALID_FP_MODE_P (mode
);
30312 if (SSE_REGNO_P (regno
))
30314 /* We implement the move patterns for all vector modes into and
30315 out of SSE registers, even when no operation instructions
30316 are available. OImode move is available only when AVX is
30318 return ((TARGET_AVX
&& mode
== OImode
)
30319 || VALID_AVX256_REG_MODE (mode
)
30320 || VALID_SSE_REG_MODE (mode
)
30321 || VALID_SSE2_REG_MODE (mode
)
30322 || VALID_MMX_REG_MODE (mode
)
30323 || VALID_MMX_REG_MODE_3DNOW (mode
));
30325 if (MMX_REGNO_P (regno
))
30327 /* We implement the move patterns for 3DNOW modes even in MMX mode,
30328 so if the register is available at all, then we can move data of
30329 the given mode into or out of it. */
30330 return (VALID_MMX_REG_MODE (mode
)
30331 || VALID_MMX_REG_MODE_3DNOW (mode
));
30334 if (mode
== QImode
)
30336 /* Take care for QImode values - they can be in non-QI regs,
30337 but then they do cause partial register stalls. */
30338 if (regno
<= BX_REG
|| TARGET_64BIT
)
30340 if (!TARGET_PARTIAL_REG_STALL
)
30342 return !can_create_pseudo_p ();
30344 /* We handle both integer and floats in the general purpose registers. */
30345 else if (VALID_INT_MODE_P (mode
))
30347 else if (VALID_FP_MODE_P (mode
))
30349 else if (VALID_DFP_MODE_P (mode
))
30351 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
30352 on to use that value in smaller contexts, this can easily force a
30353 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
30354 supporting DImode, allow it. */
30355 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
30361 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
30362 tieable integer mode. */
30365 ix86_tieable_integer_mode_p (enum machine_mode mode
)
30374 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
30377 return TARGET_64BIT
;
30384 /* Return true if MODE1 is accessible in a register that can hold MODE2
30385 without copying. That is, all register classes that can hold MODE2
30386 can also hold MODE1. */
30389 ix86_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
30391 if (mode1
== mode2
)
30394 if (ix86_tieable_integer_mode_p (mode1
)
30395 && ix86_tieable_integer_mode_p (mode2
))
30398 /* MODE2 being XFmode implies fp stack or general regs, which means we
30399 can tie any smaller floating point modes to it. Note that we do not
30400 tie this with TFmode. */
30401 if (mode2
== XFmode
)
30402 return mode1
== SFmode
|| mode1
== DFmode
;
30404 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
30405 that we can tie it with SFmode. */
30406 if (mode2
== DFmode
)
30407 return mode1
== SFmode
;
30409 /* If MODE2 is only appropriate for an SSE register, then tie with
30410 any other mode acceptable to SSE registers. */
30411 if (GET_MODE_SIZE (mode2
) == 16
30412 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
30413 return (GET_MODE_SIZE (mode1
) == 16
30414 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
30416 /* If MODE2 is appropriate for an MMX register, then tie
30417 with any other mode acceptable to MMX registers. */
30418 if (GET_MODE_SIZE (mode2
) == 8
30419 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
30420 return (GET_MODE_SIZE (mode1
) == 8
30421 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
));
30426 /* Compute a (partial) cost for rtx X. Return true if the complete
30427 cost has been computed, and false if subexpressions should be
30428 scanned. In either case, *TOTAL contains the cost result. */
30431 ix86_rtx_costs (rtx x
, int code
, int outer_code_i
, int opno
, int *total
,
30434 enum rtx_code outer_code
= (enum rtx_code
) outer_code_i
;
30435 enum machine_mode mode
= GET_MODE (x
);
30436 const struct processor_costs
*cost
= speed
? ix86_cost
: &ix86_size_cost
;
30444 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
30446 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
30448 else if (flag_pic
&& SYMBOLIC_CONST (x
)
30450 || (!GET_CODE (x
) != LABEL_REF
30451 && (GET_CODE (x
) != SYMBOL_REF
30452 || !SYMBOL_REF_LOCAL_P (x
)))))
30459 if (mode
== VOIDmode
)
30462 switch (standard_80387_constant_p (x
))
30467 default: /* Other constants */
30472 /* Start with (MEM (SYMBOL_REF)), since that's where
30473 it'll probably end up. Add a penalty for size. */
30474 *total
= (COSTS_N_INSNS (1)
30475 + (flag_pic
!= 0 && !TARGET_64BIT
)
30476 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
30482 /* The zero extensions is often completely free on x86_64, so make
30483 it as cheap as possible. */
30484 if (TARGET_64BIT
&& mode
== DImode
30485 && GET_MODE (XEXP (x
, 0)) == SImode
)
30487 else if (TARGET_ZERO_EXTEND_WITH_AND
)
30488 *total
= cost
->add
;
30490 *total
= cost
->movzx
;
30494 *total
= cost
->movsx
;
30498 if (CONST_INT_P (XEXP (x
, 1))
30499 && (GET_MODE (XEXP (x
, 0)) != DImode
|| TARGET_64BIT
))
30501 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
30504 *total
= cost
->add
;
30507 if ((value
== 2 || value
== 3)
30508 && cost
->lea
<= cost
->shift_const
)
30510 *total
= cost
->lea
;
30520 if (!TARGET_64BIT
&& GET_MODE (XEXP (x
, 0)) == DImode
)
30522 if (CONST_INT_P (XEXP (x
, 1)))
30524 if (INTVAL (XEXP (x
, 1)) > 32)
30525 *total
= cost
->shift_const
+ COSTS_N_INSNS (2);
30527 *total
= cost
->shift_const
* 2;
30531 if (GET_CODE (XEXP (x
, 1)) == AND
)
30532 *total
= cost
->shift_var
* 2;
30534 *total
= cost
->shift_var
* 6 + COSTS_N_INSNS (2);
30539 if (CONST_INT_P (XEXP (x
, 1)))
30540 *total
= cost
->shift_const
;
30542 *total
= cost
->shift_var
;
30550 gcc_assert (FLOAT_MODE_P (mode
));
30551 gcc_assert (TARGET_FMA
|| TARGET_FMA4
);
30553 /* ??? SSE scalar/vector cost should be used here. */
30554 /* ??? Bald assumption that fma has the same cost as fmul. */
30555 *total
= cost
->fmul
;
30556 *total
+= rtx_cost (XEXP (x
, 1), FMA
, 1, speed
);
30558 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
30560 if (GET_CODE (sub
) == NEG
)
30561 sub
= XEXP (sub
, 0);
30562 *total
+= rtx_cost (sub
, FMA
, 0, speed
);
30565 if (GET_CODE (sub
) == NEG
)
30566 sub
= XEXP (sub
, 0);
30567 *total
+= rtx_cost (sub
, FMA
, 2, speed
);
30572 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
30574 /* ??? SSE scalar cost should be used here. */
30575 *total
= cost
->fmul
;
30578 else if (X87_FLOAT_MODE_P (mode
))
30580 *total
= cost
->fmul
;
30583 else if (FLOAT_MODE_P (mode
))
30585 /* ??? SSE vector cost should be used here. */
30586 *total
= cost
->fmul
;
30591 rtx op0
= XEXP (x
, 0);
30592 rtx op1
= XEXP (x
, 1);
30594 if (CONST_INT_P (XEXP (x
, 1)))
30596 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
30597 for (nbits
= 0; value
!= 0; value
&= value
- 1)
30601 /* This is arbitrary. */
30604 /* Compute costs correctly for widening multiplication. */
30605 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op0
) == ZERO_EXTEND
)
30606 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
30607 == GET_MODE_SIZE (mode
))
30609 int is_mulwiden
= 0;
30610 enum machine_mode inner_mode
= GET_MODE (op0
);
30612 if (GET_CODE (op0
) == GET_CODE (op1
))
30613 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
30614 else if (CONST_INT_P (op1
))
30616 if (GET_CODE (op0
) == SIGN_EXTEND
)
30617 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
30620 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
30624 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
30627 *total
= (cost
->mult_init
[MODE_INDEX (mode
)]
30628 + nbits
* cost
->mult_bit
30629 + rtx_cost (op0
, outer_code
, opno
, speed
)
30630 + rtx_cost (op1
, outer_code
, opno
, speed
));
30639 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
30640 /* ??? SSE cost should be used here. */
30641 *total
= cost
->fdiv
;
30642 else if (X87_FLOAT_MODE_P (mode
))
30643 *total
= cost
->fdiv
;
30644 else if (FLOAT_MODE_P (mode
))
30645 /* ??? SSE vector cost should be used here. */
30646 *total
= cost
->fdiv
;
30648 *total
= cost
->divide
[MODE_INDEX (mode
)];
30652 if (GET_MODE_CLASS (mode
) == MODE_INT
30653 && GET_MODE_BITSIZE (mode
) <= GET_MODE_BITSIZE (Pmode
))
30655 if (GET_CODE (XEXP (x
, 0)) == PLUS
30656 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
30657 && CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 0), 1))
30658 && CONSTANT_P (XEXP (x
, 1)))
30660 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
30661 if (val
== 2 || val
== 4 || val
== 8)
30663 *total
= cost
->lea
;
30664 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1),
30665 outer_code
, opno
, speed
);
30666 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
30667 outer_code
, opno
, speed
);
30668 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
30672 else if (GET_CODE (XEXP (x
, 0)) == MULT
30673 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
30675 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
30676 if (val
== 2 || val
== 4 || val
== 8)
30678 *total
= cost
->lea
;
30679 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0),
30680 outer_code
, opno
, speed
);
30681 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
30685 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
30687 *total
= cost
->lea
;
30688 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0),
30689 outer_code
, opno
, speed
);
30690 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1),
30691 outer_code
, opno
, speed
);
30692 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
30699 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
30701 /* ??? SSE cost should be used here. */
30702 *total
= cost
->fadd
;
30705 else if (X87_FLOAT_MODE_P (mode
))
30707 *total
= cost
->fadd
;
30710 else if (FLOAT_MODE_P (mode
))
30712 /* ??? SSE vector cost should be used here. */
30713 *total
= cost
->fadd
;
30721 if (!TARGET_64BIT
&& mode
== DImode
)
30723 *total
= (cost
->add
* 2
30724 + (rtx_cost (XEXP (x
, 0), outer_code
, opno
, speed
)
30725 << (GET_MODE (XEXP (x
, 0)) != DImode
))
30726 + (rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
)
30727 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
30733 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
30735 /* ??? SSE cost should be used here. */
30736 *total
= cost
->fchs
;
30739 else if (X87_FLOAT_MODE_P (mode
))
30741 *total
= cost
->fchs
;
30744 else if (FLOAT_MODE_P (mode
))
30746 /* ??? SSE vector cost should be used here. */
30747 *total
= cost
->fchs
;
30753 if (!TARGET_64BIT
&& mode
== DImode
)
30754 *total
= cost
->add
* 2;
30756 *total
= cost
->add
;
30760 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
30761 && XEXP (XEXP (x
, 0), 1) == const1_rtx
30762 && CONST_INT_P (XEXP (XEXP (x
, 0), 2))
30763 && XEXP (x
, 1) == const0_rtx
)
30765 /* This kind of construct is implemented using test[bwl].
30766 Treat it as if we had an AND. */
30767 *total
= (cost
->add
30768 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
, opno
, speed
)
30769 + rtx_cost (const1_rtx
, outer_code
, opno
, speed
));
30775 if (!(SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
))
30780 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
30781 /* ??? SSE cost should be used here. */
30782 *total
= cost
->fabs
;
30783 else if (X87_FLOAT_MODE_P (mode
))
30784 *total
= cost
->fabs
;
30785 else if (FLOAT_MODE_P (mode
))
30786 /* ??? SSE vector cost should be used here. */
30787 *total
= cost
->fabs
;
30791 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
30792 /* ??? SSE cost should be used here. */
30793 *total
= cost
->fsqrt
;
30794 else if (X87_FLOAT_MODE_P (mode
))
30795 *total
= cost
->fsqrt
;
30796 else if (FLOAT_MODE_P (mode
))
30797 /* ??? SSE vector cost should be used here. */
30798 *total
= cost
->fsqrt
;
30802 if (XINT (x
, 1) == UNSPEC_TP
)
30809 case VEC_DUPLICATE
:
30810 /* ??? Assume all of these vector manipulation patterns are
30811 recognizable. In which case they all pretty much have the
30813 *total
= COSTS_N_INSNS (1);
30823 static int current_machopic_label_num
;
30825 /* Given a symbol name and its associated stub, write out the
30826 definition of the stub. */
30829 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
30831 unsigned int length
;
30832 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
30833 int label
= ++current_machopic_label_num
;
30835 /* For 64-bit we shouldn't get here. */
30836 gcc_assert (!TARGET_64BIT
);
30838 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
30839 symb
= targetm
.strip_name_encoding (symb
);
30841 length
= strlen (stub
);
30842 binder_name
= XALLOCAVEC (char, length
+ 32);
30843 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
30845 length
= strlen (symb
);
30846 symbol_name
= XALLOCAVEC (char, length
+ 32);
30847 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
30849 sprintf (lazy_ptr_name
, "L%d$lz", label
);
30851 if (MACHOPIC_ATT_STUB
)
30852 switch_to_section (darwin_sections
[machopic_picsymbol_stub3_section
]);
30853 else if (MACHOPIC_PURE
)
30854 switch_to_section (darwin_sections
[machopic_picsymbol_stub2_section
]);
30856 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
30858 fprintf (file
, "%s:\n", stub
);
30859 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
30861 if (MACHOPIC_ATT_STUB
)
30863 fprintf (file
, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
30865 else if (MACHOPIC_PURE
)
30868 /* 25-byte PIC stub using "CALL get_pc_thunk". */
30869 rtx tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
30870 output_set_got (tmp
, NULL_RTX
); /* "CALL ___<cpu>.get_pc_thunk.cx". */
30871 fprintf (file
, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
30872 label
, lazy_ptr_name
, label
);
30873 fprintf (file
, "\tjmp\t*%%ecx\n");
30876 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
30878 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
30879 it needs no stub-binding-helper. */
30880 if (MACHOPIC_ATT_STUB
)
30883 fprintf (file
, "%s:\n", binder_name
);
30887 fprintf (file
, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name
, binder_name
);
30888 fprintf (file
, "\tpushl\t%%ecx\n");
30891 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
30893 fputs ("\tjmp\tdyld_stub_binding_helper\n", file
);
30895 /* N.B. Keep the correspondence of these
30896 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
30897 old-pic/new-pic/non-pic stubs; altering this will break
30898 compatibility with existing dylibs. */
30901 /* 25-byte PIC stub using "CALL get_pc_thunk". */
30902 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr2_section
]);
30905 /* 16-byte -mdynamic-no-pic stub. */
30906 switch_to_section(darwin_sections
[machopic_lazy_symbol_ptr3_section
]);
30908 fprintf (file
, "%s:\n", lazy_ptr_name
);
30909 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
30910 fprintf (file
, ASM_LONG
"%s\n", binder_name
);
30912 #endif /* TARGET_MACHO */
30914 /* Order the registers for register allocator. */
30917 x86_order_regs_for_local_alloc (void)
30922 /* First allocate the local general purpose registers. */
30923 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
30924 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
30925 reg_alloc_order
[pos
++] = i
;
30927 /* Global general purpose registers. */
30928 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
30929 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
30930 reg_alloc_order
[pos
++] = i
;
30932 /* x87 registers come first in case we are doing FP math
30934 if (!TARGET_SSE_MATH
)
30935 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
30936 reg_alloc_order
[pos
++] = i
;
30938 /* SSE registers. */
30939 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
30940 reg_alloc_order
[pos
++] = i
;
30941 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
30942 reg_alloc_order
[pos
++] = i
;
30944 /* x87 registers. */
30945 if (TARGET_SSE_MATH
)
30946 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
30947 reg_alloc_order
[pos
++] = i
;
30949 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
30950 reg_alloc_order
[pos
++] = i
;
30952 /* Initialize the rest of array as we do not allocate some registers
30954 while (pos
< FIRST_PSEUDO_REGISTER
)
30955 reg_alloc_order
[pos
++] = 0;
30958 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
30959 in struct attribute_spec handler. */
30961 ix86_handle_callee_pop_aggregate_return (tree
*node
, tree name
,
30963 int flags ATTRIBUTE_UNUSED
,
30964 bool *no_add_attrs
)
30966 if (TREE_CODE (*node
) != FUNCTION_TYPE
30967 && TREE_CODE (*node
) != METHOD_TYPE
30968 && TREE_CODE (*node
) != FIELD_DECL
30969 && TREE_CODE (*node
) != TYPE_DECL
)
30971 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
30973 *no_add_attrs
= true;
30978 warning (OPT_Wattributes
, "%qE attribute only available for 32-bit",
30980 *no_add_attrs
= true;
30983 if (is_attribute_p ("callee_pop_aggregate_return", name
))
30987 cst
= TREE_VALUE (args
);
30988 if (TREE_CODE (cst
) != INTEGER_CST
)
30990 warning (OPT_Wattributes
,
30991 "%qE attribute requires an integer constant argument",
30993 *no_add_attrs
= true;
30995 else if (compare_tree_int (cst
, 0) != 0
30996 && compare_tree_int (cst
, 1) != 0)
30998 warning (OPT_Wattributes
,
30999 "argument to %qE attribute is neither zero, nor one",
31001 *no_add_attrs
= true;
31010 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
31011 struct attribute_spec.handler. */
31013 ix86_handle_abi_attribute (tree
*node
, tree name
,
31014 tree args ATTRIBUTE_UNUSED
,
31015 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
31017 if (TREE_CODE (*node
) != FUNCTION_TYPE
31018 && TREE_CODE (*node
) != METHOD_TYPE
31019 && TREE_CODE (*node
) != FIELD_DECL
31020 && TREE_CODE (*node
) != TYPE_DECL
)
31022 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
31024 *no_add_attrs
= true;
31028 /* Can combine regparm with all attributes but fastcall. */
31029 if (is_attribute_p ("ms_abi", name
))
31031 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node
)))
31033 error ("ms_abi and sysv_abi attributes are not compatible");
31038 else if (is_attribute_p ("sysv_abi", name
))
31040 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node
)))
31042 error ("ms_abi and sysv_abi attributes are not compatible");
31051 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
31052 struct attribute_spec.handler. */
31054 ix86_handle_struct_attribute (tree
*node
, tree name
,
31055 tree args ATTRIBUTE_UNUSED
,
31056 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
31059 if (DECL_P (*node
))
31061 if (TREE_CODE (*node
) == TYPE_DECL
)
31062 type
= &TREE_TYPE (*node
);
31067 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
31068 || TREE_CODE (*type
) == UNION_TYPE
)))
31070 warning (OPT_Wattributes
, "%qE attribute ignored",
31072 *no_add_attrs
= true;
31075 else if ((is_attribute_p ("ms_struct", name
)
31076 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
31077 || ((is_attribute_p ("gcc_struct", name
)
31078 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
31080 warning (OPT_Wattributes
, "%qE incompatible attribute ignored",
31082 *no_add_attrs
= true;
31089 ix86_handle_fndecl_attribute (tree
*node
, tree name
,
31090 tree args ATTRIBUTE_UNUSED
,
31091 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
31093 if (TREE_CODE (*node
) != FUNCTION_DECL
)
31095 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
31097 *no_add_attrs
= true;
31103 ix86_ms_bitfield_layout_p (const_tree record_type
)
31105 return ((TARGET_MS_BITFIELD_LAYOUT
31106 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
31107 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
)));
31110 /* Returns an expression indicating where the this parameter is
31111 located on entry to the FUNCTION. */
31114 x86_this_parameter (tree function
)
31116 tree type
= TREE_TYPE (function
);
31117 bool aggr
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
31122 const int *parm_regs
;
31124 if (ix86_function_type_abi (type
) == MS_ABI
)
31125 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
31127 parm_regs
= x86_64_int_parameter_registers
;
31128 return gen_rtx_REG (DImode
, parm_regs
[aggr
]);
31131 nregs
= ix86_function_regparm (type
, function
);
31133 if (nregs
> 0 && !stdarg_p (type
))
31136 unsigned int ccvt
= ix86_get_callcvt (type
);
31138 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
31139 regno
= aggr
? DX_REG
: CX_REG
;
31140 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
31144 return gen_rtx_MEM (SImode
,
31145 plus_constant (stack_pointer_rtx
, 4));
31154 return gen_rtx_MEM (SImode
,
31155 plus_constant (stack_pointer_rtx
, 4));
31158 return gen_rtx_REG (SImode
, regno
);
31161 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, aggr
? 8 : 4));
31164 /* Determine whether x86_output_mi_thunk can succeed. */
31167 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED
,
31168 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
31169 HOST_WIDE_INT vcall_offset
, const_tree function
)
31171 /* 64-bit can handle anything. */
31175 /* For 32-bit, everything's fine if we have one free register. */
31176 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
31179 /* Need a free register for vcall_offset. */
31183 /* Need a free register for GOT references. */
31184 if (flag_pic
&& !targetm
.binds_local_p (function
))
31187 /* Otherwise ok. */
31191 /* Output the assembler code for a thunk function. THUNK_DECL is the
31192 declaration for the thunk function itself, FUNCTION is the decl for
31193 the target function. DELTA is an immediate constant offset to be
31194 added to THIS. If VCALL_OFFSET is nonzero, the word at
31195 *(*this + vcall_offset) should be added to THIS. */
31198 x86_output_mi_thunk (FILE *file
,
31199 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
31200 HOST_WIDE_INT vcall_offset
, tree function
)
31202 rtx this_param
= x86_this_parameter (function
);
31203 rtx this_reg
, tmp
, fnaddr
;
31205 emit_note (NOTE_INSN_PROLOGUE_END
);
31207 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
31208 pull it in now and let DELTA benefit. */
31209 if (REG_P (this_param
))
31210 this_reg
= this_param
;
31211 else if (vcall_offset
)
31213 /* Put the this parameter into %eax. */
31214 this_reg
= gen_rtx_REG (Pmode
, AX_REG
);
31215 emit_move_insn (this_reg
, this_param
);
31218 this_reg
= NULL_RTX
;
31220 /* Adjust the this parameter by a fixed constant. */
31223 rtx delta_rtx
= GEN_INT (delta
);
31224 rtx delta_dst
= this_reg
? this_reg
: this_param
;
31228 if (!x86_64_general_operand (delta_rtx
, Pmode
))
31230 tmp
= gen_rtx_REG (Pmode
, R10_REG
);
31231 emit_move_insn (tmp
, delta_rtx
);
31236 ix86_emit_binop (PLUS
, Pmode
, delta_dst
, delta_rtx
);
31239 /* Adjust the this parameter by a value stored in the vtable. */
31242 rtx vcall_addr
, vcall_mem
, this_mem
;
31243 unsigned int tmp_regno
;
31246 tmp_regno
= R10_REG
;
31249 unsigned int ccvt
= ix86_get_callcvt (TREE_TYPE (function
));
31250 if ((ccvt
& (IX86_CALLCVT_FASTCALL
| IX86_CALLCVT_THISCALL
)) != 0)
31251 tmp_regno
= AX_REG
;
31253 tmp_regno
= CX_REG
;
31255 tmp
= gen_rtx_REG (Pmode
, tmp_regno
);
31257 this_mem
= gen_rtx_MEM (ptr_mode
, this_reg
);
31258 if (Pmode
!= ptr_mode
)
31259 this_mem
= gen_rtx_ZERO_EXTEND (Pmode
, this_mem
);
31260 emit_move_insn (tmp
, this_mem
);
31262 /* Adjust the this parameter. */
31263 vcall_addr
= plus_constant (tmp
, vcall_offset
);
31265 && !ix86_legitimate_address_p (ptr_mode
, vcall_addr
, true))
31267 rtx tmp2
= gen_rtx_REG (Pmode
, R11_REG
);
31268 emit_move_insn (tmp2
, GEN_INT (vcall_offset
));
31269 vcall_addr
= gen_rtx_PLUS (Pmode
, tmp
, tmp2
);
31272 vcall_mem
= gen_rtx_MEM (ptr_mode
, vcall_addr
);
31273 if (Pmode
!= ptr_mode
)
31274 emit_insn (gen_addsi_1_zext (this_reg
,
31275 gen_rtx_REG (ptr_mode
,
31279 ix86_emit_binop (PLUS
, Pmode
, this_reg
, vcall_mem
);
31282 /* If necessary, drop THIS back to its stack slot. */
31283 if (this_reg
&& this_reg
!= this_param
)
31284 emit_move_insn (this_param
, this_reg
);
31286 fnaddr
= XEXP (DECL_RTL (function
), 0);
31289 if (!flag_pic
|| targetm
.binds_local_p (function
)
31290 || cfun
->machine
->call_abi
== MS_ABI
)
31294 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fnaddr
), UNSPEC_GOTPCREL
);
31295 tmp
= gen_rtx_CONST (Pmode
, tmp
);
31296 fnaddr
= gen_rtx_MEM (Pmode
, tmp
);
31301 if (!flag_pic
|| targetm
.binds_local_p (function
))
31304 else if (TARGET_MACHO
)
31306 fnaddr
= machopic_indirect_call_target (DECL_RTL (function
));
31307 fnaddr
= XEXP (fnaddr
, 0);
31309 #endif /* TARGET_MACHO */
31312 tmp
= gen_rtx_REG (Pmode
, CX_REG
);
31313 output_set_got (tmp
, NULL_RTX
);
31315 fnaddr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fnaddr
), UNSPEC_GOT
);
31316 fnaddr
= gen_rtx_PLUS (Pmode
, fnaddr
, tmp
);
31317 fnaddr
= gen_rtx_MEM (Pmode
, fnaddr
);
31321 /* Our sibling call patterns do not allow memories, because we have no
31322 predicate that can distinguish between frame and non-frame memory.
31323 For our purposes here, we can get away with (ab)using a jump pattern,
31324 because we're going to do no optimization. */
31325 if (MEM_P (fnaddr
))
31326 emit_jump_insn (gen_indirect_jump (fnaddr
));
31329 tmp
= gen_rtx_MEM (QImode
, fnaddr
);
31330 tmp
= gen_rtx_CALL (VOIDmode
, tmp
, const0_rtx
);
31331 tmp
= emit_call_insn (tmp
);
31332 SIBLING_CALL_P (tmp
) = 1;
31336 /* Emit just enough of rest_of_compilation to get the insns emitted.
31337 Note that use_thunk calls assemble_start_function et al. */
31338 tmp
= get_insns ();
31339 insn_locators_alloc ();
31340 shorten_branches (tmp
);
31341 final_start_function (tmp
, file
, 1);
31342 final (tmp
, file
, 1);
31343 final_end_function ();
31347 x86_file_start (void)
31349 default_file_start ();
31351 darwin_file_start ();
31353 if (X86_FILE_START_VERSION_DIRECTIVE
)
31354 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
31355 if (X86_FILE_START_FLTUSED
)
31356 fputs ("\t.global\t__fltused\n", asm_out_file
);
31357 if (ix86_asm_dialect
== ASM_INTEL
)
31358 fputs ("\t.intel_syntax noprefix\n", asm_out_file
);
31362 x86_field_alignment (tree field
, int computed
)
31364 enum machine_mode mode
;
31365 tree type
= TREE_TYPE (field
);
31367 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
31369 mode
= TYPE_MODE (strip_array_types (type
));
31370 if (mode
== DFmode
|| mode
== DCmode
31371 || GET_MODE_CLASS (mode
) == MODE_INT
31372 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
31373 return MIN (32, computed
);
31377 /* Output assembler code to FILE to increment profiler label # LABELNO
31378 for profiling a function entry. */
31380 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
31382 const char *mcount_name
= (flag_fentry
? MCOUNT_NAME_BEFORE_PROLOGUE
31387 #ifndef NO_PROFILE_COUNTERS
31388 fprintf (file
, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX
, labelno
);
31391 if (DEFAULT_ABI
== SYSV_ABI
&& flag_pic
)
31392 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name
);
31394 fprintf (file
, "\tcall\t%s\n", mcount_name
);
31398 #ifndef NO_PROFILE_COUNTERS
31399 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER
"\n",
31402 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name
);
31406 #ifndef NO_PROFILE_COUNTERS
31407 fprintf (file
, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER
"\n",
31410 fprintf (file
, "\tcall\t%s\n", mcount_name
);
31414 /* We don't have exact information about the insn sizes, but we may assume
31415 quite safely that we are informed about all 1 byte insns and memory
31416 address sizes. This is enough to eliminate unnecessary padding in
31420 min_insn_size (rtx insn
)
31424 if (!INSN_P (insn
) || !active_insn_p (insn
))
31427 /* Discard alignments we've emit and jump instructions. */
31428 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
31429 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
31431 if (JUMP_TABLE_DATA_P (insn
))
31434 /* Important case - calls are always 5 bytes.
31435 It is common to have many calls in the row. */
31437 && symbolic_reference_mentioned_p (PATTERN (insn
))
31438 && !SIBLING_CALL_P (insn
))
31440 len
= get_attr_length (insn
);
31444 /* For normal instructions we rely on get_attr_length being exact,
31445 with a few exceptions. */
31446 if (!JUMP_P (insn
))
31448 enum attr_type type
= get_attr_type (insn
);
31453 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
31454 || asm_noperands (PATTERN (insn
)) >= 0)
31461 /* Otherwise trust get_attr_length. */
31465 l
= get_attr_length_address (insn
);
31466 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
31475 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
31477 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
31481 ix86_avoid_jump_mispredicts (void)
31483 rtx insn
, start
= get_insns ();
31484 int nbytes
= 0, njumps
= 0;
31487 /* Look for all minimal intervals of instructions containing 4 jumps.
31488 The intervals are bounded by START and INSN. NBYTES is the total
31489 size of instructions in the interval including INSN and not including
31490 START. When the NBYTES is smaller than 16 bytes, it is possible
31491 that the end of START and INSN ends up in the same 16byte page.
31493 The smallest offset in the page INSN can start is the case where START
31494 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
31495 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
31497 for (insn
= start
; insn
; insn
= NEXT_INSN (insn
))
31501 if (LABEL_P (insn
))
31503 int align
= label_to_alignment (insn
);
31504 int max_skip
= label_to_max_skip (insn
);
31508 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
31509 already in the current 16 byte page, because otherwise
31510 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
31511 bytes to reach 16 byte boundary. */
31513 || (align
<= 3 && max_skip
!= (1 << align
) - 1))
31516 fprintf (dump_file
, "Label %i with max_skip %i\n",
31517 INSN_UID (insn
), max_skip
);
31520 while (nbytes
+ max_skip
>= 16)
31522 start
= NEXT_INSN (start
);
31523 if ((JUMP_P (start
)
31524 && GET_CODE (PATTERN (start
)) != ADDR_VEC
31525 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
31527 njumps
--, isjump
= 1;
31530 nbytes
-= min_insn_size (start
);
31536 min_size
= min_insn_size (insn
);
31537 nbytes
+= min_size
;
31539 fprintf (dump_file
, "Insn %i estimated to %i bytes\n",
31540 INSN_UID (insn
), min_size
);
31542 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
31543 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
31551 start
= NEXT_INSN (start
);
31552 if ((JUMP_P (start
)
31553 && GET_CODE (PATTERN (start
)) != ADDR_VEC
31554 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
31556 njumps
--, isjump
= 1;
31559 nbytes
-= min_insn_size (start
);
31561 gcc_assert (njumps
>= 0);
31563 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
31564 INSN_UID (start
), INSN_UID (insn
), nbytes
);
31566 if (njumps
== 3 && isjump
&& nbytes
< 16)
31568 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
31571 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
31572 INSN_UID (insn
), padsize
);
31573 emit_insn_before (gen_pad (GEN_INT (padsize
)), insn
);
31579 /* AMD Athlon works faster
31580 when RET is not destination of conditional jump or directly preceded
31581 by other jump instruction. We avoid the penalty by inserting NOP just
31582 before the RET instructions in such cases. */
31584 ix86_pad_returns (void)
31589 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
31591 basic_block bb
= e
->src
;
31592 rtx ret
= BB_END (bb
);
31594 bool replace
= false;
31596 if (!JUMP_P (ret
) || !ANY_RETURN_P (PATTERN (ret
))
31597 || optimize_bb_for_size_p (bb
))
31599 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
31600 if (active_insn_p (prev
) || LABEL_P (prev
))
31602 if (prev
&& LABEL_P (prev
))
31607 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
31608 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
31609 && !(e
->flags
& EDGE_FALLTHRU
))
31614 prev
= prev_active_insn (ret
);
31616 && ((JUMP_P (prev
) && any_condjump_p (prev
))
31619 /* Empty functions get branch mispredict even when
31620 the jump destination is not visible to us. */
31621 if (!prev
&& !optimize_function_for_size_p (cfun
))
31626 emit_jump_insn_before (gen_simple_return_internal_long (), ret
);
31632 /* Count the minimum number of instructions in BB. Return 4 if the
31633 number of instructions >= 4. */
31636 ix86_count_insn_bb (basic_block bb
)
31639 int insn_count
= 0;
31641 /* Count number of instructions in this block. Return 4 if the number
31642 of instructions >= 4. */
31643 FOR_BB_INSNS (bb
, insn
)
31645 /* Only happen in exit blocks. */
31647 && ANY_RETURN_P (PATTERN (insn
)))
31650 if (NONDEBUG_INSN_P (insn
)
31651 && GET_CODE (PATTERN (insn
)) != USE
31652 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
31655 if (insn_count
>= 4)
31664 /* Count the minimum number of instructions in code path in BB.
31665 Return 4 if the number of instructions >= 4. */
31668 ix86_count_insn (basic_block bb
)
31672 int min_prev_count
;
31674 /* Only bother counting instructions along paths with no
31675 more than 2 basic blocks between entry and exit. Given
31676 that BB has an edge to exit, determine if a predecessor
31677 of BB has an edge from entry. If so, compute the number
31678 of instructions in the predecessor block. If there
31679 happen to be multiple such blocks, compute the minimum. */
31680 min_prev_count
= 4;
31681 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
31684 edge_iterator prev_ei
;
31686 if (e
->src
== ENTRY_BLOCK_PTR
)
31688 min_prev_count
= 0;
31691 FOR_EACH_EDGE (prev_e
, prev_ei
, e
->src
->preds
)
31693 if (prev_e
->src
== ENTRY_BLOCK_PTR
)
31695 int count
= ix86_count_insn_bb (e
->src
);
31696 if (count
< min_prev_count
)
31697 min_prev_count
= count
;
31703 if (min_prev_count
< 4)
31704 min_prev_count
+= ix86_count_insn_bb (bb
);
31706 return min_prev_count
;
31709 /* Pad short funtion to 4 instructions. */
31712 ix86_pad_short_function (void)
31717 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
31719 rtx ret
= BB_END (e
->src
);
31720 if (JUMP_P (ret
) && ANY_RETURN_P (PATTERN (ret
)))
31722 int insn_count
= ix86_count_insn (e
->src
);
31724 /* Pad short function. */
31725 if (insn_count
< 4)
31729 /* Find epilogue. */
31732 || NOTE_KIND (insn
) != NOTE_INSN_EPILOGUE_BEG
))
31733 insn
= PREV_INSN (insn
);
31738 /* Two NOPs count as one instruction. */
31739 insn_count
= 2 * (4 - insn_count
);
31740 emit_insn_before (gen_nops (GEN_INT (insn_count
)), insn
);
31746 /* Implement machine specific optimizations. We implement padding of returns
31747 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
31751 /* We are freeing block_for_insn in the toplev to keep compatibility
31752 with old MDEP_REORGS that are not CFG based. Recompute it now. */
31753 compute_bb_for_insn ();
31755 /* Run the vzeroupper optimization if needed. */
31756 if (TARGET_VZEROUPPER
)
31757 move_or_delete_vzeroupper ();
31759 if (optimize
&& optimize_function_for_speed_p (cfun
))
31761 if (TARGET_PAD_SHORT_FUNCTION
)
31762 ix86_pad_short_function ();
31763 else if (TARGET_PAD_RETURNS
)
31764 ix86_pad_returns ();
31765 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
31766 if (TARGET_FOUR_JUMP_LIMIT
)
31767 ix86_avoid_jump_mispredicts ();
31772 /* Return nonzero when QImode register that must be represented via REX prefix
31775 x86_extended_QIreg_mentioned_p (rtx insn
)
31778 extract_insn_cached (insn
);
31779 for (i
= 0; i
< recog_data
.n_operands
; i
++)
31780 if (REG_P (recog_data
.operand
[i
])
31781 && REGNO (recog_data
.operand
[i
]) > BX_REG
)
31786 /* Return nonzero when P points to register encoded via REX prefix.
31787 Called via for_each_rtx. */
31789 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
31791 unsigned int regno
;
31794 regno
= REGNO (*p
);
31795 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
31798 /* Return true when INSN mentions register that must be encoded using REX
31801 x86_extended_reg_mentioned_p (rtx insn
)
31803 return for_each_rtx (INSN_P (insn
) ? &PATTERN (insn
) : &insn
,
31804 extended_reg_mentioned_1
, NULL
);
31807 /* If profitable, negate (without causing overflow) integer constant
31808 of mode MODE at location LOC. Return true in this case. */
31810 x86_maybe_negate_const_int (rtx
*loc
, enum machine_mode mode
)
31814 if (!CONST_INT_P (*loc
))
31820 /* DImode x86_64 constants must fit in 32 bits. */
31821 gcc_assert (x86_64_immediate_operand (*loc
, mode
));
31832 gcc_unreachable ();
31835 /* Avoid overflows. */
31836 if (mode_signbit_p (mode
, *loc
))
31839 val
= INTVAL (*loc
);
31841 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
31842 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
31843 if ((val
< 0 && val
!= -128)
31846 *loc
= GEN_INT (-val
);
31853 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
31854 optabs would emit if we didn't have TFmode patterns. */
31857 x86_emit_floatuns (rtx operands
[2])
31859 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
31860 enum machine_mode mode
, inmode
;
31862 inmode
= GET_MODE (operands
[1]);
31863 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
31866 in
= force_reg (inmode
, operands
[1]);
31867 mode
= GET_MODE (out
);
31868 neglab
= gen_label_rtx ();
31869 donelab
= gen_label_rtx ();
31870 f0
= gen_reg_rtx (mode
);
31872 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, inmode
, 0, neglab
);
31874 expand_float (out
, in
, 0);
31876 emit_jump_insn (gen_jump (donelab
));
31879 emit_label (neglab
);
31881 i0
= expand_simple_binop (inmode
, LSHIFTRT
, in
, const1_rtx
, NULL
,
31883 i1
= expand_simple_binop (inmode
, AND
, in
, const1_rtx
, NULL
,
31885 i0
= expand_simple_binop (inmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
31887 expand_float (f0
, i0
, 0);
31889 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
31891 emit_label (donelab
);
31894 /* AVX2 does support 32-byte integer vector operations,
31895 thus the longest vector we are faced with is V32QImode. */
31896 #define MAX_VECT_LEN 32
31898 struct expand_vec_perm_d
31900 rtx target
, op0
, op1
;
31901 unsigned char perm
[MAX_VECT_LEN
];
31902 enum machine_mode vmode
;
31903 unsigned char nelt
;
31907 static bool expand_vec_perm_1 (struct expand_vec_perm_d
*d
);
31908 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d
*d
);
31910 /* Get a vector mode of the same size as the original but with elements
31911 twice as wide. This is only guaranteed to apply to integral vectors. */
31913 static inline enum machine_mode
31914 get_mode_wider_vector (enum machine_mode o
)
31916 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
31917 enum machine_mode n
= GET_MODE_WIDER_MODE (o
);
31918 gcc_assert (GET_MODE_NUNITS (o
) == GET_MODE_NUNITS (n
) * 2);
31919 gcc_assert (GET_MODE_SIZE (o
) == GET_MODE_SIZE (n
));
31923 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
31924 with all elements equal to VAR. Return true if successful. */
31927 ix86_expand_vector_init_duplicate (bool mmx_ok
, enum machine_mode mode
,
31928 rtx target
, rtx val
)
31951 /* First attempt to recognize VAL as-is. */
31952 dup
= gen_rtx_VEC_DUPLICATE (mode
, val
);
31953 insn
= emit_insn (gen_rtx_SET (VOIDmode
, target
, dup
));
31954 if (recog_memoized (insn
) < 0)
31957 /* If that fails, force VAL into a register. */
31960 XEXP (dup
, 0) = force_reg (GET_MODE_INNER (mode
), val
);
31961 seq
= get_insns ();
31964 emit_insn_before (seq
, insn
);
31966 ok
= recog_memoized (insn
) >= 0;
31975 if (TARGET_SSE
|| TARGET_3DNOW_A
)
31979 val
= gen_lowpart (SImode
, val
);
31980 x
= gen_rtx_TRUNCATE (HImode
, val
);
31981 x
= gen_rtx_VEC_DUPLICATE (mode
, x
);
31982 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
31995 struct expand_vec_perm_d dperm
;
31999 memset (&dperm
, 0, sizeof (dperm
));
32000 dperm
.target
= target
;
32001 dperm
.vmode
= mode
;
32002 dperm
.nelt
= GET_MODE_NUNITS (mode
);
32003 dperm
.op0
= dperm
.op1
= gen_reg_rtx (mode
);
32005 /* Extend to SImode using a paradoxical SUBREG. */
32006 tmp1
= gen_reg_rtx (SImode
);
32007 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
32009 /* Insert the SImode value as low element of a V4SImode vector. */
32010 tmp2
= gen_lowpart (V4SImode
, dperm
.op0
);
32011 emit_insn (gen_vec_setv4si_0 (tmp2
, CONST0_RTX (V4SImode
), tmp1
));
32013 ok
= (expand_vec_perm_1 (&dperm
)
32014 || expand_vec_perm_broadcast_1 (&dperm
));
32026 /* Replicate the value once into the next wider mode and recurse. */
32028 enum machine_mode smode
, wsmode
, wvmode
;
32031 smode
= GET_MODE_INNER (mode
);
32032 wvmode
= get_mode_wider_vector (mode
);
32033 wsmode
= GET_MODE_INNER (wvmode
);
32035 val
= convert_modes (wsmode
, smode
, val
, true);
32036 x
= expand_simple_binop (wsmode
, ASHIFT
, val
,
32037 GEN_INT (GET_MODE_BITSIZE (smode
)),
32038 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
32039 val
= expand_simple_binop (wsmode
, IOR
, val
, x
, x
, 1, OPTAB_LIB_WIDEN
);
32041 x
= gen_lowpart (wvmode
, target
);
32042 ok
= ix86_expand_vector_init_duplicate (mmx_ok
, wvmode
, x
, val
);
32050 enum machine_mode hvmode
= (mode
== V16HImode
? V8HImode
: V16QImode
);
32051 rtx x
= gen_reg_rtx (hvmode
);
32053 ok
= ix86_expand_vector_init_duplicate (false, hvmode
, x
, val
);
32056 x
= gen_rtx_VEC_CONCAT (mode
, x
, x
);
32057 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
32066 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
32067 whose ONE_VAR element is VAR, and other elements are zero. Return true
32071 ix86_expand_vector_init_one_nonzero (bool mmx_ok
, enum machine_mode mode
,
32072 rtx target
, rtx var
, int one_var
)
32074 enum machine_mode vsimode
;
32077 bool use_vector_set
= false;
32082 /* For SSE4.1, we normally use vector set. But if the second
32083 element is zero and inter-unit moves are OK, we use movq
32085 use_vector_set
= (TARGET_64BIT
32087 && !(TARGET_INTER_UNIT_MOVES
32093 use_vector_set
= TARGET_SSE4_1
;
32096 use_vector_set
= TARGET_SSE2
;
32099 use_vector_set
= TARGET_SSE
|| TARGET_3DNOW_A
;
32106 use_vector_set
= TARGET_AVX
;
32109 /* Use ix86_expand_vector_set in 64bit mode only. */
32110 use_vector_set
= TARGET_AVX
&& TARGET_64BIT
;
32116 if (use_vector_set
)
32118 emit_insn (gen_rtx_SET (VOIDmode
, target
, CONST0_RTX (mode
)));
32119 var
= force_reg (GET_MODE_INNER (mode
), var
);
32120 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
32136 var
= force_reg (GET_MODE_INNER (mode
), var
);
32137 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
32138 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
32143 if (!REG_P (target
) || REGNO (target
) < FIRST_PSEUDO_REGISTER
)
32144 new_target
= gen_reg_rtx (mode
);
32146 new_target
= target
;
32147 var
= force_reg (GET_MODE_INNER (mode
), var
);
32148 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
32149 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
32150 emit_insn (gen_rtx_SET (VOIDmode
, new_target
, x
));
32153 /* We need to shuffle the value to the correct position, so
32154 create a new pseudo to store the intermediate result. */
32156 /* With SSE2, we can use the integer shuffle insns. */
32157 if (mode
!= V4SFmode
&& TARGET_SSE2
)
32159 emit_insn (gen_sse2_pshufd_1 (new_target
, new_target
,
32161 GEN_INT (one_var
== 1 ? 0 : 1),
32162 GEN_INT (one_var
== 2 ? 0 : 1),
32163 GEN_INT (one_var
== 3 ? 0 : 1)));
32164 if (target
!= new_target
)
32165 emit_move_insn (target
, new_target
);
32169 /* Otherwise convert the intermediate result to V4SFmode and
32170 use the SSE1 shuffle instructions. */
32171 if (mode
!= V4SFmode
)
32173 tmp
= gen_reg_rtx (V4SFmode
);
32174 emit_move_insn (tmp
, gen_lowpart (V4SFmode
, new_target
));
32179 emit_insn (gen_sse_shufps_v4sf (tmp
, tmp
, tmp
,
32181 GEN_INT (one_var
== 1 ? 0 : 1),
32182 GEN_INT (one_var
== 2 ? 0+4 : 1+4),
32183 GEN_INT (one_var
== 3 ? 0+4 : 1+4)));
32185 if (mode
!= V4SFmode
)
32186 emit_move_insn (target
, gen_lowpart (V4SImode
, tmp
));
32187 else if (tmp
!= target
)
32188 emit_move_insn (target
, tmp
);
32190 else if (target
!= new_target
)
32191 emit_move_insn (target
, new_target
);
32196 vsimode
= V4SImode
;
32202 vsimode
= V2SImode
;
32208 /* Zero extend the variable element to SImode and recurse. */
32209 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
32211 x
= gen_reg_rtx (vsimode
);
32212 if (!ix86_expand_vector_init_one_nonzero (mmx_ok
, vsimode
, x
,
32214 gcc_unreachable ();
32216 emit_move_insn (target
, gen_lowpart (mode
, x
));
32224 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
32225 consisting of the values in VALS. It is known that all elements
32226 except ONE_VAR are constants. Return true if successful. */
32229 ix86_expand_vector_init_one_var (bool mmx_ok
, enum machine_mode mode
,
32230 rtx target
, rtx vals
, int one_var
)
32232 rtx var
= XVECEXP (vals
, 0, one_var
);
32233 enum machine_mode wmode
;
32236 const_vec
= copy_rtx (vals
);
32237 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
32238 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
32246 /* For the two element vectors, it's just as easy to use
32247 the general case. */
32251 /* Use ix86_expand_vector_set in 64bit mode only. */
32274 /* There's no way to set one QImode entry easily. Combine
32275 the variable value with its adjacent constant value, and
32276 promote to an HImode set. */
32277 x
= XVECEXP (vals
, 0, one_var
^ 1);
32280 var
= convert_modes (HImode
, QImode
, var
, true);
32281 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
32282 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
32283 x
= GEN_INT (INTVAL (x
) & 0xff);
32287 var
= convert_modes (HImode
, QImode
, var
, true);
32288 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
32290 if (x
!= const0_rtx
)
32291 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
32292 1, OPTAB_LIB_WIDEN
);
32294 x
= gen_reg_rtx (wmode
);
32295 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
32296 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
32298 emit_move_insn (target
, gen_lowpart (mode
, x
));
32305 emit_move_insn (target
, const_vec
);
32306 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
32310 /* A subroutine of ix86_expand_vector_init_general. Use vector
32311 concatenate to handle the most general case: all values variable,
32312 and none identical. */
32315 ix86_expand_vector_init_concat (enum machine_mode mode
,
32316 rtx target
, rtx
*ops
, int n
)
32318 enum machine_mode cmode
, hmode
= VOIDmode
;
32319 rtx first
[8], second
[4];
32359 gcc_unreachable ();
32362 if (!register_operand (ops
[1], cmode
))
32363 ops
[1] = force_reg (cmode
, ops
[1]);
32364 if (!register_operand (ops
[0], cmode
))
32365 ops
[0] = force_reg (cmode
, ops
[0]);
32366 emit_insn (gen_rtx_SET (VOIDmode
, target
,
32367 gen_rtx_VEC_CONCAT (mode
, ops
[0],
32387 gcc_unreachable ();
32403 gcc_unreachable ();
32408 /* FIXME: We process inputs backward to help RA. PR 36222. */
32411 for (; i
> 0; i
-= 2, j
--)
32413 first
[j
] = gen_reg_rtx (cmode
);
32414 v
= gen_rtvec (2, ops
[i
- 1], ops
[i
]);
32415 ix86_expand_vector_init (false, first
[j
],
32416 gen_rtx_PARALLEL (cmode
, v
));
32422 gcc_assert (hmode
!= VOIDmode
);
32423 for (i
= j
= 0; i
< n
; i
+= 2, j
++)
32425 second
[j
] = gen_reg_rtx (hmode
);
32426 ix86_expand_vector_init_concat (hmode
, second
[j
],
32430 ix86_expand_vector_init_concat (mode
, target
, second
, n
);
32433 ix86_expand_vector_init_concat (mode
, target
, first
, n
);
32437 gcc_unreachable ();
32441 /* A subroutine of ix86_expand_vector_init_general. Use vector
32442 interleave to handle the most general case: all values variable,
32443 and none identical. */
32446 ix86_expand_vector_init_interleave (enum machine_mode mode
,
32447 rtx target
, rtx
*ops
, int n
)
32449 enum machine_mode first_imode
, second_imode
, third_imode
, inner_mode
;
32452 rtx (*gen_load_even
) (rtx
, rtx
, rtx
);
32453 rtx (*gen_interleave_first_low
) (rtx
, rtx
, rtx
);
32454 rtx (*gen_interleave_second_low
) (rtx
, rtx
, rtx
);
32459 gen_load_even
= gen_vec_setv8hi
;
32460 gen_interleave_first_low
= gen_vec_interleave_lowv4si
;
32461 gen_interleave_second_low
= gen_vec_interleave_lowv2di
;
32462 inner_mode
= HImode
;
32463 first_imode
= V4SImode
;
32464 second_imode
= V2DImode
;
32465 third_imode
= VOIDmode
;
32468 gen_load_even
= gen_vec_setv16qi
;
32469 gen_interleave_first_low
= gen_vec_interleave_lowv8hi
;
32470 gen_interleave_second_low
= gen_vec_interleave_lowv4si
;
32471 inner_mode
= QImode
;
32472 first_imode
= V8HImode
;
32473 second_imode
= V4SImode
;
32474 third_imode
= V2DImode
;
32477 gcc_unreachable ();
32480 for (i
= 0; i
< n
; i
++)
32482 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
32483 op0
= gen_reg_rtx (SImode
);
32484 emit_move_insn (op0
, gen_lowpart (SImode
, ops
[i
+ i
]));
32486 /* Insert the SImode value as low element of V4SImode vector. */
32487 op1
= gen_reg_rtx (V4SImode
);
32488 op0
= gen_rtx_VEC_MERGE (V4SImode
,
32489 gen_rtx_VEC_DUPLICATE (V4SImode
,
32491 CONST0_RTX (V4SImode
),
32493 emit_insn (gen_rtx_SET (VOIDmode
, op1
, op0
));
32495 /* Cast the V4SImode vector back to a vector in orignal mode. */
32496 op0
= gen_reg_rtx (mode
);
32497 emit_move_insn (op0
, gen_lowpart (mode
, op1
));
32499 /* Load even elements into the second positon. */
32500 emit_insn (gen_load_even (op0
,
32501 force_reg (inner_mode
,
32505 /* Cast vector to FIRST_IMODE vector. */
32506 ops
[i
] = gen_reg_rtx (first_imode
);
32507 emit_move_insn (ops
[i
], gen_lowpart (first_imode
, op0
));
32510 /* Interleave low FIRST_IMODE vectors. */
32511 for (i
= j
= 0; i
< n
; i
+= 2, j
++)
32513 op0
= gen_reg_rtx (first_imode
);
32514 emit_insn (gen_interleave_first_low (op0
, ops
[i
], ops
[i
+ 1]));
32516 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
32517 ops
[j
] = gen_reg_rtx (second_imode
);
32518 emit_move_insn (ops
[j
], gen_lowpart (second_imode
, op0
));
32521 /* Interleave low SECOND_IMODE vectors. */
32522 switch (second_imode
)
32525 for (i
= j
= 0; i
< n
/ 2; i
+= 2, j
++)
32527 op0
= gen_reg_rtx (second_imode
);
32528 emit_insn (gen_interleave_second_low (op0
, ops
[i
],
32531 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
32533 ops
[j
] = gen_reg_rtx (third_imode
);
32534 emit_move_insn (ops
[j
], gen_lowpart (third_imode
, op0
));
32536 second_imode
= V2DImode
;
32537 gen_interleave_second_low
= gen_vec_interleave_lowv2di
;
32541 op0
= gen_reg_rtx (second_imode
);
32542 emit_insn (gen_interleave_second_low (op0
, ops
[0],
32545 /* Cast the SECOND_IMODE vector back to a vector on original
32547 emit_insn (gen_rtx_SET (VOIDmode
, target
,
32548 gen_lowpart (mode
, op0
)));
32552 gcc_unreachable ();
32556 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
32557 all values variable, and none identical. */
32560 ix86_expand_vector_init_general (bool mmx_ok
, enum machine_mode mode
,
32561 rtx target
, rtx vals
)
32563 rtx ops
[32], op0
, op1
;
32564 enum machine_mode half_mode
= VOIDmode
;
32571 if (!mmx_ok
&& !TARGET_SSE
)
32583 n
= GET_MODE_NUNITS (mode
);
32584 for (i
= 0; i
< n
; i
++)
32585 ops
[i
] = XVECEXP (vals
, 0, i
);
32586 ix86_expand_vector_init_concat (mode
, target
, ops
, n
);
32590 half_mode
= V16QImode
;
32594 half_mode
= V8HImode
;
32598 n
= GET_MODE_NUNITS (mode
);
32599 for (i
= 0; i
< n
; i
++)
32600 ops
[i
] = XVECEXP (vals
, 0, i
);
32601 op0
= gen_reg_rtx (half_mode
);
32602 op1
= gen_reg_rtx (half_mode
);
32603 ix86_expand_vector_init_interleave (half_mode
, op0
, ops
,
32605 ix86_expand_vector_init_interleave (half_mode
, op1
,
32606 &ops
[n
>> 1], n
>> 2);
32607 emit_insn (gen_rtx_SET (VOIDmode
, target
,
32608 gen_rtx_VEC_CONCAT (mode
, op0
, op1
)));
32612 if (!TARGET_SSE4_1
)
32620 /* Don't use ix86_expand_vector_init_interleave if we can't
32621 move from GPR to SSE register directly. */
32622 if (!TARGET_INTER_UNIT_MOVES
)
32625 n
= GET_MODE_NUNITS (mode
);
32626 for (i
= 0; i
< n
; i
++)
32627 ops
[i
] = XVECEXP (vals
, 0, i
);
32628 ix86_expand_vector_init_interleave (mode
, target
, ops
, n
>> 1);
32636 gcc_unreachable ();
32640 int i
, j
, n_elts
, n_words
, n_elt_per_word
;
32641 enum machine_mode inner_mode
;
32642 rtx words
[4], shift
;
32644 inner_mode
= GET_MODE_INNER (mode
);
32645 n_elts
= GET_MODE_NUNITS (mode
);
32646 n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
32647 n_elt_per_word
= n_elts
/ n_words
;
32648 shift
= GEN_INT (GET_MODE_BITSIZE (inner_mode
));
32650 for (i
= 0; i
< n_words
; ++i
)
32652 rtx word
= NULL_RTX
;
32654 for (j
= 0; j
< n_elt_per_word
; ++j
)
32656 rtx elt
= XVECEXP (vals
, 0, (i
+1)*n_elt_per_word
- j
- 1);
32657 elt
= convert_modes (word_mode
, inner_mode
, elt
, true);
32663 word
= expand_simple_binop (word_mode
, ASHIFT
, word
, shift
,
32664 word
, 1, OPTAB_LIB_WIDEN
);
32665 word
= expand_simple_binop (word_mode
, IOR
, word
, elt
,
32666 word
, 1, OPTAB_LIB_WIDEN
);
32674 emit_move_insn (target
, gen_lowpart (mode
, words
[0]));
32675 else if (n_words
== 2)
32677 rtx tmp
= gen_reg_rtx (mode
);
32678 emit_clobber (tmp
);
32679 emit_move_insn (gen_lowpart (word_mode
, tmp
), words
[0]);
32680 emit_move_insn (gen_highpart (word_mode
, tmp
), words
[1]);
32681 emit_move_insn (target
, tmp
);
32683 else if (n_words
== 4)
32685 rtx tmp
= gen_reg_rtx (V4SImode
);
32686 gcc_assert (word_mode
== SImode
);
32687 vals
= gen_rtx_PARALLEL (V4SImode
, gen_rtvec_v (4, words
));
32688 ix86_expand_vector_init_general (false, V4SImode
, tmp
, vals
);
32689 emit_move_insn (target
, gen_lowpart (mode
, tmp
));
32692 gcc_unreachable ();
32696 /* Initialize vector TARGET via VALS. Suppress the use of MMX
32697 instructions unless MMX_OK is true. */
32700 ix86_expand_vector_init (bool mmx_ok
, rtx target
, rtx vals
)
32702 enum machine_mode mode
= GET_MODE (target
);
32703 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
32704 int n_elts
= GET_MODE_NUNITS (mode
);
32705 int n_var
= 0, one_var
= -1;
32706 bool all_same
= true, all_const_zero
= true;
32710 for (i
= 0; i
< n_elts
; ++i
)
32712 x
= XVECEXP (vals
, 0, i
);
32713 if (!(CONST_INT_P (x
)
32714 || GET_CODE (x
) == CONST_DOUBLE
32715 || GET_CODE (x
) == CONST_FIXED
))
32716 n_var
++, one_var
= i
;
32717 else if (x
!= CONST0_RTX (inner_mode
))
32718 all_const_zero
= false;
32719 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
32723 /* Constants are best loaded from the constant pool. */
32726 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
32730 /* If all values are identical, broadcast the value. */
32732 && ix86_expand_vector_init_duplicate (mmx_ok
, mode
, target
,
32733 XVECEXP (vals
, 0, 0)))
32736 /* Values where only one field is non-constant are best loaded from
32737 the pool and overwritten via move later. */
32741 && ix86_expand_vector_init_one_nonzero (mmx_ok
, mode
, target
,
32742 XVECEXP (vals
, 0, one_var
),
32746 if (ix86_expand_vector_init_one_var (mmx_ok
, mode
, target
, vals
, one_var
))
32750 ix86_expand_vector_init_general (mmx_ok
, mode
, target
, vals
);
32754 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
32756 enum machine_mode mode
= GET_MODE (target
);
32757 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
32758 enum machine_mode half_mode
;
32759 bool use_vec_merge
= false;
32761 static rtx (*gen_extract
[6][2]) (rtx
, rtx
)
32763 { gen_vec_extract_lo_v32qi
, gen_vec_extract_hi_v32qi
},
32764 { gen_vec_extract_lo_v16hi
, gen_vec_extract_hi_v16hi
},
32765 { gen_vec_extract_lo_v8si
, gen_vec_extract_hi_v8si
},
32766 { gen_vec_extract_lo_v4di
, gen_vec_extract_hi_v4di
},
32767 { gen_vec_extract_lo_v8sf
, gen_vec_extract_hi_v8sf
},
32768 { gen_vec_extract_lo_v4df
, gen_vec_extract_hi_v4df
}
32770 static rtx (*gen_insert
[6][2]) (rtx
, rtx
, rtx
)
32772 { gen_vec_set_lo_v32qi
, gen_vec_set_hi_v32qi
},
32773 { gen_vec_set_lo_v16hi
, gen_vec_set_hi_v16hi
},
32774 { gen_vec_set_lo_v8si
, gen_vec_set_hi_v8si
},
32775 { gen_vec_set_lo_v4di
, gen_vec_set_hi_v4di
},
32776 { gen_vec_set_lo_v8sf
, gen_vec_set_hi_v8sf
},
32777 { gen_vec_set_lo_v4df
, gen_vec_set_hi_v4df
}
32787 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
32788 ix86_expand_vector_extract (true, tmp
, target
, 1 - elt
);
32790 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
32792 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
32793 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
32799 use_vec_merge
= TARGET_SSE4_1
&& TARGET_64BIT
;
32803 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
32804 ix86_expand_vector_extract (false, tmp
, target
, 1 - elt
);
32806 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
32808 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
32809 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
32816 /* For the two element vectors, we implement a VEC_CONCAT with
32817 the extraction of the other element. */
32819 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (1 - elt
)));
32820 tmp
= gen_rtx_VEC_SELECT (inner_mode
, target
, tmp
);
32823 op0
= val
, op1
= tmp
;
32825 op0
= tmp
, op1
= val
;
32827 tmp
= gen_rtx_VEC_CONCAT (mode
, op0
, op1
);
32828 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
32833 use_vec_merge
= TARGET_SSE4_1
;
32840 use_vec_merge
= true;
32844 /* tmp = target = A B C D */
32845 tmp
= copy_to_reg (target
);
32846 /* target = A A B B */
32847 emit_insn (gen_vec_interleave_lowv4sf (target
, target
, target
));
32848 /* target = X A B B */
32849 ix86_expand_vector_set (false, target
, val
, 0);
32850 /* target = A X C D */
32851 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
32852 const1_rtx
, const0_rtx
,
32853 GEN_INT (2+4), GEN_INT (3+4)));
32857 /* tmp = target = A B C D */
32858 tmp
= copy_to_reg (target
);
32859 /* tmp = X B C D */
32860 ix86_expand_vector_set (false, tmp
, val
, 0);
32861 /* target = A B X D */
32862 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
32863 const0_rtx
, const1_rtx
,
32864 GEN_INT (0+4), GEN_INT (3+4)));
32868 /* tmp = target = A B C D */
32869 tmp
= copy_to_reg (target
);
32870 /* tmp = X B C D */
32871 ix86_expand_vector_set (false, tmp
, val
, 0);
32872 /* target = A B X D */
32873 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
32874 const0_rtx
, const1_rtx
,
32875 GEN_INT (2+4), GEN_INT (0+4)));
32879 gcc_unreachable ();
32884 use_vec_merge
= TARGET_SSE4_1
;
32888 /* Element 0 handled by vec_merge below. */
32891 use_vec_merge
= true;
32897 /* With SSE2, use integer shuffles to swap element 0 and ELT,
32898 store into element 0, then shuffle them back. */
32902 order
[0] = GEN_INT (elt
);
32903 order
[1] = const1_rtx
;
32904 order
[2] = const2_rtx
;
32905 order
[3] = GEN_INT (3);
32906 order
[elt
] = const0_rtx
;
32908 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
32909 order
[1], order
[2], order
[3]));
32911 ix86_expand_vector_set (false, target
, val
, 0);
32913 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
32914 order
[1], order
[2], order
[3]));
32918 /* For SSE1, we have to reuse the V4SF code. */
32919 ix86_expand_vector_set (false, gen_lowpart (V4SFmode
, target
),
32920 gen_lowpart (SFmode
, val
), elt
);
32925 use_vec_merge
= TARGET_SSE2
;
32928 use_vec_merge
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
32932 use_vec_merge
= TARGET_SSE4_1
;
32939 half_mode
= V16QImode
;
32945 half_mode
= V8HImode
;
32951 half_mode
= V4SImode
;
32957 half_mode
= V2DImode
;
32963 half_mode
= V4SFmode
;
32969 half_mode
= V2DFmode
;
32975 /* Compute offset. */
32979 gcc_assert (i
<= 1);
32981 /* Extract the half. */
32982 tmp
= gen_reg_rtx (half_mode
);
32983 emit_insn (gen_extract
[j
][i
] (tmp
, target
));
32985 /* Put val in tmp at elt. */
32986 ix86_expand_vector_set (false, tmp
, val
, elt
);
32989 emit_insn (gen_insert
[j
][i
] (target
, target
, tmp
));
32998 tmp
= gen_rtx_VEC_DUPLICATE (mode
, val
);
32999 tmp
= gen_rtx_VEC_MERGE (mode
, tmp
, target
, GEN_INT (1 << elt
));
33000 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
33004 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
33006 emit_move_insn (mem
, target
);
33008 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
33009 emit_move_insn (tmp
, val
);
33011 emit_move_insn (target
, mem
);
33016 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
33018 enum machine_mode mode
= GET_MODE (vec
);
33019 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
33020 bool use_vec_extr
= false;
33033 use_vec_extr
= true;
33037 use_vec_extr
= TARGET_SSE4_1
;
33049 tmp
= gen_reg_rtx (mode
);
33050 emit_insn (gen_sse_shufps_v4sf (tmp
, vec
, vec
,
33051 GEN_INT (elt
), GEN_INT (elt
),
33052 GEN_INT (elt
+4), GEN_INT (elt
+4)));
33056 tmp
= gen_reg_rtx (mode
);
33057 emit_insn (gen_vec_interleave_highv4sf (tmp
, vec
, vec
));
33061 gcc_unreachable ();
33064 use_vec_extr
= true;
33069 use_vec_extr
= TARGET_SSE4_1
;
33083 tmp
= gen_reg_rtx (mode
);
33084 emit_insn (gen_sse2_pshufd_1 (tmp
, vec
,
33085 GEN_INT (elt
), GEN_INT (elt
),
33086 GEN_INT (elt
), GEN_INT (elt
)));
33090 tmp
= gen_reg_rtx (mode
);
33091 emit_insn (gen_vec_interleave_highv4si (tmp
, vec
, vec
));
33095 gcc_unreachable ();
33098 use_vec_extr
= true;
33103 /* For SSE1, we have to reuse the V4SF code. */
33104 ix86_expand_vector_extract (false, gen_lowpart (SFmode
, target
),
33105 gen_lowpart (V4SFmode
, vec
), elt
);
33111 use_vec_extr
= TARGET_SSE2
;
33114 use_vec_extr
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
33118 use_vec_extr
= TARGET_SSE4_1
;
33124 tmp
= gen_reg_rtx (V4SFmode
);
33126 emit_insn (gen_vec_extract_lo_v8sf (tmp
, vec
));
33128 emit_insn (gen_vec_extract_hi_v8sf (tmp
, vec
));
33129 ix86_expand_vector_extract (false, target
, tmp
, elt
& 3);
33137 tmp
= gen_reg_rtx (V2DFmode
);
33139 emit_insn (gen_vec_extract_lo_v4df (tmp
, vec
));
33141 emit_insn (gen_vec_extract_hi_v4df (tmp
, vec
));
33142 ix86_expand_vector_extract (false, target
, tmp
, elt
& 1);
33150 tmp
= gen_reg_rtx (V16QImode
);
33152 emit_insn (gen_vec_extract_lo_v32qi (tmp
, vec
));
33154 emit_insn (gen_vec_extract_hi_v32qi (tmp
, vec
));
33155 ix86_expand_vector_extract (false, target
, tmp
, elt
& 15);
33163 tmp
= gen_reg_rtx (V8HImode
);
33165 emit_insn (gen_vec_extract_lo_v16hi (tmp
, vec
));
33167 emit_insn (gen_vec_extract_hi_v16hi (tmp
, vec
));
33168 ix86_expand_vector_extract (false, target
, tmp
, elt
& 7);
33176 tmp
= gen_reg_rtx (V4SImode
);
33178 emit_insn (gen_vec_extract_lo_v8si (tmp
, vec
));
33180 emit_insn (gen_vec_extract_hi_v8si (tmp
, vec
));
33181 ix86_expand_vector_extract (false, target
, tmp
, elt
& 3);
33189 tmp
= gen_reg_rtx (V2DImode
);
33191 emit_insn (gen_vec_extract_lo_v4di (tmp
, vec
));
33193 emit_insn (gen_vec_extract_hi_v4di (tmp
, vec
));
33194 ix86_expand_vector_extract (false, target
, tmp
, elt
& 1);
33200 /* ??? Could extract the appropriate HImode element and shift. */
33207 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (elt
)));
33208 tmp
= gen_rtx_VEC_SELECT (inner_mode
, vec
, tmp
);
33210 /* Let the rtl optimizers know about the zero extension performed. */
33211 if (inner_mode
== QImode
|| inner_mode
== HImode
)
33213 tmp
= gen_rtx_ZERO_EXTEND (SImode
, tmp
);
33214 target
= gen_lowpart (SImode
, target
);
33217 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
33221 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
33223 emit_move_insn (mem
, vec
);
33225 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
33226 emit_move_insn (target
, tmp
);
33230 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
33231 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
33232 The upper bits of DEST are undefined, though they shouldn't cause
33233 exceptions (some bits from src or all zeros are ok). */
33236 emit_reduc_half (rtx dest
, rtx src
, int i
)
33239 switch (GET_MODE (src
))
33243 tem
= gen_sse_movhlps (dest
, src
, src
);
33245 tem
= gen_sse_shufps_v4sf (dest
, src
, src
, const1_rtx
, const1_rtx
,
33246 GEN_INT (1 + 4), GEN_INT (1 + 4));
33249 tem
= gen_vec_interleave_highv2df (dest
, src
, src
);
33255 tem
= gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode
, dest
),
33256 gen_lowpart (V1TImode
, src
),
33261 tem
= gen_avx_vperm2f128v8sf3 (dest
, src
, src
, const1_rtx
);
33263 tem
= gen_avx_shufps256 (dest
, src
, src
,
33264 GEN_INT (i
== 128 ? 2 + (3 << 2) : 1));
33268 tem
= gen_avx_vperm2f128v4df3 (dest
, src
, src
, const1_rtx
);
33270 tem
= gen_avx_shufpd256 (dest
, src
, src
, const1_rtx
);
33277 tem
= gen_avx2_permv2ti (gen_lowpart (V4DImode
, dest
),
33278 gen_lowpart (V4DImode
, src
),
33279 gen_lowpart (V4DImode
, src
),
33282 tem
= gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode
, dest
),
33283 gen_lowpart (V2TImode
, src
),
33287 gcc_unreachable ();
33292 /* Expand a vector reduction. FN is the binary pattern to reduce;
33293 DEST is the destination; IN is the input vector. */
33296 ix86_expand_reduc (rtx (*fn
) (rtx
, rtx
, rtx
), rtx dest
, rtx in
)
33298 rtx half
, dst
, vec
= in
;
33299 enum machine_mode mode
= GET_MODE (in
);
33302 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
33304 && mode
== V8HImode
33305 && fn
== gen_uminv8hi3
)
33307 emit_insn (gen_sse4_1_phminposuw (dest
, in
));
33311 for (i
= GET_MODE_BITSIZE (mode
);
33312 i
> GET_MODE_BITSIZE (GET_MODE_INNER (mode
));
33315 half
= gen_reg_rtx (mode
);
33316 emit_reduc_half (half
, vec
, i
);
33317 if (i
== GET_MODE_BITSIZE (GET_MODE_INNER (mode
)) * 2)
33320 dst
= gen_reg_rtx (mode
);
33321 emit_insn (fn (dst
, half
, vec
));
33326 /* Target hook for scalar_mode_supported_p. */
33328 ix86_scalar_mode_supported_p (enum machine_mode mode
)
33330 if (DECIMAL_FLOAT_MODE_P (mode
))
33331 return default_decimal_float_supported_p ();
33332 else if (mode
== TFmode
)
33335 return default_scalar_mode_supported_p (mode
);
33338 /* Implements target hook vector_mode_supported_p. */
33340 ix86_vector_mode_supported_p (enum machine_mode mode
)
33342 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
33344 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
33346 if (TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
33348 if (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
))
33350 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
33355 /* Target hook for c_mode_for_suffix. */
33356 static enum machine_mode
33357 ix86_c_mode_for_suffix (char suffix
)
33367 /* Worker function for TARGET_MD_ASM_CLOBBERS.
33369 We do this in the new i386 backend to maintain source compatibility
33370 with the old cc0-based compiler. */
33373 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED
,
33374 tree inputs ATTRIBUTE_UNUSED
,
33377 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
33379 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
33384 /* Implements target vector targetm.asm.encode_section_info. */
33386 static void ATTRIBUTE_UNUSED
33387 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
33389 default_encode_section_info (decl
, rtl
, first
);
33391 if (TREE_CODE (decl
) == VAR_DECL
33392 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
))
33393 && ix86_in_large_data_p (decl
))
33394 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
33397 /* Worker function for REVERSE_CONDITION. */
33400 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
33402 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
33403 ? reverse_condition (code
)
33404 : reverse_condition_maybe_unordered (code
));
33407 /* Output code to perform an x87 FP register move, from OPERANDS[1]
33411 output_387_reg_move (rtx insn
, rtx
*operands
)
33413 if (REG_P (operands
[0]))
33415 if (REG_P (operands
[1])
33416 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
33418 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
33419 return output_387_ffreep (operands
, 0);
33420 return "fstp\t%y0";
33422 if (STACK_TOP_P (operands
[0]))
33423 return "fld%Z1\t%y1";
33426 else if (MEM_P (operands
[0]))
33428 gcc_assert (REG_P (operands
[1]));
33429 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
33430 return "fstp%Z0\t%y0";
33433 /* There is no non-popping store to memory for XFmode.
33434 So if we need one, follow the store with a load. */
33435 if (GET_MODE (operands
[0]) == XFmode
)
33436 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
33438 return "fst%Z0\t%y0";
33445 /* Output code to perform a conditional jump to LABEL, if C2 flag in
33446 FP status register is set. */
33449 ix86_emit_fp_unordered_jump (rtx label
)
33451 rtx reg
= gen_reg_rtx (HImode
);
33454 emit_insn (gen_x86_fnstsw_1 (reg
));
33456 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_insn_for_size_p ()))
33458 emit_insn (gen_x86_sahf_1 (reg
));
33460 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
33461 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
33465 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
33467 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
33468 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
33471 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
33472 gen_rtx_LABEL_REF (VOIDmode
, label
),
33474 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
33476 emit_jump_insn (temp
);
33477 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
33480 /* Output code to perform a log1p XFmode calculation. */
33482 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
33484 rtx label1
= gen_label_rtx ();
33485 rtx label2
= gen_label_rtx ();
33487 rtx tmp
= gen_reg_rtx (XFmode
);
33488 rtx tmp2
= gen_reg_rtx (XFmode
);
33491 emit_insn (gen_absxf2 (tmp
, op1
));
33492 test
= gen_rtx_GE (VOIDmode
, tmp
,
33493 CONST_DOUBLE_FROM_REAL_VALUE (
33494 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
33496 emit_jump_insn (gen_cbranchxf4 (test
, XEXP (test
, 0), XEXP (test
, 1), label1
));
33498 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
33499 emit_insn (gen_fyl2xp1xf3_i387 (op0
, op1
, tmp2
));
33500 emit_jump (label2
);
33502 emit_label (label1
);
33503 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
33504 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
33505 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
33506 emit_insn (gen_fyl2xxf3_i387 (op0
, tmp
, tmp2
));
33508 emit_label (label2
);
33511 /* Emit code for round calculation. */
33512 void ix86_emit_i387_round (rtx op0
, rtx op1
)
33514 enum machine_mode inmode
= GET_MODE (op1
);
33515 enum machine_mode outmode
= GET_MODE (op0
);
33516 rtx e1
, e2
, res
, tmp
, tmp1
, half
;
33517 rtx scratch
= gen_reg_rtx (HImode
);
33518 rtx flags
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
33519 rtx jump_label
= gen_label_rtx ();
33521 rtx (*gen_abs
) (rtx
, rtx
);
33522 rtx (*gen_neg
) (rtx
, rtx
);
33527 gen_abs
= gen_abssf2
;
33530 gen_abs
= gen_absdf2
;
33533 gen_abs
= gen_absxf2
;
33536 gcc_unreachable ();
33542 gen_neg
= gen_negsf2
;
33545 gen_neg
= gen_negdf2
;
33548 gen_neg
= gen_negxf2
;
33551 gen_neg
= gen_neghi2
;
33554 gen_neg
= gen_negsi2
;
33557 gen_neg
= gen_negdi2
;
33560 gcc_unreachable ();
33563 e1
= gen_reg_rtx (inmode
);
33564 e2
= gen_reg_rtx (inmode
);
33565 res
= gen_reg_rtx (outmode
);
33567 half
= CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf
, inmode
);
33569 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
33571 /* scratch = fxam(op1) */
33572 emit_insn (gen_rtx_SET (VOIDmode
, scratch
,
33573 gen_rtx_UNSPEC (HImode
, gen_rtvec (1, op1
),
33575 /* e1 = fabs(op1) */
33576 emit_insn (gen_abs (e1
, op1
));
33578 /* e2 = e1 + 0.5 */
33579 half
= force_reg (inmode
, half
);
33580 emit_insn (gen_rtx_SET (VOIDmode
, e2
,
33581 gen_rtx_PLUS (inmode
, e1
, half
)));
33583 /* res = floor(e2) */
33584 if (inmode
!= XFmode
)
33586 tmp1
= gen_reg_rtx (XFmode
);
33588 emit_insn (gen_rtx_SET (VOIDmode
, tmp1
,
33589 gen_rtx_FLOAT_EXTEND (XFmode
, e2
)));
33599 rtx tmp0
= gen_reg_rtx (XFmode
);
33601 emit_insn (gen_frndintxf2_floor (tmp0
, tmp1
));
33603 emit_insn (gen_rtx_SET (VOIDmode
, res
,
33604 gen_rtx_UNSPEC (outmode
, gen_rtvec (1, tmp0
),
33605 UNSPEC_TRUNC_NOOP
)));
33609 emit_insn (gen_frndintxf2_floor (res
, tmp1
));
33612 emit_insn (gen_lfloorxfhi2 (res
, tmp1
));
33615 emit_insn (gen_lfloorxfsi2 (res
, tmp1
));
33618 emit_insn (gen_lfloorxfdi2 (res
, tmp1
));
33621 gcc_unreachable ();
33624 /* flags = signbit(a) */
33625 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x02)));
33627 /* if (flags) then res = -res */
33628 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
,
33629 gen_rtx_EQ (VOIDmode
, flags
, const0_rtx
),
33630 gen_rtx_LABEL_REF (VOIDmode
, jump_label
),
33632 insn
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
33633 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
33634 JUMP_LABEL (insn
) = jump_label
;
33636 emit_insn (gen_neg (res
, res
));
33638 emit_label (jump_label
);
33639 LABEL_NUSES (jump_label
) = 1;
33641 emit_move_insn (op0
, res
);
33644 /* Output code to perform a Newton-Rhapson approximation of a single precision
33645 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
33647 void ix86_emit_swdivsf (rtx res
, rtx a
, rtx b
, enum machine_mode mode
)
33649 rtx x0
, x1
, e0
, e1
;
33651 x0
= gen_reg_rtx (mode
);
33652 e0
= gen_reg_rtx (mode
);
33653 e1
= gen_reg_rtx (mode
);
33654 x1
= gen_reg_rtx (mode
);
33656 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
33658 b
= force_reg (mode
, b
);
33660 /* x0 = rcp(b) estimate */
33661 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
33662 gen_rtx_UNSPEC (mode
, gen_rtvec (1, b
),
33665 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
33666 gen_rtx_MULT (mode
, x0
, b
)));
33669 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
33670 gen_rtx_MULT (mode
, x0
, e0
)));
33673 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
33674 gen_rtx_PLUS (mode
, x0
, x0
)));
33677 emit_insn (gen_rtx_SET (VOIDmode
, x1
,
33678 gen_rtx_MINUS (mode
, e1
, e0
)));
33681 emit_insn (gen_rtx_SET (VOIDmode
, res
,
33682 gen_rtx_MULT (mode
, a
, x1
)));
33685 /* Output code to perform a Newton-Rhapson approximation of a
33686 single precision floating point [reciprocal] square root. */
33688 void ix86_emit_swsqrtsf (rtx res
, rtx a
, enum machine_mode mode
,
33691 rtx x0
, e0
, e1
, e2
, e3
, mthree
, mhalf
;
33694 x0
= gen_reg_rtx (mode
);
33695 e0
= gen_reg_rtx (mode
);
33696 e1
= gen_reg_rtx (mode
);
33697 e2
= gen_reg_rtx (mode
);
33698 e3
= gen_reg_rtx (mode
);
33700 real_from_integer (&r
, VOIDmode
, -3, -1, 0);
33701 mthree
= CONST_DOUBLE_FROM_REAL_VALUE (r
, SFmode
);
33703 real_arithmetic (&r
, NEGATE_EXPR
, &dconsthalf
, NULL
);
33704 mhalf
= CONST_DOUBLE_FROM_REAL_VALUE (r
, SFmode
);
33706 if (VECTOR_MODE_P (mode
))
33708 mthree
= ix86_build_const_vector (mode
, true, mthree
);
33709 mhalf
= ix86_build_const_vector (mode
, true, mhalf
);
33712 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
33713 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
33715 a
= force_reg (mode
, a
);
33717 /* x0 = rsqrt(a) estimate */
33718 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
33719 gen_rtx_UNSPEC (mode
, gen_rtvec (1, a
),
33722 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
33727 zero
= gen_reg_rtx (mode
);
33728 mask
= gen_reg_rtx (mode
);
33730 zero
= force_reg (mode
, CONST0_RTX(mode
));
33731 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
33732 gen_rtx_NE (mode
, zero
, a
)));
33734 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
33735 gen_rtx_AND (mode
, x0
, mask
)));
33739 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
33740 gen_rtx_MULT (mode
, x0
, a
)));
33742 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
33743 gen_rtx_MULT (mode
, e0
, x0
)));
33746 mthree
= force_reg (mode
, mthree
);
33747 emit_insn (gen_rtx_SET (VOIDmode
, e2
,
33748 gen_rtx_PLUS (mode
, e1
, mthree
)));
33750 mhalf
= force_reg (mode
, mhalf
);
33752 /* e3 = -.5 * x0 */
33753 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
33754 gen_rtx_MULT (mode
, x0
, mhalf
)));
33756 /* e3 = -.5 * e0 */
33757 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
33758 gen_rtx_MULT (mode
, e0
, mhalf
)));
33759 /* ret = e2 * e3 */
33760 emit_insn (gen_rtx_SET (VOIDmode
, res
,
33761 gen_rtx_MULT (mode
, e2
, e3
)));
33764 #ifdef TARGET_SOLARIS
33765 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
/* NOTE(review): the return-type line and trailing parameter(s) were elided
   by extraction; the body references DECL, so the signature presumably ends
   with a tree DECL parameter -- TODO confirm against the full file.  */
33768 i386_solaris_elf_named_section (const char *name
, unsigned int flags
,
33771 /* With Binutils 2.15, the "@unwind" marker must be specified on
33772 every occurrence of the ".eh_frame" section, not just the first
/* For ".eh_frame", emit the directive by hand with the "@unwind" marker.  */
33775 && strcmp (name
, ".eh_frame") == 0)
33777 fprintf (asm_out_file
, "\t.section\t%s,\"%s\",@unwind\n", name
,
33778 flags
& SECTION_WRITE
? "aw" : "a");
/* Link-once sections are delegated to the Solaris COMDAT helper.  */
33783 if (HAVE_COMDAT_GROUP
&& flags
& SECTION_LINKONCE
)
33785 solaris_elf_asm_comdat_section (name
, flags
, decl
);
/* Everything else uses the generic ELF named-section output.  */
33790 default_elf_asm_named_section (name
, flags
, decl
);
33792 #endif /* TARGET_SOLARIS */
33794 /* Return the mangling of TYPE if it is an extended fundamental type. */
33796 static const char *
33797 ix86_mangle_type (const_tree type
)
/* Look through qualified/typedef variants to the canonical type.  */
33799 type
= TYPE_MAIN_VARIANT (type
);
/* Only fundamental scalar types get an extended mangling here.  */
33801 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
33802 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
)
/* NOTE(review): the case labels and return statements of this switch were
   elided by extraction; per the comments below, presumably TFmode returns
   "g" and XFmode returns "e" -- TODO confirm against the full file.  */
33805 switch (TYPE_MODE (type
))
33808 /* __float128 is "g". */
33811 /* "long double" or __float80 is "e". */
33818 /* For 32-bit code we can save PIC register setup by using
33819 __stack_chk_fail_local hidden function instead of calling
33820 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
33821 register, so it is better to call __stack_chk_fail directly. */
33823 static tree ATTRIBUTE_UNUSED
33824 ix86_stack_protect_fail (void)
33826 return TARGET_64BIT
33827 ? default_external_stack_protect_fail ()
33828 : default_hidden_stack_protect_fail ();
33831 /* Select a format to encode pointers in exception handling data. CODE
33832 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
33833 true if the symbol may be affected by dynamic relocations.
33835 ??? All x86 object file formats are capable of representing this.
33836 After all, the relocation needed is the same as for the call insn.
33837 Whether or not a particular assembler allows us to enter such, I
33838 guess we'll have to see. */
/* NOTE(review): the return-type line and the enclosing condition guarding
   the first branch were elided by extraction; presumably the sdata branch
   runs only for PIC code -- TODO confirm against the full file.  */
33840 asm_preferred_eh_data_format (int code
, int global
)
/* Default to 8-byte signed data; narrowed to 4 bytes below when the code
   model permits.  */
33844 int type
= DW_EH_PE_sdata8
;
33846 || ix86_cmodel
== CM_SMALL_PIC
33847 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
33848 type
= DW_EH_PE_sdata4
;
/* Globals are encoded indirectly so dynamic relocations stay out of
   read-only EH data; the value itself is pc-relative.  */
33849 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
/* Non-PIC: the small model (and medium for code labels) fits in 4 bytes;
   otherwise fall back to an absolute pointer.  */
33851 if (ix86_cmodel
== CM_SMALL
33852 || (ix86_cmodel
== CM_MEDIUM
&& code
))
33853 return DW_EH_PE_udata4
;
33854 return DW_EH_PE_absptr
;
33857 /* Expand copysign from SIGN to the positive value ABS_VALUE
33858 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
33861 ix86_sse_copysign_to_positive (rtx result
, rtx abs_value
, rtx sign
, rtx mask
)
33863 enum machine_mode mode
= GET_MODE (sign
);
33864 rtx sgn
= gen_reg_rtx (mode
);
/* No caller-provided sign-bit mask: build one for this mode.  */
33865 if (mask
== NULL_RTX
)
33867 enum machine_mode vmode
;
/* NOTE(review): the vmode assignments for the SF/DF (and presumably
   vector) cases were elided by extraction -- TODO confirm.  */
33869 if (mode
== SFmode
)
33871 else if (mode
== DFmode
)
33876 mask
= ix86_build_signbit_mask (vmode
, VECTOR_MODE_P (mode
), false);
33877 if (!VECTOR_MODE_P (mode
))
33879 /* We need to generate a scalar mode mask in this case. */
33880 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
33881 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
33882 mask
= gen_reg_rtx (mode
);
33883 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
/* sgn = SIGN & ~MASK isolates the sign bit of SIGN (MASK covers the
   non-sign bits here); RESULT = ABS_VALUE | sgn copies the sign over.  */
33887 mask
= gen_rtx_NOT (mode
, mask
);
33888 emit_insn (gen_rtx_SET (VOIDmode
, sgn
,
33889 gen_rtx_AND (mode
, mask
, sign
)));
33890 emit_insn (gen_rtx_SET (VOIDmode
, result
,
33891 gen_rtx_IOR (mode
, abs_value
, sgn
)));
33894 /* Expand fabs (OP0) and return a new rtx that holds the result. The
33895 mask for masking out the sign-bit is stored in *SMASK, if that is
33898 ix86_expand_sse_fabs (rtx op0
, rtx
*smask
)
33900 enum machine_mode vmode
, mode
= GET_MODE (op0
);
33903 xa
= gen_reg_rtx (mode
);
/* NOTE(review): the vmode assignments for the SF/DF cases were elided by
   extraction -- TODO confirm against the full file.  */
33904 if (mode
== SFmode
)
33906 else if (mode
== DFmode
)
/* Build a mask with all bits set except the sign bit.  */
33910 mask
= ix86_build_signbit_mask (vmode
, VECTOR_MODE_P (mode
), true);
33911 if (!VECTOR_MODE_P (mode
))
33913 /* We need to generate a scalar mode mask in this case. */
33914 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
33915 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
33916 mask
= gen_reg_rtx (mode
);
33917 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
/* xa = op0 & mask clears the sign bit, i.e. computes fabs (op0).  */
33919 emit_insn (gen_rtx_SET (VOIDmode
, xa
,
33920 gen_rtx_AND (mode
, op0
, mask
)));
33928 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
33929 swapping the operands if SWAP_OPERANDS is true. The expanded
33930 code is a forward jump to a newly created label in case the
33931 comparison is true. The generated label rtx is returned. */
33933 ix86_expand_sse_compare_and_jump (enum rtx_code code
, rtx op0
, rtx op1
,
33934 bool swap_operands
)
/* NOTE(review): the operand-swap code for SWAP_OPERANDS was elided by
   extraction -- TODO confirm against the full file.  */
33945 label
= gen_label_rtx ();
/* Compare OP0 with OP1 in the FP-unordered flags mode ...  */
33946 tmp
= gen_rtx_REG (CCFPUmode
, FLAGS_REG
);
33947 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
33948 gen_rtx_COMPARE (CCFPUmode
, op0
, op1
)));
/* ... then emit a conditional jump on CODE to the fresh label.  */
33949 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
, tmp
, const0_rtx
);
33950 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
33951 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
33952 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
33953 JUMP_LABEL (tmp
) = label
;
33958 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
33959 using comparison code CODE. Operands are swapped for the comparison if
33960 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
33962 ix86_expand_sse_compare_mask (enum rtx_code code
, rtx op0
, rtx op1
,
33963 bool swap_operands
)
33965 rtx (*insn
)(rtx
, rtx
, rtx
, rtx
);
33966 enum machine_mode mode
= GET_MODE (op0
);
33967 rtx mask
= gen_reg_rtx (mode
);
/* NOTE(review): the SWAP_OPERANDS handling was elided by extraction --
   TODO confirm against the full file.  */
/* Pick the mode-specific setcc pattern (cmpsd/cmpss style mask).  */
33976 insn
= mode
== DFmode
? gen_setcc_df_sse
: gen_setcc_sf_sse
;
33978 emit_insn (insn (mask
, op0
, op1
,
33979 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
33983 /* Generate and return a rtx of mode MODE for 2**n where n is the number
33984 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
33986 ix86_gen_TWO52 (enum machine_mode mode
)
33988 REAL_VALUE_TYPE TWO52r
;
/* 2**52 for DFmode, 2**23 for SFmode: the smallest magnitude at which
   every representable value is already an integer.  */
33991 real_ldexp (&TWO52r
, &dconst1
, mode
== DFmode
? 52 : 23);
33992 TWO52
= const_double_from_real_value (TWO52r
, mode
);
/* Force into a register so repeated uses share one pseudo.  */
33993 TWO52
= force_reg (mode
, TWO52
);
33998 /* Expand SSE sequence for computing lround from OP1 storing
34001 ix86_expand_lround (rtx op0
, rtx op1
)
34003 /* C code for the stuff we're doing below:
34004 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
34007 enum machine_mode mode
= GET_MODE (op1
);
34008 const struct real_format
*fmt
;
34009 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
34012 /* load nextafter (0.5, 0.0) */
/* pred_half = 0.5 - 2**(-p-1), the largest value below 0.5, so exact
   halves round away from zero without double-rounding errors.  */
34013 fmt
= REAL_MODE_FORMAT (mode
);
34014 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
34015 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
34017 /* adj = copysign (0.5, op1) */
34018 adj
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
34019 ix86_sse_copysign_to_positive (adj
, adj
, force_reg (mode
, op1
), NULL_RTX
);
34021 /* adj = op1 + adj */
34022 adj
= expand_simple_binop (mode
, PLUS
, adj
, op1
, NULL_RTX
, 0, OPTAB_DIRECT
);
34024 /* op0 = (imode)adj */
34025 expand_fix (op0
, adj
, 0);
34028 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
34031 ix86_expand_lfloorceil (rtx op0
, rtx op1
, bool do_floor
)
34033 /* C code for the stuff we're doing below (for do_floor):
34035 xi -= (double)xi > op1 ? 1 : 0;
34038 enum machine_mode fmode
= GET_MODE (op1
);
34039 enum machine_mode imode
= GET_MODE (op0
);
34040 rtx ireg
, freg
, label
, tmp
;
34042 /* reg = (long)op1 */
34043 ireg
= gen_reg_rtx (imode
);
34044 expand_fix (ireg
, op1
, 0);
34046 /* freg = (double)reg */
34047 freg
= gen_reg_rtx (fmode
);
34048 expand_float (freg
, ireg
, 0);
34050 /* ireg = (freg > op1) ? ireg - 1 : ireg */
/* The truncation rounds toward zero, so compensate by one when the
   round-tripped value overshoots (floor) or undershoots (ceil).  */
34051 label
= ix86_expand_sse_compare_and_jump (UNLE
,
34052 freg
, op1
, !do_floor
);
34053 tmp
= expand_simple_binop (imode
, do_floor
? MINUS
: PLUS
,
34054 ireg
, const1_rtx
, NULL_RTX
, 0, OPTAB_DIRECT
);
34055 emit_move_insn (ireg
, tmp
);
34057 emit_label (label
);
34058 LABEL_NUSES (label
) = 1;
34060 emit_move_insn (op0
, ireg
);
34063 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
34064 result in OPERAND0. */
34066 ix86_expand_rint (rtx operand0
, rtx operand1
)
34068 /* C code for the stuff we're doing below:
34069 xa = fabs (operand1);
34070 if (!isless (xa, 2**52))
34072 xa = xa + 2**52 - 2**52;
34073 return copysign (xa, operand1);
34075 enum machine_mode mode
= GET_MODE (operand0
);
34076 rtx res
, xa
, label
, TWO52
, mask
;
34078 res
= gen_reg_rtx (mode
);
34079 emit_move_insn (res
, operand1
);
34081 /* xa = abs (operand1) */
34082 xa
= ix86_expand_sse_fabs (res
, &mask
);
34084 /* if (!isless (xa, TWO52)) goto label; */
/* Values >= 2**52 (2**23 for SF) are already integral (or NaN); skip.  */
34085 TWO52
= ix86_gen_TWO52 (mode
);
34086 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
/* Adding and subtracting 2**52 rounds to nearest in the current
   rounding mode as a side effect of FP arithmetic.  */
34088 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
34089 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
/* Reapply the original sign so -0.0 and negatives round correctly.  */
34091 ix86_sse_copysign_to_positive (res
, xa
, res
, mask
);
34093 emit_label (label
);
34094 LABEL_NUSES (label
) = 1;
34096 emit_move_insn (operand0
, res
);
34099 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
34102 ix86_expand_floorceildf_32 (rtx operand0
, rtx operand1
, bool do_floor
)
34104 /* C code for the stuff we expand below.
34105 double xa = fabs (x), x2;
34106 if (!isless (xa, TWO52))
34108 xa = xa + TWO52 - TWO52;
34109 x2 = copysign (xa, x);
34118 enum machine_mode mode
= GET_MODE (operand0
);
34119 rtx xa
, TWO52
, tmp
, label
, one
, res
, mask
;
34121 TWO52
= ix86_gen_TWO52 (mode
);
34123 /* Temporary for holding the result, initialized to the input
34124 operand to ease control flow. */
34125 res
= gen_reg_rtx (mode
);
34126 emit_move_insn (res
, operand1
);
34128 /* xa = abs (operand1) */
34129 xa
= ix86_expand_sse_fabs (res
, &mask
);
34131 /* if (!isless (xa, TWO52)) goto label; */
34132 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
34134 /* xa = xa + TWO52 - TWO52; */
/* Round-to-nearest via the 2**52 add/subtract trick.  */
34135 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
34136 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
34138 /* xa = copysign (xa, operand1) */
34139 ix86_sse_copysign_to_positive (xa
, xa
, res
, mask
);
34141 /* generate 1.0 or -1.0 */
/* +1.0 for floor, -1.0 for ceil; subtracting below then moves the
   rounded value the right direction in both cases.  */
34142 one
= force_reg (mode
,
34143 const_double_from_real_value (do_floor
34144 ? dconst1
: dconstm1
, mode
));
34146 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
34147 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
34148 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
34149 gen_rtx_AND (mode
, one
, tmp
)));
34150 /* We always need to subtract here to preserve signed zero. */
34151 tmp
= expand_simple_binop (mode
, MINUS
,
34152 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
34153 emit_move_insn (res
, tmp
);
34155 emit_label (label
);
34156 LABEL_NUSES (label
) = 1;
34158 emit_move_insn (operand0
, res
);
34161 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
34164 ix86_expand_floorceil (rtx operand0
, rtx operand1
, bool do_floor
)
34166 /* C code for the stuff we expand below.
34167 double xa = fabs (x), x2;
34168 if (!isless (xa, TWO52))
34170 x2 = (double)(long)x;
34177 if (HONOR_SIGNED_ZEROS (mode))
34178 return copysign (x2, x);
34181 enum machine_mode mode
= GET_MODE (operand0
);
34182 rtx xa
, xi
, TWO52
, tmp
, label
, one
, res
, mask
;
34184 TWO52
= ix86_gen_TWO52 (mode
);
34186 /* Temporary for holding the result, initialized to the input
34187 operand to ease control flow. */
34188 res
= gen_reg_rtx (mode
);
34189 emit_move_insn (res
, operand1
);
34191 /* xa = abs (operand1) */
34192 xa
= ix86_expand_sse_fabs (res
, &mask
);
34194 /* if (!isless (xa, TWO52)) goto label; */
34195 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
34197 /* xa = (double)(long)x */
/* Truncate toward zero via the integer round trip (requires DImode
   conversions for DFmode, hence the 64-bit-only variant).  */
34198 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
34199 expand_fix (xi
, res
, 0);
34200 expand_float (xa
, xi
, 0);
34203 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
34205 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
34206 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
34207 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
34208 gen_rtx_AND (mode
, one
, tmp
)));
34209 tmp
= expand_simple_binop (mode
, do_floor
? MINUS
: PLUS
,
34210 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
34211 emit_move_insn (res
, tmp
);
/* Restore the input's sign so that -0.0 is preserved when required.  */
34213 if (HONOR_SIGNED_ZEROS (mode
))
34214 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
34216 emit_label (label
);
34217 LABEL_NUSES (label
) = 1;
34219 emit_move_insn (operand0
, res
);
34222 /* Expand SSE sequence for computing round from OPERAND1 storing
34223 into OPERAND0. Sequence that works without relying on DImode truncation
34224 via cvttsd2siq that is only available on 64bit targets. */
34226 ix86_expand_rounddf_32 (rtx operand0
, rtx operand1
)
34228 /* C code for the stuff we expand below.
34229 double xa = fabs (x), xa2, x2;
34230 if (!isless (xa, TWO52))
34232 Using the absolute value and copying back sign makes
34233 -0.0 -> -0.0 correct.
34234 xa2 = xa + TWO52 - TWO52;
34239 else if (dxa > 0.5)
34241 x2 = copysign (xa2, x);
34244 enum machine_mode mode
= GET_MODE (operand0
);
34245 rtx xa
, xa2
, dxa
, TWO52
, tmp
, label
, half
, mhalf
, one
, res
, mask
;
34247 TWO52
= ix86_gen_TWO52 (mode
);
34249 /* Temporary for holding the result, initialized to the input
34250 operand to ease control flow. */
34251 res
= gen_reg_rtx (mode
);
34252 emit_move_insn (res
, operand1
);
34254 /* xa = abs (operand1) */
34255 xa
= ix86_expand_sse_fabs (res
, &mask
);
34257 /* if (!isless (xa, TWO52)) goto label; */
34258 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
34260 /* xa2 = xa + TWO52 - TWO52; */
/* Round-to-nearest(-even) of |x| via the 2**52 trick.  */
34261 xa2
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
34262 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
34264 /* dxa = xa2 - xa; */
/* dxa is the rounding error; it decides the half-way compensation.  */
34265 dxa
= expand_simple_binop (mode
, MINUS
, xa2
, xa
, NULL_RTX
, 0, OPTAB_DIRECT
);
34267 /* generate 0.5, 1.0 and -0.5 */
34268 half
= force_reg (mode
, const_double_from_real_value (dconsthalf
, mode
));
34269 one
= expand_simple_binop (mode
, PLUS
, half
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
34270 mhalf
= expand_simple_binop (mode
, MINUS
, half
, one
, NULL_RTX
,
34274 tmp
= gen_reg_rtx (mode
);
34275 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
/* Rounded too far up: take one back.  */
34276 tmp
= ix86_expand_sse_compare_mask (UNGT
, dxa
, half
, false);
34277 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
34278 gen_rtx_AND (mode
, one
, tmp
)));
34279 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
34280 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
/* Rounded too far down: add one back.  */
34281 tmp
= ix86_expand_sse_compare_mask (UNGE
, mhalf
, dxa
, false);
34282 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
34283 gen_rtx_AND (mode
, one
, tmp
)));
34284 xa2
= expand_simple_binop (mode
, PLUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
34286 /* res = copysign (xa2, operand1) */
34287 ix86_sse_copysign_to_positive (res
, xa2
, force_reg (mode
, operand1
), mask
);
34289 emit_label (label
);
34290 LABEL_NUSES (label
) = 1;
34292 emit_move_insn (operand0
, res
);
34295 /* Expand SSE sequence for computing trunc from OPERAND1 storing
34298 ix86_expand_trunc (rtx operand0
, rtx operand1
)
34300 /* C code for SSE variant we expand below.
34301 double xa = fabs (x), x2;
34302 if (!isless (xa, TWO52))
34304 x2 = (double)(long)x;
34305 if (HONOR_SIGNED_ZEROS (mode))
34306 return copysign (x2, x);
34309 enum machine_mode mode
= GET_MODE (operand0
);
34310 rtx xa
, xi
, TWO52
, label
, res
, mask
;
34312 TWO52
= ix86_gen_TWO52 (mode
);
34314 /* Temporary for holding the result, initialized to the input
34315 operand to ease control flow. */
34316 res
= gen_reg_rtx (mode
);
34317 emit_move_insn (res
, operand1
);
34319 /* xa = abs (operand1) */
34320 xa
= ix86_expand_sse_fabs (res
, &mask
);
34322 /* if (!isless (xa, TWO52)) goto label; */
34323 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
34325 /* x = (double)(long)x */
/* The fix/float round trip truncates toward zero by definition.  */
34326 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
34327 expand_fix (xi
, res
, 0);
34328 expand_float (res
, xi
, 0);
/* The round trip loses the sign of zero; restore it when it matters.  */
34330 if (HONOR_SIGNED_ZEROS (mode
))
34331 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
34333 emit_label (label
);
34334 LABEL_NUSES (label
) = 1;
34336 emit_move_insn (operand0
, res
);
34339 /* Expand SSE sequence for computing trunc from OPERAND1 storing
34342 ix86_expand_truncdf_32 (rtx operand0
, rtx operand1
)
34344 enum machine_mode mode
= GET_MODE (operand0
);
34345 rtx xa
, mask
, TWO52
, label
, one
, res
, smask
, tmp
;
34347 /* C code for SSE variant we expand below.
34348 double xa = fabs (x), x2;
34349 if (!isless (xa, TWO52))
34351 xa2 = xa + TWO52 - TWO52;
34355 x2 = copysign (xa2, x);
34359 TWO52
= ix86_gen_TWO52 (mode
);
34361 /* Temporary for holding the result, initialized to the input
34362 operand to ease control flow. */
34363 res
= gen_reg_rtx (mode
);
34364 emit_move_insn (res
, operand1
);
34366 /* xa = abs (operand1) */
34367 xa
= ix86_expand_sse_fabs (res
, &smask
);
34369 /* if (!isless (xa, TWO52)) goto label; */
34370 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
34372 /* res = xa + TWO52 - TWO52; */
/* Round |x| to an integer; corrected below if it rounded up.  */
34373 tmp
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
34374 tmp
= expand_simple_binop (mode
, MINUS
, tmp
, TWO52
, tmp
, 0, OPTAB_DIRECT
);
34375 emit_move_insn (res
, tmp
);
34378 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
34380 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
/* If rounding went above |x|, subtract one so the result truncates.  */
34381 mask
= ix86_expand_sse_compare_mask (UNGT
, res
, xa
, false);
34382 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
34383 gen_rtx_AND (mode
, mask
, one
)));
34384 tmp
= expand_simple_binop (mode
, MINUS
,
34385 res
, mask
, NULL_RTX
, 0, OPTAB_DIRECT
);
34386 emit_move_insn (res
, tmp
);
34388 /* res = copysign (res, operand1) */
34389 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), smask
);
34391 emit_label (label
);
34392 LABEL_NUSES (label
) = 1;
34394 emit_move_insn (operand0
, res
);
34397 /* Expand SSE sequence for computing round from OPERAND1 storing
34400 ix86_expand_round (rtx operand0
, rtx operand1
)
34402 /* C code for the stuff we're doing below:
34403 double xa = fabs (x);
34404 if (!isless (xa, TWO52))
34406 xa = (double)(long)(xa + nextafter (0.5, 0.0));
34407 return copysign (xa, x);
34409 enum machine_mode mode
= GET_MODE (operand0
);
34410 rtx res
, TWO52
, xa
, label
, xi
, half
, mask
;
34411 const struct real_format
*fmt
;
34412 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
34414 /* Temporary for holding the result, initialized to the input
34415 operand to ease control flow. */
34416 res
= gen_reg_rtx (mode
);
34417 emit_move_insn (res
, operand1
);
34419 TWO52
= ix86_gen_TWO52 (mode
);
34420 xa
= ix86_expand_sse_fabs (res
, &mask
);
34421 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
34423 /* load nextafter (0.5, 0.0) */
/* Use the predecessor of 0.5 so x + 0.5 cannot round up to the next
   integer for inputs just below a half-way point.  */
34424 fmt
= REAL_MODE_FORMAT (mode
);
34425 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
34426 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
34428 /* xa = xa + 0.5 */
34429 half
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
34430 xa
= expand_simple_binop (mode
, PLUS
, xa
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
34432 /* xa = (double)(int64_t)xa */
34433 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
34434 expand_fix (xi
, xa
, 0);
34435 expand_float (xa
, xi
, 0);
34437 /* res = copysign (xa, operand1) */
34438 ix86_sse_copysign_to_positive (res
, xa
, force_reg (mode
, operand1
), mask
);
34440 emit_label (label
);
34441 LABEL_NUSES (label
) = 1;
34443 emit_move_insn (operand0
, res
);
34446 /* Expand SSE sequence for computing round
34447 from OP1 storing into OP0 using sse4 round insn. */
34449 ix86_expand_round_sse4 (rtx op0
, rtx op1
)
34451 enum machine_mode mode
= GET_MODE (op0
);
34452 rtx e1
, e2
, res
, half
;
34453 const struct real_format
*fmt
;
34454 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
34455 rtx (*gen_copysign
) (rtx
, rtx
, rtx
);
34456 rtx (*gen_round
) (rtx
, rtx
, rtx
);
/* NOTE(review): the switch header on MODE was elided by extraction;
   these arms presumably correspond to SFmode and DFmode -- TODO confirm.  */
34461 gen_copysign
= gen_copysignsf3
;
34462 gen_round
= gen_sse4_1_roundsf2
;
34465 gen_copysign
= gen_copysigndf3
;
34466 gen_round
= gen_sse4_1_rounddf2
;
34469 gcc_unreachable ();
34472 /* round (a) = trunc (a + copysign (0.5, a)) */
34474 /* load nextafter (0.5, 0.0) */
/* Predecessor of 0.5 avoids misrounding inputs just below .5.  */
34475 fmt
= REAL_MODE_FORMAT (mode
);
34476 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
34477 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
34478 half
= const_double_from_real_value (pred_half
, mode
);
34480 /* e1 = copysign (0.5, op1) */
34481 e1
= gen_reg_rtx (mode
);
34482 emit_insn (gen_copysign (e1
, half
, op1
));
34484 /* e2 = op1 + e1 */
34485 e2
= expand_simple_binop (mode
, PLUS
, op1
, e1
, NULL_RTX
, 0, OPTAB_DIRECT
);
34487 /* res = trunc (e2) */
34488 res
= gen_reg_rtx (mode
);
34489 emit_insn (gen_round (res
, e2
, GEN_INT (ROUND_TRUNC
)));
34491 emit_move_insn (op0
, res
);
34495 /* Table of valid machine attributes. */
/* NOTE(review): several entries' trailing initializer fields were elided
   by extraction; each row presumably ends with the affects_type_identity
   flag -- TODO confirm against the full file.  */
34496 static const struct attribute_spec ix86_attribute_table
[] =
34498 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
34499 affects_type_identity } */
34500 /* Stdcall attribute says callee is responsible for popping arguments
34501 if they are not variable. */
34502 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
34504 /* Fastcall attribute says callee is responsible for popping arguments
34505 if they are not variable. */
34506 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
34508 /* Thiscall attribute says callee is responsible for popping arguments
34509 if they are not variable. */
34510 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
34512 /* Cdecl attribute says the callee is a normal C declaration */
34513 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
34515 /* Regparm attribute specifies how many integer arguments are to be
34516 passed in registers. */
34517 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute
,
34519 /* Sseregparm attribute says we are using x86_64 calling conventions
34520 for FP arguments. */
34521 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
34523 /* force_align_arg_pointer says this function realigns the stack at entry. */
34524 { (const char *)&ix86_force_align_arg_pointer_string
, 0, 0,
34525 false, true, true, ix86_handle_cconv_attribute
, false },
34526 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
34527 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
, false },
34528 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
, false },
34529 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
,
34532 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
,
34534 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
,
34536 #ifdef SUBTARGET_ATTRIBUTE_TABLE
34537 SUBTARGET_ATTRIBUTE_TABLE
,
34539 /* ms_abi and sysv_abi calling convention function attributes. */
34540 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute
, true },
34541 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute
, true },
34542 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute
,
34544 { "callee_pop_aggregate_return", 1, 1, false, true, true,
34545 ix86_handle_callee_pop_aggregate_return
, true },
/* Sentinel entry terminating the table.  */
34547 { NULL
, 0, 0, false, false, false, NULL
, false }
34550 /* Implement targetm.vectorize.builtin_vectorization_cost. */
/* NOTE(review): several case labels of this switch were elided by
   extraction; each arm returns the matching field of the active cost
   table.  */
34552 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
34553 tree vectype ATTRIBUTE_UNUSED
,
34554 int misalign ATTRIBUTE_UNUSED
)
34556 switch (type_of_cost
)
34559 return ix86_cost
->scalar_stmt_cost
;
34562 return ix86_cost
->scalar_load_cost
;
34565 return ix86_cost
->scalar_store_cost
;
34568 return ix86_cost
->vec_stmt_cost
;
34571 return ix86_cost
->vec_align_load_cost
;
34574 return ix86_cost
->vec_store_cost
;
34576 case vec_to_scalar
:
34577 return ix86_cost
->vec_to_scalar_cost
;
34579 case scalar_to_vec
:
34580 return ix86_cost
->scalar_to_vec_cost
;
34582 case unaligned_load
:
34583 case unaligned_store
:
34584 return ix86_cost
->vec_unalign_load_cost
;
34586 case cond_branch_taken
:
34587 return ix86_cost
->cond_taken_branch_cost
;
34589 case cond_branch_not_taken
:
34590 return ix86_cost
->cond_not_taken_branch_cost
;
34596 gcc_unreachable ();
34601 /* Return a vector mode with twice as many elements as VMODE. */
34602 /* ??? Consider moving this to a table generated by genmodes.c. */
34604 static enum machine_mode
34605 doublesize_vector_mode (enum machine_mode vmode
)
/* 64-bit (MMX) modes double to 128-bit modes.  */
34609 case V2SFmode
: return V4SFmode
;
34610 case V1DImode
: return V2DImode
;
34611 case V2SImode
: return V4SImode
;
34612 case V4HImode
: return V8HImode
;
34613 case V8QImode
: return V16QImode
;
/* 128-bit (SSE) modes double to 256-bit modes.  */
34615 case V2DFmode
: return V4DFmode
;
34616 case V4SFmode
: return V8SFmode
;
34617 case V2DImode
: return V4DImode
;
34618 case V4SImode
: return V8SImode
;
34619 case V8HImode
: return V16HImode
;
34620 case V16QImode
: return V32QImode
;
/* 256-bit (AVX) modes double to 512-bit modes.  */
34622 case V4DFmode
: return V8DFmode
;
34623 case V8SFmode
: return V16SFmode
;
34624 case V4DImode
: return V8DImode
;
34625 case V8SImode
: return V16SImode
;
34626 case V16HImode
: return V32HImode
;
34627 case V32QImode
: return V64QImode
;
/* Any other mode is a caller error.  */
34630 gcc_unreachable ();
34634 /* Construct (set target (vec_select op0 (parallel perm))) and
34635 return true if that's a valid instruction in the active ISA. */
34638 expand_vselect (rtx target
, rtx op0
, const unsigned char *perm
, unsigned nelt
)
34640 rtx rperm
[MAX_VECT_LEN
], x
;
/* Build the (parallel [...]) of constant selector indices.  */
34643 for (i
= 0; i
< nelt
; ++i
)
34644 rperm
[i
] = GEN_INT (perm
[i
]);
34646 x
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (nelt
, rperm
));
34647 x
= gen_rtx_VEC_SELECT (GET_MODE (target
), op0
, x
);
34648 x
= gen_rtx_SET (VOIDmode
, target
, x
);
/* Probe the insn recognizer; a negative code means no single insn in
   the active ISA matches this select, so report failure.  */
34651 if (recog_memoized (x
) < 0)
34659 /* Similar, but generate a vec_concat from op0 and op1 as well. */
34662 expand_vselect_vconcat (rtx target
, rtx op0
, rtx op1
,
34663 const unsigned char *perm
, unsigned nelt
)
34665 enum machine_mode v2mode
;
/* Concatenate the two inputs into a double-width vector, then do a
   plain vec_select on that, reusing expand_vselect's recognition.  */
34668 v2mode
= doublesize_vector_mode (GET_MODE (op0
));
34669 x
= gen_rtx_VEC_CONCAT (v2mode
, op0
, op1
);
34670 return expand_vselect (target
, x
, perm
, nelt
);
34673 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
34674 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
/* NOTE(review): many branch/case headers of this function were elided by
   extraction; the comments added below describe only what the visible
   lines establish.  */
34677 expand_vec_perm_blend (struct expand_vec_perm_d
*d
)
34679 enum machine_mode vmode
= d
->vmode
;
34680 unsigned i
, mask
, nelt
= d
->nelt
;
34681 rtx target
, op0
, op1
, x
;
34682 rtx rperm
[32], vperm
;
/* A blend needs two distinct operands.  */
34684 if (d
->op0
== d
->op1
)
/* ISA gating: 32-byte modes need AVX2 (or AVX for the FP modes);
   16-byte modes need SSE4.1.  */
34686 if (TARGET_AVX2
&& GET_MODE_SIZE (vmode
) == 32)
34688 else if (TARGET_AVX
&& (vmode
== V4DFmode
|| vmode
== V8SFmode
))
34690 else if (TARGET_SSE4_1
&& GET_MODE_SIZE (vmode
) == 16)
34695 /* This is a blend, not a permute. Elements must stay in their
34696 respective lanes. */
34697 for (i
= 0; i
< nelt
; ++i
)
34699 unsigned e
= d
->perm
[i
];
34700 if (!(e
== i
|| e
== i
+ nelt
))
34707 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
34708 decision should be extracted elsewhere, so that we only try that
34709 sequence once all budget==3 options have been tried. */
34710 target
= d
->target
;
/* Per-element immediate mask: bit I set when element I comes from op1.  */
34723 for (i
= 0; i
< nelt
; ++i
)
34724 mask
|= (d
->perm
[i
] >= nelt
) << i
;
/* Two-element modes blended via a 4-bit-per-element immediate.  */
34728 for (i
= 0; i
< 2; ++i
)
34729 mask
|= (d
->perm
[i
] >= 2 ? 15 : 0) << (i
* 4);
/* Four-element modes blended via a 2-bit-per-element immediate.  */
34734 for (i
= 0; i
< 4; ++i
)
34735 mask
|= (d
->perm
[i
] >= 4 ? 3 : 0) << (i
* 2);
34740 /* See if bytes move in pairs so we can use pblendw with
34741 an immediate argument, rather than pblendvb with a vector
34743 for (i
= 0; i
< 16; i
+= 2)
34744 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
/* Build a byte-wise constant selector vector for pblendvb: 0 selects
   op0, all-ones selects op1.  */
34747 for (i
= 0; i
< nelt
; ++i
)
34748 rperm
[i
] = (d
->perm
[i
] < nelt
? const0_rtx
: constm1_rtx
);
34751 vperm
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
34752 vperm
= force_reg (vmode
, vperm
);
34754 if (GET_MODE_SIZE (vmode
) == 16)
34755 emit_insn (gen_sse4_1_pblendvb (target
, op0
, op1
, vperm
));
34757 emit_insn (gen_avx2_pblendvb (target
, op0
, op1
, vperm
));
/* pblendw path: one mask bit per 16-bit word (bytes move in pairs).  */
34761 for (i
= 0; i
< 8; ++i
)
34762 mask
|= (d
->perm
[i
* 2] >= 16) << i
;
/* Reinterpret operands in the mode the chosen blend insn expects.  */
34767 target
= gen_lowpart (vmode
, target
);
34768 op0
= gen_lowpart (vmode
, op0
);
34769 op1
= gen_lowpart (vmode
, op1
);
34773 /* See if bytes move in pairs. If not, vpblendvb must be used. */
34774 for (i
= 0; i
< 32; i
+= 2)
34775 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
34777 /* See if bytes move in quadruplets. If yes, vpblendd
34778 with immediate can be used. */
34779 for (i
= 0; i
< 32; i
+= 4)
34780 if (d
->perm
[i
] + 2 != d
->perm
[i
+ 2])
34784 /* See if bytes move the same in both lanes. If yes,
34785 vpblendw with immediate can be used. */
34786 for (i
= 0; i
< 16; i
+= 2)
34787 if (d
->perm
[i
] + 16 != d
->perm
[i
+ 16])
34790 /* Use vpblendw. */
34791 for (i
= 0; i
< 16; ++i
)
34792 mask
|= (d
->perm
[i
* 2] >= 32) << i
;
34797 /* Use vpblendd. */
34798 for (i
= 0; i
< 8; ++i
)
34799 mask
|= (d
->perm
[i
* 4] >= 32) << i
;
34804 /* See if words move in pairs. If yes, vpblendd can be used. */
34805 for (i
= 0; i
< 16; i
+= 2)
34806 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
34810 /* See if words move the same in both lanes. If not,
34811 vpblendvb must be used. */
34812 for (i
= 0; i
< 8; i
++)
34813 if (d
->perm
[i
] + 8 != d
->perm
[i
+ 8])
34815 /* Use vpblendvb. */
34816 for (i
= 0; i
< 32; ++i
)
34817 rperm
[i
] = (d
->perm
[i
/ 2] < 16 ? const0_rtx
: constm1_rtx
);
34821 target
= gen_lowpart (vmode
, target
);
34822 op0
= gen_lowpart (vmode
, op0
);
34823 op1
= gen_lowpart (vmode
, op1
);
34824 goto finish_pblendvb
;
34827 /* Use vpblendw. */
34828 for (i
= 0; i
< 16; ++i
)
34829 mask
|= (d
->perm
[i
] >= 16) << i
;
34833 /* Use vpblendd. */
34834 for (i
= 0; i
< 8; ++i
)
34835 mask
|= (d
->perm
[i
* 2] >= 16) << i
;
34840 /* Use vpblendd. */
34841 for (i
= 0; i
< 4; ++i
)
34842 mask
|= (d
->perm
[i
] >= 4 ? 3 : 0) << (i
* 2);
34847 gcc_unreachable ();
34850 /* This matches five different patterns with the different modes. */
34851 x
= gen_rtx_VEC_MERGE (vmode
, op1
, op0
, GEN_INT (mask
));
34852 x
= gen_rtx_SET (VOIDmode
, target
, x
);
34858 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
34859 in terms of the variable form of vpermilps.
34861 Note that we will have already failed the immediate input vpermilps,
34862 which requires that the high and low part shuffle be identical; the
34863 variable form doesn't require that. */
34866 expand_vec_perm_vpermil (struct expand_vec_perm_d
*d
)
34868 rtx rperm
[8], vperm
;
/* Only applicable to one-operand V8SFmode shuffles under AVX.  */
34871 if (!TARGET_AVX
|| d
->vmode
!= V8SFmode
|| d
->op0
!= d
->op1
)
34874 /* We can only permute within the 128-bit lane. */
34875 for (i
= 0; i
< 8; ++i
)
34877 unsigned e
= d
->perm
[i
];
34878 if (i
< 4 ? e
>= 4 : e
< 4)
/* Build the variable selector vector.  */
34885 for (i
= 0; i
< 8; ++i
)
34887 unsigned e
= d
->perm
[i
];
34889 /* Within each 128-bit lane, the elements of op0 are numbered
34890 from 0 and the elements of op1 are numbered from 4. */
34896 rperm
[i
] = GEN_INT (e
);
34899 vperm
= gen_rtx_CONST_VECTOR (V8SImode
, gen_rtvec_v (8, rperm
));
34900 vperm
= force_reg (V8SImode
, vperm
);
34901 emit_insn (gen_avx_vpermilvarv8sf3 (d
->target
, d
->op0
, vperm
));
34906 /* Return true if permutation D can be performed as VMODE permutation
34910 valid_perm_using_mode_p (enum machine_mode vmode
, struct expand_vec_perm_d
*d
)
34912 unsigned int i
, j
, chunk
;
/* Both modes must be integer vectors of the same total size.  */
34914 if (GET_MODE_CLASS (vmode
) != MODE_VECTOR_INT
34915 || GET_MODE_CLASS (d
->vmode
) != MODE_VECTOR_INT
34916 || GET_MODE_SIZE (vmode
) != GET_MODE_SIZE (d
->vmode
))
/* VMODE elements at least as fine-grained as D's: trivially valid.  */
34919 if (GET_MODE_NUNITS (vmode
) >= d
->nelt
)
/* Otherwise each VMODE element spans CHUNK elements of D; the permutation
   must move those chunks as aligned, contiguous groups.  */
34922 chunk
= d
->nelt
/ GET_MODE_NUNITS (vmode
);
34923 for (i
= 0; i
< d
->nelt
; i
+= chunk
)
34924 if (d
->perm
[i
] & (chunk
- 1))
34927 for (j
= 1; j
< chunk
; ++j
)
34928 if (d
->perm
[i
] + j
!= d
->perm
[i
+ j
])
34934 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
34935 in terms of pshufb, vpperm, vpermq, vpermd or vperm2i128. */
34938 expand_vec_perm_pshufb (struct expand_vec_perm_d
*d
)
34940 unsigned i
, nelt
, eltsz
, mask
;
34941 unsigned char perm
[32];
34942 enum machine_mode vmode
= V16QImode
;
34943 rtx rperm
[32], vperm
, target
, op0
, op1
;
34947 if (d
->op0
!= d
->op1
)
34949 if (!TARGET_XOP
|| GET_MODE_SIZE (d
->vmode
) != 16)
34952 && valid_perm_using_mode_p (V2TImode
, d
))
34957 /* Use vperm2i128 insn. The pattern uses
34958 V4DImode instead of V2TImode. */
34959 target
= gen_lowpart (V4DImode
, d
->target
);
34960 op0
= gen_lowpart (V4DImode
, d
->op0
);
34961 op1
= gen_lowpart (V4DImode
, d
->op1
);
34963 = GEN_INT (((d
->perm
[0] & (nelt
/ 2)) ? 1 : 0)
34964 || ((d
->perm
[nelt
/ 2] & (nelt
/ 2)) ? 2 : 0));
34965 emit_insn (gen_avx2_permv2ti (target
, op0
, op1
, rperm
[0]));
34973 if (GET_MODE_SIZE (d
->vmode
) == 16)
34978 else if (GET_MODE_SIZE (d
->vmode
) == 32)
34983 /* V4DImode should be already handled through
34984 expand_vselect by vpermq instruction. */
34985 gcc_assert (d
->vmode
!= V4DImode
);
34988 if (d
->vmode
== V8SImode
34989 || d
->vmode
== V16HImode
34990 || d
->vmode
== V32QImode
)
34992 /* First see if vpermq can be used for
34993 V8SImode/V16HImode/V32QImode. */
34994 if (valid_perm_using_mode_p (V4DImode
, d
))
34996 for (i
= 0; i
< 4; i
++)
34997 perm
[i
] = (d
->perm
[i
* nelt
/ 4] * 4 / nelt
) & 3;
35000 return expand_vselect (gen_lowpart (V4DImode
, d
->target
),
35001 gen_lowpart (V4DImode
, d
->op0
),
35005 /* Next see if vpermd can be used. */
35006 if (valid_perm_using_mode_p (V8SImode
, d
))
35010 if (vmode
== V32QImode
)
35012 /* vpshufb only works intra lanes, it is not
35013 possible to shuffle bytes in between the lanes. */
35014 for (i
= 0; i
< nelt
; ++i
)
35015 if ((d
->perm
[i
] ^ i
) & (nelt
/ 2))
35026 if (vmode
== V8SImode
)
35027 for (i
= 0; i
< 8; ++i
)
35028 rperm
[i
] = GEN_INT ((d
->perm
[i
* nelt
/ 8] * 8 / nelt
) & 7);
35031 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
35032 if (d
->op0
!= d
->op1
)
35033 mask
= 2 * nelt
- 1;
35034 else if (vmode
== V16QImode
)
35037 mask
= nelt
/ 2 - 1;
35039 for (i
= 0; i
< nelt
; ++i
)
35041 unsigned j
, e
= d
->perm
[i
] & mask
;
35042 for (j
= 0; j
< eltsz
; ++j
)
35043 rperm
[i
* eltsz
+ j
] = GEN_INT (e
* eltsz
+ j
);
35047 vperm
= gen_rtx_CONST_VECTOR (vmode
,
35048 gen_rtvec_v (GET_MODE_NUNITS (vmode
), rperm
));
35049 vperm
= force_reg (vmode
, vperm
);
35051 target
= gen_lowpart (vmode
, d
->target
);
35052 op0
= gen_lowpart (vmode
, d
->op0
);
35053 if (d
->op0
== d
->op1
)
35055 if (vmode
== V16QImode
)
35056 emit_insn (gen_ssse3_pshufbv16qi3 (target
, op0
, vperm
));
35057 else if (vmode
== V32QImode
)
35058 emit_insn (gen_avx2_pshufbv32qi3 (target
, op0
, vperm
));
35060 emit_insn (gen_avx2_permvarv8si (target
, vperm
, op0
));
35064 op1
= gen_lowpart (vmode
, d
->op1
);
35065 emit_insn (gen_xop_pperm (target
, op0
, op1
, vperm
));
35071 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
35072 in a single instruction. */
35075 expand_vec_perm_1 (struct expand_vec_perm_d
*d
)
35077 unsigned i
, nelt
= d
->nelt
;
35078 unsigned char perm2
[MAX_VECT_LEN
];
35080 /* Check plain VEC_SELECT first, because AVX has instructions that could
35081 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
35082 input where SEL+CONCAT may not. */
35083 if (d
->op0
== d
->op1
)
35085 int mask
= nelt
- 1;
35086 bool identity_perm
= true;
35087 bool broadcast_perm
= true;
35089 for (i
= 0; i
< nelt
; i
++)
35091 perm2
[i
] = d
->perm
[i
] & mask
;
35093 identity_perm
= false;
35095 broadcast_perm
= false;
35101 emit_move_insn (d
->target
, d
->op0
);
35104 else if (broadcast_perm
&& TARGET_AVX2
)
35106 /* Use vpbroadcast{b,w,d}. */
35107 rtx op
= d
->op0
, (*gen
) (rtx
, rtx
) = NULL
;
35111 op
= gen_lowpart (V16QImode
, op
);
35112 gen
= gen_avx2_pbroadcastv32qi
;
35115 op
= gen_lowpart (V8HImode
, op
);
35116 gen
= gen_avx2_pbroadcastv16hi
;
35119 op
= gen_lowpart (V4SImode
, op
);
35120 gen
= gen_avx2_pbroadcastv8si
;
35123 gen
= gen_avx2_pbroadcastv16qi
;
35126 gen
= gen_avx2_pbroadcastv8hi
;
35128 /* For other modes prefer other shuffles this function creates. */
35134 emit_insn (gen (d
->target
, op
));
35139 if (expand_vselect (d
->target
, d
->op0
, perm2
, nelt
))
35142 /* There are plenty of patterns in sse.md that are written for
35143 SEL+CONCAT and are not replicated for a single op. Perhaps
35144 that should be changed, to avoid the nastiness here. */
35146 /* Recognize interleave style patterns, which means incrementing
35147 every other permutation operand. */
35148 for (i
= 0; i
< nelt
; i
+= 2)
35150 perm2
[i
] = d
->perm
[i
] & mask
;
35151 perm2
[i
+ 1] = (d
->perm
[i
+ 1] & mask
) + nelt
;
35153 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op0
, perm2
, nelt
))
35156 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
35159 for (i
= 0; i
< nelt
; i
+= 4)
35161 perm2
[i
+ 0] = d
->perm
[i
+ 0] & mask
;
35162 perm2
[i
+ 1] = d
->perm
[i
+ 1] & mask
;
35163 perm2
[i
+ 2] = (d
->perm
[i
+ 2] & mask
) + nelt
;
35164 perm2
[i
+ 3] = (d
->perm
[i
+ 3] & mask
) + nelt
;
35167 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op0
, perm2
, nelt
))
35172 /* Finally, try the fully general two operand permute. */
35173 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op1
, d
->perm
, nelt
))
35176 /* Recognize interleave style patterns with reversed operands. */
35177 if (d
->op0
!= d
->op1
)
35179 for (i
= 0; i
< nelt
; ++i
)
35181 unsigned e
= d
->perm
[i
];
35189 if (expand_vselect_vconcat (d
->target
, d
->op1
, d
->op0
, perm2
, nelt
))
35193 /* Try the SSE4.1 blend variable merge instructions. */
35194 if (expand_vec_perm_blend (d
))
35197 /* Try one of the AVX vpermil variable permutations. */
35198 if (expand_vec_perm_vpermil (d
))
35201 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
35202 vpshufb, vpermd or vpermq variable permutation. */
35203 if (expand_vec_perm_pshufb (d
))
35209 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
35210 in terms of a pair of pshuflw + pshufhw instructions. */
35213 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d
*d
)
35215 unsigned char perm2
[MAX_VECT_LEN
];
35219 if (d
->vmode
!= V8HImode
|| d
->op0
!= d
->op1
)
35222 /* The two permutations only operate in 64-bit lanes. */
35223 for (i
= 0; i
< 4; ++i
)
35224 if (d
->perm
[i
] >= 4)
35226 for (i
= 4; i
< 8; ++i
)
35227 if (d
->perm
[i
] < 4)
35233 /* Emit the pshuflw. */
35234 memcpy (perm2
, d
->perm
, 4);
35235 for (i
= 4; i
< 8; ++i
)
35237 ok
= expand_vselect (d
->target
, d
->op0
, perm2
, 8);
35240 /* Emit the pshufhw. */
35241 memcpy (perm2
+ 4, d
->perm
+ 4, 4);
35242 for (i
= 0; i
< 4; ++i
)
35244 ok
= expand_vselect (d
->target
, d
->target
, perm2
, 8);
35250 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
35251 the permutation using the SSSE3 palignr instruction. This succeeds
35252 when all of the elements in PERM fit within one vector and we merely
35253 need to shift them down so that a single vector permutation has a
35254 chance to succeed. */
35257 expand_vec_perm_palignr (struct expand_vec_perm_d
*d
)
35259 unsigned i
, nelt
= d
->nelt
;
35264 /* Even with AVX, palignr only operates on 128-bit vectors. */
35265 if (!TARGET_SSSE3
|| GET_MODE_SIZE (d
->vmode
) != 16)
35268 min
= nelt
, max
= 0;
35269 for (i
= 0; i
< nelt
; ++i
)
35271 unsigned e
= d
->perm
[i
];
35277 if (min
== 0 || max
- min
>= nelt
)
35280 /* Given that we have SSSE3, we know we'll be able to implement the
35281 single operand permutation after the palignr with pshufb. */
35285 shift
= GEN_INT (min
* GET_MODE_BITSIZE (GET_MODE_INNER (d
->vmode
)));
35286 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode
, d
->target
),
35287 gen_lowpart (TImode
, d
->op1
),
35288 gen_lowpart (TImode
, d
->op0
), shift
));
35290 d
->op0
= d
->op1
= d
->target
;
35293 for (i
= 0; i
< nelt
; ++i
)
35295 unsigned e
= d
->perm
[i
] - min
;
35301 /* Test for the degenerate case where the alignment by itself
35302 produces the desired permutation. */
35306 ok
= expand_vec_perm_1 (d
);
35312 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
35313 a two vector permutation into a single vector permutation by using
35314 an interleave operation to merge the vectors. */
35317 expand_vec_perm_interleave2 (struct expand_vec_perm_d
*d
)
35319 struct expand_vec_perm_d dremap
, dfinal
;
35320 unsigned i
, nelt
= d
->nelt
, nelt2
= nelt
/ 2;
35321 unsigned HOST_WIDE_INT contents
;
35322 unsigned char remap
[2 * MAX_VECT_LEN
];
35324 bool ok
, same_halves
= false;
35326 if (GET_MODE_SIZE (d
->vmode
) == 16)
35328 if (d
->op0
== d
->op1
)
35331 else if (GET_MODE_SIZE (d
->vmode
) == 32)
35335 /* For 32-byte modes allow even d->op0 == d->op1.
35336 The lack of cross-lane shuffling in some instructions
35337 might prevent a single insn shuffle. */
35342 /* Examine from whence the elements come. */
35344 for (i
= 0; i
< nelt
; ++i
)
35345 contents
|= ((unsigned HOST_WIDE_INT
) 1) << d
->perm
[i
];
35347 memset (remap
, 0xff, sizeof (remap
));
35350 if (GET_MODE_SIZE (d
->vmode
) == 16)
35352 unsigned HOST_WIDE_INT h1
, h2
, h3
, h4
;
35354 /* Split the two input vectors into 4 halves. */
35355 h1
= (((unsigned HOST_WIDE_INT
) 1) << nelt2
) - 1;
35360 /* If the elements from the low halves use interleave low, and similarly
35361 for interleave high. If the elements are from mis-matched halves, we
35362 can use shufps for V4SF/V4SI or do a DImode shuffle. */
35363 if ((contents
& (h1
| h3
)) == contents
)
35366 for (i
= 0; i
< nelt2
; ++i
)
35369 remap
[i
+ nelt
] = i
* 2 + 1;
35370 dremap
.perm
[i
* 2] = i
;
35371 dremap
.perm
[i
* 2 + 1] = i
+ nelt
;
35374 else if ((contents
& (h2
| h4
)) == contents
)
35377 for (i
= 0; i
< nelt2
; ++i
)
35379 remap
[i
+ nelt2
] = i
* 2;
35380 remap
[i
+ nelt
+ nelt2
] = i
* 2 + 1;
35381 dremap
.perm
[i
* 2] = i
+ nelt2
;
35382 dremap
.perm
[i
* 2 + 1] = i
+ nelt
+ nelt2
;
35385 else if ((contents
& (h1
| h4
)) == contents
)
35388 for (i
= 0; i
< nelt2
; ++i
)
35391 remap
[i
+ nelt
+ nelt2
] = i
+ nelt2
;
35392 dremap
.perm
[i
] = i
;
35393 dremap
.perm
[i
+ nelt2
] = i
+ nelt
+ nelt2
;
35398 dremap
.vmode
= V2DImode
;
35400 dremap
.perm
[0] = 0;
35401 dremap
.perm
[1] = 3;
35404 else if ((contents
& (h2
| h3
)) == contents
)
35407 for (i
= 0; i
< nelt2
; ++i
)
35409 remap
[i
+ nelt2
] = i
;
35410 remap
[i
+ nelt
] = i
+ nelt2
;
35411 dremap
.perm
[i
] = i
+ nelt2
;
35412 dremap
.perm
[i
+ nelt2
] = i
+ nelt
;
35417 dremap
.vmode
= V2DImode
;
35419 dremap
.perm
[0] = 1;
35420 dremap
.perm
[1] = 2;
35428 unsigned int nelt4
= nelt
/ 4, nzcnt
= 0;
35429 unsigned HOST_WIDE_INT q
[8];
35430 unsigned int nonzero_halves
[4];
35432 /* Split the two input vectors into 8 quarters. */
35433 q
[0] = (((unsigned HOST_WIDE_INT
) 1) << nelt4
) - 1;
35434 for (i
= 1; i
< 8; ++i
)
35435 q
[i
] = q
[0] << (nelt4
* i
);
35436 for (i
= 0; i
< 4; ++i
)
35437 if (((q
[2 * i
] | q
[2 * i
+ 1]) & contents
) != 0)
35439 nonzero_halves
[nzcnt
] = i
;
35445 gcc_assert (d
->op0
== d
->op1
);
35446 nonzero_halves
[1] = nonzero_halves
[0];
35447 same_halves
= true;
35449 else if (d
->op0
== d
->op1
)
35451 gcc_assert (nonzero_halves
[0] == 0);
35452 gcc_assert (nonzero_halves
[1] == 1);
35457 if (d
->perm
[0] / nelt2
== nonzero_halves
[1])
35459 /* Attempt to increase the likelyhood that dfinal
35460 shuffle will be intra-lane. */
35461 char tmph
= nonzero_halves
[0];
35462 nonzero_halves
[0] = nonzero_halves
[1];
35463 nonzero_halves
[1] = tmph
;
35466 /* vperm2f128 or vperm2i128. */
35467 for (i
= 0; i
< nelt2
; ++i
)
35469 remap
[i
+ nonzero_halves
[1] * nelt2
] = i
+ nelt2
;
35470 remap
[i
+ nonzero_halves
[0] * nelt2
] = i
;
35471 dremap
.perm
[i
+ nelt2
] = i
+ nonzero_halves
[1] * nelt2
;
35472 dremap
.perm
[i
] = i
+ nonzero_halves
[0] * nelt2
;
35475 if (d
->vmode
!= V8SFmode
35476 && d
->vmode
!= V4DFmode
35477 && d
->vmode
!= V8SImode
)
35479 dremap
.vmode
= V8SImode
;
35481 for (i
= 0; i
< 4; ++i
)
35483 dremap
.perm
[i
] = i
+ nonzero_halves
[0] * 4;
35484 dremap
.perm
[i
+ 4] = i
+ nonzero_halves
[1] * 4;
35488 else if (d
->op0
== d
->op1
)
35490 else if (TARGET_AVX2
35491 && (contents
& (q
[0] | q
[2] | q
[4] | q
[6])) == contents
)
35494 for (i
= 0; i
< nelt4
; ++i
)
35497 remap
[i
+ nelt
] = i
* 2 + 1;
35498 remap
[i
+ nelt2
] = i
* 2 + nelt2
;
35499 remap
[i
+ nelt
+ nelt2
] = i
* 2 + nelt2
+ 1;
35500 dremap
.perm
[i
* 2] = i
;
35501 dremap
.perm
[i
* 2 + 1] = i
+ nelt
;
35502 dremap
.perm
[i
* 2 + nelt2
] = i
+ nelt2
;
35503 dremap
.perm
[i
* 2 + nelt2
+ 1] = i
+ nelt
+ nelt2
;
35506 else if (TARGET_AVX2
35507 && (contents
& (q
[1] | q
[3] | q
[5] | q
[7])) == contents
)
35510 for (i
= 0; i
< nelt4
; ++i
)
35512 remap
[i
+ nelt4
] = i
* 2;
35513 remap
[i
+ nelt
+ nelt4
] = i
* 2 + 1;
35514 remap
[i
+ nelt2
+ nelt4
] = i
* 2 + nelt2
;
35515 remap
[i
+ nelt
+ nelt2
+ nelt4
] = i
* 2 + nelt2
+ 1;
35516 dremap
.perm
[i
* 2] = i
+ nelt4
;
35517 dremap
.perm
[i
* 2 + 1] = i
+ nelt
+ nelt4
;
35518 dremap
.perm
[i
* 2 + nelt2
] = i
+ nelt2
+ nelt4
;
35519 dremap
.perm
[i
* 2 + nelt2
+ 1] = i
+ nelt
+ nelt2
+ nelt4
;
35526 /* Use the remapping array set up above to move the elements from their
35527 swizzled locations into their final destinations. */
35529 for (i
= 0; i
< nelt
; ++i
)
35531 unsigned e
= remap
[d
->perm
[i
]];
35532 gcc_assert (e
< nelt
);
35533 /* If same_halves is true, both halves of the remapped vector are the
35534 same. Avoid cross-lane accesses if possible. */
35535 if (same_halves
&& i
>= nelt2
)
35537 gcc_assert (e
< nelt2
);
35538 dfinal
.perm
[i
] = e
+ nelt2
;
35541 dfinal
.perm
[i
] = e
;
35543 dfinal
.op0
= gen_reg_rtx (dfinal
.vmode
);
35544 dfinal
.op1
= dfinal
.op0
;
35545 dremap
.target
= dfinal
.op0
;
35547 /* Test if the final remap can be done with a single insn. For V4SFmode or
35548 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
35550 ok
= expand_vec_perm_1 (&dfinal
);
35551 seq
= get_insns ();
35560 if (dremap
.vmode
!= dfinal
.vmode
)
35562 dremap
.target
= gen_lowpart (dremap
.vmode
, dremap
.target
);
35563 dremap
.op0
= gen_lowpart (dremap
.vmode
, dremap
.op0
);
35564 dremap
.op1
= gen_lowpart (dremap
.vmode
, dremap
.op1
);
35567 ok
= expand_vec_perm_1 (&dremap
);
35574 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
35575 a single vector cross-lane permutation into vpermq followed
35576 by any of the single insn permutations. */
35579 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d
*d
)
35581 struct expand_vec_perm_d dremap
, dfinal
;
35582 unsigned i
, j
, nelt
= d
->nelt
, nelt2
= nelt
/ 2, nelt4
= nelt
/ 4;
35583 unsigned contents
[2];
35587 && (d
->vmode
== V32QImode
|| d
->vmode
== V16HImode
)
35588 && d
->op0
== d
->op1
))
35593 for (i
= 0; i
< nelt2
; ++i
)
35595 contents
[0] |= 1u << (d
->perm
[i
] / nelt4
);
35596 contents
[1] |= 1u << (d
->perm
[i
+ nelt2
] / nelt4
);
35599 for (i
= 0; i
< 2; ++i
)
35601 unsigned int cnt
= 0;
35602 for (j
= 0; j
< 4; ++j
)
35603 if ((contents
[i
] & (1u << j
)) != 0 && ++cnt
> 2)
35611 dremap
.vmode
= V4DImode
;
35613 dremap
.target
= gen_reg_rtx (V4DImode
);
35614 dremap
.op0
= gen_lowpart (V4DImode
, d
->op0
);
35615 dremap
.op1
= dremap
.op0
;
35616 for (i
= 0; i
< 2; ++i
)
35618 unsigned int cnt
= 0;
35619 for (j
= 0; j
< 4; ++j
)
35620 if ((contents
[i
] & (1u << j
)) != 0)
35621 dremap
.perm
[2 * i
+ cnt
++] = j
;
35622 for (; cnt
< 2; ++cnt
)
35623 dremap
.perm
[2 * i
+ cnt
] = 0;
35627 dfinal
.op0
= gen_lowpart (dfinal
.vmode
, dremap
.target
);
35628 dfinal
.op1
= dfinal
.op0
;
35629 for (i
= 0, j
= 0; i
< nelt
; ++i
)
35633 dfinal
.perm
[i
] = (d
->perm
[i
] & (nelt4
- 1)) | (j
? nelt2
: 0);
35634 if ((d
->perm
[i
] / nelt4
) == dremap
.perm
[j
])
35636 else if ((d
->perm
[i
] / nelt4
) == dremap
.perm
[j
+ 1])
35637 dfinal
.perm
[i
] |= nelt4
;
35639 gcc_unreachable ();
35642 ok
= expand_vec_perm_1 (&dremap
);
35645 ok
= expand_vec_perm_1 (&dfinal
);
35651 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
35652 a two vector permutation using 2 intra-lane interleave insns
35653 and cross-lane shuffle for 32-byte vectors. */
35656 expand_vec_perm_interleave3 (struct expand_vec_perm_d
*d
)
35659 rtx (*gen
) (rtx
, rtx
, rtx
);
35661 if (d
->op0
== d
->op1
)
35663 if (TARGET_AVX2
&& GET_MODE_SIZE (d
->vmode
) == 32)
35665 else if (TARGET_AVX
&& (d
->vmode
== V8SFmode
|| d
->vmode
== V4DFmode
))
35671 if (d
->perm
[0] != 0 && d
->perm
[0] != nelt
/ 2)
35673 for (i
= 0; i
< nelt
; i
+= 2)
35674 if (d
->perm
[i
] != d
->perm
[0] + i
/ 2
35675 || d
->perm
[i
+ 1] != d
->perm
[0] + i
/ 2 + nelt
)
35685 gen
= gen_vec_interleave_highv32qi
;
35687 gen
= gen_vec_interleave_lowv32qi
;
35691 gen
= gen_vec_interleave_highv16hi
;
35693 gen
= gen_vec_interleave_lowv16hi
;
35697 gen
= gen_vec_interleave_highv8si
;
35699 gen
= gen_vec_interleave_lowv8si
;
35703 gen
= gen_vec_interleave_highv4di
;
35705 gen
= gen_vec_interleave_lowv4di
;
35709 gen
= gen_vec_interleave_highv8sf
;
35711 gen
= gen_vec_interleave_lowv8sf
;
35715 gen
= gen_vec_interleave_highv4df
;
35717 gen
= gen_vec_interleave_lowv4df
;
35720 gcc_unreachable ();
35723 emit_insn (gen (d
->target
, d
->op0
, d
->op1
));
35727 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
35728 permutation with two pshufb insns and an ior. We should have already
35729 failed all two instruction sequences. */
35732 expand_vec_perm_pshufb2 (struct expand_vec_perm_d
*d
)
35734 rtx rperm
[2][16], vperm
, l
, h
, op
, m128
;
35735 unsigned int i
, nelt
, eltsz
;
35737 if (!TARGET_SSSE3
|| GET_MODE_SIZE (d
->vmode
) != 16)
35739 gcc_assert (d
->op0
!= d
->op1
);
35742 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
35744 /* Generate two permutation masks. If the required element is within
35745 the given vector it is shuffled into the proper lane. If the required
35746 element is in the other vector, force a zero into the lane by setting
35747 bit 7 in the permutation mask. */
35748 m128
= GEN_INT (-128);
35749 for (i
= 0; i
< nelt
; ++i
)
35751 unsigned j
, e
= d
->perm
[i
];
35752 unsigned which
= (e
>= nelt
);
35756 for (j
= 0; j
< eltsz
; ++j
)
35758 rperm
[which
][i
*eltsz
+ j
] = GEN_INT (e
*eltsz
+ j
);
35759 rperm
[1-which
][i
*eltsz
+ j
] = m128
;
35763 vperm
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, rperm
[0]));
35764 vperm
= force_reg (V16QImode
, vperm
);
35766 l
= gen_reg_rtx (V16QImode
);
35767 op
= gen_lowpart (V16QImode
, d
->op0
);
35768 emit_insn (gen_ssse3_pshufbv16qi3 (l
, op
, vperm
));
35770 vperm
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, rperm
[1]));
35771 vperm
= force_reg (V16QImode
, vperm
);
35773 h
= gen_reg_rtx (V16QImode
);
35774 op
= gen_lowpart (V16QImode
, d
->op1
);
35775 emit_insn (gen_ssse3_pshufbv16qi3 (h
, op
, vperm
));
35777 op
= gen_lowpart (V16QImode
, d
->target
);
35778 emit_insn (gen_iorv16qi3 (op
, l
, h
));
35783 /* Implement arbitrary permutation of one V32QImode and V16QImode operand
35784 with two vpshufb insns, vpermq and vpor. We should have already failed
35785 all two or three instruction sequences. */
35788 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d
*d
)
35790 rtx rperm
[2][32], vperm
, l
, h
, hp
, op
, m128
;
35791 unsigned int i
, nelt
, eltsz
;
35794 || d
->op0
!= d
->op1
35795 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
35802 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
35804 /* Generate two permutation masks. If the required element is within
35805 the same lane, it is shuffled in. If the required element from the
35806 other lane, force a zero by setting bit 7 in the permutation mask.
35807 In the other mask the mask has non-negative elements if element
35808 is requested from the other lane, but also moved to the other lane,
35809 so that the result of vpshufb can have the two V2TImode halves
35811 m128
= GEN_INT (-128);
35812 for (i
= 0; i
< nelt
; ++i
)
35814 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
35815 unsigned which
= ((d
->perm
[i
] ^ i
) & (nelt
/ 2)) * eltsz
;
35817 for (j
= 0; j
< eltsz
; ++j
)
35819 rperm
[!!which
][(i
* eltsz
+ j
) ^ which
] = GEN_INT (e
* eltsz
+ j
);
35820 rperm
[!which
][(i
* eltsz
+ j
) ^ (which
^ 16)] = m128
;
35824 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[1]));
35825 vperm
= force_reg (V32QImode
, vperm
);
35827 h
= gen_reg_rtx (V32QImode
);
35828 op
= gen_lowpart (V32QImode
, d
->op0
);
35829 emit_insn (gen_avx2_pshufbv32qi3 (h
, op
, vperm
));
35831 /* Swap the 128-byte lanes of h into hp. */
35832 hp
= gen_reg_rtx (V4DImode
);
35833 op
= gen_lowpart (V4DImode
, h
);
35834 emit_insn (gen_avx2_permv4di_1 (hp
, op
, const2_rtx
, GEN_INT (3), const0_rtx
,
35837 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[0]));
35838 vperm
= force_reg (V32QImode
, vperm
);
35840 l
= gen_reg_rtx (V32QImode
);
35841 op
= gen_lowpart (V32QImode
, d
->op0
);
35842 emit_insn (gen_avx2_pshufbv32qi3 (l
, op
, vperm
));
35844 op
= gen_lowpart (V32QImode
, d
->target
);
35845 emit_insn (gen_iorv32qi3 (op
, l
, gen_lowpart (V32QImode
, hp
)));
35850 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
35851 and extract-odd permutations of two V32QImode and V16QImode operand
35852 with two vpshufb insns, vpor and vpermq. We should have already
35853 failed all two or three instruction sequences. */
35856 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d
*d
)
35858 rtx rperm
[2][32], vperm
, l
, h
, ior
, op
, m128
;
35859 unsigned int i
, nelt
, eltsz
;
35862 || d
->op0
== d
->op1
35863 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
35866 for (i
= 0; i
< d
->nelt
; ++i
)
35867 if ((d
->perm
[i
] ^ (i
* 2)) & (3 * d
->nelt
/ 2))
35874 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
35876 /* Generate two permutation masks. In the first permutation mask
35877 the first quarter will contain indexes for the first half
35878 of the op0, the second quarter will contain bit 7 set, third quarter
35879 will contain indexes for the second half of the op0 and the
35880 last quarter bit 7 set. In the second permutation mask
35881 the first quarter will contain bit 7 set, the second quarter
35882 indexes for the first half of the op1, the third quarter bit 7 set
35883 and last quarter indexes for the second half of the op1.
35884 I.e. the first mask e.g. for V32QImode extract even will be:
35885 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
35886 (all values masked with 0xf except for -128) and second mask
35887 for extract even will be
35888 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
35889 m128
= GEN_INT (-128);
35890 for (i
= 0; i
< nelt
; ++i
)
35892 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
35893 unsigned which
= d
->perm
[i
] >= nelt
;
35894 unsigned xorv
= (i
>= nelt
/ 4 && i
< 3 * nelt
/ 4) ? 24 : 0;
35896 for (j
= 0; j
< eltsz
; ++j
)
35898 rperm
[which
][(i
* eltsz
+ j
) ^ xorv
] = GEN_INT (e
* eltsz
+ j
);
35899 rperm
[1 - which
][(i
* eltsz
+ j
) ^ xorv
] = m128
;
35903 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[0]));
35904 vperm
= force_reg (V32QImode
, vperm
);
35906 l
= gen_reg_rtx (V32QImode
);
35907 op
= gen_lowpart (V32QImode
, d
->op0
);
35908 emit_insn (gen_avx2_pshufbv32qi3 (l
, op
, vperm
));
35910 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[1]));
35911 vperm
= force_reg (V32QImode
, vperm
);
35913 h
= gen_reg_rtx (V32QImode
);
35914 op
= gen_lowpart (V32QImode
, d
->op1
);
35915 emit_insn (gen_avx2_pshufbv32qi3 (h
, op
, vperm
));
35917 ior
= gen_reg_rtx (V32QImode
);
35918 emit_insn (gen_iorv32qi3 (ior
, l
, h
));
35920 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
35921 op
= gen_lowpart (V4DImode
, d
->target
);
35922 ior
= gen_lowpart (V4DImode
, ior
);
35923 emit_insn (gen_avx2_permv4di_1 (op
, ior
, const0_rtx
, const2_rtx
,
35924 const1_rtx
, GEN_INT (3)));
35929 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
35930 and extract-odd permutations. */
35933 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d
*d
, unsigned odd
)
35940 t1
= gen_reg_rtx (V4DFmode
);
35941 t2
= gen_reg_rtx (V4DFmode
);
35943 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
35944 emit_insn (gen_avx_vperm2f128v4df3 (t1
, d
->op0
, d
->op1
, GEN_INT (0x20)));
35945 emit_insn (gen_avx_vperm2f128v4df3 (t2
, d
->op0
, d
->op1
, GEN_INT (0x31)));
35947 /* Now an unpck[lh]pd will produce the result required. */
35949 t3
= gen_avx_unpckhpd256 (d
->target
, t1
, t2
);
35951 t3
= gen_avx_unpcklpd256 (d
->target
, t1
, t2
);
35957 int mask
= odd
? 0xdd : 0x88;
35959 t1
= gen_reg_rtx (V8SFmode
);
35960 t2
= gen_reg_rtx (V8SFmode
);
35961 t3
= gen_reg_rtx (V8SFmode
);
35963 /* Shuffle within the 128-bit lanes to produce:
35964 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
35965 emit_insn (gen_avx_shufps256 (t1
, d
->op0
, d
->op1
,
35968 /* Shuffle the lanes around to produce:
35969 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
35970 emit_insn (gen_avx_vperm2f128v8sf3 (t2
, t1
, t1
,
35973 /* Shuffle within the 128-bit lanes to produce:
35974 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
35975 emit_insn (gen_avx_shufps256 (t3
, t1
, t2
, GEN_INT (0x44)));
35977 /* Shuffle within the 128-bit lanes to produce:
35978 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
35979 emit_insn (gen_avx_shufps256 (t2
, t1
, t2
, GEN_INT (0xee)));
35981 /* Shuffle the lanes around to produce:
35982 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
35983 emit_insn (gen_avx_vperm2f128v8sf3 (d
->target
, t3
, t2
,
35992 /* These are always directly implementable by expand_vec_perm_1. */
35993 gcc_unreachable ();
35997 return expand_vec_perm_pshufb2 (d
);
36000 /* We need 2*log2(N)-1 operations to achieve odd/even
36001 with interleave. */
36002 t1
= gen_reg_rtx (V8HImode
);
36003 t2
= gen_reg_rtx (V8HImode
);
36004 emit_insn (gen_vec_interleave_highv8hi (t1
, d
->op0
, d
->op1
));
36005 emit_insn (gen_vec_interleave_lowv8hi (d
->target
, d
->op0
, d
->op1
));
36006 emit_insn (gen_vec_interleave_highv8hi (t2
, d
->target
, t1
));
36007 emit_insn (gen_vec_interleave_lowv8hi (d
->target
, d
->target
, t1
));
36009 t3
= gen_vec_interleave_highv8hi (d
->target
, d
->target
, t2
);
36011 t3
= gen_vec_interleave_lowv8hi (d
->target
, d
->target
, t2
);
36018 return expand_vec_perm_pshufb2 (d
);
36021 t1
= gen_reg_rtx (V16QImode
);
36022 t2
= gen_reg_rtx (V16QImode
);
36023 t3
= gen_reg_rtx (V16QImode
);
36024 emit_insn (gen_vec_interleave_highv16qi (t1
, d
->op0
, d
->op1
));
36025 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->op0
, d
->op1
));
36026 emit_insn (gen_vec_interleave_highv16qi (t2
, d
->target
, t1
));
36027 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t1
));
36028 emit_insn (gen_vec_interleave_highv16qi (t3
, d
->target
, t2
));
36029 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t2
));
36031 t3
= gen_vec_interleave_highv16qi (d
->target
, d
->target
, t3
);
36033 t3
= gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t3
);
36040 return expand_vec_perm_vpshufb2_vpermq_even_odd (d
);
36045 struct expand_vec_perm_d d_copy
= *d
;
36046 d_copy
.vmode
= V4DFmode
;
36047 d_copy
.target
= gen_lowpart (V4DFmode
, d
->target
);
36048 d_copy
.op0
= gen_lowpart (V4DFmode
, d
->op0
);
36049 d_copy
.op1
= gen_lowpart (V4DFmode
, d
->op1
);
36050 return expand_vec_perm_even_odd_1 (&d_copy
, odd
);
36053 t1
= gen_reg_rtx (V4DImode
);
36054 t2
= gen_reg_rtx (V4DImode
);
36056 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
36057 emit_insn (gen_avx2_permv2ti (t1
, d
->op0
, d
->op1
, GEN_INT (0x20)));
36058 emit_insn (gen_avx2_permv2ti (t2
, d
->op0
, d
->op1
, GEN_INT (0x31)));
36060 /* Now an vpunpck[lh]qdq will produce the result required. */
36062 t3
= gen_avx2_interleave_highv4di (d
->target
, t1
, t2
);
36064 t3
= gen_avx2_interleave_lowv4di (d
->target
, t1
, t2
);
36071 struct expand_vec_perm_d d_copy
= *d
;
36072 d_copy
.vmode
= V8SFmode
;
36073 d_copy
.target
= gen_lowpart (V8SFmode
, d
->target
);
36074 d_copy
.op0
= gen_lowpart (V8SFmode
, d
->op0
);
36075 d_copy
.op1
= gen_lowpart (V8SFmode
, d
->op1
);
36076 return expand_vec_perm_even_odd_1 (&d_copy
, odd
);
36079 t1
= gen_reg_rtx (V8SImode
);
36080 t2
= gen_reg_rtx (V8SImode
);
36082 /* Shuffle the lanes around into
36083 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
36084 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode
, t1
),
36085 gen_lowpart (V4DImode
, d
->op0
),
36086 gen_lowpart (V4DImode
, d
->op1
),
36088 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode
, t2
),
36089 gen_lowpart (V4DImode
, d
->op0
),
36090 gen_lowpart (V4DImode
, d
->op1
),
36093 /* Swap the 2nd and 3rd position in each lane into
36094 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
36095 emit_insn (gen_avx2_pshufdv3 (t1
, t1
,
36096 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
36097 emit_insn (gen_avx2_pshufdv3 (t2
, t2
,
36098 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
36100 /* Now an vpunpck[lh]qdq will produce
36101 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
36103 t3
= gen_avx2_interleave_highv4di (gen_lowpart (V4DImode
, d
->target
),
36104 gen_lowpart (V4DImode
, t1
),
36105 gen_lowpart (V4DImode
, t2
));
36107 t3
= gen_avx2_interleave_lowv4di (gen_lowpart (V4DImode
, d
->target
),
36108 gen_lowpart (V4DImode
, t1
),
36109 gen_lowpart (V4DImode
, t2
));
36114 gcc_unreachable ();
36120 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
36121 extract-even and extract-odd permutations. */
36124 expand_vec_perm_even_odd (struct expand_vec_perm_d
*d
)
36126 unsigned i
, odd
, nelt
= d
->nelt
;
36129 if (odd
!= 0 && odd
!= 1)
36132 for (i
= 1; i
< nelt
; ++i
)
36133 if (d
->perm
[i
] != 2 * i
+ odd
)
36136 return expand_vec_perm_even_odd_1 (d
, odd
);
36139 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
36140 permutations. We assume that expand_vec_perm_1 has already failed. */
36143 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d
*d
)
36145 unsigned elt
= d
->perm
[0], nelt2
= d
->nelt
/ 2;
36146 enum machine_mode vmode
= d
->vmode
;
36147 unsigned char perm2
[4];
36155 /* These are special-cased in sse.md so that we can optionally
36156 use the vbroadcast instruction. They expand to two insns
36157 if the input happens to be in a register. */
36158 gcc_unreachable ();
36164 /* These are always implementable using standard shuffle patterns. */
36165 gcc_unreachable ();
36169 /* These can be implemented via interleave. We save one insn by
36170 stopping once we have promoted to V4SImode and then use pshufd. */
36173 optab otab
= vec_interleave_low_optab
;
36177 otab
= vec_interleave_high_optab
;
36182 op0
= expand_binop (vmode
, otab
, op0
, op0
, NULL
, 0, OPTAB_DIRECT
);
36183 vmode
= get_mode_wider_vector (vmode
);
36184 op0
= gen_lowpart (vmode
, op0
);
36186 while (vmode
!= V4SImode
);
36188 memset (perm2
, elt
, 4);
36189 ok
= expand_vselect (gen_lowpart (V4SImode
, d
->target
), op0
, perm2
, 4);
36197 /* For AVX2 broadcasts of the first element vpbroadcast* or
36198 vpermq should be used by expand_vec_perm_1. */
36199 gcc_assert (!TARGET_AVX2
|| d
->perm
[0]);
36203 gcc_unreachable ();
36207 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
36208 broadcast permutations. */
36211 expand_vec_perm_broadcast (struct expand_vec_perm_d
*d
)
36213 unsigned i
, elt
, nelt
= d
->nelt
;
36215 if (d
->op0
!= d
->op1
)
36219 for (i
= 1; i
< nelt
; ++i
)
36220 if (d
->perm
[i
] != elt
)
36223 return expand_vec_perm_broadcast_1 (d
);
36226 /* Implement arbitrary permutation of two V32QImode and V16QImode operands
36227 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
36228 all the shorter instruction sequences. */
36231 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d
*d
)
36233 rtx rperm
[4][32], vperm
, l
[2], h
[2], op
, m128
;
36234 unsigned int i
, nelt
, eltsz
;
36238 || d
->op0
== d
->op1
36239 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
36246 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
36248 /* Generate 4 permutation masks. If the required element is within
36249 the same lane, it is shuffled in. If the required element from the
36250 other lane, force a zero by setting bit 7 in the permutation mask.
36251 In the other mask the mask has non-negative elements if element
36252 is requested from the other lane, but also moved to the other lane,
36253 so that the result of vpshufb can have the two V2TImode halves
36255 m128
= GEN_INT (-128);
36256 for (i
= 0; i
< 32; ++i
)
36258 rperm
[0][i
] = m128
;
36259 rperm
[1][i
] = m128
;
36260 rperm
[2][i
] = m128
;
36261 rperm
[3][i
] = m128
;
36267 for (i
= 0; i
< nelt
; ++i
)
36269 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
36270 unsigned xlane
= ((d
->perm
[i
] ^ i
) & (nelt
/ 2)) * eltsz
;
36271 unsigned int which
= ((d
->perm
[i
] & nelt
) ? 2 : 0) + (xlane
? 1 : 0);
36273 for (j
= 0; j
< eltsz
; ++j
)
36274 rperm
[which
][(i
* eltsz
+ j
) ^ xlane
] = GEN_INT (e
* eltsz
+ j
);
36275 used
[which
] = true;
36278 for (i
= 0; i
< 2; ++i
)
36280 if (!used
[2 * i
+ 1])
36285 vperm
= gen_rtx_CONST_VECTOR (V32QImode
,
36286 gen_rtvec_v (32, rperm
[2 * i
+ 1]));
36287 vperm
= force_reg (V32QImode
, vperm
);
36288 h
[i
] = gen_reg_rtx (V32QImode
);
36289 op
= gen_lowpart (V32QImode
, i
? d
->op1
: d
->op0
);
36290 emit_insn (gen_avx2_pshufbv32qi3 (h
[i
], op
, vperm
));
36293 /* Swap the 128-byte lanes of h[X]. */
36294 for (i
= 0; i
< 2; ++i
)
36296 if (h
[i
] == NULL_RTX
)
36298 op
= gen_reg_rtx (V4DImode
);
36299 emit_insn (gen_avx2_permv4di_1 (op
, gen_lowpart (V4DImode
, h
[i
]),
36300 const2_rtx
, GEN_INT (3), const0_rtx
,
36302 h
[i
] = gen_lowpart (V32QImode
, op
);
36305 for (i
= 0; i
< 2; ++i
)
36312 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[2 * i
]));
36313 vperm
= force_reg (V32QImode
, vperm
);
36314 l
[i
] = gen_reg_rtx (V32QImode
);
36315 op
= gen_lowpart (V32QImode
, i
? d
->op1
: d
->op0
);
36316 emit_insn (gen_avx2_pshufbv32qi3 (l
[i
], op
, vperm
));
36319 for (i
= 0; i
< 2; ++i
)
36323 op
= gen_reg_rtx (V32QImode
);
36324 emit_insn (gen_iorv32qi3 (op
, l
[i
], h
[i
]));
36331 gcc_assert (l
[0] && l
[1]);
36332 op
= gen_lowpart (V32QImode
, d
->target
);
36333 emit_insn (gen_iorv32qi3 (op
, l
[0], l
[1]));
36337 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
36338 With all of the interface bits taken care of, perform the expansion
36339 in D and return true on success. */
36342 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
36344 /* Try a single instruction expansion. */
36345 if (expand_vec_perm_1 (d
))
36348 /* Try sequences of two instructions. */
36350 if (expand_vec_perm_pshuflw_pshufhw (d
))
36353 if (expand_vec_perm_palignr (d
))
36356 if (expand_vec_perm_interleave2 (d
))
36359 if (expand_vec_perm_broadcast (d
))
36362 if (expand_vec_perm_vpermq_perm_1 (d
))
36365 /* Try sequences of three instructions. */
36367 if (expand_vec_perm_pshufb2 (d
))
36370 if (expand_vec_perm_interleave3 (d
))
36373 /* Try sequences of four instructions. */
36375 if (expand_vec_perm_vpshufb2_vpermq (d
))
36378 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d
))
36381 /* ??? Look for narrow permutations whose element orderings would
36382 allow the promotion to a wider mode. */
36384 /* ??? Look for sequences of interleave or a wider permute that place
36385 the data into the correct lanes for a half-vector shuffle like
36386 pshuf[lh]w or vpermilps. */
36388 /* ??? Look for sequences of interleave that produce the desired results.
36389 The combinatorics of punpck[lh] get pretty ugly... */
36391 if (expand_vec_perm_even_odd (d
))
36394 /* Even longer sequences. */
36395 if (expand_vec_perm_vpshufb4_vpermq2 (d
))
36402 ix86_expand_vec_perm_const (rtx operands
[4])
36404 struct expand_vec_perm_d d
;
36405 unsigned char perm
[MAX_VECT_LEN
];
36406 int i
, nelt
, which
;
36409 d
.target
= operands
[0];
36410 d
.op0
= operands
[1];
36411 d
.op1
= operands
[2];
36414 d
.vmode
= GET_MODE (d
.target
);
36415 gcc_assert (VECTOR_MODE_P (d
.vmode
));
36416 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
36417 d
.testing_p
= false;
36419 gcc_assert (GET_CODE (sel
) == CONST_VECTOR
);
36420 gcc_assert (XVECLEN (sel
, 0) == nelt
);
36421 gcc_checking_assert (sizeof (d
.perm
) == sizeof (perm
));
36423 for (i
= which
= 0; i
< nelt
; ++i
)
36425 rtx e
= XVECEXP (sel
, 0, i
);
36426 int ei
= INTVAL (e
) & (2 * nelt
- 1);
36428 which
|= (ei
< nelt
? 1 : 2);
36439 if (!rtx_equal_p (d
.op0
, d
.op1
))
36442 /* The elements of PERM do not suggest that only the first operand
36443 is used, but both operands are identical. Allow easier matching
36444 of the permutation by folding the permutation into the single
36446 for (i
= 0; i
< nelt
; ++i
)
36447 if (d
.perm
[i
] >= nelt
)
36456 for (i
= 0; i
< nelt
; ++i
)
36462 if (ix86_expand_vec_perm_const_1 (&d
))
36465 /* If the mask says both arguments are needed, but they are the same,
36466 the above tried to expand with d.op0 == d.op1. If that didn't work,
36467 retry with d.op0 != d.op1 as that is what testing has been done with. */
36468 if (which
== 3 && d
.op0
== d
.op1
)
36473 memcpy (d
.perm
, perm
, sizeof (perm
));
36474 d
.op1
= gen_reg_rtx (d
.vmode
);
36476 ok
= ix86_expand_vec_perm_const_1 (&d
);
36477 seq
= get_insns ();
36481 emit_move_insn (d
.op1
, d
.op0
);
36490 /* Implement targetm.vectorize.vec_perm_const_ok. */
36493 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode
,
36494 const unsigned char *sel
)
36496 struct expand_vec_perm_d d
;
36497 unsigned int i
, nelt
, which
;
36501 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
36502 d
.testing_p
= true;
36504 /* Given sufficient ISA support we can just return true here
36505 for selected vector modes. */
36506 if (GET_MODE_SIZE (d
.vmode
) == 16)
36508 /* All implementable with a single vpperm insn. */
36511 /* All implementable with 2 pshufb + 1 ior. */
36514 /* All implementable with shufpd or unpck[lh]pd. */
36519 /* Extract the values from the vector CST into the permutation
36521 memcpy (d
.perm
, sel
, nelt
);
36522 for (i
= which
= 0; i
< nelt
; ++i
)
36524 unsigned char e
= d
.perm
[i
];
36525 gcc_assert (e
< 2 * nelt
);
36526 which
|= (e
< nelt
? 1 : 2);
36529 /* For all elements from second vector, fold the elements to first. */
36531 for (i
= 0; i
< nelt
; ++i
)
36534 /* Check whether the mask can be applied to the vector type. */
36535 one_vec
= (which
!= 3);
36537 /* Implementable with shufps or pshufd. */
36538 if (one_vec
&& (d
.vmode
== V4SFmode
|| d
.vmode
== V4SImode
))
36541 /* Otherwise we have to go through the motions and see if we can
36542 figure out how to generate the requested permutation. */
36543 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
36544 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
36546 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
36549 ret
= ix86_expand_vec_perm_const_1 (&d
);
36556 ix86_expand_vec_extract_even_odd (rtx targ
, rtx op0
, rtx op1
, unsigned odd
)
36558 struct expand_vec_perm_d d
;
36564 d
.vmode
= GET_MODE (targ
);
36565 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
36566 d
.testing_p
= false;
36568 for (i
= 0; i
< nelt
; ++i
)
36569 d
.perm
[i
] = i
* 2 + odd
;
36571 /* We'll either be able to implement the permutation directly... */
36572 if (expand_vec_perm_1 (&d
))
36575 /* ... or we use the special-case patterns. */
36576 expand_vec_perm_even_odd_1 (&d
, odd
);
36579 /* Expand an insert into a vector register through pinsr insn.
36580 Return true if successful. */
36583 ix86_expand_pinsr (rtx
*operands
)
36585 rtx dst
= operands
[0];
36586 rtx src
= operands
[3];
36588 unsigned int size
= INTVAL (operands
[1]);
36589 unsigned int pos
= INTVAL (operands
[2]);
36591 if (GET_CODE (dst
) == SUBREG
)
36593 pos
+= SUBREG_BYTE (dst
) * BITS_PER_UNIT
;
36594 dst
= SUBREG_REG (dst
);
36597 if (GET_CODE (src
) == SUBREG
)
36598 src
= SUBREG_REG (src
);
36600 switch (GET_MODE (dst
))
36607 enum machine_mode srcmode
, dstmode
;
36608 rtx (*pinsr
)(rtx
, rtx
, rtx
, rtx
);
36610 srcmode
= mode_for_size (size
, MODE_INT
, 0);
36615 if (!TARGET_SSE4_1
)
36617 dstmode
= V16QImode
;
36618 pinsr
= gen_sse4_1_pinsrb
;
36624 dstmode
= V8HImode
;
36625 pinsr
= gen_sse2_pinsrw
;
36629 if (!TARGET_SSE4_1
)
36631 dstmode
= V4SImode
;
36632 pinsr
= gen_sse4_1_pinsrd
;
36636 gcc_assert (TARGET_64BIT
);
36637 if (!TARGET_SSE4_1
)
36639 dstmode
= V2DImode
;
36640 pinsr
= gen_sse4_1_pinsrq
;
36647 dst
= gen_lowpart (dstmode
, dst
);
36648 src
= gen_lowpart (srcmode
, src
);
36652 emit_insn (pinsr (dst
, dst
, src
, GEN_INT (1 << pos
)));
36661 /* This function returns the calling abi specific va_list type node.
36662 It returns the FNDECL specific va_list type. */
36665 ix86_fn_abi_va_list (tree fndecl
)
36668 return va_list_type_node
;
36669 gcc_assert (fndecl
!= NULL_TREE
);
36671 if (ix86_function_abi ((const_tree
) fndecl
) == MS_ABI
)
36672 return ms_va_list_type_node
;
36674 return sysv_va_list_type_node
;
36677 /* Returns the canonical va_list type specified by TYPE. If there
36678 is no valid TYPE provided, it return NULL_TREE. */
36681 ix86_canonical_va_list_type (tree type
)
36685 /* Resolve references and pointers to va_list type. */
36686 if (TREE_CODE (type
) == MEM_REF
)
36687 type
= TREE_TYPE (type
);
36688 else if (POINTER_TYPE_P (type
) && POINTER_TYPE_P (TREE_TYPE(type
)))
36689 type
= TREE_TYPE (type
);
36690 else if (POINTER_TYPE_P (type
) && TREE_CODE (TREE_TYPE (type
)) == ARRAY_TYPE
)
36691 type
= TREE_TYPE (type
);
36693 if (TARGET_64BIT
&& va_list_type_node
!= NULL_TREE
)
36695 wtype
= va_list_type_node
;
36696 gcc_assert (wtype
!= NULL_TREE
);
36698 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
36700 /* If va_list is an array type, the argument may have decayed
36701 to a pointer type, e.g. by being passed to another function.
36702 In that case, unwrap both types so that we can compare the
36703 underlying records. */
36704 if (TREE_CODE (htype
) == ARRAY_TYPE
36705 || POINTER_TYPE_P (htype
))
36707 wtype
= TREE_TYPE (wtype
);
36708 htype
= TREE_TYPE (htype
);
36711 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
36712 return va_list_type_node
;
36713 wtype
= sysv_va_list_type_node
;
36714 gcc_assert (wtype
!= NULL_TREE
);
36716 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
36718 /* If va_list is an array type, the argument may have decayed
36719 to a pointer type, e.g. by being passed to another function.
36720 In that case, unwrap both types so that we can compare the
36721 underlying records. */
36722 if (TREE_CODE (htype
) == ARRAY_TYPE
36723 || POINTER_TYPE_P (htype
))
36725 wtype
= TREE_TYPE (wtype
);
36726 htype
= TREE_TYPE (htype
);
36729 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
36730 return sysv_va_list_type_node
;
36731 wtype
= ms_va_list_type_node
;
36732 gcc_assert (wtype
!= NULL_TREE
);
36734 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
36736 /* If va_list is an array type, the argument may have decayed
36737 to a pointer type, e.g. by being passed to another function.
36738 In that case, unwrap both types so that we can compare the
36739 underlying records. */
36740 if (TREE_CODE (htype
) == ARRAY_TYPE
36741 || POINTER_TYPE_P (htype
))
36743 wtype
= TREE_TYPE (wtype
);
36744 htype
= TREE_TYPE (htype
);
36747 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
36748 return ms_va_list_type_node
;
36751 return std_canonical_va_list_type (type
);
36754 /* Iterate through the target-specific builtin types for va_list.
36755 IDX denotes the iterator, *PTREE is set to the result type of
36756 the va_list builtin, and *PNAME to its internal type.
36757 Returns zero if there is no element for this index, otherwise
36758 IDX should be increased upon the next call.
36759 Note, do not iterate a base builtin's name like __builtin_va_list.
36760 Used from c_common_nodes_and_builtins. */
36763 ix86_enum_va_list (int idx
, const char **pname
, tree
*ptree
)
36773 *ptree
= ms_va_list_type_node
;
36774 *pname
= "__builtin_ms_va_list";
36778 *ptree
= sysv_va_list_type_node
;
36779 *pname
= "__builtin_sysv_va_list";
36787 #undef TARGET_SCHED_DISPATCH
36788 #define TARGET_SCHED_DISPATCH has_dispatch
36789 #undef TARGET_SCHED_DISPATCH_DO
36790 #define TARGET_SCHED_DISPATCH_DO do_dispatch
36791 #undef TARGET_SCHED_REASSOCIATION_WIDTH
36792 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
36794 /* The size of the dispatch window is the total number of bytes of
36795 object code allowed in a window. */
36796 #define DISPATCH_WINDOW_SIZE 16
36798 /* Number of dispatch windows considered for scheduling. */
36799 #define MAX_DISPATCH_WINDOWS 3
36801 /* Maximum number of instructions in a window. */
36804 /* Maximum number of immediate operands in a window. */
36807 /* Maximum number of immediate bits allowed in a window. */
36808 #define MAX_IMM_SIZE 128
36810 /* Maximum number of 32 bit immediates allowed in a window. */
36811 #define MAX_IMM_32 4
36813 /* Maximum number of 64 bit immediates allowed in a window. */
36814 #define MAX_IMM_64 2
36816 /* Maximum total of loads or prefetches allowed in a window. */
36819 /* Maximum total of stores allowed in a window. */
36820 #define MAX_STORE 1
36826 /* Dispatch groups. Istructions that affect the mix in a dispatch window. */
36827 enum dispatch_group
{
36842 /* Number of allowable groups in a dispatch window. It is an array
36843 indexed by dispatch_group enum. 100 is used as a big number,
36844 because the number of these kind of operations does not have any
36845 effect in dispatch window, but we need them for other reasons in
36847 static unsigned int num_allowable_groups
[disp_last
] = {
36848 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG
, BIG
36851 char group_name
[disp_last
+ 1][16] = {
36852 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
36853 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
36854 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
36857 /* Instruction path. */
36860 path_single
, /* Single micro op. */
36861 path_double
, /* Double micro op. */
36862 path_multi
, /* Instructions with more than 2 micro op.. */
36866 /* sched_insn_info defines a window to the instructions scheduled in
36867 the basic block. It contains a pointer to the insn_info table and
36868 the instruction scheduled.
36870 Windows are allocated for each basic block and are linked
36872 typedef struct sched_insn_info_s
{
36874 enum dispatch_group group
;
36875 enum insn_path path
;
36880 /* Linked list of dispatch windows. This is a two way list of
36881 dispatch windows of a basic block. It contains information about
36882 the number of uops in the window and the total number of
36883 instructions and of bytes in the object code for this dispatch
36885 typedef struct dispatch_windows_s
{
36886 int num_insn
; /* Number of insn in the window. */
36887 int num_uops
; /* Number of uops in the window. */
36888 int window_size
; /* Number of bytes in the window. */
36889 int window_num
; /* Window number between 0 or 1. */
36890 int num_imm
; /* Number of immediates in an insn. */
36891 int num_imm_32
; /* Number of 32 bit immediates in an insn. */
36892 int num_imm_64
; /* Number of 64 bit immediates in an insn. */
36893 int imm_size
; /* Total immediates in the window. */
36894 int num_loads
; /* Total memory loads in the window. */
36895 int num_stores
; /* Total memory stores in the window. */
36896 int violation
; /* Violation exists in window. */
36897 sched_insn_info
*window
; /* Pointer to the window. */
36898 struct dispatch_windows_s
*next
;
36899 struct dispatch_windows_s
*prev
;
36900 } dispatch_windows
;
36902 /* Immediate valuse used in an insn. */
36903 typedef struct imm_info_s
36910 static dispatch_windows
*dispatch_window_list
;
36911 static dispatch_windows
*dispatch_window_list1
;
36913 /* Get dispatch group of insn. */
36915 static enum dispatch_group
36916 get_mem_group (rtx insn
)
36918 enum attr_memory memory
;
36920 if (INSN_CODE (insn
) < 0)
36921 return disp_no_group
;
36922 memory
= get_attr_memory (insn
);
36923 if (memory
== MEMORY_STORE
)
36926 if (memory
== MEMORY_LOAD
)
36929 if (memory
== MEMORY_BOTH
)
36930 return disp_load_store
;
36932 return disp_no_group
;
36935 /* Return true if insn is a compare instruction. */
36940 enum attr_type type
;
36942 type
= get_attr_type (insn
);
36943 return (type
== TYPE_TEST
36944 || type
== TYPE_ICMP
36945 || type
== TYPE_FCMP
36946 || GET_CODE (PATTERN (insn
)) == COMPARE
);
36949 /* Return true if a dispatch violation encountered. */
36952 dispatch_violation (void)
36954 if (dispatch_window_list
->next
)
36955 return dispatch_window_list
->next
->violation
;
36956 return dispatch_window_list
->violation
;
36959 /* Return true if insn is a branch instruction. */
36962 is_branch (rtx insn
)
36964 return (CALL_P (insn
) || JUMP_P (insn
));
36967 /* Return true if insn is a prefetch instruction. */
36970 is_prefetch (rtx insn
)
36972 return NONJUMP_INSN_P (insn
) && GET_CODE (PATTERN (insn
)) == PREFETCH
;
36975 /* This function initializes a dispatch window and the list container holding a
36976 pointer to the window. */
36979 init_window (int window_num
)
36982 dispatch_windows
*new_list
;
36984 if (window_num
== 0)
36985 new_list
= dispatch_window_list
;
36987 new_list
= dispatch_window_list1
;
36989 new_list
->num_insn
= 0;
36990 new_list
->num_uops
= 0;
36991 new_list
->window_size
= 0;
36992 new_list
->next
= NULL
;
36993 new_list
->prev
= NULL
;
36994 new_list
->window_num
= window_num
;
36995 new_list
->num_imm
= 0;
36996 new_list
->num_imm_32
= 0;
36997 new_list
->num_imm_64
= 0;
36998 new_list
->imm_size
= 0;
36999 new_list
->num_loads
= 0;
37000 new_list
->num_stores
= 0;
37001 new_list
->violation
= false;
37003 for (i
= 0; i
< MAX_INSN
; i
++)
37005 new_list
->window
[i
].insn
= NULL
;
37006 new_list
->window
[i
].group
= disp_no_group
;
37007 new_list
->window
[i
].path
= no_path
;
37008 new_list
->window
[i
].byte_len
= 0;
37009 new_list
->window
[i
].imm_bytes
= 0;
37014 /* This function allocates and initializes a dispatch window and the
37015 list container holding a pointer to the window. */
37017 static dispatch_windows
*
37018 allocate_window (void)
37020 dispatch_windows
*new_list
= XNEW (struct dispatch_windows_s
);
37021 new_list
->window
= XNEWVEC (struct sched_insn_info_s
, MAX_INSN
+ 1);
37026 /* This routine initializes the dispatch scheduling information. It
37027 initiates building dispatch scheduler tables and constructs the
37028 first dispatch window. */
37031 init_dispatch_sched (void)
37033 /* Allocate a dispatch list and a window. */
37034 dispatch_window_list
= allocate_window ();
37035 dispatch_window_list1
= allocate_window ();
37040 /* This function returns true if a branch is detected. End of a basic block
37041 does not have to be a branch, but here we assume only branches end a
37045 is_end_basic_block (enum dispatch_group group
)
37047 return group
== disp_branch
;
37050 /* This function is called when the end of a window processing is reached. */
37053 process_end_window (void)
37055 gcc_assert (dispatch_window_list
->num_insn
<= MAX_INSN
);
37056 if (dispatch_window_list
->next
)
37058 gcc_assert (dispatch_window_list1
->num_insn
<= MAX_INSN
);
37059 gcc_assert (dispatch_window_list
->window_size
37060 + dispatch_window_list1
->window_size
<= 48);
37066 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
37067 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
37068 for 48 bytes of instructions. Note that these windows are not dispatch
37069 windows that their sizes are DISPATCH_WINDOW_SIZE. */
37071 static dispatch_windows
*
37072 allocate_next_window (int window_num
)
37074 if (window_num
== 0)
37076 if (dispatch_window_list
->next
)
37079 return dispatch_window_list
;
37082 dispatch_window_list
->next
= dispatch_window_list1
;
37083 dispatch_window_list1
->prev
= dispatch_window_list
;
37085 return dispatch_window_list1
;
37088 /* Increment the number of immediate operands of an instruction. */
37091 find_constant_1 (rtx
*in_rtx
, imm_info
*imm_values
)
37096 switch ( GET_CODE (*in_rtx
))
37101 (imm_values
->imm
)++;
37102 if (x86_64_immediate_operand (*in_rtx
, SImode
))
37103 (imm_values
->imm32
)++;
37105 (imm_values
->imm64
)++;
37109 (imm_values
->imm
)++;
37110 (imm_values
->imm64
)++;
37114 if (LABEL_KIND (*in_rtx
) == LABEL_NORMAL
)
37116 (imm_values
->imm
)++;
37117 (imm_values
->imm32
)++;
37128 /* Compute number of immediate operands of an instruction. */
37131 find_constant (rtx in_rtx
, imm_info
*imm_values
)
37133 for_each_rtx (INSN_P (in_rtx
) ? &PATTERN (in_rtx
) : &in_rtx
,
37134 (rtx_function
) find_constant_1
, (void *) imm_values
);
37137 /* Return total size of immediate operands of an instruction along with number
37138 of corresponding immediate-operands. It initializes its parameters to zero
37139 befor calling FIND_CONSTANT.
37140 INSN is the input instruction. IMM is the total of immediates.
37141 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
37145 get_num_immediates (rtx insn
, int *imm
, int *imm32
, int *imm64
)
37147 imm_info imm_values
= {0, 0, 0};
37149 find_constant (insn
, &imm_values
);
37150 *imm
= imm_values
.imm
;
37151 *imm32
= imm_values
.imm32
;
37152 *imm64
= imm_values
.imm64
;
37153 return imm_values
.imm32
* 4 + imm_values
.imm64
* 8;
37156 /* This function indicates if an operand of an instruction is an
37160 has_immediate (rtx insn
)
37162 int num_imm_operand
;
37163 int num_imm32_operand
;
37164 int num_imm64_operand
;
37167 return get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
37168 &num_imm64_operand
);
37172 /* Return single or double path for instructions. */
37174 static enum insn_path
37175 get_insn_path (rtx insn
)
37177 enum attr_amdfam10_decode path
= get_attr_amdfam10_decode (insn
);
37179 if ((int)path
== 0)
37180 return path_single
;
37182 if ((int)path
== 1)
37183 return path_double
;
37188 /* Return insn dispatch group. */
37190 static enum dispatch_group
37191 get_insn_group (rtx insn
)
37193 enum dispatch_group group
= get_mem_group (insn
);
37197 if (is_branch (insn
))
37198 return disp_branch
;
37203 if (has_immediate (insn
))
37206 if (is_prefetch (insn
))
37207 return disp_prefetch
;
37209 return disp_no_group
;
37212 /* Count number of GROUP restricted instructions in a dispatch
37213 window WINDOW_LIST. */
37216 count_num_restricted (rtx insn
, dispatch_windows
*window_list
)
37218 enum dispatch_group group
= get_insn_group (insn
);
37220 int num_imm_operand
;
37221 int num_imm32_operand
;
37222 int num_imm64_operand
;
37224 if (group
== disp_no_group
)
37227 if (group
== disp_imm
)
37229 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
37230 &num_imm64_operand
);
37231 if (window_list
->imm_size
+ imm_size
> MAX_IMM_SIZE
37232 || num_imm_operand
+ window_list
->num_imm
> MAX_IMM
37233 || (num_imm32_operand
> 0
37234 && (window_list
->num_imm_32
+ num_imm32_operand
> MAX_IMM_32
37235 || window_list
->num_imm_64
* 2 + num_imm32_operand
> MAX_IMM_32
))
37236 || (num_imm64_operand
> 0
37237 && (window_list
->num_imm_64
+ num_imm64_operand
> MAX_IMM_64
37238 || window_list
->num_imm_32
+ num_imm64_operand
* 2 > MAX_IMM_32
))
37239 || (window_list
->imm_size
+ imm_size
== MAX_IMM_SIZE
37240 && num_imm64_operand
> 0
37241 && ((window_list
->num_imm_64
> 0
37242 && window_list
->num_insn
>= 2)
37243 || window_list
->num_insn
>= 3)))
37249 if ((group
== disp_load_store
37250 && (window_list
->num_loads
>= MAX_LOAD
37251 || window_list
->num_stores
>= MAX_STORE
))
37252 || ((group
== disp_load
37253 || group
== disp_prefetch
)
37254 && window_list
->num_loads
>= MAX_LOAD
)
37255 || (group
== disp_store
37256 && window_list
->num_stores
>= MAX_STORE
))
37262 /* This function returns true if insn satisfies dispatch rules on the
37263 last window scheduled. */
37266 fits_dispatch_window (rtx insn
)
37268 dispatch_windows
*window_list
= dispatch_window_list
;
37269 dispatch_windows
*window_list_next
= dispatch_window_list
->next
;
37270 unsigned int num_restrict
;
37271 enum dispatch_group group
= get_insn_group (insn
);
37272 enum insn_path path
= get_insn_path (insn
);
37275 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
37276 instructions should be given the lowest priority in the
37277 scheduling process in Haifa scheduler to make sure they will be
37278 scheduled in the same dispatch window as the refrence to them. */
37279 if (group
== disp_jcc
|| group
== disp_cmp
)
37282 /* Check nonrestricted. */
37283 if (group
== disp_no_group
|| group
== disp_branch
)
37286 /* Get last dispatch window. */
37287 if (window_list_next
)
37288 window_list
= window_list_next
;
37290 if (window_list
->window_num
== 1)
37292 sum
= window_list
->prev
->window_size
+ window_list
->window_size
;
37295 || (min_insn_size (insn
) + sum
) >= 48)
37296 /* Window 1 is full. Go for next window. */
37300 num_restrict
= count_num_restricted (insn
, window_list
);
37302 if (num_restrict
> num_allowable_groups
[group
])
37305 /* See if it fits in the first window. */
37306 if (window_list
->window_num
== 0)
37308 /* The first widow should have only single and double path
37310 if (path
== path_double
37311 && (window_list
->num_uops
+ 2) > MAX_INSN
)
37313 else if (path
!= path_single
)
37319 /* Add an instruction INSN with NUM_UOPS micro-operations to the
37320 dispatch window WINDOW_LIST. */
37323 add_insn_window (rtx insn
, dispatch_windows
*window_list
, int num_uops
)
37325 int byte_len
= min_insn_size (insn
);
37326 int num_insn
= window_list
->num_insn
;
37328 sched_insn_info
*window
= window_list
->window
;
37329 enum dispatch_group group
= get_insn_group (insn
);
37330 enum insn_path path
= get_insn_path (insn
);
37331 int num_imm_operand
;
37332 int num_imm32_operand
;
37333 int num_imm64_operand
;
37335 if (!window_list
->violation
&& group
!= disp_cmp
37336 && !fits_dispatch_window (insn
))
37337 window_list
->violation
= true;
37339 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
37340 &num_imm64_operand
);
37342 /* Initialize window with new instruction. */
37343 window
[num_insn
].insn
= insn
;
37344 window
[num_insn
].byte_len
= byte_len
;
37345 window
[num_insn
].group
= group
;
37346 window
[num_insn
].path
= path
;
37347 window
[num_insn
].imm_bytes
= imm_size
;
37349 window_list
->window_size
+= byte_len
;
37350 window_list
->num_insn
= num_insn
+ 1;
37351 window_list
->num_uops
= window_list
->num_uops
+ num_uops
;
37352 window_list
->imm_size
+= imm_size
;
37353 window_list
->num_imm
+= num_imm_operand
;
37354 window_list
->num_imm_32
+= num_imm32_operand
;
37355 window_list
->num_imm_64
+= num_imm64_operand
;
37357 if (group
== disp_store
)
37358 window_list
->num_stores
+= 1;
37359 else if (group
== disp_load
37360 || group
== disp_prefetch
)
37361 window_list
->num_loads
+= 1;
37362 else if (group
== disp_load_store
)
37364 window_list
->num_stores
+= 1;
37365 window_list
->num_loads
+= 1;
37369 /* Adds a scheduled instruction, INSN, to the current dispatch window.
37370 If the total bytes of instructions or the number of instructions in
37371 the window exceed allowable, it allocates a new window. */
37374 add_to_dispatch_window (rtx insn
)
37377 dispatch_windows
*window_list
;
37378 dispatch_windows
*next_list
;
37379 dispatch_windows
*window0_list
;
37380 enum insn_path path
;
37381 enum dispatch_group insn_group
;
37389 if (INSN_CODE (insn
) < 0)
37392 byte_len
= min_insn_size (insn
);
37393 window_list
= dispatch_window_list
;
37394 next_list
= window_list
->next
;
37395 path
= get_insn_path (insn
);
37396 insn_group
= get_insn_group (insn
);
37398 /* Get the last dispatch window. */
37400 window_list
= dispatch_window_list
->next
;
37402 if (path
== path_single
)
37404 else if (path
== path_double
)
37407 insn_num_uops
= (int) path
;
37409 /* If current window is full, get a new window.
37410 Window number zero is full, if MAX_INSN uops are scheduled in it.
37411 Window number one is full, if window zero's bytes plus window
37412 one's bytes is 32, or if the bytes of the new instruction added
37413 to the total makes it greater than 48, or it has already MAX_INSN
37414 instructions in it. */
37415 num_insn
= window_list
->num_insn
;
37416 num_uops
= window_list
->num_uops
;
37417 window_num
= window_list
->window_num
;
37418 insn_fits
= fits_dispatch_window (insn
);
37420 if (num_insn
>= MAX_INSN
37421 || num_uops
+ insn_num_uops
> MAX_INSN
37424 window_num
= ~window_num
& 1;
37425 window_list
= allocate_next_window (window_num
);
37428 if (window_num
== 0)
37430 add_insn_window (insn
, window_list
, insn_num_uops
);
37431 if (window_list
->num_insn
>= MAX_INSN
37432 && insn_group
== disp_branch
)
37434 process_end_window ();
37438 else if (window_num
== 1)
37440 window0_list
= window_list
->prev
;
37441 sum
= window0_list
->window_size
+ window_list
->window_size
;
37443 || (byte_len
+ sum
) >= 48)
37445 process_end_window ();
37446 window_list
= dispatch_window_list
;
37449 add_insn_window (insn
, window_list
, insn_num_uops
);
37452 gcc_unreachable ();
37454 if (is_end_basic_block (insn_group
))
37456 /* End of basic block is reached do end-basic-block process. */
37457 process_end_window ();
37462 /* Print the dispatch window, WINDOW_NUM, to FILE. */
37464 DEBUG_FUNCTION
static void
37465 debug_dispatch_window_file (FILE *file
, int window_num
)
37467 dispatch_windows
*list
;
37470 if (window_num
== 0)
37471 list
= dispatch_window_list
;
37473 list
= dispatch_window_list1
;
37475 fprintf (file
, "Window #%d:\n", list
->window_num
);
37476 fprintf (file
, " num_insn = %d, num_uops = %d, window_size = %d\n",
37477 list
->num_insn
, list
->num_uops
, list
->window_size
);
37478 fprintf (file
, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
37479 list
->num_imm
, list
->num_imm_32
, list
->num_imm_64
, list
->imm_size
);
37481 fprintf (file
, " num_loads = %d, num_stores = %d\n", list
->num_loads
,
37483 fprintf (file
, " insn info:\n");
37485 for (i
= 0; i
< MAX_INSN
; i
++)
37487 if (!list
->window
[i
].insn
)
37489 fprintf (file
, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
37490 i
, group_name
[list
->window
[i
].group
],
37491 i
, (void *)list
->window
[i
].insn
,
37492 i
, list
->window
[i
].path
,
37493 i
, list
->window
[i
].byte_len
,
37494 i
, list
->window
[i
].imm_bytes
);
37498 /* Print to stdout a dispatch window. */
37500 DEBUG_FUNCTION
void
37501 debug_dispatch_window (int window_num
)
37503 debug_dispatch_window_file (stdout
, window_num
);
/* Print INSN dispatch information to FILE.  Insns without a
   recognized insn code (INSN_CODE < 0) are silently skipped.  */

DEBUG_FUNCTION static void
debug_insn_dispatch_info_file (FILE *file, rtx insn)
{
  int byte_len;
  enum insn_path path;
  enum dispatch_group group;
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (INSN_CODE (insn) < 0)
    return;

  /* Gather the per-insn dispatch attributes: encoded length, decoder
     path, dispatch group and immediate-operand statistics.  */
  byte_len = min_insn_size (insn);
  path = get_insn_path (insn);
  group = get_insn_group (insn);
  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
				 &num_imm64_operand);

  fprintf (file, " insn info:\n");
  fprintf (file, "  group = %s, path = %d, byte_len = %d\n",
	   group_name[group], path, byte_len);
  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
	   num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
}
/* Print to stdout the status of the ready list with respect to
   dispatch windows.  (The old comment said STDERR, but the code has
   always written to stdout.)  */

DEBUG_FUNCTION void
debug_ready_dispatch (void)
{
  int i;
  int no_ready = number_in_ready ();

  fprintf (stdout, "Number of ready: %d\n", no_ready);

  for (i = 0; i < no_ready; i++)
    debug_insn_dispatch_info_file (stdout, get_ready_element (i));
}
37550 /* This routine is the driver of the dispatch scheduler. */
37553 do_dispatch (rtx insn
, int mode
)
37555 if (mode
== DISPATCH_INIT
)
37556 init_dispatch_sched ();
37557 else if (mode
== ADD_TO_DISPATCH_WINDOW
)
37558 add_to_dispatch_window (insn
);
/* Return TRUE if Dispatch Scheduling is supported.  ACTION asks a
   specific question about INSN or the scheduler state; anything but
   a Bulldozer tune with -mdispatch-scheduler answers false.  */

static bool
has_dispatch (rtx insn, int action)
{
  /* Dispatch scheduling is only modeled for AMD Bulldozer cores.  */
  if ((ix86_tune == PROCESSOR_BDVER1 || ix86_tune == PROCESSOR_BDVER2)
      && flag_dispatch_scheduler)
    switch (action)
      {
      default:
	return false;

      case IS_DISPATCH_ON:
	return true;
	break;

      case IS_CMP:
	return is_cmp (insn);

      case DISPATCH_VIOLATION:
	return dispatch_violation ();

      case FITS_DISPATCH_WINDOW:
	return fits_dispatch_window (insn);
      }

  return false;
}
37590 /* Implementation of reassociation_width target hook used by
37591 reassoc phase to identify parallelism level in reassociated
37592 tree. Statements tree_code is passed in OPC. Arguments type
37595 Currently parallel reassociation is enabled for Atom
37596 processors only and we set reassociation width to be 2
37597 because Atom may issue up to 2 instructions per cycle.
37599 Return value should be fixed if parallel reassociation is
37600 enabled for other processors. */
37603 ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED
,
37604 enum machine_mode mode
)
37608 if (INTEGRAL_MODE_P (mode
) && TARGET_REASSOC_INT_TO_PARALLEL
)
37610 else if (FLOAT_MODE_P (mode
) && TARGET_REASSOC_FP_TO_PARALLEL
)
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
   place emms and femms instructions.  */

/* Return the vector mode the vectorizer should prefer for scalar
   MODE, based on the enabled ISA extensions.  word_mode means "do
   not vectorize this mode".  */

static enum machine_mode
ix86_preferred_simd_mode (enum machine_mode mode)
{
  if (!TARGET_SSE)
    return word_mode;

  switch (mode)
    {
    /* Integer modes: 256-bit vectors need AVX2, otherwise 128-bit.  */
    case QImode:
      return TARGET_AVX2 ? V32QImode : V16QImode;
    case HImode:
      return TARGET_AVX2 ? V16HImode : V8HImode;
    case SImode:
      return TARGET_AVX2 ? V8SImode : V4SImode;
    case DImode:
      return TARGET_AVX2 ? V4DImode : V2DImode;

    case SFmode:
      /* Float vectors only need AVX (not AVX2) for 256 bits.  */
      if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V8SFmode;
      else
	return V4SFmode;

    case DFmode:
      if (!TARGET_VECTORIZE_DOUBLE)
	return word_mode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V4DFmode;
      else if (TARGET_SSE2)
	return V2DFmode;
      /* FALLTHRU */

    default:
      return word_mode;
    }
}
37656 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
37659 static unsigned int
37660 ix86_autovectorize_vector_sizes (void)
37662 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? 32 | 16 : 0;
37665 /* Initialize the GCC target structure. */
37666 #undef TARGET_RETURN_IN_MEMORY
37667 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
37669 #undef TARGET_LEGITIMIZE_ADDRESS
37670 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
37672 #undef TARGET_ATTRIBUTE_TABLE
37673 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
37674 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
37675 # undef TARGET_MERGE_DECL_ATTRIBUTES
37676 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
37679 #undef TARGET_COMP_TYPE_ATTRIBUTES
37680 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
37682 #undef TARGET_INIT_BUILTINS
37683 #define TARGET_INIT_BUILTINS ix86_init_builtins
37684 #undef TARGET_BUILTIN_DECL
37685 #define TARGET_BUILTIN_DECL ix86_builtin_decl
37686 #undef TARGET_EXPAND_BUILTIN
37687 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
37689 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
37690 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
37691 ix86_builtin_vectorized_function
37693 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
37694 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
37696 #undef TARGET_BUILTIN_RECIPROCAL
37697 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
37699 #undef TARGET_ASM_FUNCTION_EPILOGUE
37700 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
37702 #undef TARGET_ENCODE_SECTION_INFO
37703 #ifndef SUBTARGET_ENCODE_SECTION_INFO
37704 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
37706 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
37709 #undef TARGET_ASM_OPEN_PAREN
37710 #define TARGET_ASM_OPEN_PAREN ""
37711 #undef TARGET_ASM_CLOSE_PAREN
37712 #define TARGET_ASM_CLOSE_PAREN ""
37714 #undef TARGET_ASM_BYTE_OP
37715 #define TARGET_ASM_BYTE_OP ASM_BYTE
37717 #undef TARGET_ASM_ALIGNED_HI_OP
37718 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
37719 #undef TARGET_ASM_ALIGNED_SI_OP
37720 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
37722 #undef TARGET_ASM_ALIGNED_DI_OP
37723 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
37726 #undef TARGET_PROFILE_BEFORE_PROLOGUE
37727 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
37729 #undef TARGET_ASM_UNALIGNED_HI_OP
37730 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
37731 #undef TARGET_ASM_UNALIGNED_SI_OP
37732 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
37733 #undef TARGET_ASM_UNALIGNED_DI_OP
37734 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
37736 #undef TARGET_PRINT_OPERAND
37737 #define TARGET_PRINT_OPERAND ix86_print_operand
37738 #undef TARGET_PRINT_OPERAND_ADDRESS
37739 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
37740 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
37741 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
37742 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
37743 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
37745 #undef TARGET_SCHED_INIT_GLOBAL
37746 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
37747 #undef TARGET_SCHED_ADJUST_COST
37748 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
37749 #undef TARGET_SCHED_ISSUE_RATE
37750 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
37751 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
37752 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
37753 ia32_multipass_dfa_lookahead
37755 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
37756 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
37759 #undef TARGET_HAVE_TLS
37760 #define TARGET_HAVE_TLS true
37762 #undef TARGET_CANNOT_FORCE_CONST_MEM
37763 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
37764 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
37765 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
37767 #undef TARGET_DELEGITIMIZE_ADDRESS
37768 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
37770 #undef TARGET_MS_BITFIELD_LAYOUT_P
37771 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
37774 #undef TARGET_BINDS_LOCAL_P
37775 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
37777 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
37778 #undef TARGET_BINDS_LOCAL_P
37779 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
37782 #undef TARGET_ASM_OUTPUT_MI_THUNK
37783 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
37784 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
37785 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
37787 #undef TARGET_ASM_FILE_START
37788 #define TARGET_ASM_FILE_START x86_file_start
37790 #undef TARGET_OPTION_OVERRIDE
37791 #define TARGET_OPTION_OVERRIDE ix86_option_override
37793 #undef TARGET_REGISTER_MOVE_COST
37794 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
37795 #undef TARGET_MEMORY_MOVE_COST
37796 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
37797 #undef TARGET_RTX_COSTS
37798 #define TARGET_RTX_COSTS ix86_rtx_costs
37799 #undef TARGET_ADDRESS_COST
37800 #define TARGET_ADDRESS_COST ix86_address_cost
37802 #undef TARGET_FIXED_CONDITION_CODE_REGS
37803 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
37804 #undef TARGET_CC_MODES_COMPATIBLE
37805 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
37807 #undef TARGET_MACHINE_DEPENDENT_REORG
37808 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
37810 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
37811 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
37813 #undef TARGET_BUILD_BUILTIN_VA_LIST
37814 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
37816 #undef TARGET_ENUM_VA_LIST_P
37817 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
37819 #undef TARGET_FN_ABI_VA_LIST
37820 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
37822 #undef TARGET_CANONICAL_VA_LIST_TYPE
37823 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
37825 #undef TARGET_EXPAND_BUILTIN_VA_START
37826 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
37828 #undef TARGET_MD_ASM_CLOBBERS
37829 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
37831 #undef TARGET_PROMOTE_PROTOTYPES
37832 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
37833 #undef TARGET_STRUCT_VALUE_RTX
37834 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
37835 #undef TARGET_SETUP_INCOMING_VARARGS
37836 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
37837 #undef TARGET_MUST_PASS_IN_STACK
37838 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
37839 #undef TARGET_FUNCTION_ARG_ADVANCE
37840 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
37841 #undef TARGET_FUNCTION_ARG
37842 #define TARGET_FUNCTION_ARG ix86_function_arg
37843 #undef TARGET_FUNCTION_ARG_BOUNDARY
37844 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
37845 #undef TARGET_PASS_BY_REFERENCE
37846 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
37847 #undef TARGET_INTERNAL_ARG_POINTER
37848 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
37849 #undef TARGET_UPDATE_STACK_BOUNDARY
37850 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
37851 #undef TARGET_GET_DRAP_RTX
37852 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
37853 #undef TARGET_STRICT_ARGUMENT_NAMING
37854 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
37855 #undef TARGET_STATIC_CHAIN
37856 #define TARGET_STATIC_CHAIN ix86_static_chain
37857 #undef TARGET_TRAMPOLINE_INIT
37858 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
37859 #undef TARGET_RETURN_POPS_ARGS
37860 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
37862 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
37863 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
37865 #undef TARGET_SCALAR_MODE_SUPPORTED_P
37866 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
37868 #undef TARGET_VECTOR_MODE_SUPPORTED_P
37869 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
37871 #undef TARGET_C_MODE_FOR_SUFFIX
37872 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
37875 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
37876 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
37879 #ifdef SUBTARGET_INSERT_ATTRIBUTES
37880 #undef TARGET_INSERT_ATTRIBUTES
37881 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
37884 #undef TARGET_MANGLE_TYPE
37885 #define TARGET_MANGLE_TYPE ix86_mangle_type
37887 #ifndef TARGET_MACHO
37888 #undef TARGET_STACK_PROTECT_FAIL
37889 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
37892 #undef TARGET_FUNCTION_VALUE
37893 #define TARGET_FUNCTION_VALUE ix86_function_value
37895 #undef TARGET_FUNCTION_VALUE_REGNO_P
37896 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
37898 #undef TARGET_PROMOTE_FUNCTION_MODE
37899 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
37901 #undef TARGET_SECONDARY_RELOAD
37902 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
37904 #undef TARGET_CLASS_MAX_NREGS
37905 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
37907 #undef TARGET_PREFERRED_RELOAD_CLASS
37908 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
37909 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
37910 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
37911 #undef TARGET_CLASS_LIKELY_SPILLED_P
37912 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
37914 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
37915 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
37916 ix86_builtin_vectorization_cost
37917 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
37918 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
37919 ix86_vectorize_vec_perm_const_ok
37920 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
37921 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
37922 ix86_preferred_simd_mode
37923 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
37924 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
37925 ix86_autovectorize_vector_sizes
37927 #undef TARGET_SET_CURRENT_FUNCTION
37928 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
37930 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
37931 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
37933 #undef TARGET_OPTION_SAVE
37934 #define TARGET_OPTION_SAVE ix86_function_specific_save
37936 #undef TARGET_OPTION_RESTORE
37937 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
37939 #undef TARGET_OPTION_PRINT
37940 #define TARGET_OPTION_PRINT ix86_function_specific_print
37942 #undef TARGET_CAN_INLINE_P
37943 #define TARGET_CAN_INLINE_P ix86_can_inline_p
37945 #undef TARGET_EXPAND_TO_RTL_HOOK
37946 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
37948 #undef TARGET_LEGITIMATE_ADDRESS_P
37949 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
37951 #undef TARGET_LEGITIMATE_CONSTANT_P
37952 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
37954 #undef TARGET_FRAME_POINTER_REQUIRED
37955 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
37957 #undef TARGET_CAN_ELIMINATE
37958 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
37960 #undef TARGET_EXTRA_LIVE_ON_ENTRY
37961 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
37963 #undef TARGET_ASM_CODE_END
37964 #define TARGET_ASM_CODE_END ix86_code_end
37966 #undef TARGET_CONDITIONAL_REGISTER_USAGE
37967 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
37970 #undef TARGET_INIT_LIBFUNCS
37971 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
/* The IA-32 target vector, assembled from the TARGET_* hook macros
   defined above by TARGET_INITIALIZER.  */
struct gcc_target targetm = TARGET_INITIALIZER;
37976 #include "gt-i386.h"