/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "diagnostic-core.h"
#include "basic-block.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "df.h"
#include "tm-constrs.h"
#include "sched-int.h"
#include "sbitmap.h"
#include "fibheap.h"
#include "diagnostic.h"
enum upper_128bits_state
{
  unknown = 0,
  unused,
  used
};

typedef struct block_info_def
{
  /* State of the upper 128bits of AVX registers at exit.  */
  enum upper_128bits_state state;
  /* TRUE if state of the upper 128bits of AVX registers is unchanged
     in this block.  */
  bool unchanged;
  /* TRUE if block has been processed.  */
  bool processed;
  /* TRUE if block has been scanned.  */
  bool scanned;
  /* Previous state of the upper 128bits of AVX registers at entry.  */
  enum upper_128bits_state prev;
} *block_info;

#define BLOCK_INFO(B) ((block_info) (B)->aux)

enum call_avx256_state
{
  /* Callee returns 256bit AVX register.  */
  callee_return_avx256 = -1,
  /* Callee returns and passes 256bit AVX register.  */
  callee_return_pass_avx256,
  /* Callee passes 256bit AVX register.  */
  callee_pass_avx256,
  /* Callee doesn't return nor pass 256bit AVX register, or no
     256bit AVX register in function return.  */
  call_no_avx256,
  /* vzeroupper intrinsic.  */
  vzeroupper_intrinsic
};
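/* Note: a call_avx256_state value travels as the immediate operand of
   each vzeroupper UNSPEC_VOLATILE pattern; move_or_delete_vzeroupper_2
   below recovers it with INTVAL (XVECEXP (pat, 0, 0)) to tell whether
   the callee returns and/or is passed a 256bit AVX register.  */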
/* Check if a 256bit AVX register is referenced in stores.  */

static void
check_avx256_stores (rtx dest, const_rtx set, void *data)
{
  if ((REG_P (dest)
       && VALID_AVX256_REG_MODE (GET_MODE (dest)))
      || (GET_CODE (set) == SET
	  && REG_P (SET_SRC (set))
	  && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
    {
      enum upper_128bits_state *state
	= (enum upper_128bits_state *) data;
      *state = used;
    }
}
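/* check_avx256_stores is the note_stores callback used below: DATA points
   at the running upper_128bits_state, which is promoted to "used" as soon
   as any store referencing a 256bit AVX register is seen.  */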
/* Helper function for move_or_delete_vzeroupper_1.  Look for vzeroupper
   in basic block BB.  Delete it if upper 128bit AVX registers are
   unused.  If it isn't deleted, move it to just before a jump insn.

   STATE is state of the upper 128bits of AVX registers at entry.  */

static void
move_or_delete_vzeroupper_2 (basic_block bb,
			     enum upper_128bits_state state)
{
  rtx insn, bb_end;
  rtx vzeroupper_insn = NULL_RTX;
  rtx pat;
  int avx256;
  bool unchanged;

  if (BLOCK_INFO (bb)->unchanged)
    {
      if (dump_file)
	fprintf (dump_file, " [bb %i] unchanged: upper 128bits: %d\n",
		 bb->index, state);

      BLOCK_INFO (bb)->state = state;
      return;
    }

  if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
    {
      if (dump_file)
	fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
		 bb->index, BLOCK_INFO (bb)->state);
      return;
    }

  BLOCK_INFO (bb)->prev = state;

  if (dump_file)
    fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
	     bb->index, state);

  unchanged = true;

  /* BB_END changes when it is deleted.  */
  bb_end = BB_END (bb);
  insn = BB_HEAD (bb);
  while (insn != bb_end)
    {
      insn = NEXT_INSN (insn);

      if (!NONDEBUG_INSN_P (insn))
	continue;

      /* Move vzeroupper before jump/call.  */
      if (JUMP_P (insn) || CALL_P (insn))
	{
	  if (!vzeroupper_insn)
	    continue;

	  if (PREV_INSN (insn) != vzeroupper_insn)
	    {
	      if (dump_file)
		{
		  fprintf (dump_file, "Move vzeroupper after:\n");
		  print_rtl_single (dump_file, PREV_INSN (insn));
		  fprintf (dump_file, "before:\n");
		  print_rtl_single (dump_file, insn);
		}
	      reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
				  PREV_INSN (insn));
	    }
	  vzeroupper_insn = NULL_RTX;
	  continue;
	}

      pat = PATTERN (insn);

      /* Check insn for vzeroupper intrinsic.  */
      if (GET_CODE (pat) == UNSPEC_VOLATILE
	  && XINT (pat, 1) == UNSPECV_VZEROUPPER)
	{
	  if (dump_file)
	    {
	      /* Found vzeroupper intrinsic.  */
	      fprintf (dump_file, "Found vzeroupper:\n");
	      print_rtl_single (dump_file, insn);
	    }
	}
      else
	{
	  /* Check insn for vzeroall intrinsic.  */
	  if (GET_CODE (pat) == PARALLEL
	      && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
	      && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
	    {
	      state = unused;
	      unchanged = false;

	      /* Delete pending vzeroupper insertion.  */
	      if (vzeroupper_insn)
		{
		  delete_insn (vzeroupper_insn);
		  vzeroupper_insn = NULL_RTX;
		}
	    }
	  else if (state != used)
	    {
	      note_stores (pat, check_avx256_stores, &state);
	      if (state == used)
		unchanged = false;
	    }
	  continue;
	}

      /* Process vzeroupper intrinsic.  */
      avx256 = INTVAL (XVECEXP (pat, 0, 0));

      if (state == unused)
	{
	  /* Since the upper 128bits are cleared, callee must not pass
	     256bit AVX register.  We only need to check if callee
	     returns 256bit AVX register.  */
	  if (avx256 == callee_return_avx256)
	    {
	      state = used;
	      unchanged = false;
	    }

	  /* Remove unnecessary vzeroupper since upper 128bits are
	     cleared.  */
	  if (dump_file)
	    {
	      fprintf (dump_file, "Delete redundant vzeroupper:\n");
	      print_rtl_single (dump_file, insn);
	    }
	  delete_insn (insn);
	}
      else
	{
	  /* Set state to UNUSED if callee doesn't return 256bit AVX
	     register.  */
	  if (avx256 != callee_return_pass_avx256)
	    state = unused;

	  if (avx256 == callee_return_pass_avx256
	      || avx256 == callee_pass_avx256)
	    {
	      /* Must remove vzeroupper since callee passes in 256bit
		 AVX register.  */
	      if (dump_file)
		{
		  fprintf (dump_file, "Delete callee pass vzeroupper:\n");
		  print_rtl_single (dump_file, insn);
		}
	      delete_insn (insn);
	    }
	  else
	    {
	      vzeroupper_insn = insn;
	      unchanged = false;
	    }
	}
    }

  BLOCK_INFO (bb)->state = state;
  BLOCK_INFO (bb)->unchanged = unchanged;
  BLOCK_INFO (bb)->scanned = true;

  if (dump_file)
    fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
	     bb->index, unchanged ? "unchanged" : "changed",
	     state);
}
/* Helper function for move_or_delete_vzeroupper.  Process vzeroupper
   in BLOCK and check its predecessor blocks.  Treat UNKNOWN state
   as USED if UNKNOWN_IS_UNUSED is true.  Return TRUE if the exit
   state is changed.  */

static bool
move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
{
  edge e;
  edge_iterator ei;
  enum upper_128bits_state state, old_state, new_state;
  bool seen_unknown;

  if (dump_file)
    fprintf (dump_file, " Process [bb %i]: status: %d\n",
	     block->index, BLOCK_INFO (block)->processed);

  if (BLOCK_INFO (block)->processed)
    return false;

  state = unused;

  /* Check all predecessor edges of this block.  */
  seen_unknown = false;
  FOR_EACH_EDGE (e, ei, block->preds)
    {
      if (e->src == block)
	continue;
      switch (BLOCK_INFO (e->src)->state)
	{
	case unknown:
	  if (!unknown_is_unused)
	    seen_unknown = true;
	case unused:
	  break;
	case used:
	  state = used;
	  goto done;
	}
    }

  if (seen_unknown)
    state = unknown;

 done:
  old_state = BLOCK_INFO (block)->state;
  move_or_delete_vzeroupper_2 (block, state);
  new_state = BLOCK_INFO (block)->state;

  if (state != unknown || new_state == used)
    BLOCK_INFO (block)->processed = true;

  /* Need to rescan if the upper 128bits of AVX registers are changed
     to USED at exit.  */
  if (new_state != old_state)
    {
      if (new_state == used)
	cfun->machine->rescan_vzeroupper_p = 1;
      return true;
    }
  else
    return false;
}
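/* The driver below runs an iterative dataflow over the CFG using two
   fibonacci heaps: WORKLIST holds the blocks of the current round and
   PENDING those queued for the next one, both keyed by reverse completion
   order so that predecessors tend to be visited first.  Rounds repeat only
   while rescan_vzeroupper_p records that some block's exit state was
   promoted to "used".  */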
/* Go through the instruction stream looking for vzeroupper.  Delete
   it if upper 128bit AVX registers are unused.  If it isn't deleted,
   move it to just before a jump insn.  */

static void
move_or_delete_vzeroupper (void)
{
  edge e;
  edge_iterator ei;
  basic_block bb;
  fibheap_t worklist, pending, fibheap_swap;
  sbitmap visited, in_worklist, in_pending, sbitmap_swap;
  int *bb_order;
  int *rc_order;
  int i;

  /* Set up block info for each basic block.  */
  alloc_aux_for_blocks (sizeof (struct block_info_def));

  /* Process outgoing edges of entry point.  */
  if (dump_file)
    fprintf (dump_file, "Process outgoing edges of entry point\n");

  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    {
      move_or_delete_vzeroupper_2 (e->dest,
				   cfun->machine->caller_pass_avx256_p
				   ? used : unused);
      BLOCK_INFO (e->dest)->processed = true;
    }

  /* Compute reverse completion order of depth first search of the CFG
     so that the data-flow runs faster.  */
  rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
  bb_order = XNEWVEC (int, last_basic_block);
  pre_and_rev_post_order_compute (NULL, rc_order, false);
  for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
    bb_order[rc_order[i]] = i;
  free (rc_order);

  worklist = fibheap_new ();
  pending = fibheap_new ();
  visited = sbitmap_alloc (last_basic_block);
  in_worklist = sbitmap_alloc (last_basic_block);
  in_pending = sbitmap_alloc (last_basic_block);
  sbitmap_zero (in_worklist);

  /* Don't check outgoing edges of entry point.  */
  sbitmap_ones (in_pending);
  FOR_EACH_BB (bb)
    if (BLOCK_INFO (bb)->processed)
      RESET_BIT (in_pending, bb->index);
    else
      {
	move_or_delete_vzeroupper_1 (bb, false);
	fibheap_insert (pending, bb_order[bb->index], bb);
      }

  if (dump_file)
    fprintf (dump_file, "Check remaining basic blocks\n");

  while (!fibheap_empty (pending))
    {
      fibheap_swap = pending;
      pending = worklist;
      worklist = fibheap_swap;
      sbitmap_swap = in_pending;
      in_pending = in_worklist;
      in_worklist = sbitmap_swap;

      sbitmap_zero (visited);

      cfun->machine->rescan_vzeroupper_p = 0;

      while (!fibheap_empty (worklist))
	{
	  bb = (basic_block) fibheap_extract_min (worklist);
	  RESET_BIT (in_worklist, bb->index);
	  gcc_assert (!TEST_BIT (visited, bb->index));
	  if (!TEST_BIT (visited, bb->index))
	    {
	      edge_iterator ei;

	      SET_BIT (visited, bb->index);

	      if (move_or_delete_vzeroupper_1 (bb, false))
		FOR_EACH_EDGE (e, ei, bb->succs)
		  {
		    if (e->dest == EXIT_BLOCK_PTR
			|| BLOCK_INFO (e->dest)->processed)
		      continue;

		    if (TEST_BIT (visited, e->dest->index))
		      {
			if (!TEST_BIT (in_pending, e->dest->index))
			  {
			    /* Send E->DEST to next round.  */
			    SET_BIT (in_pending, e->dest->index);
			    fibheap_insert (pending,
					    bb_order[e->dest->index],
					    e->dest);
			  }
		      }
		    else if (!TEST_BIT (in_worklist, e->dest->index))
		      {
			/* Add E->DEST to current round.  */
			SET_BIT (in_worklist, e->dest->index);
			fibheap_insert (worklist, bb_order[e->dest->index],
					e->dest);
		      }
		  }
	    }
	}

      if (!cfun->machine->rescan_vzeroupper_p)
	break;
    }

  free (bb_order);
  fibheap_delete (worklist);
  fibheap_delete (pending);
  sbitmap_free (visited);
  sbitmap_free (in_worklist);
  sbitmap_free (in_pending);

  if (dump_file)
    fprintf (dump_file, "Process remaining basic blocks\n");

  FOR_EACH_BB (bb)
    move_or_delete_vzeroupper_1 (bb, true);

  free_aux_for_blocks ();
}
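/* This pass is intended to run late, from the machine-dependent reorg
   hook defined later in this file, once the CFG and the insn stream are
   final and vzeroupper placement can no longer be perturbed.  */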
static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)

/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)

#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
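/* Each stringop_algs initializer that follows describes one memcpy or
   memset strategy table.  The first member is the algorithm used when the
   block size is unknown at compile time; each following {max, alg} pair
   selects ALG for blocks of at most MAX bytes, with -1 meaning "any larger
   size".  Every cost structure carries a pair of such tables per
   operation, the first for 32bit and the second for 64bit code, with
   DUMMY_STRINGOP_ALGS filling the 64bit slot on processors that are never
   tuned in 64bit mode.  */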
const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),	/* cost of an add instruction */
  COSTS_N_BYTES (3),	/* cost of a lea instruction */
  COSTS_N_BYTES (2),	/* variable shift costs */
  COSTS_N_BYTES (3),	/* constant shift costs */
  {COSTS_N_BYTES (3),	/* cost of starting multiply for QI */
   COSTS_N_BYTES (3),	/* HI */
   COSTS_N_BYTES (3),	/* SI */
   COSTS_N_BYTES (3),	/* DI */
   COSTS_N_BYTES (5)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),	/* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),	/* HI */
   COSTS_N_BYTES (3),	/* SI */
   COSTS_N_BYTES (3),	/* DI */
   COSTS_N_BYTES (5)},	/* other */
  COSTS_N_BYTES (3),	/* cost of movsx */
  COSTS_N_BYTES (3),	/* cost of movzx */
  0,			/* "large" insn */
  2,			/* cost for loading QImode using movzbl */
  {2, 2, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 2, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 2},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {2, 2, 2},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  3,			/* cost of moving MMX register */
  {3, 3},		/* cost of loading MMX registers
			   in SImode and DImode */
  {3, 3},		/* cost of storing MMX registers
			   in SImode and DImode */
  3,			/* cost of moving SSE register */
  {3, 3, 3},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {3, 3, 3},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  0,			/* size of l1 cache  */
  0,			/* size of l2 cache  */
  0,			/* size of prefetch block */
  0,			/* number of parallel prefetches */
  COSTS_N_BYTES (2),	/* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),	/* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),	/* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),	/* cost of FABS instruction.  */
  COSTS_N_BYTES (2),	/* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),	/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,			/* scalar_stmt_cost.  */
  1,			/* scalar load_cost.  */
  1,			/* scalar_store_cost.  */
  1,			/* vec_stmt_cost.  */
  1,			/* vec_to_scalar_cost.  */
  1,			/* scalar_to_vec_cost.  */
  1,			/* vec_align_load_cost.  */
  1,			/* vec_unalign_load_cost.  */
  1,			/* vec_store_cost.  */
  1,			/* cond_taken_branch_cost.  */
  1,			/* cond_not_taken_branch_cost.  */
};
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (1),	/* cost of a lea instruction */
  COSTS_N_INSNS (3),	/* variable shift costs */
  COSTS_N_INSNS (2),	/* constant shift costs */
  {COSTS_N_INSNS (6),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (6),	/* HI */
   COSTS_N_INSNS (6),	/* SI */
   COSTS_N_INSNS (6),	/* DI */
   COSTS_N_INSNS (6)},	/* other */
  COSTS_N_INSNS (1),	/* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),	/* HI */
   COSTS_N_INSNS (23),	/* SI */
   COSTS_N_INSNS (23),	/* DI */
   COSTS_N_INSNS (23)},	/* other */
  COSTS_N_INSNS (3),	/* cost of movsx */
  COSTS_N_INSNS (2),	/* cost of movzx */
  15,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {8, 8, 8},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {8, 8, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {4, 8},		/* cost of loading MMX registers
			   in SImode and DImode */
  {4, 8},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 8, 16},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 8, 16},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  0,			/* size of l1 cache  */
  0,			/* size of l2 cache  */
  0,			/* size of prefetch block */
  0,			/* number of parallel prefetches */
  COSTS_N_INSNS (23),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (24),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),	/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,			/* scalar_stmt_cost.  */
  1,			/* scalar load_cost.  */
  1,			/* scalar_store_cost.  */
  1,			/* vec_stmt_cost.  */
  1,			/* vec_to_scalar_cost.  */
  1,			/* scalar_to_vec_cost.  */
  1,			/* vec_align_load_cost.  */
  2,			/* vec_unalign_load_cost.  */
  1,			/* vec_store_cost.  */
  3,			/* cond_taken_branch_cost.  */
  1,			/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (1),	/* cost of a lea instruction */
  COSTS_N_INSNS (3),	/* variable shift costs */
  COSTS_N_INSNS (2),	/* constant shift costs */
  {COSTS_N_INSNS (12),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (12),	/* HI */
   COSTS_N_INSNS (12),	/* SI */
   COSTS_N_INSNS (12),	/* DI */
   COSTS_N_INSNS (12)},	/* other */
  1,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),	/* HI */
   COSTS_N_INSNS (40),	/* SI */
   COSTS_N_INSNS (40),	/* DI */
   COSTS_N_INSNS (40)},	/* other */
  COSTS_N_INSNS (3),	/* cost of movsx */
  COSTS_N_INSNS (2),	/* cost of movzx */
  15,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {8, 8, 8},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {8, 8, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {4, 8},		/* cost of loading MMX registers
			   in SImode and DImode */
  {4, 8},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 8, 16},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 8, 16},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  4,			/* size of l1 cache.  486 has 8kB cache
			   shared for code and data, so 4kB is
			   not really precise.  */
  4,			/* size of l2 cache  */
  0,			/* size of prefetch block */
  0,			/* number of parallel prefetches */
  COSTS_N_INSNS (8),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),	/* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,			/* scalar_stmt_cost.  */
  1,			/* scalar load_cost.  */
  1,			/* scalar_store_cost.  */
  1,			/* vec_stmt_cost.  */
  1,			/* vec_to_scalar_cost.  */
  1,			/* scalar_to_vec_cost.  */
  1,			/* vec_align_load_cost.  */
  2,			/* vec_unalign_load_cost.  */
  1,			/* vec_store_cost.  */
  3,			/* cond_taken_branch_cost.  */
  1,			/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (1),	/* cost of a lea instruction */
  COSTS_N_INSNS (4),	/* variable shift costs */
  COSTS_N_INSNS (1),	/* constant shift costs */
  {COSTS_N_INSNS (11),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (11),	/* HI */
   COSTS_N_INSNS (11),	/* SI */
   COSTS_N_INSNS (11),	/* DI */
   COSTS_N_INSNS (11)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),	/* HI */
   COSTS_N_INSNS (25),	/* SI */
   COSTS_N_INSNS (25),	/* DI */
   COSTS_N_INSNS (25)},	/* other */
  COSTS_N_INSNS (3),	/* cost of movsx */
  COSTS_N_INSNS (2),	/* cost of movzx */
  8,			/* "large" insn */
  6,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 6},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  8,			/* cost of moving MMX register */
  {8, 8},		/* cost of loading MMX registers
			   in SImode and DImode */
  {8, 8},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 8, 16},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 8, 16},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  8,			/* size of l1 cache.  */
  8,			/* size of l2 cache  */
  0,			/* size of prefetch block */
  0,			/* number of parallel prefetches */
  COSTS_N_INSNS (3),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),	/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,			/* scalar_stmt_cost.  */
  1,			/* scalar load_cost.  */
  1,			/* scalar_store_cost.  */
  1,			/* vec_stmt_cost.  */
  1,			/* vec_to_scalar_cost.  */
  1,			/* scalar_to_vec_cost.  */
  1,			/* vec_align_load_cost.  */
  2,			/* vec_unalign_load_cost.  */
  1,			/* vec_store_cost.  */
  3,			/* cond_taken_branch_cost.  */
  1,			/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (1),	/* cost of a lea instruction */
  COSTS_N_INSNS (1),	/* variable shift costs */
  COSTS_N_INSNS (1),	/* constant shift costs */
  {COSTS_N_INSNS (4),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),	/* HI */
   COSTS_N_INSNS (4),	/* SI */
   COSTS_N_INSNS (4),	/* DI */
   COSTS_N_INSNS (4)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),	/* HI */
   COSTS_N_INSNS (17),	/* SI */
   COSTS_N_INSNS (17),	/* DI */
   COSTS_N_INSNS (17)},	/* other */
  COSTS_N_INSNS (1),	/* cost of movsx */
  COSTS_N_INSNS (1),	/* cost of movzx */
  8,			/* "large" insn */
  2,			/* cost for loading QImode using movzbl */
  {4, 4, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 2, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 6},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {2, 2},		/* cost of loading MMX registers
			   in SImode and DImode */
  {2, 2},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {2, 2, 8},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {2, 2, 8},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  8,			/* size of l1 cache.  */
  256,			/* size of l2 cache  */
  32,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
  COSTS_N_INSNS (3),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),	/* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks inline loop is still a
     noticeable win, for bigger blocks either rep movsl or rep movsb is
     way to go.  Rep movsb has apparently more expensive startup time in CPU,
     but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,			/* scalar_stmt_cost.  */
  1,			/* scalar load_cost.  */
  1,			/* scalar_store_cost.  */
  1,			/* vec_stmt_cost.  */
  1,			/* vec_to_scalar_cost.  */
  1,			/* scalar_to_vec_cost.  */
  1,			/* vec_align_load_cost.  */
  2,			/* vec_unalign_load_cost.  */
  1,			/* vec_store_cost.  */
  3,			/* cond_taken_branch_cost.  */
  1,			/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (1),	/* cost of a lea instruction */
  COSTS_N_INSNS (2),	/* variable shift costs */
  COSTS_N_INSNS (1),	/* constant shift costs */
  {COSTS_N_INSNS (3),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),	/* HI */
   COSTS_N_INSNS (7),	/* SI */
   COSTS_N_INSNS (7),	/* DI */
   COSTS_N_INSNS (7)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),	/* HI */
   COSTS_N_INSNS (39),	/* SI */
   COSTS_N_INSNS (39),	/* DI */
   COSTS_N_INSNS (39)},	/* other */
  COSTS_N_INSNS (1),	/* cost of movsx */
  COSTS_N_INSNS (1),	/* cost of movzx */
  8,			/* "large" insn */
  1,			/* cost for loading QImode using movzbl */
  {1, 1, 1},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {1, 1, 1},		/* cost of storing integer registers */
  1,			/* cost of reg,reg fld/fst */
  {1, 1, 1},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 6, 6},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */

  1,			/* cost of moving MMX register */
  {1, 1},		/* cost of loading MMX registers
			   in SImode and DImode */
  {1, 1},		/* cost of storing MMX registers
			   in SImode and DImode */
  1,			/* cost of moving SSE register */
  {1, 1, 1},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {1, 1, 1},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  1,			/* MMX or SSE register to integer */
  64,			/* size of l1 cache.  */
  128,			/* size of l2 cache.  */
  32,			/* size of prefetch block */
  1,			/* number of parallel prefetches */
  COSTS_N_INSNS (6),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),	/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,			/* scalar_stmt_cost.  */
  1,			/* scalar load_cost.  */
  1,			/* scalar_store_cost.  */
  1,			/* vec_stmt_cost.  */
  1,			/* vec_to_scalar_cost.  */
  1,			/* scalar_to_vec_cost.  */
  1,			/* vec_align_load_cost.  */
  2,			/* vec_unalign_load_cost.  */
  1,			/* vec_store_cost.  */
  3,			/* cond_taken_branch_cost.  */
  1,			/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (2),	/* cost of a lea instruction */
  COSTS_N_INSNS (1),	/* variable shift costs */
  COSTS_N_INSNS (1),	/* constant shift costs */
  {COSTS_N_INSNS (3),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),	/* HI */
   COSTS_N_INSNS (3),	/* SI */
   COSTS_N_INSNS (3),	/* DI */
   COSTS_N_INSNS (3)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),	/* HI */
   COSTS_N_INSNS (18),	/* SI */
   COSTS_N_INSNS (18),	/* DI */
   COSTS_N_INSNS (18)},	/* other */
  COSTS_N_INSNS (2),	/* cost of movsx */
  COSTS_N_INSNS (2),	/* cost of movzx */
  8,			/* "large" insn */
  3,			/* cost for loading QImode using movzbl */
  {4, 5, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 3, 2},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {6, 6, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 4},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {2, 2},		/* cost of loading MMX registers
			   in SImode and DImode */
  {2, 2},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {2, 2, 8},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {2, 2, 8},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  6,			/* MMX or SSE register to integer */
  32,			/* size of l1 cache.  */
  32,			/* size of l2 cache.  Some models
			   have integrated l2 cache, but
			   optimizing for k6 is not important
			   enough to worry about that.  */
  32,			/* size of prefetch block */
  1,			/* number of parallel prefetches */
  COSTS_N_INSNS (2),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),	/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,			/* scalar_stmt_cost.  */
  1,			/* scalar load_cost.  */
  1,			/* scalar_store_cost.  */
  1,			/* vec_stmt_cost.  */
  1,			/* vec_to_scalar_cost.  */
  1,			/* scalar_to_vec_cost.  */
  1,			/* vec_align_load_cost.  */
  2,			/* vec_unalign_load_cost.  */
  1,			/* vec_store_cost.  */
  3,			/* cond_taken_branch_cost.  */
  1,			/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (2),	/* cost of a lea instruction */
  COSTS_N_INSNS (1),	/* variable shift costs */
  COSTS_N_INSNS (1),	/* constant shift costs */
  {COSTS_N_INSNS (5),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (5),	/* HI */
   COSTS_N_INSNS (5),	/* SI */
   COSTS_N_INSNS (5),	/* DI */
   COSTS_N_INSNS (5)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),	/* HI */
   COSTS_N_INSNS (42),	/* SI */
   COSTS_N_INSNS (74),	/* DI */
   COSTS_N_INSNS (74)},	/* other */
  COSTS_N_INSNS (1),	/* cost of movsx */
  COSTS_N_INSNS (1),	/* cost of movzx */
  8,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {3, 4, 3},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {3, 4, 3},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {4, 4, 12},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {6, 6, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {4, 4},		/* cost of loading MMX registers
			   in SImode and DImode */
  {4, 4},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 4, 6},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 4, 5},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  5,			/* MMX or SSE register to integer */
  64,			/* size of l1 cache.  */
  256,			/* size of l2 cache.  */
  64,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
  5,			/* Branch cost */
  COSTS_N_INSNS (4),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),	/* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with REP prefix (relative to loops)
     compared to K8. Alignment becomes important after 8 bytes for memcpy and
     128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,			/* scalar_stmt_cost.  */
  1,			/* scalar load_cost.  */
  1,			/* scalar_store_cost.  */
  1,			/* vec_stmt_cost.  */
  1,			/* vec_to_scalar_cost.  */
  1,			/* scalar_to_vec_cost.  */
  1,			/* vec_align_load_cost.  */
  2,			/* vec_unalign_load_cost.  */
  1,			/* vec_store_cost.  */
  3,			/* cond_taken_branch_cost.  */
  1,			/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (2),	/* cost of a lea instruction */
  COSTS_N_INSNS (1),	/* variable shift costs */
  COSTS_N_INSNS (1),	/* constant shift costs */
  {COSTS_N_INSNS (3),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),	/* HI */
   COSTS_N_INSNS (3),	/* SI */
   COSTS_N_INSNS (4),	/* DI */
   COSTS_N_INSNS (5)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),	/* HI */
   COSTS_N_INSNS (42),	/* SI */
   COSTS_N_INSNS (74),	/* DI */
   COSTS_N_INSNS (74)},	/* other */
  COSTS_N_INSNS (1),	/* cost of movsx */
  COSTS_N_INSNS (1),	/* cost of movzx */
  8,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {3, 4, 3},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {3, 4, 3},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {4, 4, 12},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {6, 6, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {3, 3},		/* cost of loading MMX registers
			   in SImode and DImode */
  {4, 4},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 3, 6},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 4, 5},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  5,			/* MMX or SSE register to integer */
  64,			/* size of l1 cache.  */
  512,			/* size of l2 cache.  */
  64,			/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,			/* number of parallel prefetches */
  3,			/* Branch cost */
  COSTS_N_INSNS (4),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),	/* cost of FSQRT instruction.  */
  /* K8 has optimized REP instruction for medium sized blocks, but for very
     small blocks it is better to use loop. For large blocks, libcall can
     do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,			/* scalar_stmt_cost.  */
  2,			/* scalar load_cost.  */
  2,			/* scalar_store_cost.  */
  5,			/* vec_stmt_cost.  */
  0,			/* vec_to_scalar_cost.  */
  2,			/* scalar_to_vec_cost.  */
  2,			/* vec_align_load_cost.  */
  3,			/* vec_unalign_load_cost.  */
  3,			/* vec_store_cost.  */
  3,			/* cond_taken_branch_cost.  */
  2,			/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (2),	/* cost of a lea instruction */
  COSTS_N_INSNS (1),	/* variable shift costs */
  COSTS_N_INSNS (1),	/* constant shift costs */
  {COSTS_N_INSNS (3),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),	/* HI */
   COSTS_N_INSNS (3),	/* SI */
   COSTS_N_INSNS (4),	/* DI */
   COSTS_N_INSNS (5)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),	/* HI */
   COSTS_N_INSNS (51),	/* SI */
   COSTS_N_INSNS (83),	/* DI */
   COSTS_N_INSNS (83)},	/* other */
  COSTS_N_INSNS (1),	/* cost of movsx */
  COSTS_N_INSNS (1),	/* cost of movzx */
  8,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {3, 4, 3},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {3, 4, 3},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {4, 4, 12},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {6, 6, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {3, 3},		/* cost of loading MMX registers
			   in SImode and DImode */
  {4, 4},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 4, 3},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 4, 5},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
			/* On K8:
			    MOVD reg64, xmmreg Double FSTORE 4
			    MOVD reg32, xmmreg Double FSTORE 4
			   On AMDFAM10:
			    MOVD reg64, xmmreg Double FADD 3
					       1/1  1/1
			    MOVD reg32, xmmreg Double FADD 3
					       1/1  1/1 */
  64,			/* size of l1 cache.  */
  512,			/* size of l2 cache.  */
  64,			/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,			/* number of parallel prefetches */
  2,			/* Branch cost */
  COSTS_N_INSNS (4),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),	/* cost of FSQRT instruction.  */

  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop. For large blocks, libcall can
     do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,			/* scalar_stmt_cost.  */
  2,			/* scalar load_cost.  */
  2,			/* scalar_store_cost.  */
  6,			/* vec_stmt_cost.  */
  0,			/* vec_to_scalar_cost.  */
  2,			/* scalar_to_vec_cost.  */
  2,			/* vec_align_load_cost.  */
  2,			/* vec_unalign_load_cost.  */
  2,			/* vec_store_cost.  */
  2,			/* cond_taken_branch_cost.  */
  1,			/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (1),	/* cost of a lea instruction */
  COSTS_N_INSNS (1),	/* variable shift costs */
  COSTS_N_INSNS (1),	/* constant shift costs */
  {COSTS_N_INSNS (4),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),	/* HI */
   COSTS_N_INSNS (4),	/* SI */
   COSTS_N_INSNS (6),	/* DI */
   COSTS_N_INSNS (6)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),	/* HI */
   COSTS_N_INSNS (51),	/* SI */
   COSTS_N_INSNS (83),	/* DI */
   COSTS_N_INSNS (83)},	/* other */
  COSTS_N_INSNS (1),	/* cost of movsx */
  COSTS_N_INSNS (1),	/* cost of movzx */
  8,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {5, 5, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {4, 4, 4},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {5, 5, 12},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {4, 4},		/* cost of loading MMX registers
			   in SImode and DImode */
  {4, 4},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 4, 4},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 4, 4},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  2,			/* MMX or SSE register to integer */
			/* On K8:
			    MOVD reg64, xmmreg Double FSTORE 4
			    MOVD reg32, xmmreg Double FSTORE 4
			   On AMDFAM10:
			    MOVD reg64, xmmreg Double FADD 3
					       1/1  1/1
			    MOVD reg32, xmmreg Double FADD 3
					       1/1  1/1 */
  16,			/* size of l1 cache.  */
  2048,			/* size of l2 cache.  */
  64,			/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,			/* number of parallel prefetches */
  2,			/* Branch cost */
  COSTS_N_INSNS (6),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),	/* cost of FSQRT instruction.  */

  /*  BDVER1 has optimized REP instruction for medium sized blocks, but for
      very small blocks it is better to use loop. For large blocks, libcall
      can do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,			/* scalar_stmt_cost.  */
  4,			/* scalar load_cost.  */
  4,			/* scalar_store_cost.  */
  6,			/* vec_stmt_cost.  */
  0,			/* vec_to_scalar_cost.  */
  2,			/* scalar_to_vec_cost.  */
  4,			/* vec_align_load_cost.  */
  4,			/* vec_unalign_load_cost.  */
  4,			/* vec_store_cost.  */
  2,			/* cond_taken_branch_cost.  */
  1,			/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs bdver2_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (1),	/* cost of a lea instruction */
  COSTS_N_INSNS (1),	/* variable shift costs */
  COSTS_N_INSNS (1),	/* constant shift costs */
  {COSTS_N_INSNS (4),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),	/* HI */
   COSTS_N_INSNS (4),	/* SI */
   COSTS_N_INSNS (6),	/* DI */
   COSTS_N_INSNS (6)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),	/* HI */
   COSTS_N_INSNS (51),	/* SI */
   COSTS_N_INSNS (83),	/* DI */
   COSTS_N_INSNS (83)},	/* other */
  COSTS_N_INSNS (1),	/* cost of movsx */
  COSTS_N_INSNS (1),	/* cost of movzx */
  8,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {5, 5, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {4, 4, 4},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {5, 5, 12},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {4, 4},		/* cost of loading MMX registers
			   in SImode and DImode */
  {4, 4},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 4, 4},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 4, 4},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  2,			/* MMX or SSE register to integer */
			/* On K8:
			    MOVD reg64, xmmreg Double FSTORE 4
			    MOVD reg32, xmmreg Double FSTORE 4
			   On AMDFAM10:
			    MOVD reg64, xmmreg Double FADD 3
					       1/1  1/1
			    MOVD reg32, xmmreg Double FADD 3
					       1/1  1/1 */
  16,			/* size of l1 cache.  */
  2048,			/* size of l2 cache.  */
  64,			/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,			/* number of parallel prefetches */
  2,			/* Branch cost */
  COSTS_N_INSNS (6),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),	/* cost of FSQRT instruction.  */

  /*  BDVER2 has optimized REP instruction for medium sized blocks, but for
      very small blocks it is better to use loop. For large blocks, libcall
      can do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,			/* scalar_stmt_cost.  */
  4,			/* scalar load_cost.  */
  4,			/* scalar_store_cost.  */
  6,			/* vec_stmt_cost.  */
  0,			/* vec_to_scalar_cost.  */
  2,			/* scalar_to_vec_cost.  */
  4,			/* vec_align_load_cost.  */
  4,			/* vec_unalign_load_cost.  */
  4,			/* vec_store_cost.  */
  2,			/* cond_taken_branch_cost.  */
  1,			/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs btver1_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (2),	/* cost of a lea instruction */
  COSTS_N_INSNS (1),	/* variable shift costs */
  COSTS_N_INSNS (1),	/* constant shift costs */
  {COSTS_N_INSNS (3),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),	/* HI */
   COSTS_N_INSNS (3),	/* SI */
   COSTS_N_INSNS (4),	/* DI */
   COSTS_N_INSNS (5)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),	/* HI */
   COSTS_N_INSNS (51),	/* SI */
   COSTS_N_INSNS (83),	/* DI */
   COSTS_N_INSNS (83)},	/* other */
  COSTS_N_INSNS (1),	/* cost of movsx */
  COSTS_N_INSNS (1),	/* cost of movzx */
  8,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {3, 4, 3},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {3, 4, 3},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {4, 4, 12},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {6, 6, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {3, 3},		/* cost of loading MMX registers
			   in SImode and DImode */
  {4, 4},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 4, 3},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 4, 5},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
			/* On K8:
			    MOVD reg64, xmmreg Double FSTORE 4
			    MOVD reg32, xmmreg Double FSTORE 4
			   On AMDFAM10:
			    MOVD reg64, xmmreg Double FADD 3
					       1/1  1/1
			    MOVD reg32, xmmreg Double FADD 3
					       1/1  1/1 */
  32,			/* size of l1 cache.  */
  512,			/* size of l2 cache.  */
  64,			/* size of prefetch block */
  100,			/* number of parallel prefetches */
  2,			/* Branch cost */
  COSTS_N_INSNS (4),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),	/* cost of FSQRT instruction.  */

  /* BTVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop. For large blocks, libcall can
     do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,			/* scalar_stmt_cost.  */
  2,			/* scalar load_cost.  */
  2,			/* scalar_store_cost.  */
  6,			/* vec_stmt_cost.  */
  0,			/* vec_to_scalar_cost.  */
  2,			/* scalar_to_vec_cost.  */
  2,			/* vec_align_load_cost.  */
  2,			/* vec_unalign_load_cost.  */
  2,			/* vec_store_cost.  */
  2,			/* cond_taken_branch_cost.  */
  1,			/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (3),	/* cost of a lea instruction */
  COSTS_N_INSNS (4),	/* variable shift costs */
  COSTS_N_INSNS (4),	/* constant shift costs */
  {COSTS_N_INSNS (15),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (15),	/* HI */
   COSTS_N_INSNS (15),	/* SI */
   COSTS_N_INSNS (15),	/* DI */
   COSTS_N_INSNS (15)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),	/* HI */
   COSTS_N_INSNS (56),	/* SI */
   COSTS_N_INSNS (56),	/* DI */
   COSTS_N_INSNS (56)},	/* other */
  COSTS_N_INSNS (1),	/* cost of movsx */
  COSTS_N_INSNS (1),	/* cost of movzx */
  16,			/* "large" insn */
  2,			/* cost for loading QImode using movzbl */
  {4, 5, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 3, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 6},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {2, 2},		/* cost of loading MMX registers
			   in SImode and DImode */
  {2, 2},		/* cost of storing MMX registers
			   in SImode and DImode */
  12,			/* cost of moving SSE register */
  {12, 12, 12},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {2, 2, 8},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  10,			/* MMX or SSE register to integer */
  8,			/* size of l1 cache.  */
  256,			/* size of l2 cache.  */
  64,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
  2,			/* Branch cost */
  COSTS_N_INSNS (5),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),	/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,			/* scalar_stmt_cost.  */
  1,			/* scalar load_cost.  */
  1,			/* scalar_store_cost.  */
  1,			/* vec_stmt_cost.  */
  1,			/* vec_to_scalar_cost.  */
  1,			/* scalar_to_vec_cost.  */
  1,			/* vec_align_load_cost.  */
  2,			/* vec_unalign_load_cost.  */
  1,			/* vec_store_cost.  */
  3,			/* cond_taken_branch_cost.  */
  1,			/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (1),	/* cost of a lea instruction */
  COSTS_N_INSNS (1),	/* variable shift costs */
  COSTS_N_INSNS (1),	/* constant shift costs */
  {COSTS_N_INSNS (10),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (10),	/* HI */
   COSTS_N_INSNS (10),	/* SI */
   COSTS_N_INSNS (10),	/* DI */
   COSTS_N_INSNS (10)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),	/* HI */
   COSTS_N_INSNS (66),	/* SI */
   COSTS_N_INSNS (66),	/* DI */
   COSTS_N_INSNS (66)},	/* other */
  COSTS_N_INSNS (1),	/* cost of movsx */
  COSTS_N_INSNS (1),	/* cost of movzx */
  16,			/* "large" insn */
  17,			/* MOVE_RATIO */
  4,			/* cost for loading QImode using movzbl */
  {4, 4, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {4, 4, 4},		/* cost of storing integer registers */
  3,			/* cost of reg,reg fld/fst */
  {12, 12, 12},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 4},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  6,			/* cost of moving MMX register */
  {12, 12},		/* cost of loading MMX registers
			   in SImode and DImode */
  {12, 12},		/* cost of storing MMX registers
			   in SImode and DImode */
  6,			/* cost of moving SSE register */
  {12, 12, 12},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {12, 12, 12},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  8,			/* MMX or SSE register to integer */
  8,			/* size of l1 cache.  */
  1024,			/* size of l2 cache.  */
  128,			/* size of prefetch block */
  8,			/* number of parallel prefetches */
  1,			/* Branch cost */
  COSTS_N_INSNS (6),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),	/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
	      {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,			/* scalar_stmt_cost.  */
  1,			/* scalar load_cost.  */
  1,			/* scalar_store_cost.  */
  1,			/* vec_stmt_cost.  */
  1,			/* vec_to_scalar_cost.  */
  1,			/* scalar_to_vec_cost.  */
  1,			/* vec_align_load_cost.  */
  2,			/* vec_unalign_load_cost.  */
  1,			/* vec_store_cost.  */
  3,			/* cond_taken_branch_cost.  */
  1,			/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),	/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,	/* cost of a lea instruction */
  COSTS_N_INSNS (1),	/* variable shift costs */
  COSTS_N_INSNS (1),	/* constant shift costs */
  {COSTS_N_INSNS (3),	/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),	/* HI */
   COSTS_N_INSNS (3),	/* SI */
   COSTS_N_INSNS (4),	/* DI */
   COSTS_N_INSNS (2)},	/* other */
  0,			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),	/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),	/* HI */
   COSTS_N_INSNS (42),	/* SI */
   COSTS_N_INSNS (74),	/* DI */
   COSTS_N_INSNS (74)},	/* other */
  COSTS_N_INSNS (1),	/* cost of movsx */
  COSTS_N_INSNS (1),	/* cost of movzx */
  8,			/* "large" insn */
  17,			/* MOVE_RATIO */
  4,			/* cost for loading QImode using movzbl */
  {4, 4, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {4, 4, 4},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {12, 12, 12},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {6, 6, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {8, 8},		/* cost of loading MMX registers
			   in SImode and DImode */
  {8, 8},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {8, 8, 8},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {8, 8, 8},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  5,			/* MMX or SSE register to integer */
  32,			/* size of l1 cache.  */
  256,			/* size of l2 cache.  */
  64,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
  3,			/* Branch cost */
  COSTS_N_INSNS (8),	/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),	/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),	/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),	/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),	/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),	/* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,			/* scalar_stmt_cost.  */
  1,			/* scalar load_cost.  */
  1,			/* scalar_store_cost.  */
  1,			/* vec_stmt_cost.  */
  1,			/* vec_to_scalar_cost.  */
  1,			/* scalar_to_vec_cost.  */
  1,			/* vec_align_load_cost.  */
  2,			/* vec_unalign_load_cost.  */
  1,			/* vec_store_cost.  */
  3,			/* cond_taken_branch_cost.  */
  1,			/* cond_not_taken_branch_cost.  */
};
/* Generic64 should produce code tuned for Nocona and K8.  */

static const
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  /* On all chips taken into consideration lea is 2 cycles and more.  With
     this cost however our current implementation of synth_mult results in
     use of unnecessary temporary registers causing regression on several
     SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (3),			/*				 SI */
   COSTS_N_INSNS (4),			/*				 DI */
   COSTS_N_INSNS (2)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/*			    HI */
   COSTS_N_INSNS (42),			/*			    SI */
   COSTS_N_INSNS (74),			/*			    DI */
   COSTS_N_INSNS (74)},			/*			    other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  17,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {8, 8, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {8, 8, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  /* Benchmarks show large regressions on K8 sixtrack benchmark when this
     value is increased to perhaps more appropriate value of 5.  */
  3,					/* Branch cost */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
/* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
   Core 2 and K8.  */

static const
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (3),			/*				 SI */
   COSTS_N_INSNS (4),			/*				 DI */
   COSTS_N_INSNS (2)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/*			    HI */
   COSTS_N_INSNS (42),			/*			    SI */
   COSTS_N_INSNS (74),			/*			    DI */
   COSTS_N_INSNS (74)},			/*			    other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  17,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {8, 8, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {8, 8, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache.  */
  256,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  3,					/* Branch cost */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
const struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_P4_NOCONA (m_PENT4 | m_NOCONA)
#define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
#define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
#define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
#define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
#define m_COREI7 (m_COREI7_32 | m_COREI7_64)
#define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
#define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
#define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
#define m_ATOM (1<<PROCESSOR_ATOM)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (1<<PROCESSOR_BDVER1)
#define m_BDVER2 (1<<PROCESSOR_BDVER2)
#define m_BDVER (m_BDVER1 | m_BDVER2)
#define m_BTVER1 (1<<PROCESSOR_BTVER1)
#define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
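
/* A minimal illustration (hypothetical helper, not used elsewhere in this
   file): every m_* macro above is a single-processor bit, so testing
   whether a tuning mask covers processor CPU is one bitwise AND.  */

static inline bool
example_tune_mask_applies (unsigned int feature_mask, enum processor_type cpu)
{
  /* 1u << cpu is exactly the bit the m_* macros above define.  */
  return (feature_mask & (1u << cpu)) != 0;
}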
/* Feature tests against the various tunings.  */
unsigned char ix86_tune_features[X86_TUNE_LAST];

/* Feature tests against the various tunings used to create ix86_tune_features
   based on the processor mask.  */
static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
  /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
     negatively, so enabling for Generic64 seems like good code size
     tradeoff.  We can't enable it for 32bit generic because it does not
     work well with PPro base chips.  */
  m_386 | m_CORE2I7_64 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC64,

  /* X86_TUNE_PUSH_MEMORY */
  m_386 | m_P4_NOCONA | m_CORE2I7 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_ZERO_EXTEND_WITH_AND */
  m_486 | m_PENT,

  /* X86_TUNE_UNROLL_STRLEN */
  m_486 | m_PENT | m_PPRO | m_ATOM | m_CORE2I7 | m_K6 | m_AMD_MULTIPLE
  | m_GENERIC,

  /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
     on simulation result.  But after P4 was made, no performance benefit
     was observed with branch hints.  It also increases the code size.
     As a result, icc never generates branch hints.  */
  0,

  /* X86_TUNE_DOUBLE_WITH_ADD */
  ~m_386,

  /* X86_TUNE_USE_SAHF */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_K8
  | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC,

  /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
     partial dependencies.  */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE
  | m_GENERIC,

  /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
     register stalls on Generic32 compilation setting as well.  However
     in current implementation the partial register stalls are not eliminated
     very well - they can be introduced via subregs synthesized by combine
     and can happen in caller/callee saving sequences.  Because this option
     pays back little on PPro based chips and is in conflict with partial reg
     dependencies used by Athlon/P4 based chips, it is better to leave it off
     for generic32 for now.  */
  m_PPRO,

  /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
  m_CORE2I7 | m_GENERIC,

  /* X86_TUNE_USE_HIMODE_FIOP */
  m_386 | m_486 | m_K6_GEODE,

  /* X86_TUNE_USE_SIMODE_FIOP */
  ~(m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC),

  /* X86_TUNE_USE_MOV0 */
  m_K6,

  /* X86_TUNE_USE_CLTD */
  ~(m_PENT | m_CORE2I7 | m_ATOM | m_K6 | m_GENERIC),

  /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx.  */
  m_PENT4,

  /* X86_TUNE_SPLIT_LONG_MOVES */
  m_PPRO,

  /* X86_TUNE_READ_MODIFY_WRITE */
  ~m_PENT,

  /* X86_TUNE_READ_MODIFY */
  ~(m_PENT | m_PPRO),

  /* X86_TUNE_PROMOTE_QIMODE */
  m_386 | m_486 | m_PENT | m_CORE2I7 | m_ATOM | m_K6_GEODE
  | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_FAST_PREFIX */
  ~(m_386 | m_486 | m_PENT),

  /* X86_TUNE_SINGLE_STRINGOP */
  m_386 | m_P4_NOCONA,

  /* X86_TUNE_QIMODE_MATH */
  ~0,

  /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
     register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL this option
     might be considered for Generic32 if our scheme for avoiding partial
     stalls was more effective.  */
  ~m_PPRO,

  /* X86_TUNE_PROMOTE_QI_REGS */
  0,

  /* X86_TUNE_PROMOTE_HI_REGS */
  m_PPRO,

  /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
     over esp addition.  */
  m_386 | m_486 | m_PENT | m_PPRO,

  /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
     over esp addition.  */
  m_PENT,

  /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
     over esp subtraction.  */
  m_386 | m_486 | m_PENT | m_K6_GEODE,

  /* X86_TUNE_DOUBLE_PUSH: Enable if double push insn is preferred
     over esp subtraction.  */
  m_PENT | m_K6_GEODE,

  /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
     for DFmode copies */
  ~(m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE
    | m_GENERIC),

  /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
  m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
     conflict here in between PPro/Pentium4 based chips that treat 128bit
     SSE registers as single units versus K8 based chips that divide SSE
     registers to two 64bit halves.  This knob promotes all store destinations
     to be 128bit to allow register renaming on 128bit SSE units, but usually
     results in one extra microop on 64bit SSE units.  Experimental results
     show that disabling this option on P4 brings over 20% SPECfp regression,
     while enabling it on K8 brings roughly 2.4% regression that can be partly
     masked by careful scheduling of moves.  */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMDFAM10 | m_BDVER
  | m_GENERIC,

  /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
  m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER1,

  /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
  m_COREI7 | m_BDVER,

  /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
  m_BDVER,

  /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
     are resolved on SSE register parts instead of whole registers, so we may
     maintain just lower part of scalar values in proper format leaving the
     upper part undefined.  */
  m_ATHLON_K8,

  /* X86_TUNE_SSE_TYPELESS_STORES */
  m_AMD_MULTIPLE,

  /* X86_TUNE_SSE_LOAD0_BY_PXOR */
  m_PPRO | m_P4_NOCONA,

  /* X86_TUNE_MEMORY_MISMATCH_STALL */
  m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_PROLOGUE_USING_MOVE */
  m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,

  /* X86_TUNE_EPILOGUE_USING_MOVE */
  m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,

  /* X86_TUNE_SHIFT1 */
  ~m_486,

  /* X86_TUNE_USE_FFREEP */
  m_AMD_MULTIPLE,

  /* X86_TUNE_INTER_UNIT_MOVES */
  ~(m_AMD_MULTIPLE | m_GENERIC),

  /* X86_TUNE_INTER_UNIT_CONVERSIONS */
  ~(m_AMDFAM10 | m_BDVER),

  /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
     than 4 branch instructions in the 16 byte window.  */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_SCHEDULE */
  m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE
  | m_GENERIC,

  /* X86_TUNE_USE_BT */
  m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_USE_INCDEC */
  ~(m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GENERIC),

  /* X86_TUNE_PAD_RETURNS */
  m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short function.  */
  m_ATOM,

  /* X86_TUNE_EXT_80387_CONSTANTS */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_ATHLON_K8
  | m_GENERIC,

  /* X86_TUNE_SHORTEN_X87_SSE */
  ~m_K8,

  /* X86_TUNE_AVOID_VECTOR_DECODE */
  m_CORE2I7_64 | m_K8 | m_GENERIC64,

  /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
     and SImode multiply, but 386 and 486 do HImode multiply faster.  */
  ~(m_386 | m_486),

  /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
     vector path on AMD machines.  */
  m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,

  /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
     machines.  */
  m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,

  /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
     than a MOV.  */
  m_PENT,

  /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
     but one byte longer.  */
  m_PENT,

  /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
     operand that cannot be represented using a modRM byte.  The XOR
     replacement is long decoded, so this split helps here as well.  */
  m_K6,

  /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
     from FP to FP.  */
  m_CORE2I7 | m_AMDFAM10 | m_GENERIC,

  /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
     from integer to FP.  */
  m_AMDFAM10,

  /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
     with a subsequent conditional jump instruction into a single
     compare-and-branch uop.  */
  m_BDVER,

  /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit.  This flag
     will impact LEA instruction selection.  */
  m_ATOM,

  /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
     instructions.  */
  ~m_ATOM,

  /* X86_SOFTARE_PREFETCHING_BENEFICIAL: Enable software prefetching
     at -O3.  For the moment, the prefetching seems badly tuned for Intel
     chips.  */
  m_K6_GEODE | m_AMD_MULTIPLE,

  /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
     the auto-vectorizer.  */
  m_BDVER,

  /* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations
     during reassociation of integer computation.  */
  m_ATOM,

  /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
     during reassociation of fp computation.  */
  m_ATOM
};
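
/* A short sketch of how the table above is consumed (shown only for
   illustration; it mirrors the initialization performed later in
   ix86_option_override_internal):

     ix86_tune_mask = 1u << ix86_tune;
     for (i = 0; i < X86_TUNE_LAST; ++i)
       ix86_tune_features[i]
	 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
*/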
/* Feature tests against the various architecture variations.  */
unsigned char ix86_arch_features[X86_ARCH_LAST];

/* Feature tests against the various architecture variations, used to create
   ix86_arch_features based on the processor mask.  */
static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
  /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro.  */
  ~(m_386 | m_486 | m_PENT | m_K6),

  /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486.  */
  ~m_386,

  /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
  ~(m_386 | m_486),

  /* X86_ARCH_XADD: Exchange and add was added for 80486.  */
  ~m_386,

  /* X86_ARCH_BSWAP: Byteswap was added for 80486.  */
  ~m_386,
};
static const unsigned int x86_accumulate_outgoing_args
  = m_PPRO | m_P4_NOCONA | m_ATOM | m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC;

static const unsigned int x86_arch_always_fancy_math_387
  = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE
    | m_GENERIC;

static const unsigned int x86_avx256_split_unaligned_load
  = m_COREI7 | m_GENERIC;

static const unsigned int x86_avx256_split_unaligned_store
  = m_COREI7 | m_BDVER | m_GENERIC;
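
/* Illustration only (not original code): these masks gate target flag
   defaults against the selected tuning, roughly as done later during
   option override, modulo target_flags_explicit checks:

     if (x86_avx256_split_unaligned_load & ix86_tune_mask)
       target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
*/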
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, fpcr, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  /* SSE registers */
  SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  /* MMX registers */
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  /* REX registers */
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  /* SSE REX registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
/* The "default" register map used in 64bit mode.  */

int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers:
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
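
/* Usage sketch (illustrative, not original code): emitting a debug
   register number is a plain table lookup in the maps above, e.g.

     int dwarf_regno = TARGET_64BIT ? dbx64_register_map[regno]
				    : svr4_dbx_register_map[regno];

   where a -1 result means the register has no DWARF encoding.  */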
/* Define parameter passing and return registers.  */

static int const x86_64_int_parameter_registers[6] =
{
  DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
};

static int const x86_64_ms_abi_int_parameter_registers[4] =
{
  CX_REG, DX_REG, R8_REG, R9_REG
};

static int const x86_64_int_return_registers[4] =
{
  AX_REG, DX_REG, DI_REG, SI_REG
};
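
/* Illustration (hypothetical snippet, not original code): the Nth integer
   argument register is a direct index into the ABI-specific array, e.g.

     int regno = (call_abi == MS_ABI
		  ? x86_64_ms_abi_int_parameter_registers[n]
		  : x86_64_int_parameter_registers[n]);
*/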
/* Define the structure for the machine field in struct function.  */

struct GTY(()) stack_local_entry {
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};
/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					<- ARG_POINTER
   saved pc

   saved static chain			if ix86_static_chain_on_stack

   saved frame pointer			if frame_pointer_needed
					<- HARD_FRAME_POINTER
   [saved regs]
					<- reg_save_offset
   [padding0]

   [saved SSE regs]
					<- sse_regs_save_offset
   [padding1]          |
		       |		<- FRAME_POINTER
   [va_arg registers]  |
		       |
   [frame]	       |
		       |
   [padding2]	       | = to_allocate
  */
struct ix86_frame
{
  int nsseregs;
  int nregs;
  int va_arg_size;
  int red_zone_size;
  int outgoing_arguments_size;
  HOST_WIDE_INT frame;

  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;
  HOST_WIDE_INT hfp_save_offset;
  HOST_WIDE_INT reg_save_offset;
  HOST_WIDE_INT sse_reg_save_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};
/* Which cpu are we scheduling for.  */
enum attr_cpu ix86_schedule;

/* Which cpu are we optimizing for.  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;
/* -mstackrealign option */
static const char ix86_force_align_arg_pointer_string[]
  = "force_align_arg_pointer";
static rtx (*ix86_gen_leave) (void);
static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Alignment for incoming stack boundary in bits specified at
   command line.  */
static unsigned int ix86_user_incoming_stack_boundary;

/* Default alignment for incoming stack boundary in bits.  */
static unsigned int ix86_default_incoming_stack_boundary;

/* Alignment for incoming stack boundary in bits.  */
unsigned int ix86_incoming_stack_boundary;

/* Calling abi specific va_list type nodes.  */
static GTY(()) tree sysv_va_list_type_node;
static GTY(()) tree ms_va_list_type_node;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;
/* Fence to use after loop using movnt.  */
tree x86_mfence;

/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };

#define MAX_CLASSES 4

/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table[5];
static bool ext_80387_constants_init = 0;
static struct machine_function * ix86_init_machine_status (void);
static rtx ix86_function_value (const_tree, const_tree, bool);
static bool ix86_function_value_regno_p (const unsigned int);
static unsigned int ix86_function_arg_boundary (enum machine_mode,
						const_tree);
static rtx ix86_static_chain (const_tree, bool);
static int ix86_function_regparm (const_tree, const_tree);
static void ix86_compute_frame_layout (struct ix86_frame *);
static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
						 rtx, rtx, int);
static void ix86_add_new_builtins (HOST_WIDE_INT);
static tree ix86_canonical_va_list_type (tree);
static void predict_jump (int);
static unsigned int split_stack_prologue_scratch_regno (void);
static bool i386_asm_output_addr_const_extra (FILE *, rtx);
enum ix86_function_specific_strings
{
  IX86_FUNCTION_SPECIFIC_ARCH,
  IX86_FUNCTION_SPECIFIC_TUNE,
  IX86_FUNCTION_SPECIFIC_MAX
};
static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
				 const char *, enum fpmath_unit, bool);
static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
static void ix86_function_specific_save (struct cl_target_option *);
static void ix86_function_specific_restore (struct cl_target_option *);
static void ix86_function_specific_print (FILE *, int,
					  struct cl_target_option *);
static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
static bool ix86_valid_target_attribute_inner_p (tree, char *[],
						 struct gcc_options *);
static bool ix86_can_inline_p (tree, tree);
static void ix86_set_current_function (tree);
static unsigned int ix86_minimum_incoming_stack_boundary (bool);

static enum calling_abi ix86_function_abi (const_tree);
#ifndef SUBTARGET32_DEFAULT_CPU
#define SUBTARGET32_DEFAULT_CPU "i386"
#endif

/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif

/* Whether -mtune= or -march= were specified */
static int ix86_tune_defaulted;
static int ix86_arch_specified;
/* Vectorization library interface and handlers.  */
static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);

static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
/* Processor target table, indexed by processor number */
struct ptt
{
  const struct processor_costs *cost;	/* Processor costs */
  const int align_loop;			/* Default alignments.  */
  const int align_loop_max_skip;
  const int align_jump;
  const int align_jump_max_skip;
  const int align_func;
};

static const struct ptt processor_target_table[PROCESSOR_max] =
{
  {&i386_cost, 4, 3, 4, 3, 4},
  {&i486_cost, 16, 15, 16, 15, 16},
  {&pentium_cost, 16, 7, 16, 7, 16},
  {&pentiumpro_cost, 16, 15, 16, 10, 16},
  {&geode_cost, 0, 0, 0, 0, 0},
  {&k6_cost, 32, 7, 32, 7, 32},
  {&athlon_cost, 16, 7, 16, 7, 16},
  {&pentium4_cost, 0, 0, 0, 0, 0},
  {&k8_cost, 16, 7, 16, 7, 16},
  {&nocona_cost, 0, 0, 0, 0, 0},
  /* Core 2 32-bit.  */
  {&generic32_cost, 16, 10, 16, 10, 16},
  /* Core 2 64-bit.  */
  {&generic64_cost, 16, 10, 16, 10, 16},
  /* Core i7 32-bit.  */
  {&generic32_cost, 16, 10, 16, 10, 16},
  /* Core i7 64-bit.  */
  {&generic64_cost, 16, 10, 16, 10, 16},
  {&generic32_cost, 16, 7, 16, 7, 16},
  {&generic64_cost, 16, 10, 16, 10, 16},
  {&amdfam10_cost, 32, 24, 32, 7, 32},
  {&bdver1_cost, 32, 24, 32, 7, 32},
  {&bdver2_cost, 32, 24, 32, 7, 32},
  {&btver1_cost, 32, 24, 32, 7, 32},
  {&atom_cost, 16, 15, 16, 7, 16}
};
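
/* Illustration only (not original code): default alignments fall straight
   out of the table above, indexed by the active tuning, e.g.

     align_loops = processor_target_table[ix86_tune].align_loop;
     align_functions = processor_target_table[ix86_tune].align_func;

   which mirrors how ix86_option_override_internal applies the defaults.  */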
static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
{
  "generic",
  "i386",
  "i486",
  "pentium",
  "pentium-mmx",
  "pentiumpro",
  "pentium2",
  "pentium3",
  "pentium4",
  "pentium-m",
  "prescott",
  "nocona",
  "core2",
  "corei7",
  "atom",
  "geode",
  "k6",
  "k6-2",
  "k6-3",
  "athlon",
  "athlon-4",
  "k8",
  "amdfam10",
  "bdver1",
  "bdver2",
  "btver1"
};
/* Return true if a red-zone is in use.  */

static inline bool
ix86_using_red_zone (void)
{
  return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
}
/* Return a string that documents the current -m options.  The caller is
   responsible for freeing the string.  */

static char *
ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
		    const char *tune, enum fpmath_unit fpmath,
		    bool add_nl_p)
{
  struct ix86_target_opts
  {
    const char *option;		/* option string */
    HOST_WIDE_INT mask;		/* isa mask options */
  };

  /* This table is ordered so that options like -msse4.2 that imply
     preceding options match those first.  */
  static struct ix86_target_opts isa_opts[] =
  {
    { "-m64",		OPTION_MASK_ISA_64BIT },
    { "-mfma4",		OPTION_MASK_ISA_FMA4 },
    { "-mfma",		OPTION_MASK_ISA_FMA },
    { "-mxop",		OPTION_MASK_ISA_XOP },
    { "-mlwp",		OPTION_MASK_ISA_LWP },
    { "-msse4a",	OPTION_MASK_ISA_SSE4A },
    { "-msse4.2",	OPTION_MASK_ISA_SSE4_2 },
    { "-msse4.1",	OPTION_MASK_ISA_SSE4_1 },
    { "-mssse3",	OPTION_MASK_ISA_SSSE3 },
    { "-msse3",		OPTION_MASK_ISA_SSE3 },
    { "-msse2",		OPTION_MASK_ISA_SSE2 },
    { "-msse",		OPTION_MASK_ISA_SSE },
    { "-m3dnow",	OPTION_MASK_ISA_3DNOW },
    { "-m3dnowa",	OPTION_MASK_ISA_3DNOW_A },
    { "-mmmx",		OPTION_MASK_ISA_MMX },
    { "-mabm",		OPTION_MASK_ISA_ABM },
    { "-mbmi",		OPTION_MASK_ISA_BMI },
    { "-mbmi2",		OPTION_MASK_ISA_BMI2 },
    { "-mlzcnt",	OPTION_MASK_ISA_LZCNT },
    { "-mtbm",		OPTION_MASK_ISA_TBM },
    { "-mpopcnt",	OPTION_MASK_ISA_POPCNT },
    { "-mmovbe",	OPTION_MASK_ISA_MOVBE },
    { "-mcrc32",	OPTION_MASK_ISA_CRC32 },
    { "-maes",		OPTION_MASK_ISA_AES },
    { "-mpclmul",	OPTION_MASK_ISA_PCLMUL },
    { "-mfsgsbase",	OPTION_MASK_ISA_FSGSBASE },
    { "-mrdrnd",	OPTION_MASK_ISA_RDRND },
    { "-mf16c",		OPTION_MASK_ISA_F16C },
    { "-mrtm",		OPTION_MASK_ISA_RTM },
  };

  /* Flag options.  */
  static struct ix86_target_opts flag_opts[] =
  {
    { "-m128bit-long-double",		MASK_128BIT_LONG_DOUBLE },
    { "-m80387",			MASK_80387 },
    { "-maccumulate-outgoing-args",	MASK_ACCUMULATE_OUTGOING_ARGS },
    { "-malign-double",			MASK_ALIGN_DOUBLE },
    { "-mcld",				MASK_CLD },
    { "-mfp-ret-in-387",		MASK_FLOAT_RETURNS },
    { "-mieee-fp",			MASK_IEEE_FP },
    { "-minline-all-stringops",		MASK_INLINE_ALL_STRINGOPS },
    { "-minline-stringops-dynamically",	MASK_INLINE_STRINGOPS_DYNAMICALLY },
    { "-mms-bitfields",			MASK_MS_BITFIELD_LAYOUT },
    { "-mno-align-stringops",		MASK_NO_ALIGN_STRINGOPS },
    { "-mno-fancy-math-387",		MASK_NO_FANCY_MATH_387 },
    { "-mno-push-args",			MASK_NO_PUSH_ARGS },
    { "-mno-red-zone",			MASK_NO_RED_ZONE },
    { "-momit-leaf-frame-pointer",	MASK_OMIT_LEAF_FRAME_POINTER },
    { "-mrecip",			MASK_RECIP },
    { "-mrtd",				MASK_RTD },
    { "-msseregparm",			MASK_SSEREGPARM },
    { "-mstack-arg-probe",		MASK_STACK_PROBE },
    { "-mtls-direct-seg-refs",		MASK_TLS_DIRECT_SEG_REFS },
    { "-mvect8-ret-in-mem",		MASK_VECT8_RETURNS },
    { "-m8bit-idiv",			MASK_USE_8BIT_IDIV },
    { "-mvzeroupper",			MASK_VZEROUPPER },
    { "-mavx256-split-unaligned-load",	MASK_AVX256_SPLIT_UNALIGNED_LOAD },
    { "-mavx256-split-unaligned-store",	MASK_AVX256_SPLIT_UNALIGNED_STORE },
    { "-mprefer-avx128",		MASK_PREFER_AVX128 },
  };

  const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];

  char isa_other[40];
  char target_other[40];
  unsigned num = 0;
  unsigned i, j;
  char *ret;
  char *ptr;
  size_t len;
  size_t line_len;
  size_t sep_len;

  memset (opts, '\0', sizeof (opts));

  /* Add -march= option.  */
  if (arch)
    {
      opts[num][0] = "-march=";
      opts[num++][1] = arch;
    }

  /* Add -mtune= option.  */
  if (tune)
    {
      opts[num][0] = "-mtune=";
      opts[num++][1] = tune;
    }

  /* Pick out the options in isa options.  */
  for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
    if ((isa & isa_opts[i].mask) != 0)
      {
	opts[num++][0] = isa_opts[i].option;
	isa &= ~ isa_opts[i].mask;
      }

  if (isa && add_nl_p)
    {
      opts[num++][0] = isa_other;
      sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
	       isa);
    }

  /* Add flag options.  */
  for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
    if ((flags & flag_opts[i].mask) != 0)
      {
	opts[num++][0] = flag_opts[i].option;
	flags &= ~ flag_opts[i].mask;
      }

  if (flags && add_nl_p)
    {
      opts[num++][0] = target_other;
      sprintf (target_other, "(other flags: %#x)", flags);
    }

  /* Add -fpmath= option.  */
  if (fpmath)
    {
      opts[num][0] = "-mfpmath=";
      switch ((int) fpmath)
	{
	case FPMATH_387:
	  opts[num++][1] = "387";
	  break;

	case FPMATH_SSE:
	  opts[num++][1] = "sse";
	  break;

	case FPMATH_387 | FPMATH_SSE:
	  opts[num++][1] = "sse+387";
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* Any options?  */
  if (num == 0)
    return NULL;

  gcc_assert (num < ARRAY_SIZE (opts));

  /* Size the string.  */
  len = 0;
  sep_len = (add_nl_p) ? 3 : 1;
  for (i = 0; i < num; i++)
    {
      len += sep_len;
      for (j = 0; j < 2; j++)
	if (opts[i][j])
	  len += strlen (opts[i][j]);
    }

  /* Build the string.  */
  ret = ptr = (char *) xmalloc (len);
  line_len = 0;

  for (i = 0; i < num; i++)
    {
      size_t len2[2];
      const char *comma = "";
      size_t comma_len = 0;

      for (j = 0; j < 2; j++)
	len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;

      if (i != 0)
	{
	  comma = ", ";
	  comma_len = 2;
	}

      /* If adding this option would overflow the line, start a new one.  */
      if (add_nl_p && line_len + comma_len + len2[0] + len2[1] > 70)
	{
	  comma = "\n";
	  comma_len = 1;
	  line_len = 0;
	}

      strcpy (ptr, comma);
      ptr += comma_len;
      line_len += comma_len;

      for (j = 0; j < 2; j++)
	if (opts[i][j])
	  {
	    memcpy (ptr, opts[i][j], len2[j]);
	    ptr += len2[j];
	    line_len += len2[j];
	  }
    }

  *ptr = '\0';
  gcc_assert (ret + len >= ptr);

  return ret;
}
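
/* Usage sketch (illustrative, not original code): the returned buffer is
   heap-allocated and owned by the caller, e.g.

     char *opts = ix86_target_string (ix86_isa_flags, target_flags,
				      "corei7", "generic", ix86_fpmath, true);
     if (opts)
       {
	 fputs (opts, stderr);
	 free (opts);
       }
*/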
/* Return true, if profiling code should be emitted before
   prologue.  Otherwise it returns false.
   Note: For x86 with "hotfix" it is sorried.  */
static bool
ix86_profile_before_prologue (void)
{
  return flag_fentry != 0;
}
/* Function that is callable from the debugger to print the current
   options.  */
void
ix86_debug_options (void)
{
  char *opts = ix86_target_string (ix86_isa_flags, target_flags,
				   ix86_arch_string, ix86_tune_string,
				   ix86_fpmath, true);

  if (opts)
    {
      fprintf (stderr, "%s\n\n", opts);
      free (opts);
    }
  else
    fputs ("<no options>\n\n", stderr);

  return;
}
/* Override various settings based on options.  If MAIN_ARGS_P, the
   options are from the command line, otherwise they are from
   attributes.  */

static void
ix86_option_override_internal (bool main_args_p)
{
  int i;
  unsigned int ix86_arch_mask, ix86_tune_mask;
  const bool ix86_tune_specified = (ix86_tune_string != NULL);
  const char *prefix;
  const char *suffix;
  const char *sw;
#define PTA_3DNOW		(HOST_WIDE_INT_1 << 0)
#define PTA_3DNOW_A		(HOST_WIDE_INT_1 << 1)
#define PTA_64BIT		(HOST_WIDE_INT_1 << 2)
#define PTA_ABM			(HOST_WIDE_INT_1 << 3)
#define PTA_AES			(HOST_WIDE_INT_1 << 4)
#define PTA_AVX			(HOST_WIDE_INT_1 << 5)
#define PTA_BMI			(HOST_WIDE_INT_1 << 6)
#define PTA_CX16		(HOST_WIDE_INT_1 << 7)
#define PTA_F16C		(HOST_WIDE_INT_1 << 8)
#define PTA_FMA			(HOST_WIDE_INT_1 << 9)
#define PTA_FMA4		(HOST_WIDE_INT_1 << 10)
#define PTA_FSGSBASE		(HOST_WIDE_INT_1 << 11)
#define PTA_LWP			(HOST_WIDE_INT_1 << 12)
#define PTA_LZCNT		(HOST_WIDE_INT_1 << 13)
#define PTA_MMX			(HOST_WIDE_INT_1 << 14)
#define PTA_MOVBE		(HOST_WIDE_INT_1 << 15)
#define PTA_NO_SAHF		(HOST_WIDE_INT_1 << 16)
#define PTA_PCLMUL		(HOST_WIDE_INT_1 << 17)
#define PTA_POPCNT		(HOST_WIDE_INT_1 << 18)
#define PTA_PREFETCH_SSE	(HOST_WIDE_INT_1 << 19)
#define PTA_RDRND		(HOST_WIDE_INT_1 << 20)
#define PTA_SSE			(HOST_WIDE_INT_1 << 21)
#define PTA_SSE2		(HOST_WIDE_INT_1 << 22)
#define PTA_SSE3		(HOST_WIDE_INT_1 << 23)
#define PTA_SSE4_1		(HOST_WIDE_INT_1 << 24)
#define PTA_SSE4_2		(HOST_WIDE_INT_1 << 25)
#define PTA_SSE4A		(HOST_WIDE_INT_1 << 26)
#define PTA_SSSE3		(HOST_WIDE_INT_1 << 27)
#define PTA_TBM			(HOST_WIDE_INT_1 << 28)
#define PTA_XOP			(HOST_WIDE_INT_1 << 29)
#define PTA_AVX2		(HOST_WIDE_INT_1 << 30)
#define PTA_BMI2		(HOST_WIDE_INT_1 << 31)
#define PTA_RTM			(HOST_WIDE_INT_1 << 32)
/* if this reaches 64, need to widen struct pta flags below */
  static struct pta
    {
      const char *const name;		/* processor name or nickname.  */
      const enum processor_type processor;
      const enum attr_cpu schedule;
      const unsigned HOST_WIDE_INT flags;
    }
  const processor_alias_table[] =
    {
2948 {"i386", PROCESSOR_I386
, CPU_NONE
, 0},
2949 {"i486", PROCESSOR_I486
, CPU_NONE
, 0},
2950 {"i586", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
2951 {"pentium", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
2952 {"pentium-mmx", PROCESSOR_PENTIUM
, CPU_PENTIUM
, PTA_MMX
},
2953 {"winchip-c6", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
},
2954 {"winchip2", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
},
2955 {"c3", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
},
2956 {"c3-2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
| PTA_SSE
},
2957 {"i686", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
2958 {"pentiumpro", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
2959 {"pentium2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
},
2960 {"pentium3", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2962 {"pentium3m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2964 {"pentium-m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2965 PTA_MMX
| PTA_SSE
| PTA_SSE2
},
2966 {"pentium4", PROCESSOR_PENTIUM4
, CPU_NONE
,
2967 PTA_MMX
|PTA_SSE
| PTA_SSE2
},
2968 {"pentium4m", PROCESSOR_PENTIUM4
, CPU_NONE
,
2969 PTA_MMX
| PTA_SSE
| PTA_SSE2
},
2970 {"prescott", PROCESSOR_NOCONA
, CPU_NONE
,
2971 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
},
2972 {"nocona", PROCESSOR_NOCONA
, CPU_NONE
,
2973 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2974 | PTA_CX16
| PTA_NO_SAHF
},
2975 {"core2", PROCESSOR_CORE2_64
, CPU_CORE2
,
2976 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2977 | PTA_SSSE3
| PTA_CX16
},
2978 {"corei7", PROCESSOR_COREI7_64
, CPU_COREI7
,
2979 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2980 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_CX16
},
2981 {"corei7-avx", PROCESSOR_COREI7_64
, CPU_COREI7
,
2982 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2983 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
2984 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
},
2985 {"core-avx-i", PROCESSOR_COREI7_64
, CPU_COREI7
,
2986 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2987 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
2988 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
2989 | PTA_RDRND
| PTA_F16C
},
2990 {"core-avx2", PROCESSOR_COREI7_64
, CPU_COREI7
,
2991 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2992 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
| PTA_AVX2
2993 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
2994 | PTA_RDRND
| PTA_F16C
| PTA_BMI
| PTA_BMI2
| PTA_LZCNT
2995 | PTA_FMA
| PTA_MOVBE
| PTA_RTM
},
2996 {"atom", PROCESSOR_ATOM
, CPU_ATOM
,
2997 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2998 | PTA_SSSE3
| PTA_CX16
| PTA_MOVBE
},
2999 {"geode", PROCESSOR_GEODE
, CPU_GEODE
,
3000 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
|PTA_PREFETCH_SSE
},
3001 {"k6", PROCESSOR_K6
, CPU_K6
, PTA_MMX
},
3002 {"k6-2", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
},
3003 {"k6-3", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
},
3004 {"athlon", PROCESSOR_ATHLON
, CPU_ATHLON
,
3005 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
3006 {"athlon-tbird", PROCESSOR_ATHLON
, CPU_ATHLON
,
3007 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
3008 {"athlon-4", PROCESSOR_ATHLON
, CPU_ATHLON
,
3009 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
3010 {"athlon-xp", PROCESSOR_ATHLON
, CPU_ATHLON
,
3011 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
3012 {"athlon-mp", PROCESSOR_ATHLON
, CPU_ATHLON
,
3013 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
3014 {"x86-64", PROCESSOR_K8
, CPU_K8
,
3015 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_NO_SAHF
},
3016 {"k8", PROCESSOR_K8
, CPU_K8
,
3017 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3018 | PTA_SSE2
| PTA_NO_SAHF
},
3019 {"k8-sse3", PROCESSOR_K8
, CPU_K8
,
3020 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3021 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
3022 {"opteron", PROCESSOR_K8
, CPU_K8
,
3023 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3024 | PTA_SSE2
| PTA_NO_SAHF
},
3025 {"opteron-sse3", PROCESSOR_K8
, CPU_K8
,
3026 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3027 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
3028 {"athlon64", PROCESSOR_K8
, CPU_K8
,
3029 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3030 | PTA_SSE2
| PTA_NO_SAHF
},
3031 {"athlon64-sse3", PROCESSOR_K8
, CPU_K8
,
3032 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3033 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
3034 {"athlon-fx", PROCESSOR_K8
, CPU_K8
,
3035 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3036 | PTA_SSE2
| PTA_NO_SAHF
},
3037 {"amdfam10", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
3038 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3039 | PTA_SSE2
| PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
},
3040 {"barcelona", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
3041 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3042 | PTA_SSE2
| PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
},
3043 {"bdver1", PROCESSOR_BDVER1
, CPU_BDVER1
,
3044 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3045 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3046 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
3047 | PTA_XOP
| PTA_LWP
},
3048 {"bdver2", PROCESSOR_BDVER2
, CPU_BDVER2
,
3049 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3050 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3051 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
3052 | PTA_XOP
| PTA_LWP
| PTA_BMI
| PTA_TBM
| PTA_F16C
3054 {"btver1", PROCESSOR_BTVER1
, CPU_GENERIC64
,
3055 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3056 | PTA_SSSE3
| PTA_SSE4A
|PTA_ABM
| PTA_CX16
},
3057 {"generic32", PROCESSOR_GENERIC32
, CPU_PENTIUMPRO
,
3058 0 /* flags are only used for -march switch. */ },
3059 {"generic64", PROCESSOR_GENERIC64
, CPU_GENERIC64
,
3060 PTA_64BIT
/* flags are only used for -march switch. */ },
  /* -mrecip options.  */
  static struct
    {
      const char *string;		/* option name */
      unsigned int mask;		/* mask bits to set */
    }
  const recip_options[] =
    {
      { "all",	     RECIP_MASK_ALL },
      { "none",      RECIP_MASK_NONE },
      { "div",	     RECIP_MASK_DIV },
      { "sqrt",      RECIP_MASK_SQRT },
      { "vec-div",   RECIP_MASK_VEC_DIV },
      { "vec-sqrt",  RECIP_MASK_VEC_SQRT },
    };

  int const pta_size = ARRAY_SIZE (processor_alias_table);
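
  /* Illustration only (hypothetical `token' variable, not original code):
     a -mrecip=opt list is resolved by matching each comma-separated token
     against recip_options and OR-ing the masks, roughly

       for (i = 0; i < (int) ARRAY_SIZE (recip_options); i++)
	 if (! strcmp (token, recip_options[i].string))
	   recip_mask |= recip_options[i].mask;
  */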
  /* Set up prefix/suffix so the error messages refer to either the command
     line argument, or the attribute(target).  */
  if (main_args_p)
    {
      prefix = "-m";
      suffix = "";
      sw = "switch";
    }
  else
    {
      prefix = "option(\"";
      suffix = "\")";
      sw = "attribute";
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
  SUBSUBTARGET_OVERRIDE_OPTIONS;
#endif
  if (TARGET_X32)
    ix86_isa_flags |= OPTION_MASK_ISA_64BIT;

  /* -fPIC is the default for x86_64.  */
  if (TARGET_MACHO && TARGET_64BIT)
    flag_pic = 2;
  /* Need to check -mtune=generic first.  */
  if (ix86_tune_string)
    {
      if (!strcmp (ix86_tune_string, "generic")
	  || !strcmp (ix86_tune_string, "i686")
	  /* As special support for cross compilers we read -mtune=native
	     as -mtune=generic.  With native compilers we won't see the
	     -mtune=native, as it was changed by the driver.  */
	  || !strcmp (ix86_tune_string, "native"))
	{
	  if (TARGET_64BIT)
	    ix86_tune_string = "generic64";
	  else
	    ix86_tune_string = "generic32";
	}
      /* If this call is for setting the option attribute, allow the
	 generic32/generic64 that was previously set.  */
      else if (!main_args_p
	       && (!strcmp (ix86_tune_string, "generic32")
		   || !strcmp (ix86_tune_string, "generic64")))
	;
      else if (!strncmp (ix86_tune_string, "generic", 7))
	error ("bad value (%s) for %stune=%s %s",
	       ix86_tune_string, prefix, suffix, sw);
      else if (!strcmp (ix86_tune_string, "x86-64"))
	warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
		 "%stune=k8%s or %stune=generic%s instead as appropriate",
		 prefix, suffix, prefix, suffix, prefix, suffix);
    }
  else
    {
      if (ix86_arch_string)
	ix86_tune_string = ix86_arch_string;
      if (!ix86_tune_string)
	{
	  ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
	  ix86_tune_defaulted = 1;
	}

      /* ix86_tune_string is set to ix86_arch_string or defaulted.  We
	 need to use a sensible tune option.  */
      if (!strcmp (ix86_tune_string, "generic")
	  || !strcmp (ix86_tune_string, "x86-64")
	  || !strcmp (ix86_tune_string, "i686"))
	{
	  if (TARGET_64BIT)
	    ix86_tune_string = "generic64";
	  else
	    ix86_tune_string = "generic32";
	}
    }
  if (ix86_stringop_alg == rep_prefix_8_byte && !TARGET_64BIT)
    {
      /* rep; movq isn't available in 32-bit code.  */
      error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
      ix86_stringop_alg = no_stringop;
    }

  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
  else
    ix86_arch_specified = 1;
  if (global_options_set.x_ix86_pmode)
    {
      if ((TARGET_LP64 && ix86_pmode == PMODE_SI)
	  || (!TARGET_64BIT && ix86_pmode == PMODE_DI))
	error ("address mode %qs not supported in the %s bit mode",
	       TARGET_64BIT ? "short" : "long",
	       TARGET_64BIT ? "64" : "32");
    }
  else
    ix86_pmode = TARGET_LP64 ? PMODE_DI : PMODE_SI;

  if (!global_options_set.x_ix86_abi)
    ix86_abi = DEFAULT_ABI;
  if (global_options_set.x_ix86_cmodel)
    {
      switch (ix86_cmodel)
	{
	case CM_SMALL:
	case CM_SMALL_PIC:
	  if (flag_pic)
	    ix86_cmodel = CM_SMALL_PIC;
	  if (!TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "small", "32");
	  break;

	case CM_MEDIUM:
	case CM_MEDIUM_PIC:
	  if (flag_pic)
	    ix86_cmodel = CM_MEDIUM_PIC;
	  if (!TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "medium", "32");
	  else if (TARGET_X32)
	    error ("code model %qs not supported in x32 mode",
		   "medium");
	  break;

	case CM_LARGE:
	case CM_LARGE_PIC:
	  if (flag_pic)
	    ix86_cmodel = CM_LARGE_PIC;
	  if (!TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "large", "32");
	  else if (TARGET_X32)
	    error ("code model %qs not supported in x32 mode",
		   "large");
	  break;

	case CM_32:
	  if (flag_pic)
	    error ("code model %s does not support PIC mode", "32");
	  if (TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "32", "64");
	  break;

	case CM_KERNEL:
	  if (flag_pic)
	    {
	      error ("code model %s does not support PIC mode", "kernel");
	      ix86_cmodel = CM_32;
	    }
	  if (!TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "kernel", "32");
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
	 use of rip-relative addressing.  This eliminates fixups that
	 would otherwise be needed if this object is to be placed in a
	 DLL, and is essentially just as efficient as direct addressing.  */
      if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
	ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
      else if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else
	ix86_cmodel = CM_32;
    }
  if (TARGET_MACHO && ix86_asm_dialect == ASM_INTEL)
    {
      error ("-masm=intel not supported in this configuration");
      ix86_asm_dialect = ASM_ATT;
    }
  if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
	ix86_schedule = processor_alias_table[i].schedule;
	ix86_arch = processor_alias_table[i].processor;
	/* Default cpu tuning to the architecture.  */
	ix86_tune = ix86_arch;

	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  error ("CPU you selected does not support x86-64 "
		 "instruction set");

	if (processor_alias_table[i].flags & PTA_MMX
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
	  ix86_isa_flags |= OPTION_MASK_ISA_MMX;
	if (processor_alias_table[i].flags & PTA_3DNOW
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
	  ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
	  ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
	if (processor_alias_table[i].flags & PTA_SSE3
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
	if (processor_alias_table[i].flags & PTA_SSSE3
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
	if (processor_alias_table[i].flags & PTA_SSE4_1
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
	if (processor_alias_table[i].flags & PTA_SSE4_2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
	if (processor_alias_table[i].flags & PTA_AVX
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
	  ix86_isa_flags |= OPTION_MASK_ISA_AVX;
	if (processor_alias_table[i].flags & PTA_AVX2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
	  ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
	if (processor_alias_table[i].flags & PTA_FMA
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
	  ix86_isa_flags |= OPTION_MASK_ISA_FMA;
	if (processor_alias_table[i].flags & PTA_SSE4A
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
	if (processor_alias_table[i].flags & PTA_FMA4
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
	  ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
	if (processor_alias_table[i].flags & PTA_XOP
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
	  ix86_isa_flags |= OPTION_MASK_ISA_XOP;
	if (processor_alias_table[i].flags & PTA_LWP
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
	  ix86_isa_flags |= OPTION_MASK_ISA_LWP;
	if (processor_alias_table[i].flags & PTA_ABM
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
	  ix86_isa_flags |= OPTION_MASK_ISA_ABM;
	if (processor_alias_table[i].flags & PTA_BMI
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
	  ix86_isa_flags |= OPTION_MASK_ISA_BMI;
	if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
	  ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
	if (processor_alias_table[i].flags & PTA_TBM
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
	  ix86_isa_flags |= OPTION_MASK_ISA_TBM;
	if (processor_alias_table[i].flags & PTA_BMI2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
	  ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
	if (processor_alias_table[i].flags & PTA_CX16
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
	  ix86_isa_flags |= OPTION_MASK_ISA_CX16;
	if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
	  ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
	if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
	  ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
	if (processor_alias_table[i].flags & PTA_MOVBE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
	  ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
	if (processor_alias_table[i].flags & PTA_AES
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
	  ix86_isa_flags |= OPTION_MASK_ISA_AES;
	if (processor_alias_table[i].flags & PTA_PCLMUL
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
	  ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
	if (processor_alias_table[i].flags & PTA_FSGSBASE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
	  ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
	if (processor_alias_table[i].flags & PTA_RDRND
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
	  ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
	if (processor_alias_table[i].flags & PTA_F16C
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
	  ix86_isa_flags |= OPTION_MASK_ISA_F16C;
	if (processor_alias_table[i].flags & PTA_RTM
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
	  ix86_isa_flags |= OPTION_MASK_ISA_RTM;
	if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
	  x86_prefetch_sse = true;

	break;
      }
  if (!strcmp (ix86_arch_string, "generic"))
    error ("generic CPU can be used only for %stune=%s %s",
	   prefix, suffix, sw);
  else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
    error ("bad value (%s) for %sarch=%s %s",
	   ix86_arch_string, prefix, suffix, sw);
  ix86_arch_mask = 1u << ix86_arch;
  for (i = 0; i < X86_ARCH_LAST; ++i)
    ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
      {
	ix86_schedule = processor_alias_table[i].schedule;
	ix86_tune = processor_alias_table[i].processor;
	if (TARGET_64BIT)
	  {
	    if (!(processor_alias_table[i].flags & PTA_64BIT))
	      {
		if (ix86_tune_defaulted)
		  {
		    ix86_tune_string = "x86-64";
		    for (i = 0; i < pta_size; i++)
		      if (! strcmp (ix86_tune_string,
				    processor_alias_table[i].name))
			break;
		    ix86_schedule = processor_alias_table[i].schedule;
		    ix86_tune = processor_alias_table[i].processor;
		  }
		else
		  error ("CPU you selected does not support x86-64 "
			 "instruction set");
	      }
	  }
	else
	  {
	    /* Adjust tuning when compiling for 32-bit ABI.  */
	    switch (ix86_tune)
	      {
	      case PROCESSOR_GENERIC64:
		ix86_tune = PROCESSOR_GENERIC32;
		ix86_schedule = CPU_PENTIUMPRO;
		break;

	      case PROCESSOR_CORE2_64:
		ix86_tune = PROCESSOR_CORE2_32;
		break;

	      case PROCESSOR_COREI7_64:
		ix86_tune = PROCESSOR_COREI7_32;
		break;

	      default:
		break;
	      }
	  }
	/* Intel CPUs have always interpreted SSE prefetch instructions as
	   NOPs; so, we can enable SSE prefetch instructions even when
	   -mtune (rather than -march) points us to a processor that has them.
	   However, the VIA C3 gives a SIGILL, so we only do that for i686 and
	   higher processors.  */
	if (TARGET_CMOV
	    && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
	  x86_prefetch_sse = true;
	break;
      }
  if (ix86_tune_specified && i == pta_size)
    error ("bad value (%s) for %stune=%s %s",
	   ix86_tune_string, prefix, suffix, sw);

  ix86_tune_mask = 1u << ix86_tune;
  for (i = 0; i < X86_TUNE_LAST; ++i)
    ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
#ifndef USE_IX86_FRAME_POINTER
#define USE_IX86_FRAME_POINTER 0
#endif

#ifndef USE_X86_64_FRAME_POINTER
#define USE_X86_64_FRAME_POINTER 0
#endif

  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overwritten by command line options.  */
  if (TARGET_64BIT)
    {
      if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
	flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
      if (flag_asynchronous_unwind_tables == 2)
	flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = 0;
    }
  else
    {
      if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
	flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
    }

  if (optimize_size)
    ix86_cost = &ix86_size_cost;
  else
    ix86_cost = processor_target_table[ix86_tune].cost;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;
  /* Validate -mregparm= value.  */
  if (global_options_set.x_ix86_regparm)
    {
      if (TARGET_64BIT)
	warning (0, "-mregparm is ignored in 64-bit mode");
      if (ix86_regparm > REGPARM_MAX)
	{
	  error ("-mregparm=%d is not between 0 and %d",
		 ix86_regparm, REGPARM_MAX);
	  ix86_regparm = 0;
	}
    }
  if (TARGET_64BIT)
    ix86_regparm = REGPARM_MAX;
  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_tune].align_loop;
      align_loops_max_skip
	= processor_target_table[ix86_tune].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_tune].align_jump;
      align_jumps_max_skip
	= processor_target_table[ix86_tune].align_jump_max_skip;
    }
  if (align_functions == 0)
    align_functions = processor_target_table[ix86_tune].align_func;

  /* Provide default for -mbranch-cost= value.  */
  if (!global_options_set.x_ix86_branch_cost)
    ix86_branch_cost = ix86_cost->branch_cost;
  if (TARGET_64BIT)
    {
      target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;

      /* Enable by default the SSE and MMX builtins.  Do allow the user to
	 explicitly disable any of these.  In particular, disabling SSE and
	 MMX for kernel code is extremely useful.  */
      if (!ix86_arch_specified)
	ix86_isa_flags
	  |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
	       | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);

      if (TARGET_RTD)
	warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
    }
  else
    {
      target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;

      if (!ix86_arch_specified)
	ix86_isa_flags
	  |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;

      /* i386 ABI does not specify red zone.  It still makes sense to use it
	 when programmer takes care to keep the stack from being
	 destroyed.  */
      if (!(target_flags_explicit & MASK_NO_RED_ZONE))
	target_flags |= MASK_NO_RED_ZONE;
    }
  /* Keep nonleaf frame pointers.  */
  if (flag_omit_frame_pointer)
    target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
  else if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_finite_math_only)
    target_flags &= ~MASK_IEEE_FP;
  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
    target_flags &= ~MASK_NO_FANCY_MATH_387;

  /* Likewise, if the target doesn't have a 387, or we've specified
     software floating point, don't use 387 inline intrinsics.  */
  if (!TARGET_80387)
    target_flags |= MASK_NO_FANCY_MATH_387;

  /* Turn on MMX builtins for -msse.  */
  if (TARGET_SSE)
    {
      ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
      x86_prefetch_sse = true;
    }

  /* Turn on popcnt instruction for -msse4.2 or -mabm.  */
  if (TARGET_SSE4_2 || TARGET_ABM)
    ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;

  /* Turn on lzcnt instruction for -mabm.  */
  if (TARGET_ABM)
    ix86_isa_flags |= OPTION_MASK_ISA_LZCNT & ~ix86_isa_flags_explicit;
  /* Validate -mpreferred-stack-boundary= value or default it to
     PREFERRED_STACK_BOUNDARY_DEFAULT.  */
  ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
  if (global_options_set.x_ix86_preferred_stack_boundary_arg)
    {
      int min = (TARGET_64BIT ? 4 : 2);
      int max = (TARGET_SEH ? 4 : 12);

      if (ix86_preferred_stack_boundary_arg < min
	  || ix86_preferred_stack_boundary_arg > max)
	{
	  if (min == max)
	    error ("-mpreferred-stack-boundary is not supported "
		   "for this target");
	  else
	    error ("-mpreferred-stack-boundary=%d is not between %d and %d",
		   ix86_preferred_stack_boundary_arg, min, max);
	}
      else
	ix86_preferred_stack_boundary
	  = (1 << ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
    }
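
  /* A worked example of the computation above (illustrative only, not
     part of the option machinery): with -mpreferred-stack-boundary=4
     the boundary becomes (1 << 4) * BITS_PER_UNIT = 16 bytes = 128
     bits, i.e. the 16-byte alignment the SSE psABI expects.  The
     option argument is the log2 of the byte alignment, not the
     alignment itself.  */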
  /* Set the default value for -mstackrealign.  */
  if (ix86_force_align_arg_pointer == -1)
    ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;

  ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;

  /* Validate -mincoming-stack-boundary= value or default it to
     MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY.  */
  ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
  if (global_options_set.x_ix86_incoming_stack_boundary_arg)
    {
      if (ix86_incoming_stack_boundary_arg < (TARGET_64BIT ? 4 : 2)
	  || ix86_incoming_stack_boundary_arg > 12)
	error ("-mincoming-stack-boundary=%d is not between %d and 12",
	       ix86_incoming_stack_boundary_arg, TARGET_64BIT ? 4 : 2);
      else
	{
	  ix86_user_incoming_stack_boundary
	    = (1 << ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
	  ix86_incoming_stack_boundary
	    = ix86_user_incoming_stack_boundary;
	}
    }
  /* Accept -msseregparm only if at least SSE support is enabled.  */
  if (TARGET_SSEREGPARM
      && ! TARGET_SSE)
    error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
  if (global_options_set.x_ix86_fpmath)
    {
      if (ix86_fpmath & FPMATH_SSE)
	{
	  if (!TARGET_SSE)
	    {
	      warning (0, "SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else if ((ix86_fpmath & FPMATH_387) && !TARGET_80387)
	    {
	      warning (0, "387 instruction set disabled, using SSE arithmetics");
	      ix86_fpmath = FPMATH_SSE;
	    }
	}
    }
  else
    ix86_fpmath = TARGET_FPMATH_DEFAULT;

  /* If the i387 is disabled, then do not return values in it.  */
  if (!TARGET_80387)
    target_flags &= ~MASK_FLOAT_RETURNS;
  /* Use external vectorized library in vectorizing intrinsics.  */
  if (global_options_set.x_ix86_veclibabi_type)
    switch (ix86_veclibabi_type)
      {
      case ix86_veclibabi_type_svml:
	ix86_veclib_handler = ix86_veclibabi_svml;
	break;

      case ix86_veclibabi_type_acml:
	ix86_veclib_handler = ix86_veclibabi_acml;
	break;

      default:
	gcc_unreachable ();
      }
  if ((!USE_IX86_FRAME_POINTER
       || (x86_accumulate_outgoing_args & ix86_tune_mask))
      && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

  /* ??? Unwind info is not correct around the CFG unless either a frame
     pointer is present or M_A_O_A is set.  Fixing this requires rewriting
     unwind info generation to be aware of the CFG and propagating states
     around edges.  */
  if ((flag_unwind_tables || flag_asynchronous_unwind_tables
       || flag_exceptions || flag_non_call_exceptions)
      && flag_omit_frame_pointer
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
    {
      if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
	warning (0, "unwind tables currently require either a frame pointer "
		 "or %saccumulate-outgoing-args%s for correctness",
		 prefix, suffix);
      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
    }
  /* If stack probes are required, the space used for large function
     arguments on the stack must also be probed, so enable
     -maccumulate-outgoing-args so this happens in the prologue.  */
  if (TARGET_STACK_PROBE
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
    {
      if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
	warning (0, "stack probing requires %saccumulate-outgoing-args%s "
		 "for correctness", prefix, suffix);
      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
    }
  /* For sane SSE instruction set generation we need fcomi instruction.
     It is safe to enable all CMOVE instructions.  Also, RDRAND intrinsic
     expands to a sequence that includes conditional move.  */
  if (TARGET_SSE || TARGET_RDRND)
    TARGET_CMOVE = 1;

  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }
  /* When scheduling description is not available, disable scheduler pass
     so it won't slow down the compilation and make x87 code slower.  */
  if (!TARGET_SCHEDULE)
    flag_schedule_insns_after_reload = flag_schedule_insns = 0;

  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			 ix86_cost->simultaneous_prefetches,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  /* Enable sw prefetching at -O3 for CPUs where prefetching is helpful.  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && optimize >= 3
      && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
    flag_prefetch_loop_arrays = 1;

  /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
     can be optimized to ap = __builtin_next_arg (0).  */
  if (!TARGET_64BIT && !flag_split_stack)
    targetm.expand_builtin_va_start = NULL;
  if (TARGET_64BIT)
    {
      ix86_gen_leave = gen_leave_rex64;
      if (Pmode == DImode)
	{
	  ix86_gen_monitor = gen_sse3_monitor64_di;
	  ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
	  ix86_gen_tls_local_dynamic_base_64
	    = gen_tls_local_dynamic_base_64_di;
	}
      else
	{
	  ix86_gen_monitor = gen_sse3_monitor64_si;
	  ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
	  ix86_gen_tls_local_dynamic_base_64
	    = gen_tls_local_dynamic_base_64_si;
	}
    }
  else
    {
      ix86_gen_leave = gen_leave;
      ix86_gen_monitor = gen_sse3_monitor;
    }

  if (Pmode == DImode)
    {
      ix86_gen_add3 = gen_adddi3;
      ix86_gen_sub3 = gen_subdi3;
      ix86_gen_sub3_carry = gen_subdi3_carry;
      ix86_gen_one_cmpl2 = gen_one_cmpldi2;
      ix86_gen_andsp = gen_anddi3;
      ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
      ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
      ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
    }
  else
    {
      ix86_gen_add3 = gen_addsi3;
      ix86_gen_sub3 = gen_subsi3;
      ix86_gen_sub3_carry = gen_subsi3_carry;
      ix86_gen_one_cmpl2 = gen_one_cmplsi2;
      ix86_gen_andsp = gen_andsi3;
      ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
      ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
      ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
    }
#ifdef USE_IX86_CLD
  /* Use -mcld by default for 32-bit code if configured with --enable-cld.  */
  if (!TARGET_64BIT)
    target_flags |= MASK_CLD & ~target_flags_explicit;
#endif

  if (!TARGET_64BIT && flag_pic)
    {
      if (flag_fentry > 0)
	sorry ("-mfentry isn%'t supported for 32-bit in combination "
	       "with -fpic");
      flag_fentry = 0;
    }
  else if (TARGET_SEH)
    {
      if (flag_fentry == 0)
	sorry ("-mno-fentry isn%'t compatible with SEH");
      flag_fentry = 1;
    }
  else if (flag_fentry < 0)
    {
#if defined(PROFILE_BEFORE_PROLOGUE)
      flag_fentry = 1;
#else
      flag_fentry = 0;
#endif
    }
  if (TARGET_AVX)
    {
      /* When not optimizing for size, enable vzeroupper optimization for
	 TARGET_AVX with -fexpensive-optimizations and split 32-byte
	 AVX unaligned load/store.  */
      if (!optimize_size)
	{
	  if (flag_expensive_optimizations
	      && !(target_flags_explicit & MASK_VZEROUPPER))
	    target_flags |= MASK_VZEROUPPER;
	  if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
	      && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
	    target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
	  if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
	      && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
	    target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
	  /* Enable 128-bit AVX instruction generation
	     for the auto-vectorizer.  */
	  if (TARGET_AVX128_OPTIMAL
	      && !(target_flags_explicit & MASK_PREFER_AVX128))
	    target_flags |= MASK_PREFER_AVX128;
	}
    }
  else
    {
      /* Disable vzeroupper pass if TARGET_AVX is disabled.  */
      target_flags &= ~MASK_VZEROUPPER;
    }
  if (ix86_recip_name)
    {
      char *p = ASTRDUP (ix86_recip_name);
      char *q;
      unsigned int mask, i;
      bool invert;

      while ((q = strtok (p, ",")) != NULL)
	{
	  p = NULL;
	  if (*q == '!')
	    {
	      invert = true;
	      q++;
	    }
	  else
	    invert = false;

	  if (!strcmp (q, "default"))
	    mask = RECIP_MASK_ALL;
	  else
	    {
	      for (i = 0; i < ARRAY_SIZE (recip_options); i++)
		if (!strcmp (q, recip_options[i].string))
		  {
		    mask = recip_options[i].mask;
		    break;
		  }

	      if (i == ARRAY_SIZE (recip_options))
		{
		  error ("unknown option for -mrecip=%s", q);
		  invert = false;
		  mask = RECIP_MASK_NONE;
		}
	    }

	  recip_mask_explicit |= mask;
	  if (invert)
	    recip_mask &= ~mask;
	  else
	    recip_mask |= mask;
	}
    }

  if (TARGET_RECIP)
    recip_mask |= RECIP_MASK_ALL & ~recip_mask_explicit;
  else if (target_flags_explicit & MASK_RECIP)
    recip_mask &= ~(RECIP_MASK_ALL & ~recip_mask_explicit);
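
  /* A sketch of how -mrecip= strings are interpreted (illustrative
     only; the exact entry names live in recip_options[]):

	-mrecip             =>  recip_mask |= RECIP_MASK_ALL
	-mrecip=all,!sqrt   =>  enable everything, then the leading '!'
				sets INVERT so the sqrt bit is cleared
	-mrecip=div         =>  enable only the matching table entry

     Each comma-separated token is looked up in recip_options[]; an
     unknown token is diagnosed and contributes RECIP_MASK_NONE.  */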
  /* Save the initial options in case the user does function specific
     options.  */
  if (main_args_p)
    target_option_default_node = target_option_current_node
      = build_target_option_node ();
}
/* Return TRUE if VAL is passed in register with 256bit AVX modes.  */

static bool
function_pass_avx256_p (const_rtx val)
{
  if (!val)
    return false;

  if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
    return true;

  if (GET_CODE (val) == PARALLEL)
    {
      int i;
      rtx r;

      for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
	{
	  r = XVECEXP (val, 0, i);
	  if (GET_CODE (r) == EXPR_LIST
	      && XEXP (r, 0)
	      && REG_P (XEXP (r, 0))
	      && (GET_MODE (XEXP (r, 0)) == OImode
		  || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
	    return true;
	}
    }

  return false;
}
/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
ix86_option_override (void)
{
  ix86_option_override_internal (true);
}
/* Update register usage after having seen the compiler flags.  */

void
ix86_conditional_register_usage (void)
{
  int i;
  unsigned int j;

  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    {
      if (fixed_regs[i] > 1)
	fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
      if (call_used_regs[i] > 1)
	call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
    }

  /* The PIC register, if it exists, is fixed.  */
  j = PIC_OFFSET_TABLE_REGNUM;
  if (j != INVALID_REGNUM)
    fixed_regs[j] = call_used_regs[j] = 1;

  /* The 64-bit MS_ABI changes the set of call-used registers.  */
  if (TARGET_64BIT_MS_ABI)
    {
      call_used_regs[SI_REG] = 0;
      call_used_regs[DI_REG] = 0;
      call_used_regs[XMM6_REG] = 0;
      call_used_regs[XMM7_REG] = 0;
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
	call_used_regs[i] = 0;
    }

  /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
     other call-clobbered regs for 64-bit.  */
  if (TARGET_64BIT)
    {
      CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);

      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
	if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
	    && call_used_regs[i])
	  SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
    }

  /* If MMX is disabled, squash the registers.  */
  if (! TARGET_MMX)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If SSE is disabled, squash the registers.  */
  if (! TARGET_SSE)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If the FPU is disabled, squash the registers.  */
  if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If 32-bit, squash the 64-bit registers.  */
  if (! TARGET_64BIT)
    {
      for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
	reg_names[i] = "";
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
	reg_names[i] = "";
    }
}
/* Save the current options */

static void
ix86_function_specific_save (struct cl_target_option *ptr)
{
  ptr->arch = ix86_arch;
  ptr->schedule = ix86_schedule;
  ptr->tune = ix86_tune;
  ptr->branch_cost = ix86_branch_cost;
  ptr->tune_defaulted = ix86_tune_defaulted;
  ptr->arch_specified = ix86_arch_specified;
  ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
  ptr->ix86_target_flags_explicit = target_flags_explicit;
  ptr->x_recip_mask_explicit = recip_mask_explicit;

  /* The fields are char but the variables are not; make sure the
     values fit in the fields.  */
  gcc_assert (ptr->arch == ix86_arch);
  gcc_assert (ptr->schedule == ix86_schedule);
  gcc_assert (ptr->tune == ix86_tune);
  gcc_assert (ptr->branch_cost == ix86_branch_cost);
}
/* Restore the current options */

static void
ix86_function_specific_restore (struct cl_target_option *ptr)
{
  enum processor_type old_tune = ix86_tune;
  enum processor_type old_arch = ix86_arch;
  unsigned int ix86_arch_mask, ix86_tune_mask;
  int i;

  ix86_arch = (enum processor_type) ptr->arch;
  ix86_schedule = (enum attr_cpu) ptr->schedule;
  ix86_tune = (enum processor_type) ptr->tune;
  ix86_branch_cost = ptr->branch_cost;
  ix86_tune_defaulted = ptr->tune_defaulted;
  ix86_arch_specified = ptr->arch_specified;
  ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
  target_flags_explicit = ptr->ix86_target_flags_explicit;
  recip_mask_explicit = ptr->x_recip_mask_explicit;

  /* Recreate the arch feature tests if the arch changed */
  if (old_arch != ix86_arch)
    {
      ix86_arch_mask = 1u << ix86_arch;
      for (i = 0; i < X86_ARCH_LAST; ++i)
	ix86_arch_features[i]
	  = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
    }

  /* Recreate the tune optimization tests */
  if (old_tune != ix86_tune)
    {
      ix86_tune_mask = 1u << ix86_tune;
      for (i = 0; i < X86_TUNE_LAST; ++i)
	ix86_tune_features[i]
	  = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
    }
}
/* Print the current options */

static void
ix86_function_specific_print (FILE *file, int indent,
			      struct cl_target_option *ptr)
{
  char *target_string
    = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
			  NULL, NULL, ptr->x_ix86_fpmath, false);

  fprintf (file, "%*sarch = %d (%s)\n",
	   indent, "",
	   ptr->arch,
	   ((ptr->arch < TARGET_CPU_DEFAULT_max)
	    ? cpu_names[ptr->arch]
	    : "<unknown>"));

  fprintf (file, "%*stune = %d (%s)\n",
	   indent, "",
	   ptr->tune,
	   ((ptr->tune < TARGET_CPU_DEFAULT_max)
	    ? cpu_names[ptr->tune]
	    : "<unknown>"));

  fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);

  if (target_string)
    {
      fprintf (file, "%*s%s\n", indent, "", target_string);
      free (target_string);
    }
}
/* Inner function to process the attribute((target(...))), take an argument
   and set the current options from the argument.  If we have a list,
   recursively go over the list.  */

static bool
ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
				     struct gcc_options *enum_opts_set)
{
  char *next_optstr;
  bool ret = true;

#define IX86_ATTR_ISA(S,O)   { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
#define IX86_ATTR_STR(S,O)   { S, sizeof (S)-1, ix86_opt_str, O, 0 }
#define IX86_ATTR_ENUM(S,O)  { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
#define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
#define IX86_ATTR_NO(S,O,M)  { S, sizeof (S)-1, ix86_opt_no,  O, M }

  enum ix86_opt_type
  {
    ix86_opt_unknown,
    ix86_opt_yes,
    ix86_opt_no,
    ix86_opt_str,
    ix86_opt_enum,
    ix86_opt_isa
  };

  static const struct
  {
    const char *string;
    size_t len;
    enum ix86_opt_type type;
    int opt;
    int mask;
  } attrs[] = {
    /* isa options */
    IX86_ATTR_ISA ("3dnow",	OPT_m3dnow),
    IX86_ATTR_ISA ("abm",	OPT_mabm),
    IX86_ATTR_ISA ("bmi",	OPT_mbmi),
    IX86_ATTR_ISA ("bmi2",	OPT_mbmi2),
    IX86_ATTR_ISA ("lzcnt",	OPT_mlzcnt),
    IX86_ATTR_ISA ("tbm",	OPT_mtbm),
    IX86_ATTR_ISA ("aes",	OPT_maes),
    IX86_ATTR_ISA ("avx",	OPT_mavx),
    IX86_ATTR_ISA ("avx2",	OPT_mavx2),
    IX86_ATTR_ISA ("mmx",	OPT_mmmx),
    IX86_ATTR_ISA ("pclmul",	OPT_mpclmul),
    IX86_ATTR_ISA ("popcnt",	OPT_mpopcnt),
    IX86_ATTR_ISA ("sse",	OPT_msse),
    IX86_ATTR_ISA ("sse2",	OPT_msse2),
    IX86_ATTR_ISA ("sse3",	OPT_msse3),
    IX86_ATTR_ISA ("sse4",	OPT_msse4),
    IX86_ATTR_ISA ("sse4.1",	OPT_msse4_1),
    IX86_ATTR_ISA ("sse4.2",	OPT_msse4_2),
    IX86_ATTR_ISA ("sse4a",	OPT_msse4a),
    IX86_ATTR_ISA ("ssse3",	OPT_mssse3),
    IX86_ATTR_ISA ("fma4",	OPT_mfma4),
    IX86_ATTR_ISA ("fma",	OPT_mfma),
    IX86_ATTR_ISA ("xop",	OPT_mxop),
    IX86_ATTR_ISA ("lwp",	OPT_mlwp),
    IX86_ATTR_ISA ("fsgsbase",	OPT_mfsgsbase),
    IX86_ATTR_ISA ("rdrnd",	OPT_mrdrnd),
    IX86_ATTR_ISA ("f16c",	OPT_mf16c),
    IX86_ATTR_ISA ("rtm",	OPT_mrtm),

    /* enum options */
    IX86_ATTR_ENUM ("fpmath=",	OPT_mfpmath_),

    /* string options */
    IX86_ATTR_STR ("arch=",	IX86_FUNCTION_SPECIFIC_ARCH),
    IX86_ATTR_STR ("tune=",	IX86_FUNCTION_SPECIFIC_TUNE),

    /* flag options */
    IX86_ATTR_YES ("cld",
		   OPT_mcld,
		   MASK_CLD),

    IX86_ATTR_NO ("fancy-math-387",
		  OPT_mfancy_math_387,
		  MASK_NO_FANCY_MATH_387),

    IX86_ATTR_YES ("ieee-fp",
		   OPT_mieee_fp,
		   MASK_IEEE_FP),

    IX86_ATTR_YES ("inline-all-stringops",
		   OPT_minline_all_stringops,
		   MASK_INLINE_ALL_STRINGOPS),

    IX86_ATTR_YES ("inline-stringops-dynamically",
		   OPT_minline_stringops_dynamically,
		   MASK_INLINE_STRINGOPS_DYNAMICALLY),

    IX86_ATTR_NO ("align-stringops",
		  OPT_mno_align_stringops,
		  MASK_NO_ALIGN_STRINGOPS),

    IX86_ATTR_YES ("recip",
		   OPT_mrecip,
		   MASK_RECIP),
  };
  /* If this is a list, recurse to get the options.  */
  if (TREE_CODE (args) == TREE_LIST)
    {
      bool ret = true;

      for (; args; args = TREE_CHAIN (args))
	if (TREE_VALUE (args)
	    && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
						     p_strings, enum_opts_set))
	  ret = false;

      return ret;
    }

  else if (TREE_CODE (args) != STRING_CST)
    gcc_unreachable ();
  /* Handle multiple arguments separated by commas.  */
  next_optstr = ASTRDUP (TREE_STRING_POINTER (args));

  while (next_optstr && *next_optstr != '\0')
    {
      char *p = next_optstr;
      char *orig_p = p;
      char *comma = strchr (next_optstr, ',');
      const char *opt_string;
      size_t len, opt_len;
      int opt;
      bool opt_set_p;
      char ch;
      unsigned i;
      enum ix86_opt_type type = ix86_opt_unknown;
      int mask = 0;

      if (comma)
	{
	  *comma = '\0';
	  len = comma - next_optstr;
	  next_optstr = comma + 1;
	}
      else
	{
	  len = strlen (p);
	  next_optstr = NULL;
	}
      /* Recognize no-xxx.  */
      if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
	{
	  opt_set_p = false;
	  p += 3;
	  len -= 3;
	}
      else
	opt_set_p = true;

      /* Find the option.  */
      ch = *p;
      opt = N_OPTS;
      for (i = 0; i < ARRAY_SIZE (attrs); i++)
	{
	  type = attrs[i].type;
	  opt_len = attrs[i].len;
	  if (ch == attrs[i].string[0]
	      && ((type != ix86_opt_str && type != ix86_opt_enum)
		  ? len == opt_len
		  : len > opt_len)
	      && memcmp (p, attrs[i].string, opt_len) == 0)
	    {
	      opt = attrs[i].opt;
	      mask = attrs[i].mask;
	      opt_string = attrs[i].string;
	      break;
	    }
	}
      /* Process the option.  */
      if (opt == N_OPTS)
	{
	  error ("attribute(target(\"%s\")) is unknown", orig_p);
	  ret = false;
	}

      else if (type == ix86_opt_isa)
	{
	  struct cl_decoded_option decoded;

	  generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
	  ix86_handle_option (&global_options, &global_options_set,
			      &decoded, input_location);
	}

      else if (type == ix86_opt_yes || type == ix86_opt_no)
	{
	  if (type == ix86_opt_no)
	    opt_set_p = !opt_set_p;

	  if (opt_set_p)
	    target_flags |= mask;
	  else
	    target_flags &= ~mask;
	}

      else if (type == ix86_opt_str)
	{
	  if (p_strings[opt])
	    {
	      error ("option(\"%s\") was already specified", opt_string);
	      ret = false;
	    }
	  else
	    p_strings[opt] = xstrdup (p + opt_len);
	}

      else if (type == ix86_opt_enum)
	{
	  bool arg_ok;
	  int value;

	  arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
	  if (arg_ok)
	    set_option (&global_options, enum_opts_set, opt, value,
			p + opt_len, DK_UNSPECIFIED, input_location,
			global_dc);
	  else
	    {
	      error ("attribute(target(\"%s\")) is unknown", orig_p);
	      ret = false;
	    }
	}

      else
	gcc_unreachable ();
    }

  return ret;
}
/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */

tree
ix86_valid_target_attribute_tree (tree args)
{
  const char *orig_arch_string = ix86_arch_string;
  const char *orig_tune_string = ix86_tune_string;
  enum fpmath_unit orig_fpmath_set = global_options_set.x_ix86_fpmath;
  int orig_tune_defaulted = ix86_tune_defaulted;
  int orig_arch_specified = ix86_arch_specified;
  char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
  tree t = NULL_TREE;
  int i;
  struct cl_target_option *def
    = TREE_TARGET_OPTION (target_option_default_node);
  struct gcc_options enum_opts_set;

  memset (&enum_opts_set, 0, sizeof (enum_opts_set));

  /* Process each of the options on the chain.  */
  if (! ix86_valid_target_attribute_inner_p (args, option_strings,
					     &enum_opts_set))
    return NULL_TREE;

  /* If the changed options are different from the default, rerun
     ix86_option_override_internal, and then save the options away.
     The string options are attribute options, and will be undone
     when we copy the save structure.  */
  if (ix86_isa_flags != def->x_ix86_isa_flags
      || target_flags != def->x_target_flags
      || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
      || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
      || enum_opts_set.x_ix86_fpmath)
    {
      /* If we are using the default tune= or arch=, undo the string assigned,
	 and use the default.  */
      if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
	ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
      else if (!orig_arch_specified)
	ix86_arch_string = NULL;

      if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
	ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
      else if (orig_tune_defaulted)
	ix86_tune_string = NULL;

      /* If fpmath= is not set, and we now have sse2 on 32-bit, use it.  */
      if (enum_opts_set.x_ix86_fpmath)
	global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
      else if (!TARGET_64BIT && TARGET_SSE)
	{
	  ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
	  global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
	}

      /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
      ix86_option_override_internal (false);

      /* Add any builtin functions with the new isa if any.  */
      ix86_add_new_builtins (ix86_isa_flags);

      /* Save the current options unless we are validating options for
	 #pragma.  */
      t = build_target_option_node ();

      ix86_arch_string = orig_arch_string;
      ix86_tune_string = orig_tune_string;
      global_options_set.x_ix86_fpmath = orig_fpmath_set;

      /* Free up memory allocated to hold the strings */
      for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
	free (option_strings[i]);
    }

  return t;
}
4451 /* Hook to validate attribute((target("string"))). */
4454 ix86_valid_target_attribute_p (tree fndecl
,
4455 tree
ARG_UNUSED (name
),
4457 int ARG_UNUSED (flags
))
4459 struct cl_target_option cur_target
;
4461 tree old_optimize
= build_optimization_node ();
4462 tree new_target
, new_optimize
;
4463 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
4465 /* If the function changed the optimization levels as well as setting target
4466 options, start with the optimizations specified. */
4467 if (func_optimize
&& func_optimize
!= old_optimize
)
4468 cl_optimization_restore (&global_options
,
4469 TREE_OPTIMIZATION (func_optimize
));
4471 /* The target attributes may also change some optimization flags, so update
4472 the optimization options if necessary. */
4473 cl_target_option_save (&cur_target
, &global_options
);
4474 new_target
= ix86_valid_target_attribute_tree (args
);
4475 new_optimize
= build_optimization_node ();
4482 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = new_target
;
4484 if (old_optimize
!= new_optimize
)
4485 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
4488 cl_target_option_restore (&global_options
, &cur_target
);
4490 if (old_optimize
!= new_optimize
)
4491 cl_optimization_restore (&global_options
,
4492 TREE_OPTIMIZATION (old_optimize
));
/* Hook to determine if one function can safely inline another.  */

static bool
ix86_can_inline_p (tree caller, tree callee)
{
  bool ret = false;
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);

  /* If callee has no option attributes, then it is ok to inline.  */
  if (!callee_tree)
    ret = true;

  /* If caller has no option attributes, but callee does then it is not ok to
     inline.  */
  else if (!caller_tree)
    ret = false;

  else
    {
      struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
      struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);

      /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
	 function can inline an SSE2 function but an SSE2 function can't
	 inline an SSE4 function.  */
      if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
	  != callee_opts->x_ix86_isa_flags)
	ret = false;

      /* See if we have the same non-isa options.  */
      else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
	ret = false;

      /* See if arch, tune, etc. are the same.  */
      else if (caller_opts->arch != callee_opts->arch)
	ret = false;

      else if (caller_opts->tune != callee_opts->tune)
	ret = false;

      else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
	ret = false;

      else if (caller_opts->branch_cost != callee_opts->branch_cost)
	ret = false;

      else
	ret = true;
    }

  return ret;
}
/* Remember the last target of ix86_set_current_function.  */
static GTY(()) tree ix86_previous_fndecl;

/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */
static void
ix86_set_current_function (tree fndecl)
{
  /* Only change the context if the function changes.  This hook is called
     several times in the course of compiling a function, and we don't want to
     slow things down too much or call target_reinit when it isn't safe.  */
  if (fndecl && fndecl != ix86_previous_fndecl)
    {
      tree old_tree = (ix86_previous_fndecl
		       ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
		       : NULL_TREE);

      tree new_tree = (fndecl
		       ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
		       : NULL_TREE);

      ix86_previous_fndecl = fndecl;
      if (old_tree == new_tree)
	;

      else if (new_tree)
	{
	  cl_target_option_restore (&global_options,
				    TREE_TARGET_OPTION (new_tree));
	  target_reinit ();
	}

      else if (old_tree)
	{
	  struct cl_target_option *def
	    = TREE_TARGET_OPTION (target_option_current_node);

	  cl_target_option_restore (&global_options, def);
	  target_reinit ();
	}
    }
}
/* Return true if this goes in large data/bss.  */

static bool
ix86_in_large_data_p (tree exp)
{
  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
    return false;

  /* Functions are never large data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
      if (strcmp (section, ".ldata") == 0
	  || strcmp (section, ".lbss") == 0)
	return true;
      return false;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
	 in data because it might be too big when completed.  */
      if (!size || size > ix86_section_threshold)
	return true;
    }

  return false;
}
/* Switch to the appropriate section for output of DECL.
   DECL is either a `VAR_DECL' node or a constant of some sort.
   RELOC indicates whether forming the initial value of DECL requires
   link-time relocations.  */

static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
	ATTRIBUTE_UNUSED;

static section *
x86_64_elf_select_section (tree decl, int reloc,
			   unsigned HOST_WIDE_INT align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *sname = NULL;
      unsigned int flags = SECTION_WRITE;
      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	  sname = ".ldata";
	  break;
	case SECCAT_DATA_REL:
	  sname = ".ldata.rel";
	  break;
	case SECCAT_DATA_REL_LOCAL:
	  sname = ".ldata.rel.local";
	  break;
	case SECCAT_DATA_REL_RO:
	  sname = ".ldata.rel.ro";
	  break;
	case SECCAT_DATA_REL_RO_LOCAL:
	  sname = ".ldata.rel.ro.local";
	  break;
	case SECCAT_BSS:
	  sname = ".lbss";
	  flags |= SECTION_BSS;
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  sname = ".lrodata";
	  flags = 0;
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (sname)
	{
	  /* We might get called with string constants, but get_named_section
	     doesn't like them as they are not DECLs.  Also, we need to set
	     flags in that case.  */
	  if (!DECL_P (decl))
	    return get_section (sname, flags, NULL);
	  return get_named_section (decl, sname, reloc);
	}
    }
  return default_elf_select_section (decl, reloc, align);
}
/* Build up a unique section name, expressed as a
   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
   RELOC indicates whether the initial value of EXP requires
   link-time relocations.  */

static void ATTRIBUTE_UNUSED
x86_64_elf_unique_section (tree decl, int reloc)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *prefix = NULL;
      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
      bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;

      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	case SECCAT_DATA_REL:
	case SECCAT_DATA_REL_LOCAL:
	case SECCAT_DATA_REL_RO:
	case SECCAT_DATA_REL_RO_LOCAL:
	  prefix = one_only ? ".ld" : ".ldata";
	  break;
	case SECCAT_BSS:
	  prefix = one_only ? ".lb" : ".lbss";
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  prefix = one_only ? ".lr" : ".lrodata";
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (prefix)
	{
	  const char *name, *linkonce;
	  char *string;

	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
	  name = targetm.strip_name_encoding (name);

	  /* If we're using one_only, then there needs to be a .gnu.linkonce
	     prefix to the section name.  */
	  linkonce = one_only ? ".gnu.linkonce" : "";

	  string = ACONCAT ((linkonce, prefix, ".", name, NULL));

	  DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
	  return;
	}
    }
  default_unique_section (decl, reloc);
}
#ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use .largecomm opcode for
   large objects.  */
void
x86_elf_aligned_common (FILE *file,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    fputs (".largecomm\t", file);
  else
    fputs (COMMON_ASM_OP, file);
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
	   size, align / BITS_PER_UNIT);
}
#endif
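
/* Illustrative output of the function above (assembly shown is a
   sketch, not verbatim compiler output): a medium-model object above
   ix86_section_threshold gets

	.largecomm	big_array,80000,32

   while a small object falls back to the ordinary .comm directive.  */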
/* Utility function for targets to use in implementing
   ASM_OUTPUT_ALIGNED_BSS.  */

void
x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    switch_to_section (get_named_section (decl, ".lbss", 0));
  else
    switch_to_section (bss_section);
  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
#ifdef ASM_DECLARE_OBJECT_NAME
  last_assemble_variable_decl = decl;
  ASM_DECLARE_OBJECT_NAME (file, name, decl);
#else
  /* Standard thing is just output label for the object.  */
  ASM_OUTPUT_LABEL (file, name);
#endif /* ASM_DECLARE_OBJECT_NAME */
  ASM_OUTPUT_SKIP (file, size ? size : 1);
}
/* Decide whether we must probe the stack before any space allocation
   on this target.  It's essentially TARGET_STACK_PROBE except when
   -fstack-check causes the stack to be already probed differently.  */

bool
ix86_target_stack_probe (void)
{
  /* Do not probe the stack twice if static stack checking is enabled.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    return false;

  return TARGET_STACK_PROBE;
}
/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */

static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  tree type, decl_or_type;
  rtx a, b;

  /* If we are generating position-independent code, we cannot sibcall
     optimize any indirect call, or a direct call to a global function,
     as the PLT requires %ebx be live. (Darwin does not have a PLT.)  */
  if (!TARGET_MACHO
      && !TARGET_64BIT
      && flag_pic
      && (!decl || !targetm.binds_local_p (decl)))
    return false;

  /* If we need to align the outgoing stack, then sibcalling would
     unalign the stack, which may break the called function.  */
  if (ix86_minimum_incoming_stack_boundary (true)
      < PREFERRED_STACK_BOUNDARY)
    return false;

  if (decl)
    {
      decl_or_type = decl;
      type = TREE_TYPE (decl);
    }
  else
    {
      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = CALL_EXPR_FN (exp);	/* pointer expression */
      type = TREE_TYPE (type);		/* pointer type */
      type = TREE_TYPE (type);		/* function type */
      decl_or_type = type;
    }

  /* Check that the return value locations are the same.  Like
     if we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  This is also the place we notice
     differences in the return value ABI.  Note that it is ok for one
     of the functions to have void return type as long as the return
     value of the other is passed in a register.  */
  a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			   cfun->decl, false);
  if (STACK_REG_P (a) || STACK_REG_P (b))
    {
      if (!rtx_equal_p (a, b))
	return false;
    }
  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    {
      /* Disable sibcall if we need to generate vzeroupper after
	 callee returns.  */
      if (TARGET_VZEROUPPER
	  && cfun->machine->callee_return_avx256_p
	  && !cfun->machine->caller_return_avx256_p)
	return false;
    }
  else if (!rtx_equal_p (a, b))
    return false;

  if (TARGET_64BIT)
    {
      /* The SYSV ABI has more call-clobbered registers;
	 disallow sibcalls from MS to SYSV.  */
      if (cfun->machine->call_abi == MS_ABI
	  && ix86_function_type_abi (type) == SYSV_ABI)
	return false;
    }
  else
    {
      /* If this call is indirect, we'll need to be able to use a
	 call-clobbered register for the address of the target function.
	 Make sure that all such registers are not used for passing
	 parameters.  Note that DLLIMPORT functions are indirect.  */
      if (!decl
	  || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
	{
	  if (ix86_function_regparm (type, NULL) >= 3)
	    {
	      /* ??? Need to count the actual number of registers to be used,
		 not the possible number of registers.  Fix later.  */
	      return false;
	    }
	}
    }

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}
4923 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4924 and "sseregparm" calling convention attributes;
4925 arguments as in struct attribute_spec.handler. */
4928 ix86_handle_cconv_attribute (tree
*node
, tree name
,
4930 int flags ATTRIBUTE_UNUSED
,
4933 if (TREE_CODE (*node
) != FUNCTION_TYPE
4934 && TREE_CODE (*node
) != METHOD_TYPE
4935 && TREE_CODE (*node
) != FIELD_DECL
4936 && TREE_CODE (*node
) != TYPE_DECL
)
4938 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
4940 *no_add_attrs
= true;
4944 /* Can combine regparm with all attributes but fastcall, and thiscall. */
4945 if (is_attribute_p ("regparm", name
))
4949 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
4951 error ("fastcall and regparm attributes are not compatible");
4954 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
4956 error ("regparam and thiscall attributes are not compatible");
4959 cst
= TREE_VALUE (args
);
4960 if (TREE_CODE (cst
) != INTEGER_CST
)
4962 warning (OPT_Wattributes
,
4963 "%qE attribute requires an integer constant argument",
4965 *no_add_attrs
= true;
4967 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
4969 warning (OPT_Wattributes
, "argument to %qE attribute larger than %d",
4971 *no_add_attrs
= true;
4979 /* Do not warn when emulating the MS ABI. */
4980 if ((TREE_CODE (*node
) != FUNCTION_TYPE
4981 && TREE_CODE (*node
) != METHOD_TYPE
)
4982 || ix86_function_type_abi (*node
) != MS_ABI
)
4983 warning (OPT_Wattributes
, "%qE attribute ignored",
4985 *no_add_attrs
= true;
4989 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4990 if (is_attribute_p ("fastcall", name
))
4992 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
4994 error ("fastcall and cdecl attributes are not compatible");
4996 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
4998 error ("fastcall and stdcall attributes are not compatible");
5000 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
5002 error ("fastcall and regparm attributes are not compatible");
5004 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5006 error ("fastcall and thiscall attributes are not compatible");
5010 /* Can combine stdcall with fastcall (redundant), regparm and
5012 else if (is_attribute_p ("stdcall", name
))
5014 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5016 error ("stdcall and cdecl attributes are not compatible");
5018 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5020 error ("stdcall and fastcall attributes are not compatible");
5022 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5024 error ("stdcall and thiscall attributes are not compatible");
5028 /* Can combine cdecl with regparm and sseregparm. */
5029 else if (is_attribute_p ("cdecl", name
))
5031 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5033 error ("stdcall and cdecl attributes are not compatible");
5035 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5037 error ("fastcall and cdecl attributes are not compatible");
5039 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5041 error ("cdecl and thiscall attributes are not compatible");
5044 else if (is_attribute_p ("thiscall", name
))
5046 if (TREE_CODE (*node
) != METHOD_TYPE
&& pedantic
)
5047 warning (OPT_Wattributes
, "%qE attribute is used for none class-method",
5049 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5051 error ("stdcall and thiscall attributes are not compatible");
5053 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5055 error ("fastcall and thiscall attributes are not compatible");
5057 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5059 error ("cdecl and thiscall attributes are not compatible");
5063 /* Can combine sseregparm with all attributes. */
/* The transactional memory builtins are implicitly regparm or fastcall
   depending on the ABI.  Override the generic do-nothing attribute that
   these builtins were declared with, and replace it with one of the two
   attributes that we expect elsewhere.  */

static tree
ix86_handle_tm_regparm_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
				  tree args ATTRIBUTE_UNUSED,
				  int flags ATTRIBUTE_UNUSED,
				  bool *no_add_attrs)
{
  tree alt;

  /* In no case do we want to add the placeholder attribute.  */
  *no_add_attrs = true;

  /* The 64-bit ABI is unchanged for transactional memory.  */
  if (TARGET_64BIT)
    return NULL_TREE;

  /* ??? Is there a better way to validate 32-bit windows?  We have
     cfun->machine->call_abi, but that seems to be set only for 64-bit.  */
  if (CHECK_STACK_LIMIT > 0)
    alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
  else
    {
      alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
      alt = tree_cons (get_identifier ("regparm"), alt, NULL);
    }
  decl_attributes (node, alt, flags);

  return NULL_TREE;
}
/* This function determines from TYPE the calling-convention.  */

unsigned int
ix86_get_callcvt (const_tree type)
{
  unsigned int ret = 0;
  bool is_stdarg;
  tree attrs;

  if (TARGET_64BIT)
    return IX86_CALLCVT_CDECL;

  attrs = TYPE_ATTRIBUTES (type);
  if (attrs != NULL_TREE)
    {
      if (lookup_attribute ("cdecl", attrs))
	ret |= IX86_CALLCVT_CDECL;
      else if (lookup_attribute ("stdcall", attrs))
	ret |= IX86_CALLCVT_STDCALL;
      else if (lookup_attribute ("fastcall", attrs))
	ret |= IX86_CALLCVT_FASTCALL;
      else if (lookup_attribute ("thiscall", attrs))
	ret |= IX86_CALLCVT_THISCALL;

      /* Regparam isn't allowed for thiscall and fastcall.  */
      if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
	{
	  if (lookup_attribute ("regparm", attrs))
	    ret |= IX86_CALLCVT_REGPARM;
	  if (lookup_attribute ("sseregparm", attrs))
	    ret |= IX86_CALLCVT_SSEREGPARM;
	}

      if (IX86_BASE_CALLCVT(ret) != 0)
	return ret;
    }

  is_stdarg = stdarg_p (type);
  if (TARGET_RTD && !is_stdarg)
    return IX86_CALLCVT_STDCALL | ret;

  if (ret != 0
      || is_stdarg
      || TREE_CODE (type) != METHOD_TYPE
      || ix86_function_type_abi (type) != MS_ABI)
    return IX86_CALLCVT_CDECL | ret;

  return IX86_CALLCVT_THISCALL;
}
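
/* An illustrative mapping for the function above (32-bit only; in
   64-bit mode it always yields IX86_CALLCVT_CDECL):

     void f1 (int) __attribute__((stdcall));   => IX86_CALLCVT_STDCALL
     void f2 (int) __attribute__((fastcall));  => IX86_CALLCVT_FASTCALL
     void f3 (int, ...);                       => cdecl even under -mrtd,
						  because stdarg_p is true  */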
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (const_tree type1, const_tree type2)
{
  unsigned int ccvt1, ccvt2;

  if (TREE_CODE (type1) != FUNCTION_TYPE
      && TREE_CODE (type1) != METHOD_TYPE)
    return 1;

  ccvt1 = ix86_get_callcvt (type1);
  ccvt2 = ix86_get_callcvt (type2);
  if (ccvt1 != ccvt2)
    return 0;
  if (ix86_function_regparm (type1, NULL)
      != ix86_function_regparm (type2, NULL))
    return 0;

  return 1;
}
/* Return the regparm value for a function with the indicated TYPE and DECL.
   DECL may be NULL when calling function indirectly
   or considering a libcall.  */

static int
ix86_function_regparm (const_tree type, const_tree decl)
{
  tree attr;
  int regparm;
  unsigned int ccvt;

  if (TARGET_64BIT)
    return (ix86_function_type_abi (type) == SYSV_ABI
	    ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
  ccvt = ix86_get_callcvt (type);
  regparm = ix86_regparm;

  if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
    {
      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
      if (attr)
	{
	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
	  return regparm;
	}
    }
  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
    return 2;
  else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
    return 1;

  /* Use register calling convention for local functions when possible.  */
  if (decl
      && TREE_CODE (decl) == FUNCTION_DECL
      && optimize
      && !(profile_flag && !flag_fentry))
    {
      /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
      struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
      if (i && i->local && i->can_change_signature)
	{
	  int local_regparm, globals = 0, regno;

	  /* Make sure no regparm register is taken by a
	     fixed register variable.  */
	  for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
	    if (fixed_regs[local_regparm])
	      break;

	  /* We don't want to use regparm(3) for nested functions as
	     these use a static chain pointer in the third argument.  */
	  if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
	    local_regparm = 2;

	  /* In 32-bit mode save a register for the split stack.  */
	  if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
	    local_regparm = 2;

	  /* Each fixed register usage increases register pressure,
	     so fewer registers should be used for argument passing.
	     This functionality can be overridden by an explicit
	     regparm value.  */
	  for (regno = 0; regno <= DI_REG; regno++)
	    if (fixed_regs[regno])
	      globals++;

	  local_regparm
	    = globals < local_regparm ? local_regparm - globals : 0;

	  if (local_regparm > regparm)
	    regparm = local_regparm;
	}
    }

  return regparm;
}
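
/* Illustrative: __attribute__((regparm(3))) asks for up to three
   integer arguments in EAX, EDX and ECX; fastcall fixes the count at
   two and thiscall at one (ECX carries `this').  The local-function
   path above may raise the count automatically when optimizing.  */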
/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
   DFmode (2) arguments in SSE registers for a function with the
   indicated TYPE and DECL.  DECL may be NULL when calling function
   indirectly or considering a libcall.  Otherwise return 0.  */

static int
ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
{
  gcc_assert (!TARGET_64BIT);

  /* Use SSE registers to pass SFmode and DFmode arguments if requested
     by the sseregparm attribute.  */
  if (TARGET_SSEREGPARM
      || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
    {
      if (!TARGET_SSE)
	{
	  if (warn)
	    {
	      if (decl)
		error ("calling %qD with attribute sseregparm without "
		       "SSE/SSE2 enabled", decl);
	      else
		error ("calling %qT with attribute sseregparm without "
		       "SSE/SSE2 enabled", type);
	    }
	  return 0;
	}

      return 2;
    }

  /* For local functions, pass up to SSE_REGPARM_MAX SFmode
     (and DFmode for SSE2) arguments in SSE registers.  */
  if (decl && TARGET_SSE_MATH && optimize
      && !(profile_flag && !flag_fentry))
    {
      /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
      struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
      if (i && i->local && i->can_change_signature)
	return TARGET_SSE2 ? 2 : 1;
    }

  return 0;
}
/* Return true if EAX is live at the start of the function.  Used by
   ix86_expand_prologue to determine if we need special help before
   calling allocate_stack_worker.  */

static bool
ix86_eax_live_at_start_p (void)
{
  /* Cheat.  Don't bother working forward from ix86_function_regparm
     to the function type to whether an actual argument is located in
     eax.  Instead just look at cfg info, which is still close enough
     to correct at this point.  This gives false positives for broken
     functions that might use uninitialized data that happens to be
     allocated in eax, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
}
static bool
ix86_keep_aggregate_return_pointer (tree fntype)
{
  tree attr;

  if (!TARGET_64BIT)
    {
      attr = lookup_attribute ("callee_pop_aggregate_return",
			       TYPE_ATTRIBUTES (fntype));
      if (attr)
	return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);

      /* For 32-bit MS-ABI the default is to keep aggregate
	 return pointer.  */
      if (ix86_function_type_abi (fntype) == MS_ABI)
	return true;
    }
  return KEEP_AGGREGATE_RETURN_POINTER != 0;
}
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
     of args is fixed, but if the number is variable then the caller
     must pop them all.  RTD can't be used for library calls now
     because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

static int
ix86_return_pops_args (tree fundecl, tree funtype, int size)
{
  unsigned int ccvt;

  /* None of the 64-bit ABIs pop arguments.  */
  if (TARGET_64BIT)
    return 0;

  ccvt = ix86_get_callcvt (funtype);

  if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
	       | IX86_CALLCVT_THISCALL)) != 0
      && ! stdarg_p (funtype))
    return size;

  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
      && !ix86_keep_aggregate_return_pointer (funtype))
    {
      int nregs = ix86_function_regparm (funtype, fundecl);
      if (nregs == 0)
	return GET_MODE_SIZE (Pmode);
    }

  return 0;
}
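
/* Illustrative: for

     void __attribute__((stdcall)) f (int a, int b);

   SIZE is 8 and stdarg_p is false, so the function returns 8 and the
   callee pops its own arguments with `ret $8'; a vararg or plain
   cdecl function returns 0 and the caller does the cleanup.  */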
/* Argument support functions.  */

/* Return true when register may be used to pass function parameters.  */

bool
ix86_function_arg_regno_p (int regno)
{
  int i;
  const int *parm_regs;

  if (!TARGET_64BIT)
    {
      if (TARGET_MACHO)
	return (regno < REGPARM_MAX
		|| (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
      else
	return (regno < REGPARM_MAX
		|| (TARGET_MMX && MMX_REGNO_P (regno)
		    && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
		|| (TARGET_SSE && SSE_REGNO_P (regno)
		    && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
    }

  if (TARGET_MACHO)
    {
      if (SSE_REGNO_P (regno) && TARGET_SSE)
	return true;
    }
  else
    {
      if (TARGET_SSE && SSE_REGNO_P (regno)
	  && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
	return true;
    }

  /* TODO: The function should depend on current function ABI but
     builtins.c would need updating then.  Therefore we use the
     default ABI.  */

  /* RAX is used as hidden argument to va_arg functions.  */
  if (ix86_abi == SYSV_ABI && regno == AX_REG)
    return true;

  if (ix86_abi == MS_ABI)
    parm_regs = x86_64_ms_abi_int_parameter_registers;
  else
    parm_regs = x86_64_int_parameter_registers;
  for (i = 0; i < (ix86_abi == MS_ABI
		   ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
    if (regno == parm_regs[i])
      return true;
  return false;
}
/* Return true if we do not know how to pass TYPE solely in registers.  */

static bool
ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
{
  if (must_pass_in_stack_var_size_or_pad (mode, type))
    return true;

  /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
     The layout_type routine is crafty and tries to trick us into passing
     currently unsupported vector types on the stack by using TImode.  */
  return (!TARGET_64BIT && mode == TImode
	  && type && TREE_CODE (type) != VECTOR_TYPE);
}
/* It returns the size, in bytes, of the area reserved for arguments passed
   in registers for the function represented by FNDECL, dependent on the
   calling convention (ABI) used.  */

int
ix86_reg_parm_stack_space (const_tree fndecl)
{
  enum calling_abi call_abi = SYSV_ABI;
  if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
    call_abi = ix86_function_abi (fndecl);
  else
    call_abi = ix86_function_type_abi (fndecl);
  if (TARGET_64BIT && call_abi == MS_ABI)
    return 32;
  return 0;
}
/* Returns SYSV_ABI or MS_ABI, dependent on FNTYPE, specifying the
   call abi used.  */

enum calling_abi
ix86_function_type_abi (const_tree fntype)
{
  if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
    {
      enum calling_abi abi = ix86_abi;
      if (abi == SYSV_ABI)
	{
	  if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
	    abi = MS_ABI;
	}
      else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
	abi = SYSV_ABI;
      return abi;
    }
  return ix86_abi;
}
static bool
ix86_function_ms_hook_prologue (const_tree fn)
{
  if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
    {
      if (decl_function_context (fn) != NULL_TREE)
	error_at (DECL_SOURCE_LOCATION (fn),
		  "ms_hook_prologue is not compatible with nested function");
      else
	return true;
    }
  return false;
}
/* Returns SYSV_ABI or MS_ABI, dependent on FNDECL, specifying the
   call abi used.  */

static enum calling_abi
ix86_function_abi (const_tree fndecl)
{
  if (fndecl == NULL_TREE)
    return ix86_abi;
  return ix86_function_type_abi (TREE_TYPE (fndecl));
}
/* Returns SYSV_ABI or MS_ABI, dependent on cfun, specifying the
   call abi used.  */

enum calling_abi
ix86_cfun_abi (void)
{
  if (! cfun)
    return ix86_abi;
  return cfun->machine->call_abi;
}
/* Write the extra assembler code needed to declare a function properly.  */

void
ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
				tree decl)
{
  bool is_ms_hook = ix86_function_ms_hook_prologue (decl);

  if (is_ms_hook)
    {
      int i, filler_count = (TARGET_64BIT ? 32 : 16);
      unsigned int filler_cc = 0xcccccccc;

      for (i = 0; i < filler_count; i += 4)
	fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
    }

#ifdef SUBTARGET_ASM_UNWIND_INIT
  SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
#endif

  ASM_OUTPUT_LABEL (asm_out_file, fname);

  /* Output magic byte marker, if hot-patch attribute is set.  */
  if (is_ms_hook)
    {
      if (TARGET_64BIT)
	{
	  /* leaq [%rsp + 0], %rsp  */
	  asm_fprintf (asm_out_file, ASM_BYTE
		       "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
	}
      else
	{
	  /* movl.s %edi, %edi
	     push   %ebp
	     movl.s %esp, %ebp  */
	  asm_fprintf (asm_out_file, ASM_BYTE
		       "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
	}
    }
}
extern void init_regs (void);

/* Implementation of call abi switching target hook.  Specific to FNDECL
   the specific call register sets are set.  See also
   ix86_conditional_register_usage for more details.  */
void
ix86_call_abi_override (const_tree fndecl)
{
  if (fndecl == NULL_TREE)
    cfun->machine->call_abi = ix86_abi;
  else
    cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
}
/* 64-bit MS and SYSV ABIs have different sets of call-used registers.  Avoid
   expensive re-initialization of init_regs each time we switch function
   context, since this is needed only during RTL expansion.  */
static void
ix86_maybe_switch_abi (void)
{
  if (TARGET_64BIT
      && call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
    reinit_regs ();
}
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
		      tree fntype,	/* tree ptr for function decl */
		      rtx libname,	/* SYMBOL_REF of library name or 0 */
		      tree fndecl,
		      int caller)
{
  struct cgraph_local_info *i;
  tree fnret_type;

  memset (cum, 0, sizeof (*cum));

  /* Initialize for the current callee.  */
  if (caller)
    {
      cfun->machine->callee_pass_avx256_p = false;
      cfun->machine->callee_return_avx256_p = false;
    }

  if (fndecl)
    {
      i = cgraph_local_info (fndecl);
      cum->call_abi = ix86_function_abi (fndecl);
      fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
    }
  else
    {
      i = NULL;
      cum->call_abi = ix86_function_type_abi (fntype);
      if (fntype)
	fnret_type = TREE_TYPE (fntype);
      else
	fnret_type = NULL;
    }

  if (TARGET_VZEROUPPER && fnret_type)
    {
      rtx fnret_value = ix86_function_value (fnret_type, fntype,
					     false);
      if (function_pass_avx256_p (fnret_value))
	{
	  /* The return value of this function uses 256bit AVX modes.  */
	  if (caller)
	    cfun->machine->callee_return_avx256_p = true;
	  else
	    cfun->machine->caller_return_avx256_p = true;
	}
    }

  cum->caller = caller;

  /* Set up the number of registers to use for passing arguments.  */

  if (TARGET_64BIT && cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
    sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
	   "or subtarget optimization implying it");
  cum->nregs = ix86_regparm;
  if (TARGET_64BIT)
    {
      cum->nregs = (cum->call_abi == SYSV_ABI
		    ? X86_64_REGPARM_MAX
		    : X86_64_MS_REGPARM_MAX);
    }
  if (TARGET_SSE)
    {
      cum->sse_nregs = SSE_REGPARM_MAX;
      if (TARGET_64BIT)
	{
	  cum->sse_nregs = (cum->call_abi == SYSV_ABI
			    ? X86_64_SSE_REGPARM_MAX
			    : X86_64_MS_SSE_REGPARM_MAX);
	}
    }
  if (TARGET_MMX)
    cum->mmx_nregs = MMX_REGPARM_MAX;
  cum->warn_avx = true;
  cum->warn_sse = true;
  cum->warn_mmx = true;

  /* Because type might mismatch in between caller and callee, we need to
     use actual type of function for local calls.
     FIXME: cgraph_analyze can be told to actually record if function uses
     va_start so for local functions maybe_vaarg can be made aggressive.
     FIXME: once typesystem is fixed, we won't need this code anymore.  */
  if (i && i->local && i->can_change_signature)
    fntype = TREE_TYPE (fndecl);
  cum->maybe_vaarg = (fntype
		      ? (!prototype_p (fntype) || stdarg_p (fntype))
		      : !libname);

  if (!TARGET_64BIT)
    {
      /* If there are variable arguments, then we won't pass anything
	 in registers in 32-bit mode.  */
      if (stdarg_p (fntype))
	{
	  cum->nregs = 0;
	  cum->sse_nregs = 0;
	  cum->mmx_nregs = 0;
	  cum->warn_avx = 0;
	  cum->warn_sse = 0;
	  cum->warn_mmx = 0;
	  return;
	}

      /* Use ecx and edx registers if function has fastcall attribute,
	 else look for regparm information.  */
      if (fntype)
	{
	  unsigned int ccvt = ix86_get_callcvt (fntype);
	  if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	    {
	      cum->nregs = 1;
	      cum->fastcall = 1; /* Same first register as in fastcall.  */
	    }
	  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	    {
	      cum->nregs = 2;
	      cum->fastcall = 1;
	    }
	  else
	    cum->nregs = ix86_function_regparm (fntype, fndecl);
	}

      /* Set up the number of SSE registers used for passing SFmode
	 and DFmode arguments.  Warn for mismatching ABI.  */
      cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
    }
}
/* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
   But in the case of vector types, it is some vector mode.

   When we have only some of our vector isa extensions enabled, then there
   are some modes for which vector_mode_supported_p is false.  For these
   modes, the generic vector support in gcc will choose some non-vector mode
   in order to implement the type.  By computing the natural mode, we'll
   select the proper ABI location for the operand and not depend on whatever
   the middle-end decides to do with these vector types.

   The middle-end can't deal with vector types > 16 bytes.  In this
   case, we return the original mode and warn of an ABI change if CUM isn't
   NULL.  */

static enum machine_mode
type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
{
  enum machine_mode mode = TYPE_MODE (type);

  if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if ((size == 8 || size == 16 || size == 32)
	  /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
	  && TYPE_VECTOR_SUBPARTS (type) > 1)
	{
	  enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));

	  if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
	    mode = MIN_MODE_VECTOR_FLOAT;
	  else
	    mode = MIN_MODE_VECTOR_INT;

	  /* Get the mode which has this inner mode and number of units.  */
	  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
	    if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
		&& GET_MODE_INNER (mode) == innermode)
	      {
		if (size == 32 && !TARGET_AVX)
		  {
		    static bool warnedavx;

		    if (cum && !warnedavx && cum->warn_avx)
		      {
			warnedavx = true;
			warning (0, "AVX vector argument without AVX "
				 "enabled changes the ABI");
		      }
		    return TYPE_MODE (type);
		  }
		else
		  return mode;
	      }

	  gcc_unreachable ();
	}
    }

  return mode;
}
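/* As an illustration (not from the original sources): for a GNU C type
   such as

       typedef int v4si __attribute__ ((vector_size (16)));

   compiled without SSE enabled, TYPE_MODE may have fallen back to a
   non-vector mode, but the search above still recovers V4SImode as the
   natural mode, so the argument keeps its SSE-register ABI slot.  */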
/* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
   this may not agree with the mode that the type system has chosen for the
   register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
   go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */

static rtx
gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
		     unsigned int regno)
{
  rtx tmp;

  if (orig_mode != BLKmode)
    tmp = gen_rtx_REG (orig_mode, regno);
  else
    {
      tmp = gen_rtx_REG (mode, regno);
      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
    }

  return tmp;
}
/* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
   of this code is to classify each 8bytes of incoming argument by the register
   class and assign registers accordingly.  */

/* Return the union class of CLASS1 and CLASS2.
   See the x86-64 PS ABI for details.  */

static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
{
  /* Rule #1: If both classes are equal, this is the resulting class.  */
  if (class1 == class2)
    return class1;

  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     the other class.  */
  if (class1 == X86_64_NO_CLASS)
    return class2;
  if (class2 == X86_64_NO_CLASS)
    return class1;

  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
    return X86_64_INTEGERSI_CLASS;
  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    return X86_64_INTEGER_CLASS;

  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
     MEMORY is used.  */
  if (class1 == X86_64_X87_CLASS
      || class1 == X86_64_X87UP_CLASS
      || class1 == X86_64_COMPLEX_X87_CLASS
      || class2 == X86_64_X87_CLASS
      || class2 == X86_64_X87UP_CLASS
      || class2 == X86_64_COMPLEX_X87_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #6: Otherwise class SSE is used.  */
  return X86_64_SSE_CLASS;
}
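/* Worked example of the merge rules above (illustrative, not from the
   original sources): for

       struct s { int i; float f; };

   the single eightbyte contains an INTEGERSI field and an SSESF field;
   rule #4 merges them to INTEGERSI, so the whole struct travels in one
   general-purpose register.  A struct containing a long double member
   instead hits rule #5 and is forced to memory.  */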
/* Classify the argument of type TYPE and mode MODE.
   CLASSES will be filled by the register class used to pass each word
   of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned.  As a special case for zero
   sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records and specifies offset
   of the offset in bits modulo 256 to avoid overflow cases.

   See the x86-64 PS ABI for details.  */

static int
classify_argument (enum machine_mode mode, const_tree type,
		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
{
  HOST_WIDE_INT bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words
    = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Variable sized entities are always passed/returned in memory.  */
  if (bytes < 0)
    return 0;

  if (mode != VOIDmode
      && targetm.calls.must_pass_in_stack (mode, type))
    return 0;

  if (type && AGGREGATE_TYPE_P (type))
    {
      int i;
      tree field;
      enum x86_64_reg_class subclasses[MAX_CLASSES];

      /* On x86-64 we pass structures larger than 32 bytes on the stack.  */
      if (bytes > 32)
	return 0;

      for (i = 0; i < words; i++)
	classes[i] = X86_64_NO_CLASS;

      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
	 signal memory class, so handle it as a special case.  */
      if (!words)
	{
	  classes[0] = X86_64_NO_CLASS;
	  return 1;
	}

      /* Classify each field of record and merge classes.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	  /* And now merge the fields of structure.  */
	  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  if (TREE_TYPE (field) == error_mark_node)
		    continue;

		  /* Bitfields are always classified as integer.  Handle them
		     early, since later code would consider them to be
		     misaligned integers.  */
		  if (DECL_BIT_FIELD (field))
		    {
		      for (i = (int_bit_position (field)
				+ (bit_offset % 64)) / 8 / 8;
			   i < ((int_bit_position (field) + (bit_offset % 64))
				+ tree_low_cst (DECL_SIZE (field), 0)
				+ 63) / 8 / 8; i++)
			classes[i] =
			  merge_classes (X86_64_INTEGER_CLASS,
					 classes[i]);
		    }
		  else
		    {
		      int pos;

		      type = TREE_TYPE (field);

		      /* Flexible array member is ignored.  */
		      if (TYPE_MODE (type) == BLKmode
			  && TREE_CODE (type) == ARRAY_TYPE
			  && TYPE_SIZE (type) == NULL_TREE
			  && TYPE_DOMAIN (type) != NULL_TREE
			  && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
			      == NULL_TREE))
			{
			  static bool warned;

			  if (!warned && warn_psabi)
			    {
			      warned = true;
			      inform (input_location,
				      "the ABI of passing struct with"
				      " a flexible array member has"
				      " changed in GCC 4.4");
			    }
			  continue;
			}
		      num = classify_argument (TYPE_MODE (type), type,
					       subclasses,
					       (int_bit_position (field)
						+ bit_offset) % 256);
		      if (!num)
			return 0;
		      pos = (int_bit_position (field)
			     + (bit_offset % 64)) / 8 / 8;
		      for (i = 0; i < num && (i + pos) < words; i++)
			classes[i + pos] =
			  merge_classes (subclasses[i], classes[i + pos]);
		    }
		}
	    }
	  break;

	case ARRAY_TYPE:
	  /* Arrays are handled as small records.  */
	  {
	    int num;
	    num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
				     TREE_TYPE (type), subclasses, bit_offset);
	    if (!num)
	      return 0;

	    /* The partial classes are now full classes.  */
	    if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
	      subclasses[0] = X86_64_SSE_CLASS;
	    if (subclasses[0] == X86_64_INTEGERSI_CLASS
		&& !((bit_offset % 64) == 0 && bytes == 4))
	      subclasses[0] = X86_64_INTEGER_CLASS;

	    for (i = 0; i < words; i++)
	      classes[i] = subclasses[i % num];

	    break;
	  }
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  /* Unions are similar to RECORD_TYPE but offset is always 0.  */
	  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  if (TREE_TYPE (field) == error_mark_node)
		    continue;

		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
					   TREE_TYPE (field), subclasses,
					   bit_offset);
		  if (!num)
		    return 0;
		  for (i = 0; i < num; i++)
		    classes[i] = merge_classes (subclasses[i], classes[i]);
		}
	    }
	  break;

	default:
	  gcc_unreachable ();
	}

      if (words > 2)
	{
	  /* When size > 16 bytes, if the first one isn't
	     X86_64_SSE_CLASS or any other ones aren't
	     X86_64_SSEUP_CLASS, everything should be passed in
	     memory.  */
	  if (classes[0] != X86_64_SSE_CLASS)
	    return 0;

	  for (i = 1; i < words; i++)
	    if (classes[i] != X86_64_SSEUP_CLASS)
	      return 0;
	}

      /* Final merger cleanup.  */
      for (i = 0; i < words; i++)
	{
	  /* If one class is MEMORY, everything should be passed in
	     memory.  */
	  if (classes[i] == X86_64_MEMORY_CLASS)
	    return 0;

	  /* The X86_64_SSEUP_CLASS should be always preceded by
	     X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
	  if (classes[i] == X86_64_SSEUP_CLASS
	      && classes[i - 1] != X86_64_SSE_CLASS
	      && classes[i - 1] != X86_64_SSEUP_CLASS)
	    {
	      /* The first one should never be X86_64_SSEUP_CLASS.  */
	      gcc_assert (i != 0);
	      classes[i] = X86_64_SSE_CLASS;
	    }

	  /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
	     everything should be passed in memory.  */
	  if (classes[i] == X86_64_X87UP_CLASS
	      && (classes[i - 1] != X86_64_X87_CLASS))
	    {
	      static bool warned;

	      /* The first one should never be X86_64_X87UP_CLASS.  */
	      gcc_assert (i != 0);
	      if (!warned && warn_psabi)
		{
		  warned = true;
		  inform (input_location,
			  "the ABI of passing union with long double"
			  " has changed in GCC 4.4");
		}
	      return 0;
	    }
	}
      return words;
    }

  /* Compute alignment needed.  We align all types to natural boundaries with
     exception of XFmode that is aligned to 64bits.  */
  if (mode != VOIDmode && mode != BLKmode)
    {
      int mode_alignment = GET_MODE_BITSIZE (mode);

      if (mode == XFmode)
	mode_alignment = 128;
      else if (mode == XCmode)
	mode_alignment = 256;
      if (COMPLEX_MODE_P (mode))
	mode_alignment /= 2;
      /* Misaligned fields are always returned in memory.  */
      if (bit_offset % mode_alignment)
	return 0;
    }

  /* for V1xx modes, just use the base mode */
  if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
      && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
    mode = GET_MODE_INNER (mode);

  /* Classification of atomic types.  */
  switch (mode)
    {
    case SDmode:
    case DDmode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case TDmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case DImode:
    case SImode:
    case HImode:
    case QImode:
    case CSImode:
    case CHImode:
    case CQImode:
      {
	int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);

	if (size <= 32)
	  {
	    classes[0] = X86_64_INTEGERSI_CLASS;
	    return 1;
	  }
	else if (size <= 64)
	  {
	    classes[0] = X86_64_INTEGER_CLASS;
	    return 1;
	  }
	else if (size <= 64+32)
	  {
	    classes[0] = X86_64_INTEGER_CLASS;
	    classes[1] = X86_64_INTEGERSI_CLASS;
	    return 2;
	  }
	else if (size <= 64+64)
	  {
	    classes[0] = classes[1] = X86_64_INTEGER_CLASS;
	    return 2;
	  }
	else
	  gcc_unreachable ();
      }
    case CDImode:
    case TImode:
      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
      return 2;
    case COImode:
    case OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();
    case CTImode:
      return 0;
    case SFmode:
      if (!(bit_offset % 64))
	classes[0] = X86_64_SSESF_CLASS;
      else
	classes[0] = X86_64_SSE_CLASS;
      return 1;
    case DFmode:
      classes[0] = X86_64_SSEDF_CLASS;
      return 1;
    case XFmode:
      classes[0] = X86_64_X87_CLASS;
      classes[1] = X86_64_X87UP_CLASS;
      return 2;
    case TFmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case SCmode:
      classes[0] = X86_64_SSE_CLASS;
      if (!(bit_offset % 64))
	return 1;
      else
	{
	  static bool warned;

	  if (!warned && warn_psabi)
	    {
	      warned = true;
	      inform (input_location,
		      "the ABI of passing structure with complex float"
		      " member has changed in GCC 4.4");
	    }
	  classes[1] = X86_64_SSESF_CLASS;
	  return 2;
	}
    case DCmode:
      classes[0] = X86_64_SSEDF_CLASS;
      classes[1] = X86_64_SSEDF_CLASS;
      return 2;
    case XCmode:
      classes[0] = X86_64_COMPLEX_X87_CLASS;
      return 1;
    case TCmode:
      /* This mode is larger than 16 bytes.  */
      return 0;
    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      classes[2] = X86_64_SSEUP_CLASS;
      classes[3] = X86_64_SSEUP_CLASS;
      return 4;
    case V4SFmode:
    case V4SImode:
    case V16QImode:
    case V8HImode:
    case V2DFmode:
    case V2DImode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case V1TImode:
    case V1DImode:
    case V2SFmode:
    case V2SImode:
    case V4HImode:
    case V8QImode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case BLKmode:
    case VOIDmode:
      return 0;

    default:
      gcc_assert (VECTOR_MODE_P (mode));

      if (bytes > 16)
	return 0;

      gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);

      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
	classes[0] = X86_64_INTEGERSI_CLASS;
      else
	classes[0] = X86_64_INTEGER_CLASS;
      classes[1] = X86_64_INTEGER_CLASS;
      return 1 + (bytes > 8);
    }
}
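/* Example classifications produced by the routine above (illustrative,
   assuming TARGET_AVX for the 32-byte case):

       struct { double d; long l; }  ->  { SSEDF, INTEGER }:
	   an SSE register for the first eightbyte, an integer
	   register for the second;
       long double                   ->  { X87, X87UP }:
	   memory when passed as an argument, %st(0) when returned;
       __m256                        ->  { SSE, SSEUP, SSEUP, SSEUP }:
	   a single %ymm register.  */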
/* Examine the argument and return the number of registers required in each
   class.  Return 0 iff parameter should be passed in memory.  */

static int
examine_argument (enum machine_mode mode, const_tree type, int in_return,
		  int *int_nregs, int *sse_nregs)
{
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n = classify_argument (mode, type, regclass, 0);

  *int_nregs = 0;
  *sse_nregs = 0;
  if (!n)
    return 0;
  for (n--; n >= 0; n--)
    switch (regclass[n])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	(*int_nregs)++;
	break;
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	(*sse_nregs)++;
	break;
      case X86_64_NO_CLASS:
      case X86_64_SSEUP_CLASS:
	break;
      case X86_64_X87_CLASS:
      case X86_64_X87UP_CLASS:
	if (!in_return)
	  return 0;
	break;
      case X86_64_COMPLEX_X87_CLASS:
	return in_return ? 2 : 0;
      case X86_64_MEMORY_CLASS:
	gcc_unreachable ();
      }
  return 1;
}
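/* For instance (illustrative), examine_argument on
   struct { double d; long l; } sets *int_nregs = 1 and *sse_nregs = 1;
   callers compare those counts against the registers still available
   and spill the argument to the stack when either class is
   exhausted.  */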
/* Construct container for the argument used by GCC interface.  See
   FUNCTION_ARG for the detailed description.  */

static rtx
construct_container (enum machine_mode mode, enum machine_mode orig_mode,
		     const_tree type, int in_return, int nintregs, int nsseregs,
		     const int *intreg, int sse_regno)
{
  /* The following variables hold the static issued_error state.  */
  static bool issued_sse_arg_error;
  static bool issued_sse_ret_error;
  static bool issued_x87_ret_error;

  enum machine_mode tmpmode;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n;
  int i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  n = classify_argument (mode, type, regclass, 0);
  if (!n)
    return NULL;
  if (!examine_argument (mode, type, in_return, &needed_intregs,
			 &needed_sseregs))
    return NULL;
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;

  /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
     some less clueful developer tries to use floating-point anyway.  */
  if (needed_sseregs && !TARGET_SSE)
    {
      if (in_return)
	{
	  if (!issued_sse_ret_error)
	    {
	      error ("SSE register return with SSE disabled");
	      issued_sse_ret_error = true;
	    }
	}
      else if (!issued_sse_arg_error)
	{
	  error ("SSE register argument with SSE disabled");
	  issued_sse_arg_error = true;
	}
      return NULL;
    }

  /* Likewise, error if the ABI requires us to return values in the
     x87 registers and the user specified -mno-80387.  */
  if (!TARGET_80387 && in_return)
    for (i = 0; i < n; i++)
      if (regclass[i] == X86_64_X87_CLASS
	  || regclass[i] == X86_64_X87UP_CLASS
	  || regclass[i] == X86_64_COMPLEX_X87_CLASS)
	{
	  if (!issued_x87_ret_error)
	    {
	      error ("x87 register return with x87 disabled");
	      issued_x87_ret_error = true;
	    }
	  return NULL;
	}

  /* First construct simple cases.  Avoid SCmode, since we want to use
     single register to pass this type.  */
  if (n == 1 && mode != SCmode)
    switch (regclass[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	if (mode != BLKmode)
	  return gen_reg_or_parallel (mode, orig_mode,
				      SSE_REGNO (sse_regno));
	break;
      case X86_64_X87_CLASS:
      case X86_64_COMPLEX_X87_CLASS:
	return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
	/* Zero sized array, struct or class.  */
	return NULL;
      default:
	gcc_unreachable ();
      }
  if (n == 2
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				SSE_REGNO (sse_regno));
  if (n == 4
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && regclass[2] == X86_64_SSEUP_CLASS
      && regclass[3] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				SSE_REGNO (sse_regno));
  if (n == 2
      && regclass[0] == X86_64_X87_CLASS
      && regclass[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (XFmode, FIRST_STACK_REG);
  if (n == 2
      && regclass[0] == X86_64_INTEGER_CLASS
      && regclass[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode || mode == TFmode)
      && intreg[0] + 1 == intreg[1])
    return gen_rtx_REG (mode, intreg[0]);

  /* Otherwise figure out the entries of the PARALLEL.  */
  for (i = 0; i < n; i++)
    {
      int pos;

      switch (regclass[i])
	{
	case X86_64_NO_CLASS:
	  break;
	case X86_64_INTEGER_CLASS:
	case X86_64_INTEGERSI_CLASS:
	  /* Merge TImodes on aligned occasions here too.  */
	  if (i * 8 + 8 > bytes)
	    tmpmode
	      = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
	  else if (regclass[i] == X86_64_INTEGERSI_CLASS)
	    tmpmode = SImode;
	  else
	    tmpmode = DImode;
	  /* We've requested 24 bytes we don't have a mode for.
	     Use DImode.  */
	  if (tmpmode == BLKmode)
	    tmpmode = DImode;
	  exp [nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (tmpmode, *intreg),
				 GEN_INT (i*8));
	  intreg++;
	  break;
	case X86_64_SSESF_CLASS:
	  exp [nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (SFmode,
					      SSE_REGNO (sse_regno)),
				 GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSEDF_CLASS:
	  exp [nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (DFmode,
					      SSE_REGNO (sse_regno)),
				 GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSE_CLASS:
	  pos = i;
	  switch (n)
	    {
	    case 1:
	      tmpmode = DImode;
	      break;
	    case 2:
	      if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
		{
		  tmpmode = TImode;
		  i++;
		}
	      else
		tmpmode = DImode;
	      break;
	    case 4:
	      gcc_assert (i == 0
			  && regclass[1] == X86_64_SSEUP_CLASS
			  && regclass[2] == X86_64_SSEUP_CLASS
			  && regclass[3] == X86_64_SSEUP_CLASS);
	      tmpmode = OImode;
	      i += 3;
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  exp [nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (tmpmode,
					      SSE_REGNO (sse_regno)),
				 GEN_INT (pos*8));
	  sse_regno++;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* Empty aligned struct, union or class.  */
  if (nexps == 0)
    return NULL;

  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp [i];
  return ret;
}
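/* A sketch (illustrative) of the container built above for
   struct { double d; long l; } passed in %xmm0 and %rdi:

       (parallel [(expr_list (reg:DF xmm0) (const_int 0))
		  (expr_list (reg:DI di) (const_int 8))])

   i.e. one EXPR_LIST per eightbyte, pairing the register with the byte
   offset of the piece it carries.  */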
/* Update the data in CUM to advance over an argument of mode MODE
   and data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)  */

static void
function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			 const_tree type, HOST_WIDE_INT bytes,
			 HOST_WIDE_INT words)
{
  switch (mode)
    {
    default:
      break;

    case BLKmode:
      if (bytes < 0)
	break;
      /* FALLTHRU */

    case DImode:
    case SImode:
    case HImode:
    case QImode:
      cum->words += words;
      cum->nregs -= words;
      cum->regno += words;

      if (cum->nregs <= 0)
	{
	  cum->nregs = 0;
	  cum->regno = 0;
	}
      break;

    case OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();

    case DFmode:
      if (cum->float_in_sse < 2)
	break;
    case SFmode:
      if (cum->float_in_sse < 1)
	break;
      /* FALLTHRU */

    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
    case TImode:
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
    case V4SFmode:
    case V2DFmode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->sse_words += words;
	  cum->sse_nregs -= 1;
	  cum->sse_regno += 1;
	  if (cum->sse_nregs <= 0)
	    {
	      cum->sse_nregs = 0;
	      cum->sse_regno = 0;
	    }
	}
      break;

    case V8QImode:
    case V4HImode:
    case V2SImode:
    case V2SFmode:
    case V1TImode:
    case V1DImode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->mmx_words += words;
	  cum->mmx_nregs -= 1;
	  cum->mmx_regno += 1;
	  if (cum->mmx_nregs <= 0)
	    {
	      cum->mmx_nregs = 0;
	      cum->mmx_regno = 0;
	    }
	}
      break;
    }
}
static void
function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			 const_tree type, HOST_WIDE_INT words, bool named)
{
  int int_nregs, sse_nregs;

  /* Unnamed 256bit vector mode parameters are passed on stack.  */
  if (!named && VALID_AVX256_REG_MODE (mode))
    return;

  if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
      && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
    {
      cum->nregs -= int_nregs;
      cum->sse_nregs -= sse_nregs;
      cum->regno += int_nregs;
      cum->sse_regno += sse_nregs;
    }
  else
    {
      int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
      cum->words = (cum->words + align - 1) & ~(align - 1);
      cum->words += words;
    }
}
static void
function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
			    HOST_WIDE_INT words)
{
  /* Otherwise, this should be passed indirect.  */
  gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);

  cum->words += words;
  if (cum->nregs > 0)
    {
      cum->nregs -= 1;
      cum->regno += 1;
    }
}
;
6635 /* Update the data in CUM to advance over an argument of mode MODE and
6636 data type TYPE. (TYPE is null for libcalls where that information
6637 may not be available.) */
6640 ix86_function_arg_advance (cumulative_args_t cum_v
, enum machine_mode mode
,
6641 const_tree type
, bool named
)
6643 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
6644 HOST_WIDE_INT bytes
, words
;
6646 if (mode
== BLKmode
)
6647 bytes
= int_size_in_bytes (type
);
6649 bytes
= GET_MODE_SIZE (mode
);
6650 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
6653 mode
= type_natural_mode (type
, NULL
);
6655 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
6656 function_arg_advance_ms_64 (cum
, bytes
, words
);
6657 else if (TARGET_64BIT
)
6658 function_arg_advance_64 (cum
, mode
, type
, words
, named
);
6660 function_arg_advance_32 (cum
, mode
, type
, bytes
, words
);
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
   This is null for libcalls where that information may
   not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
   the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
   (otherwise it is an extra parameter matching an ellipsis).  */

static rtx
function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
		 enum machine_mode orig_mode, const_tree type,
		 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
{
  static bool warnedsse, warnedmmx;

  /* Avoid the AL settings for the Unix64 ABI.  */
  if (mode == VOIDmode)
    return constm1_rtx;

  switch (mode)
    {
    default:
      break;

    case BLKmode:
      if (bytes < 0)
	break;
      /* FALLTHRU */
    case DImode:
    case SImode:
    case HImode:
    case QImode:
      if (words <= cum->nregs)
	{
	  int regno = cum->regno;

	  /* Fastcall allocates the first two DWORD (SImode) or
	     smaller arguments to ECX and EDX if it isn't an
	     aggregate type.  */
	  if (cum->fastcall)
	    {
	      if (mode == BLKmode
		  || mode == DImode
		  || (type && AGGREGATE_TYPE_P (type)))
		break;

	      /* ECX not EAX is the first allocated register.  */
	      if (regno == AX_REG)
		regno = CX_REG;
	    }
	  return gen_rtx_REG (mode, regno);
	}
      break;

    case DFmode:
      if (cum->float_in_sse < 2)
	break;
    case SFmode:
      if (cum->float_in_sse < 1)
	break;
      /* FALLTHRU */
    case TImode:
      /* In 32bit, we pass TImode in xmm registers.  */
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
    case V4SFmode:
    case V2DFmode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (!TARGET_SSE && !warnedsse && cum->warn_sse)
	    {
	      warnedsse = true;
	      warning (0, "SSE vector argument without SSE enabled "
		       "changes the ABI");
	    }
	  if (cum->sse_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->sse_regno + FIRST_SSE_REG);
	}
      break;

    case OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();

    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (cum->sse_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->sse_regno + FIRST_SSE_REG);
	}
      break;

    case V8QImode:
    case V4HImode:
    case V2SImode:
    case V2SFmode:
    case V1TImode:
    case V1DImode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
	    {
	      warnedmmx = true;
	      warning (0, "MMX vector argument without MMX enabled "
		       "changes the ABI");
	    }
	  if (cum->mmx_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->mmx_regno + FIRST_MMX_REG);
	}
      break;
    }

  return NULL_RTX;
}
static rtx
function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
		 enum machine_mode orig_mode, const_tree type, bool named)
{
  /* Handle a hidden AL argument containing number of registers
     for varargs x86-64 functions.  */
  if (mode == VOIDmode)
    return GEN_INT (cum->maybe_vaarg
		    ? (cum->sse_nregs < 0
		       ? X86_64_SSE_REGPARM_MAX
		       : cum->sse_regno)
		    : -1);

  switch (mode)
    {
    default:
      break;

    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      /* Unnamed 256bit vector mode parameters are passed on stack.  */
      if (!named)
	return NULL;
      break;
    }

  return construct_container (mode, orig_mode, type, 0, cum->nregs,
			      cum->sse_nregs,
			      &x86_64_int_parameter_registers[cum->regno],
			      cum->sse_regno);
}
static rtx
function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
		    enum machine_mode orig_mode, bool named,
		    HOST_WIDE_INT bytes)
{
  unsigned int regno;

  /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
     We use value of -2 to specify that current function call is MSABI.  */
  if (mode == VOIDmode)
    return GEN_INT (-2);

  /* If we've run out of registers, it goes on the stack.  */
  if (cum->nregs == 0)
    return NULL_RTX;

  regno = x86_64_ms_abi_int_parameter_registers[cum->regno];

  /* Only floating point modes are passed in anything but integer regs.  */
  if (TARGET_SSE && (mode == SFmode || mode == DFmode))
    {
      if (named)
	regno = cum->regno + FIRST_SSE_REG;
      else
	{
	  rtx t1, t2;

	  /* Unnamed floating parameters are passed in both the
	     SSE and integer registers.  */
	  t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
	  t2 = gen_rtx_REG (mode, regno);
	  t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
	  t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
	  return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
	}
    }
  /* Handle aggregated types passed in register.  */
  if (orig_mode == BLKmode)
    {
      if (bytes > 0 && bytes <= 8)
	mode = (bytes > 4 ? DImode : SImode);
      if (mode == BLKmode)
	mode = DImode;
    }

  return gen_reg_or_parallel (mode, orig_mode, regno);
}
/* Return where to put the arguments to a function.
   Return zero to push the argument on the stack, or a hard register in
   which to store the argument.

   MODE is the argument's machine mode.  TYPE is the data type of the
   argument.  It is null for libcalls where that information may not be
   available.  CUM gives information about the preceding args and about
   the function being called.  NAMED is nonzero if this argument is a
   named parameter (otherwise it is an extra parameter matching an
   ellipsis).  */

static rtx
ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
		   const_tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  enum machine_mode mode = omode;
  HOST_WIDE_INT bytes, words;
  rtx arg;

  if (mode == BLKmode)
    bytes = int_size_in_bytes (type);
  else
    bytes = GET_MODE_SIZE (mode);
  words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* To simplify the code below, represent vector types with a vector mode
     even if MMX/SSE are not active.  */
  if (type && TREE_CODE (type) == VECTOR_TYPE)
    mode = type_natural_mode (type, cum);

  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
    arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
  else if (TARGET_64BIT)
    arg = function_arg_64 (cum, mode, omode, type, named);
  else
    arg = function_arg_32 (cum, mode, omode, type, bytes, words);

  if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
    {
      /* This argument uses 256bit AVX modes.  */
      if (cum->caller)
	cfun->machine->callee_pass_avx256_p = true;
      else
	cfun->machine->caller_pass_avx256_p = true;
    }

  return arg;
}
/* A C expression that indicates when an argument must be passed by
   reference.  If nonzero for an argument, a copy of that argument is
   made in memory and a pointer to the argument is passed instead of
   the argument itself.  The pointer is passed in whatever way is
   appropriate for passing a pointer to that type.  */

static bool
ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
			enum machine_mode mode ATTRIBUTE_UNUSED,
			const_tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  /* See Windows x64 Software Convention.  */
  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
    {
      int msize = (int) GET_MODE_SIZE (mode);
      if (type)
	{
	  /* Arrays are passed by reference.  */
	  if (TREE_CODE (type) == ARRAY_TYPE)
	    return true;

	  if (AGGREGATE_TYPE_P (type))
	    {
	      /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
		 are passed by reference.  */
	      msize = int_size_in_bytes (type);
	    }
	}

      /* __m128 is passed by reference.  */
      switch (msize)
	{
	case 1: case 2: case 4: case 8:
	  break;
	default:
	  return true;
	}
    }
  else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
    return true;

  return false;
}
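/* Examples of the Windows x64 rule above (illustrative):
   struct { char c[3]; } has size 3, not one of 1/2/4/8, so it is
   passed by reference, while struct { int a, b; } (size 8) goes by
   value in a single integer register.  */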
/* Return true when TYPE should be 128bit aligned for 32bit argument
   passing ABI.  XXX: This function is obsolete and is only used for
   checking psABI compatibility with previous versions of GCC.  */

static bool
ix86_compat_aligned_value_p (const_tree type)
{
  enum machine_mode mode = TYPE_MODE (type);
  if (((TARGET_SSE && SSE_REG_MODE_P (mode))
       || mode == TDmode
       || mode == TFmode
       || mode == TCmode)
      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
    return true;
  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  {
	    tree field;

	    /* Walk all the structure fields.  */
	    for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	      {
		if (TREE_CODE (field) == FIELD_DECL
		    && ix86_compat_aligned_value_p (TREE_TYPE (field)))
		  return true;
	      }
	    break;
	  }

	case ARRAY_TYPE:
	  /* Just for use if some languages passes arrays by value.  */
	  if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
	    return true;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  return false;
}
/* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
   XXX: This function is obsolete and is only used for checking psABI
   compatibility with previous versions of GCC.  */

static unsigned int
ix86_compat_function_arg_boundary (enum machine_mode mode,
				   const_tree type, unsigned int align)
{
  /* In 32bit, only _Decimal128 and __float128 are aligned to their
     natural boundaries.  */
  if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
    {
      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
	 make an exception for SSE modes since these require 128bit
	 alignment.

	 The handling here differs from field_alignment.  ICC aligns MMX
	 arguments to 4 byte boundaries, while structure fields are aligned
	 to 8 byte boundaries.  */
      if (!type)
	{
	  if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
	    align = PARM_BOUNDARY;
	}
      else
	{
	  if (!ix86_compat_aligned_value_p (type))
	    align = PARM_BOUNDARY;
	}
    }
  if (align > BIGGEST_ALIGNMENT)
    align = BIGGEST_ALIGNMENT;
  return align;
}
/* Return true when TYPE should be 128bit aligned for 32bit argument
   passing ABI.  */

static bool
ix86_contains_aligned_value_p (const_tree type)
{
  enum machine_mode mode = TYPE_MODE (type);

  if (mode == XFmode || mode == XCmode)
    return false;

  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  {
	    tree field;

	    /* Walk all the structure fields.  */
	    for (field = TYPE_FIELDS (type);
		 field;
		 field = DECL_CHAIN (field))
	      {
		if (TREE_CODE (field) == FIELD_DECL
		    && ix86_contains_aligned_value_p (TREE_TYPE (field)))
		  return true;
	      }
	    break;
	  }

	case ARRAY_TYPE:
	  /* Just for use if some languages passes arrays by value.  */
	  if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
	    return true;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    return TYPE_ALIGN (type) >= 128;

  return false;
}
/* Gives the alignment boundary, in bits, of an argument with the
   specified mode and type.  */

static unsigned int
ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
{
  unsigned int align;
  if (type)
    {
      /* Since the main variant type is used for call, we convert it to
	 the main variant type.  */
      type = TYPE_MAIN_VARIANT (type);
      align = TYPE_ALIGN (type);
    }
  else
    align = GET_MODE_ALIGNMENT (mode);
  if (align < PARM_BOUNDARY)
    align = PARM_BOUNDARY;
  else
    {
      static bool warned;
      unsigned int saved_align = align;

      if (!TARGET_64BIT)
	{
	  /* i386 ABI defines XFmode arguments to be 4 byte aligned.  */
	  if (!type)
	    {
	      if (mode == XFmode || mode == XCmode)
		align = PARM_BOUNDARY;
	    }
	  else if (!ix86_contains_aligned_value_p (type))
	    align = PARM_BOUNDARY;

	  if (align < 128)
	    align = PARM_BOUNDARY;
	}

      if (warn_psabi
	  && !warned
	  && align != ix86_compat_function_arg_boundary (mode, type,
							 saved_align))
	{
	  warned = true;
	  inform (input_location,
		  "The ABI for passing parameters with %d-byte"
		  " alignment has changed in GCC 4.6",
		  align / BITS_PER_UNIT);
	}
    }

  return align;
}
/* Return true if N is a possible register number of function value.  */

static bool
ix86_function_value_regno_p (const unsigned int regno)
{
  switch (regno)
    {
    case AX_REG:
      return true;

    case FIRST_FLOAT_REG:
      /* TODO: The function should depend on current function ABI but
	 builtins.c would need updating then.  Therefore we use the
	 default ABI.  */
      if (TARGET_64BIT && ix86_abi == MS_ABI)
	return false;
      return TARGET_FLOAT_RETURNS_IN_80387;

    case FIRST_SSE_REG:
      return TARGET_SSE;

    case FIRST_MMX_REG:
      if (TARGET_MACHO || TARGET_64BIT)
	return false;
      return TARGET_MMX;
    }

  return false;
}
/* Define how to find the value returned by a function.
   VALTYPE is the data type of the value (as a tree).
   If the precise function being called is known, FUNC is its FUNCTION_DECL;
   otherwise, FUNC is 0.  */

static rtx
function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
		   const_tree fntype, const_tree fn)
{
  unsigned int regno;

  /* 8-byte vector modes in %mm0.  See ix86_return_in_memory for where
     we normally prevent this case when mmx is not available.  However
     some ABIs may require the result to be returned like DImode.  */
  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
    regno = FIRST_MMX_REG;

  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
     we prevent this case when sse is not available.  However some ABIs
     may require the result to be returned like integer TImode.  */
  else if (mode == TImode
	   || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
    regno = FIRST_SSE_REG;

  /* 32-byte vector modes in %ymm0.  */
  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
    regno = FIRST_SSE_REG;

  /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387).  */
  else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
    regno = FIRST_FLOAT_REG;
  else
    /* Most things go in %eax.  */
    regno = AX_REG;

  /* Override FP return register with %xmm0 for local functions when
     SSE math is enabled or for functions with sseregparm attribute.  */
  if ((fn || fntype) && (mode == SFmode || mode == DFmode))
    {
      int sse_level = ix86_function_sseregparm (fntype, fn, false);
      if ((sse_level >= 1 && mode == SFmode)
	  || (sse_level == 2 && mode == DFmode))
	regno = FIRST_SSE_REG;
    }

  /* OImode shouldn't be used directly.  */
  gcc_assert (mode != OImode);

  return gen_rtx_REG (orig_mode, regno);
}
static rtx
function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
		   const_tree valtype)
{
  rtx ret;

  /* Handle libcalls, which don't provide a type node.  */
  if (valtype == NULL)
    {
      unsigned int regno;

      switch (mode)
	{
	case SFmode:
	case SCmode:
	case DFmode:
	case DCmode:
	case TFmode:
	case SDmode:
	case DDmode:
	case TDmode:
	  regno = FIRST_SSE_REG;
	  break;
	case XFmode:
	case XCmode:
	  regno = FIRST_FLOAT_REG;
	  break;
	case TCmode:
	  return NULL;
	default:
	  regno = AX_REG;
	}

      return gen_rtx_REG (mode, regno);
    }
  else if (POINTER_TYPE_P (valtype))
    {
      /* Pointers are always returned in word_mode.  */
      mode = word_mode;
    }

  ret = construct_container (mode, orig_mode, valtype, 1,
			     X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
			     x86_64_int_return_registers, 0);

  /* For zero sized structures, construct_container returns NULL, but we
     need to keep rest of compiler happy by returning meaningful value.  */
  if (!ret)
    ret = gen_rtx_REG (orig_mode, AX_REG);

  return ret;
}
static rtx
function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
{
  unsigned int regno = AX_REG;

  if (TARGET_SSE)
    {
      switch (GET_MODE_SIZE (mode))
	{
	case 16:
	  if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
	      && !COMPLEX_MODE_P (mode))
	    regno = FIRST_SSE_REG;
	  break;
	case 8:
	case 4:
	  if (mode == SFmode || mode == DFmode)
	    regno = FIRST_SSE_REG;
	  break;
	default:
	  break;
	}
    }
  return gen_rtx_REG (orig_mode, regno);
}
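/* Under the MS convention implemented above (illustrative): a 16-byte
   vector such as __m128 and scalar float/double come back in %xmm0,
   while 1, 2, 4 and 8 byte integers come back in %rax via the AX_REG
   default.  */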
static rtx
ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
		       enum machine_mode orig_mode, enum machine_mode mode)
{
  const_tree fn, fntype;

  fn = NULL_TREE;
  if (fntype_or_decl && DECL_P (fntype_or_decl))
    fn = fntype_or_decl;
  fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;

  if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
    return function_value_ms_64 (orig_mode, mode);
  else if (TARGET_64BIT)
    return function_value_64 (orig_mode, mode, valtype);
  else
    return function_value_32 (orig_mode, mode, fntype, fn);
}
static rtx
ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
		     bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode mode, orig_mode;

  orig_mode = TYPE_MODE (valtype);
  mode = type_natural_mode (valtype, NULL);
  return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
}
/* Pointer function arguments and return values are promoted to
   word_mode.  */

static enum machine_mode
ix86_promote_function_mode (const_tree type, enum machine_mode mode,
			    int *punsignedp, const_tree fntype,
			    int for_return)
{
  if (type != NULL_TREE && POINTER_TYPE_P (type))
    {
      *punsignedp = POINTERS_EXTEND_UNSIGNED;
      return word_mode;
    }
  return default_promote_function_mode (type, mode, punsignedp, fntype,
					for_return);
}
static rtx
ix86_libcall_value (enum machine_mode mode)
{
  return ix86_function_value_1 (NULL, NULL, mode, mode);
}
/* Return true iff type is returned in memory.  */

static bool ATTRIBUTE_UNUSED
return_in_memory_32 (const_tree type, enum machine_mode mode)
{
  HOST_WIDE_INT size;

  if (mode == BLKmode)
    return true;

  size = int_size_in_bytes (type);

  if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
    return false;

  if (VECTOR_MODE_P (mode) || mode == TImode)
    {
      /* User-created vectors small enough to fit in EAX.  */
      if (size < 8)
	return false;

      /* MMX/3dNow values are returned in MM0,
	 except when it doesn't exist or the ABI prescribes otherwise.  */
      if (size == 8)
	return !TARGET_MMX || TARGET_VECT8_RETURNS;

      /* SSE values are returned in XMM0, except when it doesn't exist.  */
      if (size == 16)
	return !TARGET_SSE;

      /* AVX values are returned in YMM0, except when it doesn't exist.  */
      if (size == 32)
	return !TARGET_AVX;
    }

  if (mode == XFmode)
    return false;

  if (size > 12)
    return true;

  /* OImode shouldn't be used directly.  */
  gcc_assert (mode != OImode);

  return false;
}
static bool ATTRIBUTE_UNUSED
return_in_memory_64 (const_tree type, enum machine_mode mode)
{
  int needed_intregs, needed_sseregs;
  return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
}
static bool ATTRIBUTE_UNUSED
return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
{
  HOST_WIDE_INT size = int_size_in_bytes (type);

  /* __m128 is returned in xmm0.  */
  if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
      && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
    return false;

  /* Otherwise, the size must be exactly in [1248].  */
  return size != 1 && size != 2 && size != 4 && size != 8;
}
static bool
ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
#ifdef SUBTARGET_RETURN_IN_MEMORY
  return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
#else
  const enum machine_mode mode = type_natural_mode (type, NULL);

  if (TARGET_64BIT)
    {
      if (ix86_function_type_abi (fntype) == MS_ABI)
	return return_in_memory_ms_64 (type, mode);
      else
	return return_in_memory_64 (type, mode);
    }
  else
    return return_in_memory_32 (type, mode);
#endif
}
/* When returning SSE vector types, we have a choice of either
     (1) being abi incompatible with a -march switch, or
     (2) generating an error.
   Given no good solution, I think the safest thing is one warning.
   The user won't be able to use -Werror, but....

   Choose the STRUCT_VALUE_RTX hook because that's (at present) only
   called in response to actually generating a caller or callee that
   uses such a type.  As opposed to TARGET_RETURN_IN_MEMORY, which is called
   via aggregate_value_p for general type probing from tree-ssa.  */

static rtx
ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
{
  static bool warnedsse, warnedmmx;

  if (!TARGET_64BIT && type)
    {
      /* Look at the return type of the function, not the function type.  */
      enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));

      if (!TARGET_SSE && !warnedsse)
	{
	  if (mode == TImode
	      || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
	    {
	      warnedsse = true;
	      warning (0, "SSE vector return without SSE enabled "
		       "changes the ABI");
	    }
	}

      if (!TARGET_MMX && !warnedmmx)
	{
	  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
	    {
	      warnedmmx = true;
	      warning (0, "MMX vector return without MMX enabled "
		       "changes the ABI");
	    }
	}
    }

  return NULL;
}
/* Create the va_list data type.  */

/* Returns the calling convention specific va_list data type.
   The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI.  */

static tree
ix86_build_builtin_va_list_abi (enum calling_abi abi)
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  /* For i386 we use plain pointer to argument area.  */
  if (!TARGET_64BIT || abi == MS_ABI)
    return build_pointer_type (char_type_node);

  record = lang_hooks.types.make_type (RECORD_TYPE);
  type_decl = build_decl (BUILTINS_LOCATION,
			  TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("gp_offset"),
		      unsigned_type_node);
  f_fpr = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("fp_offset"),
		      unsigned_type_node);
  f_ovf = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("overflow_arg_area"),
		      ptr_type_node);
  f_sav = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("reg_save_area"),
		      ptr_type_node);

  va_list_gpr_counter_field = f_gpr;
  va_list_fpr_counter_field = f_fpr;

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  TYPE_STUB_DECL (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  DECL_CHAIN (f_gpr) = f_fpr;
  DECL_CHAIN (f_fpr) = f_ovf;
  DECL_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}
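/* The record built above corresponds to the familiar SysV x86-64
   va_list layout, in C terms:

       typedef struct __va_list_tag {
	 unsigned int gp_offset;	// byte offset into reg_save_area, GPRs
	 unsigned int fp_offset;	// byte offset into reg_save_area, SSE regs
	 void *overflow_arg_area;	// next stack-passed argument
	 void *reg_save_area;		// prologue-saved register block
       } va_list[1];
*/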
/* Setup the builtin va_list data type and for 64-bit the additional
   calling convention specific va_list data types.  */

static tree
ix86_build_builtin_va_list (void)
{
  tree ret = ix86_build_builtin_va_list_abi (ix86_abi);

  /* Initialize abi specific va_list builtin types.  */
  if (TARGET_64BIT)
    {
      tree t;
      if (ix86_abi == MS_ABI)
	{
	  t = ix86_build_builtin_va_list_abi (SYSV_ABI);
	  if (TREE_CODE (t) != RECORD_TYPE)
	    t = build_variant_type_copy (t);
	  sysv_va_list_type_node = t;
	}
      else
	{
	  t = ret;
	  if (TREE_CODE (t) != RECORD_TYPE)
	    t = build_variant_type_copy (t);
	  sysv_va_list_type_node = t;
	}
      if (ix86_abi != MS_ABI)
	{
	  t = ix86_build_builtin_va_list_abi (MS_ABI);
	  if (TREE_CODE (t) != RECORD_TYPE)
	    t = build_variant_type_copy (t);
	  ms_va_list_type_node = t;
	}
      else
	{
	  t = ret;
	  if (TREE_CODE (t) != RECORD_TYPE)
	    t = build_variant_type_copy (t);
	  ms_va_list_type_node = t;
	}
    }

  return ret;
}
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */

static void
setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
{
  rtx save_area, mem;
  alias_set_type set;
  rtx label;
  rtx test;
  int i, max;

  /* GPR size of varargs save area.  */
  if (cfun->va_list_gpr_size)
    ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
  else
    ix86_varargs_gpr_size = 0;

  /* FPR size of varargs save area.  We don't need it if we don't pass
     anything in SSE registers.  */
  if (TARGET_SSE && cfun->va_list_fpr_size)
    ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
  else
    ix86_varargs_fpr_size = 0;

  if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
    return;

  save_area = frame_pointer_rtx;
  set = get_varargs_alias_set ();

  max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
  if (max > X86_64_REGPARM_MAX)
    max = X86_64_REGPARM_MAX;

  for (i = cum->regno; i < max; i++)
    {
      mem = gen_rtx_MEM (word_mode,
			 plus_constant (save_area, i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      emit_move_insn (mem,
		      gen_rtx_REG (word_mode,
				   x86_64_int_parameter_registers[i]));
    }

  if (ix86_varargs_fpr_size)
    {
      enum machine_mode smode;

      /* Now emit code to save SSE registers.  The AX parameter contains number
	 of SSE parameter registers used to call this function, though all we
	 actually check here is the zero/non-zero status.  */

      label = gen_label_rtx ();
      test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
      emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
				      label));

      /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
	 we used movdqa (i.e. TImode) instead?  Perhaps even better would
	 be if we could determine the real mode of the data, via a hook
	 into pass_stdarg.  Ignore all that for now.  */
      smode = V4SFmode;
      if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
	crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);

      max = cum->sse_regno + cfun->va_list_fpr_size / 16;
      if (max > X86_64_SSE_REGPARM_MAX)
	max = X86_64_SSE_REGPARM_MAX;

      for (i = cum->sse_regno; i < max; ++i)
	{
	  mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
	  mem = gen_rtx_MEM (smode, mem);
	  MEM_NOTRAP_P (mem) = 1;
	  set_mem_alias_set (mem, set);
	  set_mem_align (mem, GET_MODE_ALIGNMENT (smode));

	  emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
	}

      emit_label (label);
    }
}
static void
setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
{
  alias_set_type set = get_varargs_alias_set ();
  int i;

  /* Reset to zero, as there might be a sysv vaarg used
     before.  */
  ix86_varargs_gpr_size = 0;
  ix86_varargs_fpr_size = 0;

  for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
    {
      rtx reg, mem;

      mem = gen_rtx_MEM (Pmode,
			 plus_constant (virtual_incoming_args_rtx,
					i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);

      reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
      emit_move_insn (mem, reg);
    }
}
static void
ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
			     tree type, int *pretend_size ATTRIBUTE_UNUSED,
			     int no_rtl)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  CUMULATIVE_ARGS next_cum;
  tree fntype;

  /* This argument doesn't appear to be used anymore.  Which is good,
     because the old code here didn't suppress rtl generation.  */
  gcc_assert (!no_rtl);

  if (!TARGET_64BIT)
    return;

  fntype = TREE_TYPE (current_function_decl);

  /* For varargs, we do not want to skip the dummy va_dcl argument.
     For stdargs, we do want to skip the last named argument.  */
  next_cum = *cum;
  if (stdarg_p (fntype))
    ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
			       true);

  if (cum->call_abi == MS_ABI)
    setup_incoming_varargs_ms_64 (&next_cum);
  else
    setup_incoming_varargs_64 (&next_cum);
}
/* Checks if TYPE is of kind va_list char *.  */

static bool
is_va_list_char_pointer (tree type)
{
  tree canonic;

  /* For 32-bit it is always true.  */
  if (!TARGET_64BIT)
    return true;
  canonic = ix86_canonical_va_list_type (type);
  return (canonic == ms_va_list_type_node
	  || (ix86_abi == MS_ABI && canonic == va_list_type_node));
}
/* Implement va_start.  */

static void
ix86_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  tree type;
  rtx ovf_rtx;

  if (flag_split_stack
      && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    {
      unsigned int scratch_regno;

      /* When we are splitting the stack, we can't refer to the stack
	 arguments using internal_arg_pointer, because they may be on
	 the old stack.  The split stack prologue will arrange to
	 leave a pointer to the old stack arguments in a scratch
	 register, which we here copy to a pseudo-register.  The split
	 stack prologue can't set the pseudo-register directly because
	 it (the prologue) runs before any registers have been saved.  */

      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno != INVALID_REGNUM)
	{
	  rtx reg, seq;

	  reg = gen_reg_rtx (Pmode);
	  cfun->machine->split_stack_varargs_pointer = reg;

	  start_sequence ();
	  emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
	  seq = get_insns ();
	  end_sequence ();

	  push_topmost_sequence ();
	  emit_insn_after (seq, entry_of_function ());
	  pop_topmost_sequence ();
	}
    }

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
    {
      if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
	std_expand_builtin_va_start (valist, nextarg);
      else
	{
	  rtx va_r, next;

	  va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
	  next = expand_binop (ptr_mode, add_optab,
			       cfun->machine->split_stack_varargs_pointer,
			       crtl->args.arg_offset_rtx,
			       NULL_RTX, 0, OPTAB_LIB_WIDEN);
	  convert_move (va_r, next, 0);
	}
      return;
    }

  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  valist = build_simple_mem_ref (valist);
  TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
  /* The following should be folded into the MEM_REF offset.  */
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
		f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
		f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
		f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
		f_sav, NULL_TREE);

  /* Count number of gp and fp argument registers used.  */
  words = crtl->args.info.words;
  n_gpr = crtl->args.info.regno;
  n_fpr = crtl->args.info.sse_regno;

  if (cfun->va_list_gpr_size)
    {
      type = TREE_TYPE (gpr);
      t = build2 (MODIFY_EXPR, type,
		  gpr, build_int_cst (type, n_gpr * 8));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  if (TARGET_SSE && cfun->va_list_fpr_size)
    {
      type = TREE_TYPE (fpr);
      t = build2 (MODIFY_EXPR, type, fpr,
		  build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  /* Find the overflow area.  */
  type = TREE_TYPE (ovf);
  if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    ovf_rtx = crtl->args.internal_arg_pointer;
  else
    ovf_rtx = cfun->machine->split_stack_varargs_pointer;
  t = make_tree (type, ovf_rtx);
  if (words != 0)
    t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
  t = build2 (MODIFY_EXPR, type, ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
    {
      /* Find the register save area.
	 The function prologue saves it right above the stack frame.  */
      type = TREE_TYPE (sav);
      t = make_tree (type, frame_pointer_rtx);
      if (!ix86_varargs_gpr_size)
	t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
      t = build2 (MODIFY_EXPR, type, sav, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }
}
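/* As an illustration (not from the original sources): for a function
   whose fixed arguments consume two integer registers and one SSE
   register, the expansion above initializes gp_offset = 16,
   fp_offset = 48 + 16 = 64, points overflow_arg_area at the first
   stack-passed argument, and points reg_save_area at the block saved
   by the prologue (adjusted when only SSE registers were saved).  */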
/* Implement va_arg.  */

static tree
ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
		      gimple_seq *post_p)
{
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  tree lab_false, lab_over = NULL_TREE;
  tree addr, t2;
  rtx container;
  int indirect_p = 0;
  tree ptrtype;
  enum machine_mode nat_mode;
  unsigned int arg_boundary;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);

  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
		build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
  valist = build_va_arg_indirect_ref (valist);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect_p)
    type = build_pointer_type (type);
  size = int_size_in_bytes (type);
  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  nat_mode = type_natural_mode (type, NULL);
  switch (nat_mode)
    {
    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      /* Unnamed 256bit vector mode parameters are passed on stack.  */
      if (!TARGET_64BIT_MS_ABI)
	{
	  container = NULL;
	  break;
	}

    default:
      container = construct_container (nat_mode, TYPE_MODE (type),
				       type, 0, X86_64_REGPARM_MAX,
				       X86_64_SSE_REGPARM_MAX, intreg,
				       0);
      break;
    }

  /* Pull the value out of the saved registers.  */

  addr = create_tmp_var (ptr_type_node, "addr");

  if (container)
    {
      int needed_intregs, needed_sseregs;
      bool need_temp;
      tree int_addr, sse_addr;

      lab_false = create_artificial_label (UNKNOWN_LOCATION);
      lab_over = create_artificial_label (UNKNOWN_LOCATION);

      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);

      need_temp = (!REG_P (container)
		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
		       || TYPE_ALIGN (type) > 128));

      /* In case we are passing a structure, verify that it is laid out
	 as a consecutive block in the register save area.  If not, we
	 need to do moves.  */
      if (!need_temp && !REG_P (container))
	{
	  /* Verify that all registers are strictly consecutive.  */
	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 16)
		    need_temp = 1;
		}
	    }
	  else
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 8)
		    need_temp = 1;
		}
	    }
	}
      if (!need_temp)
	{
	  int_addr = addr;
	  sse_addr = addr;
	}
      else
	{
	  int_addr = create_tmp_var (ptr_type_node, "int_addr");
	  sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
	}

      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
	{
	  t = build_int_cst (TREE_TYPE (gpr),
			     (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
	  t = build2 (GE_EXPR, boolean_type_node, gpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}
      if (needed_sseregs)
	{
	  t = build_int_cst (TREE_TYPE (fpr),
			     (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
			     + X86_64_REGPARM_MAX * 8);
	  t = build2 (GE_EXPR, boolean_type_node, fpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
	{
	  /* int_addr = gpr + sav; */
	  t = fold_build_pointer_plus (sav, gpr);
	  gimplify_assign (int_addr, t, pre_p);
	}
      if (needed_sseregs)
	{
	  /* sse_addr = fpr + sav; */
	  t = fold_build_pointer_plus (sav, fpr);
	  gimplify_assign (sse_addr, t, pre_p);
	}
      if (need_temp)
	{
	  int i, prev_size = 0;
	  tree temp = create_tmp_var (type, "va_arg_tmp");

	  /* addr = &temp; */
	  t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
	  gimplify_assign (addr, t, pre_p);

	  for (i = 0; i < XVECLEN (container, 0); i++)
	    {
	      rtx slot = XVECEXP (container, 0, i);
	      rtx reg = XEXP (slot, 0);
	      enum machine_mode mode = GET_MODE (reg);
	      tree piece_type;
	      tree addr_type;
	      tree daddr_type;
	      tree src_addr, src;
	      int src_offset;
	      tree dest_addr, dest;
	      int cur_size = GET_MODE_SIZE (mode);

	      gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
	      prev_size = INTVAL (XEXP (slot, 1));
	      if (prev_size + cur_size > size)
		{
		  cur_size = size - prev_size;
		  mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
		  if (mode == BLKmode)
		    mode = QImode;
		}
	      piece_type = lang_hooks.types.type_for_mode (mode, 1);
	      if (mode == GET_MODE (reg))
		addr_type = build_pointer_type (piece_type);
	      else
		addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
							 true);
	      daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
							true);

	      if (SSE_REGNO_P (REGNO (reg)))
		{
		  src_addr = sse_addr;
		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
		}
	      else
		{
		  src_addr = int_addr;
		  src_offset = REGNO (reg) * 8;
		}
	      src_addr = fold_convert (addr_type, src_addr);
	      src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);

	      dest_addr = fold_convert (daddr_type, addr);
	      dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
	      if (cur_size == GET_MODE_SIZE (mode))
		{
		  src = build_va_arg_indirect_ref (src_addr);
		  dest = build_va_arg_indirect_ref (dest_addr);

		  gimplify_assign (dest, src, pre_p);
		}
	      else
		{
		  tree copy
		    = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
				       3, dest_addr, src_addr,
				       size_int (cur_size));
		  gimplify_and_add (copy, pre_p);
		}
	      prev_size += cur_size;
	    }
	}

      if (needed_intregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
		      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
	  gimplify_assign (gpr, t, pre_p);
	}

      if (needed_sseregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
		      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
	  gimplify_assign (fpr, t, pre_p);
	}

      gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));

      gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
    }

  /* ... otherwise out of the overflow area.  */

  /* When we align parameter on stack for caller, if the parameter
     alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
     aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We will match callee
     here with caller.  */
  arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
  if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
    arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;

  /* Care for on-stack alignment if needed.  */
  if (arg_boundary <= 64 || size == 0)
    t = ovf;
  else
    {
      HOST_WIDE_INT align = arg_boundary / 8;
      t = fold_build_pointer_plus_hwi (ovf, align - 1);
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), -align));
    }

  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
  gimplify_assign (addr, t, pre_p);

  t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
  gimplify_assign (unshare_expr (ovf), t, pre_p);

  if (container)
    gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));

  ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
  addr = fold_convert (ptrtype, addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);
  return build_va_arg_indirect_ref (addr);
}
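
/* Illustrative arithmetic for the overflow-area rounding above,
   assuming a 32-byte aligned argument (align == 32) and an overflow
   pointer whose low byte is 0x58:  (0x58 + 31) & -32 == 0x77 & -32
   == 0x60, so the next argument is fetched from the next 32-byte
   boundary on the stack.  */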
/* Return true if OPNUM's MEM should be matched
   in movabs* patterns.  */

bool
ix86_check_movabs (rtx insn, int opnum)
{
  rtx set, mem;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  mem = XEXP (set, opnum);
  while (GET_CODE (mem) == SUBREG)
    mem = SUBREG_REG (mem);
  gcc_assert (MEM_P (mem));
  return volatile_ok || !MEM_VOLATILE_P (mem);
}
/* Initialize the table of extra 80387 mathematical constants.  */

static void
init_ext_80387_constants (void)
{
  static const char * cst[5] =
  {
    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
    "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
  };
  int i;

  for (i = 0; i < 5; i++)
    {
      real_from_string (&ext_80387_constants_table[i], cst[i]);
      /* Ensure each constant is rounded to XFmode precision.  */
      real_convert (&ext_80387_constants_table[i],
		    XFmode, &ext_80387_constants_table[i]);
    }

  ext_80387_constants_init = 1;
}
/* Return non-zero if the constant is something that
   can be loaded with a special instruction.  */

int
standard_80387_constant_p (rtx x)
{
  enum machine_mode mode = GET_MODE (x);
  REAL_VALUE_TYPE r;

  if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
    return -1;

  if (x == CONST0_RTX (mode))
    return 1;
  if (x == CONST1_RTX (mode))
    return 2;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  /* For XFmode constants, try to find a special 80387 instruction when
     optimizing for size or on those CPUs that benefit from them.  */
  if (mode == XFmode
      && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
    {
      int i;

      if (! ext_80387_constants_init)
	init_ext_80387_constants ();

      for (i = 0; i < 5; i++)
	if (real_identical (&r, &ext_80387_constants_table[i]))
	  return i + 3;
    }

  /* Load of the constant -0.0 or -1.0 will be split as
     fldz;fchs or fld1;fchs sequence.  */
  if (real_isnegzero (&r))
    return 8;
  if (real_identical (&r, &dconstm1))
    return 9;

  return 0;
}
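
/* Reconstructed mapping of the return values used above; treat this
   as a reader's note rather than a normative table: 1 stands for 0.0
   (fldz), 2 for 1.0 (fld1), 3..7 for the ext_80387_constants_table
   entries (fldlg2, fldln2, fldl2e, fldl2t, fldpi), and 8/9 for
   -0.0/-1.0, which are later split into fldz;fchs and fld1;fchs.  */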
/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_80387_constant_opcode (rtx x)
{
  switch (standard_80387_constant_p (x))
    {
    case 1:
      return "fldz";
    case 2:
      return "fld1";
    case 3:
      return "fldlg2";
    case 4:
      return "fldln2";
    case 5:
      return "fldl2e";
    case 6:
      return "fldl2t";
    case 7:
      return "fldpi";
    case 8:
    case 9:
      return "#";
    default:
      gcc_unreachable ();
    }
}

/* Return the CONST_DOUBLE representing the 80387 constant that is
   loaded by the specified special instruction.  The argument IDX
   matches the return value from standard_80387_constant_p.  */

rtx
standard_80387_constant_rtx (int idx)
{
  int i;

  if (! ext_80387_constants_init)
    init_ext_80387_constants ();

  switch (idx)
    {
    case 3:
    case 4:
    case 5:
    case 6:
    case 7:
      i = idx - 3;
      break;

    default:
      gcc_unreachable ();
    }

  return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
				       XFmode);
}
/* Return 1 if X is all 0s and 2 if X is all 1s
   in supported SSE/AVX vector mode.  */

int
standard_sse_constant_p (rtx x)
{
  enum machine_mode mode = GET_MODE (x);

  if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (vector_all_ones_operand (x, mode))
    switch (mode)
      {
      case V16QImode:
      case V8HImode:
      case V4SImode:
      case V2DImode:
	if (TARGET_SSE2)
	  return 2;
      case V32QImode:
      case V16HImode:
      case V8SImode:
      case V4DImode:
	if (TARGET_AVX2)
	  return 2;
      default:
	break;
      }

  return 0;
}
/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_sse_constant_opcode (rtx insn, rtx x)
{
  switch (standard_sse_constant_p (x))
    {
    case 1:
      switch (get_attr_mode (insn))
	{
	case MODE_TI:
	  if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
	    return "%vpxor\t%0, %d0";
	case MODE_V2DF:
	  if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
	    return "%vxorpd\t%0, %d0";
	case MODE_V4SF:
	  return "%vxorps\t%0, %d0";

	case MODE_OI:
	  if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
	    return "vpxor\t%x0, %x0, %x0";
	case MODE_V4DF:
	  if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
	    return "vxorpd\t%x0, %x0, %x0";
	case MODE_V8SF:
	  return "vxorps\t%x0, %x0, %x0";

	default:
	  break;
	}

    case 2:
      if (TARGET_AVX)
	return "vpcmpeqd\t%0, %0, %0";
      else
	return "pcmpeqd\t%0, %0";

    default:
      break;
    }
  gcc_unreachable ();
}
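
/* Example of the selection above (illustration only): clearing an SSE
   register in V4SF mode prints as "xorps %xmm0, %xmm0" (or "vxorps"
   under AVX), while an all-ones vector is built with
   "pcmpeqd %xmm0, %xmm0", which sets every bit regardless of the
   previous register contents.  */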
/* Returns true if OP contains a symbol reference.  */

bool
symbolic_reference_mentioned_p (rtx op)
{
  const char *fmt;
  int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return true;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return true;
	}

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
	return true;
    }

  return false;
}
/* Return true if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return false then.  Return false if there is no frame
   marker to de-allocate.  */

bool
ix86_can_use_return_insn_p (void)
{
  struct ix86_frame frame;

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow more than 32k pop, since that's all we can do
     with one instruction.  */
  if (crtl->args.pops_args && crtl->args.size >= 32768)
    return 0;

  ix86_compute_frame_layout (&frame);
  return (frame.stack_pointer_offset == UNITS_PER_WORD
	  && (frame.nregs + frame.nsseregs) == 0);
}
/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

static bool
ix86_frame_pointer_required (void)
{
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)
    return true;

  /* Several x86 OSes need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return true;

  /* For older 32-bit runtimes setjmp requires valid frame-pointer.  */
  if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
    return true;

  /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
     turns off the frame pointer by default.  Turn it back on now if
     we've not got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER
      && (!current_function_is_leaf
	  || ix86_current_function_calls_tls_descriptor))
    return true;

  if (crtl->profile && !flag_fentry)
    return true;

  return false;
}
/* Record that the current function accesses previous call frames.  */

void
ix86_setup_frame_addresses (void)
{
  cfun->machine->accesses_prev_frame = 1;
}

#ifndef USE_HIDDEN_LINKONCE
# if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
#  define USE_HIDDEN_LINKONCE 1
# else
#  define USE_HIDDEN_LINKONCE 0
# endif
#endif

static int pic_labels_used;

/* Fills in the label name that should be used for a pc thunk for
   the given register.  */

static void
get_pc_thunk_name (char name[32], unsigned int regno)
{
  gcc_assert (!TARGET_64BIT);

  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
  else
    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
}
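
/* E.g. for the %ebx thunk this yields the name "__x86.get_pc_thunk.bx"
   when hidden linkonce support is available, and an internal label of
   the form "LPR<regno>" otherwise (illustration only).  */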
/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  */

static void
ix86_code_end (void)
{
  rtx xops[2];
  int regno;

  for (regno = AX_REG; regno <= SP_REG; regno++)
    {
      char name[32];
      tree decl;

      if (!(pic_labels_used & (1 << regno)))
	continue;

      get_pc_thunk_name (name, regno);

      decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			 get_identifier (name),
			 build_function_type_list (void_type_node, NULL_TREE));
      DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
				       NULL_TREE, void_type_node);
      TREE_PUBLIC (decl) = 1;
      TREE_STATIC (decl) = 1;

#if TARGET_MACHO
      if (TARGET_MACHO)
	{
	  switch_to_section (darwin_sections[text_coal_section]);
	  fputs ("\t.weak_definition\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputs ("\n\t.private_extern\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  putc ('\n', asm_out_file);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	  DECL_WEAK (decl) = 1;
	}
      else
#endif
      if (USE_HIDDEN_LINKONCE)
	{
	  DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);

	  targetm.asm_out.unique_section (decl, 0);
	  switch_to_section (get_named_section (decl, NULL, 0));

	  targetm.asm_out.globalize_label (asm_out_file, name);
	  fputs ("\t.hidden\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  putc ('\n', asm_out_file);
	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
	}
      else
	{
	  switch_to_section (text_section);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}

      DECL_INITIAL (decl) = make_node (BLOCK);
      current_function_decl = decl;
      init_function_start (decl);
      first_function_block_is_cold = false;
      /* Make sure unwind info is emitted for the thunk if needed.  */
      final_start_function (emit_barrier (), asm_out_file, 1);

      /* Pad stack IP move with 4 instructions (two NOPs count
	 as one instruction).  */
      if (TARGET_PAD_SHORT_FUNCTION)
	{
	  int i = 8;

	  while (i--)
	    fputs ("\tnop\n", asm_out_file);
	}

      xops[0] = gen_rtx_REG (Pmode, regno);
      xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
      output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
      fputs ("\tret\n", asm_out_file);
      final_end_function ();
      init_insn_lengths ();
      free_after_compilation (cfun);
      set_cfun (NULL);
      current_function_decl = NULL;
    }

  if (flag_split_stack)
    file_end_indicate_split_stack ();
}
/* Emit code for the SET_GOT patterns.  */

const char *
output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
{
  rtx xops[3];

  xops[0] = dest;

  if (TARGET_VXWORKS_RTP && flag_pic)
    {
      /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
      xops[2] = gen_rtx_MEM (Pmode,
			     gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);

      /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
	 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
	 an unadorned address.  */
      xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
      output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
      return "";
    }

  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (!flag_pic)
    {
      xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());

      output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);

#if TARGET_MACHO
      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
	 is what will be referenced by the Mach-O PIC subsystem.  */
      if (!label)
	ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
#endif

      targetm.asm_out.internal_label (asm_out_file, "L",
				      CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
    }
  else
    {
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("call\t%X2", xops);

#if TARGET_MACHO
      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
	 is what will be referenced by the Mach-O PIC subsystem.  */
      if (!label)
	ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
      else
	targetm.asm_out.internal_label (asm_out_file, "L",
					CODE_LABEL_NUMBER (label));
#endif
    }

  if (!TARGET_MACHO)
    output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);

  return "";
}
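
/* For the common ELF -fpic case the sequence emitted above looks like
   (shown for %ebx; illustration only):

	call	__x86.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   where the thunk emitted by ix86_code_end loads the return address
   into %ebx, so that after the add %ebx points at the GOT.  */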
/* Generate a "push" pattern for input ARG.  */

static rtx
gen_push (rtx arg)
{
  struct machine_function *m = cfun->machine;

  if (m->fs.cfa_reg == stack_pointer_rtx)
    m->fs.cfa_offset += UNITS_PER_WORD;
  m->fs.sp_offset += UNITS_PER_WORD;

  if (REG_P (arg) && GET_MODE (arg) != word_mode)
    arg = gen_rtx_REG (word_mode, REGNO (arg));

  return gen_rtx_SET (VOIDmode,
		      gen_rtx_MEM (word_mode,
				   gen_rtx_PRE_DEC (Pmode,
						    stack_pointer_rtx)),
		      arg);
}
/* Generate a "pop" pattern for input ARG.  */

static rtx
gen_pop (rtx arg)
{
  if (REG_P (arg) && GET_MODE (arg) != word_mode)
    arg = gen_rtx_REG (word_mode, REGNO (arg));

  return gen_rtx_SET (VOIDmode,
		      arg,
		      gen_rtx_MEM (word_mode,
				   gen_rtx_POST_INC (Pmode,
						     stack_pointer_rtx)));
}
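
/* The patterns built above have the following shape (shown for
   word_mode == DImode; illustration only):

     push:  (set (mem:DI (pre_dec (reg:DI sp))) (reg:DI arg))
     pop:   (set (reg:DI arg) (mem:DI (post_inc (reg:DI sp))))

   The stack pointer update is implicit in the pre_dec/post_inc
   address, which is why gen_push has to update the tracked cfa_offset
   and sp_offset by hand.  */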
/* Return >= 0 if there is an unused call-clobbered register available
   for the entire function.  */

static unsigned int
ix86_select_alt_pic_regnum (void)
{
  if (current_function_is_leaf
      && !crtl->profile
      && !ix86_current_function_calls_tls_descriptor)
    {
      int i, drap;
      /* Can't use the same register for both PIC and DRAP.  */
      if (crtl->drap_reg)
	drap = REGNO (crtl->drap_reg);
      else
	drap = -1;
      for (i = 2; i >= 0; --i)
	if (i != drap && !df_regs_ever_live_p (i))
	  return i;
    }

  return INVALID_REGNUM;
}
/* Return TRUE if we need to save REGNO.  */

static bool
ix86_save_reg (unsigned int regno, bool maybe_eh_return)
{
  if (pic_offset_table_rtx
      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
	  || crtl->profile
	  || crtl->calls_eh_return
	  || crtl->uses_const_pool))
    return ix86_select_alt_pic_regnum () == INVALID_REGNUM;

  if (crtl->calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      for (i = 0; ; i++)
	{
	  unsigned test = EH_RETURN_DATA_REGNO (i);
	  if (test == INVALID_REGNUM)
	    break;
	  if (test == regno)
	    return true;
	}
    }

  if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
    return true;

  return (df_regs_ever_live_p (regno)
	  && !call_used_regs[regno]
	  && !fixed_regs[regno]
	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}
/* Return the number of saved general purpose registers.  */

static int
ix86_nsaved_regs (void)
{
  int nregs = 0;
  int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      nregs ++;
  return nregs;
}

/* Return the number of saved SSE registers.  */

static int
ix86_nsaved_sseregs (void)
{
  int nregs = 0;
  int regno;

  if (!TARGET_64BIT_MS_ABI)
    return 0;
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      nregs ++;
  return nregs;
}
/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  If stack alignment is needed, we can only replace argument
   pointer with hard frame pointer, or replace frame pointer with stack
   pointer.  Otherwise, frame pointer elimination is automatically
   handled and all other eliminations are valid.  */

static bool
ix86_can_eliminate (const int from, const int to)
{
  if (stack_realign_fp)
    return ((from == ARG_POINTER_REGNUM
	     && to == HARD_FRAME_POINTER_REGNUM)
	    || (from == FRAME_POINTER_REGNUM
		&& to == STACK_POINTER_REGNUM));
  else
    return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
}
/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (int from, int to)
{
  struct ix86_frame frame;
  ix86_compute_frame_layout (&frame);

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset;
  else if (from == FRAME_POINTER_REGNUM
	   && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
  else
    {
      gcc_assert (to == STACK_POINTER_REGNUM);

      if (from == ARG_POINTER_REGNUM)
	return frame.stack_pointer_offset;

      gcc_assert (from == FRAME_POINTER_REGNUM);
      return frame.stack_pointer_offset - frame.frame_pointer_offset;
    }
}
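
/* Worked example (illustration): in a 64-bit leaf function with no
   saved registers and no locals, the frame holds only the return
   address, so frame.stack_pointer_offset == UNITS_PER_WORD == 8 and
   eliminating ARG_POINTER_REGNUM to STACK_POINTER_REGNUM yields 8.
   This is the same configuration in which ix86_can_use_return_insn_p
   above allows a bare "ret".  */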
/* In a dynamically-aligned function, we can't know the offset from
   stack pointer to frame pointer, so we must ensure that setjmp
   eliminates fp against the hard fp (%ebp) rather than trying to
   index from %esp up to the top of the frame across a gap that is
   of unknown (at compile-time) size.  */
static rtx
ix86_builtin_setjmp_frame_value (void)
{
  return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
}

/* When using -fsplit-stack, the allocation routines set a field in
   the TCB to the bottom of the stack plus this much space, measured
   in bytes.  */

#define SPLIT_STACK_AVAILABLE 256
/* Fill structure ix86_frame about frame of currently computed function.  */

static void
ix86_compute_frame_layout (struct ix86_frame *frame)
{
  unsigned int stack_alignment_needed;
  HOST_WIDE_INT offset;
  unsigned int preferred_alignment;
  HOST_WIDE_INT size = get_frame_size ();
  HOST_WIDE_INT to_allocate;

  frame->nregs = ix86_nsaved_regs ();
  frame->nsseregs = ix86_nsaved_sseregs ();

  stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
  preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;

  /* The 64-bit MS ABI seems to require stack alignment to be always 16,
     except for function prologues and leaf functions.  */
  if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
      && (!current_function_is_leaf || cfun->calls_alloca != 0
	  || ix86_current_function_calls_tls_descriptor))
    {
      preferred_alignment = 16;
      stack_alignment_needed = 16;
      crtl->preferred_stack_boundary = 128;
      crtl->stack_alignment_needed = 128;
    }

  gcc_assert (!size || stack_alignment_needed);
  gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
  gcc_assert (preferred_alignment <= stack_alignment_needed);

  /* For SEH we have to limit the amount of code movement into the prologue.
     At present we do this via a BLOCKAGE, at which point there's very little
     scheduling that can be done, which means that there's very little point
     in doing anything except PUSHs.  */
  if (TARGET_SEH)
    cfun->machine->use_fast_prologue_epilogue = false;

  /* During reload iteration the amount of registers saved can change.
     Recompute the value as needed.  Do not recompute when amount of registers
     didn't change as reload does multiple calls to the function and does not
     expect the decision to change within single iteration.  */
  else if (!optimize_function_for_size_p (cfun)
	   && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
    {
      int count = frame->nregs;
      struct cgraph_node *node = cgraph_get_node (current_function_decl);

      cfun->machine->use_fast_prologue_epilogue_nregs = count;

      /* The fast prologue uses move instead of push to save registers.  This
         is significantly longer, but also executes faster as modern hardware
         can execute the moves in parallel, but can't do that for push/pop.

	 Be careful about choosing what prologue to emit:  When function takes
	 many instructions to execute we may use slow version as well as in
	 case function is known to be outside hot spot (this is known with
	 feedback only).  Weight the size of function by number of registers
	 to save as it is cheap to use one or two push instructions but very
	 slow to use many of them.  */
      if (count)
	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
      if (node->frequency < NODE_FREQUENCY_NORMAL
	  || (flag_branch_probabilities
	      && node->frequency < NODE_FREQUENCY_HOT))
	cfun->machine->use_fast_prologue_epilogue = false;
      else
	cfun->machine->use_fast_prologue_epilogue
	  = !expensive_function_p (count);
    }

  frame->save_regs_using_mov
    = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
       /* If static stack checking is enabled and done with probes,
	  the registers need to be saved before allocating the frame.  */
       && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);

  /* Skip return address.  */
  offset = UNITS_PER_WORD;

  /* Skip pushed static chain.  */
  if (ix86_static_chain_on_stack)
    offset += UNITS_PER_WORD;

  /* Skip saved base pointer.  */
  if (frame_pointer_needed)
    offset += UNITS_PER_WORD;
  frame->hfp_save_offset = offset;

  /* The traditional frame pointer location is at the top of the frame.  */
  frame->hard_frame_pointer_offset = offset;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;
  frame->reg_save_offset = offset;

  /* Align and set SSE register save area.  */
  if (frame->nsseregs)
    {
      /* The only ABI that has saved SSE registers (Win64) also has a
	 16-byte aligned default stack, and thus we don't need to be
	 within the re-aligned local stack frame to save them.  */
      gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
      offset = (offset + 16 - 1) & -16;
      offset += frame->nsseregs * 16;
    }
  frame->sse_reg_save_offset = offset;

  /* The re-aligned stack starts here.  Values before this point are not
     directly comparable with values below this point.  In order to make
     sure that no value happens to be the same before and after, force
     the alignment computation below to add a non-zero value.  */
  if (stack_realign_fp)
    offset = (offset + stack_alignment_needed) & -stack_alignment_needed;

  frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
  offset += frame->va_arg_size;

  /* Align start of frame for local function.  */
  if (stack_realign_fp
      || offset != frame->sse_reg_save_offset
      || size != 0
      || !current_function_is_leaf
      || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;

  /* Add outgoing arguments area.  Can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when function calls alloca.  Alloca
     expander assumes that last crtl->outgoing_args_size
     of stack frame are unused.  */
  if (ACCUMULATE_OUTGOING_ARGS
      && (!current_function_is_leaf || cfun->calls_alloca
	  || ix86_current_function_calls_tls_descriptor))
    {
      offset += crtl->outgoing_args_size;
      frame->outgoing_arguments_size = crtl->outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  Only needed if we're calling another function
     or using alloca.  */
  if (!current_function_is_leaf || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = (offset + preferred_alignment - 1) & -preferred_alignment;

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size prologue needs to allocate.  */
  to_allocate = offset - frame->sse_reg_save_offset;

  if ((!to_allocate && frame->nregs <= 1)
      || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
    frame->save_regs_using_mov = false;

  if (ix86_using_red_zone ()
      && current_function_sp_is_unchanging
      && current_function_is_leaf
      && !ix86_current_function_calls_tls_descriptor)
    {
      frame->red_zone_size = to_allocate;
      if (frame->save_regs_using_mov)
	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->stack_pointer_offset -= frame->red_zone_size;

  /* The SEH frame pointer location is near the bottom of the frame.
     This is enforced by the fact that the difference between the
     stack pointer and the frame pointer is limited to 240 bytes in
     the unwind data structure.  */
  if (TARGET_SEH)
    {
      HOST_WIDE_INT diff;

      /* If we can leave the frame pointer where it is, do so.  */
      diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
      if (diff > 240 || (diff & 15) != 0)
	{
	  /* Ideally we'd determine what portion of the local stack frame
	     (within the constraint of the lowest 240) is most heavily used.
	     But without that complication, simply bias the frame pointer
	     by 128 bytes so as to maximize the amount of the local stack
	     frame that is addressable with 8-bit offsets.  */
	  frame->hard_frame_pointer_offset
	    = frame->stack_pointer_offset - 128;
	}
    }
}
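
/* Rough picture of the layout computed above for 64-bit code with a
   frame pointer; an illustration only, the exact offsets depend on
   the flags handled above (static chain, realignment, red zone, ...):

	CFA - 8 	return address
	CFA - 16	saved %rbp		<- hard_frame_pointer_offset
	...		GP register saves	<- reg_save_offset
	...		SSE saves (16-aligned)	<- sse_reg_save_offset
	...		va_arg register area
	...		local variables 	<- frame_pointer_offset
	...		outgoing arguments	<- stack_pointer_offset

   All of the frame->*_offset fields grow downward from the CFA, which
   is why any slot can later be addressed as CFA - offset.  */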
/* This is semi-inlined memory_address_length, but simplified
   since we know that we're always dealing with reg+offset, and
   to avoid having to create and discard all that rtl.  */

static inline int
choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
{
  int len = 4;

  if (offset == 0)
    {
      /* EBP and R13 cannot be encoded without an offset.  */
      len = (regno == BP_REG || regno == R13_REG);
    }
  else if (IN_RANGE (offset, -128, 127))
    len = 1;

  /* ESP and R12 must be encoded with a SIB byte.  */
  if (regno == SP_REG || regno == R12_REG)
    len++;

  return len;
}
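
/* Examples of the encoding lengths returned above (illustrative):
   (%rax) needs no displacement byte (len 0), (%rbp) must be encoded
   as 0(%rbp) (len 1), -8(%rsp) needs a disp8 plus a SIB byte (len 2),
   and 1024(%rax) needs a 4-byte displacement (len 4).  */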
/* Return an RTX that points to CFA_OFFSET within the stack frame.
   The valid base registers are taken from CFUN->MACHINE->FS.  */

static rtx
choose_baseaddr (HOST_WIDE_INT cfa_offset)
{
  const struct machine_function *m = cfun->machine;
  rtx base_reg = NULL;
  HOST_WIDE_INT base_offset = 0;

  if (m->use_fast_prologue_epilogue)
    {
      /* Choose the base register most likely to allow the most scheduling
	 opportunities.  Generally FP is valid throughout the function,
	 while DRAP must be reloaded within the epilogue.  But choose either
	 over the SP due to increased encoding size.  */

      if (m->fs.fp_valid)
	{
	  base_reg = hard_frame_pointer_rtx;
	  base_offset = m->fs.fp_offset - cfa_offset;
	}
      else if (m->fs.drap_valid)
	{
	  base_reg = crtl->drap_reg;
	  base_offset = 0 - cfa_offset;
	}
      else if (m->fs.sp_valid)
	{
	  base_reg = stack_pointer_rtx;
	  base_offset = m->fs.sp_offset - cfa_offset;
	}
    }
  else
    {
      HOST_WIDE_INT toffset;
      int len = 16, tlen;

      /* Choose the base register with the smallest address encoding.
	 With a tie, choose FP > DRAP > SP.  */
      if (m->fs.sp_valid)
	{
	  base_reg = stack_pointer_rtx;
	  base_offset = m->fs.sp_offset - cfa_offset;
	  len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
	}
      if (m->fs.drap_valid)
	{
	  toffset = 0 - cfa_offset;
	  tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
	  if (tlen <= len)
	    {
	      base_reg = crtl->drap_reg;
	      base_offset = toffset;
	      len = tlen;
	    }
	}
      if (m->fs.fp_valid)
	{
	  toffset = m->fs.fp_offset - cfa_offset;
	  tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
	  if (tlen <= len)
	    {
	      base_reg = hard_frame_pointer_rtx;
	      base_offset = toffset;
	      len = tlen;
	    }
	}
    }
  gcc_assert (base_reg != NULL);

  return plus_constant (base_reg, base_offset);
}
/* Emit code to save registers in the prologue.  */

static void
ix86_emit_save_regs (void)
{
  unsigned int regno;
  rtx insn;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
	insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
	RTX_FRAME_RELATED_P (insn) = 1;
      }
}
/* Emit a single register save at CFA - CFA_OFFSET.  */

static void
ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
			      HOST_WIDE_INT cfa_offset)
{
  struct machine_function *m = cfun->machine;
  rtx reg = gen_rtx_REG (mode, regno);
  rtx mem, addr, base, insn;

  addr = choose_baseaddr (cfa_offset);
  mem = gen_frame_mem (mode, addr);

  /* For SSE saves, we need to indicate the 128-bit alignment.  */
  set_mem_align (mem, GET_MODE_ALIGNMENT (mode));

  insn = emit_move_insn (mem, reg);
  RTX_FRAME_RELATED_P (insn) = 1;

  base = addr;
  if (GET_CODE (base) == PLUS)
    base = XEXP (base, 0);
  gcc_checking_assert (REG_P (base));

  /* When saving registers into a re-aligned local stack frame, avoid
     any tricky guessing by dwarf2out.  */
  if (m->fs.realigned)
    {
      gcc_checking_assert (stack_realign_drap);

      if (regno == REGNO (crtl->drap_reg))
	{
	  /* A bit of a hack.  We force the DRAP register to be saved in
	     the re-aligned stack frame, which provides us with a copy
	     of the CFA that will last past the prologue.  Install it.  */
	  gcc_checking_assert (cfun->machine->fs.fp_valid);
	  addr = plus_constant (hard_frame_pointer_rtx,
				cfun->machine->fs.fp_offset - cfa_offset);
	  mem = gen_rtx_MEM (mode, addr);
	  add_reg_note (insn, REG_CFA_DEF_CFA, mem);
	}
      else
	{
	  /* The frame pointer is a stable reference within the
	     aligned frame.  Use it.  */
	  gcc_checking_assert (cfun->machine->fs.fp_valid);
	  addr = plus_constant (hard_frame_pointer_rtx,
				cfun->machine->fs.fp_offset - cfa_offset);
	  mem = gen_rtx_MEM (mode, addr);
	  add_reg_note (insn, REG_CFA_EXPRESSION,
			gen_rtx_SET (VOIDmode, mem, reg));
	}
    }

  /* The memory may not be relative to the current CFA register,
     which means that we may need to generate a new pattern for
     use by the unwind info.  */
  else if (base != m->fs.cfa_reg)
    {
      addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
      mem = gen_rtx_MEM (mode, addr);
      add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
    }
}
/* Emit code to save registers using MOV insns.
   First register is stored at CFA - CFA_OFFSET.  */
static void
ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
	ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
	cfa_offset -= UNITS_PER_WORD;
      }
}

/* Emit code to save SSE registers using MOV insns.
   First register is stored at CFA - CFA_OFFSET.  */
static void
ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
	ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
	cfa_offset -= 16;
      }
}
static GTY(()) rtx queued_cfa_restores;

/* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
   manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
   Don't add the note if the previously saved value will be left untouched
   within stack red-zone till return, as unwinders can find the same value
   in the register and on the stack.  */

static void
ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
{
  if (!crtl->shrink_wrapped
      && cfa_offset <= cfun->machine->fs.red_zone_offset)
    return;

  if (insn)
    {
      add_reg_note (insn, REG_CFA_RESTORE, reg);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    queued_cfa_restores
      = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
}

/* Add queued REG_CFA_RESTORE notes if any to INSN.  */

static void
ix86_add_queued_cfa_restore_notes (rtx insn)
{
  rtx last;
  if (!queued_cfa_restores)
    return;
  for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
    ;
  XEXP (last, 1) = REG_NOTES (insn);
  REG_NOTES (insn) = queued_cfa_restores;
  queued_cfa_restores = NULL_RTX;
  RTX_FRAME_RELATED_P (insn) = 1;
}
/* Expand prologue or epilogue stack adjustment.
   The pattern exists to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame related,
   zero if %r11 register is live and cannot be freely used and positive
   otherwise.  */

static void
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
			   int style, bool set_cfa)
{
  struct machine_function *m = cfun->machine;
  rtx insn;
  bool add_frame_related_expr = false;

  if (Pmode == SImode)
    insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
  else if (x86_64_immediate_operand (offset, DImode))
    insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
  else
    {
      rtx tmp;
      /* r11 is used by indirect sibcall return as well, set before the
	 epilogue and used after the epilogue.  */
      if (style)
	tmp = gen_rtx_REG (DImode, R11_REG);
      else
	{
	  gcc_assert (src != hard_frame_pointer_rtx
		      && dest != hard_frame_pointer_rtx);
	  tmp = hard_frame_pointer_rtx;
	}
      insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
      if (style < 0)
	add_frame_related_expr = true;

      insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
    }

  insn = emit_insn (insn);
  if (style >= 0)
    ix86_add_queued_cfa_restore_notes (insn);

  if (set_cfa)
    {
      rtx r;

      gcc_assert (m->fs.cfa_reg == src);
      m->fs.cfa_offset += INTVAL (offset);
      m->fs.cfa_reg = dest;

      r = gen_rtx_PLUS (Pmode, src, offset);
      r = gen_rtx_SET (VOIDmode, dest, r);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else if (style < 0)
    {
      RTX_FRAME_RELATED_P (insn) = 1;
      if (add_frame_related_expr)
	{
	  rtx r = gen_rtx_PLUS (Pmode, src, offset);
	  r = gen_rtx_SET (VOIDmode, dest, r);
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
	}
    }

  if (dest == stack_pointer_rtx)
    {
      HOST_WIDE_INT ooffset = m->fs.sp_offset;
      bool valid = m->fs.sp_valid;

      if (src == hard_frame_pointer_rtx)
	{
	  valid = m->fs.fp_valid;
	  ooffset = m->fs.fp_offset;
	}
      else if (src == crtl->drap_reg)
	{
	  valid = m->fs.drap_valid;
	  ooffset = 0;
	}
      else
	{
	  /* Else there are two possibilities: SP itself, which we set
	     up as the default above.  Or EH_RETURN_STACKADJ_RTX, which
	     is taken care of by hand along the eh_return path.  */
	  gcc_checking_assert (src == stack_pointer_rtx
			       || offset == const0_rtx);
	}

      m->fs.sp_offset = ooffset - INTVAL (offset);
      m->fs.sp_valid = valid;
    }
}
/* Find an available register to be used as a dynamic realign argument
   pointer register.  Such a register will be written in the prologue
   and used at the beginning of the body, so it must not be
	1. a parameter passing register.
	2. the GOT pointer.
   We reuse the static-chain register if it is available.  Otherwise, we
   use DI for i386 and R13 for x86-64.  We chose R13 since it has
   a shorter encoding.

   Return: the regno of the chosen register.  */

static unsigned int
find_drap_reg (void)
{
  tree decl = cfun->decl;

  if (TARGET_64BIT)
    {
      /* Use R13 for a nested function or a function that needs a
	 static chain.  Since a function with a tail call may use any
	 caller-saved registers in the epilogue, DRAP must not use a
	 caller-saved register in such a case.  */
      if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
	return R13_REG;

      return R10_REG;
    }
  else
    {
      /* Use DI for a nested function or a function that needs a
	 static chain.  Since a function with a tail call may use any
	 caller-saved registers in the epilogue, DRAP must not use a
	 caller-saved register in such a case.  */
      if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
	return DI_REG;

      /* Reuse the static chain register if it isn't used for parameter
	 passing.  */
      if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
	{
	  unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
	  if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
	    return CX_REG;
	}
      return DI_REG;
    }
}
/* Return minimum incoming stack alignment.  */

static unsigned int
ix86_minimum_incoming_stack_boundary (bool sibcall)
{
  unsigned int incoming_stack_boundary;

  /* Prefer the one specified at command line.  */
  if (ix86_user_incoming_stack_boundary)
    incoming_stack_boundary = ix86_user_incoming_stack_boundary;
  /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
     if -mstackrealign is used, it isn't used for sibcall check and
     estimated stack alignment is 128bit.  */
  else if (!sibcall
	   && !TARGET_64BIT
	   && ix86_force_align_arg_pointer
	   && crtl->stack_alignment_estimated == 128)
    incoming_stack_boundary = MIN_STACK_BOUNDARY;
  else
    incoming_stack_boundary = ix86_default_incoming_stack_boundary;

  /* Incoming stack alignment can be changed on individual functions
     via force_align_arg_pointer attribute.  We use the smallest
     incoming stack boundary.  */
  if (incoming_stack_boundary > MIN_STACK_BOUNDARY
      && lookup_attribute (ix86_force_align_arg_pointer_string,
			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    incoming_stack_boundary = MIN_STACK_BOUNDARY;

  /* The incoming stack frame has to be aligned at least at
     parm_stack_boundary.  */
  if (incoming_stack_boundary < crtl->parm_stack_boundary)
    incoming_stack_boundary = crtl->parm_stack_boundary;

  /* Stack at entrance of main is aligned by runtime.  We use the
     smallest incoming stack boundary.  */
  if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
      && DECL_NAME (current_function_decl)
      && MAIN_NAME_P (DECL_NAME (current_function_decl))
      && DECL_FILE_SCOPE_P (current_function_decl))
    incoming_stack_boundary = MAIN_STACK_BOUNDARY;

  return incoming_stack_boundary;
}
/* Update incoming stack boundary and estimated stack alignment.  */

static void
ix86_update_stack_boundary (void)
{
  ix86_incoming_stack_boundary
    = ix86_minimum_incoming_stack_boundary (false);

  /* x86_64 vararg needs 16byte stack alignment for register save
     area.  */
  if (TARGET_64BIT
      && cfun->stdarg
      && crtl->stack_alignment_estimated < 128)
    crtl->stack_alignment_estimated = 128;
}
/* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
   needed or an rtx for DRAP otherwise.  */

static rtx
ix86_get_drap_rtx (void)
{
  if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
    crtl->need_drap = true;

  if (stack_realign_drap)
    {
      /* Assign DRAP to vDRAP and return vDRAP.  */
      unsigned int regno = find_drap_reg ();
      rtx drap_vreg;
      rtx arg_ptr;
      rtx seq, insn;

      arg_ptr = gen_rtx_REG (Pmode, regno);
      crtl->drap_reg = arg_ptr;

      start_sequence ();
      drap_vreg = copy_to_reg (arg_ptr);
      seq = get_insns ();
      end_sequence ();

      insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
      if (!optimize)
	{
	  add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      return drap_vreg;
    }
  else
    return NULL;
}
/* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */

static rtx
ix86_internal_arg_pointer (void)
{
  return virtual_incoming_args_rtx;
}

struct scratch_reg {
  rtx reg;
  bool saved;
};

/* Return a short-lived scratch register for use on function entry.
   In 32-bit mode, it is valid only after the registers are saved
   in the prologue.  This register must be released by means of
   release_scratch_register_on_entry once it is dead.  */

static void
get_scratch_register_on_entry (struct scratch_reg *sr)
{
  int regno;

  sr->saved = false;

  if (TARGET_64BIT)
    {
      /* We always use R11 in 64-bit mode.  */
      regno = R11_REG;
    }
  else
    {
      tree decl = current_function_decl, fntype = TREE_TYPE (decl);
      bool fastcall_p
	= lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
      bool static_chain_p = DECL_STATIC_CHAIN (decl);
      int regparm = ix86_function_regparm (fntype, decl);
      int drap_regno
	= crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;

      /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
	 for the static chain register.  */
      if ((regparm < 1 || (fastcall_p && !static_chain_p))
	  && drap_regno != AX_REG)
	regno = AX_REG;
      else if (regparm < 2 && drap_regno != DX_REG)
	regno = DX_REG;
      /* ecx is the static chain register.  */
      else if (regparm < 3 && !fastcall_p && !static_chain_p
	       && drap_regno != CX_REG)
	regno = CX_REG;
      else if (ix86_save_reg (BX_REG, true))
	regno = BX_REG;
      /* esi is the static chain register.  */
      else if (!(regparm == 3 && static_chain_p)
	       && ix86_save_reg (SI_REG, true))
	regno = SI_REG;
      else if (ix86_save_reg (DI_REG, true))
	regno = DI_REG;
      else
	{
	  regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
	  sr->saved = true;
	}
    }

  sr->reg = gen_rtx_REG (Pmode, regno);
  if (sr->saved)
    {
      rtx insn = emit_insn (gen_push (sr->reg));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
}
/* Release a scratch register obtained from the preceding function.  */

static void
release_scratch_register_on_entry (struct scratch_reg *sr)
{
  if (sr->saved)
    {
      rtx x, insn = emit_insn (gen_pop (sr->reg));

      /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop.  */
      RTX_FRAME_RELATED_P (insn) = 1;
      x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
    }
}
#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)

/* Emit code to adjust the stack pointer by SIZE bytes while probing it.  */

static void
ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
{
  /* We skip the probe for the first interval + a small dope of 4 words and
     probe that many bytes past the specified size to maintain a protection
     area at the bottom of the stack.  */
  const int dope = 4 * UNITS_PER_WORD;
  rtx size_rtx = GEN_INT (size), last;

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  The run-time loop is made up of 11 insns in the
     generic case while the compile-time loop is made up of 3+2*(n-1) insns
     for n # of intervals.  */
  if (size <= 5 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i, adjust;
      bool first_probe = true;

      /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
	 values of N from 1 until it exceeds SIZE.  If only one probe is
	 needed, this will not generate any code.  Then adjust and probe
	 to PROBE_INTERVAL + SIZE.  */
      for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
	{
	  if (first_probe)
	    {
	      adjust = 2 * PROBE_INTERVAL + dope;
	      first_probe = false;
	    }
	  else
	    adjust = PROBE_INTERVAL;

	  emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				  plus_constant (stack_pointer_rtx,
						 -adjust)));
	  emit_stack_probe (stack_pointer_rtx);
	}

      if (first_probe)
	adjust = size + PROBE_INTERVAL + dope;
      else
	adjust = size + PROBE_INTERVAL - i;

      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      plus_constant (stack_pointer_rtx, -adjust)));
      emit_stack_probe (stack_pointer_rtx);

      /* Adjust back to account for the additional first interval.  */
      last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				     plus_constant (stack_pointer_rtx,
						    PROBE_INTERVAL + dope)));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr);


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;


      /* Step 2: compute initial and final value of the loop counter.  */

      /* SP = SP_0 + PROBE_INTERVAL.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      plus_constant (stack_pointer_rtx,
					     - (PROBE_INTERVAL + dope))));

      /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE.  */
      emit_move_insn (sr.reg, GEN_INT (-rounded_size));
      emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
			      gen_rtx_PLUS (Pmode, sr.reg,
					    stack_pointer_rtx)));


      /* Step 3: the loop

	 while (SP != LAST_ADDR)
	   {
	     SP = SP + PROBE_INTERVAL
	     probe at SP
	   }

	 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
	 values of N from 1 until it is equal to ROUNDED_SIZE.  */

      emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));


      /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
	 assert at compile-time that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				  plus_constant (stack_pointer_rtx,
						 rounded_size - size)));
	  emit_stack_probe (stack_pointer_rtx);
	}

      /* Adjust back to account for the additional first interval.  */
      last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				     plus_constant (stack_pointer_rtx,
						    PROBE_INTERVAL + dope)));

      release_scratch_register_on_entry (&sr);
    }

  gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);

  /* Even if the stack pointer isn't the CFA register, we need to correctly
     describe the adjustments made to it, in particular differentiate the
     frame-related ones from the frame-unrelated ones.  */
  if (size > 0)
    {
      rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
      XVECEXP (expr, 0, 0)
	= gen_rtx_SET (VOIDmode, stack_pointer_rtx,
		       plus_constant (stack_pointer_rtx, -size));
      XVECEXP (expr, 0, 1)
	= gen_rtx_SET (VOIDmode, stack_pointer_rtx,
		       plus_constant (stack_pointer_rtx,
				      PROBE_INTERVAL + dope + size));
      add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
      RTX_FRAME_RELATED_P (last) = 1;
    }

  cfun->machine->fs.sp_offset += size;

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
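
/* Worked example of the small-size path above, assuming
   PROBE_INTERVAL == 4096 and 64-bit code (dope == 32); illustrative
   numbers only: for size == 10000 the code emits  sub $8224,
   sub $4096, sub $1808  with a probe after each, then  add $4128  to
   adjust back.  The net movement is 8224 + 4096 + 1808 - 4128
   == 10000 bytes, with every 4096-byte page in the new area touched.  */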
/* Adjust the stack pointer up to REG while probing it.  */

const char *
output_adjust_stack_and_probe (rtx reg)
{
  static int labelno = 0;
  char loop_lab[32], end_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
  ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* Jump to END_LAB if SP == LAST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
  fputs ("\tje\t", asm_out_file);
  assemble_name_raw (asm_out_file, end_lab);
  fputc ('\n', asm_out_file);

  /* SP = SP + PROBE_INTERVAL.  */
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at SP.  */
  xops[1] = const0_rtx;
  output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);

  fprintf (asm_out_file, "\tjmp\t");
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);

  return "";
}
/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.  */

static void
ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
{
  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  The run-time loop is made up of 7 insns in the
     generic case while the compile-time loop is made up of n insns for n #
     of intervals.  */
  if (size <= 7 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i;

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
	 it exceeds SIZE.  If only one probe is needed, this will not
	 generate any code.  Then probe at FIRST + SIZE.  */
      for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
	emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));

      emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size, last;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr);


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;


      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_OFFSET = FIRST.  */
      emit_move_insn (sr.reg, GEN_INT (-first));

      /* LAST_OFFSET = FIRST + ROUNDED_SIZE.  */
      last = first + rounded_size;


      /* Step 3: the loop

	 while (TEST_ADDR != LAST_ADDR)
	   {
	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
	     probe at TEST_ADDR
	   }

	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
	 until it is equal to ROUNDED_SIZE.  */

      emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
	 that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
						       stack_pointer_rtx,
						       sr.reg),
					 rounded_size - size));

      release_scratch_register_on_entry (&sr);
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
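
/* Example of the unrolled case above, assuming PROBE_INTERVAL == 4096
   (illustration only): for first == 0 and size == 12288 this emits
   exactly three probes, at sp-4096, sp-8192 and sp-12288, and no loop;
   anything larger than 7 * PROBE_INTERVAL falls through to the loop
   form using a scratch register.  */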
/* Probe a range of stack addresses from REG to END, inclusive.  These are
   offsets from the current stack pointer.  */

const char *
output_probe_stack_range (rtx reg, rtx end)
{
  static int labelno = 0;
  char loop_lab[32], end_lab[32];
  rtx xops[3];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
  ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* Jump to END_LAB if TEST_ADDR == LAST_ADDR.  */
  xops[0] = reg;
  xops[1] = end;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
  fputs ("\tje\t", asm_out_file);
  assemble_name_raw (asm_out_file, end_lab);
  fputc ('\n', asm_out_file);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at TEST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  xops[2] = const0_rtx;
  output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);

  fprintf (asm_out_file, "\tjmp\t");
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);

  return "";
}
/* Finalize stack_realign_needed flag, which will guide prologue/epilogue
   to be generated in correct form.  */

static void
ix86_finalize_stack_realign_flags (void)
{
  /* Check if stack realign is really needed after reload, and
     store the result in cfun.  */
  unsigned int incoming_stack_boundary
    = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
       ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
  unsigned int stack_realign = (incoming_stack_boundary
				< (current_function_is_leaf
				   ? crtl->max_used_stack_slot_alignment
				   : crtl->stack_alignment_needed));

  if (crtl->stack_realign_finalized)
    {
      /* After stack_realign_needed is finalized, we can no longer
	 change it.  */
      gcc_assert (crtl->stack_realign_needed == stack_realign);
      return;
    }

  /* If the only reason for frame_pointer_needed is that we conservatively
     assumed stack realignment might be needed, but in the end nothing that
     needed the stack alignment had been spilled, clear frame_pointer_needed
     and say we don't need stack realignment.  */
  if (stack_realign
      && !crtl->need_drap
      && frame_pointer_needed
      && current_function_is_leaf
      && flag_omit_frame_pointer
      && current_function_sp_is_unchanging
      && !ix86_current_function_calls_tls_descriptor
      && !crtl->accesses_prior_frames
      && !cfun->calls_alloca
      && !crtl->calls_eh_return
      && !(flag_stack_check && STACK_CHECK_MOVING_SP)
      && !ix86_frame_pointer_required ()
      && get_frame_size () == 0
      && ix86_nsaved_sseregs () == 0
      && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
    {
      HARD_REG_SET set_up_by_prologue, prologue_used;
      basic_block bb;
      rtx insn;

      CLEAR_HARD_REG_SET (prologue_used);
      CLEAR_HARD_REG_SET (set_up_by_prologue);
      add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
      add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
      add_to_hard_reg_set (&set_up_by_prologue, Pmode,
			   HARD_FRAME_POINTER_REGNUM);
      FOR_EACH_BB (bb)
	FOR_BB_INSNS (bb, insn)
	  if (NONDEBUG_INSN_P (insn)
	      && requires_stack_frame_p (insn, prologue_used,
					 set_up_by_prologue))
	    {
	      crtl->stack_realign_needed = stack_realign;
	      crtl->stack_realign_finalized = true;
	      return;
	    }

      frame_pointer_needed = false;
      stack_realign = false;
      crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
      crtl->stack_alignment_needed = incoming_stack_boundary;
      crtl->stack_alignment_estimated = incoming_stack_boundary;
      if (crtl->preferred_stack_boundary > incoming_stack_boundary)
	crtl->preferred_stack_boundary = incoming_stack_boundary;
      df_finish_pass (true);
      df_scan_alloc (NULL);
      df_scan_blocks ();
      df_compute_regs_ever_live (true);
      df_analyze ();
    }

  crtl->stack_realign_needed = stack_realign;
  crtl->stack_realign_finalized = true;
}
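
/* Informal example of the early-exit scan above: for a leaf function such
   as "int f (int x) { return x + 1; }" built with -O2
   -fomit-frame-pointer, no insn requires a stack frame, so the loop finds
   nothing and both frame_pointer_needed and stack_realign end up cleared,
   leaving an essentially empty prologue.  */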
/* Expand the prologue into a bunch of separate insns.  */

void
ix86_expand_prologue (void)
{
  struct machine_function *m = cfun->machine;
  rtx insn, t;
  bool pic_reg_used;
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;
  bool int_registers_saved;

  ix86_finalize_stack_realign_flags ();

  /* DRAP should not coexist with stack_realign_fp.  */
  gcc_assert (!(crtl->drap_reg && stack_realign_fp));

  memset (&m->fs, 0, sizeof (m->fs));

  /* Initialize CFA state for before the prologue.  */
  m->fs.cfa_reg = stack_pointer_rtx;
  m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;

  /* Track SP offset to the CFA.  We continue tracking this after we've
     swapped the CFA register away from SP.  In the case of re-alignment
     this is fudged; we're interested in offsets within the local frame.  */
  m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
  m->fs.sp_valid = true;

  ix86_compute_frame_layout (&frame);

  if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
    {
      /* We should have already generated an error for any use of
	 ms_hook on a nested function.  */
      gcc_checking_assert (!ix86_static_chain_on_stack);

      /* Check if profiling is active and we shall use the profiling-
	 before-prologue variant.  If so, sorry.  */
      if (crtl->profile && flag_fentry != 0)
	sorry ("ms_hook_prologue attribute isn%'t compatible "
	       "with -mfentry for 32-bit");

      /* In ix86_asm_output_function_label we emitted:
	 8b ff     movl.s %edi,%edi
	 55        push   %ebp
	 8b ec     movl.s %esp,%ebp

	 This matches the hookable function prologue in Win32 API
	 functions in Microsoft Windows XP Service Pack 2 and newer.
	 Wine uses this to enable Windows apps to hook the Win32 API
	 functions provided by Wine.

	 What that means is that we've already set up the frame pointer.  */

      if (frame_pointer_needed
	  && !(crtl->drap_reg && crtl->stack_realign_needed))
	{
	  rtx push, mov;

	  /* We've decided to use the frame pointer already set up.
	     Describe this to the unwinder by pretending that both
	     push and mov insns happen right here.

	     Putting the unwind info here at the end of the ms_hook
	     is done so that we can make absolutely certain we get
	     the required byte sequence at the start of the function,
	     rather than relying on an assembler that can produce
	     the exact encoding required.

	     However it does mean (in the unpatched case) that we have
	     a 1 insn window where the asynchronous unwind info is
	     incorrect.  However, if we placed the unwind info at
	     its correct location we would have incorrect unwind info
	     in the patched case.  Which is probably all moot since
	     I don't expect Wine generates dwarf2 unwind info for the
	     system libraries that use this feature.  */

	  insn = emit_insn (gen_blockage ());

	  push = gen_push (hard_frame_pointer_rtx);
	  mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
			     stack_pointer_rtx);
	  RTX_FRAME_RELATED_P (push) = 1;
	  RTX_FRAME_RELATED_P (mov) = 1;

	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));

	  /* Note that gen_push incremented m->fs.cfa_offset, even
	     though we didn't emit the push insn here.  */
	  m->fs.cfa_reg = hard_frame_pointer_rtx;
	  m->fs.fp_offset = m->fs.cfa_offset;
	  m->fs.fp_valid = true;
	}
      else
	{
	  /* The frame pointer is not needed so pop %ebp again.
	     This leaves us with a pristine state.  */
	  emit_insn (gen_pop (hard_frame_pointer_rtx));
	}
    }
  /* The first insn of a function that accepts its static chain on the
     stack is to push the register that would be filled in by a direct
     call.  This insn will be skipped by the trampoline.  */
  else if (ix86_static_chain_on_stack)
    {
      insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
      emit_insn (gen_blockage ());

      /* We don't want to interpret this push insn as a register save,
	 only as a stack adjustment.  The real copy of the register as
	 a save will be done later, if needed.  */
      t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
      t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  /* Emit prologue code to adjust stack alignment and set up DRAP, in case
     DRAP is needed and stack realignment is really needed after reload.  */
  if (stack_realign_drap)
    {
      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;

      /* Only need to push parameter pointer reg if it is caller saved.  */
      if (!call_used_regs[REGNO (crtl->drap_reg)])
	{
	  /* Push arg pointer reg.  */
	  insn = emit_insn (gen_push (crtl->drap_reg));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}

      /* Grab the argument pointer.  */
      t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
      insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
      RTX_FRAME_RELATED_P (insn) = 1;
      m->fs.cfa_reg = crtl->drap_reg;
      m->fs.cfa_offset = 0;

      /* Align the stack.  */
      insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
					stack_pointer_rtx,
					GEN_INT (-align_bytes)));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Replicate the return address on the stack so that the return
	 address can be reached via (argp - 1) slot.  This is needed
	 to implement macro RETURN_ADDR_RTX and intrinsic function
	 expand_builtin_return_addr etc.  */
      t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
      t = gen_frame_mem (word_mode, t);
      insn = emit_insn (gen_push (t));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* For the purposes of frame and register save area addressing,
	 we've started over with a new frame.  */
      m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
      m->fs.realigned = true;
    }
  if (frame_pointer_needed && !m->fs.fp_valid)
    {
      /* Note: AT&T enter does NOT have reversed args.  Enter is probably
	 slower on all targets.  Also sdb doesn't like it.  */
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
	{
	  insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  if (m->fs.cfa_reg == stack_pointer_rtx)
	    m->fs.cfa_reg = hard_frame_pointer_rtx;
	  m->fs.fp_offset = m->fs.sp_offset;
	  m->fs.fp_valid = true;
	}
    }

  int_registers_saved = (frame.nregs == 0);

  if (!int_registers_saved)
    {
      /* If saving registers via PUSH, do so now.  */
      if (!frame.save_regs_using_mov)
	{
	  ix86_emit_save_regs ();
	  int_registers_saved = true;
	  gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
	}

      /* When using the red zone we may start register saving before
	 allocating the stack frame, saving one cycle of the prologue.
	 However, avoid doing this if we have to probe the stack; at least
	 on x86_64 the stack probe can turn into a call that clobbers a
	 red zone location.  */
      else if (ix86_using_red_zone ()
	       && (! TARGET_STACK_PROBE
		   || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
	{
	  ix86_emit_save_regs_using_mov (frame.reg_save_offset);
	  int_registers_saved = true;
	}
    }
  if (stack_realign_fp)
    {
      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
      gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);

      /* The computation of the size of the re-aligned stack frame means
	 that we must allocate the size of the register save area before
	 performing the actual alignment.  Otherwise we cannot guarantee
	 that there's enough storage above the realignment point.  */
      if (m->fs.sp_offset != frame.sse_reg_save_offset)
	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (m->fs.sp_offset
					    - frame.sse_reg_save_offset),
				   -1, false);

      /* Align the stack.  */
      insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
					stack_pointer_rtx,
					GEN_INT (-align_bytes)));

      /* For the purposes of register save area addressing, the stack
	 pointer is no longer valid.  As for the value of sp_offset,
	 see ix86_compute_frame_layout, which we need to match in order
	 to pass verification of stack_pointer_offset at the end.  */
      m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
      m->fs.sp_valid = false;
    }

  allocate = frame.stack_pointer_offset - m->fs.sp_offset;

  if (flag_stack_usage_info)
    {
      /* We start to count from ARG_POINTER.  */
      HOST_WIDE_INT stack_size = frame.stack_pointer_offset;

      /* If it was realigned, take into account the fake frame.  */
      if (stack_realign_drap)
	{
	  if (ix86_static_chain_on_stack)
	    stack_size += UNITS_PER_WORD;

	  if (!call_used_regs[REGNO (crtl->drap_reg)])
	    stack_size += UNITS_PER_WORD;

	  /* This over-estimates by 1 minimal-stack-alignment-unit but
	     mitigates that by counting in the new return address slot.  */
	  current_function_dynamic_stack_size
	    += crtl->stack_alignment_needed / BITS_PER_UNIT;
	}

      current_function_static_stack_size = stack_size;
    }
  /* The stack has already been decremented by the instruction calling us
     so probe if the size is non-negative to preserve the protection area.  */
  if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    {
      /* We expect the registers to be saved when probes are used.  */
      gcc_assert (int_registers_saved);

      if (STACK_CHECK_MOVING_SP)
	{
	  ix86_adjust_stack_and_probe (allocate);
	  allocate = 0;
	}
      else
	{
	  HOST_WIDE_INT size = allocate;

	  if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
	    size = 0x80000000 - STACK_CHECK_PROTECT - 1;

	  if (TARGET_STACK_PROBE)
	    ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
	  else
	    ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
	}
    }

  if (allocate == 0)
    ;
  else if (!ix86_target_stack_probe ()
	   || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (-allocate), -1,
				 m->fs.cfa_reg == stack_pointer_rtx);
    }
  else
    {
      rtx eax = gen_rtx_REG (Pmode, AX_REG);
      rtx r10 = NULL;
      rtx (*adjust_stack_insn)(rtx, rtx, rtx);

      bool eax_live = false;
      bool r10_live = false;

      if (TARGET_64BIT)
	r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
      if (!TARGET_64BIT_MS_ABI)
	eax_live = ix86_eax_live_at_start_p ();

      if (eax_live)
	{
	  emit_insn (gen_push (eax));
	  allocate -= UNITS_PER_WORD;
	}
      if (r10_live)
	{
	  r10 = gen_rtx_REG (Pmode, R10_REG);
	  emit_insn (gen_push (r10));
	  allocate -= UNITS_PER_WORD;
	}

      emit_move_insn (eax, GEN_INT (allocate));
      emit_insn (ix86_gen_allocate_stack_worker (eax, eax));

      /* Use the fact that AX still contains ALLOCATE.  */
      adjust_stack_insn = (Pmode == DImode
			   ? gen_pro_epilogue_adjust_stack_di_sub
			   : gen_pro_epilogue_adjust_stack_si_sub);

      insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
					   stack_pointer_rtx, eax));

      /* Note that SEH directives need to continue tracking the stack
	 pointer even after the frame pointer has been set up.  */
      if (m->fs.cfa_reg == stack_pointer_rtx || TARGET_SEH)
	{
	  if (m->fs.cfa_reg == stack_pointer_rtx)
	    m->fs.cfa_offset += allocate;

	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				     plus_constant (stack_pointer_rtx,
						    -allocate)));
	}
      m->fs.sp_offset += allocate;

      if (r10_live && eax_live)
	{
	  t = choose_baseaddr (m->fs.sp_offset - allocate);
	  emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
			  gen_frame_mem (word_mode, t));
	  t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
	  emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
			  gen_frame_mem (word_mode, t));
	}
      else if (eax_live || r10_live)
	{
	  t = choose_baseaddr (m->fs.sp_offset - allocate);
	  emit_move_insn (gen_rtx_REG (word_mode,
				       (eax_live ? AX_REG : R10_REG)),
			  gen_frame_mem (word_mode, t));
	}
    }
  gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
  /* If we haven't already set up the frame pointer, do so now.  */
  if (frame_pointer_needed && !m->fs.fp_valid)
    {
      insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
			    GEN_INT (frame.stack_pointer_offset
				     - frame.hard_frame_pointer_offset));
      insn = emit_insn (insn);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);

      if (m->fs.cfa_reg == stack_pointer_rtx)
	m->fs.cfa_reg = hard_frame_pointer_rtx;
      m->fs.fp_offset = frame.hard_frame_pointer_offset;
      m->fs.fp_valid = true;
    }

  if (!int_registers_saved)
    ix86_emit_save_regs_using_mov (frame.reg_save_offset);
  if (frame.nsseregs)
    ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);

  pic_reg_used = false;
  if (pic_offset_table_rtx
      && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
	  || crtl->profile))
    {
      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();

      if (alt_pic_reg_used != INVALID_REGNUM)
	SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);

      pic_reg_used = true;
    }

  if (pic_reg_used)
    {
      if (TARGET_64BIT)
	{
	  if (ix86_cmodel == CM_LARGE_PIC)
	    {
	      rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
	      rtx label = gen_label_rtx ();
	      emit_label (label);
	      LABEL_PRESERVE_P (label) = 1;
	      gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
	      insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
						   label));
	      insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
	      insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
					    pic_offset_table_rtx, tmp_reg));
	    }
	  else
	    insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
	}
      else
	{
	  insn = emit_insn (gen_set_got (pic_offset_table_rtx));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
	}
    }

  /* In the pic_reg_used case, make sure that the got load isn't deleted
     when mcount needs it.  Blockage to avoid call movement across the
     mcount call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
     note.  */
  if (crtl->profile && !flag_fentry && pic_reg_used)
    emit_insn (gen_prologue_use (pic_offset_table_rtx));

  if (crtl->drap_reg && !crtl->stack_realign_needed)
    {
      /* vDRAP is set up, but after reload it turns out stack realign
	 isn't necessary; here we emit the prologue to set up DRAP
	 without the stack realign adjustment.  */
      t = choose_baseaddr (0);
      emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
    }

  /* Prevent instructions from being scheduled into the register save push
     sequence when access to the redzone area is done through the frame
     pointer.  The offset between the frame pointer and the stack pointer
     is calculated relative to the value of the stack pointer at the end
     of the function prologue, and moving instructions that access the
     redzone area via the frame pointer inside the push sequence violates
     this assumption.  */
  if (frame_pointer_needed && frame.red_zone_size)
    emit_insn (gen_memory_blockage ());

  /* Emit cld instruction if stringops are used in the function.  */
  if (TARGET_CLD && ix86_current_function_needs_cld)
    emit_insn (gen_cld ());

  /* SEH requires that the prologue end within 256 bytes of the start of
     the function.  Prevent instruction schedules that would extend that.
     Further, prevent alloca modifications to the stack pointer from being
     combined with prologue modifications.  */
  if (TARGET_SEH)
    emit_insn (gen_prologue_use (stack_pointer_rtx));
}
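
/* For orientation (illustrative, not emitted literally): a plain 32-bit
   function with a frame pointer and a 24-byte local frame gets the
   classic sequence from the code above:

	push	%ebp
	mov	%esp, %ebp
	sub	$24, %esp

   with pushes of call-saved registers interleaved when
   frame.save_regs_using_mov is false.  */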
/* Emit code to restore REG using a POP insn.  */

static void
ix86_emit_restore_reg_using_pop (rtx reg)
{
  struct machine_function *m = cfun->machine;
  rtx insn = emit_insn (gen_pop (reg));

  ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
  m->fs.sp_offset -= UNITS_PER_WORD;

  if (m->fs.cfa_reg == crtl->drap_reg
      && REGNO (reg) == REGNO (crtl->drap_reg))
    {
      /* Previously we'd represented the CFA as an expression
	 like *(%ebp - 8).  We've just popped that value from
	 the stack, which means we need to reset the CFA to
	 the drap register.  This will remain until we restore
	 the stack pointer.  */
      add_reg_note (insn, REG_CFA_DEF_CFA, reg);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* This means that the DRAP register is valid for addressing too.  */
      m->fs.drap_valid = true;
      return;
    }

  if (m->fs.cfa_reg == stack_pointer_rtx)
    {
      rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
      RTX_FRAME_RELATED_P (insn) = 1;

      m->fs.cfa_offset -= UNITS_PER_WORD;
    }

  /* When the frame pointer is the CFA, and we pop it, we are
     swapping back to the stack pointer as the CFA.  This happens
     for stack frames that don't allocate other data, so we assume
     the stack pointer is now pointing at the return address, i.e.
     the function entry state, which makes the offset be 1 word.  */
  if (reg == hard_frame_pointer_rtx)
    {
      m->fs.fp_valid = false;
      if (m->fs.cfa_reg == hard_frame_pointer_rtx)
	{
	  m->fs.cfa_reg = stack_pointer_rtx;
	  m->fs.cfa_offset -= UNITS_PER_WORD;

	  add_reg_note (insn, REG_CFA_DEF_CFA,
			gen_rtx_PLUS (Pmode, stack_pointer_rtx,
				      GEN_INT (m->fs.cfa_offset)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
/* Emit code to restore saved registers using POP insns.  */

static void
ix86_emit_restore_regs_using_pop (void)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
      ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
}
/* Emit code and notes for the LEAVE instruction.  */

static void
ix86_emit_leave (void)
{
  struct machine_function *m = cfun->machine;
  rtx insn = emit_insn (ix86_gen_leave ());

  ix86_add_queued_cfa_restore_notes (insn);

  gcc_assert (m->fs.fp_valid);
  m->fs.sp_valid = true;
  m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
  m->fs.fp_valid = false;

  if (m->fs.cfa_reg == hard_frame_pointer_rtx)
    {
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = m->fs.sp_offset;

      add_reg_note (insn, REG_CFA_DEF_CFA,
		    plus_constant (stack_pointer_rtx, m->fs.sp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
      ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
				 m->fs.fp_offset);
    }
}
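
/* Background note: LEAVE behaves like "mov %ebp, %esp; pop %ebp" (or the
   64-bit analogue), which is why the bookkeeping above revalidates the
   stack pointer from the saved frame-pointer offset and invalidates the
   frame pointer in a single step.  */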
/* Emit code to restore saved registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET.  */
static void
ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
				  bool maybe_eh_return)
{
  struct machine_function *m = cfun->machine;
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
      {
	rtx reg = gen_rtx_REG (word_mode, regno);
	rtx insn, mem;

	mem = choose_baseaddr (cfa_offset);
	mem = gen_frame_mem (word_mode, mem);
	insn = emit_move_insn (reg, mem);

	if (m->fs.cfa_reg == crtl->drap_reg
	    && regno == REGNO (crtl->drap_reg))
	  {
	    /* Previously we'd represented the CFA as an expression
	       like *(%ebp - 8).  We've just popped that value from
	       the stack, which means we need to reset the CFA to
	       the drap register.  This will remain until we restore
	       the stack pointer.  */
	    add_reg_note (insn, REG_CFA_DEF_CFA, reg);
	    RTX_FRAME_RELATED_P (insn) = 1;

	    /* This means that the DRAP register is valid for addressing.  */
	    m->fs.drap_valid = true;
	  }
	else
	  ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);

	cfa_offset -= UNITS_PER_WORD;
      }
}
/* Emit code to restore saved SSE registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET.  */
static void
ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
				      bool maybe_eh_return)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
      {
	rtx reg = gen_rtx_REG (V4SFmode, regno);
	rtx mem;

	mem = choose_baseaddr (cfa_offset);
	mem = gen_rtx_MEM (V4SFmode, mem);
	set_mem_align (mem, 128);
	emit_move_insn (reg, mem);

	ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);

	cfa_offset -= 16;
      }
}
/* Emit vzeroupper if needed.  */

static void
ix86_maybe_emit_epilogue_vzeroupper (void)
{
  if (TARGET_VZEROUPPER
      && !TREE_THIS_VOLATILE (cfun->decl)
      && !cfun->machine->caller_return_avx256_p)
    emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
}
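
/* Background note: on AVX-capable hardware, mixing 256-bit AVX code with
   legacy SSE code incurs a state-transition penalty unless the upper
   halves of the YMM registers are zeroed first; emitting vzeroupper
   before returning to a potentially SSE-only caller avoids that.  */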
/* Restore function stack, frame, and registers.  */

void
ix86_expand_epilogue (int style)
{
  struct machine_function *m = cfun->machine;
  struct machine_frame_state frame_state_save = m->fs;
  struct ix86_frame frame;
  bool restore_regs_via_mov;
  bool using_drap;

  ix86_finalize_stack_realign_flags ();
  ix86_compute_frame_layout (&frame);

  m->fs.sp_valid = (!frame_pointer_needed
		    || (current_function_sp_is_unchanging
			&& !stack_realign_fp));
  gcc_assert (!m->fs.sp_valid
	      || m->fs.sp_offset == frame.stack_pointer_offset);

  /* The FP must be valid if the frame pointer is present.  */
  gcc_assert (frame_pointer_needed == m->fs.fp_valid);
  gcc_assert (!m->fs.fp_valid
	      || m->fs.fp_offset == frame.hard_frame_pointer_offset);

  /* We must have *some* valid pointer to the stack frame.  */
  gcc_assert (m->fs.sp_valid || m->fs.fp_valid);

  /* The DRAP is never valid at this point.  */
  gcc_assert (!m->fs.drap_valid);

  /* See the comment about red zone and frame
     pointer usage in ix86_expand_prologue.  */
  if (frame_pointer_needed && frame.red_zone_size)
    emit_insn (gen_memory_blockage ());
  using_drap = crtl->drap_reg && crtl->stack_realign_needed;
  gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);

  /* Determine the CFA offset of the end of the red-zone.  */
  m->fs.red_zone_offset = 0;
  if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
    {
      /* The red-zone begins below the return address.  */
      m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;

      /* When the register save area is in the aligned portion of
	 the stack, determine the maximum runtime displacement that
	 matches up with the aligned frame.  */
      if (stack_realign_drap)
	m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
				  + UNITS_PER_WORD);
    }

  /* Special care must be taken for the normal return case of a function
     using eh_return: the eax and edx registers are marked as saved, but
     not restored along this path.  Adjust the save location to match.  */
  if (crtl->calls_eh_return && style != 2)
    frame.reg_save_offset -= 2 * UNITS_PER_WORD;

  /* EH_RETURN requires the use of moves to function properly.  */
  if (crtl->calls_eh_return)
    restore_regs_via_mov = true;
  /* SEH requires the use of pops to identify the epilogue.  */
  else if (TARGET_SEH)
    restore_regs_via_mov = false;
  /* If we're only restoring one register and sp is not valid then
     use a move instruction to restore the register, since it's
     less work than reloading sp and popping the register.  */
  else if (!m->fs.sp_valid && frame.nregs <= 1)
    restore_regs_via_mov = true;
  else if (TARGET_EPILOGUE_USING_MOVE
	   && cfun->machine->use_fast_prologue_epilogue
	   && (frame.nregs > 1
	       || m->fs.sp_offset != frame.reg_save_offset))
    restore_regs_via_mov = true;
  else if (frame_pointer_needed
	   && !frame.nregs
	   && m->fs.sp_offset != frame.reg_save_offset)
    restore_regs_via_mov = true;
  else if (frame_pointer_needed
	   && TARGET_USE_LEAVE
	   && cfun->machine->use_fast_prologue_epilogue
	   && frame.nregs == 1)
    restore_regs_via_mov = true;
  else
    restore_regs_via_mov = false;
  if (restore_regs_via_mov || frame.nsseregs)
    {
      /* Ensure that the entire register save area is addressable via
	 the stack pointer, if we will restore via sp.  */
      if (TARGET_64BIT
	  && m->fs.sp_offset > 0x7fffffff
	  && !(m->fs.fp_valid || m->fs.drap_valid)
	  && (frame.nsseregs + frame.nregs) != 0)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     GEN_INT (m->fs.sp_offset
					      - frame.sse_reg_save_offset),
				     style,
				     m->fs.cfa_reg == stack_pointer_rtx);
	}
    }

  /* If there are any SSE registers to restore, then we have to do it
     via moves, since there's obviously no pop for SSE regs.  */
  if (frame.nsseregs)
    ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
					  style == 2);

  if (restore_regs_via_mov)
    {
      rtx t;

      if (frame.nregs)
	ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
	{
	  rtx insn, sa = EH_RETURN_STACKADJ_RTX;

	  /* Stack realignment doesn't work with eh_return.  */
	  gcc_assert (!stack_realign_drap);
	  /* Neither do regparm nested functions.  */
	  gcc_assert (!ix86_static_chain_on_stack);

	  if (frame_pointer_needed)
	    {
	      t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
	      t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
	      emit_insn (gen_rtx_SET (VOIDmode, sa, t));

	      t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
	      insn = emit_move_insn (hard_frame_pointer_rtx, t);

	      /* Note that we use SA as a temporary CFA, as the return
		 address is at the proper place relative to it.  We
		 pretend this happens at the FP restore insn because
		 prior to this insn the FP would be stored at the wrong
		 offset relative to SA, and after this insn we have no
		 other reasonable register to use for the CFA.  We don't
		 bother resetting the CFA to the SP for the duration of
		 the return insn.  */
	      add_reg_note (insn, REG_CFA_DEF_CFA,
			    plus_constant (sa, UNITS_PER_WORD));
	      ix86_add_queued_cfa_restore_notes (insn);
	      add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
	      RTX_FRAME_RELATED_P (insn) = 1;

	      m->fs.cfa_reg = sa;
	      m->fs.cfa_offset = UNITS_PER_WORD;
	      m->fs.fp_valid = false;

	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
					 const0_rtx, style, false);
	    }
	  else
	    {
	      t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
	      t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
	      insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
	      ix86_add_queued_cfa_restore_notes (insn);

	      gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
	      if (m->fs.cfa_offset != UNITS_PER_WORD)
		{
		  m->fs.cfa_offset = UNITS_PER_WORD;
		  add_reg_note (insn, REG_CFA_DEF_CFA,
				plus_constant (stack_pointer_rtx,
					       UNITS_PER_WORD));
		  RTX_FRAME_RELATED_P (insn) = 1;
		}
	    }
	  m->fs.sp_offset = UNITS_PER_WORD;
	  m->fs.sp_valid = true;
	}
    }
  else
    {
      /* SEH requires that the function end with (1) a stack adjustment
	 if necessary, (2) a sequence of pops, and (3) a return or
	 jump instruction.  Prevent insns from the function body from
	 being scheduled into this sequence.  */
      if (TARGET_SEH)
	{
	  /* Prevent a catch region from being adjacent to the standard
	     epilogue sequence.  Unfortunately, neither crtl->uses_eh_lsda
	     nor several other flags that would be interesting to test
	     are set up yet.  */
	  if (flag_non_call_exceptions)
	    emit_insn (gen_nops (const1_rtx));
	  else
	    emit_insn (gen_blockage ());
	}

      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  */
      if (!m->fs.sp_valid)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
				     GEN_INT (m->fs.fp_offset
					      - frame.reg_save_offset),
				     style, false);
	}
      else if (m->fs.sp_offset != frame.reg_save_offset)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     GEN_INT (m->fs.sp_offset
					      - frame.reg_save_offset),
				     style,
				     m->fs.cfa_reg == stack_pointer_rtx);
	}

      ix86_emit_restore_regs_using_pop ();
    }
  /* If we used a frame pointer and haven't already got rid of it,
     then do so now.  */
  if (m->fs.fp_valid)
    {
      /* If the stack pointer is valid and pointing at the frame
	 pointer store address, then we only need a pop.  */
      if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
	ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
      /* Leave results in shorter dependency chains on CPUs that are
	 able to grok it fast.  */
      else if (TARGET_USE_LEAVE
	       || optimize_function_for_size_p (cfun)
	       || !cfun->machine->use_fast_prologue_epilogue)
	ix86_emit_leave ();
      else
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx,
				     hard_frame_pointer_rtx,
				     const0_rtx, style, !using_drap);
	  ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
	}
    }
  if (using_drap)
    {
      int param_ptr_offset = UNITS_PER_WORD;
      rtx insn;

      gcc_assert (stack_realign_drap);

      if (ix86_static_chain_on_stack)
	param_ptr_offset += UNITS_PER_WORD;
      if (!call_used_regs[REGNO (crtl->drap_reg)])
	param_ptr_offset += UNITS_PER_WORD;

      insn = emit_insn (gen_rtx_SET
			(VOIDmode, stack_pointer_rtx,
			 gen_rtx_PLUS (Pmode,
				       crtl->drap_reg,
				       GEN_INT (-param_ptr_offset))));
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = param_ptr_offset;
      m->fs.sp_offset = param_ptr_offset;
      m->fs.realigned = false;

      add_reg_note (insn, REG_CFA_DEF_CFA,
		    gen_rtx_PLUS (Pmode, stack_pointer_rtx,
				  GEN_INT (param_ptr_offset)));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (!call_used_regs[REGNO (crtl->drap_reg)])
	ix86_emit_restore_reg_using_pop (crtl->drap_reg);
    }

  /* At this point the stack pointer must be valid, and we must have
     restored all of the registers.  We may not have deallocated the
     entire stack frame.  We've delayed this until now because it may
     be possible to merge the local stack deallocation with the
     deallocation forced by ix86_static_chain_on_stack.  */
  gcc_assert (m->fs.sp_valid);
  gcc_assert (!m->fs.fp_valid);
  gcc_assert (!m->fs.realigned);
  if (m->fs.sp_offset != UNITS_PER_WORD)
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
				 style, true);
    }
  else
    ix86_add_queued_cfa_restore_notes (get_last_insn ());

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    {
      m->fs = frame_state_save;
      return;
    }

  /* Emit vzeroupper if needed.  */
  ix86_maybe_emit_epilogue_vzeroupper ();

  if (crtl->args.pops_args && crtl->args.size)
    {
      rtx popc = GEN_INT (crtl->args.pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop the return
	 address, do an explicit add, and jump indirectly to the caller.  */
      if (crtl->args.pops_args >= 65536)
	{
	  rtx ecx = gen_rtx_REG (SImode, CX_REG);
	  rtx insn;

	  /* There is no "pascal" calling convention in any 64bit ABI.  */
	  gcc_assert (!TARGET_64BIT);

	  insn = emit_insn (gen_pop (ecx));
	  m->fs.cfa_offset -= UNITS_PER_WORD;
	  m->fs.sp_offset -= UNITS_PER_WORD;

	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
			copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
	  add_reg_note (insn, REG_CFA_REGISTER,
			gen_rtx_SET (VOIDmode, ecx, pc_rtx));
	  RTX_FRAME_RELATED_P (insn) = 1;

	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     popc, -1, true);
	  emit_jump_insn (gen_simple_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_simple_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_simple_return_internal ());

  /* Restore the state back to the state from the prologue,
     so that it's correct for the next epilogue.  */
  m->fs = frame_state_save;
}
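
/* For orientation (illustrative, not emitted literally): the common
   frame-pointer epilogue produced by the code above is simply

	leave			# or: mov %ebp,%esp ; pop %ebp
	ret			# or "ret $N" when pops_args < 64K

   with pops of call-saved registers inserted before the leave/ret as
   needed.  */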
/* Reset from the function's potential modifications.  */

static void
ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  if (pic_offset_table_rtx)
    SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
#if TARGET_MACHO
  /* Mach-O doesn't support labels at the end of objects, so if
     it looks like we might want one, insert a NOP.  */
  {
    rtx insn = get_last_insn ();
    rtx deleted_debug_label = NULL_RTX;
    while (insn
	   && NOTE_P (insn)
	   && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
      {
	/* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
	   notes only; instead set their CODE_LABEL_NUMBER to -1,
	   otherwise there would be code generation differences
	   between -g and -g0.  */
	if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
	  deleted_debug_label = insn;
	insn = PREV_INSN (insn);
      }
    if (insn
	&& (LABEL_P (insn)
	    || (NOTE_P (insn)
		&& NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
      fputs ("\tnop\n", file);
    else if (deleted_debug_label)
      for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
	if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
	  CODE_LABEL_NUMBER (insn) = -1;
  }
#endif
}
/* Return a scratch register to use in the split stack prologue.  The
   split stack prologue is used for -fsplit-stack.  It is the first
   instructions in the function, even before the regular prologue.
   The scratch register can be any caller-saved register which is not
   used for parameters or for the static chain.  */

static unsigned int
split_stack_prologue_scratch_regno (void)
{
  if (TARGET_64BIT)
    return R11_REG;
  else
    {
      bool is_fastcall;
      int regparm;

      is_fastcall = (lookup_attribute ("fastcall",
				       TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
		     != NULL);
      regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);

      if (is_fastcall)
	{
	  if (DECL_STATIC_CHAIN (cfun->decl))
	    {
	      sorry ("-fsplit-stack does not support fastcall with "
		     "nested function");
	      return INVALID_REGNUM;
	    }
	  return AX_REG;
	}
      else if (regparm < 3)
	{
	  if (!DECL_STATIC_CHAIN (cfun->decl))
	    return CX_REG;
	  else
	    {
	      if (regparm >= 2)
		{
		  sorry ("-fsplit-stack does not support 2 register "
			 "parameters for a nested function");
		  return INVALID_REGNUM;
		}
	      return DX_REG;
	    }
	}
      else
	{
	  /* FIXME: We could make this work by pushing a register
	     around the addition and comparison.  */
	  sorry ("-fsplit-stack does not support 3 register parameters");
	  return INVALID_REGNUM;
	}
    }
}
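
/* Summary of the choices above, for quick reference: 64-bit code always
   gets %r11; 32-bit code gets %eax under fastcall, otherwise %ecx, or
   %edx when %ecx already carries arguments and a static chain is live;
   three register parameters leave no usable scratch register at all.  */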
/* A SYMBOL_REF for the function which allocates new stack space for
   -fsplit-stack.  */

static GTY(()) rtx split_stack_fn;

/* A SYMBOL_REF for the more-stack function when using the large
   model.  */

static GTY(()) rtx split_stack_fn_large;
/* Handle -fsplit-stack.  These are the first instructions in the
   function, even before the regular prologue.  */

void
ix86_expand_split_stack_prologue (void)
{
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;
  unsigned HOST_WIDE_INT args_size;
  rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
  rtx scratch_reg = NULL_RTX;
  rtx varargs_label = NULL_RTX;
  rtx fn;

  gcc_assert (flag_split_stack && reload_completed);

  ix86_finalize_stack_realign_flags ();
  ix86_compute_frame_layout (&frame);
  allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;

  /* This is the label we will branch to if we have enough stack
     space.  We expect the basic block reordering pass to reverse this
     branch if optimizing, so that we branch in the unlikely case.  */
  label = gen_label_rtx ();

  /* We need to compare the stack pointer minus the frame size with
     the stack boundary in the TCB.  The stack boundary always gives
     us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
     can compare directly.  Otherwise we need to do an addition.  */

  limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
			  UNSPEC_STACK_CHECK);
  limit = gen_rtx_CONST (Pmode, limit);
  limit = gen_rtx_MEM (Pmode, limit);
  if (allocate < SPLIT_STACK_AVAILABLE)
    current = stack_pointer_rtx;
  else
    {
      unsigned int scratch_regno;
      rtx offset;

      /* We need a scratch register to hold the stack pointer minus
	 the required frame size.  Since this is the very start of the
	 function, the scratch register can be any caller-saved
	 register which is not used for parameters.  */
      offset = GEN_INT (- allocate);
      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno == INVALID_REGNUM)
	return;
      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
	{
	  /* We don't use ix86_gen_add3 in this case because it will
	     want to split to lea, but when not optimizing the insn
	     will not be split after this point.  */
	  emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
				  gen_rtx_PLUS (Pmode, stack_pointer_rtx,
						offset)));
	}
      else
	{
	  emit_move_insn (scratch_reg, offset);
	  emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
				 stack_pointer_rtx));
	}
      current = scratch_reg;
    }

  ix86_expand_branch (GEU, current, limit, label);
  jump_insn = get_last_insn ();
  JUMP_LABEL (jump_insn) = label;

  /* Mark the jump as very likely to be taken.  */
  add_reg_note (jump_insn, REG_BR_PROB,
		GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
  if (split_stack_fn == NULL_RTX)
    split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
  fn = split_stack_fn;

  /* Get more stack space.  We pass in the desired stack space and the
     size of the arguments to copy to the new stack.  In 32-bit mode
     we push the parameters; __morestack will return on a new stack
     anyhow.  In 64-bit mode we pass the parameters in r10 and
     r11.  */
  allocate_rtx = GEN_INT (allocate);
  args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
  call_fusage = NULL_RTX;
  if (TARGET_64BIT)
    {
      rtx reg10, reg11;

      reg10 = gen_rtx_REG (Pmode, R10_REG);
      reg11 = gen_rtx_REG (Pmode, R11_REG);

      /* If this function uses a static chain, it will be in %r10.
	 Preserve it across the call to __morestack.  */
      if (DECL_STATIC_CHAIN (cfun->decl))
	{
	  rtx rax;

	  rax = gen_rtx_REG (word_mode, AX_REG);
	  emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
	  use_reg (&call_fusage, rax);
	}

      if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
	{
	  HOST_WIDE_INT argval;

	  /* When using the large model we need to load the address
	     into a register, and we've run out of registers.  So we
	     switch to a different calling convention, and we call a
	     different function: __morestack_large.  We pass the
	     argument size in the upper 32 bits of r10 and pass the
	     frame size in the lower 32 bits.  */
	  gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
	  gcc_assert ((args_size & 0xffffffff) == args_size);

	  if (split_stack_fn_large == NULL_RTX)
	    split_stack_fn_large =
	      gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");

	  if (ix86_cmodel == CM_LARGE_PIC)
	    {
	      rtx label, x;

	      label = gen_label_rtx ();
	      emit_label (label);
	      LABEL_PRESERVE_P (label) = 1;
	      emit_insn (gen_set_rip_rex64 (reg10, label));
	      emit_insn (gen_set_got_offset_rex64 (reg11, label));
	      emit_insn (gen_adddi3 (reg10, reg10, reg11));
	      x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
				  UNSPEC_GOT);
	      x = gen_rtx_CONST (Pmode, x);
	      emit_move_insn (reg11, x);
	      x = gen_rtx_PLUS (Pmode, reg10, reg11);
	      x = gen_const_mem (Pmode, x);
	      emit_move_insn (reg11, x);
	    }
	  else
	    emit_move_insn (reg11, split_stack_fn_large);

	  fn = reg11;

	  argval = ((args_size << 16) << 16) + allocate;
	  emit_move_insn (reg10, GEN_INT (argval));
	}
      else
	{
	  emit_move_insn (reg10, allocate_rtx);
	  emit_move_insn (reg11, GEN_INT (args_size));
	  use_reg (&call_fusage, reg11);
	}

      use_reg (&call_fusage, reg10);
    }
  else
    {
      emit_insn (gen_push (GEN_INT (args_size)));
      emit_insn (gen_push (allocate_rtx));
    }
  call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
				GEN_INT (UNITS_PER_WORD), constm1_rtx,
				NULL_RTX, false);
  add_function_usage_to (call_insn, call_fusage);
  /* In order to make call/return prediction work right, we now need
     to execute a return instruction.  See
     libgcc/config/i386/morestack.S for the details on how this works.

     For flow purposes gcc must not see this as a return
     instruction--we need control flow to continue at the subsequent
     label.  Therefore, we use an unspec.  */
  gcc_assert (crtl->args.pops_args < 65536);
  emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));

  /* If we are in 64-bit mode and this function uses a static chain,
     we saved %r10 in %rax before calling _morestack.  */
  if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
    emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
		    gen_rtx_REG (word_mode, AX_REG));

  /* If this function calls va_start, we need to store a pointer to
     the arguments on the old stack, because they may not have been
     all copied to the new stack.  At this point the old stack can be
     found at the frame pointer value used by __morestack, because
     __morestack has set that up before calling back to us.  Here we
     store that pointer in a scratch register, and in
     ix86_expand_prologue we store the scratch register in a stack
     slot.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      unsigned int scratch_regno;
      rtx frame_reg;
      int words;

      scratch_regno = split_stack_prologue_scratch_regno ();
      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      frame_reg = gen_rtx_REG (Pmode, BP_REG);

      /* 64-bit:
	 fp -> old fp value
	       return address within this function
	       return address of caller of this function
	       stack arguments
	 So we add three words to get to the stack arguments.

	 32-bit:
	 fp -> old fp value
	       return address within this function
	       first argument to __morestack
	       second argument to __morestack
	       return address of caller of this function
	       stack arguments
	 So we add five words to get to the stack arguments.  */
      words = TARGET_64BIT ? 3 : 5;
      emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
			      gen_rtx_PLUS (Pmode, frame_reg,
					    GEN_INT (words * UNITS_PER_WORD))));

      varargs_label = gen_label_rtx ();
      emit_jump_insn (gen_jump (varargs_label));
      JUMP_LABEL (get_last_insn ()) = varargs_label;

      emit_barrier ();
    }

  emit_label (label);
  LABEL_NUSES (label) = 1;

  /* If this function calls va_start, we now have to set the scratch
     register for the case where we do not call __morestack.  In this
     case we need to set it based on the stack pointer.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					    GEN_INT (UNITS_PER_WORD))));

      emit_label (varargs_label);
      LABEL_NUSES (varargs_label) = 1;
    }
}
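
/* Illustrative shape of the check emitted above for a small frame on
   64-bit Linux (assuming the TCB stack boundary lives at %fs:0x70 --
   the actual offset is OS-specific and comes from UNSPEC_STACK_CHECK):

	cmpq	%fs:0x70, %rsp
	jae	.Lenough		# likely: stack is big enough
	movq	$frame_size, %r10
	movq	$args_size, %r11
	callq	__morestack
	retq				# returns on the new stack
	.Lenough:  */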
/* We may have to tell the dataflow pass that the split stack prologue
   is initializing a scratch register.  */

static void
ix86_live_on_entry (bitmap regs)
{
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      gcc_assert (flag_split_stack);
      bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
    }
}
/* Determine whether OP is a suitable SUBREG RTX for an address.  */

static bool
ix86_address_subreg_operand (rtx op)
{
  enum machine_mode mode;

  if (!REG_P (op))
    return false;

  mode = GET_MODE (op);

  if (GET_MODE_CLASS (mode) != MODE_INT)
    return false;

  /* Don't allow SUBREGs that span more than a word.  It can lead to spill
     failures when the register is one word out of a two word structure.  */
  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
    return false;

  /* Allow only SUBREGs of non-eliminable hard registers.  */
  return register_no_elim_operand (op, mode);
}
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but still used for computing the length of an lea
   instruction.  */

int
ix86_decompose_address (rtx addr, struct ix86_address *out)
{
  rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
  rtx base_reg, index_reg;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  rtx tmp;
  int retval = 1;
  enum ix86_address_seg seg = SEG_DEFAULT;

  /* Allow zero-extended SImode addresses,
     they will be emitted with addr32 prefix.  */
  if (TARGET_64BIT && GET_MODE (addr) == DImode)
    {
      if (GET_CODE (addr) == ZERO_EXTEND
	  && GET_MODE (XEXP (addr, 0)) == SImode)
	addr = XEXP (addr, 0);
      else if (GET_CODE (addr) == AND
	       && const_32bit_mask (XEXP (addr, 1), DImode))
	{
	  addr = XEXP (addr, 0);

	  /* Adjust SUBREGs.  */
	  if (GET_CODE (addr) == SUBREG
	      && GET_MODE (SUBREG_REG (addr)) == SImode)
	    addr = SUBREG_REG (addr);
	  else if (GET_MODE (addr) == DImode)
	    addr = gen_rtx_SUBREG (SImode, addr, 0);
	  else if (GET_MODE (addr) != VOIDmode)
	    return 0;
	}
    }

  if (REG_P (addr))
    base = addr;
  else if (GET_CODE (addr) == SUBREG)
    {
      if (ix86_address_subreg_operand (SUBREG_REG (addr)))
	base = addr;
      else
	return 0;
    }
  else if (GET_CODE (addr) == PLUS)
    {
      rtx addends[4], op;
      int n = 0, i;

      op = addr;
      do
	{
	  if (n >= 4)
	    return 0;
	  addends[n++] = XEXP (op, 1);
	  op = XEXP (op, 0);
	}
      while (GET_CODE (op) == PLUS);
      if (n >= 4)
	return 0;
      addends[n] = op;

      for (i = n; i >= 0; --i)
	{
	  op = addends[i];
	  switch (GET_CODE (op))
	    {
	    case MULT:
	      if (index)
		return 0;
	      index = XEXP (op, 0);
	      scale_rtx = XEXP (op, 1);
	      break;

	    case ASHIFT:
	      if (index)
		return 0;
	      index = XEXP (op, 0);
	      tmp = XEXP (op, 1);
	      if (!CONST_INT_P (tmp))
		return 0;
	      scale = INTVAL (tmp);
	      if ((unsigned HOST_WIDE_INT) scale > 3)
		return 0;
	      scale = 1 << scale;
	      break;

	    case UNSPEC:
	      if (XINT (op, 1) == UNSPEC_TP
		  && TARGET_TLS_DIRECT_SEG_REFS
		  && seg == SEG_DEFAULT)
		seg = TARGET_64BIT ? SEG_FS : SEG_GS;
	      else
		return 0;
	      break;

	    case SUBREG:
	      if (!ix86_address_subreg_operand (SUBREG_REG (op)))
		return 0;
	      /* FALLTHRU */

	    case REG:
	      if (!base)
		base = op;
	      else if (!index)
		index = op;
	      else
		return 0;
	      break;

	    case CONST:
	    case CONST_INT:
	    case SYMBOL_REF:
	    case LABEL_REF:
	      if (disp)
		return 0;
	      disp = op;
	      break;

	    default:
	      return 0;
	    }
	}
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (!CONST_INT_P (tmp))
	return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return 0;
      scale = 1 << scale;
      retval = -1;
    }
  else
    disp = addr;			/* displacement */

  if (index)
    {
      if (REG_P (index))
	;
      else if (GET_CODE (index) == SUBREG
	       && ix86_address_subreg_operand (SUBREG_REG (index)))
	;
      else
	return 0;
    }

  /* Address override works only on the (%reg) part of %fs:(%reg).  */
  if (seg != SEG_DEFAULT
      && ((base && GET_MODE (base) != word_mode)
	  || (index && GET_MODE (index) != word_mode)))
    return 0;

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (!CONST_INT_P (scale_rtx))
	return 0;
      scale = INTVAL (scale_rtx);
    }

  base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
  index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;

  /* Avoid useless 0 displacement.  */
  if (disp == const0_rtx && (base || index))
    disp = NULL_RTX;

  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
  if (base_reg && index_reg && scale == 1
      && (index_reg == arg_pointer_rtx
	  || index_reg == frame_pointer_rtx
	  || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
    {
      tmp = base, base = index, index = tmp;
      tmp = base_reg, base_reg = index_reg, index_reg = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.
     Similarly %r13.  */
  if (!disp
      && base_reg
      && (base_reg == hard_frame_pointer_rtx
	  || base_reg == frame_pointer_rtx
	  || base_reg == arg_pointer_rtx
	  || (REG_P (base_reg)
	      && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
		  || REGNO (base_reg) == R13_REG))))
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].
     Reload calls address legitimization without cfun defined, so we need
     to test cfun for being non-NULL.  */
  if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
      && base_reg && !index_reg && !disp
      && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale == 2)
    base = index, base_reg = index_reg, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;
  out->seg = seg;

  return retval;
}
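
/* Worked example (informal): for the address %eax + %ebx*4 + 16, i.e.
   (plus (plus (mult (reg ebx) (const_int 4)) (reg eax)) (const_int 16)),
   the walk above fills OUT with base = %eax, index = %ebx, scale = 4,
   disp = 16, seg = SEG_DEFAULT and returns 1.  */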
/* Return the cost of the memory address X.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
static int
ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
{
  struct ix86_address parts;
  int cost = 1;
  int ok = ix86_decompose_address (x, &parts);

  gcc_assert (ok);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  /* Attempt to minimize number of registers in the address.  */
  if ((parts.base
       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
      || (parts.index
	  && (!REG_P (parts.index)
	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
    cost++;

  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)
    cost++;

  /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.  Increase the cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be
     worthwhile to split such addresses or even refuse such addresses
     at all.

     The following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last case may be avoidable by explicitly coding the zero
     in the memory address, but I don't have an AMD-K6 machine handy to
     check this theory.  */

  if (TARGET_K6
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}
/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   this is used to form addresses to local data when -fPIC is in
   effect.  */

static bool
darwin_local_data_pic (rtx disp)
{
  return (GET_CODE (disp) == UNSPEC
	  && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
}
/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  */

static bool
ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      x = XEXP (x, 0);

      if (GET_CODE (x) == PLUS)
	{
	  if (!CONST_INT_P (XEXP (x, 1)))
	    return false;
	  x = XEXP (x, 0);
	}

      if (TARGET_MACHO && darwin_local_data_pic (x))
	return true;

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (x) == UNSPEC)
	switch (XINT (x, 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_PLTOFF:
	    return TARGET_64BIT;
	  case UNSPEC_TPOFF:
	  case UNSPEC_NTPOFF:
	    x = XVECEXP (x, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  case UNSPEC_DTPOFF:
	    x = XVECEXP (x, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
	  default:
	    return false;
	  }

      /* We must have drilled down to a symbol.  */
      if (GET_CODE (x) == LABEL_REF)
	return true;
      if (GET_CODE (x) != SYMBOL_REF)
	return false;
      /* FALLTHRU */

    case SYMBOL_REF:
      /* TLS symbols are never valid.  */
      if (SYMBOL_REF_TLS_MODEL (x))
	return false;

      /* DLLIMPORT symbols are never valid.  */
      if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
	  && SYMBOL_REF_DLLIMPORT_P (x))
	return false;

#if TARGET_MACHO
      /* mdynamic-no-pic */
      if (MACHO_DYNAMIC_NO_PIC_P)
	return machopic_symbol_defined_p (x);
#endif
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == TImode
	  && x != CONST0_RTX (TImode)
	  && !TARGET_64BIT)
	return false;
      break;

    case CONST_VECTOR:
      if (!standard_sse_constant_p (x))
	return false;

    default:
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}
/* Determine if it's legal to put X into the constant pool.  This
   is not possible for the address of thread-local symbols, which
   is checked above.  */

static bool
ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
{
  /* We can always put integral constants and vectors in memory.  */
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case CONST_DOUBLE:
    case CONST_VECTOR:
      return false;

    default:
      break;
    }
  return !ix86_legitimate_constant_p (mode, x);
}
/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */

bool
legitimate_pic_operand_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      inner = XEXP (x, 0);
      if (GET_CODE (inner) == PLUS
	  && CONST_INT_P (XEXP (inner, 1)))
	inner = XEXP (inner, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_PLTOFF:
	    return TARGET_64BIT;
	  case UNSPEC_TPOFF:
	    x = XVECEXP (inner, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  case UNSPEC_MACHOPIC_OFFSET:
	    return legitimate_pic_address_disp_p (x);
	  default:
	    return false;
	  }
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      return legitimate_pic_address_disp_p (x);

    default:
      return true;
    }
}
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

bool
legitimate_pic_address_disp_p (rtx disp)
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      rtx op0 = disp, op1;

      switch (GET_CODE (disp))
        {
        case LABEL_REF:
          return true;

        case CONST:
          if (GET_CODE (XEXP (disp, 0)) != PLUS)
            break;
          op0 = XEXP (XEXP (disp, 0), 0);
          op1 = XEXP (XEXP (disp, 0), 1);
          if (!CONST_INT_P (op1)
              || INTVAL (op1) >= 16*1024*1024
              || INTVAL (op1) < -16*1024*1024)
            break;
          if (GET_CODE (op0) == LABEL_REF)
            return true;
          if (GET_CODE (op0) == CONST
              && GET_CODE (XEXP (op0, 0)) == UNSPEC
              && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
            return true;
          if (GET_CODE (op0) == UNSPEC
              && XINT (op0, 1) == UNSPEC_PCREL)
            return true;
          if (GET_CODE (op0) != SYMBOL_REF)
            break;
          /* FALLTHRU */

        case SYMBOL_REF:
          /* TLS references should always be enclosed in UNSPEC.  */
          if (SYMBOL_REF_TLS_MODEL (op0))
            return false;
          if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
              && ix86_cmodel != CM_LARGE_PIC)
            return true;
          break;

        default:
          break;
        }
    }
  if (GET_CODE (disp) != CONST)
    return false;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* It is unsafe to allow PLUS expressions here; they used to limit
         the distance allowed into GOT tables, and we should not need
         them anyway.  */
      if (GET_CODE (disp) != UNSPEC
          || (XINT (disp, 1) != UNSPEC_GOTPCREL
              && XINT (disp, 1) != UNSPEC_GOTOFF
              && XINT (disp, 1) != UNSPEC_PCREL
              && XINT (disp, 1) != UNSPEC_PLTOFF))
        return false;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
          && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
        return false;
      return true;
    }

  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (!CONST_INT_P (XEXP (disp, 1)))
        return false;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  if (TARGET_MACHO && darwin_local_data_pic (disp))
    return true;

  if (GET_CODE (disp) != UNSPEC)
    return false;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      if (saw_plus)
        return false;
      /* We need to check for both symbols and labels because VxWorks loads
         text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
         details.  */
      return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
              || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
    case UNSPEC_GOTOFF:
      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
         While the ABI specifies a 32bit relocation as well, we don't
         produce it in the small PIC model at all.  */
      if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
           || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
          && !TARGET_64BIT)
        return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
      return false;
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      if (saw_plus)
        return false;
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
              && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
    case UNSPEC_NTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
              && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
    case UNSPEC_DTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
              && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
    }

  return false;
}

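/* Illustrative sketch, added here for orientation and not part of the
   original sources: the PIC displacements accepted above correspond to
   assembler operands of roughly the following shapes, for a hypothetical
   symbol "foo":

     foo@GOT        32bit PIC: address of foo loaded from the GOT
     foo@GOTOFF     32bit PIC: offset of foo from the GOT base
     foo@GOTPCREL   64bit PIC: RIP-relative GOT slot
     foo@gottpoff / foo@indntpoff / foo@ntpoff / foo@dtpoff
                    TLS initial-exec / local-exec / local-dynamic forms,
                    each of which must wrap a SYMBOL_REF whose TLS model
                    matches, as checked case by case above.  */
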
/* Recognizes RTL expressions that are valid memory addresses for an
   instruction.  The MODE argument is the machine mode for the MEM
   expression that wants to use this address.

   It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

static bool
ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
                           rtx addr, bool strict)
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;

  /* Since a constant address in x32 is sign-extended to 64 bits,
     we have to reject addresses from 0x80000000 to 0xffffffff.  */
  if (TARGET_X32
      && CONST_INT_P (addr)
      && INTVAL (addr) < 0)
    return false;

  if (ix86_decompose_address (addr, &parts) <= 0)
    /* Decomposition failed.  */
    return false;

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.  */
  if (base)
    {
      rtx reg;

      if (REG_P (base))
        reg = base;
      else if (GET_CODE (base) == SUBREG && REG_P (SUBREG_REG (base)))
        reg = SUBREG_REG (base);
      else
        /* Base is not a register.  */
        return false;

      if (GET_MODE (base) != SImode && GET_MODE (base) != DImode)
        return false;

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
          || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
        /* Base is not valid.  */
        return false;
    }

  /* Validate index register.  */
  if (index)
    {
      rtx reg;

      if (REG_P (index))
        reg = index;
      else if (GET_CODE (index) == SUBREG && REG_P (SUBREG_REG (index)))
        reg = SUBREG_REG (index);
      else
        /* Index is not a register.  */
        return false;

      if (GET_MODE (index) != SImode && GET_MODE (index) != DImode)
        return false;

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
          || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
        /* Index is not valid.  */
        return false;
    }

  /* Index and base should have the same mode.  */
  if (base && index
      && GET_MODE (base) != GET_MODE (index))
    return false;

  /* Validate scale factor.  */
  if (scale != 1)
    {
      if (!index)
        /* Scale without index.  */
        return false;

      if (scale != 2 && scale != 4 && scale != 8)
        /* Scale is not a valid multiplier.  */
        return false;
    }

  /* Validate displacement.  */
  if (disp)
    {
      if (GET_CODE (disp) == CONST
          && GET_CODE (XEXP (disp, 0)) == UNSPEC
          && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
        switch (XINT (XEXP (disp, 0), 1))
          {
          /* Refuse GOTOFF and GOT in 64bit mode since they are always
             64bit when used.  While the ABI specifies 32bit relocations
             as well, we don't produce them at all and use IP relative
             addressing instead.  */
          case UNSPEC_GOT:
          case UNSPEC_GOTOFF:
            gcc_assert (flag_pic);
            if (!TARGET_64BIT)
              goto is_legitimate_pic;

            /* 64bit address unspec.  */
            return false;

          case UNSPEC_GOTPCREL:
          case UNSPEC_PCREL:
            gcc_assert (flag_pic);
            goto is_legitimate_pic;

          case UNSPEC_GOTTPOFF:
          case UNSPEC_GOTNTPOFF:
          case UNSPEC_INDNTPOFF:
          case UNSPEC_NTPOFF:
          case UNSPEC_DTPOFF:
            break;

          case UNSPEC_STACK_CHECK:
            gcc_assert (flag_split_stack);
            break;

          default:
            /* Invalid address unspec.  */
            return false;
          }

      else if (SYMBOLIC_CONST (disp)
               && (flag_pic
                   || (TARGET_MACHO
#if TARGET_MACHO
                       && MACHOPIC_INDIRECT
                       && !machopic_operand_p (disp)
#endif
               )))
        {

        is_legitimate_pic:
          if (TARGET_64BIT && (index || base))
            {
              /* foo@dtpoff(%rX) is ok.  */
              if (GET_CODE (disp) != CONST
                  || GET_CODE (XEXP (disp, 0)) != PLUS
                  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
                  || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
                  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
                      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
                /* Non-constant pic memory reference.  */
                return false;
            }
          else if ((!TARGET_MACHO || flag_pic)
                   && ! legitimate_pic_address_disp_p (disp))
            /* Displacement is an invalid pic construct.  */
            return false;
#if TARGET_MACHO
          else if (MACHO_DYNAMIC_NO_PIC_P
                   && !ix86_legitimate_constant_p (Pmode, disp))
            /* Displacement must be referenced via non_lazy_pointer.  */
            return false;
#endif

          /* This code used to verify that a symbolic pic displacement
             includes the pic_offset_table_rtx register.

             While this is a good idea, unfortunately these constructs may
             be created by the "adds using lea" optimization for incorrect
             code like:

             int a;
             int foo (int i)
               {
                 return *(&a + i);
               }

             This code is nonsensical, but results in addressing the
             GOT table with a pic_offset_table_rtx base.  We can't
             just refuse it easily, since it gets matched by the
             "addsi3" pattern, which later gets split to lea in the
             case the output register differs from the input.  While this
             could be handled by a separate addsi pattern for this case
             that never results in lea, disabling this test seems to be
             the easier and correct fix for the crash.  */
        }
      else if (GET_CODE (disp) != LABEL_REF
               && !CONST_INT_P (disp)
               && (GET_CODE (disp) != CONST
                   || !ix86_legitimate_constant_p (Pmode, disp))
               && (GET_CODE (disp) != SYMBOL_REF
                   || !ix86_legitimate_constant_p (Pmode, disp)))
        /* Displacement is not constant.  */
        return false;
      else if (TARGET_64BIT
               && !x86_64_immediate_operand (disp, VOIDmode))
        /* Displacement is out of range.  */
        return false;
    }

  /* Everything looks valid.  */
  return true;
}

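/* Illustrative sketch, added for orientation and not part of the original
   sources: the decomposition validated above corresponds to the general
   ia32 effective address

     disp(base, index, scale)    i.e.  base + index*scale + disp

   so, e.g., "movl 16(%ebx,%ecx,4), %eax" has base %ebx, index %ecx,
   scale 4 and displacement 16.  A scale without an index, or a scale
   other than 1/2/4/8, is rejected above.  */
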
/* Determine if a given RTX is a valid constant address.  */

bool
constant_address_p (rtx x)
{
  return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
}

/* Return a unique alias set for the GOT.  */

static alias_set_type
ix86_GOT_alias_set (void)
{
  static alias_set_type set = -1;
  if (set == -1)
    set = new_alias_set ();
  return set;
}

/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      used.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
   reg also appears in the address.  */

static rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  rtx addr = orig;
  rtx new_rtx = orig;

#if TARGET_MACHO
  if (TARGET_MACHO && !TARGET_64BIT)
    {
      if (reg == 0)
        reg = gen_reg_rtx (Pmode);
      /* Use the generic Mach-O PIC machinery.  */
      return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
    }
#endif

  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
    new_rtx = addr;
  else if (TARGET_64BIT
           && ix86_cmodel != CM_SMALL_PIC
           && gotoff_operand (addr, Pmode))
    {
      rtx tmpreg;
      /* This symbol may be referenced via a displacement from the PIC
         base address (@GOTOFF).  */

      if (reload_in_progress)
        df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
      if (GET_CODE (addr) == CONST)
        addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
        {
          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
                                    UNSPEC_GOTOFF);
          new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
        }
      else
        new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new_rtx = gen_rtx_CONST (Pmode, new_rtx);
      if (!reg)
        tmpreg = gen_reg_rtx (Pmode);
      else
        tmpreg = reg;
      emit_move_insn (tmpreg, new_rtx);

      if (reg != 0)
        {
          new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
                                         tmpreg, 1, OPTAB_DIRECT);
          new_rtx = reg;
        }
      else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
    }
  else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
    {
      /* This symbol may be referenced via a displacement from the PIC
         base address (@GOTOFF).  */

      if (reload_in_progress)
        df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
      if (GET_CODE (addr) == CONST)
        addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
        {
          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
                                    UNSPEC_GOTOFF);
          new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
        }
      else
        new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new_rtx = gen_rtx_CONST (Pmode, new_rtx);
      new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);

      if (reg != 0)
        {
          emit_move_insn (reg, new_rtx);
          new_rtx = reg;
        }
    }
  else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
           /* We can't use @GOTOFF for text labels on VxWorks;
              see gotoff_operand.  */
           || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
    {
      if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
        {
          if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
            return legitimize_dllimport_symbol (addr, true);
          if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
              && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
            {
              rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0),
                                                   true);
              return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
            }
        }

      /* For x64 PE-COFF there is no GOT table.  So we use the address
         directly.  */
      if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
        {
          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
          new_rtx = gen_rtx_CONST (Pmode, new_rtx);

          if (reg == 0)
            reg = gen_reg_rtx (Pmode);
          emit_move_insn (reg, new_rtx);
          new_rtx = reg;
        }
      else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
        {
          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
                                    UNSPEC_GOTPCREL);
          new_rtx = gen_rtx_CONST (Pmode, new_rtx);
          new_rtx = gen_const_mem (Pmode, new_rtx);
          set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());

          if (reg == 0)
            reg = gen_reg_rtx (Pmode);
          /* Use gen_movsi directly; otherwise the address gets loaded
             into a register for CSE.  We don't want to CSE this address
             itself; instead we CSE addresses loaded from the GOT table,
             so skip this.  */
          emit_insn (gen_movsi (reg, new_rtx));
          new_rtx = reg;
        }
      else
        {
          /* This symbol must be referenced via a load from the
             Global Offset Table (@GOT).  */

          if (reload_in_progress)
            df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
          new_rtx = gen_rtx_CONST (Pmode, new_rtx);
          if (TARGET_64BIT)
            new_rtx = force_reg (Pmode, new_rtx);
          new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
          new_rtx = gen_const_mem (Pmode, new_rtx);
          set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());

          if (reg == 0)
            reg = gen_reg_rtx (Pmode);
          emit_move_insn (reg, new_rtx);
          new_rtx = reg;
        }
    }
  else
    {
      if (CONST_INT_P (addr)
          && !x86_64_immediate_operand (addr, VOIDmode))
        {
          if (reg)
            {
              emit_move_insn (reg, addr);
              new_rtx = reg;
            }
          else
            new_rtx = force_reg (Pmode, addr);
        }
      else if (GET_CODE (addr) == CONST)
        {
          addr = XEXP (addr, 0);

          /* We must match stuff we generate before.  Assume the only
             unspecs that can get here are ours.  Not that we could do
             anything with them anyway....  */
          if (GET_CODE (addr) == UNSPEC
              || (GET_CODE (addr) == PLUS
                  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
            return orig;
          gcc_assert (GET_CODE (addr) == PLUS);
        }
      if (GET_CODE (addr) == PLUS)
        {
          rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

          /* Check first to see if this is a constant offset from a @GOTOFF
             symbol reference.  */
          if (gotoff_operand (op0, Pmode)
              && CONST_INT_P (op1))
            {
              if (!TARGET_64BIT)
                {
                  if (reload_in_progress)
                    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
                  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
                                            UNSPEC_GOTOFF);
                  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
                  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
                  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
                                          new_rtx);

                  if (reg != 0)
                    {
                      emit_move_insn (reg, new_rtx);
                      new_rtx = reg;
                    }
                }
              else
                {
                  if (INTVAL (op1) < -16*1024*1024
                      || INTVAL (op1) >= 16*1024*1024)
                    {
                      if (!x86_64_immediate_operand (op1, Pmode))
                        op1 = force_reg (Pmode, op1);
                      new_rtx = gen_rtx_PLUS (Pmode,
                                              force_reg (Pmode, op0), op1);
                    }
                }
            }
          else
            {
              rtx base = legitimize_pic_address (XEXP (addr, 0), reg);
              new_rtx = legitimize_pic_address (XEXP (addr, 1),
                                                base == reg ? NULL_RTX : reg);

              if (CONST_INT_P (new_rtx))
                new_rtx = plus_constant (base, INTVAL (new_rtx));
              else
                {
                  if (GET_CODE (new_rtx) == PLUS
                      && CONSTANT_P (XEXP (new_rtx, 1)))
                    {
                      base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
                      new_rtx = XEXP (new_rtx, 1);
                    }
                  new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
                }
            }
        }
    }
  return new_rtx;
}

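/* Illustrative sketch, added for orientation and not part of the original
   sources: for a hypothetical symbol "foo", the cases above produce
   addresses that assemble roughly to

     leal foo@GOTOFF(%ebx), %eax    local data, 32bit PIC (PIC reg %ebx)
     movl foo@GOT(%ebx), %eax       global data, 32bit PIC: load from GOT
     movq foo@GOTPCREL(%rip), %rax  global data, 64bit small PIC model

   The @GOTOFF form computes an address relative to the GOT base, while
   the @GOT/@GOTPCREL forms load the address from a GOT slot.  */
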
/* Load the thread pointer.  If TO_REG is true, force it into a register.  */

static rtx
get_thread_pointer (bool to_reg)
{
  rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);

  if (GET_MODE (tp) != Pmode)
    tp = convert_to_mode (Pmode, tp, 1);

  if (to_reg)
    tp = copy_addr_to_reg (tp);

  return tp;
}

/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx ix86_tls_symbol;

static rtx
ix86_tls_get_addr (void)
{
  if (!ix86_tls_symbol)
    {
      const char *sym
        = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
           ? "___tls_get_addr" : "__tls_get_addr");

      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
    }

  return ix86_tls_symbol;
}

/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */

static GTY(()) rtx ix86_tls_module_base_symbol;

static rtx
ix86_tls_module_base (void)
{
  if (!ix86_tls_module_base_symbol)
    {
      ix86_tls_module_base_symbol
        = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");

      SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
        |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
    }

  return ix86_tls_module_base_symbol;
}

/* A subroutine of ix86_legitimize_address and ix86_expand_move.  FOR_MOV is
   false if we expect this to be used for a memory address and true if
   we expect to load the address into a register.  */

static rtx
legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
{
  rtx dest, base, off;
  rtx pic = NULL_RTX, tp = NULL_RTX;
  int type;

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      dest = gen_reg_rtx (Pmode);

      if (!TARGET_64BIT)
        {
          if (flag_pic)
            pic = pic_offset_table_rtx;
          else
            {
              pic = gen_reg_rtx (Pmode);
              emit_insn (gen_set_got (pic));
            }
        }

      if (TARGET_GNU2_TLS)
        {
          if (TARGET_64BIT)
            emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
          else
            emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));

          tp = get_thread_pointer (true);
          dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));

          set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
        }
      else
        {
          rtx caddr = ix86_tls_get_addr ();

          if (TARGET_64BIT)
            {
              rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;

              start_sequence ();
              emit_call_insn (ix86_gen_tls_global_dynamic_64 (rax, x,
                                                              caddr));
              insns = get_insns ();
              end_sequence ();

              RTL_CONST_CALL_P (insns) = 1;
              emit_libcall_block (insns, dest, rax, x);
            }
          else
            emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
        }
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      base = gen_reg_rtx (Pmode);

      if (!TARGET_64BIT)
        {
          if (flag_pic)
            pic = pic_offset_table_rtx;
          else
            {
              pic = gen_reg_rtx (Pmode);
              emit_insn (gen_set_got (pic));
            }
        }

      if (TARGET_GNU2_TLS)
        {
          rtx tmp = ix86_tls_module_base ();

          if (TARGET_64BIT)
            emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
          else
            emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));

          tp = get_thread_pointer (true);
          set_unique_reg_note (get_last_insn (), REG_EQUAL,
                               gen_rtx_MINUS (Pmode, tmp, tp));
        }
      else
        {
          rtx caddr = ix86_tls_get_addr ();

          if (TARGET_64BIT)
            {
              rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, eqv;

              start_sequence ();
              emit_call_insn (ix86_gen_tls_local_dynamic_base_64 (rax,
                                                                  caddr));
              insns = get_insns ();
              end_sequence ();

              /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
                 share the LD_BASE result with other LD model accesses.  */
              eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                                    UNSPEC_TLS_LD_BASE);

              RTL_CONST_CALL_P (insns) = 1;
              emit_libcall_block (insns, base, rax, eqv);
            }
          else
            emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
        }

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));

      if (TARGET_GNU2_TLS)
        {
          dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));

          set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
        }
      break;

    case TLS_MODEL_INITIAL_EXEC:
      if (TARGET_64BIT)
        {
          if (TARGET_SUN_TLS)
            {
              /* The Sun linker took the AMD64 TLS spec literally
                 and can only handle %rax as destination of the
                 initial executable code sequence.  */

              dest = gen_reg_rtx (Pmode);
              emit_insn (gen_tls_initial_exec_64_sun (dest, x));
              return dest;
            }
          else if (Pmode == SImode)
            {
              /* Always generate
                   movl %fs:0, %reg32
                   addl x@gottpoff(%rip), %reg32
                 to support linker IE->LE optimization and avoid
                 fs:(%reg32) as a memory operand.  */
              dest = gen_reg_rtx (Pmode);
              emit_insn (gen_tls_initial_exec_x32 (dest, x));
              return dest;
            }

          pic = NULL;
          type = UNSPEC_GOTNTPOFF;
        }
      else if (flag_pic)
        {
          if (reload_in_progress)
            df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
          pic = pic_offset_table_rtx;
          type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
        }
      else if (!TARGET_ANY_GNU_TLS)
        {
          pic = gen_reg_rtx (Pmode);
          emit_insn (gen_set_got (pic));
          type = UNSPEC_GOTTPOFF;
        }
      else
        {
          pic = NULL;
          type = UNSPEC_INDNTPOFF;
        }

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (Pmode, off);
      if (pic)
        off = gen_rtx_PLUS (Pmode, pic, off);
      off = gen_const_mem (Pmode, off);
      set_mem_alias_set (off, ix86_GOT_alias_set ());

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
        {
          base = get_thread_pointer (for_mov
                                     || !(TARGET_TLS_DIRECT_SEG_REFS
                                          && TARGET_TLS_INDIRECT_SEG_REFS));
          off = force_reg (Pmode, off);
          return gen_rtx_PLUS (Pmode, base, off);
        }
      else
        {
          base = get_thread_pointer (true);
          dest = gen_reg_rtx (Pmode);
          emit_insn (gen_subsi3 (dest, base, off));
        }
      break;

    case TLS_MODEL_LOCAL_EXEC:
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
                            (TARGET_64BIT || TARGET_ANY_GNU_TLS)
                            ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
        {
          base = get_thread_pointer (for_mov
                                     || !(TARGET_TLS_DIRECT_SEG_REFS
                                          && TARGET_TLS_INDIRECT_SEG_REFS));
          return gen_rtx_PLUS (Pmode, base, off);
        }
      else
        {
          base = get_thread_pointer (true);
          dest = gen_reg_rtx (Pmode);
          emit_insn (gen_subsi3 (dest, base, off));
        }
      break;

    default:
      gcc_unreachable ();
    }

  return dest;
}

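/* Illustrative sketch, added for orientation and not part of the original
   sources: for a hypothetical TLS variable "x", the models handled above
   emit code along these lines on 32bit GNU/Linux (the 64bit forms are
   analogous, with %fs instead of %gs):

     global-dynamic:  leal x@tlsgd(,%ebx,1), %eax
                      call ___tls_get_addr
     local-dynamic:   leal x@tlsldm(%ebx), %eax
                      call ___tls_get_addr
                      leal x@dtpoff(%eax), %edx
     initial-exec:    movl %gs:0, %eax
                      addl x@gotntpoff(%ebx), %eax
     local-exec:      movl %gs:0, %eax
                      leal x@ntpoff(%eax), %eax

   See Drepper's "ELF Handling For Thread-Local Storage" for the
   authoritative sequences; the above is only a reminder.  */
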
/* Create or return the unique __imp_DECL dllimport symbol corresponding
   to symbol DECL.  */

static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
  htab_t dllimport_map;

static tree
get_dllimport_decl (tree decl)
{
  struct tree_map *h, in;
  void **loc;
  const char *name;
  const char *prefix;
  size_t namelen, prefixlen;
  char *imp_name;
  tree to;
  rtx rtl;

  if (!dllimport_map)
    dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);

  in.hash = htab_hash_pointer (decl);
  in.base.from = decl;
  loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
  h = (struct tree_map *) *loc;
  if (h)
    return h->to;

  *loc = h = ggc_alloc_tree_map ();
  h->hash = in.hash;
  h->base.from = decl;
  h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
                           VAR_DECL, NULL, ptr_type_node);
  DECL_ARTIFICIAL (to) = 1;
  DECL_IGNORED_P (to) = 1;
  DECL_EXTERNAL (to) = 1;
  TREE_READONLY (to) = 1;

  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
  name = targetm.strip_name_encoding (name);
  prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
    ? "*__imp_" : "*__imp__";
  namelen = strlen (name);
  prefixlen = strlen (prefix);
  imp_name = (char *) alloca (namelen + prefixlen + 1);
  memcpy (imp_name, prefix, prefixlen);
  memcpy (imp_name + prefixlen, name, namelen + 1);

  name = ggc_alloc_string (imp_name, namelen + prefixlen);
  rtl = gen_rtx_SYMBOL_REF (Pmode, name);
  SET_SYMBOL_REF_DECL (rtl, to);
  SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;

  rtl = gen_const_mem (Pmode, rtl);
  set_mem_alias_set (rtl, ix86_GOT_alias_set ());

  SET_DECL_RTL (to, rtl);
  SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));

  return to;
}

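/* Illustrative sketch, added for orientation and not part of the original
   sources: for a hypothetical declaration "__declspec(dllimport) int foo;",
   the import library defines a pointer-sized cell "__imp_foo" (or
   "__imp__foo" when symbols carry a user label prefix) holding foo's
   address, so an access compiles roughly to

     movl __imp__foo, %eax      # load &foo from the import table
     movl (%eax), %eax          # load foo itself

   which is what the MEM wrapped around the "*__imp_" SYMBOL_REF above
   models.  */
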
/* Expand SYMBOL into its corresponding dllimport symbol.  WANT_REG is
   true if we require the result be a register.  */

static rtx
legitimize_dllimport_symbol (rtx symbol, bool want_reg)
{
  tree imp_decl;
  rtx x;

  gcc_assert (SYMBOL_REF_DECL (symbol));
  imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));

  x = DECL_RTL (imp_decl);
  if (want_reg)
    x = force_reg (Pmode, x);
  return x;
}

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */

static rtx
ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
                         enum machine_mode mode)
{
  int changed = 0;
  unsigned log;

  log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
  if (log)
    return legitimize_tls_address (x, (enum tls_model) log, false);
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
      && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
    {
      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
                                      (enum tls_model) log, false);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
    }

  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
      if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
        return legitimize_dllimport_symbol (x, true);
      if (GET_CODE (x) == CONST
          && GET_CODE (XEXP (x, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
          && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
        {
          rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
          return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
        }
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

#if TARGET_MACHO
  if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
    return machopic_indirect_data_reference (x, 0);
#endif

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
  if (GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1))
      && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
    {
      changed = 1;
      log = INTVAL (XEXP (x, 1));
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
                        GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
          && CONST_INT_P (XEXP (XEXP (x, 0), 1))
          && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
        {
          changed = 1;
          log = INTVAL (XEXP (XEXP (x, 0), 1));
          XEXP (x, 0) = gen_rtx_MULT (Pmode,
                                      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
                                      GEN_INT (1 << log));
        }

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
          && CONST_INT_P (XEXP (XEXP (x, 1), 1))
          && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
        {
          changed = 1;
          log = INTVAL (XEXP (XEXP (x, 1), 1));
          XEXP (x, 1) = gen_rtx_MULT (Pmode,
                                      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
                                      GEN_INT (1 << log));
        }

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
        {
          rtx tmp = XEXP (x, 0);
          XEXP (x, 0) = XEXP (x, 1);
          XEXP (x, 1) = tmp;
          changed = 1;
        }

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
         into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
         created by virtual register instantiation, register elimination, and
         similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
        {
          changed = 1;
          x = gen_rtx_PLUS (Pmode,
                            gen_rtx_PLUS (Pmode, XEXP (x, 0),
                                          XEXP (XEXP (x, 1), 0)),
                            XEXP (XEXP (x, 1), 1));
        }

      /* Canonicalize
         (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
         into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
               && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
               && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
               && CONSTANT_P (XEXP (x, 1)))
        {
          rtx constant;
          rtx other = NULL_RTX;

          if (CONST_INT_P (XEXP (x, 1)))
            {
              constant = XEXP (x, 1);
              other = XEXP (XEXP (XEXP (x, 0), 1), 1);
            }
          else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
            {
              constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
              other = XEXP (x, 1);
            }
          else
            constant = 0;

          if (constant)
            {
              changed = 1;
              x = gen_rtx_PLUS (Pmode,
                                gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
                                              XEXP (XEXP (XEXP (x, 0), 1), 0)),
                                plus_constant (other, INTVAL (constant)));
            }
        }

      if (changed && ix86_legitimate_address_p (mode, x, false))
        return x;

      if (GET_CODE (XEXP (x, 0)) == MULT)
        {
          changed = 1;
          XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
        }

      if (GET_CODE (XEXP (x, 1)) == MULT)
        {
          changed = 1;
          XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
        }

      if (changed
          && REG_P (XEXP (x, 1))
          && REG_P (XEXP (x, 0)))
        return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
        {
          changed = 1;
          x = legitimize_pic_address (x, 0);
        }

      if (changed && ix86_legitimate_address_p (mode, x, false))
        return x;

      if (REG_P (XEXP (x, 0)))
        {
          rtx temp = gen_reg_rtx (Pmode);
          rtx val = force_operand (XEXP (x, 1), temp);
          if (val != temp)
            {
              if (GET_MODE (val) != Pmode)
                val = convert_to_mode (Pmode, val, 1);
              emit_move_insn (temp, val);
            }

          XEXP (x, 1) = temp;
          return x;
        }

      else if (REG_P (XEXP (x, 1)))
        {
          rtx temp = gen_reg_rtx (Pmode);
          rtx val = force_operand (XEXP (x, 0), temp);
          if (val != temp)
            {
              if (GET_MODE (val) != Pmode)
                val = convert_to_mode (Pmode, val, 1);
              emit_move_insn (temp, val);
            }

          XEXP (x, 0) = temp;
          return x;
        }
    }

  return x;
}

/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (FILE *file, rtx x, int code)
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      gcc_assert (flag_pic);
      putc ('.', file);
      break;

    case SYMBOL_REF:
      if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
        output_addr_const (file, x);
      else
        {
          const char *name = XSTR (x, 0);

          /* Mark the decl as referenced so that cgraph will
             output the function.  */
          if (SYMBOL_REF_DECL (x))
            mark_decl_referenced (SYMBOL_REF_DECL (x));

#if TARGET_MACHO
          if (MACHOPIC_INDIRECT
              && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
            name = machopic_indirection_name (x, /*stub_p=*/true);
#endif
          assemble_name (file, name);
        }
      if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
          && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
        fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
         but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
        {
          /* We can use %d if the number is <32 bits and positive.  */
          if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
            fprintf (file, "0x%lx%08lx",
                     (unsigned long) CONST_DOUBLE_HIGH (x),
                     (unsigned long) CONST_DOUBLE_LOW (x));
          else
            fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
        }
      else
        /* We can't handle floating point constants;
           TARGET_PRINT_OPERAND must handle them.  */
        output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (CONST_INT_P (XEXP (x, 0)))
        {
          output_pic_addr_const (file, XEXP (x, 0), code);
          putc ('+', file);
          output_pic_addr_const (file, XEXP (x, 1), code);
        }
      else
        {
          gcc_assert (CONST_INT_P (XEXP (x, 1)));
          output_pic_addr_const (file, XEXP (x, 1), code);
          putc ('+', file);
          output_pic_addr_const (file, XEXP (x, 0), code);
        }
      break;

    case MINUS:
      if (!TARGET_MACHO)
        putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      if (!TARGET_MACHO)
        putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      break;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_STACK_CHECK)
        {
          bool f = i386_asm_output_addr_const_extra (file, x);
          gcc_assert (f);
          break;
        }

      gcc_assert (XVECLEN (x, 0) == 1);
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
        {
        case UNSPEC_GOT:
          fputs ("@GOT", file);
          break;
        case UNSPEC_GOTOFF:
          fputs ("@GOTOFF", file);
          break;
        case UNSPEC_PLTOFF:
          fputs ("@PLTOFF", file);
          break;
        case UNSPEC_PCREL:
          fputs (ASSEMBLER_DIALECT == ASM_ATT ?
                 "(%rip)" : "[rip]", file);
          break;
        case UNSPEC_GOTPCREL:
          fputs (ASSEMBLER_DIALECT == ASM_ATT ?
                 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
          break;
        case UNSPEC_GOTTPOFF:
          /* FIXME: This might be @TPOFF in Sun ld too.  */
          fputs ("@gottpoff", file);
          break;
        case UNSPEC_TPOFF:
          fputs ("@tpoff", file);
          break;
        case UNSPEC_NTPOFF:
          if (TARGET_64BIT)
            fputs ("@tpoff", file);
          else
            fputs ("@ntpoff", file);
          break;
        case UNSPEC_DTPOFF:
          fputs ("@dtpoff", file);
          break;
        case UNSPEC_GOTNTPOFF:
          if (TARGET_64BIT)
            fputs (ASSEMBLER_DIALECT == ASM_ATT ?
                   "@gottpoff(%rip)": "@gottpoff[rip]", file);
          else
            fputs ("@gotntpoff", file);
          break;
        case UNSPEC_INDNTPOFF:
          fputs ("@indntpoff", file);
          break;
#if TARGET_MACHO
        case UNSPEC_MACHOPIC_OFFSET:
          putc ('-', file);
          machopic_output_function_base_name (file);
          break;
#endif
        default:
          output_operand_lossage ("invalid UNSPEC as operand");
          break;
        }
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}

/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

static void ATTRIBUTE_UNUSED
i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  fputs (ASM_LONG, file);
  output_addr_const (file, x);
  fputs ("@dtpoff", file);
  switch (size)
    {
    case 4:
      break;
    case 8:
      fputs (", 0", file);
      break;
    default:
      gcc_unreachable ();
   }
}

/* Return true if X is a representation of the PIC register.  This copes
   with calls from ix86_find_base_term, where the register might have
   been replaced by a cselib value.  */

static bool
ix86_pic_register_p (rtx x)
{
  if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
    return (pic_offset_table_rtx
            && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
  else
    return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
}

/* Helper function for ix86_delegitimize_address.
   Attempt to delegitimize TLS local-exec accesses.  */

static rtx
ix86_delegitimize_tls_address (rtx orig_x)
{
  rtx x = orig_x, unspec;
  struct ix86_address addr;

  if (!(TARGET_TLS_DIRECT_SEG_REFS
        && TARGET_TLS_INDIRECT_SEG_REFS))
    return orig_x;
  if (MEM_P (x))
    x = XEXP (x, 0);
  if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
    return orig_x;
  if (ix86_decompose_address (x, &addr) == 0
      || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
      || addr.disp == NULL_RTX
      || GET_CODE (addr.disp) != CONST)
    return orig_x;
  unspec = XEXP (addr.disp, 0);
  if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
    unspec = XEXP (unspec, 0);
  if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
    return orig_x;
  x = XVECEXP (unspec, 0, 0);
  gcc_assert (GET_CODE (x) == SYMBOL_REF);
  if (unspec != XEXP (addr.disp, 0))
    x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
  if (addr.index)
    {
      rtx idx = addr.index;
      if (addr.scale != 1)
        idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
      x = gen_rtx_PLUS (Pmode, idx, x);
    }
  if (addr.base)
    x = gen_rtx_PLUS (Pmode, addr.base, x);
  if (MEM_P (orig_x))
    x = replace_equiv_address_nv (orig_x, x);
  return x;
}

/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.

   On Darwin, this is necessary to avoid a crash, because Darwin
   has a different PIC label for each routine but the DWARF debugging
   information is not associated with any particular routine, so it's
   necessary to remove references to the PIC label from RTL stored by
   the DWARF output code.  */

static rtx
ix86_delegitimize_address (rtx x)
{
  rtx orig_x = delegitimize_mem_from_attrs (x);
  /* addend is NULL or some rtx if x is something+GOTOFF where
     something doesn't include the PIC register.  */
  rtx addend = NULL_RTX;
  /* reg_addend is NULL or a multiple of some register.  */
  rtx reg_addend = NULL_RTX;
  /* const_addend is NULL or a const_int.  */
  rtx const_addend = NULL_RTX;
  /* This is the result, or NULL.  */
  rtx result = NULL_RTX;

  x = orig_x;

  if (MEM_P (x))
    x = XEXP (x, 0);

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) == CONST
          && GET_CODE (XEXP (x, 0)) == PLUS
          && GET_MODE (XEXP (x, 0)) == Pmode
          && CONST_INT_P (XEXP (XEXP (x, 0), 1))
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
          && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
        {
          rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
          x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
          if (MEM_P (orig_x))
            x = replace_equiv_address_nv (orig_x, x);
          return x;
        }
      if (GET_CODE (x) != CONST
          || GET_CODE (XEXP (x, 0)) != UNSPEC
          || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
              && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
          || (!MEM_P (orig_x) && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL))
        return ix86_delegitimize_tls_address (orig_x);
      x = XVECEXP (XEXP (x, 0), 0, 0);
      if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
        {
          x = simplify_gen_subreg (GET_MODE (orig_x), x,
                                   GET_MODE (x), 0);
          if (x == NULL_RTX)
            return orig_x;
        }
      return x;
    }

  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return ix86_delegitimize_tls_address (orig_x);

  if (ix86_pic_register_p (XEXP (x, 0)))
    /* %ebx + GOT/GOTOFF */
    ;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF */
      reg_addend = XEXP (x, 0);
      if (ix86_pic_register_p (XEXP (reg_addend, 0)))
        reg_addend = XEXP (reg_addend, 1);
      else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
        reg_addend = XEXP (reg_addend, 0);
      else
        {
          reg_addend = NULL_RTX;
          addend = XEXP (x, 0);
        }
    }
  else
    addend = XEXP (x, 0);

  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1)))
    {
      const_addend = XEXP (x, 1);
      x = XEXP (x, 0);
    }

  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
          || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
    result = XVECEXP (x, 0, 0);

  if (TARGET_MACHO && darwin_local_data_pic (x)
      && !MEM_P (orig_x))
    result = XVECEXP (x, 0, 0);

  if (! result)
    return ix86_delegitimize_tls_address (orig_x);

  if (const_addend)
    result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result,
                                                 const_addend));
  if (reg_addend)
    result = gen_rtx_PLUS (Pmode, reg_addend, result);
  if (addend)
    {
      /* If the rest of original X doesn't involve the PIC register, add
         addend and subtract pic_offset_table_rtx.  This can happen e.g.
         for code like:
           leal (%ebx, %ecx, 4), %ecx
           ...
           movl foo@GOTOFF(%ecx), %edx
         in which case we return (%ecx - %ebx) + foo.  */
      if (pic_offset_table_rtx)
        result = gen_rtx_PLUS (Pmode,
                               gen_rtx_MINUS (Pmode, copy_rtx (addend),
                                              pic_offset_table_rtx),
                               result);
      else
        return orig_x;
    }
  if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
    {
      result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
      if (result == NULL_RTX)
        return orig_x;
    }
  return result;
}

/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */

rtx
ix86_find_base_term (rtx x)
{
  rtx term;

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) != CONST)
        return x;
      term = XEXP (x, 0);
      if (GET_CODE (term) == PLUS
          && (CONST_INT_P (XEXP (term, 1))
              || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
        term = XEXP (term, 0);
      if (GET_CODE (term) != UNSPEC
          || (XINT (term, 1) != UNSPEC_GOTPCREL
              && XINT (term, 1) != UNSPEC_PCREL))
        return x;

      return XVECEXP (term, 0, 0);
    }

  return ix86_delegitimize_address (x);
}

static void
put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
                    int fp, FILE *file)
{
  const char *suffix;

  if (mode == CCFPmode || mode == CCFPUmode)
    {
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      switch (mode)
        {
        case CCAmode:
          suffix = "a";
          break;
        case CCCmode:
          suffix = "c";
          break;
        case CCOmode:
          suffix = "o";
          break;
        case CCSmode:
          suffix = "s";
          break;
        default:
          suffix = "e";
        }
      break;
    case NE:
      switch (mode)
        {
        case CCAmode:
          suffix = "na";
          break;
        case CCCmode:
          suffix = "nc";
          break;
        case CCOmode:
          suffix = "no";
          break;
        case CCSmode:
          suffix = "ns";
          break;
        default:
          suffix = "ne";
        }
      break;
    case GT:
      gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
         Those same assemblers have the same but opposite lossage on cmov.  */
      if (mode == CCmode)
        suffix = fp ? "nbe" : "a";
      else if (mode == CCCmode)
        suffix = "b";
      else
        gcc_unreachable ();
      break;
    case LT:
      switch (mode)
        {
        case CCNOmode:
        case CCGOCmode:
          suffix = "s";
          break;
        case CCmode:
        case CCGCmode:
          suffix = "l";
          break;
        default:
          gcc_unreachable ();
        }
      break;
    case LTU:
      gcc_assert (mode == CCmode || mode == CCCmode);
      suffix = "b";
      break;
    case GE:
      switch (mode)
        {
        case CCNOmode:
        case CCGOCmode:
          suffix = "ns";
          break;
        case CCmode:
        case CCGCmode:
          suffix = "ge";
          break;
        default:
          gcc_unreachable ();
        }
      break;
    case GEU:
      /* ??? As above.  */
      gcc_assert (mode == CCmode || mode == CCCmode);
      suffix = fp ? "nb" : "ae";
      break;
    case LE:
      gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
      suffix = "le";
      break;
    case LEU:
      /* ??? As above.  */
      if (mode == CCmode)
        suffix = "be";
      else if (mode == CCCmode)
        suffix = fp ? "nb" : "ae";
      else
        gcc_unreachable ();
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      gcc_unreachable ();
    }
  fputs (suffix, file);
}

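/* Illustrative sketch, added for orientation and not part of the original
   sources: the suffixes chosen above are the standard ia32 condition-code
   mnemonics, e.g.

     EQ  -> "e"    NE  -> "ne"   GT  -> "g"    LT  -> "l"   (signed)
     GTU -> "a"    LTU -> "b"    GEU -> "ae"   LEU -> "be"  (unsigned)
     UNORDERED -> "p"/"u"        ORDERED -> "np"/"nu"       (fp flags)

   so, e.g., a signed greater-than set/cmov becomes "setg"/"cmovg" while
   its unsigned counterpart becomes "seta"/"cmova".  */
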
/* Print the name of register X to FILE based on its machine mode and number.
   If CODE is 'w', pretend the mode is HImode.
   If CODE is 'b', pretend the mode is QImode.
   If CODE is 'k', pretend the mode is SImode.
   If CODE is 'q', pretend the mode is DImode.
   If CODE is 'x', pretend the mode is V4SFmode.
   If CODE is 't', pretend the mode is V8SFmode.
   If CODE is 'h', pretend the reg is the 'high' byte register.
   If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op.
   If CODE is 'd', duplicate the operand for AVX instruction.
 */

void
print_reg (rtx x, int code, FILE *file)
{
  const char *reg;
  bool duplicated = code == 'd' && TARGET_AVX;

  gcc_assert (x == pc_rtx
              || (REGNO (x) != ARG_POINTER_REGNUM
                  && REGNO (x) != FRAME_POINTER_REGNUM
                  && REGNO (x) != FLAGS_REG
                  && REGNO (x) != FPSR_REG
                  && REGNO (x) != FPCR_REG));

  if (ASSEMBLER_DIALECT == ASM_ATT)
    putc ('%', file);

  if (x == pc_rtx)
    {
      gcc_assert (TARGET_64BIT);
      fputs ("rip", file);
      return;
    }

  if (code == 'w' || MMX_REG_P (x))
    code = 2;
  else if (code == 'b')
    code = 1;
  else if (code == 'k')
    code = 4;
  else if (code == 'q')
    code = 8;
  else if (code == 'y')
    code = 3;
  else if (code == 'h')
    code = 0;
  else if (code == 'x')
    code = 16;
  else if (code == 't')
    code = 32;
  else
    code = GET_MODE_SIZE (GET_MODE (x));

  /* Irritatingly, AMD extended registers use a different naming convention
     from the normal registers: "r%d[bwd]".  */
  if (REX_INT_REG_P (x))
    {
      gcc_assert (TARGET_64BIT);
      putc ('r', file);
      fprint_ul (file, REGNO (x) - FIRST_REX_INT_REG + 8);
      switch (code)
        {
        case 0:
          error ("extended registers have no high halves");
          break;
        case 1:
          putc ('b', file);
          break;
        case 2:
          putc ('w', file);
          break;
        case 4:
          putc ('d', file);
          break;
        case 8:
          /* no suffix */
          break;
        default:
          error ("unsupported operand size for extended register");
          break;
        }
      return;
    }

  reg = NULL;
  switch (code)
    {
    case 3:
      if (STACK_TOP_P (x))
        {
          reg = "st(0)";
          break;
        }
      /* FALLTHRU */
    case 8:
    case 4:
    case 12:
      if (! ANY_FP_REG_P (x))
        putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
      /* FALLTHRU */
    case 16:
    case 2:
    normal:
      reg = hi_reg_name[REGNO (x)];
      break;
    case 1:
      if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
        goto normal;
      reg = qi_reg_name[REGNO (x)];
      break;
    case 0:
      if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
        goto normal;
      reg = qi_high_reg_name[REGNO (x)];
      break;
    case 32:
      if (SSE_REG_P (x))
        {
          gcc_assert (!duplicated);
          putc ('y', file);
          fputs (hi_reg_name[REGNO (x)] + 1, file);
          return;
        }
      break;
    default:
      gcc_unreachable ();
    }

  fputs (reg, file);
  if (duplicated)
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
        fprintf (file, ", %%%s", reg);
      else
        fprintf (file, ", %s", reg);
    }
}

/* Locate some local-dynamic symbol still in use by this function
   so that we can print its name in some tls_local_dynamic_base
   pattern.  */

static int
get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
{
  rtx x = *px;

  if (GET_CODE (x) == SYMBOL_REF
      && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
    {
      cfun->machine->some_ld_name = XSTR (x, 0);
      return 1;
    }

  return 0;
}

static const char *
get_some_local_dynamic_name (void)
{
  rtx insn;

  if (cfun->machine->some_ld_name)
    return cfun->machine->some_ld_name;

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    if (NONDEBUG_INSN_P (insn)
        && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
      return cfun->machine->some_ld_name;

  return NULL;
}

/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   F,f -- likewise, but for floating-point.
   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
        otherwise nothing
   R -- print the prefix for register names.
   z -- print the opcode suffix for the size of the current operand.
   Z -- likewise, with special suffixes for x87 instructions.
   * -- print a star (in certain assembler syntax)
   A -- print an absolute memory reference.
   E -- print address with DImode register names if TARGET_64BIT.
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assemblers argument
        delimiter.
   b -- print the QImode name of the register for the indicated operand.
        %b0 would print %al if operands[0] is reg 0.
   w -- likewise, print the HImode name of the register.
   k -- likewise, print the SImode name of the register.
   q -- likewise, print the DImode name of the register.
   x -- likewise, print the V4SFmode name of the register.
   t -- likewise, print the V8SFmode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   d -- print duplicated register operand for AVX instruction.
   D -- print condition for SSE cmp instruction.
   P -- if PIC, print an @PLT suffix.
   p -- print raw symbol name.
   X -- don't print any sort of PIC '@' suffix for a symbol.
   & -- print some in-use local-dynamic symbol name.
   H -- print a memory address offset by 8; used for sse high-parts
   Y -- print condition for XOP pcom* instruction.
   + -- print a branch hint as 'cs' or 'ds' prefix
   ; -- print a semicolon (after prefixes due to bug in older gas).
   ~ -- print "i" if TARGET_AVX2, "f" otherwise.
   @ -- print a segment register of thread base pointer load
   ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
 */

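/* Illustrative sketch, added for orientation and not part of the original
   sources: these codes appear in output templates in the machine
   description, e.g. a hypothetical template

     "mov{l}\t{%1, %k0|%k0, %1}"

   prints operand 0 with its SImode register name (code 'k'), so a DImode
   hard register %rax would be printed as %eax.  */
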
void
ix86_print_operand (FILE *file, rtx x, int code)
{
  if (code)
    {
      switch (code)
        {
        case '*':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('*', file);
          return;

        case '&':
          {
            const char *name = get_some_local_dynamic_name ();
            if (name == NULL)
              output_operand_lossage ("'%%&' used without any "
                                      "local dynamic TLS references");
            else
              assemble_name (file, name);
            return;
          }

        case 'A':
          switch (ASSEMBLER_DIALECT)
            {
            case ASM_ATT:
              putc ('*', file);
              break;

            case ASM_INTEL:
              /* Intel syntax.  For absolute addresses, registers should not
                 be surrounded by braces.  */
              if (!REG_P (x))
                {
                  putc ('[', file);
                  ix86_print_operand (file, x, 0);
                  putc (']', file);
                  return;
                }
              break;

            default:
              gcc_unreachable ();
            }

          ix86_print_operand (file, x, 0);
          return;

        case 'E':
          /* Wrap address in an UNSPEC to declare special handling.  */
          if (TARGET_64BIT)
            x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);

          output_address (x);
          return;

        case 'L':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('l', file);
          return;

        case 'W':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('w', file);
          return;

        case 'B':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('b', file);
          return;

        case 'Q':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('l', file);
          return;

        case 'S':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('s', file);
          return;

        case 'T':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('t', file);
          return;

        case 'z':
          if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
            {
              /* Opcodes don't get size suffixes if using Intel opcodes.  */
              if (ASSEMBLER_DIALECT == ASM_INTEL)
                return;

              switch (GET_MODE_SIZE (GET_MODE (x)))
                {
                case 1:
                  putc ('b', file);
                  return;

                case 2:
                  putc ('w', file);
                  return;

                case 4:
                  putc ('l', file);
                  return;

                case 8:
                  putc ('q', file);
                  return;

                default:
                  output_operand_lossage
                    ("invalid operand size for operand code '%c'", code);
                  return;
                }
            }

          if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
            warning
              (0, "non-integer operand used with operand code '%c'", code);
          /* FALLTHRU */

        case 'Z':
          /* 387 opcodes don't get size suffixes if using Intel opcodes.  */
          if (ASSEMBLER_DIALECT == ASM_INTEL)
            return;

          if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
            {
              switch (GET_MODE_SIZE (GET_MODE (x)))
                {
                case 2:
#ifdef HAVE_AS_IX86_FILDS
                  putc ('s', file);
#endif
                  return;

                case 4:
                  putc ('l', file);
                  return;

                case 8:
#ifdef HAVE_AS_IX86_FILDQ
                  putc ('q', file);
#else
                  fputs ("ll", file);
#endif
                  return;

                default:
                  break;
                }
            }
          else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
            {
              /* 387 opcodes don't get size suffixes
                 if the operands are registers.  */
              if (STACK_REG_P (x))
                return;

              switch (GET_MODE_SIZE (GET_MODE (x)))
                {
                case 4:
                  putc ('s', file);
                  return;

                case 8:
                  putc ('l', file);
                  return;

                case 12:
                case 16:
                  putc ('t', file);
                  return;

                default:
                  break;
                }
            }
          else
            {
              output_operand_lossage
                ("invalid operand type used with operand code '%c'", code);
              return;
            }

          output_operand_lossage
            ("invalid operand size for operand code '%c'", code);
          return;

        case 'd':
        case 'b':
        case 'w':
        case 'k':
        case 'q':
        case 'h':
        case 't':
        case 'y':
        case 'x':
        case 'X':
        case 'P':
        case 'p':
          break;

        case 's':
          if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
            {
              ix86_print_operand (file, x, 0);
              fputs (", ", file);
            }
          return;

        case 'D':
          /* A little bit of braindamage here.  The SSE compare
             instructions use completely different names for the
             comparisons than the fp conditional moves do.  */
          if (TARGET_AVX)
            {
              switch (GET_CODE (x))
                {
                case EQ:
                  fputs ("eq", file);
                  break;
                case UNEQ:
                  fputs ("eq_us", file);
                  break;
                case LT:
                  fputs ("lt", file);
                  break;
                case UNLT:
                  fputs ("nge", file);
                  break;
                case LE:
                  fputs ("le", file);
                  break;
                case UNLE:
                  fputs ("ngt", file);
                  break;
                case UNORDERED:
                  fputs ("unord", file);
                  break;
                case NE:
                  fputs ("neq", file);
                  break;
                case LTGT:
                  fputs ("neq_oq", file);
                  break;
                case GE:
                  fputs ("ge", file);
                  break;
                case UNGE:
                  fputs ("nlt", file);
                  break;
                case GT:
                  fputs ("gt", file);
                  break;
                case UNGT:
                  fputs ("nle", file);
                  break;
                case ORDERED:
                  fputs ("ord", file);
                  break;
                default:
                  output_operand_lossage ("operand is not a condition code, "
                                          "invalid operand code 'D'");
                  return;
                }
            }
          else
            {
              switch (GET_CODE (x))
                {
                case EQ:
                case UNEQ:
                  fputs ("eq", file);
                  break;
                case LT:
                case UNLT:
                  fputs ("lt", file);
                  break;
                case LE:
                case UNLE:
                  fputs ("le", file);
                  break;
                case UNORDERED:
                  fputs ("unord", file);
                  break;
                case NE:
                case LTGT:
                  fputs ("neq", file);
                  break;
                case UNGE:
                case GE:
                  fputs ("nlt", file);
                  break;
                case UNGT:
                case GT:
                  fputs ("nle", file);
                  break;
                case ORDERED:
                  fputs ("ord", file);
                  break;
                default:
                  output_operand_lossage ("operand is not a condition code, "
                                          "invalid operand code 'D'");
                  return;
                }
            }
          return;

        case 'O':
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
          if (ASSEMBLER_DIALECT == ASM_ATT)
            {
              switch (GET_MODE (x))
                {
                case HImode: putc ('w', file); break;
                case SImode:
                case SFmode: putc ('l', file); break;
                case DImode:
                case DFmode: putc ('q', file); break;
                default: gcc_unreachable ();
                }
              putc ('.', file);
            }
#endif
          return;

        case 'C':
          if (!COMPARISON_P (x))
            {
              output_operand_lossage ("operand is neither a constant nor a "
                                      "condition code, invalid operand code "
                                      "'C'");
              return;
            }
          put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
                              0, 0, file);
          return;

        case 'F':
          if (!COMPARISON_P (x))
            {
              output_operand_lossage ("operand is neither a constant nor a "
                                      "condition code, invalid operand code "
                                      "'F'");
              return;
            }
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('.', file);
#endif
          put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
                              0, 1, file);
          return;

          /* Like above, but reverse condition.  */
        case 'c':
          /* Check to see if the argument to %c is really a constant
             and not a condition code which needs to be reversed.  */
          if (!COMPARISON_P (x))
            {
              output_operand_lossage ("operand is neither a constant nor a "
                                      "condition code, invalid operand "
                                      "code 'c'");
              return;
            }
          put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
                              1, 0, file);
          return;

        case 'f':
          if (!COMPARISON_P (x))
            {
              output_operand_lossage ("operand is neither a constant nor a "
                                      "condition code, invalid operand "
                                      "code 'f'");
              return;
            }
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('.', file);
#endif
          put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
                              1, 1, file);
          return;

        case 'H':
          if (!offsettable_memref_p (x))
            {
              output_operand_lossage ("operand is not an offsettable memory "
                                      "reference, invalid operand "
                                      "code 'H'");
              return;
            }
          /* It doesn't actually matter what mode we use here, as we're
             only going to use this for printing.  */
          x = adjust_address_nv (x, DImode, 8);
          break;

        case '+':
          {
            rtx x;

            if (!optimize
                || optimize_function_for_size_p (cfun)
                || !TARGET_BRANCH_PREDICTION_HINTS)
              return;

            x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
            if (x)
              {
                int pred_val = INTVAL (XEXP (x, 0));

                if (pred_val < REG_BR_PROB_BASE * 45 / 100
                    || pred_val > REG_BR_PROB_BASE * 55 / 100)
                  {
                    bool taken = pred_val > REG_BR_PROB_BASE / 2;
                    bool cputaken
                      = final_forward_branch_p (current_output_insn) == 0;

                    /* Emit hints only in the case default branch prediction
                       heuristics would fail.  */
                    if (taken != cputaken)
                      {
                        /* We use 3e (DS) prefix for taken branches and
                           2e (CS) prefix for not taken branches.  */
                        if (taken)
                          fputs ("ds ; ", file);
                        else
                          fputs ("cs ; ", file);
                      }
                  }
              }
            return;
          }

        case 'Y':
          switch (GET_CODE (x))
            {
            case NE:
              fputs ("neq", file);
              break;
            case EQ:
              fputs ("eq", file);
              break;
            case GE:
            case GEU:
              fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
              break;
            case GT:
            case GTU:
              fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
              break;
            case LE:
            case LEU:
              fputs ("le", file);
              break;
            case LT:
            case LTU:
              fputs ("lt", file);
              break;
            case UNORDERED:
              fputs ("unord", file);
              break;
            case ORDERED:
              fputs ("ord", file);
              break;
            case UNEQ:
              fputs ("ueq", file);
              break;
            case UNGE:
              fputs ("nlt", file);
              break;
            case UNGT:
              fputs ("nle", file);
              break;
            case UNLE:
              fputs ("ule", file);
              break;
            case UNLT:
              fputs ("ult", file);
              break;
            case LTGT:
              fputs ("une", file);
              break;
            default:
              output_operand_lossage ("operand is not a condition code, "
                                      "invalid operand code 'Y'");
              return;
            }
          return;

        case ';':
#ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
          putc (';', file);
#endif
          return;

        case '@':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('%', file);

          /* The kernel uses a different segment register for performance
             reasons; a system call would not have to trash the userspace
             segment register, which would be expensive.  */
          if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
            fputs ("fs", file);
          else
            fputs ("gs", file);
          return;

        case '~':
          putc (TARGET_AVX2 ? 'i' : 'f', file);
          return;

        case '^':
          if (TARGET_64BIT && Pmode != word_mode)
            fputs ("addr32 ", file);
          return;

        default:
          output_operand_lossage ("invalid operand code '%c'", code);
        }
    }

  if (REG_P (x))
    print_reg (x, code, file);

  else if (MEM_P (x))
    {
      /* No `byte ptr' prefix for call instructions or BLKmode operands.  */
      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
          && GET_MODE (x) != BLKmode)
        {
          const char *size;
          switch (GET_MODE_SIZE (GET_MODE (x)))
            {
            case 1: size = "BYTE"; break;
            case 2: size = "WORD"; break;
            case 4: size = "DWORD"; break;
            case 8: size = "QWORD"; break;
            case 12: size = "TBYTE"; break;
            case 16:
              if (GET_MODE (x) == XFmode)
                size = "TBYTE";
              else
                size = "XMMWORD";
              break;
            case 32: size = "YMMWORD"; break;
            default:
              gcc_unreachable ();
            }

          /* Check for explicit size override (codes 'b', 'w', 'k',
             'q' and 'x').  */
          if (code == 'b')
            size = "BYTE";
          else if (code == 'w')
            size = "WORD";
          else if (code == 'k')
            size = "DWORD";
          else if (code == 'q')
            size = "QWORD";
          else if (code == 'x')
            size = "XMMWORD";

          fputs (size, file);
          fputs (" PTR ", file);
        }

      x = XEXP (x, 0);
      /* Avoid (%rip) for call operands.  */
      if (CONSTANT_ADDRESS_P (x) && code == 'P'
          && !CONST_INT_P (x))
        output_addr_const (file, x);
      else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
        output_operand_lossage ("invalid constraints for operand");
      else
        output_address (x);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
    {
      REAL_VALUE_TYPE r;
      long l;

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_SINGLE (r, l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
        putc ('$', file);
      /* Sign extend 32bit SFmode immediate to 8 bytes.  */
      if (code == 'q')
        fprintf (file, "0x%08llx", (unsigned long long) (int) l);
      else
        fprintf (file, "0x%08x", (unsigned int) l);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
    {
      REAL_VALUE_TYPE r;
      long l[2];

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_DOUBLE (r, l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
        putc ('$', file);
      fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
    }

  /* These float cases don't actually occur as immediate operands.  */
  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
    {
      char dstr[30];

      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr),
                       0, 1);
      fputs (dstr, file);
    }

  else
    {
      /* We have patterns that allow zero sets of memory, for instance.
         In 64-bit mode, we should probably support all 8-byte vectors,
         since we can in fact encode that into an immediate.  */
      if (GET_CODE (x) == CONST_VECTOR)
        {
          gcc_assert (x == CONST0_RTX (GET_MODE (x)));
          x = const0_rtx;
        }

      if (code != 'P' && code != 'p')
        {
          if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
            {
              if (ASSEMBLER_DIALECT == ASM_ATT)
                putc ('$', file);
            }
          else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
                   || GET_CODE (x) == LABEL_REF)
            {
              if (ASSEMBLER_DIALECT == ASM_ATT)
                putc ('$', file);
              else
                fputs ("OFFSET FLAT:", file);
            }
        }
      if (CONST_INT_P (x))
        fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      else if (flag_pic || MACHOPIC_INDIRECT)
        output_pic_addr_const (file, x, code);
      else
        output_addr_const (file, x);
    }
}

static bool
ix86_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '@' || code == '*' || code == '+' || code == '&'
          || code == ';' || code == '~' || code == '^');
}

/* Print a memory operand whose address is ADDR.  */

static void
ix86_print_operand_address (FILE *file, rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;
  int ok;
  bool vsib = false;
  int code = 0;

  if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
    {
      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
      gcc_assert (parts.index == NULL_RTX);
      parts.index = XVECEXP (addr, 0, 1);
      parts.scale = INTVAL (XVECEXP (addr, 0, 2));
      addr = XVECEXP (addr, 0, 0);
      vsib = true;
    }
  else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
    {
      gcc_assert (TARGET_64BIT);
      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
      code = 'q';
    }
  else
    ok = ix86_decompose_address (addr, &parts);

  gcc_assert (ok);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    {
      rtx tmp = SUBREG_REG (parts.base);
      parts.base = simplify_subreg (GET_MODE (parts.base),
                                    tmp, GET_MODE (tmp), 0);
    }

  if (parts.index && GET_CODE (parts.index) == SUBREG)
    {
      rtx tmp = SUBREG_REG (parts.index);
      parts.index = simplify_subreg (GET_MODE (parts.index),
                                     tmp, GET_MODE (tmp), 0);
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  switch (parts.seg)
    {
    case SEG_DEFAULT:
      break;
    case SEG_FS:
    case SEG_GS:
      if (ASSEMBLER_DIALECT == ASM_ATT)
        putc ('%', file);
      fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
      break;
    default:
      gcc_unreachable ();
    }

  /* Use one byte shorter RIP relative addressing for 64bit mode.  */
  if (TARGET_64BIT && !base && !index)
    {
      rtx symbol = disp;

      if (GET_CODE (disp) == CONST
          && GET_CODE (XEXP (disp, 0)) == PLUS
          && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
        symbol = XEXP (XEXP (disp, 0), 0);

      if (GET_CODE (symbol) == LABEL_REF
          || (GET_CODE (symbol) == SYMBOL_REF
              && SYMBOL_REF_TLS_MODEL (symbol) == 0))
        base = pc_rtx;
    }
  if (!base && !index)
    {
      /* Displacement only requires special attention.  */

      if (CONST_INT_P (disp))
        {
          if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
            fputs ("ds:", file);
          fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
        }
      else if (flag_pic)
        output_pic_addr_const (file, disp, 0);
      else
        output_addr_const (file, disp);
    }
  else
    {
      /* Print SImode register names for zero-extended
         addresses to force the addr32 prefix.  */
      if (TARGET_64BIT
          && (GET_CODE (addr) == ZERO_EXTEND
              || GET_CODE (addr) == AND))
        {
          gcc_assert (!code);
          code = 'l';
        }

      if (ASSEMBLER_DIALECT == ASM_ATT)
        {
          if (disp)
            {
              if (flag_pic)
                output_pic_addr_const (file, disp, 0);
              else if (GET_CODE (disp) == LABEL_REF)
                output_asm_label (disp);
              else
                output_addr_const (file, disp);
            }

          putc ('(', file);
          if (base)
            print_reg (base, code, file);
          if (index)
            {
              putc (',', file);
              print_reg (index, vsib ? 0 : code, file);
              if (scale != 1 || vsib)
                fprintf (file, ",%d", scale);
            }
          putc (')', file);
        }
      else
        {
          rtx offset = NULL_RTX;

          if (disp)
            {
              /* Pull out the offset of a symbol; print any symbol itself.  */
              if (GET_CODE (disp) == CONST
                  && GET_CODE (XEXP (disp, 0)) == PLUS
                  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
                {
                  offset = XEXP (XEXP (disp, 0), 1);
                  disp = gen_rtx_CONST (VOIDmode,
                                        XEXP (XEXP (disp, 0), 0));
                }

              if (flag_pic)
                output_pic_addr_const (file, disp, 0);
              else if (GET_CODE (disp) == LABEL_REF)
                output_asm_label (disp);
              else if (CONST_INT_P (disp))
                offset = disp;
              else
                output_addr_const (file, disp);
            }

          putc ('[', file);
          if (base)
            {
              print_reg (base, code, file);
              if (offset)
                {
                  if (INTVAL (offset) >= 0)
                    putc ('+', file);
                  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
                }
            }
          else if (offset)
            fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
          else
            putc ('0', file);

          if (index)
            {
              putc ('+', file);
              print_reg (index, vsib ? 0 : code, file);
              if (scale != 1 || vsib)
                fprintf (file, "*%d", scale);
            }
          putc (']', file);
        }
    }
}

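/* Illustrative sketch, added for orientation and not part of the original
   sources: the two dialect branches above print the same address
   differently, e.g. for base %ebx, index %ecx, scale 4, displacement 16:

     AT&T syntax:   16(%ebx,%ecx,4)
     Intel syntax:  [ebx+ecx*4+16]

   and a bare constant displacement gets an explicit "ds:" segment in
   Intel syntax so the assembler treats it as a memory operand.  */
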
/* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

static bool
i386_asm_output_addr_const_extra (FILE *file, rtx x)
{
  rtx op;

  if (GET_CODE (x) != UNSPEC)
    return false;

  op = XVECEXP (x, 0, 0);
  switch (XINT (x, 1))
    {
    case UNSPEC_GOTTPOFF:
      output_addr_const (file, op);
      /* FIXME: This might be @TPOFF in Sun ld.  */
      fputs ("@gottpoff", file);
      break;
    case UNSPEC_TPOFF:
      output_addr_const (file, op);
      fputs ("@tpoff", file);
      break;
    case UNSPEC_NTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
        fputs ("@tpoff", file);
      else
        fputs ("@ntpoff", file);
      break;
    case UNSPEC_DTPOFF:
      output_addr_const (file, op);
      fputs ("@dtpoff", file);
      break;
    case UNSPEC_GOTNTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
        fputs (ASSEMBLER_DIALECT == ASM_ATT ?
               "@gottpoff(%rip)" : "@gottpoff[rip]", file);
      else
        fputs ("@gotntpoff", file);
      break;
    case UNSPEC_INDNTPOFF:
      output_addr_const (file, op);
      fputs ("@indntpoff", file);
      break;
#if TARGET_MACHO
    case UNSPEC_MACHOPIC_OFFSET:
      output_addr_const (file, op);
      putc ('-', file);
      machopic_output_function_base_name (file);
      break;
#endif

    case UNSPEC_STACK_CHECK:
      {
        int offset;

        gcc_assert (flag_split_stack);

#ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
        offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
#else
        gcc_unreachable ();
#endif

        fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
      }
      break;

    default:
      return false;
    }

  return true;
}

/* Split one or more double-mode RTL references into pairs of half-mode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of double-mode RTLs to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_double_mode (enum machine_mode mode, rtx operands[],
                   int num, rtx lo_half[], rtx hi_half[])
{
  enum machine_mode half_mode;
  unsigned int byte;

  switch (mode)
    {
    case TImode:
      half_mode = DImode;
      break;
    case DImode:
      half_mode = SImode;
      break;
    default:
      gcc_unreachable ();
    }

  byte = GET_MODE_SIZE (half_mode);

  while (num--)
    {
      rtx op = operands[num];

      /* simplify_subreg refuses to split volatile memory addresses,
         but we still have to handle it.  */
      if (MEM_P (op))
        {
          lo_half[num] = adjust_address (op, half_mode, 0);
          hi_half[num] = adjust_address (op, half_mode, byte);
        }
      else
        {
          lo_half[num] = simplify_gen_subreg (half_mode, op,
                                              GET_MODE (op) == VOIDmode
                                              ? mode : GET_MODE (op), 0);
          hi_half[num] = simplify_gen_subreg (half_mode, op,
                                              GET_MODE (op) == VOIDmode
                                              ? mode : GET_MODE (op), byte);
        }
    }
}
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */

#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1
#endif

const char *
output_387_binary_op (rtx insn, rtx *operands)
{
  static char buf[40];
  const char *p;
  const char *ssep;
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);

#ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (STACK_REG_P (operands[0])
      && ((REG_P (operands[1])
           && REGNO (operands[0]) == REGNO (operands[1])
           && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
          || (REG_P (operands[2])
              && REGNO (operands[0]) == REGNO (operands[2])
              && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
    ; /* ok */
  else
    gcc_assert (is_sse);
#endif

  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
          || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
        p = "fiadd";
      else
        p = "fadd";
      ssep = "vadd";
      break;

    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
          || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
        p = "fisub";
      else
        p = "fsub";
      ssep = "vsub";
      break;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
          || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
        p = "fimul";
      else
        p = "fmul";
      ssep = "vmul";
      break;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
          || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
        p = "fidiv";
      else
        p = "fdiv";
      ssep = "vdiv";
      break;

    default:
      gcc_unreachable ();
    }

  if (is_sse)
    {
      if (TARGET_AVX)
        {
          strcpy (buf, ssep);
          if (GET_MODE (operands[0]) == SFmode)
            strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
          else
            strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
        }
      else
        {
          strcpy (buf, ssep + 1);
          if (GET_MODE (operands[0]) == SFmode)
            strcat (buf, "ss\t{%2, %0|%0, %2}");
          else
            strcat (buf, "sd\t{%2, %0|%0, %2}");
        }
      return buf;
    }
  strcpy (buf, p);

  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
        {
          rtx temp = operands[2];
          operands[2] = operands[1];
          operands[1] = temp;
        }

      /* know operands[0] == operands[1].  */

      if (MEM_P (operands[2]))
        {
          p = "%Z2\t%2";
          break;
        }

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
        {
          if (STACK_TOP_P (operands[0]))
            /* How is it that we are storing to a dead operand[2]?
               Well, presumably operands[1] is dead too.  We can't
               store the result to st(0) as st(0) gets popped on this
               instruction.  Instead store to operands[2] (which I
               think has to be st(1)).  st(1) will be popped later.
               gcc <= 2.8.1 didn't have this check and generated
               assembly code that the Unixware assembler rejected.  */
            p = "p\t{%0, %2|%2, %0}";   /* st(1) = st(0) op st(1); pop */
          else
            p = "p\t{%2, %0|%0, %2}";   /* st(r1) = st(r1) op st(0); pop */
          break;
        }

      if (STACK_TOP_P (operands[0]))
        p = "\t{%y2, %0|%0, %y2}";      /* st(0) = st(0) op st(r2) */
      else
        p = "\t{%2, %0|%0, %2}";        /* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      if (MEM_P (operands[1]))
        {
          p = "r%Z1\t%1";
          break;
        }

      if (MEM_P (operands[2]))
        {
          p = "%Z2\t%2";
          break;
        }

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
        {
#if SYSV386_COMPAT
          /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
             derived assemblers, confusingly reverse the direction of
             the operation for fsub{r} and fdiv{r} when the
             destination register is not st(0).  The Intel assembler
             doesn't have this brain damage.  Read !SYSV386_COMPAT to
             figure out what the hardware really does.  */
          if (STACK_TOP_P (operands[0]))
            p = "{p\t%0, %2|rp\t%2, %0}";
          else
            p = "{rp\t%2, %0|p\t%0, %2}";
#else
          if (STACK_TOP_P (operands[0]))
            /* As above for fmul/fadd, we can't store to st(0).  */
            p = "rp\t{%0, %2|%2, %0}";  /* st(1) = st(0) op st(1); pop */
          else
            p = "p\t{%2, %0|%0, %2}";   /* st(r1) = st(r1) op st(0); pop */
#endif
          break;
        }

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
        {
#if SYSV386_COMPAT
          if (STACK_TOP_P (operands[0]))
            p = "{rp\t%0, %1|p\t%1, %0}";
          else
            p = "{p\t%1, %0|rp\t%0, %1}";
#else
          if (STACK_TOP_P (operands[0]))
            p = "p\t{%0, %1|%1, %0}";   /* st(1) = st(1) op st(0); pop */
          else
            p = "rp\t{%1, %0|%0, %1}";  /* st(r2) = st(0) op st(r2); pop */
#endif
          break;
        }

      if (STACK_TOP_P (operands[0]))
        {
          if (STACK_TOP_P (operands[1]))
            p = "\t{%y2, %0|%0, %y2}";  /* st(0) = st(0) op st(r2) */
          else
            p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
          break;
        }
      else if (STACK_TOP_P (operands[1]))
        {
#if SYSV386_COMPAT
          p = "{\t%1, %0|r\t%0, %1}";
#else
          p = "r\t{%1, %0|%0, %1}";     /* st(r2) = st(0) op st(r2) */
#endif
          break;
        }
      else
        {
#if SYSV386_COMPAT
          p = "{r\t%2, %0|\t%0, %2}";
#else
          p = "\t{%2, %0|%0, %2}";      /* st(r1) = st(r1) op st(0) */
#endif
          break;
        }

    default:
      gcc_unreachable ();
    }

  strcat (buf, p);
  return buf;
}
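/* Template example (illustrative): for a PLUS with both operands on the
   x87 stack and a dying operands[2], the code above returns
   "faddp\t{%0, %2|%2, %0}" or "faddp\t{%2, %0|%0, %2}" depending on
   which register is the stack top; under AVX with SSE registers it
   returns "vaddss\t{%2, %1, %0|%0, %1, %2}" for SFmode.  */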
/* Return needed mode for entity in optimize_mode_switching pass.  */

int
ix86_mode_needed (int entity, rtx insn)
{
  enum attr_i387_cw mode;

  /* The mode UNINITIALIZED is used to store control word after a
     function call or ASM pattern.  The mode ANY specifies that the
     function has no requirements on the control word and makes no
     changes in the bits we are interested in.  */

  if (CALL_P (insn)
      || (NONJUMP_INSN_P (insn)
          && (asm_noperands (PATTERN (insn)) >= 0
              || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
    return I387_CW_UNINITIALIZED;

  if (recog_memoized (insn) < 0)
    return I387_CW_ANY;

  mode = get_attr_i387_cw (insn);

  switch (entity)
    {
    case I387_TRUNC:
      if (mode == I387_CW_TRUNC)
        return mode;
      break;

    case I387_FLOOR:
      if (mode == I387_CW_FLOOR)
        return mode;
      break;

    case I387_CEIL:
      if (mode == I387_CW_CEIL)
        return mode;
      break;

    case I387_MASK_PM:
      if (mode == I387_CW_MASK_PM)
        return mode;
      break;

    default:
      gcc_unreachable ();
    }

  return I387_CW_ANY;
}
/* Output code to initialize control word copies used by trunc?f?i and
   rounding patterns.  CURRENT_MODE is set to current control word,
   while NEW_MODE is set to new control word.  */

static void
emit_i387_cw_initialization (int mode)
{
  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
  rtx new_mode;

  enum ix86_stack_slot slot;

  rtx reg = gen_reg_rtx (HImode);

  emit_insn (gen_x86_fnstcw_1 (stored_mode));
  emit_move_insn (reg, copy_rtx (stored_mode));

  if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
      || optimize_function_for_size_p (cfun))
    {
      switch (mode)
        {
        case I387_CW_TRUNC:
          /* round toward zero (truncate) */
          emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
          slot = SLOT_CW_TRUNC;
          break;

        case I387_CW_FLOOR:
          /* round down toward -oo */
          emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
          emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
          slot = SLOT_CW_FLOOR;
          break;

        case I387_CW_CEIL:
          /* round up toward +oo */
          emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
          emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
          slot = SLOT_CW_CEIL;
          break;

        case I387_CW_MASK_PM:
          /* mask precision exception for nearbyint() */
          emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
          slot = SLOT_CW_MASK_PM;
          break;

        default:
          gcc_unreachable ();
        }
    }
  else
    {
      switch (mode)
        {
        case I387_CW_TRUNC:
          /* round toward zero (truncate) */
          emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
          slot = SLOT_CW_TRUNC;
          break;

        case I387_CW_FLOOR:
          /* round down toward -oo */
          emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
          slot = SLOT_CW_FLOOR;
          break;

        case I387_CW_CEIL:
          /* round up toward +oo */
          emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
          slot = SLOT_CW_CEIL;
          break;

        case I387_CW_MASK_PM:
          /* mask precision exception for nearbyint() */
          emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
          slot = SLOT_CW_MASK_PM;
          break;

        default:
          gcc_unreachable ();
        }
    }

  gcc_assert (slot < MAX_386_STACK_LOCALS);

  new_mode = assign_386_stack_local (HImode, slot);
  emit_move_insn (new_mode, reg);
}
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */

const char *
output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
{
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;
  int round_mode = get_attr_i387_cw (insn);

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if ((dimode_p || fisttp) && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  gcc_assert (STACK_TOP_P (operands[1]));
  gcc_assert (MEM_P (operands[0]));
  gcc_assert (GET_MODE (operands[1]) != TFmode);

  if (fisttp)
    output_asm_insn ("fisttp%Z0\t%0", operands);
  else
    {
      if (round_mode != I387_CW_ANY)
        output_asm_insn ("fldcw\t%3", operands);
      if (stack_top_dies || dimode_p)
        output_asm_insn ("fistp%Z0\t%0", operands);
      else
        output_asm_insn ("fist%Z0\t%0", operands);
      if (round_mode != I387_CW_ANY)
        output_asm_insn ("fldcw\t%2", operands);
    }

  return "";
}
/* Output code for x87 ffreep insn.  The OPNO argument, which may only
   have the values zero or one, indicates the ffreep insn's operand
   from the OPERANDS array.  */

static const char *
output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
{
  if (TARGET_USE_FFREEP)
#ifdef HAVE_AS_IX86_FFREEP
    return opno ? "ffreep\t%y1" : "ffreep\t%y0";
#else
    {
      static char retval[32];
      int regno = REGNO (operands[opno]);

      gcc_assert (FP_REGNO_P (regno));

      regno -= FIRST_STACK_REG;

      snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
      return retval;
    }
#endif

  return opno ? "fstp\t%y1" : "fstp\t%y0";
}
/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used.  UNORDERED_P is true when fucom should be used.  */

const char *
output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
{
  int stack_top_dies;
  rtx cmp_op0, cmp_op1;
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);

  if (eflags_p)
    {
      cmp_op0 = operands[0];
      cmp_op1 = operands[1];
    }
  else
    {
      cmp_op0 = operands[1];
      cmp_op1 = operands[2];
    }

  if (is_sse)
    {
      if (GET_MODE (operands[0]) == SFmode)
        {
          if (unordered_p)
            return "%vucomiss\t{%1, %0|%0, %1}";
          else
            return "%vcomiss\t{%1, %0|%0, %1}";
        }
      else
        {
          if (unordered_p)
            return "%vucomisd\t{%1, %0|%0, %1}";
          else
            return "%vcomisd\t{%1, %0|%0, %1}";
        }
    }

  gcc_assert (STACK_TOP_P (cmp_op0));

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
    {
      if (stack_top_dies)
        {
          output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
          return output_387_ffreep (operands, 1);
        }
      else
        return "ftst\n\tfnstsw\t%0";
    }

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If both the top of the 387 stack dies, and the other operand
         is also a stack register that dies, then this must be a
         `fcompp' float compare.  */
      if (eflags_p)
        {
          /* There is no double popping fcomi variant.  Fortunately,
             eflags is immune from the fstp's cc clobbering.  */
          if (unordered_p)
            output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
          else
            output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
          return output_387_ffreep (operands, 0);
        }
      else
        {
          if (unordered_p)
            return "fucompp\n\tfnstsw\t%0";
          else
            return "fcompp\n\tfnstsw\t%0";
        }
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */

      static const char * const alt[16] =
      {
        "fcom%Z2\t%y2\n\tfnstsw\t%0",
        "fcomp%Z2\t%y2\n\tfnstsw\t%0",
        "fucom%Z2\t%y2\n\tfnstsw\t%0",
        "fucomp%Z2\t%y2\n\tfnstsw\t%0",

        "ficom%Z2\t%y2\n\tfnstsw\t%0",
        "ficomp%Z2\t%y2\n\tfnstsw\t%0",
        NULL,
        NULL,

        "fcomi\t{%y1, %0|%0, %y1}",
        "fcomip\t{%y1, %0|%0, %y1}",
        "fucomi\t{%y1, %0|%0, %y1}",
        "fucomip\t{%y1, %0|%0, %y1}",

        NULL,
        NULL,
        NULL,
        NULL
      };

      int mask;
      const char *ret;

      mask  = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      gcc_assert (mask < 16);
      ret = alt[mask];
      gcc_assert (ret);

      return ret;
    }
}
void
ix86_output_addr_vec_elt (FILE *file, int value)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_64BIT)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif

  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
}

void
ix86_output_addr_diff_elt (FILE *file, int value, int rel)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif

  /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
  if (TARGET_64BIT || TARGET_VXWORKS_RTP)
    fprintf (file, "%s%s%d-%s%d\n",
             directive, LPREFIX, value, LPREFIX, rel);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    {
      fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
      machopic_output_function_base_name (file);
      putc ('\n', file);
    }
#endif
  else
    asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
                 GOT_SYMBOL_NAME, LPREFIX, value);
}
/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
   for the target.  */

void
ix86_expand_clear (rtx dest)
{
  rtx tmp;

  /* We play register width games, which are only valid after reload.  */
  gcc_assert (reload_completed);

  /* Avoid HImode and its attendant prefix byte.  */
  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
    dest = gen_rtx_REG (SImode, REGNO (dest));
  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);

  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
  if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
    {
      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
    }

  emit_insn (tmp);
}
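/* Resulting code (illustrative): on speed-optimized paths this emits
   "xor %eax, %eax" wrapped with an explicit FLAGS_REG clobber; a plain
   "mov $0, %eax" remains only on TARGET_USE_MOV0 targets when the insn
   is being optimized for size, i.e. when the parallel above is not
   built.  */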
/* X is an unchanging MEM.  If it is a constant pool reference, return
   the constant pool rtx, else NULL.  */

rtx
maybe_get_pool_constant (rtx x)
{
  x = ix86_delegitimize_address (XEXP (x, 0));

  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
    return get_pool_constant (x);

  return NULL_RTX;
}
void
ix86_expand_move (enum machine_mode mode, rtx operands[])
{
  rtx op0, op1;
  enum tls_model model;

  op0 = operands[0];
  op1 = operands[1];

  if (GET_CODE (op1) == SYMBOL_REF)
    {
      model = SYMBOL_REF_TLS_MODEL (op1);
      if (model)
        {
          op1 = legitimize_tls_address (op1, model, true);
          op1 = force_operand (op1, op0);
          if (op1 == op0)
            return;
          if (GET_MODE (op1) != mode)
            op1 = convert_to_mode (mode, op1, 1);
        }
      else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
               && SYMBOL_REF_DLLIMPORT_P (op1))
        op1 = legitimize_dllimport_symbol (op1, false);
    }
  else if (GET_CODE (op1) == CONST
           && GET_CODE (XEXP (op1, 0)) == PLUS
           && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
    {
      rtx addend = XEXP (XEXP (op1, 0), 1);
      rtx symbol = XEXP (XEXP (op1, 0), 0);
      rtx tmp = NULL;

      model = SYMBOL_REF_TLS_MODEL (symbol);
      if (model)
        tmp = legitimize_tls_address (symbol, model, true);
      else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
               && SYMBOL_REF_DLLIMPORT_P (symbol))
        tmp = legitimize_dllimport_symbol (symbol, true);

      if (tmp)
        {
          tmp = force_operand (tmp, NULL);
          tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
                                     op0, 1, OPTAB_DIRECT);
          if (tmp == op0)
            return;
          if (GET_MODE (tmp) != mode)
            op1 = convert_to_mode (mode, tmp, 1);
        }
    }

  if ((flag_pic || MACHOPIC_INDIRECT)
      && symbolic_operand (op1, mode))
    {
      if (TARGET_MACHO && !TARGET_64BIT)
        {
#if TARGET_MACHO
          /* dynamic-no-pic */
          if (MACHOPIC_INDIRECT)
            {
              rtx temp = ((reload_in_progress
                           || ((op0 && REG_P (op0))
                               && mode == Pmode))
                          ? op0 : gen_reg_rtx (Pmode));
              op1 = machopic_indirect_data_reference (op1, temp);
              if (MACHOPIC_PURE)
                op1 = machopic_legitimize_pic_address (op1, mode,
                                                       temp == op1 ? 0 : temp);
            }
          if (op0 != op1 && GET_CODE (op0) != MEM)
            {
              rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
              emit_insn (insn);
              return;
            }
          if (GET_CODE (op0) == MEM)
            op1 = force_reg (Pmode, op1);
          else
            {
              rtx temp = op0;
              if (GET_CODE (temp) != REG)
                temp = gen_reg_rtx (Pmode);
              temp = legitimize_pic_address (op1, temp);
              if (temp == op0)
                return;
              op1 = temp;
            }
          /* dynamic-no-pic */
#endif
        }
      else
        {
          if (MEM_P (op0))
            op1 = force_reg (mode, op1);
          else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
            {
              rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
              op1 = legitimize_pic_address (op1, reg);
              if (op0 == op1)
                return;
              if (GET_MODE (op1) != mode)
                op1 = convert_to_mode (mode, op1, 1);
            }
        }
    }
  else
    {
      if (MEM_P (op0)
          && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
              || !push_operand (op0, mode))
          && MEM_P (op1))
        op1 = force_reg (mode, op1);

      if (push_operand (op0, mode)
          && ! general_no_elim_operand (op1, mode))
        op1 = copy_to_mode_reg (mode, op1);

      /* Force large constants in 64bit compilation into register
         to get them CSEed.  */
      if (can_create_pseudo_p ()
          && (mode == DImode) && TARGET_64BIT
          && immediate_operand (op1, mode)
          && !x86_64_zext_immediate_operand (op1, VOIDmode)
          && !register_operand (op0, mode)
          && optimize)
        op1 = copy_to_mode_reg (mode, op1);

      if (can_create_pseudo_p ()
          && FLOAT_MODE_P (mode)
          && GET_CODE (op1) == CONST_DOUBLE)
        {
          /* If we are loading a floating point constant to a register,
             force the value to memory now, since we'll get better code
             out the back end.  */

          op1 = validize_mem (force_const_mem (mode, op1));
          if (!register_operand (op0, mode))
            {
              rtx temp = gen_reg_rtx (mode);
              emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
              emit_move_insn (op0, temp);
              return;
            }
        }
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}
void
ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
{
  rtx op0 = operands[0], op1 = operands[1];
  unsigned int align = GET_MODE_ALIGNMENT (mode);

  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register, once we have that information we may be able
     to handle some of them more efficiently.  */
  if (can_create_pseudo_p ()
      && register_operand (op0, mode)
      && (CONSTANT_P (op1)
          || (GET_CODE (op1) == SUBREG
              && CONSTANT_P (SUBREG_REG (op1))))
      && !standard_sse_constant_p (op1))
    op1 = validize_mem (force_const_mem (mode, op1));

  /* We need to check memory alignment for SSE mode since attribute
     can make operands unaligned.  */
  if (can_create_pseudo_p ()
      && SSE_REG_MODE_P (mode)
      && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
          || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
    {
      rtx tmp[2];

      /* ix86_expand_vector_move_misalign() does not like constants ... */
      if (CONSTANT_P (op1)
          || (GET_CODE (op1) == SUBREG
              && CONSTANT_P (SUBREG_REG (op1))))
        op1 = validize_mem (force_const_mem (mode, op1));

      /* ... nor both arguments in memory.  */
      if (!register_operand (op0, mode)
          && !register_operand (op1, mode))
        op1 = force_reg (mode, op1);

      tmp[0] = op0; tmp[1] = op1;
      ix86_expand_vector_move_misalign (mode, tmp);
      return;
    }

  /* Make operand1 a register if it isn't already.  */
  if (can_create_pseudo_p ()
      && !register_operand (op0, mode)
      && !register_operand (op1, mode))
    {
      emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
      return;
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}
/* Split 32-byte AVX unaligned load and store if needed.  */

static void
ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
{
  rtx m;
  rtx (*extract) (rtx, rtx, rtx);
  rtx (*move_unaligned) (rtx, rtx);
  enum machine_mode mode;

  switch (GET_MODE (op0))
    {
    default:
      gcc_unreachable ();
    case V32QImode:
      extract = gen_avx_vextractf128v32qi;
      move_unaligned = gen_avx_movdqu256;
      mode = V16QImode;
      break;
    case V8SFmode:
      extract = gen_avx_vextractf128v8sf;
      move_unaligned = gen_avx_movups256;
      mode = V4SFmode;
      break;
    case V4DFmode:
      extract = gen_avx_vextractf128v4df;
      move_unaligned = gen_avx_movupd256;
      mode = V2DFmode;
      break;
    }

  if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
    {
      rtx r = gen_reg_rtx (mode);
      m = adjust_address (op1, mode, 0);
      emit_move_insn (r, m);
      m = adjust_address (op1, mode, 16);
      r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
      emit_move_insn (op0, r);
    }
  else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE)
    {
      m = adjust_address (op0, mode, 0);
      emit_insn (extract (m, op1, const0_rtx));
      m = adjust_address (op0, mode, 16);
      emit_insn (extract (m, op1, const1_rtx));
    }
  else
    emit_insn (move_unaligned (op0, op1));
}
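/* Illustrative expansion: with TARGET_AVX256_SPLIT_UNALIGNED_LOAD set, a
   misaligned 32-byte V8SF load becomes a 16-byte vmovups plus a
   vinsertf128 of the upper half (via the VEC_CONCAT above); the store
   analog is a vmovups of the low half plus a vextractf128 for the high
   half.  */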
/* Implement the movmisalign patterns for SSE.  Non-SSE modes go
   straight to ix86_expand_vector_move.  */
/* Code generation for scalar reg-reg moves of single and double precision data:
     if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
       movaps reg, reg
     else
       movss reg, reg
     if (x86_sse_partial_reg_dependency == true)
       movapd reg, reg
     else
       movsd reg, reg

   Code generation for scalar loads of double precision data:
     if (x86_sse_split_regs == true)
       movlpd mem, reg      (gas syntax)
     else
       movsd mem, reg

   Code generation for unaligned packed loads of single precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
     if (x86_sse_unaligned_move_optimal)
       movups mem, reg

     if (x86_sse_partial_reg_dependency == true)
       {
         xorps  reg, reg
         movlps mem, reg
         movhps mem+8, reg
       }
     else
       {
         movlps mem, reg
         movhps mem+8, reg
       }

   Code generation for unaligned packed loads of double precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
     if (x86_sse_unaligned_move_optimal)
       movupd mem, reg

     if (x86_sse_split_regs == true)
       {
         movlpd mem, reg
         movhpd mem+8, reg
       }
     else
       {
         movsd  mem, reg
         unpcklpd reg, reg
       }  */

void
ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
{
  rtx op0, op1, m;

  op0 = operands[0];
  op1 = operands[1];

  if (TARGET_AVX)
    {
      switch (GET_MODE_CLASS (mode))
        {
        case MODE_VECTOR_INT:
        case MODE_INT:
          switch (GET_MODE_SIZE (mode))
            {
            case 16:
              /* If we're optimizing for size, movups is the smallest.  */
              if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
                {
                  op0 = gen_lowpart (V4SFmode, op0);
                  op1 = gen_lowpart (V4SFmode, op1);
                  emit_insn (gen_sse_movups (op0, op1));
                  return;
                }
              op0 = gen_lowpart (V16QImode, op0);
              op1 = gen_lowpart (V16QImode, op1);
              emit_insn (gen_sse2_movdqu (op0, op1));
              break;
            case 32:
              op0 = gen_lowpart (V32QImode, op0);
              op1 = gen_lowpart (V32QImode, op1);
              ix86_avx256_split_vector_move_misalign (op0, op1);
              break;
            default:
              gcc_unreachable ();
            }
          break;
        case MODE_VECTOR_FLOAT:
          op0 = gen_lowpart (mode, op0);
          op1 = gen_lowpart (mode, op1);

          switch (mode)
            {
            case V4SFmode:
              emit_insn (gen_sse_movups (op0, op1));
              break;
            case V8SFmode:
              ix86_avx256_split_vector_move_misalign (op0, op1);
              break;
            case V2DFmode:
              if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
                {
                  op0 = gen_lowpart (V4SFmode, op0);
                  op1 = gen_lowpart (V4SFmode, op1);
                  emit_insn (gen_sse_movups (op0, op1));
                  return;
                }
              emit_insn (gen_sse2_movupd (op0, op1));
              break;
            case V4DFmode:
              ix86_avx256_split_vector_move_misalign (op0, op1);
              break;
            default:
              gcc_unreachable ();
            }
          break;

        default:
          gcc_unreachable ();
        }

      return;
    }

  if (MEM_P (op1))
    {
      /* If we're optimizing for size, movups is the smallest.  */
      if (optimize_insn_for_size_p ()
          || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
        {
          op0 = gen_lowpart (V4SFmode, op0);
          op1 = gen_lowpart (V4SFmode, op1);
          emit_insn (gen_sse_movups (op0, op1));
          return;
        }

      /* ??? If we have typed data, then it would appear that using
         movdqu is the only way to get unaligned data loaded with
         integer type.  */
      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
        {
          op0 = gen_lowpart (V16QImode, op0);
          op1 = gen_lowpart (V16QImode, op1);
          emit_insn (gen_sse2_movdqu (op0, op1));
          return;
        }

      if (TARGET_SSE2 && mode == V2DFmode)
        {
          rtx zero;

          if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
            {
              op0 = gen_lowpart (V2DFmode, op0);
              op1 = gen_lowpart (V2DFmode, op1);
              emit_insn (gen_sse2_movupd (op0, op1));
              return;
            }

          /* When SSE registers are split into halves, we can avoid
             writing to the top half twice.  */
          if (TARGET_SSE_SPLIT_REGS)
            {
              emit_clobber (op0);
              zero = op0;
            }
          else
            {
              /* ??? Not sure about the best option for the Intel chips.
                 The following would seem to satisfy; the register is
                 entirely cleared, breaking the dependency chain.  We
                 then store to the upper half, with a dependency depth
                 of one.  A rumor has it that Intel recommends two movsd
                 followed by an unpacklpd, but this is unconfirmed.  And
                 given that the dependency depth of the unpacklpd would
                 still be one, I'm not sure why this would be better.  */
              zero = CONST0_RTX (V2DFmode);
            }

          m = adjust_address (op1, DFmode, 0);
          emit_insn (gen_sse2_loadlpd (op0, zero, m));
          m = adjust_address (op1, DFmode, 8);
          emit_insn (gen_sse2_loadhpd (op0, op0, m));
        }
      else
        {
          if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
            {
              op0 = gen_lowpart (V4SFmode, op0);
              op1 = gen_lowpart (V4SFmode, op1);
              emit_insn (gen_sse_movups (op0, op1));
              return;
            }

          if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
            emit_move_insn (op0, CONST0_RTX (mode));
          else
            emit_clobber (op0);

          if (mode != V4SFmode)
            op0 = gen_lowpart (V4SFmode, op0);
          m = adjust_address (op1, V2SFmode, 0);
          emit_insn (gen_sse_loadlps (op0, op0, m));
          m = adjust_address (op1, V2SFmode, 8);
          emit_insn (gen_sse_loadhps (op0, op0, m));
        }
    }
  else if (MEM_P (op0))
    {
      /* If we're optimizing for size, movups is the smallest.  */
      if (optimize_insn_for_size_p ()
          || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
        {
          op0 = gen_lowpart (V4SFmode, op0);
          op1 = gen_lowpart (V4SFmode, op1);
          emit_insn (gen_sse_movups (op0, op1));
          return;
        }

      /* ??? Similar to above, only less clear because of quote
         typeless stores unquote.  */
      if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
          && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
        {
          op0 = gen_lowpart (V16QImode, op0);
          op1 = gen_lowpart (V16QImode, op1);
          emit_insn (gen_sse2_movdqu (op0, op1));
          return;
        }

      if (TARGET_SSE2 && mode == V2DFmode)
        {
          if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
            {
              op0 = gen_lowpart (V2DFmode, op0);
              op1 = gen_lowpart (V2DFmode, op1);
              emit_insn (gen_sse2_movupd (op0, op1));
            }
          else
            {
              m = adjust_address (op0, DFmode, 0);
              emit_insn (gen_sse2_storelpd (m, op1));
              m = adjust_address (op0, DFmode, 8);
              emit_insn (gen_sse2_storehpd (m, op1));
            }
        }
      else
        {
          if (mode != V4SFmode)
            op1 = gen_lowpart (V4SFmode, op1);

          if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
            {
              op0 = gen_lowpart (V4SFmode, op0);
              emit_insn (gen_sse_movups (op0, op1));
            }
          else
            {
              m = adjust_address (op0, V2SFmode, 0);
              emit_insn (gen_sse_storelps (m, op1));
              m = adjust_address (op0, V2SFmode, 8);
              emit_insn (gen_sse_storehps (m, op1));
            }
        }
    }
  else
    gcc_unreachable ();
}
/* Expand a push in MODE.  This is some mode for which we do not support
   proper push instructions, at least from the registers that we expect
   the value to live in.  */

void
ix86_expand_push (enum machine_mode mode, rtx x)
{
  rtx tmp;

  tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
                             GEN_INT (-GET_MODE_SIZE (mode)),
                             stack_pointer_rtx, 1, OPTAB_DIRECT);
  if (tmp != stack_pointer_rtx)
    emit_move_insn (stack_pointer_rtx, tmp);

  tmp = gen_rtx_MEM (mode, stack_pointer_rtx);

  /* When we push an operand onto stack, it has to be aligned at least
     at the function argument boundary.  However since we don't have
     the argument type, we can't determine the actual argument
     boundary.  */
  emit_move_insn (tmp, x);
}
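/* Illustrative shape of the expansion on ia32 for a 12-byte mode:

       sub  $12, %esp
       mov  <value>, (%esp)     # possibly several moves for wide modes

   i.e. an explicit stack-pointer adjustment followed by a plain store,
   in place of a push instruction.  */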
/* Helper function of ix86_fixup_binary_operands to canonicalize
   operand order.  Returns true if the operands should be swapped.  */

static bool
ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
                             rtx operands[])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* If the operation is not commutative, we can't do anything.  */
  if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
    return false;

  /* Highest priority is that src1 should match dst.  */
  if (rtx_equal_p (dst, src1))
    return false;
  if (rtx_equal_p (dst, src2))
    return true;

  /* Next highest priority is that immediate constants come second.  */
  if (immediate_operand (src2, mode))
    return false;
  if (immediate_operand (src1, mode))
    return true;

  /* Lowest priority is that memory references should come second.  */
  if (MEM_P (src2))
    return false;
  if (MEM_P (src1))
    return true;

  return false;
}
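/* Example (illustrative): for a commutative (plus:SI (const_int 4)
   (reg:SI 60)) with an unrelated destination, the immediate rule above
   requests a swap so the constant ends up second, where patterns of
   the "add $4, %reg" form expect it.  */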
/* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
   destination to use for the operation.  If different from the true
   destination in operands[0], a copy operation will be required.  */

rtx
ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
                            rtx operands[])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* Canonicalize operand order.  */
  if (ix86_swap_binary_operands_p (code, mode, operands))
    {
      rtx temp;

      /* It is invalid to swap operands of different modes.  */
      gcc_assert (GET_MODE (src1) == GET_MODE (src2));

      temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* Both source operands cannot be in memory.  */
  if (MEM_P (src1) && MEM_P (src2))
    {
      /* Optimization: Only read from memory once.  */
      if (rtx_equal_p (src1, src2))
        {
          src2 = force_reg (mode, src2);
          src1 = src2;
        }
      else
        src2 = force_reg (mode, src2);
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  if (MEM_P (dst) && !rtx_equal_p (dst, src1))
    dst = gen_reg_rtx (mode);

  /* Source 1 cannot be a constant.  */
  if (CONSTANT_P (src1))
    src1 = force_reg (mode, src1);

  /* Source 1 cannot be a non-matching memory.  */
  if (MEM_P (src1) && !rtx_equal_p (dst, src1))
    src1 = force_reg (mode, src1);

  /* Improve address combine.  */
  if (code == PLUS
      && GET_MODE_CLASS (mode) == MODE_INT
      && MEM_P (src2))
    src2 = force_reg (mode, src2);

  operands[1] = src1;
  operands[2] = src2;
  return dst;
}
/* Similarly, but assume that the destination has already been
   set up properly.  */

void
ix86_fixup_binary_operands_no_copy (enum rtx_code code,
                                    enum machine_mode mode, rtx operands[])
{
  rtx dst = ix86_fixup_binary_operands (code, mode, operands);
  gcc_assert (dst == operands[0]);
}
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
                             rtx operands[])
{
  rtx src1, src2, dst, op, clob;

  dst = ix86_fixup_binary_operands (code, mode, operands);
  src1 = operands[1];
  src2 = operands[2];

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  We can only do this with PLUS.  */
      gcc_assert (code == PLUS);
      emit_insn (op);
    }
  else if (reload_completed
           && code == PLUS
           && !rtx_equal_p (dst, src1))
    {
      /* This is going to be an LEA; avoid splitting it later.  */
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  */

bool
ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
                         rtx operands[3])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* Both source operands cannot be in memory.  */
  if (MEM_P (src1) && MEM_P (src2))
    return false;

  /* Canonicalize operand order for commutative operators.  */
  if (ix86_swap_binary_operands_p (code, mode, operands))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, we must have a matching source operand.  */
  if (MEM_P (dst) && !rtx_equal_p (dst, src1))
    return false;

  /* Source 1 cannot be a constant.  */
  if (CONSTANT_P (src1))
    return false;

  /* Source 1 cannot be a non-matching memory.  */
  if (MEM_P (src1) && !rtx_equal_p (dst, src1))
    /* Support "andhi/andsi/anddi" as a zero-extending move.  */
    return (code == AND
            && (mode == HImode
                || mode == SImode
                || (TARGET_64BIT && mode == DImode))
            && satisfies_constraint_L (src2));

  return true;
}
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */

void
ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
                            rtx operands[])
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (MEM_P (dst))
    {
      if (rtx_equal_p (dst, src))
        matching_memory = 1;
      else
        dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (MEM_P (src) && !matching_memory)
    src = force_reg (mode, src);

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  */
      gcc_assert (code == NOT);
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
/* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
   divisor are within the range [0-255].  */

void
ix86_split_idivmod (enum machine_mode mode, rtx operands[],
                    bool signed_p)
{
  rtx end_label, qimode_label;
  rtx insn, div, mod;
  rtx scratch, tmp0, tmp1, tmp2;
  rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
  rtx (*gen_zero_extend) (rtx, rtx);
  rtx (*gen_test_ccno_1) (rtx, rtx);

  switch (mode)
    {
    case SImode:
      gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
      gen_test_ccno_1 = gen_testsi_ccno_1;
      gen_zero_extend = gen_zero_extendqisi2;
      break;
    case DImode:
      gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
      gen_test_ccno_1 = gen_testdi_ccno_1;
      gen_zero_extend = gen_zero_extendqidi2;
      break;
    default:
      gcc_unreachable ();
    }

  end_label = gen_label_rtx ();
  qimode_label = gen_label_rtx ();

  scratch = gen_reg_rtx (mode);

  /* Use 8bit unsigned divmod if dividend and divisor are within
     the range [0-255].  */
  emit_move_insn (scratch, operands[2]);
  scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
                                 scratch, 1, OPTAB_DIRECT);
  emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
  tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
  tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
  tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
                               gen_rtx_LABEL_REF (VOIDmode, qimode_label),
                               pc_rtx);
  insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
  predict_jump (REG_BR_PROB_BASE * 50 / 100);
  JUMP_LABEL (insn) = qimode_label;

  /* Generate original signed/unsigned divmod.  */
  div = gen_divmod4_1 (operands[0], operands[1],
                       operands[2], operands[3]);
  emit_insn (div);

  /* Branch to the end.  */
  emit_jump_insn (gen_jump (end_label));
  emit_barrier ();

  /* Generate 8bit unsigned divide.  */
  emit_label (qimode_label);
  /* Don't use operands[0] for result of 8bit divide since not all
     registers support QImode ZERO_EXTRACT.  */
  tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
  tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
  tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
  emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));

  if (signed_p)
    {
      div = gen_rtx_DIV (SImode, operands[2], operands[3]);
      mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
    }
  else
    {
      div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
      mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
    }

  /* Extract remainder from AH.  */
  tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
  if (REG_P (operands[1]))
    insn = emit_move_insn (operands[1], tmp1);
  else
    {
      /* Need a new scratch register since the old one has result
         of 8bit divide.  */
      scratch = gen_reg_rtx (mode);
      emit_move_insn (scratch, tmp1);
      insn = emit_move_insn (operands[1], scratch);
    }
  set_unique_reg_note (insn, REG_EQUAL, mod);

  /* Zero extend quotient from AL.  */
  tmp1 = gen_lowpart (QImode, tmp0);
  insn = emit_insn (gen_zero_extend (operands[0], tmp1));
  set_unique_reg_note (insn, REG_EQUAL, div);

  emit_label (end_label);
}
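/* Schematic assembly for the SImode unsigned case (register names are
   illustrative, not the allocator's choice):

       mov   dividend, scratch
       or    divisor, scratch
       test  $-0x100, scratch     # any bit above bit 7 set?
       je    .Lqi
       divl  divisor              # full 32-bit divide
       jmp   .Ldone
   .Lqi:
       divb  divisor_byte         # AL = quotient, AH = remainder
   .Ldone:                                                         */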
#define LEA_MAX_STALL (3)
#define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)

/* Increase given DISTANCE in half-cycles according to
   dependencies between PREV and NEXT instructions.
   Add 1 half-cycle if there is no dependency and
   go to next cycle if there is some dependency.  */

static unsigned int
increase_distance (rtx prev, rtx next, unsigned int distance)
{
  df_ref *use_rec;
  df_ref *def_rec;

  if (!prev || !next)
    return distance + (distance & 1) + 2;

  if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
    return distance + 1;

  for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
    for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
      if (!DF_REF_IS_ARTIFICIAL (*def_rec)
          && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
        return distance + (distance & 1) + 2;

  return distance + 1;
}
/* Check if instruction INSN defines register number
   REGNO1 or REGNO2.  */

static bool
insn_defines_reg (unsigned int regno1, unsigned int regno2,
                  rtx insn)
{
  df_ref *def_rec;

  for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
    if (DF_REF_REG_DEF_P (*def_rec)
        && !DF_REF_IS_ARTIFICIAL (*def_rec)
        && (regno1 == DF_REF_REGNO (*def_rec)
            || regno2 == DF_REF_REGNO (*def_rec)))
      return true;

  return false;
}
/* Check if instruction INSN uses register number
   REGNO as a part of address expression.  */

static bool
insn_uses_reg_mem (unsigned int regno, rtx insn)
{
  df_ref *use_rec;

  for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
    if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec))
      return true;

  return false;
}
/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in basic block starting from instruction
   START up to head of basic block or instruction INSN.

   Function puts true value into *FOUND var if definition was found
   and false otherwise.

   Distance in half-cycles between START and found instruction or head
   of BB is added to DISTANCE and returned.  */

static int
distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
                               rtx insn, int distance,
                               rtx start, bool *found)
{
  basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
  rtx prev = start;
  rtx next = NULL;

  *found = false;

  while (prev
         && prev != insn
         && distance < LEA_SEARCH_THRESHOLD)
    {
      if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
        {
          distance = increase_distance (prev, next, distance);
          if (insn_defines_reg (regno1, regno2, prev))
            {
              if (recog_memoized (prev) < 0
                  || get_attr_type (prev) != TYPE_LEA)
                {
                  *found = true;
                  return distance;
                }
            }

          next = prev;
        }
      if (prev == BB_HEAD (bb))
        break;

      prev = PREV_INSN (prev);
    }

  return distance;
}
/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in INSN's basic block until
   1. Pass LEA_SEARCH_THRESHOLD instructions, or
   2. Reach neighbour BBs boundary, or
   3. Reach agu definition.
   Returns the distance between the non-agu definition point and INSN.
   If no definition point, returns -1.  */

static int
distance_non_agu_define (unsigned int regno1, unsigned int regno2,
                         rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;

  if (insn != BB_HEAD (bb))
    distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
                                              distance, PREV_INSN (insn),
                                              &found);

  if (!found && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      FOR_EACH_EDGE (e, ei, bb->preds)
        if (e->src == bb)
          {
            simple_loop = true;
            break;
          }

      if (simple_loop)
        distance = distance_non_agu_define_in_bb (regno1, regno2,
                                                  insn, distance,
                                                  BB_END (bb), &found);
      else
        {
          int shortest_dist = -1;
          bool found_in_bb = false;

          FOR_EACH_EDGE (e, ei, bb->preds)
            {
              int bb_dist
                = distance_non_agu_define_in_bb (regno1, regno2,
                                                 insn, distance,
                                                 BB_END (e->src),
                                                 &found_in_bb);
              if (found_in_bb)
                {
                  if (shortest_dist < 0)
                    shortest_dist = bb_dist;
                  else if (bb_dist > 0)
                    shortest_dist = MIN (bb_dist, shortest_dist);

                  found = true;
                }
            }

          distance = shortest_dist;
        }
    }

  /* get_attr_type may modify recog data.  We want to make sure
     that recog data is valid for instruction INSN, on which
     distance_non_agu_define is called.  INSN is unchanged here.  */
  extract_insn_cached (insn);

  if (!found)
    return -1;

  return distance >> 1;
}
/* Return the distance in half-cycles between INSN and the next
   insn that uses register number REGNO in memory address added
   to DISTANCE.  Return -1 if REGNO0 is set.

   Put true value into *FOUND if register usage was found and
   false otherwise.
   Put true value into *REDEFINED if register redefinition was
   found and false otherwise.  */

static int
distance_agu_use_in_bb (unsigned int regno,
                        rtx insn, int distance, rtx start,
                        bool *found, bool *redefined)
{
  basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
  rtx next = start;
  rtx prev = NULL;

  *found = false;
  *redefined = false;

  while (next
         && next != insn
         && distance < LEA_SEARCH_THRESHOLD)
    {
      if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
        {
          distance = increase_distance (prev, next, distance);
          if (insn_uses_reg_mem (regno, next))
            {
              /* Return DISTANCE if OP0 is used in memory
                 address in NEXT.  */
              *found = true;
              return distance;
            }

          if (insn_defines_reg (regno, INVALID_REGNUM, next))
            {
              /* Return -1 if OP0 is set in NEXT.  */
              *redefined = true;
              return -1;
            }

          prev = next;
        }

      if (next == BB_END (bb))
        break;

      next = NEXT_INSN (next);
    }

  return distance;
}
/* Return the distance between INSN and the next insn that uses
   register number REGNO0 in memory address.  Return -1 if no such
   use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */

static int
distance_agu_use (unsigned int regno0, rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;
  bool redefined = false;

  if (insn != BB_END (bb))
    distance = distance_agu_use_in_bb (regno0, insn, distance,
                                       NEXT_INSN (insn),
                                       &found, &redefined);

  if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest == bb)
          {
            simple_loop = true;
            break;
          }

      if (simple_loop)
        distance = distance_agu_use_in_bb (regno0, insn,
                                           distance, BB_HEAD (bb),
                                           &found, &redefined);
      else
        {
          int shortest_dist = -1;
          bool found_in_bb = false;
          bool redefined_in_bb = false;

          FOR_EACH_EDGE (e, ei, bb->succs)
            {
              int bb_dist
                = distance_agu_use_in_bb (regno0, insn,
                                          distance, BB_HEAD (e->dest),
                                          &found_in_bb, &redefined_in_bb);
              if (found_in_bb)
                {
                  if (shortest_dist < 0)
                    shortest_dist = bb_dist;
                  else if (bb_dist > 0)
                    shortest_dist = MIN (bb_dist, shortest_dist);

                  found = true;
                }
            }

          distance = shortest_dist;
        }
    }

  if (!found || redefined)
    return -1;

  return distance >> 1;
}
/* Define this macro to tune LEA priority vs ADD; it takes effect when
   there is a dilemma of choosing LEA or ADD
   Negative value: ADD is more preferred than LEA
   Zero: Neutral
   Positive value: LEA is more preferred than ADD.  */
#define IX86_LEA_PRIORITY 0

/* Return true if usage of lea INSN has performance advantage
   over a sequence of instructions.  Instructions sequence has
   SPLIT_COST cycles higher latency than lea latency.  */

static bool
ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
                      unsigned int regno2, unsigned int split_cost)
{
  int dist_define, dist_use;

  dist_define = distance_non_agu_define (regno1, regno2, insn);
  dist_use = distance_agu_use (regno0, insn);

  if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
    {
      /* If there is no non AGU operand definition, no AGU
         operand usage and split cost is 0 then both lea
         and non lea variants have same priority.  Currently
         we prefer lea for 64 bit code and non lea on 32 bit
         code.  */
      if (dist_use < 0 && split_cost == 0)
        return TARGET_64BIT || IX86_LEA_PRIORITY;
      else
        return true;
    }

  /* With longer definitions distance lea is more preferable.
     Here we change it to take into account splitting cost and
     lea priority.  */
  dist_define += split_cost + IX86_LEA_PRIORITY;

  /* If there is no use in memory address then we just check
     that split cost does not exceed AGU stall.  */
  if (dist_use < 0)
    return dist_define >= LEA_MAX_STALL;

  /* If this insn has both backward non-agu dependence and forward
     agu dependence, the one with short distance takes effect.  */
  return dist_define >= dist_use;
}
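/* Worked example (illustrative): with dist_define = 1 cycle, dist_use =
   3 cycles and split_cost = 1, dist_define becomes 1 + 1 +
   IX86_LEA_PRIORITY = 2, which is below dist_use, so the function
   returns false and the caller prefers the split ALU sequence over the
   lea.  */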
/* Return true if it is legal to clobber flags by INSN and
   false otherwise.  */

static bool
ix86_ok_to_clobber_flags (rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  df_ref *use;
  bitmap live;

  while (insn)
    {
      if (NONDEBUG_INSN_P (insn))
        {
          for (use = DF_INSN_USES (insn); *use; use++)
            if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG)
              return false;

          if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
            return true;
        }

      if (insn == BB_END (bb))
        break;

      insn = NEXT_INSN (insn);
    }

  live = df_get_live_out (bb);
  return !REGNO_REG_SET_P (live, FLAGS_REG);
}
/* Return true if we need to split op0 = op1 + op2 into a sequence of
   move and add to avoid AGU stalls.  */

bool
ix86_avoid_lea_for_add (rtx insn, rtx operands[])
{
  unsigned int regno0 = true_regnum (operands[0]);
  unsigned int regno1 = true_regnum (operands[1]);
  unsigned int regno2 = true_regnum (operands[2]);

  /* Check if we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Check it is correct to split here.  */
  if (!ix86_ok_to_clobber_flags (insn))
    return false;

  /* We need to split only adds with a non-destructive
     destination operand.  */
  if (regno0 == regno1 || regno0 == regno2)
    return false;
  else
    return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1);
}
/* Return true if we should emit lea instruction instead of mov
   instruction.  */

bool
ix86_use_lea_for_mov (rtx insn, rtx operands[])
{
  unsigned int regno0;
  unsigned int regno1;

  /* Check if we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Use lea for reg to reg moves only.  */
  if (!REG_P (operands[0]) || !REG_P (operands[1]))
    return false;

  regno0 = true_regnum (operands[0]);
  regno1 = true_regnum (operands[1]);

  return ix86_lea_outperforms (insn, regno0, regno1, -1, 0);
}
/* Return true if we need to split lea into a sequence of
   instructions to avoid AGU stalls.  */

bool
ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
{
  unsigned int regno0 = true_regnum (operands[0]);
  unsigned int regno1 = -1;
  unsigned int regno2 = -1;
  unsigned int split_cost = 0;
  struct ix86_address parts;
  int ok;

  /* Check we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Check it is correct to split here.  */
  if (!ix86_ok_to_clobber_flags (insn))
    return false;

  ok = ix86_decompose_address (operands[1], &parts);
  gcc_assert (ok);

  /* We should not split into add if non legitimate pic
     operand is used as displacement.  */
  if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
    return false;

  if (parts.base)
    regno1 = true_regnum (parts.base);
  if (parts.index)
    regno2 = true_regnum (parts.index);

  /* Compute how many cycles we will add to execution time
     if split lea into a sequence of instructions.  */
  if (parts.base || parts.index)
    {
      /* Have to use mov instruction if non-destructive
         destination form is used.  */
      if (regno1 != regno0 && regno2 != regno0)
        split_cost += 1;

      /* Have to add index to base if both exist.  */
      if (parts.base && parts.index)
        split_cost += 1;

      /* Have to use shift and adds if scale is 2 or greater.  */
      if (parts.scale > 1)
        {
          if (regno0 != regno1)
            split_cost += 1;
          else if (regno2 == regno0)
            split_cost += 4;
          else
            split_cost += parts.scale;
        }

      /* Have to use add instruction with immediate if
         disp is non zero.  */
      if (parts.disp && parts.disp != const0_rtx)
        split_cost += 1;

      /* Subtract the price of lea.  */
      split_cost -= 1;
    }

  return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost);
}
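/* Cost example (illustrative): for "lea 4(%ebx,%ecx,2), %eax" with a
   destination distinct from base and index, the checks above charge
   1 (mov) + 1 (add index) + 1 (shift for scale 2) + 1 (add disp) - 1
   (the lea saved), giving split_cost = 3.  */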
/* Emit x86 binary operand CODE in mode MODE, where the first operand
   matches destination.  RTX includes clobber of FLAGS_REG.  */

static void
ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
                 rtx dst, rtx src)
{
  rtx op, clob;

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));

  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
}
/* Split lea instructions into a sequence of instructions
   which are executed on ALU to avoid AGU stalls.
   It is assumed that it is allowed to clobber flags register
   at lea position.  */

void
ix86_split_lea_for_addr (rtx operands[], enum machine_mode mode)
{
  unsigned int regno0 = true_regnum (operands[0]);
  unsigned int regno1 = INVALID_REGNUM;
  unsigned int regno2 = INVALID_REGNUM;
  struct ix86_address parts;
  rtx tmp;
  int ok, adds;

  ok = ix86_decompose_address (operands[1], &parts);
  gcc_assert (ok);

  if (parts.base)
    {
      if (GET_MODE (parts.base) != mode)
        parts.base = gen_rtx_SUBREG (mode, parts.base, 0);
      regno1 = true_regnum (parts.base);
    }

  if (parts.index)
    {
      if (GET_MODE (parts.index) != mode)
        parts.index = gen_rtx_SUBREG (mode, parts.index, 0);
      regno2 = true_regnum (parts.index);
    }

  if (parts.scale > 1)
    {
      /* Case r1 = r1 + ...  */
      if (regno1 == regno0)
        {
          /* If we have a case r1 = r1 + C * r1 then we
             should use multiplication which is very
             expensive.  Assume cost model is wrong if we
             have such case here.  */
          gcc_assert (regno2 != regno0);

          for (adds = parts.scale; adds > 0; adds--)
            ix86_emit_binop (PLUS, mode, operands[0], parts.index);
        }
      else
        {
          /* r1 = r2 + r3 * C case.  Need to move r3 into r1.  */
          if (regno0 != regno2)
            emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));

          /* Use shift for scaling.  */
          ix86_emit_binop (ASHIFT, mode, operands[0],
                           GEN_INT (exact_log2 (parts.scale)));

          if (parts.base)
            ix86_emit_binop (PLUS, mode, operands[0], parts.base);

          if (parts.disp && parts.disp != const0_rtx)
            ix86_emit_binop (PLUS, mode, operands[0], parts.disp);
        }
    }
  else if (!parts.base && !parts.index)
    {
      gcc_assert (parts.disp);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.disp));
    }
  else
    {
      if (!parts.base)
        {
          if (regno0 != regno2)
            emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));
        }
      else if (!parts.index)
        {
          if (regno0 != regno1)
            emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
        }
      else
        {
          if (regno0 == regno1)
            tmp = parts.index;
          else if (regno0 == regno2)
            tmp = parts.base;
          else
            {
              emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
              tmp = parts.index;
            }

          ix86_emit_binop (PLUS, mode, operands[0], tmp);
        }

      if (parts.disp && parts.disp != const0_rtx)
        ix86_emit_binop (PLUS, mode, operands[0], parts.disp);
    }
}
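/* Illustrative split (SImode): "lea 4(%ebx,%ecx,4), %eax" becomes

       mov  %ecx, %eax       # move index into destination
       shl  $2,   %eax       # scale by 4
       add  %ebx, %eax       # add base
       add  $4,   %eax       # add displacement

   matching the SET/ASHIFT/PLUS/PLUS chain emitted above.  */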
/* Return true if it is ok to optimize an ADD operation to LEA
   operation to avoid flag register consumption.  For most processors,
   ADD is faster than LEA.  For the processors like ATOM, if the
   destination register of LEA holds an actual address which will be
   used soon, LEA is better and otherwise ADD is better.  */

bool
ix86_lea_for_add_ok (rtx insn, rtx operands[])
{
  unsigned int regno0 = true_regnum (operands[0]);
  unsigned int regno1 = true_regnum (operands[1]);
  unsigned int regno2 = true_regnum (operands[2]);

  /* If a = b + c, (a!=b && a!=c), must use lea form.  */
  if (regno0 != regno1 && regno0 != regno2)
    return true;

  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0);
}
/* Return true if destination reg of SET_BODY is shift count of
   USE_BODY.  */

static bool
ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
{
  rtx set_dest;
  rtx shift_rtx;
  int i;

  /* Retrieve destination of SET_BODY.  */
  switch (GET_CODE (set_body))
    {
    case SET:
      set_dest = SET_DEST (set_body);
      if (!set_dest || !REG_P (set_dest))
        return false;
      break;
    case PARALLEL:
      for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
        if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
                                          use_body))
          return true;
    default:
      return false;
    }

  /* Retrieve shift count of USE_BODY.  */
  switch (GET_CODE (use_body))
    {
    case SET:
      shift_rtx = XEXP (use_body, 1);
      break;
    case PARALLEL:
      for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
        if (ix86_dep_by_shift_count_body (set_body,
                                          XVECEXP (use_body, 0, i)))
          return true;
    default:
      return false;
    }

  if (shift_rtx
      && (GET_CODE (shift_rtx) == ASHIFT
          || GET_CODE (shift_rtx) == LSHIFTRT
          || GET_CODE (shift_rtx) == ASHIFTRT
          || GET_CODE (shift_rtx) == ROTATE
          || GET_CODE (shift_rtx) == ROTATERT))
    {
      rtx shift_count = XEXP (shift_rtx, 1);

      /* Return true if shift count is dest of SET_BODY.  */
      if (REG_P (shift_count)
          && true_regnum (set_dest) == true_regnum (shift_count))
        return true;
    }

  return false;
}

/* Return true if destination reg of SET_INSN is shift count of
   USE_INSN.  */

bool
ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
{
  return ix86_dep_by_shift_count_body (PATTERN (set_insn),
                                       PATTERN (use_insn));
}
/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

bool
ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
                        enum machine_mode mode ATTRIBUTE_UNUSED,
                        rtx operands[2] ATTRIBUTE_UNUSED)
{
  /* If one of operands is memory, source and destination must match.  */
  if ((MEM_P (operands[0])
       || MEM_P (operands[1]))
      && ! rtx_equal_p (operands[0], operands[1]))
    return false;
  return true;
}
/* Return TRUE if the operands to a vec_interleave_{high,low}v2df
   are ok, keeping in mind the possible movddup alternative.  */

bool
ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
{
  if (MEM_P (operands[0]))
    return rtx_equal_p (operands[0], operands[1 + high]);
  if (MEM_P (operands[1]) && MEM_P (operands[2]))
    return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
  return true;
}
/* Post-reload splitter for converting an SF or DFmode value in an
   SSE register into an unsigned SImode.  */

void
ix86_split_convert_uns_si_sse (rtx operands[])
{
  enum machine_mode vecmode;
  rtx value, large, zero_or_two31, input, two31, x;

  large = operands[1];
  zero_or_two31 = operands[2];
  input = operands[3];
  two31 = operands[4];
  vecmode = GET_MODE (large);
  value = gen_rtx_REG (vecmode, REGNO (operands[0]));

  /* Load up the value into the low element.  We must ensure that the other
     elements are valid floats -- zero is the easiest such value.  */
  if (MEM_P (input))
    {
      if (vecmode == V4SFmode)
        emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
      else
        emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
    }
  else
    {
      input = gen_rtx_REG (vecmode, REGNO (input));
      emit_move_insn (value, CONST0_RTX (vecmode));
      if (vecmode == V4SFmode)
        emit_insn (gen_sse_movss (value, value, input));
      else
        emit_insn (gen_sse2_movsd (value, value, input));
    }

  emit_move_insn (large, two31);
  emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);

  x = gen_rtx_fmt_ee (LE, vecmode, large, value);
  emit_insn (gen_rtx_SET (VOIDmode, large, x));

  x = gen_rtx_AND (vecmode, zero_or_two31, large);
  emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));

  x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
  emit_insn (gen_rtx_SET (VOIDmode, value, x));

  large = gen_rtx_REG (V4SImode, REGNO (large));
  emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));

  x = gen_rtx_REG (V4SImode, REGNO (value));
  if (vecmode == V4SFmode)
    emit_insn (gen_fix_truncv4sfv4si2 (x, value));
  else
    emit_insn (gen_sse2_cvttpd2dq (x, value));

  emit_insn (gen_xorv4si3 (value, value, large));
}
/* Convert an unsigned DImode value into a DFmode, using only SSE.
   Expects the 64-bit DImode to be supplied in a pair of integral
   registers.  Requires SSE2; will use SSE3 if available.  For x86_32,
   -mfpmath=sse, !optimize_size only.  */

void
ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
  rtx int_xmm, fp_xmm;
  rtx biases, exponents;
  rtx x;

  int_xmm = gen_reg_rtx (V4SImode);
  if (TARGET_INTER_UNIT_MOVES)
    emit_insn (gen_movdi_to_sse (int_xmm, input));
  else if (TARGET_SSE_SPLIT_REGS)
    {
      emit_clobber (int_xmm);
      emit_move_insn (gen_lowpart (DImode, int_xmm), input);
    }
  else
    {
      x = gen_reg_rtx (V2DImode);
      ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
      emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
    }

  x = gen_rtx_CONST_VECTOR (V4SImode,
                            gen_rtvec (4, GEN_INT (0x43300000UL),
                                       GEN_INT (0x45300000UL),
                                       const0_rtx, const0_rtx));
  exponents = validize_mem (force_const_mem (V4SImode, x));

  /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
  emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));

  /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
     yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
     Similarly (0x45300000UL ## fp_value_hi_xmm) yields
     (0x1.0p84 + double(fp_value_hi_xmm)).
     Note these exponents differ by 32.  */

  fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));

  /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
     in [0,2**32-1] and [0]+[2**32,2**64-1] respectively.  */
  real_ldexp (&bias_lo_rvt, &dconst1, 52);
  real_ldexp (&bias_hi_rvt, &dconst1, 84);
  biases = const_double_from_real_value (bias_lo_rvt, DFmode);
  x = const_double_from_real_value (bias_hi_rvt, DFmode);
  biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
  biases = validize_mem (force_const_mem (V2DFmode, biases));
  emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));

  /* Add the upper and lower DFmode values together.  */
  if (TARGET_SSE3)
    emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
  else
    {
      x = copy_to_mode_reg (V2DFmode, fp_xmm);
      emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
      emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
    }

  ix86_expand_vector_extract (false, target, fp_xmm, 0);
}
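
/* A scalar model of the bias trick above, for illustration only
   (assumes a little-endian IEEE double and a uint64_t input X, with
   BITS64 reinterpreting a 64-bit pattern as a double):

     double lo = BITS64 (0x4330000000000000ULL | (X & 0xffffffffULL));
     double hi = BITS64 (0x4530000000000000ULL | (X >> 32));
     return (hi - 0x1.0p84) + (lo - 0x1.0p52);

   The vector code performs both bias subtractions with a single subpd
   and the final sum with haddpd (SSE3) or unpckhpd + addpd.  */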
/* Not used, but eases macroization of patterns.  */

void
ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
                                  rtx input ATTRIBUTE_UNUSED)
{
  gcc_unreachable ();
}
/* Convert an unsigned SImode value into a DFmode.  Only currently used
   for SSE, but applicable anywhere.  */

void
ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO31r;
  rtx x, fp;

  x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
                           NULL, 1, OPTAB_DIRECT);

  fp = gen_reg_rtx (DFmode);
  emit_insn (gen_floatsidf2 (fp, x));

  real_ldexp (&TWO31r, &dconst1, 31);
  x = const_double_from_real_value (TWO31r, DFmode);

  x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}
/* Convert a signed DImode value into a DFmode.  Only used for SSE in
   32-bit mode; otherwise we have a direct convert instruction.  */

void
ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO32r;
  rtx fp_lo, fp_hi, x;

  fp_lo = gen_reg_rtx (DFmode);
  fp_hi = gen_reg_rtx (DFmode);

  emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));

  real_ldexp (&TWO32r, &dconst1, 32);
  x = const_double_from_real_value (TWO32r, DFmode);
  fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi,
                               0, OPTAB_DIRECT);

  ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));

  x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
                           0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}
/* Convert an unsigned SImode value into a SFmode, using only SSE.
   For x86_32, -mfpmath=sse, !optimize_size only.  */

void
ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE ONE16r;
  rtx fp_hi, fp_lo, int_hi, int_lo, x;

  real_ldexp (&ONE16r, &dconst1, 16);
  x = const_double_from_real_value (ONE16r, SFmode);
  int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
                                NULL, 0, OPTAB_DIRECT);
  int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
                                NULL, 0, OPTAB_DIRECT);
  fp_hi = gen_reg_rtx (SFmode);
  fp_lo = gen_reg_rtx (SFmode);
  emit_insn (gen_floatsisf2 (fp_hi, int_hi));
  emit_insn (gen_floatsisf2 (fp_lo, int_lo));
  fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
                               0, OPTAB_DIRECT);
  fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
                               0, OPTAB_DIRECT);
  if (!rtx_equal_p (target, fp_hi))
    emit_move_insn (target, fp_hi);
}
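
/* Equivalent scalar computation, for illustration only:

     float hi = (float) (x >> 16) * 0x1.0p16f;
     float lo = (float) (x & 0xffff);
     return hi + lo;

   Splitting at bit 16 keeps both halves exactly representable in
   SFmode, so only the final addition rounds.  */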
/* floatunsv{4,8}siv{4,8}sf2 expander.  Expand code to convert
   a vector of unsigned ints VAL to vector of floats TARGET.  */

void
ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
{
  rtx tmp[8];
  REAL_VALUE_TYPE TWO16r;
  enum machine_mode intmode = GET_MODE (val);
  enum machine_mode fltmode = GET_MODE (target);
  rtx (*cvt) (rtx, rtx);

  if (intmode == V4SImode)
    cvt = gen_floatv4siv4sf2;
  else
    cvt = gen_floatv8siv8sf2;
  tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
  tmp[0] = force_reg (intmode, tmp[0]);
  tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
                                OPTAB_DIRECT);
  tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
                                NULL_RTX, 1, OPTAB_DIRECT);
  tmp[3] = gen_reg_rtx (fltmode);
  emit_insn (cvt (tmp[3], tmp[1]));
  tmp[4] = gen_reg_rtx (fltmode);
  emit_insn (cvt (tmp[4], tmp[2]));
  real_ldexp (&TWO16r, &dconst1, 16);
  tmp[5] = const_double_from_real_value (TWO16r, SFmode);
  tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
  tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
                                OPTAB_DIRECT);
  tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
                                OPTAB_DIRECT);
  if (tmp[7] != target)
    emit_move_insn (target, tmp[7]);
}
/* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
   pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
   This is done by doing just signed conversion if < 0x1p31, and otherwise by
   subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards.  */

rtx
ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
{
  REAL_VALUE_TYPE TWO31r;
  rtx two31r, tmp[4];
  enum machine_mode mode = GET_MODE (val);
  enum machine_mode scalarmode = GET_MODE_INNER (mode);
  enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
  rtx (*cmp) (rtx, rtx, rtx, rtx);
  int i;

  for (i = 0; i < 3; i++)
    tmp[i] = gen_reg_rtx (mode);
  real_ldexp (&TWO31r, &dconst1, 31);
  two31r = const_double_from_real_value (TWO31r, scalarmode);
  two31r = ix86_build_const_vector (mode, 1, two31r);
  two31r = force_reg (mode, two31r);
  switch (mode)
    {
    case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
    case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
    case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
    case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
    default: gcc_unreachable ();
    }
  tmp[3] = gen_rtx_LE (mode, two31r, val);
  emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
  tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
                                0, OPTAB_DIRECT);
  if (intmode == V4SImode || TARGET_AVX2)
    *xorp = expand_simple_binop (intmode, ASHIFT,
                                 gen_lowpart (intmode, tmp[0]),
                                 GEN_INT (31), NULL_RTX, 0,
                                 OPTAB_DIRECT);
  else
    {
      rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
      two31 = ix86_build_const_vector (intmode, 1, two31);
      *xorp = expand_simple_binop (intmode, AND,
                                   gen_lowpart (intmode, tmp[0]),
                                   two31, NULL_RTX, 0,
                                   OPTAB_DIRECT);
    }
  return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
                              0, OPTAB_DIRECT);
}
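
/* Per-element scalar model of the adjustment above (illustration only):

     if (v < 0x1.0p31)
       result = (int32_t) v;                          -- plain signed cvt
     else
       result = (int32_t) (v - 0x1.0p31) ^ 0x80000000;

   The mask produced by the comparison selects, per element, whether
   0x1p31 is subtracted before the signed conversion; *XORP holds the
   value to xor back in afterwards.  */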
/* A subroutine of ix86_build_signbit_mask.  If VECT is true,
   then replicate the value for all elements of the vector
   register.  */

rtx
ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
{
  int i, n_elt;
  rtvec v;
  enum machine_mode scalar_mode;

  switch (mode)
    {
    case V32QImode:
    case V16QImode:
    case V16HImode:
    case V8HImode:
    case V8SImode:
    case V4SImode:
    case V4DImode:
    case V2DImode:
    case V8SFmode:
    case V4SFmode:
    case V4DFmode:
    case V2DFmode:
      n_elt = GET_MODE_NUNITS (mode);
      v = rtvec_alloc (n_elt);
      scalar_mode = GET_MODE_INNER (mode);

      RTVEC_ELT (v, 0) = value;

      for (i = 1; i < n_elt; ++i)
        RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);

      return gen_rtx_CONST_VECTOR (mode, v);

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
   and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
   for an SSE register.  If VECT is true, then replicate the mask for
   all elements of the vector register.  If INVERT is true, then create
   a mask excluding the sign bit.  */

rtx
ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
{
  enum machine_mode vec_mode, imode;
  HOST_WIDE_INT hi, lo;
  int shift = 63;
  rtx v;
  rtx mask;

  /* Find the sign bit, sign extended to 2*HWI.  */
  switch (mode)
    {
    case V8SImode:
    case V4SImode:
    case V8SFmode:
    case V4SFmode:
      vec_mode = mode;
      mode = GET_MODE_INNER (mode);
      imode = SImode;
      lo = 0x80000000, hi = lo < 0;
      break;

    case V4DImode:
    case V2DImode:
    case V4DFmode:
    case V2DFmode:
      vec_mode = mode;
      mode = GET_MODE_INNER (mode);
      imode = DImode;
      if (HOST_BITS_PER_WIDE_INT >= 64)
        lo = (HOST_WIDE_INT) 1 << shift, hi = -1;
      else
        lo = 0, hi = (HOST_WIDE_INT) 1 << (shift - HOST_BITS_PER_WIDE_INT);
      break;

    case TFmode:
      vec_mode = VOIDmode;
      if (HOST_BITS_PER_WIDE_INT >= 64)
        {
          imode = TImode;
          lo = 0, hi = (HOST_WIDE_INT) 1 << shift;
        }
      else
        {
          rtvec vec;

          imode = DImode;
          lo = 0, hi = (HOST_WIDE_INT) 1 << (shift - HOST_BITS_PER_WIDE_INT);

          if (invert)
            {
              lo = ~lo, hi = ~hi;
              v = constm1_rtx;
            }
          else
            v = const0_rtx;

          mask = immed_double_const (lo, hi, imode);

          vec = gen_rtvec (2, v, mask);
          v = gen_rtx_CONST_VECTOR (V2DImode, vec);
          v = copy_to_mode_reg (mode, gen_lowpart (mode, v));

          return v;
        }
      break;

    default:
      gcc_unreachable ();
    }

  if (invert)
    lo = ~lo, hi = ~hi;

  /* Force this value into the low part of a fp vector constant.  */
  mask = immed_double_const (lo, hi, imode);
  mask = gen_lowpart (mode, mask);

  if (vec_mode == VOIDmode)
    return force_reg (mode, mask);

  v = ix86_build_const_vector (vec_mode, vect, mask);
  return force_reg (vec_mode, v);
}
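
/* For reference, the masks produced above in the SFmode case are

     sign bit mask:       0x80000000 in each selected element
     inverted (INVERT):   0x7fffffff, i.e. everything but the sign bit

   so ABS becomes a single andps with the inverted mask and NEG a single
   xorps with the sign mask; see ix86_expand_fp_absneg_operator below.  */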
/* Generate code for floating point ABS or NEG.  */

void
ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
                                rtx operands[])
{
  rtx mask, set, dst, src;
  bool use_sse = false;
  bool vector_mode = VECTOR_MODE_P (mode);
  enum machine_mode vmode = mode;

  if (vector_mode)
    use_sse = true;
  else if (mode == TFmode)
    use_sse = true;
  else if (TARGET_SSE_MATH)
    {
      use_sse = SSE_FLOAT_MODE_P (mode);
      if (mode == SFmode)
        vmode = V4SFmode;
      else if (mode == DFmode)
        vmode = V2DFmode;
    }

  /* NEG and ABS performed with SSE use bitwise mask operations.
     Create the appropriate mask now.  */
  if (use_sse)
    mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
  else
    mask = NULL_RTX;

  dst = operands[0];
  src = operands[1];

  set = gen_rtx_fmt_e (code, mode, src);
  set = gen_rtx_SET (VOIDmode, dst, set);

  if (mask)
    {
      rtx use, clob;
      rtvec par;

      use = gen_rtx_USE (VOIDmode, mask);
      if (vector_mode)
        par = gen_rtvec (2, set, use);
      else
        {
          clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
          par = gen_rtvec (3, set, use, clob);
        }
      emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
    }
  else
    emit_insn (set);
}
/* Expand a copysign operation.  Special case operand 0 being a constant.  */

void
ix86_expand_copysign (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, op1, mask, nmask;

  dest = operands[0];
  op0 = operands[1];
  op1 = operands[2];

  mode = GET_MODE (dest);

  if (mode == SFmode)
    vmode = V4SFmode;
  else if (mode == DFmode)
    vmode = V2DFmode;
  else
    vmode = mode;

  if (GET_CODE (op0) == CONST_DOUBLE)
    {
      rtx (*copysign_insn) (rtx, rtx, rtx, rtx);

      if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
        op0 = simplify_unary_operation (ABS, mode, op0, mode);

      if (mode == SFmode || mode == DFmode)
        {
          if (op0 == CONST0_RTX (mode))
            op0 = CONST0_RTX (vmode);
          else
            {
              rtx v = ix86_build_const_vector (vmode, false, op0);

              op0 = force_reg (vmode, v);
            }
        }
      else if (op0 != CONST0_RTX (mode))
        op0 = force_reg (mode, op0);

      mask = ix86_build_signbit_mask (vmode, 0, 0);

      if (mode == SFmode)
        copysign_insn = gen_copysignsf3_const;
      else if (mode == DFmode)
        copysign_insn = gen_copysigndf3_const;
      else
        copysign_insn = gen_copysigntf3_const;

      emit_insn (copysign_insn (dest, op0, op1, mask));
    }
  else
    {
      rtx (*copysign_insn) (rtx, rtx, rtx, rtx, rtx, rtx);

      nmask = ix86_build_signbit_mask (vmode, 0, 1);
      mask = ix86_build_signbit_mask (vmode, 0, 0);

      if (mode == SFmode)
        copysign_insn = gen_copysignsf3_var;
      else if (mode == DFmode)
        copysign_insn = gen_copysigndf3_var;
      else
        copysign_insn = gen_copysigntf3_var;

      emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
    }
}
/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
   be a constant, and so has already been expanded into a vector constant.  */

void
ix86_split_copysign_const (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, mask, x;

  dest = operands[0];
  op0 = operands[1];
  mask = operands[3];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  dest = simplify_gen_subreg (vmode, dest, mode, 0);
  x = gen_rtx_AND (vmode, dest, mask);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  if (op0 != CONST0_RTX (vmode))
    {
      x = gen_rtx_IOR (vmode, dest, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
}
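
/* In bitwise terms the split above computes

     dest = (op1 & sign_mask) | op0

   where OP1 (kept in DEST by the insn constraints) supplies the sign
   bit and OP0 is the absolute-value constant.  */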
/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
   so we have to do two masks.  */

void
ix86_split_copysign_var (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, scratch, op0, op1, mask, nmask, x;

  dest = operands[0];
  scratch = operands[1];
  op0 = operands[2];
  op1 = operands[3];
  nmask = operands[4];
  mask = operands[5];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  if (rtx_equal_p (op0, op1))
    {
      /* Shouldn't happen often (it's useless, obviously), but when it does
         we'd generate incorrect code if we continue below.  */
      emit_move_insn (dest, op0);
      return;
    }

  if (REG_P (mask) && REGNO (dest) == REGNO (mask))    /* alternative 0 */
    {
      gcc_assert (REGNO (op1) == REGNO (scratch));

      x = gen_rtx_AND (vmode, scratch, mask);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      dest = mask;
      op0 = simplify_gen_subreg (vmode, op0, mode, 0);
      x = gen_rtx_NOT (vmode, dest);
      x = gen_rtx_AND (vmode, x, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else
    {
      if (REGNO (op1) == REGNO (scratch))              /* alternative 1,3 */
        {
          x = gen_rtx_AND (vmode, scratch, mask);
        }
      else                                             /* alternative 2,4 */
        {
          gcc_assert (REGNO (mask) == REGNO (scratch));
          op1 = simplify_gen_subreg (vmode, op1, mode, 0);
          x = gen_rtx_AND (vmode, scratch, op1);
        }
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      if (REGNO (op0) == REGNO (dest))                 /* alternative 1,2 */
        {
          dest = simplify_gen_subreg (vmode, op0, mode, 0);
          x = gen_rtx_AND (vmode, dest, nmask);
        }
      else                                             /* alternative 3,4 */
        {
          gcc_assert (REGNO (nmask) == REGNO (dest));
          dest = nmask;
          op0 = simplify_gen_subreg (vmode, op0, mode, 0);
          x = gen_rtx_AND (vmode, dest, op0);
        }
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }

  x = gen_rtx_IOR (vmode, dest, scratch);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
}
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

bool
ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      if (req_mode != CCNOmode
          && (req_mode != CCmode
              || XEXP (SET_SRC (set), 1) != const0_rtx))
        return false;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
        return false;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
        return false;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
        return false;
      /* FALLTHRU */
    case CCZmode:
      break;

    case CCAmode:
    case CCCmode:
    case CCOmode:
    case CCSmode:
      if (set_mode != req_mode)
        return false;
      break;

    default:
      gcc_unreachable ();
    }

  return GET_MODE (SET_SRC (set)) == set_mode;
}
/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}
/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

enum machine_mode
ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
}
/* Return the mode to use for a comparison CODE of operands OP0 and OP1.  */

enum machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);

  if (SCALAR_FLOAT_MODE_P (mode))
    {
      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
      return ix86_fp_compare_mode (code);
    }

  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:                    /* ZF=0 */
    case NE:                    /* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:                   /* CF=0 */
    case LTU:                   /* CF=1 */
      /* Detect overflow checks.  They need just the carry flag.  */
      if (GET_CODE (op0) == PLUS
          && rtx_equal_p (op1, XEXP (op0, 0)))
        return CCCmode;
      else
        return CCmode;
    case GTU:                   /* CF=0 & ZF=0 */
    case LEU:                   /* CF=1 | ZF=1 */
      /* Detect overflow checks.  They need just the carry flag.  */
      if (GET_CODE (op0) == MINUS
          && rtx_equal_p (op1, XEXP (op0, 0)))
        return CCCmode;
      else
        return CCmode;
      /* Codes possibly doable only with sign flag when
         comparing against zero.  */
    case GE:                    /* SF=OF   or   SF=0 */
    case LT:                    /* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
        return CCGOCmode;
      else
        /* For other cases Carry flag is not required.  */
        return CCGCmode;
      /* Codes doable only with sign flag when comparing
         against zero, but we miss jump instruction for it
         so we need to use relational tests against overflow
         that thus needs to be zero.  */
    case GT:                    /* ZF=0 & SF=OF */
    case LE:                    /* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
        return CCNOmode;
      else
        return CCGCmode;
      /* The strcmp pattern does (use flags) and combine may ask us for
         proper mode.  */
    case USE:
      return CCmode;
    default:
      gcc_unreachable ();
    }
}
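
/* Example (added commentary): a GT test against a nonzero operand needs
   the full CCGCmode (ZF, SF and OF), whereas LT/GE against zero only need
   the sign flag (CCGOCmode), which lets the flags set by a preceding
   arithmetic insn be reused instead of emitting a separate compare.  */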
/* Return the fixed registers used for condition codes.  */

static bool
ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = FLAGS_REG;
  *p2 = FPSR_REG;
  return true;
}
/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
{
  if (m1 == m2)
    return m1;

  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGOCmode;

  if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
    return m2;
  else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
    return m1;

  switch (m1)
    {
    default:
      gcc_unreachable ();

    case CCmode:
    case CCGCmode:
    case CCGOCmode:
    case CCNOmode:
    case CCAmode:
    case CCCmode:
    case CCOmode:
    case CCSmode:
    case CCZmode:
      switch (m2)
        {
        default:
          gcc_unreachable ();

        case CCmode:
        case CCGCmode:
        case CCGOCmode:
        case CCNOmode:
        case CCAmode:
        case CCCmode:
        case CCOmode:
        case CCSmode:
        case CCZmode:
          return CCmode;
        }

    case CCFPmode:
    case CCFPUmode:
      /* These are only compatible with themselves, which we already
         checked above.  */
      return VOIDmode;
    }
}
/* Return a comparison we can do that is equivalent to
   swap_condition (code), apart possibly from orderedness.
   But, never change orderedness if TARGET_IEEE_FP, returning
   UNKNOWN in that case if necessary.  */

static enum rtx_code
ix86_fp_swap_condition (enum rtx_code code)
{
  switch (code)
    {
    case GT:                    /* GTU - CF=0 & ZF=0 */
      return TARGET_IEEE_FP ? UNKNOWN : UNLT;
    case GE:                    /* GEU - CF=0 */
      return TARGET_IEEE_FP ? UNKNOWN : UNLE;
    case UNLT:                  /* LTU - CF=1 */
      return TARGET_IEEE_FP ? UNKNOWN : GT;
    case UNLE:                  /* LEU - CF=1 | ZF=1 */
      return TARGET_IEEE_FP ? UNKNOWN : GE;
    default:
      return swap_condition (code);
    }
}
/* Return cost of comparison CODE using the best strategy for performance.
   All following functions use the number of instructions as a cost metric.
   In future this should be tweaked to compute bytes for optimize_size and
   take into account performance of various instructions on various CPUs.  */

static int
ix86_fp_comparison_cost (enum rtx_code code)
{
  int arith_cost;

  /* The cost of code using bit-twiddling on %ah.  */
  switch (code)
    {
    case UNLE:
    case UNLT:
    case LTGT:
    case GT:
    case GE:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
      arith_cost = 4;
      break;
    case LT:
    case NE:
    case EQ:
    case UNGE:
      arith_cost = TARGET_IEEE_FP ? 5 : 4;
      break;
    case LE:
    case UNGT:
      arith_cost = TARGET_IEEE_FP ? 6 : 4;
      break;
    default:
      gcc_unreachable ();
    }

  switch (ix86_fp_comparison_strategy (code))
    {
    case IX86_FPCMP_COMI:
      return arith_cost > 4 ? 3 : 2;
    case IX86_FPCMP_SAHF:
      return arith_cost > 4 ? 4 : 3;
    default:
      return arith_cost;
    }
}
/* Return strategy to use for floating-point.  We assume that fcomi is always
   preferable where available, since that is also true when looking at size
   (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test).  */

enum ix86_fpcmp_strategy
ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* Do fcomi/sahf based test when profitable.  */

  if (TARGET_CMOVE)
    return IX86_FPCMP_COMI;

  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
    return IX86_FPCMP_SAHF;

  return IX86_FPCMP_ARITH;
}
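
/* In instruction terms the three strategies are roughly

     IX86_FPCMP_COMI:   fcomi/fucomi           -- sets EFLAGS directly
     IX86_FPCMP_SAHF:   fcom; fnstsw %ax; sahf
     IX86_FPCMP_ARITH:  fcom; fnstsw %ax; test/and on %ah

   which is where the byte counts quoted above come from.  */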
/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the fcomi compare instructions.  The XFmode
     compare instructions require registers except when comparing
     against zero or when converting operand 1 from fixed point to
     floating point.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
          || (op_mode == XFmode
              && ! (standard_80387_constant_p (op0) == 1
                    || standard_80387_constant_p (op1) == 1)
              && GET_CODE (op1) != FLOAT)
          || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
         things around if they appear profitable, otherwise force op0
         into a register.  */

      if (standard_80387_constant_p (op0) == 0
          || (MEM_P (op0)
              && ! (standard_80387_constant_p (op1) == 0
                    || MEM_P (op1))))
        {
          enum rtx_code new_code = ix86_fp_swap_condition (code);
          if (new_code != UNKNOWN)
            {
              rtx tmp;
              tmp = op0, op0 = op1, op1 = tmp;
              code = new_code;
            }
        }

      if (!REG_P (op0))
        op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
        {
          int tmp = standard_80387_constant_p (op1);
          if (tmp == 0)
            op1 = validize_mem (force_const_mem (op_mode, op1));
          else if (tmp == 1)
            {
              if (TARGET_CMOVE)
                op1 = force_reg (op_mode, op1);
            }
          else
            op1 = force_reg (op_mode, op1);
        }
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (REG_P (op1) || can_create_pseudo_p ()))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (!REG_P (op0))
        op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}
/* Convert comparison codes we use to represent FP comparison to integer
   code that will result in proper branch.  Return UNKNOWN if no such code
   is available.  */

enum rtx_code
ix86_fp_compare_code_to_integer (enum rtx_code code)
{
  switch (code)
    {
    case GT: return GTU;
    case GE: return GEU;
    case ORDERED: case UNORDERED: return code;
    case UNEQ: return EQ;
    case UNLT: return LTU;
    case UNLE: return LEU;
    case LTGT: return NE;
    default: return UNKNOWN;
    }
}
/* Generate insn patterns to do a floating point compare of OPERANDS.  */

static rtx
ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  /* Do fcomi/sahf based test when profitable.  */
  switch (ix86_fp_comparison_strategy (code))
    {
    case IX86_FPCMP_COMI:
      intcmp_mode = fpcmp_mode;
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
                         tmp);
      emit_insn (tmp);
      break;

    case IX86_FPCMP_SAHF:
      intcmp_mode = fpcmp_mode;
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
                         tmp);

      if (!scratch)
        scratch = gen_reg_rtx (HImode);
      tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
      break;

    case IX86_FPCMP_ARITH:
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
      if (!scratch)
        scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
         doesn't happen to work out to anything nice combination-wise.
         So do some bit twiddling on the value we've got in AH to come
         up with an appropriate set of condition codes.  */

      intcmp_mode = CCNOmode;
      switch (code)
        {
        case GT:
        case UNGT:
          if (code == GT || !TARGET_IEEE_FP)
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
              code = EQ;
            }
          else
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
              intcmp_mode = CCmode;
              code = GEU;
            }
          break;
        case LT:
        case UNLT:
          if (code == LT && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
              intcmp_mode = CCmode;
              code = EQ;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
              code = NE;
            }
          break;
        case GE:
        case UNGE:
          if (code == GE || !TARGET_IEEE_FP)
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
              code = EQ;
            }
          else
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
              code = NE;
            }
          break;
        case LE:
        case UNLE:
          if (code == LE && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
              intcmp_mode = CCmode;
              code = LTU;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
              code = NE;
            }
          break;
        case EQ:
        case UNEQ:
          if (code == EQ && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
              intcmp_mode = CCmode;
              code = EQ;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
              code = NE;
            }
          break;
        case NE:
        case LTGT:
          if (code == NE && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
                                             GEN_INT (0x40)));
              code = NE;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
              code = EQ;
            }
          break;

        case UNORDERED:
          emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
          code = NE;
          break;
        case ORDERED:
          emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
          code = EQ;
          break;

        default:
          gcc_unreachable ();
        }
      break;

    default:
      gcc_unreachable ();
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
                         gen_rtx_REG (intcmp_mode, FLAGS_REG),
                         const0_rtx);
}
static rtx
ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
{
  rtx ret;

  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
    ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
  else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
    {
      gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
      ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
    }
  else
    ret = ix86_expand_int_compare (code, op0, op1);

  return ret;
}
void
ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx tmp;

  switch (mode)
    {
    case SFmode:
    case DFmode:
    case XFmode:
    case QImode:
    case HImode:
    case SImode:
      simple:
      tmp = ix86_expand_compare (code, op0, op1);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, label),
                                  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case DImode:
      if (TARGET_64BIT)
        goto simple;
    case TImode:
      /* Expand DImode branch into multiple compare+branch.  */
      {
        rtx lo[2], hi[2], label2;
        enum rtx_code code1, code2, code3;
        enum machine_mode submode;

        if (CONSTANT_P (op0) && !CONSTANT_P (op1))
          {
            tmp = op0, op0 = op1, op1 = tmp;
            code = swap_condition (code);
          }

        split_double_mode (mode, &op0, 1, lo+0, hi+0);
        split_double_mode (mode, &op1, 1, lo+1, hi+1);

        submode = mode == DImode ? SImode : DImode;

        /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
           avoid two branches.  This costs one extra insn, so disable when
           optimizing for size.  */

        if ((code == EQ || code == NE)
            && (!optimize_insn_for_size_p ()
                || hi[1] == const0_rtx || lo[1] == const0_rtx))
          {
            rtx xor0, xor1;

            xor1 = hi[0];
            if (hi[1] != const0_rtx)
              xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
                                   NULL_RTX, 0, OPTAB_WIDEN);

            xor0 = lo[0];
            if (lo[1] != const0_rtx)
              xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
                                   NULL_RTX, 0, OPTAB_WIDEN);

            tmp = expand_binop (submode, ior_optab, xor1, xor0,
                                NULL_RTX, 0, OPTAB_WIDEN);

            ix86_expand_branch (code, tmp, const0_rtx, label);
            return;
          }

        /* Otherwise, if we are doing less-than or greater-or-equal-than,
           op1 is a constant and the low word is zero, then we can just
           examine the high word.  Similarly for low word -1 and
           less-or-equal-than or greater-than.  */

        if (CONST_INT_P (hi[1]))
          switch (code)
            {
            case LT: case LTU: case GE: case GEU:
              if (lo[1] == const0_rtx)
                {
                  ix86_expand_branch (code, hi[0], hi[1], label);
                  return;
                }
              break;
            case LE: case LEU: case GT: case GTU:
              if (lo[1] == constm1_rtx)
                {
                  ix86_expand_branch (code, hi[0], hi[1], label);
                  return;
                }
              break;
            default:
              break;
            }

        /* Otherwise, we need two or three jumps.  */

        label2 = gen_label_rtx ();

        code1 = code;
        code2 = swap_condition (code);
        code3 = unsigned_condition (code);

        switch (code)
          {
          case LT: case GT: case LTU: case GTU:
            break;

          case LE:   code1 = LT;  code2 = GT;  break;
          case GE:   code1 = GT;  code2 = LT;  break;
          case LEU:  code1 = LTU; code2 = GTU; break;
          case GEU:  code1 = GTU; code2 = LTU; break;

          case EQ:   code1 = UNKNOWN; code2 = NE;  break;
          case NE:   code2 = UNKNOWN; break;

          default:
            gcc_unreachable ();
          }

        /*
         * a < b =>
         *    if (hi(a) < hi(b)) goto true;
         *    if (hi(a) > hi(b)) goto false;
         *    if (lo(a) < lo(b)) goto true;
         *  false:
         */

        if (code1 != UNKNOWN)
          ix86_expand_branch (code1, hi[0], hi[1], label);
        if (code2 != UNKNOWN)
          ix86_expand_branch (code2, hi[0], hi[1], label2);

        ix86_expand_branch (code3, lo[0], lo[1], label);

        if (code2 != UNKNOWN)
          emit_label (label2);
        return;
      }

    default:
      gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
      goto simple;
    }
}
/* Split branch based on floating point condition.  */

void
ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
                      rtx target1, rtx target2, rtx tmp, rtx pushed)
{
  rtx condition;
  rtx i;

  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
                                      tmp);

  /* Remove pushed operand from stack.  */
  if (pushed)
    ix86_free_from_memory (GET_MODE (pushed));

  i = emit_jump_insn (gen_rtx_SET
                      (VOIDmode, pc_rtx,
                       gen_rtx_IF_THEN_ELSE (VOIDmode,
                                             condition, target1, target2)));
  if (split_branch_probability >= 0)
    add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
}
void
ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
{
  rtx ret;

  gcc_assert (GET_MODE (dest) == QImode);

  ret = ix86_expand_compare (code, op0, op1);
  PUT_MODE (ret, QImode);
  emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
}
/* Expand comparison setting or clearing carry flag.  Return true when
   successful and set pop for the operation.  */

static bool
ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
{
  enum machine_mode mode =
    GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);

  /* Do not handle double-mode compares that go through special path.  */
  if (mode == (TARGET_64BIT ? TImode : DImode))
    return false;

  if (SCALAR_FLOAT_MODE_P (mode))
    {
      rtx compare_op, compare_seq;

      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));

      /* Shortcut:  following common codes never translate
         into carry flag compares.  */
      if (code == EQ || code == NE || code == UNEQ || code == LTGT
          || code == ORDERED || code == UNORDERED)
        return false;

      /* These comparisons require zero flag; swap operands so they won't.  */
      if ((code == GT || code == UNLE || code == LE || code == UNGT)
          && !TARGET_IEEE_FP)
        {
          rtx tmp = op0;
          op0 = op1;
          op1 = tmp;
          code = swap_condition (code);
        }

      /* Try to expand the comparison and verify that we end up with
         carry flag based comparison.  This fails to be true only when
         we decide to expand comparison using arithmetic that is not
         too common scenario.  */
      start_sequence ();
      compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
      compare_seq = get_insns ();
      end_sequence ();

      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
          || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
        code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
      else
        code = GET_CODE (compare_op);

      if (code != LTU && code != GEU)
        return false;

      emit_insn (compare_seq);
      *pop = compare_op;
      return true;
    }

  if (!INTEGRAL_MODE_P (mode))
    return false;

  switch (code)
    {
    case LTU:
    case GEU:
      break;

    /* Convert a==0 into (unsigned)a<1.  */
    case EQ:
    case NE:
      if (op1 != const0_rtx)
        return false;
      op1 = const1_rtx;
      code = (code == EQ ? LTU : GEU);
      break;

    /* Convert a>b into b<a or a>=b-1.  */
    case GTU:
    case LEU:
      if (CONST_INT_P (op1))
        {
          op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
          /* Bail out on overflow.  We still can swap operands but that
             would force loading of the constant into register.  */
          if (op1 == const0_rtx
              || !x86_64_immediate_operand (op1, GET_MODE (op1)))
            return false;
          code = (code == GTU ? GEU : LTU);
        }
      else
        {
          rtx tmp = op1;
          op1 = op0;
          op0 = tmp;
          code = (code == GTU ? LTU : GEU);
        }
      break;

    /* Convert a>=0 into (unsigned)a<0x80000000.  */
    case LT:
    case GE:
      if (mode == DImode || op1 != const0_rtx)
        return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LT ? GEU : LTU);
      break;
    case LE:
    case GT:
      if (mode == DImode || op1 != constm1_rtx)
        return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LE ? GEU : LTU);
      break;

    default:
      return false;
    }
  /* Swapping operands may cause constant to appear as first operand.  */
  if (!nonimmediate_operand (op0, VOIDmode))
    {
      if (!can_create_pseudo_p ())
        return false;
      op0 = force_reg (mode, op0);
    }
  *pop = ix86_expand_compare (code, op0, op1);
  gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
  return true;
}
bool
ix86_expand_int_movcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]), compare_code;
  rtx compare_seq, compare_op;
  enum machine_mode mode = GET_MODE (operands[0]);
  bool sign_bit_compare_p = false;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  start_sequence ();
  compare_op = ix86_expand_compare (code, op0, op1);
  compare_seq = get_insns ();
  end_sequence ();

  compare_code = GET_CODE (compare_op);

  if ((op1 == const0_rtx && (code == GE || code == LT))
      || (op1 == constm1_rtx && (code == GT || code == LE)))
    sign_bit_compare_p = true;

  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
     HImode insns, we'd be swallowed in word prefix ops.  */

  if ((mode != HImode || TARGET_FAST_PREFIX)
      && (mode != (TARGET_64BIT ? TImode : DImode))
      && CONST_INT_P (operands[2])
      && CONST_INT_P (operands[3]))
    {
      rtx out = operands[0];
      HOST_WIDE_INT ct = INTVAL (operands[2]);
      HOST_WIDE_INT cf = INTVAL (operands[3]);
      HOST_WIDE_INT diff;

      diff = ct - cf;
      /* Sign bit compares are better done using shifts than we do by using
         sbb.  */
      if (sign_bit_compare_p
          || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
        {
          /* Detect overlap between destination and compare sources.  */
          rtx tmp = out;

          if (!sign_bit_compare_p)
            {
              rtx flags;
              bool fpcmp = false;

              compare_code = GET_CODE (compare_op);

              flags = XEXP (compare_op, 0);

              if (GET_MODE (flags) == CCFPmode
                  || GET_MODE (flags) == CCFPUmode)
                {
                  fpcmp = true;
                  compare_code
                    = ix86_fp_compare_code_to_integer (compare_code);
                }

              /* To simplify rest of code, restrict to the GEU case.  */
              if (compare_code == LTU)
                {
                  HOST_WIDE_INT tmp = ct;
                  ct = cf;
                  cf = tmp;
                  compare_code = reverse_condition (compare_code);
                  code = reverse_condition (code);
                }
              else
                {
                  if (fpcmp)
                    PUT_CODE (compare_op,
                              reverse_condition_maybe_unordered
                                (GET_CODE (compare_op)));
                  else
                    PUT_CODE (compare_op,
                              reverse_condition (GET_CODE (compare_op)));
                }
              diff = ct - cf;

              if (reg_overlap_mentioned_p (out, op0)
                  || reg_overlap_mentioned_p (out, op1))
                tmp = gen_reg_rtx (mode);

              if (mode == DImode)
                emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
              else
                emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
                                                 flags, compare_op));
            }
          else
            {
              if (code == GT || code == GE)
                code = reverse_condition (code);
              else
                {
                  HOST_WIDE_INT tmp = ct;
                  ct = cf;
                  cf = tmp;
                  diff = ct - cf;
                }
              tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
            }

          if (diff == 1)
            {
              if (ct)
                tmp = expand_simple_binop (mode, PLUS,
                                           tmp, GEN_INT (ct),
                                           copy_rtx (tmp), 1, OPTAB_DIRECT);
            }
          else if (cf == -1)
            {
              tmp = expand_simple_binop (mode, IOR,
                                         tmp, GEN_INT (ct),
                                         copy_rtx (tmp), 1, OPTAB_DIRECT);
            }
          else if (diff == -1 && ct)
            {
              tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
              if (cf)
                tmp = expand_simple_binop (mode, PLUS,
                                           copy_rtx (tmp), GEN_INT (cf),
                                           copy_rtx (tmp), 1, OPTAB_DIRECT);
            }
          else
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * [notl dest]
               * andl cf - ct, dest
               * [addl dest, ct]
               *
               * Size 8 - 11.
               */

              if (cf == 0)
                {
                  cf = ct;
                  ct = 0;
                  tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
                }

              tmp = expand_simple_binop (mode, AND,
                                         copy_rtx (tmp),
                                         gen_int_mode (cf - ct, mode),
                                         copy_rtx (tmp), 1, OPTAB_DIRECT);
              if (ct)
                tmp = expand_simple_binop (mode, PLUS,
                                           copy_rtx (tmp), GEN_INT (ct),
                                           copy_rtx (tmp), 1, OPTAB_DIRECT);
            }

          if (!rtx_equal_p (tmp, out))
            emit_move_insn (copy_rtx (out), copy_rtx (tmp));

          return true;
        }

      if (diff < 0)
        {
          enum machine_mode cmp_mode = GET_MODE (op0);
          HOST_WIDE_INT tmp;

          tmp = ct, ct = cf, cf = tmp;
          diff = -diff;

          if (SCALAR_FLOAT_MODE_P (cmp_mode))
            {
              gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));

              /* We may be reversing unordered compare to normal compare, that
                 is not valid in general (we may convert non-trapping condition
                 to trapping one), however on i386 we currently emit all
                 comparisons unordered.  */
              compare_code = reverse_condition_maybe_unordered (compare_code);
              code = reverse_condition_maybe_unordered (code);
            }
          else
            {
              compare_code = reverse_condition (compare_code);
              code = reverse_condition (code);
            }
        }

      compare_code = UNKNOWN;
      if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
          && CONST_INT_P (op1))
        {
          if (op1 == const0_rtx
              && (code == LT || code == GE))
            compare_code = code;
          else if (op1 == constm1_rtx)
            {
              if (code == LE)
                compare_code = LT;
              else if (code == GT)
                compare_code = GE;
            }
        }

      /* Optimize dest = (op0 < 0) ? -1 : cf.  */
      if (compare_code != UNKNOWN
          && GET_MODE (op0) == GET_MODE (out)
          && (cf == -1 || ct == -1))
        {
          /* If lea code below could be used, only optimize
             if it results in a 2 insn sequence.  */

          if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
                 || diff == 3 || diff == 5 || diff == 9)
              || (compare_code == LT && ct == -1)
              || (compare_code == GE && cf == -1))
            {
              /*
               * notl op1 (if necessary)
               * sarl $31, op1
               * orl cf, op1
               */
              if (ct != -1)
                {
                  cf = ct;
                  ct = -1;
                  code = reverse_condition (code);
                }

              out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);

              out = expand_simple_binop (mode, IOR,
                                         out, GEN_INT (cf),
                                         out, 1, OPTAB_DIRECT);
              if (out != operands[0])
                emit_move_insn (operands[0], out);

              return true;
            }
        }

      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
           || diff == 3 || diff == 5 || diff == 9)
          && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
          && (mode != DImode
              || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
        {
          /*
           * xorl dest,dest
           * cmpl op1,op2
           * setcc dest
           * lea cf(dest*(ct-cf)),dest
           *
           * Size 14.
           *
           * This also catches the degenerate setcc-only case.
           */

          rtx tmp;
          int nops;

          out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);

          nops = 0;
          /* On x86_64 the lea instruction operates on Pmode, so we need
             to get arithmetics done in proper mode to match.  */
          if (diff == 1)
            tmp = copy_rtx (out);
          else
            {
              rtx out1;
              out1 = copy_rtx (out);
              tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
              nops++;
              if (diff & 1)
                {
                  tmp = gen_rtx_PLUS (mode, tmp, out1);
                  nops++;
                }
            }
          if (cf != 0)
            {
              tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
              nops++;
            }
          if (!rtx_equal_p (tmp, out))
            {
              if (nops == 1)
                out = force_operand (tmp, copy_rtx (out));
              else
                emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out),
                                        copy_rtx (tmp)));
            }
          if (!rtx_equal_p (out, operands[0]))
            emit_move_insn (operands[0], copy_rtx (out));

          return true;
        }

      /*
       * General case:                  Jumpful:
       *   xorl dest,dest               cmpl op1, op2
       *   cmpl op1, op2                movl ct, dest
       *   setcc dest                   jcc 1f
       *   decl dest                    movl cf, dest
       *   andl (cf-ct),dest            1:
       *   addl ct,dest
       *
       * Size 20.                       Size 14.
       *
       * This is reasonably steep, but branch mispredict costs are
       * high on modern cpus, so consider failing only if optimizing
       * for space.
       */

      if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
          && BRANCH_COST (optimize_insn_for_speed_p (),
                          false) >= 2)
        {
          if (cf == 0)
            {
              enum machine_mode cmp_mode = GET_MODE (op0);

              cf = ct;
              ct = 0;

              if (SCALAR_FLOAT_MODE_P (cmp_mode))
                {
                  gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));

                  /* We may be reversing unordered compare to normal compare,
                     that is not valid in general (we may convert non-trapping
                     condition to trapping one), however on i386 we currently
                     emit all comparisons unordered.  */
                  code = reverse_condition_maybe_unordered (code);
                }
              else
                {
                  code = reverse_condition (code);
                  if (compare_code != UNKNOWN)
                    compare_code = reverse_condition (compare_code);
                }
            }

          if (compare_code != UNKNOWN)
            {
              /* notl op1       (if needed)
                 sarl $31, op1
                 andl (cf-ct), op1
                 addl ct, op1

                 For x < 0 (resp. x <= -1) there will be no notl,
                 so if possible swap the constants to get rid of the
                 complement.
                 True/false will be -1/0 while code below (store flag
                 followed by decrement) is 0/-1, so the constants need
                 to be exchanged once more.  */

              if (compare_code == GE || !cf)
                {
                  code = reverse_condition (code);
                  compare_code = LT;
                }
              else
                {
                  HOST_WIDE_INT tmp = cf;
                  cf = ct;
                  ct = tmp;
                }

              out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
            }
          else
            {
              out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);

              out = expand_simple_binop (mode, PLUS, copy_rtx (out),
                                         constm1_rtx,
                                         copy_rtx (out), 1, OPTAB_DIRECT);
            }

          out = expand_simple_binop (mode, AND, copy_rtx (out),
                                     gen_int_mode (cf - ct, mode),
                                     copy_rtx (out), 1, OPTAB_DIRECT);
          if (ct)
            out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
                                       copy_rtx (out), 1, OPTAB_DIRECT);
          if (!rtx_equal_p (out, operands[0]))
            emit_move_insn (operands[0], copy_rtx (out));

          return true;
        }
    }

  if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
    {
      /* Try a few things more with specific constants and a variable.  */

      optab op;
      rtx var, orig_out, out, tmp;

      if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
        return false;

      /* If one of the two operands is an interesting constant, load a
         constant with the above and mask it in with a logical operation.  */

      if (CONST_INT_P (operands[2]))
        {
          var = operands[3];
          if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
            operands[3] = constm1_rtx, op = and_optab;
          else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
            operands[3] = const0_rtx, op = ior_optab;
          else
            return false;
        }
      else if (CONST_INT_P (operands[3]))
        {
          var = operands[2];
          if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
            operands[2] = constm1_rtx, op = and_optab;
          else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
            operands[2] = const0_rtx, op = ior_optab;
          else
            return false;
        }
      else
        return false;

      orig_out = operands[0];
      tmp = gen_reg_rtx (mode);
      operands[0] = tmp;

      /* Recurse to get the constant loaded.  */
      if (ix86_expand_int_movcc (operands) == 0)
        return false;

      /* Mask in the interesting variable.  */
      out = expand_binop (mode, op, var, tmp, orig_out, 0,
                          OPTAB_WIDEN);
      if (!rtx_equal_p (out, orig_out))
        emit_move_insn (copy_rtx (orig_out), copy_rtx (out));

      return true;
    }

  /*
   * For comparison with above,
   *
   * movl cf,dest
   * movl ct,tmp
   * cmpl op1,op2
   * cmovcc tmp,dest
   *
   * Size 15.
   */

  if (! nonimmediate_operand (operands[2], mode))
    operands[2] = force_reg (mode, operands[2]);
  if (! nonimmediate_operand (operands[3], mode))
    operands[3] = force_reg (mode, operands[3]);

  if (! register_operand (operands[2], VOIDmode)
      && (mode == QImode
          || ! register_operand (operands[3], VOIDmode)))
    operands[2] = force_reg (mode, operands[2]);

  if (mode == QImode
      && ! register_operand (operands[3], VOIDmode))
    operands[3] = force_reg (mode, operands[3]);

  emit_insn (compare_seq);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_IF_THEN_ELSE (mode,
                                                compare_op, operands[2],
                                                operands[3])));
  return true;
}
/* Swap, force into registers, or otherwise massage the two operands
   to an sse comparison with a mask result.  Thus we differ a bit from
   ix86_prepare_fp_compare_args which expects to produce a flags result.

   The DEST operand exists to help determine whether to commute commutative
   operators.  The POP0/POP1 operands are updated in place.  The new
   comparison code is returned, or UNKNOWN if not implementable.  */

static enum rtx_code
ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
                                  rtx *pop0, rtx *pop1)
{
  rtx tmp;

  switch (code)
    {
    case LTGT:
    case UNEQ:
      /* AVX supports all the needed comparisons.  */
      if (TARGET_AVX)
        break;
      /* We have no LTGT as an operator.  We could implement it with
         NE & ORDERED, but this requires an extra temporary.  It's
         not clear that it's worth it.  */
      return UNKNOWN;

    case LT:
    case LE:
    case UNGT:
    case UNGE:
      /* These are supported directly.  */
      break;

    case EQ:
    case NE:
    case UNORDERED:
    case ORDERED:
      /* AVX has 3 operand comparisons, no need to swap anything.  */
      if (TARGET_AVX)
        break;
      /* For commutative operators, try to canonicalize the destination
         operand to be first in the comparison - this helps reload to
         avoid extra moves.  */
      if (!dest || !rtx_equal_p (dest, *pop1))
        break;
      /* FALLTHRU */

    case GE:
    case GT:
    case UNLE:
    case UNLT:
      /* These are not supported directly before AVX, and furthermore
         ix86_expand_sse_fp_minmax only optimizes LT/UNGE.  Swap the
         comparison operands to transform into something that is
         supported.  */
      tmp = *pop0;
      *pop0 = *pop1;
      *pop1 = tmp;
      code = swap_condition (code);
      break;

    default:
      gcc_unreachable ();
    }

  return code;
}
/* Detect conditional moves that exactly match min/max operational
   semantics.  Note that this is IEEE safe, as long as we don't
   interchange the operands.

   Returns FALSE if this conditional move doesn't match a MIN/MAX,
   and TRUE if the operation is successful and instructions are emitted.  */

static bool
ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
                           rtx cmp_op1, rtx if_true, rtx if_false)
{
  enum machine_mode mode;
  bool is_min;
  rtx tmp;

  if (code == LT)
    ;
  else if (code == UNGE)
    {
      tmp = if_true;
      if_true = if_false;
      if_false = tmp;
    }
  else
    return false;

  if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
    is_min = true;
  else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
    is_min = false;
  else
    return false;

  mode = GET_MODE (dest);

  /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
     but MODE may be a vector mode and thus not appropriate.  */
  if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
    {
      int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
      rtvec v;

      if_true = force_reg (mode, if_true);
      v = gen_rtvec (2, if_true, if_false);
      tmp = gen_rtx_UNSPEC (mode, v, u);
    }
  else
    {
      code = is_min ? SMIN : SMAX;
      tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
    }

  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
  return true;
}
/* Expand an sse vector comparison.  Return the register with the result.  */

static rtx
ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
                     rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);
  enum machine_mode cmp_mode = GET_MODE (cmp_op0);
  rtx x;

  cmp_op0 = force_reg (cmp_mode, cmp_op0);
  if (!nonimmediate_operand (cmp_op1, cmp_mode))
    cmp_op1 = force_reg (cmp_mode, cmp_op1);

  if (optimize
      || reg_overlap_mentioned_p (dest, op_true)
      || reg_overlap_mentioned_p (dest, op_false))
    dest = gen_reg_rtx (mode);

  x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
  if (cmp_mode != mode)
    {
      x = force_reg (cmp_mode, x);
      convert_move (dest, x, false);
    }
  else
    emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  return dest;
}
/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
   operations.  This is used for both scalar and vector conditional moves.  */

static void
ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);
  rtx t2, t3, x;

  if (vector_all_ones_operand (op_true, mode)
      && rtx_equal_p (op_false, CONST0_RTX (mode)))
    {
      emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
    }
  else if (op_false == CONST0_RTX (mode))
    {
      op_true = force_reg (mode, op_true);
      x = gen_rtx_AND (mode, cmp, op_true);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (op_true == CONST0_RTX (mode))
    {
      op_false = force_reg (mode, op_false);
      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode))
    {
      op_false = force_reg (mode, op_false);
      x = gen_rtx_IOR (mode, cmp, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (TARGET_XOP)
    {
      op_true = force_reg (mode, op_true);

      if (!nonimmediate_operand (op_false, mode))
        op_false = force_reg (mode, op_false);

      emit_insn (gen_rtx_SET (mode, dest,
                              gen_rtx_IF_THEN_ELSE (mode, cmp,
                                                    op_true,
                                                    op_false)));
    }
  else
    {
      rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;

      if (!nonimmediate_operand (op_true, mode))
        op_true = force_reg (mode, op_true);

      op_false = force_reg (mode, op_false);

      switch (mode)
        {
        case V4SFmode:
          if (TARGET_SSE4_1)
            gen = gen_sse4_1_blendvps;
          break;
        case V2DFmode:
          if (TARGET_SSE4_1)
            gen = gen_sse4_1_blendvpd;
          break;
        case V16QImode:
        case V8HImode:
        case V4SImode:
        case V2DImode:
          if (TARGET_SSE4_1)
            {
              gen = gen_sse4_1_pblendvb;
              dest = gen_lowpart (V16QImode, dest);
              op_false = gen_lowpart (V16QImode, op_false);
              op_true = gen_lowpart (V16QImode, op_true);
              cmp = gen_lowpart (V16QImode, cmp);
            }
          break;
        case V8SFmode:
          if (TARGET_AVX)
            gen = gen_avx_blendvps256;
          break;
        case V4DFmode:
          if (TARGET_AVX)
            gen = gen_avx_blendvpd256;
          break;
        case V32QImode:
        case V16HImode:
        case V8SImode:
        case V4DImode:
          if (TARGET_AVX2)
            {
              gen = gen_avx2_pblendvb;
              dest = gen_lowpart (V32QImode, dest);
              op_false = gen_lowpart (V32QImode, op_false);
              op_true = gen_lowpart (V32QImode, op_true);
              cmp = gen_lowpart (V32QImode, cmp);
            }
          break;
        default:
          break;
        }

      if (gen != NULL)
        emit_insn (gen (dest, op_false, op_true, cmp));
      else
        {
          op_true = force_reg (mode, op_true);

          t2 = gen_reg_rtx (mode);
          if (optimize)
            t3 = gen_reg_rtx (mode);
          else
            t3 = dest;

          x = gen_rtx_AND (mode, op_true, cmp);
          emit_insn (gen_rtx_SET (VOIDmode, t2, x));

          x = gen_rtx_NOT (mode, cmp);
          x = gen_rtx_AND (mode, x, op_false);
          emit_insn (gen_rtx_SET (VOIDmode, t3, x));

          x = gen_rtx_IOR (mode, t3, t2);
          emit_insn (gen_rtx_SET (VOIDmode, dest, x));
        }
    }
}
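
/* Without SSE4.1/XOP the fallback above is the standard three-op blend

     t2   = op_true  & cmp
     t3   = op_false & ~cmp
     dest = t2 | t3

   whereas SSE4.1 collapses the whole selection into one blendv[ps|pd|b]
   keyed off the most significant bit of each element of CMP.  */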
/* Expand a floating-point conditional move.  Return true if successful.  */

bool
ix86_expand_fp_movcc (rtx operands[])
{
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[1]);
  rtx tmp, compare_op;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    {
      enum machine_mode cmode;

      /* Since we've no cmove for sse registers, don't force bad register
         allocation just to gain access to it.  Deny movcc when the
         comparison mode doesn't match the move mode.  */
      cmode = GET_MODE (op0);
      if (cmode == VOIDmode)
        cmode = GET_MODE (op1);
      if (cmode != mode)
        return false;

      code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
      if (code == UNKNOWN)
        return false;

      if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
                                     operands[2], operands[3]))
        return true;

      tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
                                 operands[2], operands[3]);
      ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
      return true;
    }

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  compare_op = ix86_expand_compare (code, op0, op1);
  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (tmp, code, op0, op1);

      compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_IF_THEN_ELSE (mode, compare_op,
                                                operands[2], operands[3])));

  return true;
}
/* Expand a floating-point vector conditional move; a vcond operation
   rather than a movcc operation.  */

bool
ix86_expand_fp_vcond (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[3]);
  rtx cmp;

  code = ix86_prepare_sse_fp_compare_args (operands[0], code,
                                           &operands[4], &operands[5]);
  if (code == UNKNOWN)
    {
      rtx temp;
      switch (GET_CODE (operands[3]))
        {
        case LTGT:
          temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
                                      operands[5], operands[0], operands[0]);
          cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
                                     operands[5], operands[1], operands[2]);
          code = AND;
          break;
        case UNEQ:
          temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
                                      operands[5], operands[0], operands[0]);
          cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
                                     operands[5], operands[1], operands[2]);
          code = IOR;
          break;
        default:
          gcc_unreachable ();
        }
      cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
                                 OPTAB_DIRECT);
      ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
      return true;
    }

  if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
                                 operands[5], operands[1], operands[2]))
    return true;

  cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
                             operands[1], operands[2]);
  ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
  return true;
}
/* Expand a signed/unsigned integral vector conditional move.  */

bool
ix86_expand_int_vcond (rtx operands[])
{
  enum machine_mode data_mode = GET_MODE (operands[0]);
  enum machine_mode mode = GET_MODE (operands[4]);
  enum rtx_code code = GET_CODE (operands[3]);
  bool negate = false;
  rtx x, cop0, cop1;

  cop0 = operands[4];
  cop1 = operands[5];

  /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
     and x < 0 ? 1 : 0 into (unsigned) x >> 31.  */
  if ((code == LT || code == GE)
      && data_mode == mode
      && cop1 == CONST0_RTX (mode)
      && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
      && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
      && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
      && (GET_MODE_SIZE (data_mode) == 16
	  || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
    {
      rtx negop = operands[2 - (code == LT)];
      int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
      if (negop == CONST1_RTX (data_mode))
	{
	  rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
					 operands[0], 1, OPTAB_DIRECT);
	  if (res != operands[0])
	    emit_move_insn (operands[0], res);
	  return true;
	}
      else if (GET_MODE_INNER (data_mode) != DImode
	       && vector_all_ones_operand (negop, data_mode))
	{
	  rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
					 operands[0], 0, OPTAB_DIRECT);
	  if (res != operands[0])
	    emit_move_insn (operands[0], res);
	  return true;
	}
    }

  if (!nonimmediate_operand (cop1, mode))
    cop1 = force_reg (mode, cop1);
  if (!general_operand (operands[1], data_mode))
    operands[1] = force_reg (data_mode, operands[1]);
  if (!general_operand (operands[2], data_mode))
    operands[2] = force_reg (data_mode, operands[2]);

  /* XOP supports all of the comparisons on all 128-bit vector int types.  */
  if (TARGET_XOP
      && (mode == V16QImode || mode == V8HImode
	  || mode == V4SImode || mode == V2DImode))
    ;
  else
    {
      /* Canonicalize the comparison to EQ, GT, GTU.  */
      switch (code)
	{
	case EQ:
	case GT:
	case GTU:
	  break;

	case NE:
	case LE:
	case LEU:
	  code = reverse_condition (code);
	  negate = true;
	  break;

	case GE:
	case GEU:
	  code = reverse_condition (code);
	  negate = true;
	  /* FALLTHRU */

	case LT:
	case LTU:
	  code = swap_condition (code);
	  x = cop0, cop0 = cop1, cop1 = x;
	  break;

	default:
	  gcc_unreachable ();
	}

      /* Only SSE4.1/SSE4.2 supports V2DImode.  */
      if (mode == V2DImode)
	{
	  switch (code)
	    {
	    case EQ:
	      /* SSE4.1 supports EQ.  */
	      if (!TARGET_SSE4_1)
		return false;
	      break;

	    case GT:
	    case GTU:
	      /* SSE4.2 supports GT/GTU.  */
	      if (!TARGET_SSE4_2)
		return false;
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}

      /* Unsigned parallel compare is not supported by the hardware.
	 Play some tricks to turn this into a signed comparison
	 against 0.  */
      if (code == GTU)
	{
	  cop0 = force_reg (mode, cop0);

	  switch (mode)
	    {
	    case V8SImode:
	    case V4DImode:
	    case V4SImode:
	    case V2DImode:
	      {
		rtx t1, t2, mask;
		rtx (*gen_sub3) (rtx, rtx, rtx);

		switch (mode)
		  {
		  case V8SImode: gen_sub3 = gen_subv8si3; break;
		  case V4DImode: gen_sub3 = gen_subv4di3; break;
		  case V4SImode: gen_sub3 = gen_subv4si3; break;
		  case V2DImode: gen_sub3 = gen_subv2di3; break;
		  default:
		    gcc_unreachable ();
		  }
		/* Subtract (-(INT MAX) - 1) from both operands to make
		   them signed.  */
		mask = ix86_build_signbit_mask (mode, true, false);
		t1 = gen_reg_rtx (mode);
		emit_insn (gen_sub3 (t1, cop0, mask));

		t2 = gen_reg_rtx (mode);
		emit_insn (gen_sub3 (t2, cop1, mask));

		cop0 = t1;
		cop1 = t2;
		code = GT;
	      }
	      break;

	    case V32QImode:
	    case V16HImode:
	    case V16QImode:
	    case V8HImode:
	      /* Perform a parallel unsigned saturating subtraction.  */
	      x = gen_reg_rtx (mode);
	      emit_insn (gen_rtx_SET (VOIDmode, x,
				      gen_rtx_US_MINUS (mode, cop0, cop1)));

	      cop0 = x;
	      cop1 = CONST0_RTX (mode);
	      code = EQ;
	      negate = !negate;
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}
    }

  /* Allow the comparison to be done in one mode, but the movcc to
     happen in another mode.  */
  if (data_mode == mode)
    {
      x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
			       operands[1+negate], operands[2-negate]);
    }
  else
    {
      gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
      x = ix86_expand_sse_cmp (gen_lowpart (mode, operands[0]),
			       code, cop0, cop1,
			       operands[1+negate], operands[2-negate]);
      x = gen_lowpart (data_mode, x);
    }

  ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
			 operands[2-negate]);
  return true;
}
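
/* Illustrative model, not part of the expander above, of the shift trick
   it uses, assuming 32-bit lanes and an arithmetic right shift of signed
   values: x < 0 ? -1 : 0 is a signed shift by 31, and x < 0 ? 1 : 0 an
   unsigned one.  */

static int
vcond_lt_zero_all_ones (int x)
{
  return x >> 31;		/* -1 when x < 0, else 0 (arithmetic shift).  */
}

static unsigned int
vcond_lt_zero_one (unsigned int x)
{
  return x >> 31;		/* 1 when the sign bit is set, else 0.  */
}
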
/* Expand a variable vector permutation.  */

void
ix86_expand_vec_perm (rtx operands[])
{
  rtx target = operands[0];
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  rtx mask = operands[3];
  rtx t1, t2, t3, t4, vt, vt2, vec[32];
  enum machine_mode mode = GET_MODE (op0);
  enum machine_mode maskmode = GET_MODE (mask);
  int w, e, i;
  bool one_operand_shuffle = rtx_equal_p (op0, op1);

  /* Number of elements in the vector.  */
  w = GET_MODE_NUNITS (mode);
  e = GET_MODE_UNIT_SIZE (mode);
  gcc_assert (w <= 32);

  if (TARGET_AVX2)
    {
      if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
	{
	  /* Unfortunately, the VPERMQ and VPERMPD instructions only support
	     a constant shuffle operand.  With a tiny bit of effort we can
	     use VPERMD instead.  A re-interpretation stall for V4DFmode is
	     unfortunate but there's no avoiding it.
	     Similarly for V16HImode we don't have instructions for variable
	     shuffling, while for V32QImode we can, after preparing suitable
	     masks, use vpshufb; vpshufb; vpermq; vpor.  */

	  if (mode == V16HImode)
	    {
	      maskmode = mode = V32QImode;
	      w = 32;
	      e = 1;
	    }
	  else
	    {
	      maskmode = mode = V8SImode;
	      w = 8;
	      e = 4;
	    }
	  t1 = gen_reg_rtx (maskmode);

	  /* Replicate the low bits of the V4DImode mask into V8SImode:
	       mask = { A B C D }
	       t1 = { A A B B C C D D }.  */
	  for (i = 0; i < w / 2; ++i)
	    vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
	  vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
	  vt = force_reg (maskmode, vt);
	  mask = gen_lowpart (maskmode, mask);
	  if (maskmode == V8SImode)
	    emit_insn (gen_avx2_permvarv8si (t1, vt, mask));
	  else
	    emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));

	  /* Multiply the shuffle indices by two.  */
	  t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
				    OPTAB_DIRECT);

	  /* Add one to the odd shuffle indices:
	       t1 = { A*2, A*2+1, B*2, B*2+1, ... }.  */
	  for (i = 0; i < w / 2; ++i)
	    {
	      vec[i * 2] = const0_rtx;
	      vec[i * 2 + 1] = const1_rtx;
	    }
	  vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
	  vt = force_const_mem (maskmode, vt);
	  t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
				    OPTAB_DIRECT);

	  /* Continue as if V8SImode (resp. V32QImode) was used initially.  */
	  operands[3] = mask = t1;
	  target = gen_lowpart (mode, target);
	  op0 = gen_lowpart (mode, op0);
	  op1 = gen_lowpart (mode, op1);
	}

      switch (mode)
	{
	case V8SImode:
	  /* The VPERMD and VPERMPS instructions already properly ignore
	     the high bits of the shuffle elements.  No need for us to
	     perform an AND ourselves.  */
	  if (one_operand_shuffle)
	    emit_insn (gen_avx2_permvarv8si (target, mask, op0));
	  else
	    {
	      t1 = gen_reg_rtx (V8SImode);
	      t2 = gen_reg_rtx (V8SImode);
	      emit_insn (gen_avx2_permvarv8si (t1, mask, op0));
	      emit_insn (gen_avx2_permvarv8si (t2, mask, op1));
	      goto merge_two;
	    }
	  return;

	case V8SFmode:
	  mask = gen_lowpart (V8SFmode, mask);
	  if (one_operand_shuffle)
	    emit_insn (gen_avx2_permvarv8sf (target, mask, op0));
	  else
	    {
	      t1 = gen_reg_rtx (V8SFmode);
	      t2 = gen_reg_rtx (V8SFmode);
	      emit_insn (gen_avx2_permvarv8sf (t1, mask, op0));
	      emit_insn (gen_avx2_permvarv8sf (t2, mask, op1));
	      goto merge_two;
	    }
	  return;

	case V4SImode:
	  /* By combining the two 128-bit input vectors into one 256-bit
	     input vector, we can use VPERMD and VPERMPS for the full
	     two-operand shuffle.  */
	  t1 = gen_reg_rtx (V8SImode);
	  t2 = gen_reg_rtx (V8SImode);
	  emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
	  emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
	  emit_insn (gen_avx2_permvarv8si (t1, t2, t1));
	  emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
	  return;

	case V4SFmode:
	  t1 = gen_reg_rtx (V8SFmode);
	  t2 = gen_reg_rtx (V8SFmode);
	  mask = gen_lowpart (V4SFmode, mask);
	  emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
	  emit_insn (gen_avx_vec_concatv8sf (t2, mask, mask));
	  emit_insn (gen_avx2_permvarv8sf (t1, t2, t1));
	  emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
	  return;

	case V32QImode:
	  t1 = gen_reg_rtx (V32QImode);
	  t2 = gen_reg_rtx (V32QImode);
	  t3 = gen_reg_rtx (V32QImode);
	  vt2 = GEN_INT (128);
	  for (i = 0; i < 32; i++)
	    vec[i] = vt2;
	  vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
	  vt = force_reg (V32QImode, vt);
	  for (i = 0; i < 32; i++)
	    vec[i] = i < 16 ? vt2 : const0_rtx;
	  vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
	  vt2 = force_reg (V32QImode, vt2);
	  /* From mask create two adjusted masks, which contain the same
	     bits as mask in the low 7 bits of each vector element.
	     The first mask will have the most significant bit clear
	     if it requests element from the same 128-bit lane
	     and MSB set if it requests element from the other 128-bit lane.
	     The second mask will have the opposite values of the MSB,
	     and additionally will have its 128-bit lanes swapped.
	     E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
	     t1   { 07 92 9e 09 ... | 17 19 85 1f ... } and
	     t3   { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
	     stands for other 12 bytes.  */
	  /* The bit whether element is from the same lane or the other
	     lane is bit 4, so shift it up by 3 to the MSB position.  */
	  emit_insn (gen_ashlv4di3 (gen_lowpart (V4DImode, t1),
				    gen_lowpart (V4DImode, mask),
				    GEN_INT (3)));
	  /* Clear MSB bits from the mask just in case it had them set.  */
	  emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
	  /* After this t1 will have MSB set for elements from other lane.  */
	  emit_insn (gen_xorv32qi3 (t1, t1, vt2));
	  /* Clear bits other than MSB.  */
	  emit_insn (gen_andv32qi3 (t1, t1, vt));
	  /* Or in the lower bits from mask into t3.  */
	  emit_insn (gen_iorv32qi3 (t3, t1, t2));
	  /* And invert MSB bits in t1, so MSB is set for elements from the
	     same lane.  */
	  emit_insn (gen_xorv32qi3 (t1, t1, vt));
	  /* Swap 128-bit lanes in t3.  */
	  emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
					  gen_lowpart (V4DImode, t3),
					  const2_rtx, GEN_INT (3),
					  const0_rtx, const1_rtx));
	  /* And or in the lower bits from mask into t1.  */
	  emit_insn (gen_iorv32qi3 (t1, t1, t2));
	  if (one_operand_shuffle)
	    {
	      /* Each of these shuffles will put 0s in places where
		 element from the other 128-bit lane is needed, otherwise
		 will shuffle in the requested value.  */
	      emit_insn (gen_avx2_pshufbv32qi3 (t3, op0, t3));
	      emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
	      /* For t3 the 128-bit lanes are swapped again.  */
	      emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
					      gen_lowpart (V4DImode, t3),
					      const2_rtx, GEN_INT (3),
					      const0_rtx, const1_rtx));
	      /* And oring both together leads to the result.  */
	      emit_insn (gen_iorv32qi3 (target, t1, t3));
	      return;
	    }

	  t4 = gen_reg_rtx (V32QImode);
	  /* Similarly to the above one_operand_shuffle code,
	     just repeated twice for each operand.  The merge_two:
	     code below will merge the two results together.  */
	  emit_insn (gen_avx2_pshufbv32qi3 (t4, op0, t3));
	  emit_insn (gen_avx2_pshufbv32qi3 (t3, op1, t3));
	  emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
	  emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
	  emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t4),
					  gen_lowpart (V4DImode, t4),
					  const2_rtx, GEN_INT (3),
					  const0_rtx, const1_rtx));
	  emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
					  gen_lowpart (V4DImode, t3),
					  const2_rtx, GEN_INT (3),
					  const0_rtx, const1_rtx));
	  emit_insn (gen_iorv32qi3 (t4, t2, t4));
	  emit_insn (gen_iorv32qi3 (t3, t1, t3));
	  t1 = t4;
	  t2 = t3;
	  goto merge_two;

	default:
	  gcc_assert (GET_MODE_SIZE (mode) <= 16);
	  break;
	}
    }

  if (TARGET_XOP)
    {
      /* The XOP VPPERM insn supports three inputs.  By ignoring the
	 one_operand_shuffle special case, we avoid creating another
	 set of constant vectors in memory.  */
      one_operand_shuffle = false;

      /* mask = mask & {2*w-1, ...} */
      vt = GEN_INT (2*w - 1);
    }
  else
    {
      /* mask = mask & {w-1, ...} */
      vt = GEN_INT (w - 1);
    }

  for (i = 0; i < w; i++)
    vec[i] = vt;
  vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
  mask = expand_simple_binop (maskmode, AND, mask, vt,
			      NULL_RTX, 0, OPTAB_DIRECT);

  /* For non-QImode operations, convert the word permutation control
     into a byte permutation control.  */
  if (mode != V16QImode)
    {
      mask = expand_simple_binop (maskmode, ASHIFT, mask,
				  GEN_INT (exact_log2 (e)),
				  NULL_RTX, 0, OPTAB_DIRECT);

      /* Convert mask to vector of chars.  */
      mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));

      /* Replicate each of the input bytes into byte positions:
	 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
	 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
	 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}.  */
      for (i = 0; i < 16; ++i)
	vec[i] = GEN_INT (i/e * e);
      vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
      vt = force_const_mem (V16QImode, vt);
      if (TARGET_XOP)
	emit_insn (gen_xop_pperm (mask, mask, mask, vt));
      else
	emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));

      /* Convert it into the byte positions by doing
	 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...}  */
      for (i = 0; i < 16; ++i)
	vec[i] = GEN_INT (i % e);
      vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
      vt = force_const_mem (V16QImode, vt);
      emit_insn (gen_addv16qi3 (mask, mask, vt));
    }

  /* The actual shuffle operations all operate on V16QImode.  */
  op0 = gen_lowpart (V16QImode, op0);
  op1 = gen_lowpart (V16QImode, op1);
  target = gen_lowpart (V16QImode, target);

  if (TARGET_XOP)
    {
      emit_insn (gen_xop_pperm (target, op0, op1, mask));
    }
  else if (one_operand_shuffle)
    {
      emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
    }
  else
    {
      rtx xops[6];
      bool ok;

      /* Shuffle the two input vectors independently.  */
      t1 = gen_reg_rtx (V16QImode);
      t2 = gen_reg_rtx (V16QImode);
      emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
      emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));

 merge_two:
      /* Then merge them together.  The key is whether any given control
	 element contained a bit set that indicates the second word.  */
      mask = operands[3];
      vt = GEN_INT (w);
      if (maskmode == V2DImode && !TARGET_SSE4_1)
	{
	  /* Without SSE4.1, we don't have V2DImode EQ.  Perform one
	     more shuffle to convert the V2DI input mask into a V4SI
	     input mask.  At which point the masking in expand_int_vcond
	     will work as desired.  */
	  rtx t3 = gen_reg_rtx (V4SImode);
	  emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
					const0_rtx, const0_rtx,
					const2_rtx, const2_rtx));
	  mask = t3;
	  maskmode = V4SImode;
	  e = w = 4;
	}

      for (i = 0; i < w; i++)
	vec[i] = vt;
      vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
      vt = force_reg (maskmode, vt);
      mask = expand_simple_binop (maskmode, AND, mask, vt,
				  NULL_RTX, 0, OPTAB_DIRECT);

      xops[0] = gen_lowpart (mode, operands[0]);
      xops[1] = gen_lowpart (mode, t2);
      xops[2] = gen_lowpart (mode, t1);
      xops[3] = gen_rtx_EQ (maskmode, mask, vt);
      xops[4] = mask;
      xops[5] = vt;
      ok = ix86_expand_int_vcond (xops);
      gcc_assert (ok);
    }
}
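
/* Illustrative scalar model, not part of the expander above, of the
   two-operand byte shuffle it builds: each control byte is masked down
   to 2*w-1; indices below w select from OP0, the rest from OP1.  */

static void
vec_perm_model (unsigned char *target, const unsigned char *op0,
		const unsigned char *op1, const unsigned char *mask, int w)
{
  int i;
  for (i = 0; i < w; i++)
    {
      int e = mask[i] & (2 * w - 1);	/* mask = mask & {2*w-1, ...}  */
      target[i] = e < w ? op0[e] : op1[e - w];
    }
}
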
/* Unpack OP[1] into the next wider integer vector type.  UNSIGNED_P is
   true if we should do zero extension, else sign extension.  HIGH_P is
   true if we want the N/2 high elements, else the low elements.  */

void
ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
{
  enum machine_mode imode = GET_MODE (operands[1]);
  rtx tmp, dest;

  if (TARGET_SSE4_1)
    {
      rtx (*unpack)(rtx, rtx);
      rtx (*extract)(rtx, rtx) = NULL;
      enum machine_mode halfmode = BLKmode;

      switch (imode)
	{
	case V32QImode:
	  if (unsigned_p)
	    unpack = gen_avx2_zero_extendv16qiv16hi2;
	  else
	    unpack = gen_avx2_sign_extendv16qiv16hi2;
	  halfmode = V16QImode;
	  extract
	    = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
	  break;
	case V16HImode:
	  if (unsigned_p)
	    unpack = gen_avx2_zero_extendv8hiv8si2;
	  else
	    unpack = gen_avx2_sign_extendv8hiv8si2;
	  halfmode = V8HImode;
	  extract
	    = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
	  break;
	case V8SImode:
	  if (unsigned_p)
	    unpack = gen_avx2_zero_extendv4siv4di2;
	  else
	    unpack = gen_avx2_sign_extendv4siv4di2;
	  halfmode = V4SImode;
	  extract
	    = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
	  break;
	case V16QImode:
	  if (unsigned_p)
	    unpack = gen_sse4_1_zero_extendv8qiv8hi2;
	  else
	    unpack = gen_sse4_1_sign_extendv8qiv8hi2;
	  break;
	case V8HImode:
	  if (unsigned_p)
	    unpack = gen_sse4_1_zero_extendv4hiv4si2;
	  else
	    unpack = gen_sse4_1_sign_extendv4hiv4si2;
	  break;
	case V4SImode:
	  if (unsigned_p)
	    unpack = gen_sse4_1_zero_extendv2siv2di2;
	  else
	    unpack = gen_sse4_1_sign_extendv2siv2di2;
	  break;
	default:
	  gcc_unreachable ();
	}

      if (GET_MODE_SIZE (imode) == 32)
	{
	  tmp = gen_reg_rtx (halfmode);
	  emit_insn (extract (tmp, operands[1]));
	}
      else if (high_p)
	{
	  /* Shift higher 8 bytes to lower 8 bytes.  */
	  tmp = gen_reg_rtx (imode);
	  emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
					 gen_lowpart (V1TImode, operands[1]),
					 GEN_INT (64)));
	}
      else
	tmp = operands[1];

      emit_insn (unpack (operands[0], tmp));
    }
  else
    {
      rtx (*unpack)(rtx, rtx, rtx);

      switch (imode)
	{
	case V16QImode:
	  if (high_p)
	    unpack = gen_vec_interleave_highv16qi;
	  else
	    unpack = gen_vec_interleave_lowv16qi;
	  break;
	case V8HImode:
	  if (high_p)
	    unpack = gen_vec_interleave_highv8hi;
	  else
	    unpack = gen_vec_interleave_lowv8hi;
	  break;
	case V4SImode:
	  if (high_p)
	    unpack = gen_vec_interleave_highv4si;
	  else
	    unpack = gen_vec_interleave_lowv4si;
	  break;
	default:
	  gcc_unreachable ();
	}

      dest = gen_lowpart (imode, operands[0]);

      if (unsigned_p)
	tmp = force_reg (imode, CONST0_RTX (imode));
      else
	tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
				   operands[1], pc_rtx, pc_rtx);

      emit_insn (unpack (dest, operands[1], tmp));
    }
}
/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */
bool
ix86_expand_int_addcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx flags;
  rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
  rtx compare_op;
  rtx val = const0_rtx;
  bool fpcmp = false;
  enum machine_mode mode;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (operands[3] != const1_rtx
      && operands[3] != constm1_rtx)
    return false;
  if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
    return false;
  code = GET_CODE (compare_op);

  flags = XEXP (compare_op, 0);

  if (GET_MODE (flags) == CCFPmode
      || GET_MODE (flags) == CCFPUmode)
    {
      fpcmp = true;
      code = ix86_fp_compare_code_to_integer (code);
    }

  if (code != LTU)
    {
      val = constm1_rtx;
      if (fpcmp)
	PUT_CODE (compare_op,
		  reverse_condition_maybe_unordered
		    (GET_CODE (compare_op)));
      else
	PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
    }

  mode = GET_MODE (operands[0]);

  /* Construct either adc or sbb insn.  */
  if ((code == LTU) == (operands[3] == constm1_rtx))
    {
      switch (mode)
	{
	case QImode:
	  insn = gen_subqi3_carry;
	  break;
	case HImode:
	  insn = gen_subhi3_carry;
	  break;
	case SImode:
	  insn = gen_subsi3_carry;
	  break;
	case DImode:
	  insn = gen_subdi3_carry;
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case QImode:
	  insn = gen_addqi3_carry;
	  break;
	case HImode:
	  insn = gen_addhi3_carry;
	  break;
	case SImode:
	  insn = gen_addsi3_carry;
	  break;
	case DImode:
	  insn = gen_adddi3_carry;
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  emit_insn (insn (operands[0], operands[2], val, flags, compare_op));

  return true;
}
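
/* Illustrative model, not part of the expander above: an adc with a zero
   addend implements a conditional increment from the carry flag, e.g.
   for unsigned operands dest = src + (op0 < op1).  */

static unsigned int
int_addcc_model (unsigned int src, unsigned int op0, unsigned int op1)
{
  unsigned int carry = op0 < op1;	/* What LTU leaves in CF.  */
  return src + carry;			/* adc src, 0  */
}
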
/* Split operands 0 and 1 into half-mode parts.  Similar to split_double_mode,
   but works for floating point parameters and nonoffsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.  */

static int
ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
{
  int size;

  if (!TARGET_64BIT)
    size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
  gcc_assert (size >= 2 && size <= 4);

  /* Optimize constant pool reference to immediates.  This is used by fp
     moves, that force all constants to memory to allow combining.  */
  if (MEM_P (operand) && MEM_READONLY_P (operand))
    {
      rtx tmp = maybe_get_pool_constant (operand);
      if (tmp)
	operand = tmp;
    }

  if (MEM_P (operand) && !offsettable_memref_p (operand))
    {
      /* The only non-offsettable memories we handle are pushes.  */
      int ok = push_operand (operand, VOIDmode);

      gcc_assert (ok);

      operand = copy_rtx (operand);
      PUT_MODE (operand, word_mode);
      parts[0] = parts[1] = parts[2] = parts[3] = operand;
      return size;
    }

  if (GET_CODE (operand) == CONST_VECTOR)
    {
      enum machine_mode imode = int_mode_for_mode (mode);
      /* Caution: if we looked through a constant pool memory above,
	 the operand may actually have a different mode now.  That's
	 ok, since we want to pun this all the way back to an integer.  */
      operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
      gcc_assert (operand != NULL);
      mode = imode;
    }

  if (!TARGET_64BIT)
    {
      if (mode == DImode)
	split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
      else
	{
	  int i;

	  if (REG_P (operand))
	    {
	      gcc_assert (reload_completed);
	      for (i = 0; i < size; i++)
		parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, SImode, 0);
	      parts[0] = operand;
	      for (i = 1; i < size; i++)
		parts[i] = adjust_address (operand, SImode, 4 * i);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      switch (mode)
		{
		case TFmode:
		  real_to_target (l, &r, mode);
		  parts[3] = gen_int_mode (l[3], SImode);
		  parts[2] = gen_int_mode (l[2], SImode);
		  break;
		case XFmode:
		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
		  parts[2] = gen_int_mode (l[2], SImode);
		  break;
		case DFmode:
		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
		  break;
		default:
		  gcc_unreachable ();
		}
	      parts[1] = gen_int_mode (l[1], SImode);
	      parts[0] = gen_int_mode (l[0], SImode);
	    }
	  else
	    gcc_unreachable ();
	}
    }
  else
    {
      if (mode == TImode)
	split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
	{
	  enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
	  if (REG_P (operand))
	    {
	      gcc_assert (reload_completed);
	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, DImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, upper_mode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      real_to_target (l, &r, mode);

	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
	      if (HOST_BITS_PER_WIDE_INT >= 64)
		parts[0]
		  = gen_int_mode
		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
		       DImode);
	      else
		parts[0] = immed_double_const (l[0], l[1], DImode);

	      if (upper_mode == SImode)
		parts[1] = gen_int_mode (l[2], SImode);
	      else if (HOST_BITS_PER_WIDE_INT >= 64)
		parts[1]
		  = gen_int_mode
		      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
		       DImode);
	      else
		parts[1] = immed_double_const (l[2], l[3], DImode);
	    }
	  else
	    gcc_unreachable ();
	}
    }

  return size;
}
/* Emit insns to perform a move or push of DI, DF, XF, and TF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-4 contain the input values
   in the correct order; operands 5-7 contain the output values.  */

void
ix86_split_long_move (rtx operands[])
{
  rtx part[2][4];
  int nparts, i, j;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);
  bool collisionparts[4];

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
    {
      /* Optimize constant pool reference to immediates.  This is used by
	 fp moves, that force all constants to memory to allow combining.  */

      if (MEM_P (operands[1])
	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
	operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
	{
	  operands[0] = copy_rtx (operands[0]);
	  PUT_MODE (operands[0], word_mode);
	}
      else
	operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else
    gcc_assert (!MEM_P (operands[0])
		|| offsettable_memref_p (operands[0]));

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.  */
  if (push && MEM_P (operands[1])
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      rtx src_base = XEXP (part[1][nparts - 1], 0);

      /* Compensate for the stack decrement by 4.  */
      if (!TARGET_64BIT && nparts == 3
	  && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
	src_base = plus_constant (src_base, 4);

      /* src_base refers to the stack pointer and is
	 automatically decreased by emitted push.  */
      for (i = 0; i < nparts; i++)
	part[1][i] = change_address (part[1][i],
				     GET_MODE (part[1][i]), src_base);
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && MEM_P (part[1][0]))
    {
      rtx tmp;

      for (i = 0; i < nparts; i++)
	{
	  collisionparts[i]
	    = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
	  if (collisionparts[i])
	    collisions++;
	}

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3 && collisionparts[1])
	{
	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	}
      else if (collisions == 1
	       && nparts == 4
	       && (collisionparts[1] || collisionparts[2]))
	{
	  if (collisionparts[1])
	    {
	      tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	      tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	    }
	  else
	    {
	      tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
	      tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
	    }
	}

      /* If there are more collisions, we can't handle it by reordering.
	 Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
	{
	  rtx base;

	  collisions = 1;

	  base = part[0][nparts - 1];

	  /* Handle the case when the last part isn't valid for lea.
	     Happens in 64-bit mode storing the 12-byte XFmode.  */
	  if (GET_MODE (base) != Pmode)
	    base = gen_rtx_REG (Pmode, REGNO (base));

	  emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
	  part[1][0] = replace_equiv_address (part[1][0], base);
	  for (i = 1; i < nparts; i++)
	    {
	      tmp = plus_constant (base, UNITS_PER_WORD * i);
	      part[1][i] = replace_equiv_address (part[1][i], tmp);
	    }
	}
    }

  if (push)
    {
      if (!TARGET_64BIT)
	{
	  if (nparts == 3)
	    {
	      if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
		emit_insn (gen_addsi3 (stack_pointer_rtx,
				       stack_pointer_rtx, GEN_INT (-4)));
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	  else if (nparts == 4)
	    {
	      emit_move_insn (part[0][3], part[1][3]);
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	}
      else
	{
	  /* In 64bit mode we don't have 32bit push available.  In case this is
	     register, it is OK - we will just use the larger counterpart.  We also
	     retype memory - these come from an attempt to avoid REX prefix on
	     moving of second half of TFmode value.  */
	  if (GET_MODE (part[1][1]) == SImode)
	    {
	      switch (GET_CODE (part[1][1]))
		{
		case MEM:
		  part[1][1] = adjust_address (part[1][1], DImode, 0);
		  break;

		case REG:
		  part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
		  break;

		default:
		  gcc_unreachable ();
		}

	      if (GET_MODE (part[1][0]) == SImode)
		part[1][0] = part[1][1];
	    }
	}
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
	   || (nparts == 3
	       && REGNO (part[0][0]) == REGNO (part[1][2]))
	   || (nparts == 4
	       && REGNO (part[0][0]) == REGNO (part[1][3]))))
      || (collisions > 0
	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      for (i = 0, j = nparts - 1; i < nparts; i++, j--)
	{
	  operands[2 + i] = part[0][j];
	  operands[6 + i] = part[1][j];
	}
    }
  else
    {
      for (i = 0; i < nparts; i++)
	{
	  operands[2 + i] = part[0][i];
	  operands[6 + i] = part[1][i];
	}
    }

  /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
  if (optimize_insn_for_size_p ())
    {
      for (j = 0; j < nparts - 1; j++)
	if (CONST_INT_P (operands[6 + j])
	    && operands[6 + j] != const0_rtx
	    && REG_P (operands[2 + j]))
	  for (i = j; i < nparts - 1; i++)
	    if (CONST_INT_P (operands[7 + i])
		&& INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
	      operands[7 + i] = operands[2 + j];
    }

  for (i = 0; i < nparts; i++)
    emit_move_insn (operands[2 + i], operands[6 + i]);

  return;
}
/* Helper function of ix86_split_ashl used to generate an SImode/DImode
   left shift by a constant, either using a single shift or
   a sequence of add instructions.  */

static void
ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
{
  rtx (*insn)(rtx, rtx, rtx);

  if (count == 1
      || (count * ix86_cost->add <= ix86_cost->shift_const
	  && !optimize_insn_for_size_p ()))
    {
      insn = mode == DImode ? gen_addsi3 : gen_adddi3;
      while (count-- > 0)
	emit_insn (insn (operand, operand, operand));
    }
  else
    {
      insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
      emit_insn (insn (operand, operand, GEN_INT (count)));
    }
}
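
/* Illustration, not part of the helper above: the add sequence works
   because each addition of a value to itself doubles it, i.e. shifts
   left by one.  */

static unsigned int
ashl_by_adds (unsigned int x, int n)
{
  while (n-- > 0)
    x += x;			/* One add per bit of shift count.  */
  return x;
}
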
void
ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_ashl3)(rtx, rtx, rtx);
  rtx (*gen_shld)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count >= half_width)
	{
	  emit_move_insn (high[0], low[1]);
	  emit_move_insn (low[0], const0_rtx);

	  if (count > half_width)
	    ix86_expand_ashl_const (high[0], count - half_width, mode);
	}
      else
	{
	  gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;

	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);

	  emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
	  ix86_expand_ashl_const (low[0], count, mode);
	}
      return;
    }

  split_double_mode (mode, operands, 1, low, high);

  gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;

  if (operands[1] == const1_rtx)
    {
      /* Assuming we've chosen a QImode-capable register, then 1 << N
	 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
      if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
	{
	  rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);

	  ix86_expand_clear (low[0]);
	  ix86_expand_clear (high[0]);
	  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));

	  d = gen_lowpart (QImode, low[0]);
	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
	  s = gen_rtx_EQ (QImode, flags, const0_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, d, s));

	  d = gen_lowpart (QImode, high[0]);
	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
	  s = gen_rtx_NE (QImode, flags, const0_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, d, s));
	}

      /* Otherwise, we can get the same results by manually performing
	 a bit extract operation on bit 5/6, and then performing the two
	 shifts.  The two methods of getting 0/1 into low/high are exactly
	 the same size.  Avoiding the shift in the bit extract case helps
	 pentium4 a bit; no one else seems to care much either way.  */
      else
	{
	  enum machine_mode half_mode;
	  rtx (*gen_lshr3)(rtx, rtx, rtx);
	  rtx (*gen_and3)(rtx, rtx, rtx);
	  rtx (*gen_xor3)(rtx, rtx, rtx);
	  HOST_WIDE_INT bits;
	  rtx x;

	  if (mode == DImode)
	    {
	      half_mode = SImode;
	      gen_lshr3 = gen_lshrsi3;
	      gen_and3 = gen_andsi3;
	      gen_xor3 = gen_xorsi3;
	      bits = 5;
	    }
	  else
	    {
	      half_mode = DImode;
	      gen_lshr3 = gen_lshrdi3;
	      gen_and3 = gen_anddi3;
	      gen_xor3 = gen_xordi3;
	      bits = 6;
	    }

	  if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
	    x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
	  else
	    x = gen_lowpart (half_mode, operands[2]);
	  emit_insn (gen_rtx_SET (VOIDmode, high[0], x));

	  emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
	  emit_insn (gen_and3 (high[0], high[0], const1_rtx));
	  emit_move_insn (low[0], high[0]);
	  emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
	}

      emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
      emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
      return;
    }

  if (operands[1] == constm1_rtx)
    {
      /* For -1 << N, we can avoid the shld instruction, because we
	 know that we're shifting 0...31/63 ones into a -1.  */
      emit_move_insn (low[0], constm1_rtx);
      if (optimize_insn_for_size_p ())
	emit_move_insn (high[0], low[0]);
      else
	emit_move_insn (high[0], constm1_rtx);
    }
  else
    {
      gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;

      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);
      emit_insn (gen_shld (high[0], low[0], operands[2]));
    }

  emit_insn (gen_ashl3 (low[0], low[0], operands[2]));

  if (TARGET_CMOVE && scratch)
    {
      rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
	= mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

      ix86_expand_clear (scratch);
      emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
    }
  else
    {
      rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
	= mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;

      emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
    }
}
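
/* Illustrative model, not part of the splitter above, of the double-word
   left shift it emits, assuming 32-bit unsigned halves and
   0 < count < 32.  */

static void
dwshl_model (unsigned int *lo, unsigned int *hi, int count)
{
  *hi = (*hi << count) | (*lo >> (32 - count));	/* shld  */
  *lo = *lo << count;				/* shl   */
}
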
void
ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_ashr3)(rtx, rtx, rtx)
    = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
  rtx (*gen_shrd)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count == GET_MODE_BITSIZE (mode) - 1)
	{
	  emit_move_insn (high[0], high[1]);
	  emit_insn (gen_ashr3 (high[0], high[0],
				GEN_INT (half_width - 1)));
	  emit_move_insn (low[0], high[0]);
	}
      else if (count >= half_width)
	{
	  emit_move_insn (low[0], high[1]);
	  emit_move_insn (high[0], low[0]);
	  emit_insn (gen_ashr3 (high[0], high[0],
				GEN_INT (half_width - 1)));

	  if (count > half_width)
	    emit_insn (gen_ashr3 (low[0], low[0],
				  GEN_INT (count - half_width)));
	}
      else
	{
	  gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);

	  emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);

      emit_insn (gen_shrd (low[0], high[0], operands[2]));
      emit_insn (gen_ashr3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
	{
	  rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
	    = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

	  emit_move_insn (scratch, high[0]);
	  emit_insn (gen_ashr3 (scratch, scratch,
				GEN_INT (half_width - 1)));
	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	{
	  rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
	    = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;

	  emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
	}
    }
}
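
/* Illustrative model, not part of the splitter above, of the arithmetic
   double-word right shift, assuming 32-bit halves, an arithmetic >> on
   signed int, and 0 < count < 32.  */

static void
dwashr_model (unsigned int *lo, int *hi, int count)
{
  *lo = (*lo >> count) | ((unsigned int) *hi << (32 - count));	/* shrd */
  *hi = *hi >> count;						/* sar  */
}
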
void
ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_lshr3)(rtx, rtx, rtx)
    = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
  rtx (*gen_shrd)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count >= half_width)
	{
	  emit_move_insn (low[0], high[1]);
	  ix86_expand_clear (high[0]);

	  if (count > half_width)
	    emit_insn (gen_lshr3 (low[0], low[0],
				  GEN_INT (count - half_width)));
	}
      else
	{
	  gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);

	  emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);

      emit_insn (gen_shrd (low[0], high[0], operands[2]));
      emit_insn (gen_lshr3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
	{
	  rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
	    = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

	  ix86_expand_clear (scratch);
	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	{
	  rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
	    = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;

	  emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
	}
    }
}
/* Predict just emitted jump instruction to be taken with probability PROB.  */
static void
predict_jump (int prob)
{
  rtx insn = get_last_insn ();
  gcc_assert (JUMP_P (insn));
  add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
}
/* Helper function for the string operations below.  Test VARIABLE whether
   it is aligned to VALUE bytes.  If true, jump to the label.  */
static rtx
ix86_expand_aligntest (rtx variable, int value, bool epilogue)
{
  rtx label = gen_label_rtx ();
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
			   1, label);
  if (epilogue)
    predict_jump (REG_BR_PROB_BASE * 50 / 100);
  else
    predict_jump (REG_BR_PROB_BASE * 90 / 100);
  return label;
}
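
/* Illustration, not part of the helper above: VALUE is a single address
   bit being tested, so the emitted test is simply
   (variable & value) == 0; when it holds, the jump skips the copy.  */

static int
aligntest_model (unsigned long variable, int value)
{
  return (variable & value) == 0;	/* Nonzero when the bit is clear.  */
}
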
/* Adjust COUNTER by the VALUE.  */
static void
ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
{
  rtx (*gen_add)(rtx, rtx, rtx)
    = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;

  emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
}
/* Zero extend possibly SImode EXP to Pmode register.  */
rtx
ix86_zero_extend_to_Pmode (rtx exp)
{
  if (GET_MODE (exp) != Pmode)
    exp = convert_to_mode (Pmode, exp, 1);
  return force_reg (Pmode, exp);
}
/* Divide COUNTREG by SCALE.  */
static rtx
scale_counter (rtx countreg, int scale)
{
  rtx sc;

  if (scale == 1)
    return countreg;
  if (CONST_INT_P (countreg))
    return GEN_INT (INTVAL (countreg) / scale);
  gcc_assert (REG_P (countreg));

  sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
			    GEN_INT (exact_log2 (scale)),
			    NULL, 1, OPTAB_DIRECT);
  return sc;
}
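
/* Illustration, not part of the helper above: for a power-of-two scale
   the division becomes a logical right shift by log2 (scale).  */

static unsigned long
scale_counter_model (unsigned long count, int scale)
{
  int shift = 0;
  while ((1 << shift) < scale)	/* Recover log2 of a power-of-two scale.  */
    shift++;
  return count >> shift;
}
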
/* Return mode for the memcpy/memset loop counter.  Prefer SImode over
   DImode for constant loop counts.  */

static enum machine_mode
counter_mode (rtx count_exp)
{
  if (GET_MODE (count_exp) != VOIDmode)
    return GET_MODE (count_exp);
  if (!CONST_INT_P (count_exp))
    return Pmode;
  if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
    return DImode;
  return SImode;
}
/* When SRCPTR is non-NULL, output a simple loop to move memory pointed
   to by SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times;
   overall size is COUNT specified in bytes.  When SRCPTR is NULL, output
   the equivalent loop to set memory by VALUE (supposed to be in MODE).

   The size is rounded down to whole number of chunk size moved at once.
   SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info.  */

static void
expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
			       rtx destptr, rtx srcptr, rtx value,
			       rtx count, enum machine_mode mode, int unroll,
			       int expected_size)
{
  rtx out_label, top_label, iter, tmp;
  enum machine_mode iter_mode = counter_mode (count);
  rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
  rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
  rtx size;
  rtx x_addr;
  rtx y_addr;
  int i;

  top_label = gen_label_rtx ();
  out_label = gen_label_rtx ();
  iter = gen_reg_rtx (iter_mode);

  size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
			      NULL, 1, OPTAB_DIRECT);
  /* Those two should combine.  */
  if (piece_size == const1_rtx)
    {
      emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
			       true, out_label);
      predict_jump (REG_BR_PROB_BASE * 10 / 100);
    }
  emit_move_insn (iter, const0_rtx);

  emit_label (top_label);

  tmp = convert_modes (Pmode, iter_mode, iter, true);
  x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
  destmem = change_address (destmem, mode, x_addr);

  if (srcmem)
    {
      y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
      srcmem = change_address (srcmem, mode, y_addr);

      /* When unrolling for chips that reorder memory reads and writes,
	 we can save registers by using single temporary.
	 Also using 4 temporaries is overkill in 32bit mode.  */
      if (!TARGET_64BIT && 0)
	{
	  for (i = 0; i < unroll; i++)
	    {
	      if (i)
		{
		  destmem =
		    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
		  srcmem =
		    adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
		}
	      emit_move_insn (destmem, srcmem);
	    }
	}
      else
	{
	  rtx tmpreg[4];
	  gcc_assert (unroll <= 4);
	  for (i = 0; i < unroll; i++)
	    {
	      tmpreg[i] = gen_reg_rtx (mode);
	      if (i)
		{
		  srcmem =
		    adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
		}
	      emit_move_insn (tmpreg[i], srcmem);
	    }
	  for (i = 0; i < unroll; i++)
	    {
	      if (i)
		{
		  destmem =
		    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
		}
	      emit_move_insn (destmem, tmpreg[i]);
	    }
	}
    }
  else
    for (i = 0; i < unroll; i++)
      {
	if (i)
	  destmem =
	    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
	emit_move_insn (destmem, value);
      }

  tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
			     true, OPTAB_LIB_WIDEN);
  if (tmp != iter)
    emit_move_insn (iter, tmp);

  emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
			   true, top_label);
  if (expected_size != -1)
    {
      expected_size /= GET_MODE_SIZE (mode) * unroll;
      if (expected_size == 0)
	predict_jump (0);
      else if (expected_size > REG_BR_PROB_BASE)
	predict_jump (REG_BR_PROB_BASE - 1);
      else
	predict_jump (REG_BR_PROB_BASE
		      - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
    }
  else
    predict_jump (REG_BR_PROB_BASE * 80 / 100);
  iter = ix86_zero_extend_to_Pmode (iter);
  tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
			     true, OPTAB_LIB_WIDEN);
  if (tmp != destptr)
    emit_move_insn (destptr, tmp);
  if (srcptr)
    {
      tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
				 true, OPTAB_LIB_WIDEN);
      if (tmp != srcptr)
	emit_move_insn (srcptr, tmp);
    }
  emit_label (out_label);
}
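
/* Illustrative C model, not part of the expander above, of the loop shape
   it emits: the main loop handles size rounded down to a whole number of
   unrolled chunks, and the remainder is left to the epilogue code.  */

static void
copy_loop_model (unsigned char *dest, const unsigned char *src,
		 unsigned long count, int chunk, int unroll)
{
  unsigned long piece = (unsigned long) chunk * unroll;
  unsigned long size = count & ~(piece - 1);	/* Round down.  */
  unsigned long iter, j;
  for (iter = 0; iter < size; iter += piece)
    for (j = 0; j < piece; j++)			/* The unrolled body.  */
      dest[iter + j] = src[iter + j];
}
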
21252 /* Output "rep; mov" instruction.
21253 Arguments have same meaning as for previous function */
21255 expand_movmem_via_rep_mov (rtx destmem
, rtx srcmem
,
21256 rtx destptr
, rtx srcptr
,
21258 enum machine_mode mode
)
21263 HOST_WIDE_INT rounded_count
;
21265 /* If the size is known, it is shorter to use rep movs. */
21266 if (mode
== QImode
&& CONST_INT_P (count
)
21267 && !(INTVAL (count
) & 3))
21270 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
21271 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
21272 if (srcptr
!= XEXP (srcmem
, 0) || GET_MODE (srcmem
) != BLKmode
)
21273 srcmem
= adjust_automodify_address_nv (srcmem
, BLKmode
, srcptr
, 0);
21274 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
21275 if (mode
!= QImode
)
21277 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
21278 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
21279 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
21280 srcexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
21281 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
21282 srcexp
= gen_rtx_PLUS (Pmode
, srcexp
, srcptr
);
21286 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
21287 srcexp
= gen_rtx_PLUS (Pmode
, srcptr
, countreg
);
21289 if (CONST_INT_P (count
))
21291 rounded_count
= (INTVAL (count
)
21292 & ~((HOST_WIDE_INT
) GET_MODE_SIZE (mode
) - 1));
21293 destmem
= shallow_copy_rtx (destmem
);
21294 srcmem
= shallow_copy_rtx (srcmem
);
21295 set_mem_size (destmem
, rounded_count
);
21296 set_mem_size (srcmem
, rounded_count
);
21300 if (MEM_SIZE_KNOWN_P (destmem
))
21301 clear_mem_size (destmem
);
21302 if (MEM_SIZE_KNOWN_P (srcmem
))
21303 clear_mem_size (srcmem
);
21305 emit_insn (gen_rep_mov (destptr
, destmem
, srcptr
, srcmem
, countreg
,
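
/* Illustration, not part of the expander above: "rep movs" of width w
   copies count / w elements and advances both pointers by
   w * (count / w) bytes, which is what the destexp/srcexp expressions
   describe to the register allocator.  */

static void
rep_mov_model (unsigned char **destp, const unsigned char **srcp,
	       unsigned long count, int w)
{
  unsigned long n = count / w;			/* Scaled count.  */
  unsigned long bytes = n * (unsigned long) w;
  unsigned long i;
  for (i = 0; i < bytes; i++)
    (*destp)[i] = (*srcp)[i];
  *destp += bytes;				/* Pointers advance.  */
  *srcp += bytes;
}
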
21309 /* Output "rep; stos" instruction.
21310 Arguments have same meaning as for previous function */
21312 expand_setmem_via_rep_stos (rtx destmem
, rtx destptr
, rtx value
,
21313 rtx count
, enum machine_mode mode
,
21318 HOST_WIDE_INT rounded_count
;
21320 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
21321 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
21322 value
= force_reg (mode
, gen_lowpart (mode
, value
));
21323 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
21324 if (mode
!= QImode
)
21326 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
21327 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
21328 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
21331 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
21332 if (orig_value
== const0_rtx
&& CONST_INT_P (count
))
21334 rounded_count
= (INTVAL (count
)
21335 & ~((HOST_WIDE_INT
) GET_MODE_SIZE (mode
) - 1));
21336 destmem
= shallow_copy_rtx (destmem
);
21337 set_mem_size (destmem
, rounded_count
);
21339 else if (MEM_SIZE_KNOWN_P (destmem
))
21340 clear_mem_size (destmem
);
21341 emit_insn (gen_rep_stos (destptr
, countreg
, destmem
, value
, destexp
));
emit_strmov (rtx destmem, rtx srcmem,
	     rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
{
  rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
  rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
  emit_insn (gen_strmov (destptr, dest, srcptr, src));
}
/* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST.  */
static void
expand_movmem_epilogue (rtx destmem, rtx srcmem,
			rtx destptr, rtx srcptr, rtx count, int max_size)
{
  rtx src, dest;
  if (CONST_INT_P (count))
    {
      HOST_WIDE_INT countval = INTVAL (count);
      int offset = 0;

      if ((countval & 0x10) && max_size > 16)
	{
	  if (TARGET_64BIT)
	    {
	      emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
	      emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
	    }
	  else
	    gcc_unreachable ();
	  offset += 16;
	}
      if ((countval & 0x08) && max_size > 8)
	{
	  if (TARGET_64BIT)
	    emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
	  else
	    {
	      emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
	      emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
	    }
	  offset += 8;
	}
      if ((countval & 0x04) && max_size > 4)
	{
	  emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
	  offset += 4;
	}
      if ((countval & 0x02) && max_size > 2)
	{
	  emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
	  offset += 2;
	}
      if ((countval & 0x01) && max_size > 1)
	{
	  emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
	  offset += 1;
	}
      return;
    }
  if (max_size > 8)
    {
      count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
				   count, 1, OPTAB_DIRECT);
      expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
				     count, QImode, 1, 4);
      return;
    }

  /* When there are stringops, we can cheaply increase dest and src pointers.
     Otherwise we save code size by maintaining offset (zero is readily
     available from preceding rep operation) and using x86 addressing modes.
   */
  if (TARGET_SINGLE_STRINGOP)
    {
      if (max_size > 4)
	{
	  rtx label = ix86_expand_aligntest (count, 4, true);
	  src = change_address (srcmem, SImode, srcptr);
	  dest = change_address (destmem, SImode, destptr);
	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 2)
	{
	  rtx label = ix86_expand_aligntest (count, 2, true);
	  src = change_address (srcmem, HImode, srcptr);
	  dest = change_address (destmem, HImode, destptr);
	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 1)
	{
	  rtx label = ix86_expand_aligntest (count, 1, true);
	  src = change_address (srcmem, QImode, srcptr);
	  dest = change_address (destmem, QImode, destptr);
	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }
  else
    {
      rtx offset = force_reg (Pmode, const0_rtx);
      rtx tmp;

      if (max_size > 4)
	{
	  rtx label = ix86_expand_aligntest (count, 4, true);
	  src = change_address (srcmem, SImode, srcptr);
	  dest = change_address (destmem, SImode, destptr);
	  emit_move_insn (dest, src);
	  tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
				     true, OPTAB_LIB_WIDEN);
	  if (tmp != offset)
	    emit_move_insn (offset, tmp);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 2)
	{
	  rtx label = ix86_expand_aligntest (count, 2, true);
	  tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
	  src = change_address (srcmem, HImode, tmp);
	  tmp = gen_rtx_PLUS (Pmode, destptr, offset);
	  dest = change_address (destmem, HImode, tmp);
	  emit_move_insn (dest, src);
	  tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
				     true, OPTAB_LIB_WIDEN);
	  if (tmp != offset)
	    emit_move_insn (offset, tmp);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 1)
	{
	  rtx label = ix86_expand_aligntest (count, 1, true);
	  tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
	  src = change_address (srcmem, QImode, tmp);
	  tmp = gen_rtx_PLUS (Pmode, destptr, offset);
	  dest = change_address (destmem, QImode, tmp);
	  emit_move_insn (dest, src);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }
}
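
/* Illustration, not part of the expander above: with a known COUNT the
   tail copy simply tests the low bits of the residue, one power of two
   at a time, copying one aligned piece per set bit.  */

static void
movmem_epilogue_model (unsigned char *dest, const unsigned char *src,
		       unsigned long countval)
{
  int off = 0, sz, j;
  for (sz = 8; sz >= 1; sz >>= 1)	/* 8-, 4-, 2-, 1-byte pieces.  */
    if (countval & sz)
      {
	for (j = 0; j < sz; j++)
	  dest[off + j] = src[off + j];
	off += sz;
      }
}
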
/* Output code to set at most count & (max_size - 1) bytes starting by DEST.  */
static void
expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
				 rtx count, int max_size)
{
  count =
    expand_simple_binop (counter_mode (count), AND, count,
			 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
  expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
				 gen_lowpart (QImode, value), count, QImode,
				 1, max_size / 2);
}
/* Output code to set at most count & (max_size - 1) bytes starting by DEST.  */
static void
expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
{
  rtx dest;

  if (CONST_INT_P (count))
    {
      HOST_WIDE_INT countval = INTVAL (count);
      int offset = 0;

      if ((countval & 0x10) && max_size > 16)
	{
	  if (TARGET_64BIT)
	    {
	      dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
	      emit_insn (gen_strset (destptr, dest, value));
	      dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
	      emit_insn (gen_strset (destptr, dest, value));
	    }
	  else
	    gcc_unreachable ();
	  offset += 16;
	}
      if ((countval & 0x08) && max_size > 8)
	{
	  if (TARGET_64BIT)
	    {
	      dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
	      emit_insn (gen_strset (destptr, dest, value));
	    }
	  else
	    {
	      dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
	      emit_insn (gen_strset (destptr, dest, value));
	      dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
	      emit_insn (gen_strset (destptr, dest, value));
	    }
	  offset += 8;
	}
      if ((countval & 0x04) && max_size > 4)
	{
	  dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
	  emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
	  offset += 4;
	}
      if ((countval & 0x02) && max_size > 2)
	{
	  dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
	  emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
	  offset += 2;
	}
      if ((countval & 0x01) && max_size > 1)
	{
	  dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
	  emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
	  offset += 1;
	}
      return;
    }
  if (max_size > 32)
    {
      expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
      return;
    }
  if (max_size > 16)
    {
      rtx label = ix86_expand_aligntest (count, 16, true);
      if (TARGET_64BIT)
	{
	  dest = change_address (destmem, DImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	}
      else
	{
	  dest = change_address (destmem, SImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 8)
    {
      rtx label = ix86_expand_aligntest (count, 8, true);
      if (TARGET_64BIT)
	{
	  dest = change_address (destmem, DImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	}
      else
	{
	  dest = change_address (destmem, SImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 4)
    {
      rtx label = ix86_expand_aligntest (count, 4, true);
      dest = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 2)
    {
      rtx label = ix86_expand_aligntest (count, 2, true);
      dest = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 1)
    {
      rtx label = ix86_expand_aligntest (count, 1, true);
      dest = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
}
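
/* Illustration, not part of the expander above: the wide stores assume
   VALUE already holds the fill byte replicated across the wider mode.
   A common way to broadcast a byte into 32 bits, shown here only as a
   sketch, is multiplication by 0x01010101.  */

static unsigned int
setmem_value_model (unsigned char c)
{
  return (unsigned int) c * 0x01010101u;	/* Byte in all four lanes.  */
}
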
/* Copy enough from SRC to DEST to align DEST, known to be aligned to ALIGN
   bytes, to DESIRED_ALIGNMENT.  */
static void
expand_movmem_prologue (rtx destmem, rtx srcmem,
			rtx destptr, rtx srcptr, rtx count,
			int align, int desired_alignment)
{
  if (align <= 1 && desired_alignment > 1)
    {
      rtx label = ix86_expand_aligntest (destptr, 1, false);
      srcmem = change_address (srcmem, QImode, srcptr);
      destmem = change_address (destmem, QImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 1);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 2 && desired_alignment > 2)
    {
      rtx label = ix86_expand_aligntest (destptr, 2, false);
      srcmem = change_address (srcmem, HImode, srcptr);
      destmem = change_address (destmem, HImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 2);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 4 && desired_alignment > 4)
    {
      rtx label = ix86_expand_aligntest (destptr, 4, false);
      srcmem = change_address (srcmem, SImode, srcptr);
      destmem = change_address (destmem, SImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 4);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  gcc_assert (desired_alignment <= 8);
}
/* Copy enough bytes from *SRCP to DST to align DST to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be copied.  Return the updated
   destination MEM; the updated source MEM is stored back through SRCP.  */
static rtx
expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
                                 int desired_align, int align_bytes)
{
  rtx src = *srcp;
  rtx orig_dst = dst;
  rtx orig_src = src;
  int off = 0;
  int src_align_bytes
    = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
  if (src_align_bytes >= 0)
    src_align_bytes = desired_align - src_align_bytes;
  if (align_bytes & 1)
    {
      dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
      src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
      off = 1;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  if (align_bytes & 2)
    {
      dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
      src = adjust_automodify_address_nv (src, HImode, srcreg, off);
      if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
        set_mem_align (dst, 2 * BITS_PER_UNIT);
      if (src_align_bytes >= 0
          && (src_align_bytes & 1) == (align_bytes & 1)
          && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
        set_mem_align (src, 2 * BITS_PER_UNIT);
      off = 2;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  if (align_bytes & 4)
    {
      dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
      src = adjust_automodify_address_nv (src, SImode, srcreg, off);
      if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
        set_mem_align (dst, 4 * BITS_PER_UNIT);
      if (src_align_bytes >= 0)
        {
          unsigned int src_align = 0;
          if ((src_align_bytes & 3) == (align_bytes & 3))
            src_align = 4;
          else if ((src_align_bytes & 1) == (align_bytes & 1))
            src_align = 2;
          if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
            set_mem_align (src, src_align * BITS_PER_UNIT);
        }
      off = 4;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
  src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
    set_mem_align (dst, desired_align * BITS_PER_UNIT);
  if (src_align_bytes >= 0)
    {
      unsigned int src_align = 0;
      if ((src_align_bytes & 7) == (align_bytes & 7))
        src_align = 8;
      else if ((src_align_bytes & 3) == (align_bytes & 3))
        src_align = 4;
      else if ((src_align_bytes & 1) == (align_bytes & 1))
        src_align = 2;
      if (src_align > (unsigned int) desired_align)
        src_align = desired_align;
      if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
        set_mem_align (src, src_align * BITS_PER_UNIT);
    }
  if (MEM_SIZE_KNOWN_P (orig_dst))
    set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
  if (MEM_SIZE_KNOWN_P (orig_src))
    set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
  *srcp = src;
  return dst;
}
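
/* Editor's illustration (not part of GCC): a rough standalone C model of
   the straight-line head copy above, with hypothetical names.  align_bytes
   plays the same role as ALIGN_BYTES -- the byte distance (at most 7 here)
   from dst to the next desired-alignment boundary:

     #include <stddef.h>
     #include <string.h>

     static size_t copy_head (char *dst, const char *src, size_t align_bytes)
     {
       size_t off = 0;
       if (align_bytes & 1) { dst[0] = src[0]; off = 1; }
       if (align_bytes & 2) { memcpy (dst + off, src + off, 2); off += 2; }
       if (align_bytes & 4) { memcpy (dst + off, src + off, 4); off += 4; }
       return off;  // caller subtracts this from the remaining count
     }

   The extra set_mem_align/set_mem_size calls in the real code only keep the
   alignment and alias metadata on the MEM rtxes accurate; they emit no
   instructions.  */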
/* Store enough of VALUE into DEST to align DEST, which is known to be
   aligned by ALIGN, to DESIRED_ALIGNMENT.  */
static void
expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
                        int align, int desired_alignment)
{
  if (align <= 1 && desired_alignment > 1)
    {
      rtx label = ix86_expand_aligntest (destptr, 1, false);
      destmem = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
      ix86_adjust_counter (count, 1);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 2 && desired_alignment > 2)
    {
      rtx label = ix86_expand_aligntest (destptr, 2, false);
      destmem = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
      ix86_adjust_counter (count, 2);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 4 && desired_alignment > 4)
    {
      rtx label = ix86_expand_aligntest (destptr, 4, false);
      destmem = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
      ix86_adjust_counter (count, 4);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  gcc_assert (desired_alignment <= 8);
}
/* Store enough of VALUE into DST to align DST to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be stored.  */
static rtx
expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
                                 int desired_align, int align_bytes)
{
  int off = 0;
  rtx orig_dst = dst;
  if (align_bytes & 1)
    {
      dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
      off = 1;
      emit_insn (gen_strset (destreg, dst,
                             gen_lowpart (QImode, value)));
    }
  if (align_bytes & 2)
    {
      dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
      if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
        set_mem_align (dst, 2 * BITS_PER_UNIT);
      off = 2;
      emit_insn (gen_strset (destreg, dst,
                             gen_lowpart (HImode, value)));
    }
  if (align_bytes & 4)
    {
      dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
      if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
        set_mem_align (dst, 4 * BITS_PER_UNIT);
      off = 4;
      emit_insn (gen_strset (destreg, dst,
                             gen_lowpart (SImode, value)));
    }
  dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
    set_mem_align (dst, desired_align * BITS_PER_UNIT);
  if (MEM_SIZE_KNOWN_P (orig_dst))
    set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
  return dst;
}
/* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation.  */
static enum stringop_alg
decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
            int *dynamic_check)
{
  const struct stringop_algs * algs;
  bool optimize_for_speed;
  /* Algorithms using the rep prefix want at least edi and ecx;
     additionally, memset wants eax and memcpy wants esi.  Don't
     consider such algorithms if the user has appropriated those
     registers for their own purposes.  */
  bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
                             || (memset
                                 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));

#define ALG_USABLE_P(alg) (rep_prefix_usable                    \
                           || (alg != rep_prefix_1_byte         \
                               && alg != rep_prefix_4_byte      \
                               && alg != rep_prefix_8_byte))
  const struct processor_costs *cost;

  /* Even if the string operation call is cold, we still might spend a lot
     of time processing large blocks.  */
  if (optimize_function_for_size_p (cfun)
      || (optimize_insn_for_size_p ()
          && expected_size != -1 && expected_size < 256))
    optimize_for_speed = false;
  else
    optimize_for_speed = true;

  cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;

  *dynamic_check = -1;
  if (memset)
    algs = &cost->memset[TARGET_64BIT != 0];
  else
    algs = &cost->memcpy[TARGET_64BIT != 0];
  if (ix86_stringop_alg != no_stringop && ALG_USABLE_P (ix86_stringop_alg))
    return ix86_stringop_alg;
  /* rep; movq or rep; movl is the smallest variant.  */
  else if (!optimize_for_speed)
    {
      if (!count || (count & 3))
        return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
      else
        return rep_prefix_usable ? rep_prefix_4_byte : loop;
    }
  /* Very tiny blocks are best handled via the loop; REP is expensive to
     set up.  */
  else if (expected_size != -1 && expected_size < 4)
    return loop_1_byte;
  else if (expected_size != -1)
    {
      unsigned int i;
      enum stringop_alg alg = libcall;
      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
        {
          /* We get here if the algorithms that were not libcall-based
             were rep-prefix based and we are unable to use rep prefixes
             based on global register usage.  Break out of the loop and
             use the heuristic below.  */
          if (algs->size[i].max == 0)
            break;
          if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
            {
              enum stringop_alg candidate = algs->size[i].alg;

              if (candidate != libcall && ALG_USABLE_P (candidate))
                alg = candidate;
              /* Honor TARGET_INLINE_ALL_STRINGOPS by picking the
                 last non-libcall inline algorithm.  */
              if (TARGET_INLINE_ALL_STRINGOPS)
                {
                  /* When the current size is best copied by a libcall,
                     but we are still forced to inline, run the heuristic
                     below that picks code for medium-sized blocks.  */
                  if (alg != libcall)
                    return alg;
                  break;
                }
              else if (ALG_USABLE_P (candidate))
                return candidate;
            }
        }
      gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
    }
  /* When asked to inline the call anyway, try to pick a meaningful choice.
     We look for the maximal size of block that is faster to copy by hand
     and take blocks of at most that size, guessing that the average size
     will be roughly half of the block.

     If this turns out to be bad, we might simply specify the preferred
     choice in ix86_costs.  */
  if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
      && (algs->unknown_size == libcall
          || !ALG_USABLE_P (algs->unknown_size)))
    {
      int max = -1;
      enum stringop_alg alg;
      int i;
      bool any_alg_usable_p = true;

      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
        {
          enum stringop_alg candidate = algs->size[i].alg;
          any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);

          if (candidate != libcall && candidate
              && ALG_USABLE_P (candidate))
            max = algs->size[i].max;
        }
      /* If there aren't any usable algorithms, then recursing on
         smaller sizes isn't going to find anything.  Just return the
         simple byte-at-a-time copy loop.  */
      if (!any_alg_usable_p)
        {
          /* Pick something reasonable.  */
          if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
            *dynamic_check = 128;
          return loop_1_byte;
        }
      if (max == -1)
        max = 4096;
      alg = decide_alg (count, max / 2, memset, dynamic_check);
      gcc_assert (*dynamic_check == -1);
      gcc_assert (alg != libcall);
      if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
        *dynamic_check = max;
      return alg;
    }
  return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
#undef ALG_USABLE_P
}
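
/* Editor's illustration (not part of GCC): each stringop_algs::size entry
   pairs a block-size bound with an algorithm, and the loop above takes the
   first usable entry whose bound covers EXPECTED_SIZE.  A minimal sketch
   with hypothetical names and values:

     struct sz { int max; int alg; };

     static int pick (const struct sz *s, int n, int expected_size)
     {
       int i;
       // e.g. { {24, loop}, {8192, rep_prefix_4_byte}, {-1, libcall} }
       for (i = 0; i < n; i++)
         if (s[i].max == -1 || s[i].max >= expected_size)
           return s[i].alg;
       return -1;  // table exhausted: caller falls back to heuristics
     }
*/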
/* Decide on alignment.  We know that the operand is already aligned to ALIGN
   (ALIGN can be based on profile feedback and thus it is not 100%
   guaranteed).  */
static int
decide_alignment (int align,
                  enum stringop_alg alg,
                  int expected_size)
{
  int desired_align = 0;
  switch (alg)
    {
    case no_stringop:
      gcc_unreachable ();
    case loop:
    case unrolled_loop:
      desired_align = GET_MODE_SIZE (Pmode);
      break;
    case rep_prefix_8_byte:
      desired_align = 8;
      break;
    case rep_prefix_4_byte:
      /* PentiumPro has special logic triggering for 8-byte aligned blocks,
         copying a whole cacheline at once.  */
      if (TARGET_PENTIUMPRO)
        desired_align = 8;
      else
        desired_align = 4;
      break;
    case rep_prefix_1_byte:
      /* PentiumPro has special logic triggering for 8-byte aligned blocks,
         copying a whole cacheline at once.  */
      if (TARGET_PENTIUMPRO)
        desired_align = 8;
      else
        desired_align = 1;
      break;
    case loop_1_byte:
      desired_align = 1;
      break;
    case libcall:
      return 0;
    }

  if (optimize_size)
    desired_align = 1;
  if (desired_align < align)
    desired_align = align;
  if (expected_size != -1 && expected_size < 4)
    desired_align = align;
  return desired_align;
}
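
/* Editor's summary (not part of GCC): ignoring the ALIGN/EXPECTED_SIZE
   adjustments at the end, the switch above in effect computes:

     loop, unrolled_loop  -> GET_MODE_SIZE (Pmode)  (4 or 8)
     rep_prefix_8_byte    -> 8
     rep_prefix_4_byte    -> 4  (8 on PentiumPro)
     rep_prefix_1_byte    -> 1  (8 on PentiumPro)
     loop_1_byte          -> 1

   so e.g. a rep_prefix_4_byte copy with 1-byte-aligned input gets a
   prologue that copies up to 3 bytes before the main loop.  */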
/* Return the smallest power of 2 greater than VAL.  */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}
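
/* Editor's examples (not part of GCC): note that "greater than" is strict,
   which is what the epilogue sizing below relies on:

     smallest_pow2_greater_than (0) == 1
     smallest_pow2_greater_than (7) == 8
     smallest_pow2_greater_than (8) == 16  */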
/* Expand string move (memcpy) operation.  Use i386 string operations
   when profitable.  expand_setmem contains similar code.  The code
   depends upon architecture, block size and alignment, but always has
   the same overall structure:

   1) Prologue guard: a conditional that jumps up to the epilogues for
      small blocks that can be handled by the epilogue alone.  This is
      faster, but is also needed for correctness, since the prologue
      assumes the block is larger than the desired alignment.

      An optional dynamic check for size, with a libcall for large
      blocks, is emitted here too when -minline-stringops-dynamically
      is in use.

   2) Prologue: copy the first few bytes in order to get the destination
      aligned to DESIRED_ALIGN.  It is emitted only when ALIGN is less
      than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
      copied.  We emit either a jump tree on power-of-two sized
      blocks, or a byte loop.

   3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
      with the specified algorithm.

   4) Epilogue: code copying the tail of the block that is too small to
      be handled by the main body (or up to the size guarded by the
      prologue guard).  */
bool
ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
                    rtx expected_align_exp, rtx expected_size_exp)
{
  rtx destreg;
  rtx srcreg;
  rtx label = NULL;
  rtx tmp;
  rtx jump_around_label = NULL;
  HOST_WIDE_INT align = 1;
  unsigned HOST_WIDE_INT count = 0;
  HOST_WIDE_INT expected_size = -1;
  int size_needed = 0, epilogue_size_needed;
  int desired_align = 0, align_bytes = 0;
  enum stringop_alg alg;
  int dynamic_check;
  bool need_zero_guard = false;

  if (CONST_INT_P (align_exp))
    align = INTVAL (align_exp);
  /* i386 can do misaligned access at a reasonably increased cost.  */
  if (CONST_INT_P (expected_align_exp)
      && INTVAL (expected_align_exp) > align)
    align = INTVAL (expected_align_exp);
  /* ALIGN is the minimum of destination and source alignment, but we care
     here just about destination alignment.  */
  else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
    align = MEM_ALIGN (dst) / BITS_PER_UNIT;

  if (CONST_INT_P (count_exp))
    count = expected_size = INTVAL (count_exp);
  if (CONST_INT_P (expected_size_exp) && count == 0)
    expected_size = INTVAL (expected_size_exp);

  /* Make sure we don't need to care about overflow later on.  */
  if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
    return false;

  /* Step 0: Decide on preferred algorithm, desired alignment and
     size of chunks to be copied by main loop.  */

  alg = decide_alg (count, expected_size, false, &dynamic_check);
  desired_align = decide_alignment (align, alg, expected_size);

  if (!TARGET_ALIGN_STRINGOPS)
    align = desired_align;

  if (alg == libcall)
    return false;
  gcc_assert (alg != no_stringop);
  if (!count)
    count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));

  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (word_mode);
      break;
    case unrolled_loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (word_mode) * (TARGET_64BIT ? 4 : 2);
      break;
    case rep_prefix_8_byte:
      size_needed = 8;
      break;
    case rep_prefix_4_byte:
      size_needed = 4;
      break;
    case rep_prefix_1_byte:
      size_needed = 1;
      break;
    case loop_1_byte:
      need_zero_guard = true;
      size_needed = 1;
      break;
    }

  epilogue_size_needed = size_needed;

  /* Step 1: Prologue guard.  */

  /* Alignment code needs count to be in register.  */
  if (CONST_INT_P (count_exp) && desired_align > align)
    {
      if (INTVAL (count_exp) > desired_align
          && INTVAL (count_exp) > size_needed)
        {
          align_bytes
            = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
          if (align_bytes <= 0)
            align_bytes = 0;
          else
            align_bytes = desired_align - align_bytes;
        }
      if (align_bytes == 0)
        count_exp = force_reg (counter_mode (count_exp), count_exp);
    }
  gcc_assert (desired_align >= 1 && align >= 1);

  /* Ensure that the alignment prologue won't copy past the end of the
     block.  */
  if (size_needed > 1 || (desired_align > 1 && desired_align > align))
    {
      epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
      /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
         Make sure it is a power of 2.  */
      epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);

      if (count)
        {
          if (count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
            {
              /* If the main algorithm works on QImode, no epilogue is needed.
                 For small sizes just don't align anything.  */
              if (size_needed == 1)
                desired_align = align;
              else
                goto epilogue;
            }
        }
      else
        {
          label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp,
                                   GEN_INT (epilogue_size_needed),
                                   LTU, 0, counter_mode (count_exp), 1, label);
          if (expected_size == -1 || expected_size < epilogue_size_needed)
            predict_jump (REG_BR_PROB_BASE * 60 / 100);
          else
            predict_jump (REG_BR_PROB_BASE * 20 / 100);
        }
    }

  /* Emit code to decide at runtime whether a library call or inline code
     should be used.  */
  if (dynamic_check != -1)
    {
      if (CONST_INT_P (count_exp))
        {
          if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT) dynamic_check)
            {
              emit_block_move_via_libcall (dst, src, count_exp, false);
              count_exp = const0_rtx;
              goto epilogue;
            }
        }
      else
        {
          rtx hot_label = gen_label_rtx ();
          jump_around_label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
                                   LEU, 0, GET_MODE (count_exp), 1, hot_label);
          predict_jump (REG_BR_PROB_BASE * 90 / 100);
          emit_block_move_via_libcall (dst, src, count_exp, false);
          emit_jump (jump_around_label);
          emit_label (hot_label);
        }
    }

  /* Step 2: Alignment prologue.  */

  if (desired_align > align)
    {
      if (align_bytes == 0)
        {
          /* Except for the first move in the epilogue, we no longer know
             the constant offset in the aliasing info.  It doesn't seem
             worth the pain to maintain it for the first move, so throw
             away the info early.  */
          src = change_address (src, BLKmode, srcreg);
          dst = change_address (dst, BLKmode, destreg);
          expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
                                  desired_align);
        }
      else
        {
          /* If we know how many bytes need to be stored before dst is
             sufficiently aligned, maintain aliasing info accurately.  */
          dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
                                                 desired_align, align_bytes);
          count_exp = plus_constant (count_exp, -align_bytes);
          count -= align_bytes;
        }
      if (need_zero_guard
          && (count < (unsigned HOST_WIDE_INT) size_needed
              || (align_bytes == 0
                  && count < ((unsigned HOST_WIDE_INT) size_needed
                              + desired_align - align))))
        {
          /* It is possible that we copied enough so the main loop will not
             execute.  */
          gcc_assert (size_needed > 1);
          if (label == NULL_RTX)
            label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp,
                                   GEN_INT (size_needed),
                                   LTU, 0, counter_mode (count_exp), 1, label);
          if (expected_size == -1
              || expected_size < (desired_align - align) / 2 + size_needed)
            predict_jump (REG_BR_PROB_BASE * 20 / 100);
          else
            predict_jump (REG_BR_PROB_BASE * 60 / 100);
        }
    }
  if (label && size_needed == 1)
    {
      emit_label (label);
      LABEL_NUSES (label) = 1;
      label = NULL;
      epilogue_size_needed = 1;
    }
  else if (label == NULL_RTX)
    epilogue_size_needed = size_needed;

  /* Step 3: Main loop.  */

  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop_1_byte:
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
                                     count_exp, QImode, 1, expected_size);
      break;
    case loop:
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
                                     count_exp, word_mode, 1, expected_size);
      break;
    case unrolled_loop:
      /* Unroll only by a factor of 2 in 32-bit mode, since we don't have
         enough registers for 4 temporaries anyway.  */
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
                                     count_exp, word_mode,
                                     TARGET_64BIT ? 4 : 2, expected_size);
      break;
    case rep_prefix_8_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
                                 DImode);
      break;
    case rep_prefix_4_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
                                 SImode);
      break;
    case rep_prefix_1_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
                                 QImode);
      break;
    }
  /* Properly adjust the offsets of the src and dest memory for aliasing.  */
  if (CONST_INT_P (count_exp))
    {
      src = adjust_automodify_address_nv (src, BLKmode, srcreg,
                                          (count / size_needed) * size_needed);
      dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
                                          (count / size_needed) * size_needed);
    }
  else
    {
      src = change_address (src, BLKmode, srcreg);
      dst = change_address (dst, BLKmode, destreg);
    }

  /* Step 4: Epilogue to copy the remaining bytes.  */
 epilogue:
  if (label)
    {
      /* When the main loop is done, COUNT_EXP might hold the original count,
         while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
         Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
         bytes.  Compensate if needed.  */

      if (size_needed < epilogue_size_needed)
        {
          tmp =
            expand_simple_binop (counter_mode (count_exp), AND, count_exp,
                                 GEN_INT (size_needed - 1), count_exp, 1,
                                 OPTAB_DIRECT);
          if (tmp != count_exp)
            emit_move_insn (count_exp, tmp);
        }
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }

  if (count_exp != const0_rtx && epilogue_size_needed > 1)
    expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
                            epilogue_size_needed);
  if (jump_around_label)
    emit_label (jump_around_label);
  return true;
}
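
/* Editor's illustration (not part of GCC): the four steps above have the
   same shape as this hand-written C copy, assuming desired_align ==
   size_needed == 8 and non-overlapping operands:

     #include <stdint.h>
     #include <stddef.h>
     #include <string.h>

     static void copy_block (char *dst, const char *src, size_t n)
     {
       // 1) prologue guard: small blocks go straight to the epilogue
       if (n >= 8)
         {
           // 2) prologue: align dst to 8 bytes (consumes at most 7 bytes,
           //    which is why the guard above is required for correctness)
           while ((uintptr_t) dst & 7)
             { *dst++ = *src++; n--; }
           // 3) main body: copy in 8-byte chunks
           for (; n >= 8; n -= 8, dst += 8, src += 8)
             memcpy (dst, src, 8);
         }
       // 4) epilogue: remaining n < 8 bytes
       while (n--)
         *dst++ = *src++;
     }
*/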
/* Helper function for memcpy.  For a QImode value 0xXY produce
   0xXYXYXYXY of the width specified by MODE.  This is essentially
   a * 0x10101010, but we can do slightly better than
   synth_mult by unwinding the sequence by hand on CPUs with
   slow multiply.  */
static rtx
promote_duplicated_reg (enum machine_mode mode, rtx val)
{
  enum machine_mode valmode = GET_MODE (val);
  rtx tmp;
  int nops = mode == DImode ? 3 : 2;

  gcc_assert (mode == SImode || mode == DImode);
  if (val == const0_rtx)
    return copy_to_mode_reg (mode, const0_rtx);
  if (CONST_INT_P (val))
    {
      HOST_WIDE_INT v = INTVAL (val) & 255;

      v |= v << 8;
      v |= v << 16;
      if (mode == DImode)
        v |= (v << 16) << 16;
      return copy_to_mode_reg (mode, gen_int_mode (v, mode));
    }

  if (valmode == VOIDmode)
    valmode = QImode;
  if (valmode != QImode)
    val = gen_lowpart (QImode, val);
  if (mode == QImode)
    return val;
  if (!TARGET_PARTIAL_REG_STALL)
    nops--;
  if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
      + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
      <= (ix86_cost->shift_const + ix86_cost->add) * nops
          + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
    {
      rtx reg = convert_modes (mode, QImode, val, true);
      tmp = promote_duplicated_reg (mode, const1_rtx);
      return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
                                  OPTAB_DIRECT);
    }
  else
    {
      rtx reg = convert_modes (mode, QImode, val, true);

      if (!TARGET_PARTIAL_REG_STALL)
        {
          if (mode == SImode)
            emit_insn (gen_movsi_insv_1 (reg, reg));
          else
            emit_insn (gen_movdi_insv_1 (reg, reg));
        }
      else
        {
          tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
                                     NULL, 1, OPTAB_DIRECT);
          reg
            = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
        }
      tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
                                 NULL, 1, OPTAB_DIRECT);
      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
      if (mode == SImode)
        return reg;
      tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
                                 NULL, 1, OPTAB_DIRECT);
      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
      return reg;
    }
}
/* Duplicate value VAL using promote_duplicated_reg into the maximal size
   that will be needed by the main loop copying SIZE_NEEDED chunks and by
   the prologue getting the alignment from ALIGN to DESIRED_ALIGN.  */
static rtx
promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
                                int align)
{
  rtx promoted_val;

  if (TARGET_64BIT
      && (size_needed > 4 || (desired_align > align && desired_align > 4)))
    promoted_val = promote_duplicated_reg (DImode, val);
  else if (size_needed > 2 || (desired_align > align && desired_align > 2))
    promoted_val = promote_duplicated_reg (SImode, val);
  else if (size_needed > 1 || (desired_align > align && desired_align > 1))
    promoted_val = promote_duplicated_reg (HImode, val);
  else
    promoted_val = val;

  return promoted_val;
}
/* Expand string clear operation (bzero).  Use i386 string operations when
   profitable.  See the expand_movmem comment for an explanation of the
   individual steps performed.  */
bool
ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
                    rtx expected_align_exp, rtx expected_size_exp)
{
  rtx destreg;
  rtx label = NULL;
  rtx tmp;
  rtx jump_around_label = NULL;
  HOST_WIDE_INT align = 1;
  unsigned HOST_WIDE_INT count = 0;
  HOST_WIDE_INT expected_size = -1;
  int size_needed = 0, epilogue_size_needed;
  int desired_align = 0, align_bytes = 0;
  enum stringop_alg alg;
  rtx promoted_val = NULL;
  bool force_loopy_epilogue = false;
  int dynamic_check;
  bool need_zero_guard = false;

  if (CONST_INT_P (align_exp))
    align = INTVAL (align_exp);
  /* i386 can do misaligned access at a reasonably increased cost.  */
  if (CONST_INT_P (expected_align_exp)
      && INTVAL (expected_align_exp) > align)
    align = INTVAL (expected_align_exp);
  if (CONST_INT_P (count_exp))
    count = expected_size = INTVAL (count_exp);
  if (CONST_INT_P (expected_size_exp) && count == 0)
    expected_size = INTVAL (expected_size_exp);

  /* Make sure we don't need to care about overflow later on.  */
  if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
    return false;

  /* Step 0: Decide on preferred algorithm, desired alignment and
     size of chunks to be copied by main loop.  */

  alg = decide_alg (count, expected_size, true, &dynamic_check);
  desired_align = decide_alignment (align, alg, expected_size);

  if (!TARGET_ALIGN_STRINGOPS)
    align = desired_align;

  if (alg == libcall)
    return false;
  gcc_assert (alg != no_stringop);
  if (!count)
    count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));

  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (word_mode);
      break;
    case unrolled_loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (word_mode) * 4;
      break;
    case rep_prefix_8_byte:
      size_needed = 8;
      break;
    case rep_prefix_4_byte:
      size_needed = 4;
      break;
    case rep_prefix_1_byte:
      size_needed = 1;
      break;
    case loop_1_byte:
      need_zero_guard = true;
      size_needed = 1;
      break;
    }
  epilogue_size_needed = size_needed;

  /* Step 1: Prologue guard.  */

  /* Alignment code needs count to be in register.  */
  if (CONST_INT_P (count_exp) && desired_align > align)
    {
      if (INTVAL (count_exp) > desired_align
          && INTVAL (count_exp) > size_needed)
        {
          align_bytes
            = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
          if (align_bytes <= 0)
            align_bytes = 0;
          else
            align_bytes = desired_align - align_bytes;
        }
      if (align_bytes == 0)
        {
          enum machine_mode mode = SImode;
          if (TARGET_64BIT && (count & ~0xffffffff))
            mode = DImode;
          count_exp = force_reg (mode, count_exp);
        }
    }
  /* Do the cheap promotion to allow better CSE across the
     main loop and epilogue (i.e. one load of the big constant in
     front of all the code).  */
  if (CONST_INT_P (val_exp))
    promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
                                                   desired_align, align);
  /* Ensure that the alignment prologue won't copy past the end of the
     block.  */
  if (size_needed > 1 || (desired_align > 1 && desired_align > align))
    {
      epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
      /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
         Make sure it is a power of 2.  */
      epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);

      /* To improve performance of small blocks, we jump around the VAL
         promoting code.  This means that if the promoted VAL is not
         constant, we might not use it in the epilogue and have to use
         the byte loop variant.  */
      if (epilogue_size_needed > 2 && !promoted_val)
        force_loopy_epilogue = true;
      if (count)
        {
          if (count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
            {
              /* If the main algorithm works on QImode, no epilogue is needed.
                 For small sizes just don't align anything.  */
              if (size_needed == 1)
                desired_align = align;
              else
                goto epilogue;
            }
        }
      else
        {
          label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp,
                                   GEN_INT (epilogue_size_needed),
                                   LTU, 0, counter_mode (count_exp), 1, label);
          if (expected_size == -1 || expected_size <= epilogue_size_needed)
            predict_jump (REG_BR_PROB_BASE * 60 / 100);
          else
            predict_jump (REG_BR_PROB_BASE * 20 / 100);
        }
    }
  if (dynamic_check != -1)
    {
      rtx hot_label = gen_label_rtx ();
      jump_around_label = gen_label_rtx ();
      emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
                               LEU, 0, counter_mode (count_exp), 1, hot_label);
      predict_jump (REG_BR_PROB_BASE * 90 / 100);
      set_storage_via_libcall (dst, count_exp, val_exp, false);
      emit_jump (jump_around_label);
      emit_label (hot_label);
    }

  /* Step 2: Alignment prologue.  */

  /* Do the expensive promotion once we branched off the small blocks.  */
  if (!promoted_val)
    promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
                                                   desired_align, align);
  gcc_assert (desired_align >= 1 && align >= 1);

  if (desired_align > align)
    {
      if (align_bytes == 0)
        {
          /* Except for the first move in the epilogue, we no longer know
             the constant offset in the aliasing info.  It doesn't seem
             worth the pain to maintain it for the first move, so throw
             away the info early.  */
          dst = change_address (dst, BLKmode, destreg);
          expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
                                  desired_align);
        }
      else
        {
          /* If we know how many bytes need to be stored before dst is
             sufficiently aligned, maintain aliasing info accurately.  */
          dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
                                                 desired_align, align_bytes);
          count_exp = plus_constant (count_exp, -align_bytes);
          count -= align_bytes;
        }
      if (need_zero_guard
          && (count < (unsigned HOST_WIDE_INT) size_needed
              || (align_bytes == 0
                  && count < ((unsigned HOST_WIDE_INT) size_needed
                              + desired_align - align))))
        {
          /* It is possible that we copied enough so the main loop will not
             execute.  */
          gcc_assert (size_needed > 1);
          if (label == NULL_RTX)
            label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp,
                                   GEN_INT (size_needed),
                                   LTU, 0, counter_mode (count_exp), 1, label);
          if (expected_size == -1
              || expected_size < (desired_align - align) / 2 + size_needed)
            predict_jump (REG_BR_PROB_BASE * 20 / 100);
          else
            predict_jump (REG_BR_PROB_BASE * 60 / 100);
        }
    }
  if (label && size_needed == 1)
    {
      emit_label (label);
      LABEL_NUSES (label) = 1;
      label = NULL;
      promoted_val = val_exp;
      epilogue_size_needed = 1;
    }
  else if (label == NULL_RTX)
    epilogue_size_needed = size_needed;

  /* Step 3: Main loop.  */

  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop_1_byte:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
                                     count_exp, QImode, 1, expected_size);
      break;
    case loop:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
                                     count_exp, word_mode, 1, expected_size);
      break;
    case unrolled_loop:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
                                     count_exp, word_mode, 4, expected_size);
      break;
    case rep_prefix_8_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
                                  DImode);
      break;
    case rep_prefix_4_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
                                  SImode);
      break;
    case rep_prefix_1_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
                                  QImode);
      break;
    }
  /* Properly adjust the offset of the dest memory for aliasing.  */
  if (CONST_INT_P (count_exp))
    dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
                                        (count / size_needed) * size_needed);
  else
    dst = change_address (dst, BLKmode, destreg);

  /* Step 4: Epilogue to store the remaining bytes.  */
 epilogue:
  if (label)
    {
      /* When the main loop is done, COUNT_EXP might hold the original count,
         while we want to store only COUNT_EXP & SIZE_NEEDED bytes.
         Epilogue code will actually store COUNT_EXP & EPILOGUE_SIZE_NEEDED
         bytes.  Compensate if needed.  */

      if (size_needed < epilogue_size_needed)
        {
          tmp =
            expand_simple_binop (counter_mode (count_exp), AND, count_exp,
                                 GEN_INT (size_needed - 1), count_exp, 1,
                                 OPTAB_DIRECT);
          if (tmp != count_exp)
            emit_move_insn (count_exp, tmp);
        }
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }

  if (count_exp != const0_rtx && epilogue_size_needed > 1)
    {
      if (force_loopy_epilogue)
        expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
                                         epilogue_size_needed);
      else
        expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
                                epilogue_size_needed);
    }
  if (jump_around_label)
    emit_label (jump_around_label);
  return true;
}
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
        not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */

static void
ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);
  rtx cmp;

  align = 0;
  if (CONST_INT_P (align_rtx))
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2?  */
      if (align != 2)
        {
          align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
          align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

          /* Leave just the 3 lower bits.  */
          align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
                                    NULL_RTX, 0, OPTAB_WIDEN);

          emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
                                   Pmode, 1, align_4_label);
          emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
                                   Pmode, 1, align_2_label);
          emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
                                   Pmode, 1, align_3_label);
        }
      else
        {
          /* Since the alignment is 2, we have to check 2 or 0 bytes;
             check whether it is aligned to a 4-byte boundary.  */

          align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
                                    NULL_RTX, 0, OPTAB_WIDEN);

          emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
                                   Pmode, 1, align_4_label);
        }

      mem = change_address (src, QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first unaligned bytes on a byte-per-byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
                               QImode, 1, end_0_label);

      /* Increment the address.  */
      emit_insn (ix86_gen_add3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2.  */
      if (align != 2)
        {
          emit_label (align_2_label);

          emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
                                   end_0_label);

          emit_insn (ix86_gen_add3 (out, out, const1_rtx));

          emit_label (align_3_label);
        }

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
                               end_0_label);

      emit_insn (ix86_gen_add3 (out, out, const1_rtx));
    }

  /* Generate the loop to check 4 bytes at a time.  It is not a good idea
     to align this loop; it only makes the program larger and does not
     speed it up.  */
  emit_label (align_4_label);

  mem = change_address (src, SImode, out);
  emit_move_insn (scratch, mem);
  emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside the loop and many cycles.  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
                         gen_int_mode (0x80808080, SImode)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
                           align_4_label);

  if (TARGET_CMOVE)
    {
      rtx reg = gen_reg_rtx (SImode);
      rtx reg2 = gen_reg_rtx (Pmode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
                              gen_rtx_IF_THEN_ELSE (SImode, tmp,
                                                    reg,
                                                    tmpreg)));
      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg2,
                              gen_rtx_PLUS (Pmode, out, const2_rtx)));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
                              gen_rtx_IF_THEN_ELSE (Pmode, tmp,
                                                    reg2,
                                                    out)));
    }
  else
    {
      rtx end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes?  */

      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
                                  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      emit_insn (ix86_gen_add3 (out, out, const2_rtx));

      emit_label (end_2_label);
    }

  /* Avoid a branch in fixing the byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  tmp = gen_rtx_REG (CCmode, FLAGS_REG);
  cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
  emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));

  emit_label (end_0_label);
}
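
/* Editor's illustration (not part of GCC): the four insns emitted above
   compute the classic "is there a zero byte in X" test.  A standalone C
   equivalent:

     #include <stdint.h>

     static int has_zero_byte (uint32_t x)
     {
       // (x - 0x01010101) sets the high bit of every byte that borrowed;
       // & ~x keeps only bytes that were not already >= 0x80;
       // & 0x80808080 extracts the per-byte flags.
       return ((x - 0x01010101) & ~x & 0x80808080) != 0;
     }

   e.g. x = 0x41004242 gives a nonzero result (one byte is zero), while
   x = 0x41424344 gives 0.  */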
/* Expand strlen.  */

bool
ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
{
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of the strlen expander is long.  Avoid expanding it
     unless TARGET_INLINE_ALL_STRINGOPS.  */

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && !optimize_insn_for_size_p ()
      && (!CONST_INT_P (align) || INTVAL (align) < 4))
    return false;

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !optimize_insn_for_size_p ())
    {
      /* Well it seems that some optimizer does not combine a call like
         foo(strlen(bar), strlen(bar));
         when the move and the subtraction are done here.  It does calculate
         the length just once when these instructions are done inside of
         output_strlen_unroll().  But I think since &bar[strlen(bar)] is
         often used and I use one fewer register for the lifetime of
         output_strlen_unroll() this is better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, src, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
         the string, like memchr(), so compute the length by subtracting
         the start address.  */
      emit_insn (ix86_gen_sub3 (out, out, addr));
    }
  else
    {
      rtx unspec;

      /* Can't use this if the user has appropriated eax, ecx, or edi.  */
      if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
        return false;

      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      src = replace_equiv_address_nv (src, scratch3);

      /* If .md starts supporting :P, this can be done in .md.  */
      unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
                                                 scratch4), UNSPEC_SCAS);
      emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
      emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
      emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
    }
  return true;
}
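
/* Editor's worked arithmetic (not part of GCC): in the scasb branch above,
   the count register starts at scratch4's -1 and repne scasb decrements it
   once per byte examined, including the terminator.  After scanning a
   string of length LEN:

     count = -1 - (LEN + 1) = -LEN - 2
     ~count               = LEN + 1
     out = ~count + (-1)  = LEN

   which is exactly what the one_cmpl + add-constm1 pair emitted last
   computes.  */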
/* For a given symbol (function), construct code to compute the address of
   its PLT entry in the large x86-64 PIC model.  */
static rtx
construct_plt_address (rtx symbol)
{
  rtx tmp = gen_reg_rtx (Pmode);
  rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);

  gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
  gcc_assert (ix86_cmodel == CM_LARGE_PIC);

  emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
  emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
  return tmp;
}
rtx
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
                  rtx callarg2,
                  rtx pop, bool sibcall)
{
  /* We need to represent that SI and DI registers are clobbered
     by SYSV calls.  */
  static int clobbered_registers[] = {
        XMM6_REG, XMM7_REG, XMM8_REG,
        XMM9_REG, XMM10_REG, XMM11_REG,
        XMM12_REG, XMM13_REG, XMM14_REG,
        XMM15_REG, SI_REG, DI_REG
  };
  rtx vec[ARRAY_SIZE (clobbered_registers) + 3];
  rtx use = NULL, call;
  unsigned int vec_len;

  if (pop == const0_rtx)
    pop = NULL;
  gcc_assert (!TARGET_64BIT || !pop);

  if (TARGET_MACHO && !TARGET_64BIT)
    {
#if TARGET_MACHO
      if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
        fnaddr = machopic_indirect_call_target (fnaddr);
#endif
    }
  else
    {
      /* Static functions and indirect calls don't need the pic register.  */
      if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
          && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
          && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
        use_reg (&use, pic_offset_table_rtx);
    }

  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
    {
      rtx al = gen_rtx_REG (QImode, AX_REG);
      emit_move_insn (al, callarg2);
      use_reg (&use, al);
    }

  if (ix86_cmodel == CM_LARGE_PIC
      && MEM_P (fnaddr)
      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
      && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
    fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
  else if (sibcall
           ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
           : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
    {
      fnaddr = XEXP (fnaddr, 0);
      if (GET_MODE (fnaddr) != word_mode)
        fnaddr = convert_to_mode (word_mode, fnaddr, 1);
      fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
    }

  vec_len = 0;
  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
  if (retval)
    call = gen_rtx_SET (VOIDmode, retval, call);
  vec[vec_len++] = call;

  if (pop)
    {
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
      vec[vec_len++] = pop;
    }

  if (TARGET_64BIT_MS_ABI
      && (!callarg2 || INTVAL (callarg2) != -2))
    {
      unsigned i;

      vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
                                       UNSPEC_MS_TO_SYSV_CALL);

      for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
        vec[vec_len++]
          = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
                             ? TImode : DImode,
                             gen_rtx_REG (SSE_REGNO_P (clobbered_registers[i])
                                          ? TImode : DImode,
                                          clobbered_registers[i]));
    }

  /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration.  */
  if (TARGET_VZEROUPPER)
    {
      int avx256;
      if (cfun->machine->callee_pass_avx256_p)
        {
          if (cfun->machine->callee_return_avx256_p)
            avx256 = callee_return_pass_avx256;
          else
            avx256 = callee_pass_avx256;
        }
      else if (cfun->machine->callee_return_avx256_p)
        avx256 = callee_return_avx256;
      else
        avx256 = call_no_avx256;

      if (reload_completed)
        emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
      else
        vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode,
                                         gen_rtvec (1, GEN_INT (avx256)),
                                         UNSPEC_CALL_NEEDS_VZEROUPPER);
    }

  if (vec_len > 1)
    call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
  call = emit_call_insn (call);
  if (use)
    CALL_INSN_FUNCTION_USAGE (call) = use;

  return call;
}
void
ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
{
  rtx pat = PATTERN (insn);
  rtvec vec = XVEC (pat, 0);
  int len = GET_NUM_ELEM (vec) - 1;

  /* Strip off the last entry of the parallel.  */
  gcc_assert (GET_CODE (RTVEC_ELT (vec, len)) == UNSPEC);
  gcc_assert (XINT (RTVEC_ELT (vec, len), 1) == UNSPEC_CALL_NEEDS_VZEROUPPER);
  if (len == 1)
    pat = RTVEC_ELT (vec, 0);
  else
    pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (len, &RTVEC_ELT (vec, 0)));

  emit_insn (gen_avx_vzeroupper (vzeroupper));
  emit_call_insn (pat);
}
/* Output the assembly for a call instruction.  */

const char *
ix86_output_call_insn (rtx insn, rtx call_op)
{
  bool direct_p = constant_call_address_operand (call_op, VOIDmode);
  bool seh_nop_p = false;
  const char *xasm;

  if (SIBLING_CALL_P (insn))
    {
      if (direct_p)
        xasm = "jmp\t%P0";
      /* SEH epilogue detection requires the indirect branch case
         to include REX.W.  */
      else if (TARGET_SEH)
        xasm = "rex.W jmp %A0";
      else
        xasm = "jmp\t%A0";

      output_asm_insn (xasm, &call_op);
      return "";
    }

  /* SEH unwinding can require an extra nop to be emitted in several
     circumstances.  Determine if we have one of those.  */
  if (TARGET_SEH)
    {
      rtx i;

      for (i = NEXT_INSN (insn); i; i = NEXT_INSN (i))
        {
          /* If we get to another real insn, we don't need the nop.  */
          if (INSN_P (i))
            break;

          /* If we get to the epilogue note, prevent a catch region from
             being adjacent to the standard epilogue sequence.  If non-
             call-exceptions, we'll have done this during epilogue
             emission.  */
          if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
              && !flag_non_call_exceptions
              && !can_throw_internal (insn))
            {
              seh_nop_p = true;
              break;
            }
        }

      /* If we didn't find a real insn following the call, prevent the
         unwinder from looking into the next function.  */
      if (i == NULL)
        seh_nop_p = true;
    }

  if (direct_p)
    xasm = "call\t%P0";
  else
    xasm = "call\t%A0";

  output_asm_insn (xasm, &call_op);

  if (seh_nop_p)
    return "nop";

  return "";
}
/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */

static struct machine_function *
ix86_init_machine_status (void)
{
  struct machine_function *f;

  f = ggc_alloc_cleared_machine_function ();
  f->use_fast_prologue_epilogue_nregs = -1;
  f->tls_descriptor_call_expanded_p = 0;
  f->call_abi = ix86_abi;

  return f;
}
/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

rtx
assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
{
  struct stack_local_entry *s;

  gcc_assert (n < MAX_386_STACK_LOCALS);

  /* Virtual slot is valid only before vregs are instantiated.  */
  gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);

  for (s = ix86_stack_locals; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return validize_mem (copy_rtx (s->rtl));

  s = ggc_alloc_stack_local_entry ();
  s->n = n;
  s->mode = mode;
  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  s->next = ix86_stack_locals;
  ix86_stack_locals = s;
  return validize_mem (s->rtl);
}
/* Calculate the length of the memory address in the instruction encoding.
   Includes the addr32 prefix, but does not include the one-byte modrm,
   opcode, or other prefixes.  */

int
memory_address_length (rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;
  int ok;

  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  ok = ix86_decompose_address (addr, &parts);
  gcc_assert (ok);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  base = parts.base;
  index = parts.index;
  disp = parts.disp;

  /* Add the length of the addr32 prefix.  */
  len = (GET_CODE (addr) == ZERO_EXTEND
         || GET_CODE (addr) == AND);

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement,
       - r12 as the base always wants an index,
       - r13 as the base always wants a displacement.  */

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
         the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
         code.  */
      if (REG_P (addr)
          && (addr == arg_pointer_rtx
              || addr == frame_pointer_rtx
              || REGNO (addr) == SP_REG
              || REGNO (addr) == BP_REG
              || REGNO (addr) == R12_REG
              || REGNO (addr) == R13_REG))
        len = 1;
    }

  /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
     is not disp32, but disp32(%rip), so for disp32
     a SIB byte is needed, unless print_operand_address
     optimizes it into disp32(%rip) or (%rip) is implied
     by an UNSPEC.  */
  else if (disp && !base && !index)
    {
      len = 4;
      if (TARGET_64BIT)
        {
          rtx symbol = disp;

          if (GET_CODE (disp) == CONST)
            symbol = XEXP (disp, 0);
          if (GET_CODE (symbol) == PLUS
              && CONST_INT_P (XEXP (symbol, 1)))
            symbol = XEXP (symbol, 0);

          if (GET_CODE (symbol) != LABEL_REF
              && (GET_CODE (symbol) != SYMBOL_REF
                  || SYMBOL_REF_TLS_MODEL (symbol) != 0)
              && (GET_CODE (symbol) != UNSPEC
                  || (XINT (symbol, 1) != UNSPEC_GOTPCREL
                      && XINT (symbol, 1) != UNSPEC_PCREL
                      && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
            len += 1;
        }
    }
  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
        {
          if (base && satisfies_constraint_K (disp))
            len += 1;
          else
            len += 4;
        }
      /* ebp always wants a displacement.  Similarly r13.  */
      else if (base && REG_P (base)
               && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
        len += 1;

      /* An index requires the two-byte modrm form....  */
      if (index
          /* ...like esp (or r12), which always wants an index.  */
          || base == arg_pointer_rtx
          || base == frame_pointer_rtx
          || (base && REG_P (base)
              && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
        len += 1;
    }

  return len;
}
/* Compute the default value for the "length_immediate" attribute.  When
   SHORTFORM is set, expect that the insn has an 8-bit immediate
   alternative.  */
int
ix86_attr_length_immediate_default (rtx insn, bool shortform)
{
  int len = 0;
  int i;

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
        enum attr_mode mode = get_attr_mode (insn);

        if (shortform && CONST_INT_P (recog_data.operand[i]))
          {
            HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
            switch (mode)
              {
              case MODE_QI:
                len = 1;
                continue;
              case MODE_HI:
                ival = trunc_int_for_mode (ival, HImode);
                break;
              case MODE_SI:
                ival = trunc_int_for_mode (ival, SImode);
                break;
              default:
                break;
              }
            if (IN_RANGE (ival, -128, 127))
              {
                len = 1;
                continue;
              }
          }
        switch (mode)
          {
          case MODE_QI:
            len = 1;
            break;
          case MODE_HI:
            len = 2;
            break;
          case MODE_SI:
            len = 4;
            break;
          /* Immediates for DImode instructions are encoded as 32-bit
             sign-extended values.  */
          case MODE_DI:
            len = 4;
            break;
          default:
            fatal_insn ("unknown insn mode", insn);
          }
      }
  return len;
}
/* Compute the default value for the "length_address" attribute.  */
int
ix86_attr_length_address_default (rtx insn)
{
  int i;

  if (get_attr_type (insn) == TYPE_LEA)
    {
      rtx set = PATTERN (insn), addr;

      if (GET_CODE (set) == PARALLEL)
        set = XVECEXP (set, 0, 0);

      gcc_assert (GET_CODE (set) == SET);

      addr = SET_SRC (set);
      if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
        {
          if (GET_CODE (addr) == ZERO_EXTEND)
            addr = XEXP (addr, 0);
          if (GET_CODE (addr) == SUBREG)
            addr = SUBREG_REG (addr);
        }

      return memory_address_length (addr);
    }

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (MEM_P (recog_data.operand[i]))
      {
        constrain_operands_cached (reload_completed);
        if (which_alternative != -1)
          {
            const char *constraints = recog_data.constraints[i];
            int alt = which_alternative;

            while (*constraints == '=' || *constraints == '+')
              constraints++;
            while (alt-- > 0)
              while (*constraints++ != ',')
                ;
            /* Skip ignored operands.  */
            if (*constraints == 'X')
              continue;
          }
        return memory_address_length (XEXP (recog_data.operand[i], 0));
      }
  return 0;
}
/* Compute the default value for the "length_vex" attribute.  It includes
   the 2- or 3-byte VEX prefix and 1 opcode byte.  */

int
ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
{
  int i;

  /* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX W bit
     requires the 3-byte VEX prefix.  */
  if (!has_0f_opcode || has_vex_w)
    return 3 + 1;

  /* We can always use the 2-byte VEX prefix in 32-bit mode.  */
  if (!TARGET_64BIT)
    return 2 + 1;

  extract_insn_cached (insn);

  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (REG_P (recog_data.operand[i]))
      {
        /* The REX.W bit requires the 3-byte VEX prefix.  */
        if (GET_MODE (recog_data.operand[i]) == DImode
            && GENERAL_REG_P (recog_data.operand[i]))
          return 3 + 1;
      }
    else
      {
        /* REX.X or REX.B bits require the 3-byte VEX prefix.  */
        if (MEM_P (recog_data.operand[i])
            && x86_extended_reg_mentioned_p (recog_data.operand[i]))
          return 3 + 1;
      }

  return 2 + 1;
}
/* Return the maximum number of instructions a cpu can issue.  */

static int
ix86_issue_rate (void)
{
  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
    case PROCESSOR_ATOM:
    case PROCESSOR_K6:
      return 2;

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_PENTIUM4:
    case PROCESSOR_CORE2_32:
    case PROCESSOR_CORE2_64:
    case PROCESSOR_COREI7_32:
    case PROCESSOR_COREI7_64:
    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
    case PROCESSOR_AMDFAM10:
    case PROCESSOR_NOCONA:
    case PROCESSOR_GENERIC32:
    case PROCESSOR_GENERIC64:
    case PROCESSOR_BDVER1:
    case PROCESSOR_BDVER2:
    case PROCESSOR_BTVER1:
      return 3;

    default:
      return 1;
    }
}
/* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
   by DEP_INSN and nothing set by DEP_INSN.  */

static bool
ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
{
  rtx set, set2;

  /* Simplify the test for uninteresting insns.  */
  if (insn_type != TYPE_SETCC
      && insn_type != TYPE_ICMOV
      && insn_type != TYPE_FCMOV
      && insn_type != TYPE_IBR)
    return false;

  if ((set = single_set (dep_insn)) != 0)
    {
      set = SET_DEST (set);
      set2 = NULL_RTX;
    }
  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
           && XVECLEN (PATTERN (dep_insn), 0) == 2
           && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
           && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
    {
      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
    }
  else
    return false;

  if (!REG_P (set) || REGNO (set) != FLAGS_REG)
    return false;

  /* This test is true if the dependent insn reads the flags but
     not any other potentially set register.  */
  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
    return false;

  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
    return false;

  return true;
}
/* Return true iff USE_INSN has a memory address with operands set by
   SET_INSN.  */

bool
ix86_agi_dependent (rtx set_insn, rtx use_insn)
{
  int i;

  extract_insn_cached (use_insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (MEM_P (recog_data.operand[i]))
      {
        rtx addr = XEXP (recog_data.operand[i], 0);
        return modified_in_p (addr, set_insn) != 0;
      }
  return false;
}
static int
ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
  enum attr_type insn_type, dep_insn_type;
  enum attr_memory memory;
  rtx set, set2;
  int dep_insn_code_number;

  /* Anti and output dependencies have zero cost on all CPUs.  */
  if (REG_NOTE_KIND (link) != 0)
    return 0;

  dep_insn_code_number = recog_memoized (dep_insn);

  /* If we can't recognize the insns, we can't really do anything.  */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
      /* Address Generation Interlock adds a cycle of latency.  */
      if (insn_type == TYPE_LEA)
	{
	  rtx addr = PATTERN (insn);

	  if (GET_CODE (addr) == PARALLEL)
	    addr = XVECEXP (addr, 0, 0);

	  gcc_assert (GET_CODE (addr) == SET);

	  addr = SET_SRC (addr);
	  if (modified_in_p (addr, dep_insn))
	    cost += 1;
	}
      else if (ix86_agi_dependent (dep_insn, insn))
	cost += 1;

      /* ??? Compares pair with jump/setcc.  */
      if (ix86_flags_dependent (insn, dep_insn, insn_type))
	cost = 0;

      /* Floating point stores require value to be ready one cycle earlier.  */
      if (insn_type == TYPE_FMOV
	  && get_attr_memory (insn) == MEMORY_STORE
	  && !ix86_agi_dependent (dep_insn, insn))
	cost += 1;
      break;

    case PROCESSOR_PENTIUMPRO:
      memory = get_attr_memory (insn);

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* There is one cycle extra latency between an FP op and a store.  */
      if (insn_type == TYPE_FMOV
	  && (set = single_set (dep_insn)) != NULL_RTX
	  && (set2 = single_set (insn)) != NULL_RTX
	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
	  && MEM_P (SET_DEST (set2)))
	cost += 1;

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependent (dep_insn, insn))
	{
	  /* Claim moves to take one cycle, as the core can issue one load
	     at a time and the next load can start a cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 1;
	  else if (cost > 1)
	    cost--;
	}
      break;

    case PROCESSOR_K6:
      memory = get_attr_memory (insn);

      /* The esp dependency is resolved before the instruction is really
	 finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
	return 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependent (dep_insn, insn))
	{
	  /* Claim moves to take one cycle, as the core can issue one load
	     at a time and the next load can start a cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 1;
	  else if (cost > 2)
	    cost -= 2;
	  else
	    cost = 1;
	}
      break;

    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
    case PROCESSOR_AMDFAM10:
    case PROCESSOR_BDVER1:
    case PROCESSOR_BDVER2:
    case PROCESSOR_BTVER1:
    case PROCESSOR_ATOM:
    case PROCESSOR_GENERIC32:
    case PROCESSOR_GENERIC64:
      memory = get_attr_memory (insn);

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependent (dep_insn, insn))
	{
	  enum attr_unit unit = get_attr_unit (insn);
	  int loadcost = 3;

	  /* Because of the difference between the length of integer and
	     floating unit pipeline preparation stages, the memory operands
	     for floating point are cheaper.

	     ??? For Athlon the difference is most probably 2.  */
	  if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
	    loadcost = 3;
	  else
	    loadcost = TARGET_ATHLON ? 2 : 0;

	  if (cost >= loadcost)
	    cost -= loadcost;
	  else
	    cost = 0;
	}
      break;

    default:
      break;
    }

  return cost;
}
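/* E.g. on PROCESSOR_PENTIUM a load feeding the address of the next insn
   costs one extra cycle (the AGI above), while on the out-of-order cores
   handled last the reorder buffer hides up to three cycles of a load's
   latency (two for floating point on Athlon) whenever the load's address
   does not depend on the previous insn.  */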
/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */

static int
ia32_multipass_dfa_lookahead (void)
{
  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
      return 2;

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_K6:
      return 1;

    case PROCESSOR_CORE2_32:
    case PROCESSOR_CORE2_64:
    case PROCESSOR_COREI7_32:
    case PROCESSOR_COREI7_64:
      /* Generally, we want haifa-sched:max_issue() to look as far ahead
	 as the number of instructions that can be executed in one cycle,
	 i.e., issue_rate.  I wonder why tuning for many CPUs does not
	 do this.  */
      return ix86_issue_rate ();

    default:
      return 0;
    }
}
/* Model decoder of Core 2/i7.
   Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
   track the instruction fetch block boundaries and make sure that long
   (9+ bytes) instructions are assigned to D0.  */

/* Maximum length of an insn that can be handled by
   a secondary decoder unit.  '8' for Core 2/i7.  */
static int core2i7_secondary_decoder_max_insn_size;

/* Ifetch block size, i.e., number of bytes decoder reads per cycle.
   '16' for Core 2/i7.  */
static int core2i7_ifetch_block_size;

/* Maximum number of instructions decoder can handle per cycle.
   '6' for Core 2/i7.  */
static int core2i7_ifetch_block_max_insns;
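/* With the Core 2/i7 values installed below (8, 16 and 6) this means: an
   insn longer than 8 bytes can only be taken by the first decoder, at
   most 16 bytes of insns are consumed from one fetch block per cycle,
   and never more than 6 insns regardless of their total size.  */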
typedef struct ix86_first_cycle_multipass_data_ *
  ix86_first_cycle_multipass_data_t;
typedef const struct ix86_first_cycle_multipass_data_ *
  const_ix86_first_cycle_multipass_data_t;

/* A variable to store target state across calls to max_issue within
   one cycle.  */
static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
  *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
/* Initialize DATA.  */
static void
core2i7_first_cycle_multipass_init (void *_data)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;

  data->ifetch_block_len = 0;
  data->ifetch_block_n_insns = 0;
  data->ready_try_change = NULL;
  data->ready_try_change_size = 0;
}
/* Advancing the cycle; reset ifetch block counts.  */
static void
core2i7_dfa_post_advance_cycle (void)
{
  ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;

  gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);

  data->ifetch_block_len = 0;
  data->ifetch_block_n_insns = 0;
}

static int min_insn_size (rtx);
/* Filter out insns from ready_try that the core will not be able to issue
   on current cycle due to decoder.  */
static void
core2i7_first_cycle_multipass_filter_ready_try
(const_ix86_first_cycle_multipass_data_t data,
 char *ready_try, int n_ready, bool first_cycle_insn_p)
{
  while (n_ready--)
    {
      rtx insn;
      int insn_size;

      if (ready_try[n_ready])
	continue;

      insn = get_ready_element (n_ready);
      insn_size = min_insn_size (insn);

      if (/* If this is too long an insn for a secondary decoder ...  */
	  (!first_cycle_insn_p
	   && insn_size > core2i7_secondary_decoder_max_insn_size)
	  /* ... or it would not fit into the ifetch block ...  */
	  || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
	  /* ... or the decoder is full already ...  */
	  || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
	/* ... mask the insn out.  */
	{
	  ready_try[n_ready] = 1;

	  if (data->ready_try_change)
	    SET_BIT (data->ready_try_change, n_ready);
	}
    }
}
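/* A nonzero ready_try entry tells haifa-sched not to try issuing the
   corresponding insn on the current cycle.  The entries set here are
   remembered in data->ready_try_change so that the backtrack hook below
   can clear exactly these marks again.  */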
/* Prepare for a new round of multipass lookahead scheduling.  */
static void
core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
				     bool first_cycle_insn_p)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;
  const_ix86_first_cycle_multipass_data_t prev_data
    = ix86_first_cycle_multipass_data;

  /* Restore the state from the end of the previous round.  */
  data->ifetch_block_len = prev_data->ifetch_block_len;
  data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;

  /* Filter instructions that cannot be issued on current cycle due to
     decoder restrictions.  */
  core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
						  first_cycle_insn_p);
}
/* INSN is being issued in current solution.  Account for its impact on
   the decoder model.  */
static void
core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
				     rtx insn, const void *_prev_data)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;
  const_ix86_first_cycle_multipass_data_t prev_data
    = (const_ix86_first_cycle_multipass_data_t) _prev_data;

  int insn_size = min_insn_size (insn);

  data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
  data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
  gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
	      && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);

  /* Allocate or resize the bitmap for storing INSN's effect on ready_try.  */
  if (!data->ready_try_change)
    {
      data->ready_try_change = sbitmap_alloc (n_ready);
      data->ready_try_change_size = n_ready;
    }
  else if (data->ready_try_change_size < n_ready)
    {
      data->ready_try_change = sbitmap_resize (data->ready_try_change,
					       n_ready, 0);
      data->ready_try_change_size = n_ready;
    }
  sbitmap_zero (data->ready_try_change);

  /* Filter out insns from ready_try that the core will not be able to issue
     on current cycle due to decoder.  */
  core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
						  false);
}
/* Revert the effect on ready_try.  */
static void
core2i7_first_cycle_multipass_backtrack (const void *_data,
					 char *ready_try,
					 int n_ready ATTRIBUTE_UNUSED)
{
  const_ix86_first_cycle_multipass_data_t data
    = (const_ix86_first_cycle_multipass_data_t) _data;
  unsigned int i = 0;
  sbitmap_iterator sbi;

  gcc_assert (sbitmap_last_set_bit (data->ready_try_change) < n_ready);
  EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
    {
      ready_try[i] = 0;
    }
}
/* Save the result of multipass lookahead scheduling for the next round.  */
static void
core2i7_first_cycle_multipass_end (const void *_data)
{
  const_ix86_first_cycle_multipass_data_t data
    = (const_ix86_first_cycle_multipass_data_t) _data;
  ix86_first_cycle_multipass_data_t next_data
    = ix86_first_cycle_multipass_data;

  if (data != NULL)
    {
      next_data->ifetch_block_len = data->ifetch_block_len;
      next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
    }
}
/* Deallocate target data.  */
static void
core2i7_first_cycle_multipass_fini (void *_data)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;

  if (data->ready_try_change)
    {
      sbitmap_free (data->ready_try_change);
      data->ready_try_change = NULL;
      data->ready_try_change_size = 0;
    }
}
/* Prepare for scheduling pass.  */
static void
ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
			int verbose ATTRIBUTE_UNUSED,
			int max_uid ATTRIBUTE_UNUSED)
{
  /* Install scheduling hooks for current CPU.  Some of these hooks are used
     in time-critical parts of the scheduler, so we only set them up when
     they are actually used.  */
  switch (ix86_tune)
    {
    case PROCESSOR_CORE2_32:
    case PROCESSOR_CORE2_64:
    case PROCESSOR_COREI7_32:
    case PROCESSOR_COREI7_64:
      targetm.sched.dfa_post_advance_cycle
	= core2i7_dfa_post_advance_cycle;
      targetm.sched.first_cycle_multipass_init
	= core2i7_first_cycle_multipass_init;
      targetm.sched.first_cycle_multipass_begin
	= core2i7_first_cycle_multipass_begin;
      targetm.sched.first_cycle_multipass_issue
	= core2i7_first_cycle_multipass_issue;
      targetm.sched.first_cycle_multipass_backtrack
	= core2i7_first_cycle_multipass_backtrack;
      targetm.sched.first_cycle_multipass_end
	= core2i7_first_cycle_multipass_end;
      targetm.sched.first_cycle_multipass_fini
	= core2i7_first_cycle_multipass_fini;

      /* Set decoder parameters.  */
      core2i7_secondary_decoder_max_insn_size = 8;
      core2i7_ifetch_block_size = 16;
      core2i7_ifetch_block_max_insns = 6;
      break;

    default:
      targetm.sched.dfa_post_advance_cycle = NULL;
      targetm.sched.first_cycle_multipass_init = NULL;
      targetm.sched.first_cycle_multipass_begin = NULL;
      targetm.sched.first_cycle_multipass_issue = NULL;
      targetm.sched.first_cycle_multipass_backtrack = NULL;
      targetm.sched.first_cycle_multipass_end = NULL;
      targetm.sched.first_cycle_multipass_fini = NULL;
      break;
    }
}
/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */

int
ix86_constant_alignment (tree exp, int align)
{
  if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
      || TREE_CODE (exp) == INTEGER_CST)
    {
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
	return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
	return 128;
    }
  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
    return BITS_PER_WORD;

  return align;
}
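/* E.g. a DFmode constant gets 64-bit alignment even when its type only
   asks for 32 bits, and a string constant of 31 or more bytes is
   word-aligned, which helps word-at-a-time copies of its value.  */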
/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (tree type, int align)
{
  int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);

  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
	  || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
      && align < max_align)
    align = max_align;

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
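/* E.g. a file-scope 'double' array gets at least 64-bit alignment, and
   on x86-64 any static aggregate of 16 bytes or more gets at least
   128-bit alignment, allowing aligned SSE accesses to it.  */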
/* Compute the alignment for a local variable or a stack slot.  EXP is
   the data type or decl itself, MODE is the widest mode available and
   ALIGN is the alignment that the object would ordinarily have.  The
   value of this macro is used instead of that alignment to align the
   object.  */

unsigned int
ix86_local_alignment (tree exp, enum machine_mode mode,
		      unsigned int align)
{
  tree type, decl;

  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if (!TARGET_64BIT
      && align == 64
      && ix86_preferred_stack_boundary < 64
      && (mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || !TYPE_USER_ALIGN (type))
      && (!decl || !DECL_USER_ALIGN (decl)))
    align = 32;

  /* If TYPE is NULL, we are allocating a stack slot for caller-save
     register in MODE.  We will return the largest alignment of XF
     and DF.  */
  if (!type)
    {
      if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
	align = GET_MODE_ALIGNMENT (DFmode);
      return align;
    }

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  Exact wording is:

     An array uses the same alignment as its elements, except that a local or
     global array variable of length at least 16 bytes or
     a C99 variable-length array variable always has alignment of at least 16 bytes.

     This was added to allow use of aligned SSE instructions on arrays.  This
     rule is meant for static storage (where the compiler cannot do the
     analysis by itself).  We follow it for automatic variables only when
     convenient.  We fully control everything in the function being compiled,
     and functions from other units cannot rely on the alignment.

     Exclude va_list type.  It is the common case of a local array where
     we cannot benefit from the alignment.  */
  if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
      && TARGET_SSE)
    {
      if (AGGREGATE_TYPE_P (type)
	  && (va_list_type_node == NULL_TREE
	      || (TYPE_MAIN_VARIANT (type)
		  != TYPE_MAIN_VARIANT (va_list_type_node)))
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
/* Compute the minimum required alignment for dynamic stack realignment
   purposes for a local variable, parameter or a stack slot.  EXP is
   the data type or decl itself, MODE is its mode and ALIGN is the
   alignment that the object would ordinarily have.  */

unsigned int
ix86_minimum_alignment (tree exp, enum machine_mode mode,
			unsigned int align)
{
  tree type, decl;

  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
    return align;

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || !TYPE_USER_ALIGN (type))
      && (!decl || !DECL_USER_ALIGN (decl)))
    return 32;

  return align;
}
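/* E.g. with -m32 -mpreferred-stack-boundary=2 a local 'long long' needs
   only 32-bit alignment here, so its ordinary 64-bit alignment does not
   by itself force dynamic stack realignment.  */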
/* Find a location for the static chain incoming to a nested function.
   This is a register, unless all free registers are used by arguments.  */

static rtx
ix86_static_chain (const_tree fndecl, bool incoming_p)
{
  unsigned regno;

  if (!DECL_STATIC_CHAIN (fndecl))
    return NULL;

  if (TARGET_64BIT)
    {
      /* We always use R10 in 64-bit mode.  */
      regno = R10_REG;
    }
  else
    {
      tree fntype;
      unsigned int ccvt;

      /* By default in 32-bit mode we use ECX to pass the static chain.  */
      regno = CX_REG;

      fntype = TREE_TYPE (fndecl);
      ccvt = ix86_get_callcvt (fntype);
      if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
	{
	  /* Fastcall functions use ecx/edx for arguments, which leaves
	     us with EAX for the static chain.
	     Thiscall functions use ecx for arguments, which also
	     leaves us with EAX for the static chain.  */
	  regno = AX_REG;
	}
      else if (ix86_function_regparm (fntype, fndecl) == 3)
	{
	  /* For regparm 3, we have no free call-clobbered registers in
	     which to store the static chain.  In order to implement this,
	     we have the trampoline push the static chain to the stack.
	     However, we can't push a value below the return address when
	     we call the nested function directly, so we have to use an
	     alternate entry point.  For this we use ESI, and have the
	     alternate entry point push ESI, so that things appear the
	     same once we're executing the nested function.  */
	  if (incoming_p)
	    {
	      if (fndecl == current_function_decl)
		ix86_static_chain_on_stack = true;
	      return gen_frame_mem (SImode,
				    plus_constant (arg_pointer_rtx, -8));
	    }
	  regno = SI_REG;
	}
    }

  return gen_rtx_REG (Pmode, regno);
}
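/* The 64-bit trampoline emitted below is the byte sequence

     41 bb <imm32>	movl  $FNADDR, %r11d	(or 49 bb <imm64>, movabs)
     41 ba <imm32>	movl  $CHAIN, %r10d	(or 49 ba <imm64>, movabs)
     49 ff e3		rex.WB jmp *%r11
     90			nop (pads the final write to 32 bits)

   The HImode stores of 0xbb41 etc. emit these opcode pairs in
   little-endian byte order.  */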
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNDECL is the decl of the target address; M_TRAMP is a MEM for
   the trampoline, and CHAIN_VALUE is an RTX for the static chain
   to be passed to the target function.  */

static void
ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx mem, fnaddr;
  int opcode;
  int offset = 0;

  fnaddr = XEXP (DECL_RTL (fndecl), 0);

  if (TARGET_64BIT)
    {
      int size;

      /* Load the function address to r11.  Try to load address using
	 the shorter movl instead of movabs.  We may want to support
	 movq for kernel mode, but kernel does not use trampolines at
	 the moment.  FNADDR is a 32bit address and may not be in
	 DImode when ptr_mode == SImode.  Always use movl in this
	 case.  */
      if (ptr_mode == SImode
	  || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
	{
	  fnaddr = copy_to_mode_reg (Pmode, fnaddr);

	  mem = adjust_address (m_tramp, HImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xbb41, HImode));

	  mem = adjust_address (m_tramp, SImode, offset + 2);
	  emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  mem = adjust_address (m_tramp, HImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xbb49, HImode));

	  mem = adjust_address (m_tramp, DImode, offset + 2);
	  emit_move_insn (mem, fnaddr);

	  offset += 10;
	}

      /* Load static chain using movabs to r10.  Use the shorter movl
	 instead of movabs when ptr_mode == SImode.  */
      if (ptr_mode == SImode)
	{
	  opcode = 0xba41;
	  size = 6;
	}
      else
	{
	  opcode = 0xba49;
	  size = 10;
	}

      mem = adjust_address (m_tramp, HImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, HImode));

      mem = adjust_address (m_tramp, ptr_mode, offset + 2);
      emit_move_insn (mem, chain_value);
      offset += size;

      /* Jump to r11; the last (unused) byte is a nop, only there to
	 pad the write out to a single 32-bit store.  */
      mem = adjust_address (m_tramp, SImode, offset);
      emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
      offset += 4;
    }
  else
    {
      rtx disp, chain;

      /* Depending on the static chain location, either load a register
	 with a constant, or push the constant to the stack.  All of the
	 instructions are the same size.  */
      chain = ix86_static_chain (fndecl, true);
      if (REG_P (chain))
	{
	  switch (REGNO (chain))
	    {
	    case AX_REG:
	      opcode = 0xb8; break;
	    case CX_REG:
	      opcode = 0xb9; break;
	    default:
	      gcc_unreachable ();
	    }
	}
      else
	opcode = 0x68;

      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);
      emit_move_insn (mem, chain_value);
      offset += 5;

      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (0xe9, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);

      /* Compute offset from the end of the jmp to the target function.
	 In the case in which the trampoline stores the static chain on
	 the stack, we need to skip the first insn which pushes the
	 (call-saved) register static chain; this push is 1 byte.  */
      offset += 5;
      disp = expand_binop (SImode, sub_optab, fnaddr,
			   plus_constant (XEXP (m_tramp, 0),
					  offset - (MEM_P (chain) ? 1 : 0)),
			   NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (mem, disp);
    }

  gcc_assert (offset <= TRAMPOLINE_SIZE);

#ifdef HAVE_ENABLE_EXECUTE_STACK
#ifdef CHECK_EXECUTE_STACK_ENABLED
  if (CHECK_EXECUTE_STACK_ENABLED)
#endif
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
#endif
}
/* The following file contains several enumerations and data structures
   built from the definitions in i386-builtin-types.def.  */

#include "i386-builtin-types.inc"

/* Table for the ix86 builtin non-function types.  */
static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];

/* Retrieve an element from the above table, building some of
   the types lazily.  */

static tree
ix86_get_builtin_type (enum ix86_builtin_type tcode)
{
  unsigned int index;
  tree type, itype;

  gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));

  type = ix86_builtin_type_tab[(int) tcode];
  if (type != NULL)
    return type;

  gcc_assert (tcode > IX86_BT_LAST_PRIM);
  if (tcode <= IX86_BT_LAST_VECT)
    {
      enum machine_mode mode;

      index = tcode - IX86_BT_LAST_PRIM - 1;
      itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
      mode = ix86_builtin_type_vect_mode[index];

      type = build_vector_type_for_mode (itype, mode);
    }
  else
    {
      int quals;

      index = tcode - IX86_BT_LAST_VECT - 1;
      if (tcode <= IX86_BT_LAST_PTR)
	quals = TYPE_UNQUALIFIED;
      else
	quals = TYPE_QUAL_CONST;

      itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
      if (quals != TYPE_UNQUALIFIED)
	itype = build_qualified_type (itype, quals);

      type = build_pointer_type (itype);
    }

  ix86_builtin_type_tab[(int) tcode] = type;
  return type;
}
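/* E.g. the first request for a vector type finds its table slot empty,
   recursively obtains the element type from ix86_builtin_type_vect_base,
   builds the vector type for the mode recorded in
   ix86_builtin_type_vect_mode, and caches the result for later calls.  */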
/* Table for the ix86 builtin function types.  */
static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];

/* Retrieve an element from the above table, building some of
   the types lazily.  */

static tree
ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
{
  tree type;

  gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));

  type = ix86_builtin_func_type_tab[(int) tcode];
  if (type != NULL)
    return type;

  if (tcode <= IX86_BT_LAST_FUNC)
    {
      unsigned start = ix86_builtin_func_start[(int) tcode];
      unsigned after = ix86_builtin_func_start[(int) tcode + 1];
      tree rtype, atype, args = void_list_node;
      unsigned i;

      rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
      for (i = after - 1; i > start; --i)
	{
	  atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
	  args = tree_cons (NULL, atype, args);
	}

      type = build_function_type (rtype, args);
    }
  else
    {
      unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
      enum ix86_builtin_func_type icode;

      icode = ix86_builtin_func_alias_base[index];
      type = ix86_get_builtin_func_type (icode);
    }

  ix86_builtin_func_type_tab[(int) tcode] = type;
  return type;
}
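/* Function types are built the same way: the entries between START and
   AFTER in ix86_builtin_func_args give the return type followed by the
   argument types, and alias codes past IX86_BT_LAST_FUNC simply share
   the type of the function they alias.  */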
/* Codes for all the SSE/MMX builtins.  */
enum ix86_builtins
{
  IX86_BUILTIN_ADDPS,
  IX86_BUILTIN_ADDSS,
  IX86_BUILTIN_DIVPS,
  IX86_BUILTIN_DIVSS,
  IX86_BUILTIN_MULPS,
  IX86_BUILTIN_MULSS,
  IX86_BUILTIN_SUBPS,
  IX86_BUILTIN_SUBSS,

  IX86_BUILTIN_CMPEQPS,
  IX86_BUILTIN_CMPLTPS,
  IX86_BUILTIN_CMPLEPS,
  IX86_BUILTIN_CMPGTPS,
  IX86_BUILTIN_CMPGEPS,
  IX86_BUILTIN_CMPNEQPS,
  IX86_BUILTIN_CMPNLTPS,
  IX86_BUILTIN_CMPNLEPS,
  IX86_BUILTIN_CMPNGTPS,
  IX86_BUILTIN_CMPNGEPS,
  IX86_BUILTIN_CMPORDPS,
  IX86_BUILTIN_CMPUNORDPS,
  IX86_BUILTIN_CMPEQSS,
  IX86_BUILTIN_CMPLTSS,
  IX86_BUILTIN_CMPLESS,
  IX86_BUILTIN_CMPNEQSS,
  IX86_BUILTIN_CMPNLTSS,
  IX86_BUILTIN_CMPNLESS,
  IX86_BUILTIN_CMPNGTSS,
  IX86_BUILTIN_CMPNGESS,
  IX86_BUILTIN_CMPORDSS,
  IX86_BUILTIN_CMPUNORDSS,

  IX86_BUILTIN_COMIEQSS,
  IX86_BUILTIN_COMILTSS,
  IX86_BUILTIN_COMILESS,
  IX86_BUILTIN_COMIGTSS,
  IX86_BUILTIN_COMIGESS,
  IX86_BUILTIN_COMINEQSS,
  IX86_BUILTIN_UCOMIEQSS,
  IX86_BUILTIN_UCOMILTSS,
  IX86_BUILTIN_UCOMILESS,
  IX86_BUILTIN_UCOMIGTSS,
  IX86_BUILTIN_UCOMIGESS,
  IX86_BUILTIN_UCOMINEQSS,

  IX86_BUILTIN_CVTPI2PS,
  IX86_BUILTIN_CVTPS2PI,
  IX86_BUILTIN_CVTSI2SS,
  IX86_BUILTIN_CVTSI642SS,
  IX86_BUILTIN_CVTSS2SI,
  IX86_BUILTIN_CVTSS2SI64,
  IX86_BUILTIN_CVTTPS2PI,
  IX86_BUILTIN_CVTTSS2SI,
  IX86_BUILTIN_CVTTSS2SI64,

  IX86_BUILTIN_MAXPS,
  IX86_BUILTIN_MAXSS,
  IX86_BUILTIN_MINPS,
  IX86_BUILTIN_MINSS,

  IX86_BUILTIN_LOADUPS,
  IX86_BUILTIN_STOREUPS,
  IX86_BUILTIN_MOVSS,

  IX86_BUILTIN_MOVHLPS,
  IX86_BUILTIN_MOVLHPS,
  IX86_BUILTIN_LOADHPS,
  IX86_BUILTIN_LOADLPS,
  IX86_BUILTIN_STOREHPS,
  IX86_BUILTIN_STORELPS,

  IX86_BUILTIN_MASKMOVQ,
  IX86_BUILTIN_MOVMSKPS,
  IX86_BUILTIN_PMOVMSKB,

  IX86_BUILTIN_MOVNTPS,
  IX86_BUILTIN_MOVNTQ,

  IX86_BUILTIN_LOADDQU,
  IX86_BUILTIN_STOREDQU,

  IX86_BUILTIN_PACKSSWB,
  IX86_BUILTIN_PACKSSDW,
  IX86_BUILTIN_PACKUSWB,

  IX86_BUILTIN_PADDB,
  IX86_BUILTIN_PADDW,
  IX86_BUILTIN_PADDD,
  IX86_BUILTIN_PADDQ,
  IX86_BUILTIN_PADDSB,
  IX86_BUILTIN_PADDSW,
  IX86_BUILTIN_PADDUSB,
  IX86_BUILTIN_PADDUSW,
  IX86_BUILTIN_PSUBB,
  IX86_BUILTIN_PSUBW,
  IX86_BUILTIN_PSUBD,
  IX86_BUILTIN_PSUBQ,
  IX86_BUILTIN_PSUBSB,
  IX86_BUILTIN_PSUBSW,
  IX86_BUILTIN_PSUBUSB,
  IX86_BUILTIN_PSUBUSW,

  IX86_BUILTIN_PAND,
  IX86_BUILTIN_PANDN,
  IX86_BUILTIN_POR,
  IX86_BUILTIN_PXOR,

  IX86_BUILTIN_PAVGB,
  IX86_BUILTIN_PAVGW,

  IX86_BUILTIN_PCMPEQB,
  IX86_BUILTIN_PCMPEQW,
  IX86_BUILTIN_PCMPEQD,
  IX86_BUILTIN_PCMPGTB,
  IX86_BUILTIN_PCMPGTW,
  IX86_BUILTIN_PCMPGTD,

  IX86_BUILTIN_PMADDWD,

  IX86_BUILTIN_PMAXSW,
  IX86_BUILTIN_PMAXUB,
  IX86_BUILTIN_PMINSW,
  IX86_BUILTIN_PMINUB,

  IX86_BUILTIN_PMULHUW,
  IX86_BUILTIN_PMULHW,
  IX86_BUILTIN_PMULLW,

  IX86_BUILTIN_PSADBW,
  IX86_BUILTIN_PSHUFW,

  IX86_BUILTIN_PSLLW,
  IX86_BUILTIN_PSLLD,
  IX86_BUILTIN_PSLLQ,
  IX86_BUILTIN_PSRAW,
  IX86_BUILTIN_PSRAD,
  IX86_BUILTIN_PSRLW,
  IX86_BUILTIN_PSRLD,
  IX86_BUILTIN_PSRLQ,
  IX86_BUILTIN_PSLLWI,
  IX86_BUILTIN_PSLLDI,
  IX86_BUILTIN_PSLLQI,
  IX86_BUILTIN_PSRAWI,
  IX86_BUILTIN_PSRADI,
  IX86_BUILTIN_PSRLWI,
  IX86_BUILTIN_PSRLDI,
  IX86_BUILTIN_PSRLQI,

  IX86_BUILTIN_PUNPCKHBW,
  IX86_BUILTIN_PUNPCKHWD,
  IX86_BUILTIN_PUNPCKHDQ,
  IX86_BUILTIN_PUNPCKLBW,
  IX86_BUILTIN_PUNPCKLWD,
  IX86_BUILTIN_PUNPCKLDQ,

  IX86_BUILTIN_SHUFPS,

  IX86_BUILTIN_RCPPS,
  IX86_BUILTIN_RCPSS,
  IX86_BUILTIN_RSQRTPS,
  IX86_BUILTIN_RSQRTPS_NR,
  IX86_BUILTIN_RSQRTSS,
  IX86_BUILTIN_RSQRTF,
  IX86_BUILTIN_SQRTPS,
  IX86_BUILTIN_SQRTPS_NR,
  IX86_BUILTIN_SQRTSS,

  IX86_BUILTIN_UNPCKHPS,
  IX86_BUILTIN_UNPCKLPS,

  IX86_BUILTIN_ANDPS,
  IX86_BUILTIN_ANDNPS,
  IX86_BUILTIN_ORPS,
  IX86_BUILTIN_XORPS,

  IX86_BUILTIN_LDMXCSR,
  IX86_BUILTIN_STMXCSR,
  IX86_BUILTIN_SFENCE,

  /* 3DNow! Original */
  IX86_BUILTIN_FEMMS,
  IX86_BUILTIN_PAVGUSB,
  IX86_BUILTIN_PF2ID,
  IX86_BUILTIN_PFACC,
  IX86_BUILTIN_PFADD,
  IX86_BUILTIN_PFCMPEQ,
  IX86_BUILTIN_PFCMPGE,
  IX86_BUILTIN_PFCMPGT,
  IX86_BUILTIN_PFMAX,
  IX86_BUILTIN_PFMIN,
  IX86_BUILTIN_PFMUL,
  IX86_BUILTIN_PFRCP,
  IX86_BUILTIN_PFRCPIT1,
  IX86_BUILTIN_PFRCPIT2,
  IX86_BUILTIN_PFRSQIT1,
  IX86_BUILTIN_PFRSQRT,
  IX86_BUILTIN_PFSUB,
  IX86_BUILTIN_PFSUBR,
  IX86_BUILTIN_PI2FD,
  IX86_BUILTIN_PMULHRW,

  /* 3DNow! Athlon Extensions */
  IX86_BUILTIN_PF2IW,
  IX86_BUILTIN_PFNACC,
  IX86_BUILTIN_PFPNACC,
  IX86_BUILTIN_PI2FW,
  IX86_BUILTIN_PSWAPDSI,
  IX86_BUILTIN_PSWAPDSF,

  /* SSE2 */
  IX86_BUILTIN_ADDPD,
  IX86_BUILTIN_ADDSD,
  IX86_BUILTIN_DIVPD,
  IX86_BUILTIN_DIVSD,
  IX86_BUILTIN_MULPD,
  IX86_BUILTIN_MULSD,
  IX86_BUILTIN_SUBPD,
  IX86_BUILTIN_SUBSD,

  IX86_BUILTIN_CMPEQPD,
  IX86_BUILTIN_CMPLTPD,
  IX86_BUILTIN_CMPLEPD,
  IX86_BUILTIN_CMPGTPD,
  IX86_BUILTIN_CMPGEPD,
  IX86_BUILTIN_CMPNEQPD,
  IX86_BUILTIN_CMPNLTPD,
  IX86_BUILTIN_CMPNLEPD,
  IX86_BUILTIN_CMPNGTPD,
  IX86_BUILTIN_CMPNGEPD,
  IX86_BUILTIN_CMPORDPD,
  IX86_BUILTIN_CMPUNORDPD,
  IX86_BUILTIN_CMPEQSD,
  IX86_BUILTIN_CMPLTSD,
  IX86_BUILTIN_CMPLESD,
  IX86_BUILTIN_CMPNEQSD,
  IX86_BUILTIN_CMPNLTSD,
  IX86_BUILTIN_CMPNLESD,
  IX86_BUILTIN_CMPORDSD,
  IX86_BUILTIN_CMPUNORDSD,

  IX86_BUILTIN_COMIEQSD,
  IX86_BUILTIN_COMILTSD,
  IX86_BUILTIN_COMILESD,
  IX86_BUILTIN_COMIGTSD,
  IX86_BUILTIN_COMIGESD,
  IX86_BUILTIN_COMINEQSD,
  IX86_BUILTIN_UCOMIEQSD,
  IX86_BUILTIN_UCOMILTSD,
  IX86_BUILTIN_UCOMILESD,
  IX86_BUILTIN_UCOMIGTSD,
  IX86_BUILTIN_UCOMIGESD,
  IX86_BUILTIN_UCOMINEQSD,

  IX86_BUILTIN_MAXPD,
  IX86_BUILTIN_MAXSD,
  IX86_BUILTIN_MINPD,
  IX86_BUILTIN_MINSD,

  IX86_BUILTIN_ANDPD,
  IX86_BUILTIN_ANDNPD,
  IX86_BUILTIN_ORPD,
  IX86_BUILTIN_XORPD,

  IX86_BUILTIN_SQRTPD,
  IX86_BUILTIN_SQRTSD,

  IX86_BUILTIN_UNPCKHPD,
  IX86_BUILTIN_UNPCKLPD,

  IX86_BUILTIN_SHUFPD,

  IX86_BUILTIN_LOADUPD,
  IX86_BUILTIN_STOREUPD,
  IX86_BUILTIN_MOVSD,

  IX86_BUILTIN_LOADHPD,
  IX86_BUILTIN_LOADLPD,

  IX86_BUILTIN_CVTDQ2PD,
  IX86_BUILTIN_CVTDQ2PS,

  IX86_BUILTIN_CVTPD2DQ,
  IX86_BUILTIN_CVTPD2PI,
  IX86_BUILTIN_CVTPD2PS,
  IX86_BUILTIN_CVTTPD2DQ,
  IX86_BUILTIN_CVTTPD2PI,

  IX86_BUILTIN_CVTPI2PD,
  IX86_BUILTIN_CVTSI2SD,
  IX86_BUILTIN_CVTSI642SD,

  IX86_BUILTIN_CVTSD2SI,
  IX86_BUILTIN_CVTSD2SI64,
  IX86_BUILTIN_CVTSD2SS,
  IX86_BUILTIN_CVTSS2SD,
  IX86_BUILTIN_CVTTSD2SI,
  IX86_BUILTIN_CVTTSD2SI64,

  IX86_BUILTIN_CVTPS2DQ,
  IX86_BUILTIN_CVTPS2PD,
  IX86_BUILTIN_CVTTPS2DQ,

  IX86_BUILTIN_MOVNTI,
  IX86_BUILTIN_MOVNTI64,
  IX86_BUILTIN_MOVNTPD,
  IX86_BUILTIN_MOVNTDQ,

  IX86_BUILTIN_MOVQ128,

  /* SSE2 MMX */
  IX86_BUILTIN_MASKMOVDQU,
  IX86_BUILTIN_MOVMSKPD,
  IX86_BUILTIN_PMOVMSKB128,

  IX86_BUILTIN_PACKSSWB128,
  IX86_BUILTIN_PACKSSDW128,
  IX86_BUILTIN_PACKUSWB128,

  IX86_BUILTIN_PADDB128,
  IX86_BUILTIN_PADDW128,
  IX86_BUILTIN_PADDD128,
  IX86_BUILTIN_PADDQ128,
  IX86_BUILTIN_PADDSB128,
  IX86_BUILTIN_PADDSW128,
  IX86_BUILTIN_PADDUSB128,
  IX86_BUILTIN_PADDUSW128,
  IX86_BUILTIN_PSUBB128,
  IX86_BUILTIN_PSUBW128,
  IX86_BUILTIN_PSUBD128,
  IX86_BUILTIN_PSUBQ128,
  IX86_BUILTIN_PSUBSB128,
  IX86_BUILTIN_PSUBSW128,
  IX86_BUILTIN_PSUBUSB128,
  IX86_BUILTIN_PSUBUSW128,

  IX86_BUILTIN_PAND128,
  IX86_BUILTIN_PANDN128,
  IX86_BUILTIN_POR128,
  IX86_BUILTIN_PXOR128,

  IX86_BUILTIN_PAVGB128,
  IX86_BUILTIN_PAVGW128,

  IX86_BUILTIN_PCMPEQB128,
  IX86_BUILTIN_PCMPEQW128,
  IX86_BUILTIN_PCMPEQD128,
  IX86_BUILTIN_PCMPGTB128,
  IX86_BUILTIN_PCMPGTW128,
  IX86_BUILTIN_PCMPGTD128,

  IX86_BUILTIN_PMADDWD128,

  IX86_BUILTIN_PMAXSW128,
  IX86_BUILTIN_PMAXUB128,
  IX86_BUILTIN_PMINSW128,
  IX86_BUILTIN_PMINUB128,

  IX86_BUILTIN_PMULUDQ,
  IX86_BUILTIN_PMULUDQ128,
  IX86_BUILTIN_PMULHUW128,
  IX86_BUILTIN_PMULHW128,
  IX86_BUILTIN_PMULLW128,

  IX86_BUILTIN_PSADBW128,
  IX86_BUILTIN_PSHUFHW,
  IX86_BUILTIN_PSHUFLW,
  IX86_BUILTIN_PSHUFD,

  IX86_BUILTIN_PSLLDQI128,
  IX86_BUILTIN_PSLLWI128,
  IX86_BUILTIN_PSLLDI128,
  IX86_BUILTIN_PSLLQI128,
  IX86_BUILTIN_PSRAWI128,
  IX86_BUILTIN_PSRADI128,
  IX86_BUILTIN_PSRLDQI128,
  IX86_BUILTIN_PSRLWI128,
  IX86_BUILTIN_PSRLDI128,
  IX86_BUILTIN_PSRLQI128,

  IX86_BUILTIN_PSLLDQ128,
  IX86_BUILTIN_PSLLW128,
  IX86_BUILTIN_PSLLD128,
  IX86_BUILTIN_PSLLQ128,
  IX86_BUILTIN_PSRAW128,
  IX86_BUILTIN_PSRAD128,
  IX86_BUILTIN_PSRLW128,
  IX86_BUILTIN_PSRLD128,
  IX86_BUILTIN_PSRLQ128,

  IX86_BUILTIN_PUNPCKHBW128,
  IX86_BUILTIN_PUNPCKHWD128,
  IX86_BUILTIN_PUNPCKHDQ128,
  IX86_BUILTIN_PUNPCKHQDQ128,
  IX86_BUILTIN_PUNPCKLBW128,
  IX86_BUILTIN_PUNPCKLWD128,
  IX86_BUILTIN_PUNPCKLDQ128,
  IX86_BUILTIN_PUNPCKLQDQ128,

  IX86_BUILTIN_CLFLUSH,
  IX86_BUILTIN_MFENCE,
  IX86_BUILTIN_LFENCE,
  IX86_BUILTIN_PAUSE,

  IX86_BUILTIN_BSRSI,
  IX86_BUILTIN_BSRDI,
  IX86_BUILTIN_RDPMC,
  IX86_BUILTIN_RDTSC,
  IX86_BUILTIN_RDTSCP,
  IX86_BUILTIN_ROLQI,
  IX86_BUILTIN_ROLHI,
  IX86_BUILTIN_RORQI,
  IX86_BUILTIN_RORHI,

  /* SSE3.  */
  IX86_BUILTIN_ADDSUBPS,
  IX86_BUILTIN_HADDPS,
  IX86_BUILTIN_HSUBPS,
  IX86_BUILTIN_MOVSHDUP,
  IX86_BUILTIN_MOVSLDUP,
  IX86_BUILTIN_ADDSUBPD,
  IX86_BUILTIN_HADDPD,
  IX86_BUILTIN_HSUBPD,
  IX86_BUILTIN_LDDQU,

  IX86_BUILTIN_MONITOR,
  IX86_BUILTIN_MWAIT,

  /* SSSE3.  */
  IX86_BUILTIN_PHADDW,
  IX86_BUILTIN_PHADDD,
  IX86_BUILTIN_PHADDSW,
  IX86_BUILTIN_PHSUBW,
  IX86_BUILTIN_PHSUBD,
  IX86_BUILTIN_PHSUBSW,
  IX86_BUILTIN_PMADDUBSW,
  IX86_BUILTIN_PMULHRSW,
  IX86_BUILTIN_PSHUFB,
  IX86_BUILTIN_PSIGNB,
  IX86_BUILTIN_PSIGNW,
  IX86_BUILTIN_PSIGND,
  IX86_BUILTIN_PALIGNR,
  IX86_BUILTIN_PABSB,
  IX86_BUILTIN_PABSW,
  IX86_BUILTIN_PABSD,

  IX86_BUILTIN_PHADDW128,
  IX86_BUILTIN_PHADDD128,
  IX86_BUILTIN_PHADDSW128,
  IX86_BUILTIN_PHSUBW128,
  IX86_BUILTIN_PHSUBD128,
  IX86_BUILTIN_PHSUBSW128,
  IX86_BUILTIN_PMADDUBSW128,
  IX86_BUILTIN_PMULHRSW128,
  IX86_BUILTIN_PSHUFB128,
  IX86_BUILTIN_PSIGNB128,
  IX86_BUILTIN_PSIGNW128,
  IX86_BUILTIN_PSIGND128,
  IX86_BUILTIN_PALIGNR128,
  IX86_BUILTIN_PABSB128,
  IX86_BUILTIN_PABSW128,
  IX86_BUILTIN_PABSD128,

  /* AMDFAM10 - SSE4A New Instructions.  */
  IX86_BUILTIN_MOVNTSD,
  IX86_BUILTIN_MOVNTSS,
  IX86_BUILTIN_EXTRQI,
  IX86_BUILTIN_EXTRQ,
  IX86_BUILTIN_INSERTQI,
  IX86_BUILTIN_INSERTQ,

  /* SSE4.1.  */
  IX86_BUILTIN_BLENDPD,
  IX86_BUILTIN_BLENDPS,
  IX86_BUILTIN_BLENDVPD,
  IX86_BUILTIN_BLENDVPS,
  IX86_BUILTIN_PBLENDVB128,
  IX86_BUILTIN_PBLENDW128,

  IX86_BUILTIN_DPPD,
  IX86_BUILTIN_DPPS,

  IX86_BUILTIN_INSERTPS128,

  IX86_BUILTIN_MOVNTDQA,
  IX86_BUILTIN_MPSADBW128,
  IX86_BUILTIN_PACKUSDW128,
  IX86_BUILTIN_PCMPEQQ,
  IX86_BUILTIN_PHMINPOSUW128,

  IX86_BUILTIN_PMAXSB128,
  IX86_BUILTIN_PMAXSD128,
  IX86_BUILTIN_PMAXUD128,
  IX86_BUILTIN_PMAXUW128,

  IX86_BUILTIN_PMINSB128,
  IX86_BUILTIN_PMINSD128,
  IX86_BUILTIN_PMINUD128,
  IX86_BUILTIN_PMINUW128,

  IX86_BUILTIN_PMOVSXBW128,
  IX86_BUILTIN_PMOVSXBD128,
  IX86_BUILTIN_PMOVSXBQ128,
  IX86_BUILTIN_PMOVSXWD128,
  IX86_BUILTIN_PMOVSXWQ128,
  IX86_BUILTIN_PMOVSXDQ128,

  IX86_BUILTIN_PMOVZXBW128,
  IX86_BUILTIN_PMOVZXBD128,
  IX86_BUILTIN_PMOVZXBQ128,
  IX86_BUILTIN_PMOVZXWD128,
  IX86_BUILTIN_PMOVZXWQ128,
  IX86_BUILTIN_PMOVZXDQ128,

  IX86_BUILTIN_PMULDQ128,
  IX86_BUILTIN_PMULLD128,

  IX86_BUILTIN_ROUNDSD,
  IX86_BUILTIN_ROUNDSS,

  IX86_BUILTIN_ROUNDPD,
  IX86_BUILTIN_ROUNDPS,

  IX86_BUILTIN_FLOORPD,
  IX86_BUILTIN_CEILPD,
  IX86_BUILTIN_TRUNCPD,
  IX86_BUILTIN_RINTPD,
  IX86_BUILTIN_ROUNDPD_AZ,

  IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
  IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
  IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,

  IX86_BUILTIN_FLOORPS,
  IX86_BUILTIN_CEILPS,
  IX86_BUILTIN_TRUNCPS,
  IX86_BUILTIN_RINTPS,
  IX86_BUILTIN_ROUNDPS_AZ,

  IX86_BUILTIN_FLOORPS_SFIX,
  IX86_BUILTIN_CEILPS_SFIX,
  IX86_BUILTIN_ROUNDPS_AZ_SFIX,

  IX86_BUILTIN_PTESTZ,
  IX86_BUILTIN_PTESTC,
  IX86_BUILTIN_PTESTNZC,

  IX86_BUILTIN_VEC_INIT_V2SI,
  IX86_BUILTIN_VEC_INIT_V4HI,
  IX86_BUILTIN_VEC_INIT_V8QI,
  IX86_BUILTIN_VEC_EXT_V2DF,
  IX86_BUILTIN_VEC_EXT_V2DI,
  IX86_BUILTIN_VEC_EXT_V4SF,
  IX86_BUILTIN_VEC_EXT_V4SI,
  IX86_BUILTIN_VEC_EXT_V8HI,
  IX86_BUILTIN_VEC_EXT_V2SI,
  IX86_BUILTIN_VEC_EXT_V4HI,
  IX86_BUILTIN_VEC_EXT_V16QI,
  IX86_BUILTIN_VEC_SET_V2DI,
  IX86_BUILTIN_VEC_SET_V4SF,
  IX86_BUILTIN_VEC_SET_V4SI,
  IX86_BUILTIN_VEC_SET_V8HI,
  IX86_BUILTIN_VEC_SET_V4HI,
  IX86_BUILTIN_VEC_SET_V16QI,

  IX86_BUILTIN_VEC_PACK_SFIX,
  IX86_BUILTIN_VEC_PACK_SFIX256,

  /* SSE4.2.  */
  IX86_BUILTIN_CRC32QI,
  IX86_BUILTIN_CRC32HI,
  IX86_BUILTIN_CRC32SI,
  IX86_BUILTIN_CRC32DI,

  IX86_BUILTIN_PCMPESTRI128,
  IX86_BUILTIN_PCMPESTRM128,
  IX86_BUILTIN_PCMPESTRA128,
  IX86_BUILTIN_PCMPESTRC128,
  IX86_BUILTIN_PCMPESTRO128,
  IX86_BUILTIN_PCMPESTRS128,
  IX86_BUILTIN_PCMPESTRZ128,
  IX86_BUILTIN_PCMPISTRI128,
  IX86_BUILTIN_PCMPISTRM128,
  IX86_BUILTIN_PCMPISTRA128,
  IX86_BUILTIN_PCMPISTRC128,
  IX86_BUILTIN_PCMPISTRO128,
  IX86_BUILTIN_PCMPISTRS128,
  IX86_BUILTIN_PCMPISTRZ128,

  IX86_BUILTIN_PCMPGTQ,

  /* AES instructions */
  IX86_BUILTIN_AESENC128,
  IX86_BUILTIN_AESENCLAST128,
  IX86_BUILTIN_AESDEC128,
  IX86_BUILTIN_AESDECLAST128,
  IX86_BUILTIN_AESIMC128,
  IX86_BUILTIN_AESKEYGENASSIST128,

  /* PCLMUL instruction */
  IX86_BUILTIN_PCLMULQDQ128,

  /* AVX */
  IX86_BUILTIN_ADDPD256,
  IX86_BUILTIN_ADDPS256,
  IX86_BUILTIN_ADDSUBPD256,
  IX86_BUILTIN_ADDSUBPS256,
  IX86_BUILTIN_ANDPD256,
  IX86_BUILTIN_ANDPS256,
  IX86_BUILTIN_ANDNPD256,
  IX86_BUILTIN_ANDNPS256,
  IX86_BUILTIN_BLENDPD256,
  IX86_BUILTIN_BLENDPS256,
  IX86_BUILTIN_BLENDVPD256,
  IX86_BUILTIN_BLENDVPS256,
  IX86_BUILTIN_DIVPD256,
  IX86_BUILTIN_DIVPS256,
  IX86_BUILTIN_DPPS256,
  IX86_BUILTIN_HADDPD256,
  IX86_BUILTIN_HADDPS256,
  IX86_BUILTIN_HSUBPD256,
  IX86_BUILTIN_HSUBPS256,
  IX86_BUILTIN_MAXPD256,
  IX86_BUILTIN_MAXPS256,
  IX86_BUILTIN_MINPD256,
  IX86_BUILTIN_MINPS256,
  IX86_BUILTIN_MULPD256,
  IX86_BUILTIN_MULPS256,
  IX86_BUILTIN_ORPD256,
  IX86_BUILTIN_ORPS256,
  IX86_BUILTIN_SHUFPD256,
  IX86_BUILTIN_SHUFPS256,
  IX86_BUILTIN_SUBPD256,
  IX86_BUILTIN_SUBPS256,
  IX86_BUILTIN_XORPD256,
  IX86_BUILTIN_XORPS256,
  IX86_BUILTIN_CMPSD,
  IX86_BUILTIN_CMPSS,
  IX86_BUILTIN_CMPPD,
  IX86_BUILTIN_CMPPS,
  IX86_BUILTIN_CMPPD256,
  IX86_BUILTIN_CMPPS256,
  IX86_BUILTIN_CVTDQ2PD256,
  IX86_BUILTIN_CVTDQ2PS256,
  IX86_BUILTIN_CVTPD2PS256,
  IX86_BUILTIN_CVTPS2DQ256,
  IX86_BUILTIN_CVTPS2PD256,
  IX86_BUILTIN_CVTTPD2DQ256,
  IX86_BUILTIN_CVTPD2DQ256,
  IX86_BUILTIN_CVTTPS2DQ256,
  IX86_BUILTIN_EXTRACTF128PD256,
  IX86_BUILTIN_EXTRACTF128PS256,
  IX86_BUILTIN_EXTRACTF128SI256,
  IX86_BUILTIN_VZEROALL,
  IX86_BUILTIN_VZEROUPPER,
  IX86_BUILTIN_VPERMILVARPD,
  IX86_BUILTIN_VPERMILVARPS,
  IX86_BUILTIN_VPERMILVARPD256,
  IX86_BUILTIN_VPERMILVARPS256,
  IX86_BUILTIN_VPERMILPD,
  IX86_BUILTIN_VPERMILPS,
  IX86_BUILTIN_VPERMILPD256,
  IX86_BUILTIN_VPERMILPS256,
  IX86_BUILTIN_VPERMIL2PD,
  IX86_BUILTIN_VPERMIL2PS,
  IX86_BUILTIN_VPERMIL2PD256,
  IX86_BUILTIN_VPERMIL2PS256,
  IX86_BUILTIN_VPERM2F128PD256,
  IX86_BUILTIN_VPERM2F128PS256,
  IX86_BUILTIN_VPERM2F128SI256,
  IX86_BUILTIN_VBROADCASTSS,
  IX86_BUILTIN_VBROADCASTSD256,
  IX86_BUILTIN_VBROADCASTSS256,
  IX86_BUILTIN_VBROADCASTPD256,
  IX86_BUILTIN_VBROADCASTPS256,
  IX86_BUILTIN_VINSERTF128PD256,
  IX86_BUILTIN_VINSERTF128PS256,
  IX86_BUILTIN_VINSERTF128SI256,
  IX86_BUILTIN_LOADUPD256,
  IX86_BUILTIN_LOADUPS256,
  IX86_BUILTIN_STOREUPD256,
  IX86_BUILTIN_STOREUPS256,
  IX86_BUILTIN_LDDQU256,
  IX86_BUILTIN_MOVNTDQ256,
  IX86_BUILTIN_MOVNTPD256,
  IX86_BUILTIN_MOVNTPS256,
  IX86_BUILTIN_LOADDQU256,
  IX86_BUILTIN_STOREDQU256,
  IX86_BUILTIN_MASKLOADPD,
  IX86_BUILTIN_MASKLOADPS,
  IX86_BUILTIN_MASKSTOREPD,
  IX86_BUILTIN_MASKSTOREPS,
  IX86_BUILTIN_MASKLOADPD256,
  IX86_BUILTIN_MASKLOADPS256,
  IX86_BUILTIN_MASKSTOREPD256,
  IX86_BUILTIN_MASKSTOREPS256,
  IX86_BUILTIN_MOVSHDUP256,
  IX86_BUILTIN_MOVSLDUP256,
  IX86_BUILTIN_MOVDDUP256,

  IX86_BUILTIN_SQRTPD256,
  IX86_BUILTIN_SQRTPS256,
  IX86_BUILTIN_SQRTPS_NR256,
  IX86_BUILTIN_RSQRTPS256,
  IX86_BUILTIN_RSQRTPS_NR256,

  IX86_BUILTIN_RCPPS256,

  IX86_BUILTIN_ROUNDPD256,
  IX86_BUILTIN_ROUNDPS256,

  IX86_BUILTIN_FLOORPD256,
  IX86_BUILTIN_CEILPD256,
  IX86_BUILTIN_TRUNCPD256,
  IX86_BUILTIN_RINTPD256,
  IX86_BUILTIN_ROUNDPD_AZ256,

  IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
  IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
  IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,

  IX86_BUILTIN_FLOORPS256,
  IX86_BUILTIN_CEILPS256,
  IX86_BUILTIN_TRUNCPS256,
  IX86_BUILTIN_RINTPS256,
  IX86_BUILTIN_ROUNDPS_AZ256,

  IX86_BUILTIN_FLOORPS_SFIX256,
  IX86_BUILTIN_CEILPS_SFIX256,
  IX86_BUILTIN_ROUNDPS_AZ_SFIX256,

  IX86_BUILTIN_UNPCKHPD256,
  IX86_BUILTIN_UNPCKLPD256,
  IX86_BUILTIN_UNPCKHPS256,
  IX86_BUILTIN_UNPCKLPS256,

  IX86_BUILTIN_SI256_SI,
  IX86_BUILTIN_PS256_PS,
  IX86_BUILTIN_PD256_PD,
  IX86_BUILTIN_SI_SI256,
  IX86_BUILTIN_PS_PS256,
  IX86_BUILTIN_PD_PD256,

  IX86_BUILTIN_VTESTZPD,
  IX86_BUILTIN_VTESTCPD,
  IX86_BUILTIN_VTESTNZCPD,
  IX86_BUILTIN_VTESTZPS,
  IX86_BUILTIN_VTESTCPS,
  IX86_BUILTIN_VTESTNZCPS,
  IX86_BUILTIN_VTESTZPD256,
  IX86_BUILTIN_VTESTCPD256,
  IX86_BUILTIN_VTESTNZCPD256,
  IX86_BUILTIN_VTESTZPS256,
  IX86_BUILTIN_VTESTCPS256,
  IX86_BUILTIN_VTESTNZCPS256,
  IX86_BUILTIN_PTESTZ256,
  IX86_BUILTIN_PTESTC256,
  IX86_BUILTIN_PTESTNZC256,

  IX86_BUILTIN_MOVMSKPD256,
  IX86_BUILTIN_MOVMSKPS256,

  /* AVX2 */
  IX86_BUILTIN_MPSADBW256,
  IX86_BUILTIN_PABSB256,
  IX86_BUILTIN_PABSW256,
  IX86_BUILTIN_PABSD256,
  IX86_BUILTIN_PACKSSDW256,
  IX86_BUILTIN_PACKSSWB256,
  IX86_BUILTIN_PACKUSDW256,
  IX86_BUILTIN_PACKUSWB256,
  IX86_BUILTIN_PADDB256,
  IX86_BUILTIN_PADDW256,
  IX86_BUILTIN_PADDD256,
  IX86_BUILTIN_PADDQ256,
  IX86_BUILTIN_PADDSB256,
  IX86_BUILTIN_PADDSW256,
  IX86_BUILTIN_PADDUSB256,
  IX86_BUILTIN_PADDUSW256,
  IX86_BUILTIN_PALIGNR256,
  IX86_BUILTIN_AND256I,
  IX86_BUILTIN_ANDNOT256I,
  IX86_BUILTIN_PAVGB256,
  IX86_BUILTIN_PAVGW256,
  IX86_BUILTIN_PBLENDVB256,
  IX86_BUILTIN_PBLENDVW256,
  IX86_BUILTIN_PCMPEQB256,
  IX86_BUILTIN_PCMPEQW256,
  IX86_BUILTIN_PCMPEQD256,
  IX86_BUILTIN_PCMPEQQ256,
  IX86_BUILTIN_PCMPGTB256,
  IX86_BUILTIN_PCMPGTW256,
  IX86_BUILTIN_PCMPGTD256,
  IX86_BUILTIN_PCMPGTQ256,
  IX86_BUILTIN_PHADDW256,
  IX86_BUILTIN_PHADDD256,
  IX86_BUILTIN_PHADDSW256,
  IX86_BUILTIN_PHSUBW256,
  IX86_BUILTIN_PHSUBD256,
  IX86_BUILTIN_PHSUBSW256,
  IX86_BUILTIN_PMADDUBSW256,
  IX86_BUILTIN_PMADDWD256,
  IX86_BUILTIN_PMAXSB256,
  IX86_BUILTIN_PMAXSW256,
  IX86_BUILTIN_PMAXSD256,
  IX86_BUILTIN_PMAXUB256,
  IX86_BUILTIN_PMAXUW256,
  IX86_BUILTIN_PMAXUD256,
  IX86_BUILTIN_PMINSB256,
  IX86_BUILTIN_PMINSW256,
  IX86_BUILTIN_PMINSD256,
  IX86_BUILTIN_PMINUB256,
  IX86_BUILTIN_PMINUW256,
  IX86_BUILTIN_PMINUD256,
  IX86_BUILTIN_PMOVMSKB256,
  IX86_BUILTIN_PMOVSXBW256,
  IX86_BUILTIN_PMOVSXBD256,
  IX86_BUILTIN_PMOVSXBQ256,
  IX86_BUILTIN_PMOVSXWD256,
  IX86_BUILTIN_PMOVSXWQ256,
  IX86_BUILTIN_PMOVSXDQ256,
  IX86_BUILTIN_PMOVZXBW256,
  IX86_BUILTIN_PMOVZXBD256,
  IX86_BUILTIN_PMOVZXBQ256,
  IX86_BUILTIN_PMOVZXWD256,
  IX86_BUILTIN_PMOVZXWQ256,
  IX86_BUILTIN_PMOVZXDQ256,
  IX86_BUILTIN_PMULDQ256,
  IX86_BUILTIN_PMULHRSW256,
  IX86_BUILTIN_PMULHUW256,
  IX86_BUILTIN_PMULHW256,
  IX86_BUILTIN_PMULLW256,
  IX86_BUILTIN_PMULLD256,
  IX86_BUILTIN_PMULUDQ256,
  IX86_BUILTIN_POR256,
  IX86_BUILTIN_PSADBW256,
  IX86_BUILTIN_PSHUFB256,
  IX86_BUILTIN_PSHUFD256,
  IX86_BUILTIN_PSHUFHW256,
  IX86_BUILTIN_PSHUFLW256,
  IX86_BUILTIN_PSIGNB256,
  IX86_BUILTIN_PSIGNW256,
  IX86_BUILTIN_PSIGND256,
  IX86_BUILTIN_PSLLDQI256,
  IX86_BUILTIN_PSLLWI256,
  IX86_BUILTIN_PSLLW256,
  IX86_BUILTIN_PSLLDI256,
  IX86_BUILTIN_PSLLD256,
  IX86_BUILTIN_PSLLQI256,
  IX86_BUILTIN_PSLLQ256,
  IX86_BUILTIN_PSRAWI256,
  IX86_BUILTIN_PSRAW256,
  IX86_BUILTIN_PSRADI256,
  IX86_BUILTIN_PSRAD256,
  IX86_BUILTIN_PSRLDQI256,
  IX86_BUILTIN_PSRLWI256,
  IX86_BUILTIN_PSRLW256,
  IX86_BUILTIN_PSRLDI256,
  IX86_BUILTIN_PSRLD256,
  IX86_BUILTIN_PSRLQI256,
  IX86_BUILTIN_PSRLQ256,
  IX86_BUILTIN_PSUBB256,
  IX86_BUILTIN_PSUBW256,
  IX86_BUILTIN_PSUBD256,
  IX86_BUILTIN_PSUBQ256,
  IX86_BUILTIN_PSUBSB256,
  IX86_BUILTIN_PSUBSW256,
  IX86_BUILTIN_PSUBUSB256,
  IX86_BUILTIN_PSUBUSW256,
  IX86_BUILTIN_PUNPCKHBW256,
  IX86_BUILTIN_PUNPCKHWD256,
  IX86_BUILTIN_PUNPCKHDQ256,
  IX86_BUILTIN_PUNPCKHQDQ256,
  IX86_BUILTIN_PUNPCKLBW256,
  IX86_BUILTIN_PUNPCKLWD256,
  IX86_BUILTIN_PUNPCKLDQ256,
  IX86_BUILTIN_PUNPCKLQDQ256,
  IX86_BUILTIN_PXOR256,
  IX86_BUILTIN_MOVNTDQA256,
  IX86_BUILTIN_VBROADCASTSS_PS,
  IX86_BUILTIN_VBROADCASTSS_PS256,
  IX86_BUILTIN_VBROADCASTSD_PD256,
  IX86_BUILTIN_VBROADCASTSI256,
  IX86_BUILTIN_PBLENDD256,
  IX86_BUILTIN_PBLENDD128,
  IX86_BUILTIN_PBROADCASTB256,
  IX86_BUILTIN_PBROADCASTW256,
  IX86_BUILTIN_PBROADCASTD256,
  IX86_BUILTIN_PBROADCASTQ256,
  IX86_BUILTIN_PBROADCASTB128,
  IX86_BUILTIN_PBROADCASTW128,
  IX86_BUILTIN_PBROADCASTD128,
  IX86_BUILTIN_PBROADCASTQ128,
  IX86_BUILTIN_VPERMVARSI256,
  IX86_BUILTIN_VPERMDF256,
  IX86_BUILTIN_VPERMVARSF256,
  IX86_BUILTIN_VPERMDI256,
  IX86_BUILTIN_VPERMTI256,
  IX86_BUILTIN_VEXTRACT128I256,
  IX86_BUILTIN_VINSERT128I256,
  IX86_BUILTIN_MASKLOADD,
  IX86_BUILTIN_MASKLOADQ,
  IX86_BUILTIN_MASKLOADD256,
  IX86_BUILTIN_MASKLOADQ256,
  IX86_BUILTIN_MASKSTORED,
  IX86_BUILTIN_MASKSTOREQ,
  IX86_BUILTIN_MASKSTORED256,
  IX86_BUILTIN_MASKSTOREQ256,
  IX86_BUILTIN_PSLLVV4DI,
  IX86_BUILTIN_PSLLVV2DI,
  IX86_BUILTIN_PSLLVV8SI,
  IX86_BUILTIN_PSLLVV4SI,
  IX86_BUILTIN_PSRAVV8SI,
  IX86_BUILTIN_PSRAVV4SI,
  IX86_BUILTIN_PSRLVV4DI,
  IX86_BUILTIN_PSRLVV2DI,
  IX86_BUILTIN_PSRLVV8SI,
  IX86_BUILTIN_PSRLVV4SI,

  IX86_BUILTIN_GATHERSIV2DF,
  IX86_BUILTIN_GATHERSIV4DF,
  IX86_BUILTIN_GATHERDIV2DF,
  IX86_BUILTIN_GATHERDIV4DF,
  IX86_BUILTIN_GATHERSIV4SF,
  IX86_BUILTIN_GATHERSIV8SF,
  IX86_BUILTIN_GATHERDIV4SF,
  IX86_BUILTIN_GATHERDIV8SF,
  IX86_BUILTIN_GATHERSIV2DI,
  IX86_BUILTIN_GATHERSIV4DI,
  IX86_BUILTIN_GATHERDIV2DI,
  IX86_BUILTIN_GATHERDIV4DI,
  IX86_BUILTIN_GATHERSIV4SI,
  IX86_BUILTIN_GATHERSIV8SI,
  IX86_BUILTIN_GATHERDIV4SI,
  IX86_BUILTIN_GATHERDIV8SI,

  /* Alternate 4 element gather for the vectorizer where
     all operands are 32-byte wide.  */
  IX86_BUILTIN_GATHERALTSIV4DF,
  IX86_BUILTIN_GATHERALTDIV8SF,
  IX86_BUILTIN_GATHERALTSIV4DI,
  IX86_BUILTIN_GATHERALTDIV8SI,

  /* TFmode support builtins.  */
  IX86_BUILTIN_INFQ,
  IX86_BUILTIN_HUGE_VALQ,
  IX86_BUILTIN_FABSQ,
  IX86_BUILTIN_COPYSIGNQ,

  /* Vectorizer support builtins.  */
  IX86_BUILTIN_CPYSGNPS,
  IX86_BUILTIN_CPYSGNPD,
  IX86_BUILTIN_CPYSGNPS256,
  IX86_BUILTIN_CPYSGNPD256,

  /* FMA4 instructions.  */
  IX86_BUILTIN_VFMADDSS,
  IX86_BUILTIN_VFMADDSD,
  IX86_BUILTIN_VFMADDPS,
  IX86_BUILTIN_VFMADDPD,
  IX86_BUILTIN_VFMADDPS256,
  IX86_BUILTIN_VFMADDPD256,
  IX86_BUILTIN_VFMADDSUBPS,
  IX86_BUILTIN_VFMADDSUBPD,
  IX86_BUILTIN_VFMADDSUBPS256,
  IX86_BUILTIN_VFMADDSUBPD256,

  /* FMA3 instructions.  */
  IX86_BUILTIN_VFMADDSS3,
  IX86_BUILTIN_VFMADDSD3,

  /* XOP instructions.  */
  IX86_BUILTIN_VPCMOV,
  IX86_BUILTIN_VPCMOV_V2DI,
  IX86_BUILTIN_VPCMOV_V4SI,
  IX86_BUILTIN_VPCMOV_V8HI,
  IX86_BUILTIN_VPCMOV_V16QI,
  IX86_BUILTIN_VPCMOV_V4SF,
  IX86_BUILTIN_VPCMOV_V2DF,
  IX86_BUILTIN_VPCMOV256,
  IX86_BUILTIN_VPCMOV_V4DI256,
  IX86_BUILTIN_VPCMOV_V8SI256,
  IX86_BUILTIN_VPCMOV_V16HI256,
  IX86_BUILTIN_VPCMOV_V32QI256,
  IX86_BUILTIN_VPCMOV_V8SF256,
  IX86_BUILTIN_VPCMOV_V4DF256,

  IX86_BUILTIN_VPPERM,

  IX86_BUILTIN_VPMACSSWW,
  IX86_BUILTIN_VPMACSWW,
  IX86_BUILTIN_VPMACSSWD,
  IX86_BUILTIN_VPMACSWD,
  IX86_BUILTIN_VPMACSSDD,
  IX86_BUILTIN_VPMACSDD,
  IX86_BUILTIN_VPMACSSDQL,
  IX86_BUILTIN_VPMACSSDQH,
  IX86_BUILTIN_VPMACSDQL,
  IX86_BUILTIN_VPMACSDQH,
  IX86_BUILTIN_VPMADCSSWD,
  IX86_BUILTIN_VPMADCSWD,

  IX86_BUILTIN_VPHADDBW,
  IX86_BUILTIN_VPHADDBD,
  IX86_BUILTIN_VPHADDBQ,
  IX86_BUILTIN_VPHADDWD,
  IX86_BUILTIN_VPHADDWQ,
  IX86_BUILTIN_VPHADDDQ,
  IX86_BUILTIN_VPHADDUBW,
  IX86_BUILTIN_VPHADDUBD,
  IX86_BUILTIN_VPHADDUBQ,
  IX86_BUILTIN_VPHADDUWD,
  IX86_BUILTIN_VPHADDUWQ,
  IX86_BUILTIN_VPHADDUDQ,
  IX86_BUILTIN_VPHSUBBW,
  IX86_BUILTIN_VPHSUBWD,
  IX86_BUILTIN_VPHSUBDQ,

  IX86_BUILTIN_VPROTB,
  IX86_BUILTIN_VPROTW,
  IX86_BUILTIN_VPROTD,
  IX86_BUILTIN_VPROTQ,
  IX86_BUILTIN_VPROTB_IMM,
  IX86_BUILTIN_VPROTW_IMM,
  IX86_BUILTIN_VPROTD_IMM,
  IX86_BUILTIN_VPROTQ_IMM,

  IX86_BUILTIN_VPSHLB,
  IX86_BUILTIN_VPSHLW,
  IX86_BUILTIN_VPSHLD,
  IX86_BUILTIN_VPSHLQ,
  IX86_BUILTIN_VPSHAB,
  IX86_BUILTIN_VPSHAW,
  IX86_BUILTIN_VPSHAD,
  IX86_BUILTIN_VPSHAQ,

  IX86_BUILTIN_VFRCZSS,
  IX86_BUILTIN_VFRCZSD,
  IX86_BUILTIN_VFRCZPS,
  IX86_BUILTIN_VFRCZPD,
  IX86_BUILTIN_VFRCZPS256,
  IX86_BUILTIN_VFRCZPD256,

  IX86_BUILTIN_VPCOMEQUB,
  IX86_BUILTIN_VPCOMNEUB,
  IX86_BUILTIN_VPCOMLTUB,
  IX86_BUILTIN_VPCOMLEUB,
  IX86_BUILTIN_VPCOMGTUB,
  IX86_BUILTIN_VPCOMGEUB,
  IX86_BUILTIN_VPCOMFALSEUB,
  IX86_BUILTIN_VPCOMTRUEUB,

  IX86_BUILTIN_VPCOMEQUW,
  IX86_BUILTIN_VPCOMNEUW,
  IX86_BUILTIN_VPCOMLTUW,
  IX86_BUILTIN_VPCOMLEUW,
  IX86_BUILTIN_VPCOMGTUW,
  IX86_BUILTIN_VPCOMGEUW,
  IX86_BUILTIN_VPCOMFALSEUW,
  IX86_BUILTIN_VPCOMTRUEUW,

  IX86_BUILTIN_VPCOMEQUD,
  IX86_BUILTIN_VPCOMNEUD,
  IX86_BUILTIN_VPCOMLTUD,
  IX86_BUILTIN_VPCOMLEUD,
  IX86_BUILTIN_VPCOMGTUD,
  IX86_BUILTIN_VPCOMGEUD,
  IX86_BUILTIN_VPCOMFALSEUD,
  IX86_BUILTIN_VPCOMTRUEUD,

  IX86_BUILTIN_VPCOMEQUQ,
  IX86_BUILTIN_VPCOMNEUQ,
  IX86_BUILTIN_VPCOMLTUQ,
  IX86_BUILTIN_VPCOMLEUQ,
  IX86_BUILTIN_VPCOMGTUQ,
  IX86_BUILTIN_VPCOMGEUQ,
  IX86_BUILTIN_VPCOMFALSEUQ,
  IX86_BUILTIN_VPCOMTRUEUQ,

  IX86_BUILTIN_VPCOMEQB,
  IX86_BUILTIN_VPCOMNEB,
  IX86_BUILTIN_VPCOMLTB,
  IX86_BUILTIN_VPCOMLEB,
  IX86_BUILTIN_VPCOMGTB,
  IX86_BUILTIN_VPCOMGEB,
  IX86_BUILTIN_VPCOMFALSEB,
  IX86_BUILTIN_VPCOMTRUEB,

  IX86_BUILTIN_VPCOMEQW,
  IX86_BUILTIN_VPCOMNEW,
  IX86_BUILTIN_VPCOMLTW,
  IX86_BUILTIN_VPCOMLEW,
  IX86_BUILTIN_VPCOMGTW,
  IX86_BUILTIN_VPCOMGEW,
  IX86_BUILTIN_VPCOMFALSEW,
  IX86_BUILTIN_VPCOMTRUEW,

  IX86_BUILTIN_VPCOMEQD,
  IX86_BUILTIN_VPCOMNED,
  IX86_BUILTIN_VPCOMLTD,
  IX86_BUILTIN_VPCOMLED,
  IX86_BUILTIN_VPCOMGTD,
  IX86_BUILTIN_VPCOMGED,
  IX86_BUILTIN_VPCOMFALSED,
  IX86_BUILTIN_VPCOMTRUED,

  IX86_BUILTIN_VPCOMEQQ,
  IX86_BUILTIN_VPCOMNEQ,
  IX86_BUILTIN_VPCOMLTQ,
  IX86_BUILTIN_VPCOMLEQ,
  IX86_BUILTIN_VPCOMGTQ,
  IX86_BUILTIN_VPCOMGEQ,
  IX86_BUILTIN_VPCOMFALSEQ,
  IX86_BUILTIN_VPCOMTRUEQ,

  /* LWP instructions.  */
  IX86_BUILTIN_LLWPCB,
  IX86_BUILTIN_SLWPCB,
  IX86_BUILTIN_LWPVAL32,
  IX86_BUILTIN_LWPVAL64,
  IX86_BUILTIN_LWPINS32,
  IX86_BUILTIN_LWPINS64,

  IX86_BUILTIN_CLZS,

  /* RTM */
  IX86_BUILTIN_XBEGIN,
  IX86_BUILTIN_XEND,
  IX86_BUILTIN_XABORT,
  IX86_BUILTIN_XTEST,

  /* BMI instructions.  */
  IX86_BUILTIN_BEXTR32,
  IX86_BUILTIN_BEXTR64,
  IX86_BUILTIN_CTZS,

  /* TBM instructions.  */
  IX86_BUILTIN_BEXTRI32,
  IX86_BUILTIN_BEXTRI64,

  /* BMI2 instructions.  */
  IX86_BUILTIN_BZHI32,
  IX86_BUILTIN_BZHI64,
  IX86_BUILTIN_PDEP32,
  IX86_BUILTIN_PDEP64,
  IX86_BUILTIN_PEXT32,
  IX86_BUILTIN_PEXT64,

  /* FSGSBASE instructions.  */
  IX86_BUILTIN_RDFSBASE32,
  IX86_BUILTIN_RDFSBASE64,
  IX86_BUILTIN_RDGSBASE32,
  IX86_BUILTIN_RDGSBASE64,
  IX86_BUILTIN_WRFSBASE32,
  IX86_BUILTIN_WRFSBASE64,
  IX86_BUILTIN_WRGSBASE32,
  IX86_BUILTIN_WRGSBASE64,

  /* RDRND instructions.  */
  IX86_BUILTIN_RDRAND16_STEP,
  IX86_BUILTIN_RDRAND32_STEP,
  IX86_BUILTIN_RDRAND64_STEP,

  /* F16C instructions.  */
  IX86_BUILTIN_CVTPH2PS,
  IX86_BUILTIN_CVTPH2PS256,
  IX86_BUILTIN_CVTPS2PH,
  IX86_BUILTIN_CVTPS2PH256,

  /* CFString built-in for darwin */
  IX86_BUILTIN_CFSTRING,

  IX86_BUILTIN_MAX
};
25747 /* Table for the ix86 builtin decls. */
25748 static GTY(()) tree ix86_builtins
[(int) IX86_BUILTIN_MAX
];
/* Table of all of the builtin functions that are possible with different ISAs
   but are waiting to be built until a function is declared to use that
   ISA.  */
struct builtin_isa
{
  const char *name;			/* function name */
  enum ix86_builtin_func_type tcode;	/* type to use in the declaration */
  HOST_WIDE_INT isa;			/* isa_flags this builtin is defined for */
  bool const_p;				/* true if the declaration is constant */
  bool set_and_not_built_p;		/* recorded but decl not built yet */
};

static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the
   MASK of which isa_flags to use in the ix86_builtins_isa array.  Stores the
   function decl in the ix86_builtins array.  Returns the function decl or
   NULL_TREE, if the builtin was not added.

   If the front end has a special hook for builtin functions, delay adding
   builtin functions that aren't in the current ISA until the ISA is changed
   with function specific optimization.  Doing so can save about 300K for the
   default compiler.  When the builtin is expanded, check at that time whether
   it is valid.

   If the front end doesn't have a special hook, record all builtins, even if
   they aren't in the current ISA, in case the user uses function specific
   options for a different ISA, so that we don't get scope errors if a builtin
   is added in the middle of a function scope.  */

static inline tree
def_builtin (HOST_WIDE_INT mask, const char *name,
	     enum ix86_builtin_func_type tcode,
	     enum ix86_builtins code)
{
  tree decl = NULL_TREE;

  if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
    {
      ix86_builtins_isa[(int) code].isa = mask;

      mask &= ~OPTION_MASK_ISA_64BIT;
      if (mask == 0
	  || (mask & ix86_isa_flags) != 0
	  || (lang_hooks.builtin_function
	      == lang_hooks.builtin_function_ext_scope))
	{
	  tree type = ix86_get_builtin_func_type (tcode);
	  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
				       NULL, NULL_TREE);
	  ix86_builtins[(int) code] = decl;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = false;
	}
      else
	{
	  /* Defer: remember everything needed to build the decl later.  */
	  ix86_builtins[(int) code] = NULL_TREE;
	  ix86_builtins_isa[(int) code].tcode = tcode;
	  ix86_builtins_isa[(int) code].name = name;
	  ix86_builtins_isa[(int) code].const_p = false;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = true;
	}
    }

  return decl;
}
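/* Illustrative sketch only, not part of the upstream registration flow
   (the real calls are generated from the bdesc_* tables further below):
   a single XOP builtin could be registered by hand as

     def_builtin (OPTION_MASK_ISA_XOP, "__builtin_ia32_vprotb",
		  V16QI_FTYPE_V16QI_V16QI, IX86_BUILTIN_VPROTB);

   When -mxop is not enabled and the front end has its own builtin hook,
   this only records the entry in ix86_builtins_isa; the decl itself is
   built later by ix86_add_new_builtins once the ISA becomes available.  */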
/* Like def_builtin, but also marks the function decl "const".  */

static inline tree
def_builtin_const (HOST_WIDE_INT mask, const char *name,
		   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
{
  tree decl = def_builtin (mask, name, tcode, code);
  if (decl)
    TREE_READONLY (decl) = 1;
  else
    ix86_builtins_isa[(int) code].const_p = true;

  return decl;
}
/* Add any new builtin functions for a given ISA that may not have been
   declared.  This saves a bit of space compared to adding all of the
   declarations to the tree, even if we didn't use them.  */

static void
ix86_add_new_builtins (HOST_WIDE_INT isa)
{
  int i;

  for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
    {
      if ((ix86_builtins_isa[i].isa & isa) != 0
	  && ix86_builtins_isa[i].set_and_not_built_p)
	{
	  tree decl, type;

	  /* Don't define the builtin again.  */
	  ix86_builtins_isa[i].set_and_not_built_p = false;

	  type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
	  decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
						 type, i, BUILT_IN_MD, NULL,
						 NULL_TREE);

	  ix86_builtins[i] = decl;
	  if (ix86_builtins_isa[i].const_p)
	    TREE_READONLY (decl) = 1;
	}
    }
}
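/* Illustrative example of when the replay above happens: compiling

     __attribute__((target ("sse4.2"))) int f (const char *s);

   with a front end that defers out-of-ISA builtins makes the
   target-attribute machinery recompute isa_flags and call
   ix86_add_new_builtins, so SSE4.2 builtins that def_builtin had merely
   recorded are finally declared.  */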
/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS	1

struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const int flag;
};
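/* How operand swapping shows up in practice (illustrative): SSE's cmpps
   has no native "greater than" predicate, so the cmpgtps entry in
   bdesc_args below is described as the reversed comparison LT with a
   *_SWAP function type, and the expander emits cmpltps with the two
   operands exchanged.  */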
static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
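/* Reading one bdesc_comi entry (illustrative): the first row above says
   that, when SSE is enabled, "__builtin_ia32_comieq" expands through the
   CODE_FOR_sse_comi pattern with comparison code UNEQ; the _mm_comieq_ss
   intrinsic in <xmmintrin.h> is a thin wrapper around this builtin.  */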
static const struct builtin_description bdesc_pcmpestr[] =
{
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};
static const struct builtin_description bdesc_pcmpistr[] =
{
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};
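/* Note on the flag field in the two string-compare tables above
   (illustrative): the plain pcmpestri/pcmpistri and pcmpestrm/pcmpistrm
   rows use 0, while the ...a/c/o/s/z variants store a CC mode there,
   telling the expander which EFLAGS bit of the comparison the builtin
   returns.  */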
/* Special builtins with variable number of arguments.  */
static const struct builtin_description bdesc_special_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },

  /* SSE or 3DNow!A  */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },

  /* AVX2 */
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },

  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },

  /* FSGSBASE */
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },

  /* RTM */
  { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
};
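/* Note (illustrative): both tables store an ix86_builtin_func_type code
   in the flag field, but the special-args entries above have types
   involving pointers or void and are expanded through
   ix86_expand_special_args_builtin, while the bdesc_args entries below
   are expanded as ordinary value operations by ix86_expand_args_builtin.  */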
/* Builtins with variable number of arguments.  */
static const struct builtin_description bdesc_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
  { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  /* 3DNow!A */
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },

  /* SSE MMX or 3Dnow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
26245 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_shufpd
, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
26247 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movmskpd
, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
26248 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmovmskb
, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128
, UNKNOWN
, (int) INT_FTYPE_V16QI
},
26249 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sqrtv2df2
, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF
},
26250 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtdq2pd
, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SI
},
26251 { OPTION_MASK_ISA_SSE2
, CODE_FOR_floatv4siv4sf2
, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SI
},
26253 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2dq
, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
26254 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2pi
, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
26255 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2ps
, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS
, UNKNOWN
, (int) V4SF_FTYPE_V2DF
},
26256 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2dq
, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
26257 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2pi
, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
26259 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpi2pd
, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD
, UNKNOWN
, (int) V2DF_FTYPE_V2SI
},
26261 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2si
, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
26262 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttsd2si
, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
26263 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsd2siq
, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF
},
26264 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvttsd2siq
, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF
},
26266 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2dq
, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
26267 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2pd
, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SF
},
26268 { OPTION_MASK_ISA_SSE2
, CODE_FOR_fix_truncv4sfv4si2
, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
26270 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26271 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26272 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26273 { OPTION_MASK_ISA_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26274 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26275 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26276 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26277 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26279 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, (int) V2DF_FTYPE_V2DF_V2DF
},
26280 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF
},
26281 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF
},
26282 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
26283 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
26284 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
26285 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, (int) V2DF_FTYPE_V2DF_V2DF
},
26286 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF
},
26287 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF
},
26288 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
26289 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
26290 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
26291 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, (int) V2DF_FTYPE_V2DF_V2DF
},
26292 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF
},
26293 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF
},
26294 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
26295 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, (int) V2DF_FTYPE_V2DF_V2DF
},
26296 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF
},
26297 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF
},
26298 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
26300 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26301 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26302 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26303 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26305 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26306 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_andnotv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26307 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26308 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  /* SSE2 MMX */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
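
/* How the rows above become user-callable builtins: at startup the
   whole bdesc_args table is walked once and every named entry is
   handed to the builtin machinery.  A minimal sketch of that walk
   (the real loop lives with the builtin-initialization code
   elsewhere in this file; field and helper names follow this file's
   conventions):

     for (i = 0, d = bdesc_args; i < ARRAY_SIZE (bdesc_args); i++, d++)
       {
         if (d->name == 0)
           continue;
         ftype = (enum ix86_builtin_func_type) d->flag;
         def_builtin_const (d->mask, d->name, ftype, d->code);
       }

   Rows whose name field is 0 (e.g. the IX86_BUILTIN_FABSQ and
   IX86_BUILTIN_COPYSIGNQ rows above) are skipped by this walk and
   registered separately.  */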

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  /* SSSE3 */
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
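
/* User code normally reaches these through the intrinsic headers
   rather than by calling the __builtin_ia32_* names directly.  An
   illustrative example (hypothetical user code, assuming
   <tmmintrin.h> and -mssse3):

     #include <tmmintrin.h>

     __m128i
     shuffle_bytes (__m128i v, __m128i mask)
     {
       return _mm_shuffle_epi8 (v, mask);
     }

   _mm_shuffle_epi8 expands to __builtin_ia32_pshufb128, i.e. the
   IX86_BUILTIN_PSHUFB128 row above, and emits a single PSHUFB.  */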

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
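
/* Note how the comparison slot is overloaded in the rows above: the
   ptest entries carry a genuine comparison code (EQ, LTU, GTU)
   selecting which PTEST flag the builtin tests, while the
   floor/ceil/trunc/rint entries reuse the same slot to pass a
   rounding constant (ROUND_FLOOR, ROUND_CEIL, ROUND_TRUNC,
   ROUND_MXCSR) to the expander, hence the (enum rtx_code) casts.
   The ROUND_* builtins here are not exposed through the intrinsic
   headers; they back GCC's own vectorization of floor, ceil, trunc
   and rint, e.g. a loop over floor (x[i]) vectorized at -msse4.1
   is rewritten to __builtin_ia32_floorpd and emits ROUNDPD with the
   floor immediate.  */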

  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
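
/* Example for the CRC32 group (hypothetical user code, assuming
   <smmintrin.h> and -msse4.2); the call folds to one CRC32
   instruction:

     #include <smmintrin.h>

     unsigned int
     crc32_byte (unsigned int crc, unsigned char byte)
     {
       return _mm_crc32_u8 (crc, byte);
     }

   _mm_crc32_u8 maps to __builtin_ia32_crc32qi above.  The DImode
   variant is additionally gated on OPTION_MASK_ISA_64BIT because
   CRC32 with a 64-bit destination only exists in 64-bit mode.  */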

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  /* AES */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  /* PCLMUL */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
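
/* The AES and PCLMUL rows above deliberately have a 0 name and a
   plain SSE2 mask: the generic table walk skips unnamed rows, and
   the user-visible __builtin_ia32_aes* and __builtin_ia32_pclmulqdq128
   builtins are registered separately so that they can be gated on
   the AES and PCLMUL ISA bits instead of on SSE2 alone.  */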

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
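
/* Example for the AVX group (hypothetical user code, assuming
   <immintrin.h> and -mavx):

     #include <immintrin.h>

     __m256d
     add4 (__m256d a, __m256d b)
     {
       return _mm256_add_pd (a, b);
     }

   _mm256_add_pd expands to __builtin_ia32_addpd256, i.e. the
   IX86_BUILTIN_ADDPD256 row above, and emits VADDPD on the full
   256-bit ymm registers.  */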

  /* AVX2 */
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mulv4siv4di3, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulhrswv16hi3, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256", IX86_BUILTIN_PMULHUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256", IX86_BUILTIN_PMULHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256", IX86_BUILTIN_PMULLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256", IX86_BUILTIN_PMULLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulv4siv4di3, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
26804 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv4di3
, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_COUNT
},
26805 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv32qi3
, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26806 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv16hi3
, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26807 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv8si3
, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26808 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv4di3
, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26809 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sssubv32qi3
, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26810 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sssubv16hi3
, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26811 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ussubv32qi3
, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26812 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ussubv16hi3
, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26813 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv32qi
, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26814 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv16hi
, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26815 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv8si
, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26816 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv4di
, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26817 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv32qi
, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26818 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv16hi
, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26819 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv8si
, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26820 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv4di
, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26821 { OPTION_MASK_ISA_AVX2
, CODE_FOR_xorv4di3
, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26822 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv4sf
, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26823 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv8sf
, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256
, UNKNOWN
, (int) V8SF_FTYPE_V4SF
},
26824 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv4df
, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256
, UNKNOWN
, (int) V4DF_FTYPE_V2DF
},
26825 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vbroadcasti128_v4di
, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256
, UNKNOWN
, (int) V4DI_FTYPE_V2DI
},
26826 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblenddv4si
, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_INT
},
26827 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblenddv8si
, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI_INT
},
26828 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv32qi
, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256
, UNKNOWN
, (int) V32QI_FTYPE_V16QI
},
26829 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv16hi
, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256
, UNKNOWN
, (int) V16HI_FTYPE_V8HI
},
26830 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv8si
, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256
, UNKNOWN
, (int) V8SI_FTYPE_V4SI
},
26831 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv4di
, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256
, UNKNOWN
, (int) V4DI_FTYPE_V2DI
},
26832 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv16qi
, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI
},
26833 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv8hi
, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
26834 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv4si
, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI
},
26835 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv2di
, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
26836 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permvarv8si
, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26837 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv4df
, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
26838 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permvarv8sf
, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
26839 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv4di
, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT
},
26840 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv2ti
, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI_INT
},
26841 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_extracti128
, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256
, UNKNOWN
, (int) V2DI_FTYPE_V4DI_INT
},
26842 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_inserti128
, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_INT
},
26843 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv4di
, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26844 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv2di
, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26845 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv8si
, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26846 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv4si
, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26847 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashrvv8si
, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26848 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashrvv4si
, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26849 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv4di
, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26850 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv2di
, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26851 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv8si
, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26852 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv4si
, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
  /* LZCNT */
  { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  /* BMI */
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  /* TBM */
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },

  /* F16C */
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },

  /* BMI2 */
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
};
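/* Illustrative sketch, not compiler code: every entry in the table above
   becomes a const builtin through ix86_init_mmx_sse_builtins further down.
   For example, the IX86_BUILTIN_CTZS entry makes the call below legal under
   -mbmi.  The wrapper name is hypothetical; the builtin name and its
   UINT16_FTYPE_UINT16 signature come straight from the table:

     unsigned short
     count_trailing_zeros16 (unsigned short x)
     {
       return __builtin_ctzs (x);
     }
*/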
/* FMA4 and XOP.  */
#define MULTI_ARG_4_DF2_DI_I	V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1	V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I	V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1	V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF		V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF		V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2		V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2		V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI		V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI		V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI	V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI		V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI	V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI		V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2		V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2		V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2		V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2		V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF		V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF		V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI		V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI		V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI		V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI		V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM	V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM	V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM	V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM	V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP	V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP	V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP	V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP	V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF	V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF	V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF	V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF	V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF	V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF	V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF		V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF		V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2		V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2		V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI		V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI		V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI		V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI		V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI	V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI	V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI	V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI	V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI	V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI	V8HI_FTYPE_V16QI

static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
    "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
    "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
    UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
    "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
    "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
    UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
    "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
    "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
    "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
    "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
    "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
    "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
    "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
    "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
};
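/* Illustrative sketch, not compiler code: an XOP entry such as the
   MULTI_ARG_2_SI one for IX86_BUILTIN_VPROTD corresponds, under -mxop, to a
   user-level call like the one below.  The typedef and wrapper name are
   hypothetical; the builtin name and the V4SI (V4SI, V4SI) shape come from
   the table above:

     typedef int v4si __attribute__ ((vector_size (16)));

     v4si
     rotate_lanes_left (v4si a, v4si counts)
     {
       return __builtin_ia32_vprotd (a, counts);
     }
*/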

/* TM vector builtins.  */

/* Reuse the existing x86-specific `struct builtin_description' because
   we're lazy.  Add casts to make them fit.  */
static const struct builtin_description bdesc_tm[] =
{
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },

  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
};

/* TM callbacks.  */

/* Return the builtin decl needed to load a vector of TYPE.  */

static tree
ix86_builtin_tm_load (tree type)
{
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      switch (tree_low_cst (TYPE_SIZE (type), 1))
	{
	case 64:
	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
	case 128:
	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
	case 256:
	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
	}
    }
  return NULL_TREE;
}

/* Return the builtin decl needed to store a vector of TYPE.  */

static tree
ix86_builtin_tm_store (tree type)
{
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      switch (tree_low_cst (TYPE_SIZE (type), 1))
	{
	case 64:
	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
	case 128:
	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
	case 256:
	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
	}
    }
  return NULL_TREE;
}
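/* Illustrative sketch, not compiler code: the TM lowering pass reaches the
   two helpers above through the builtin_tm_load/builtin_tm_store target
   hooks, keyed purely on the bit size of the vector type (64, 128 or 256).
   A hedged picture of a caller, with hypothetical VECTYPE and ADDR trees;
   gimple_build_call is the generic call-building API of this era:

     tree fn = ix86_builtin_tm_load (vectype);  -- NULL_TREE for other sizes
     if (fn)
       stmt = gimple_build_call (fn, 1, addr);  -- transactional vector load
*/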

/* Initialize the transactional memory vector load/store builtins.  */

static void
ix86_init_tm_builtins (void)
{
  enum ix86_builtin_func_type ftype;
  const struct builtin_description *d;
  size_t i;
  tree decl;
  tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
  tree attrs_log, attrs_type_log;

  if (!flag_tm)
    return;

  /* If there are no builtins defined, we must be compiling in a
     language without trans-mem support.  */
  if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
    return;

  /* Use whatever attributes a normal TM load has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
  attrs_load = DECL_ATTRIBUTES (decl);
  attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  /* Use whatever attributes a normal TM store has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
  attrs_store = DECL_ATTRIBUTES (decl);
  attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  /* Use whatever attributes a normal TM log has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
  attrs_log = DECL_ATTRIBUTES (decl);
  attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));

  for (i = 0, d = bdesc_tm;
       i < ARRAY_SIZE (bdesc_tm);
       i++, d++)
    {
      if ((d->mask & ix86_isa_flags) != 0
	  || (lang_hooks.builtin_function
	      == lang_hooks.builtin_function_ext_scope))
	{
	  tree type, attrs, attrs_type;
	  enum built_in_function code = (enum built_in_function) d->code;

	  ftype = (enum ix86_builtin_func_type) d->flag;
	  type = ix86_get_builtin_func_type (ftype);

	  if (BUILTIN_TM_LOAD_P (code))
	    {
	      attrs = attrs_load;
	      attrs_type = attrs_type_load;
	    }
	  else if (BUILTIN_TM_STORE_P (code))
	    {
	      attrs = attrs_store;
	      attrs_type = attrs_type_store;
	    }
	  else
	    {
	      attrs = attrs_log;
	      attrs_type = attrs_type_log;
	    }
	  decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
				       /* The builtin without the prefix for
					  calling it directly.  */
				       d->name + strlen ("__builtin_"),
				       attrs);
	  /* add_builtin_function() will set the DECL_ATTRIBUTES, now
	     set the TYPE_ATTRIBUTES.  */
	  decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);

	  set_builtin_decl (code, decl, false);
	}
    }
}
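/* Illustrative sketch, not compiler code: note how the loop above registers
   each entry under its "__builtin__ITM_*" name while using the name with
   the "__builtin_" prefix stripped as the direct-call target, so expanded
   calls land on the corresponding libitm entry points.  A hedged example of
   the pointer arithmetic involved, with hypothetical variable names:

     const char *name = "__builtin__ITM_WM64";
     const char *asmname = name + strlen ("__builtin_");  -- yields "_ITM_WM64"
*/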

/* Set up all the MMX/SSE builtins, even builtins for instructions that are
   not in the current target ISA, to allow the user to compile particular
   modules with different target specific options that differ from the
   command line options.  */

static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  enum ix86_builtin_func_type ftype;
  size_t i;

  /* Add all special builtins with variable number of operands.  */
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin (d->mask, d->name, ftype, d->code);
    }

  /* Add all builtins with variable number of operands.  */
  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpestr[im] insns.  */
  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPESTRM128)
	ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
      else
	ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpistr[im] insns.  */
  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPISTRM128)
	ftype = V16QI_FTYPE_V16QI_V16QI_INT;
      else
	ftype = INT_FTYPE_V16QI_V16QI_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    {
      if (d->mask == OPTION_MASK_ISA_SSE2)
	ftype = INT_FTYPE_V2DF_V2DF;
      else
	ftype = INT_FTYPE_V4SF_V4SF;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* SSE */
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
	       VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
	       UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);

  /* SSE or 3DNow!A */
  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
	       "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
	       IX86_BUILTIN_MASKMOVQ);

  /* SSE2 */
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
	       VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
	       VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
  x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
			    VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);

  /* SSE3 */
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
	       VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
	       VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);

  /* AES */
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
		     V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
		     V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);

  /* PCLMUL */
  def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
		     V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);

  /* RDRND */
  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
	       INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
	       INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
	       "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
	       IX86_BUILTIN_RDRAND64_STEP);

  /* AVX2 */
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
	       V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
	       IX86_BUILTIN_GATHERSIV2DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
	       V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
	       IX86_BUILTIN_GATHERSIV4DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
	       V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
	       IX86_BUILTIN_GATHERDIV2DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
	       V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
	       IX86_BUILTIN_GATHERDIV4DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
	       V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
	       IX86_BUILTIN_GATHERSIV4SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
	       V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
	       IX86_BUILTIN_GATHERSIV8SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
	       V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
	       IX86_BUILTIN_GATHERDIV4SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
	       V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
	       IX86_BUILTIN_GATHERDIV8SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
	       V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
	       IX86_BUILTIN_GATHERSIV2DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
	       V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
	       IX86_BUILTIN_GATHERSIV4DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
	       V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
	       IX86_BUILTIN_GATHERDIV2DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
	       V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
	       IX86_BUILTIN_GATHERDIV4DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
	       V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
	       IX86_BUILTIN_GATHERSIV4SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
	       V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
	       IX86_BUILTIN_GATHERSIV8SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
	       V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
	       IX86_BUILTIN_GATHERDIV4SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
	       V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
	       IX86_BUILTIN_GATHERDIV8SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
	       V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
	       IX86_BUILTIN_GATHERALTSIV4DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
	       V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
	       IX86_BUILTIN_GATHERALTDIV8SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
	       V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
	       IX86_BUILTIN_GATHERALTSIV4DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
	       V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
	       IX86_BUILTIN_GATHERALTDIV8SI);

  /* RTM */
  def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
	       VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);

  /* MMX access to the vec_init patterns.  */
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
		     V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
		     V4HI_FTYPE_HI_HI_HI_HI,
		     IX86_BUILTIN_VEC_INIT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
		     V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
		     IX86_BUILTIN_VEC_INIT_V8QI);

  /* Access to the vec_extract patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
		     DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
		     DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
		     FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
		     SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
		     HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
		     "__builtin_ia32_vec_ext_v4hi",
		     HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
		     SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
		     QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);

  /* Access to the vec_set patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
		     "__builtin_ia32_vec_set_v2di",
		     V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
		     V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
		     V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
		     V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
		     "__builtin_ia32_vec_set_v4hi",
		     V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
		     V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);

  /* Add FMA4 multi-arg instructions.  */
  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
}
27565 ix86_init_builtins_va_builtins_abi (void)
27567 tree ms_va_ref
, sysv_va_ref
;
27568 tree fnvoid_va_end_ms
, fnvoid_va_end_sysv
;
27569 tree fnvoid_va_start_ms
, fnvoid_va_start_sysv
;
27570 tree fnvoid_va_copy_ms
, fnvoid_va_copy_sysv
;
27571 tree fnattr_ms
= NULL_TREE
, fnattr_sysv
= NULL_TREE
;
27575 fnattr_ms
= build_tree_list (get_identifier ("ms_abi"), NULL_TREE
);
27576 fnattr_sysv
= build_tree_list (get_identifier ("sysv_abi"), NULL_TREE
);
27577 ms_va_ref
= build_reference_type (ms_va_list_type_node
);
27579 build_pointer_type (TREE_TYPE (sysv_va_list_type_node
));
27582 build_function_type_list (void_type_node
, ms_va_ref
, NULL_TREE
);
27583 fnvoid_va_start_ms
=
27584 build_varargs_function_type_list (void_type_node
, ms_va_ref
, NULL_TREE
);
27585 fnvoid_va_end_sysv
=
27586 build_function_type_list (void_type_node
, sysv_va_ref
, NULL_TREE
);
27587 fnvoid_va_start_sysv
=
27588 build_varargs_function_type_list (void_type_node
, sysv_va_ref
,
27590 fnvoid_va_copy_ms
=
27591 build_function_type_list (void_type_node
, ms_va_ref
, ms_va_list_type_node
,
27593 fnvoid_va_copy_sysv
=
27594 build_function_type_list (void_type_node
, sysv_va_ref
,
27595 sysv_va_ref
, NULL_TREE
);
27597 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms
,
27598 BUILT_IN_VA_START
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
27599 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms
,
27600 BUILT_IN_VA_END
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
27601 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms
,
27602 BUILT_IN_VA_COPY
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
27603 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv
,
27604 BUILT_IN_VA_START
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
27605 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv
,
27606 BUILT_IN_VA_END
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
27607 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv
,
27608 BUILT_IN_VA_COPY
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
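/* Illustrative sketch, not part of GCC: the __builtin_ms_va_* entry
   points registered above back va_list handling inside ms_abi functions
   on a 64-bit SysV host.  This user-level shape is an assumption, not a
   definition made here.  */
#if 0
int __attribute__ ((ms_abi))
ms_sum (int n, ...)
{
  __builtin_ms_va_list ap;
  int s = 0;

  __builtin_ms_va_start (ap, n);
  while (n-- > 0)
    s += __builtin_va_arg (ap, int);
  __builtin_ms_va_end (ap);
  return s;
}
#endif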
static void
ix86_init_builtin_types (void)
{
  tree float128_type_node, float80_type_node;

  /* The __float80 type.  */
  float80_type_node = long_double_type_node;
  if (TYPE_MODE (float80_type_node) != XFmode)
    {
      /* The __float80 type.  */
      float80_type_node = make_node (REAL_TYPE);

      TYPE_PRECISION (float80_type_node) = 80;
      layout_type (float80_type_node);
    }
  lang_hooks.types.register_builtin_type (float80_type_node, "__float80");

  /* The __float128 type.  */
  float128_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (float128_type_node) = 128;
  layout_type (float128_type_node);
  lang_hooks.types.register_builtin_type (float128_type_node, "__float128");

  /* This macro is built by i386-builtin-types.awk.  */
  DEFINE_BUILTIN_PRIMITIVE_TYPES;
}
static void
ix86_init_builtins (void)
{
  tree t;

  ix86_init_builtin_types ();

  /* TFmode support builtins.  */
  def_builtin_const (0, "__builtin_infq",
		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
  def_builtin_const (0, "__builtin_huge_valq",
		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);

  /* We will expand them to normal call if SSE2 isn't available since
     they are used by libgcc.  */
  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
  t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
			    BUILT_IN_MD, "__fabstf2", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;

  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
  t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
			    BUILT_IN_MD, "__copysigntf3", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;

  ix86_init_tm_builtins ();
  ix86_init_mmx_sse_builtins ();

  ix86_init_builtins_va_builtins_abi ();

#ifdef SUBTARGET_INIT_BUILTINS
  SUBTARGET_INIT_BUILTINS;
#endif
}
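/* Illustrative sketch, not part of GCC: the TFmode builtins registered
   above as seen from user code.  With SSE2 they expand inline; otherwise
   they become calls to the libgcc routines __fabstf2 and __copysigntf3
   named in the registration.  */
#if 0
__float128
signed_magnitude (__float128 x, __float128 sign)
{
  return __builtin_copysignq (__builtin_fabsq (x), sign);
}
#endif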
/* Return the ix86 builtin for CODE.  */

static tree
ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= IX86_BUILTIN_MAX)
    return error_mark_node;

  return ix86_builtins[code];
}
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}
/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  if (!insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!insn_data[icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
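/* Illustrative sketch, not part of GCC: the path a two-operand builtin
   takes through ix86_expand_binop_builtin above.  _mm_add_epi16 is
   assumed to forward to __builtin_ia32_paddw128, whose insn pattern
   supplies the modes and predicates checked by the expander.  */
#if 0
#include <emmintrin.h>

__m128i
add16 (__m128i a, __m128i b)
{
  return _mm_add_epi16 (a, b);	/* a single paddw instruction */
}
#endif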
/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns.  */

static rtx
ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
			       enum ix86_builtin_func_type m_type,
			       enum rtx_code sub_code)
{
  rtx pat;
  int i;
  int nargs;
  bool comparison_p = false;
  bool tf_p = false;
  bool last_arg_constant = false;
  int num_memory = 0;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[4];

  enum machine_mode tmode = insn_data[icode].operand[0].mode;

  switch (m_type)
    {
    case MULTI_ARG_4_DF2_DI_I:
    case MULTI_ARG_4_DF2_DI_I1:
    case MULTI_ARG_4_SF2_SI_I:
    case MULTI_ARG_4_SF2_SI_I1:
      nargs = 4;
      last_arg_constant = true;
      break;

    case MULTI_ARG_3_SF:
    case MULTI_ARG_3_DF:
    case MULTI_ARG_3_SF2:
    case MULTI_ARG_3_DF2:
    case MULTI_ARG_3_DI:
    case MULTI_ARG_3_SI:
    case MULTI_ARG_3_SI_DI:
    case MULTI_ARG_3_HI:
    case MULTI_ARG_3_HI_SI:
    case MULTI_ARG_3_QI:
    case MULTI_ARG_3_DI2:
    case MULTI_ARG_3_SI2:
    case MULTI_ARG_3_HI2:
    case MULTI_ARG_3_QI2:
      nargs = 3;
      break;

    case MULTI_ARG_2_SF:
    case MULTI_ARG_2_DF:
    case MULTI_ARG_2_DI:
    case MULTI_ARG_2_SI:
    case MULTI_ARG_2_HI:
    case MULTI_ARG_2_QI:
      nargs = 2;
      break;

    case MULTI_ARG_2_DI_IMM:
    case MULTI_ARG_2_SI_IMM:
    case MULTI_ARG_2_HI_IMM:
    case MULTI_ARG_2_QI_IMM:
      nargs = 2;
      last_arg_constant = true;
      break;

    case MULTI_ARG_1_SF:
    case MULTI_ARG_1_DF:
    case MULTI_ARG_1_SF2:
    case MULTI_ARG_1_DF2:
    case MULTI_ARG_1_DI:
    case MULTI_ARG_1_SI:
    case MULTI_ARG_1_HI:
    case MULTI_ARG_1_QI:
    case MULTI_ARG_1_SI_DI:
    case MULTI_ARG_1_HI_DI:
    case MULTI_ARG_1_HI_SI:
    case MULTI_ARG_1_QI_DI:
    case MULTI_ARG_1_QI_SI:
    case MULTI_ARG_1_QI_HI:
      nargs = 1;
      break;

    case MULTI_ARG_2_DI_CMP:
    case MULTI_ARG_2_SI_CMP:
    case MULTI_ARG_2_HI_CMP:
    case MULTI_ARG_2_QI_CMP:
      nargs = 2;
      comparison_p = true;
      break;

    case MULTI_ARG_2_SF_TF:
    case MULTI_ARG_2_DF_TF:
    case MULTI_ARG_2_DI_TF:
    case MULTI_ARG_2_SI_TF:
    case MULTI_ARG_2_HI_TF:
    case MULTI_ARG_2_QI_TF:
      nargs = 2;
      tf_p = true;
      break;

    default:
      gcc_unreachable ();
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert (nargs <= 4);

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      int adjust = (comparison_p) ? 1 : 0;
      enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;

      if (last_arg_constant && i == nargs - 1)
	{
	  if (!insn_data[icode].operand[i + 1].predicate (op, mode))
	    {
	      enum insn_code new_icode = icode;
	      switch (icode)
		{
		case CODE_FOR_xop_vpermil2v2df3:
		case CODE_FOR_xop_vpermil2v4sf3:
		case CODE_FOR_xop_vpermil2v4df3:
		case CODE_FOR_xop_vpermil2v8sf3:
		  error ("the last argument must be a 2-bit immediate");
		  return gen_reg_rtx (tmode);
		case CODE_FOR_xop_rotlv2di3:
		  new_icode = CODE_FOR_rotlv2di3;
		  goto xop_rotl;
		case CODE_FOR_xop_rotlv4si3:
		  new_icode = CODE_FOR_rotlv4si3;
		  goto xop_rotl;
		case CODE_FOR_xop_rotlv8hi3:
		  new_icode = CODE_FOR_rotlv8hi3;
		  goto xop_rotl;
		case CODE_FOR_xop_rotlv16qi3:
		  new_icode = CODE_FOR_rotlv16qi3;
		xop_rotl:
		  if (CONST_INT_P (op))
		    {
		      int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
		      op = GEN_INT (INTVAL (op) & mask);
		      gcc_checking_assert
			(insn_data[icode].operand[i + 1].predicate (op, mode));
		    }
		  else
		    {
		      gcc_checking_assert
			(nargs == 2
			 && insn_data[new_icode].operand[0].mode == tmode
			 && insn_data[new_icode].operand[1].mode == tmode
			 && insn_data[new_icode].operand[2].mode == mode
			 && insn_data[new_icode].operand[0].predicate
			    == insn_data[icode].operand[0].predicate
			 && insn_data[new_icode].operand[1].predicate
			    == insn_data[icode].operand[1].predicate);
		      icode = new_icode;
		    }
		  break;
		default:
		  gcc_unreachable ();
		}
	    }
	}
      else
	{
	  if (VECTOR_MODE_P (mode))
	    op = safe_vector_operand (op, mode);

	  /* If we aren't optimizing, only allow one memory operand to be
	     generated.  */
	  if (memory_operand (op, mode))
	    num_memory++;

	  gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);

	  if (optimize
	      || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
	      || num_memory > 1)
	    op = force_reg (mode, op);
	}

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;

    case 2:
      if (tf_p)
	pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
			       GEN_INT ((int)sub_code));
      else if (! comparison_p)
	pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      else
	{
	  rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
				       args[0].op,
				       args[1].op);

	  pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
	}
      break;

    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;

    case 4:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op,
			     args[3].op);
      break;

    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_args_builtin to take care of scalar unop
   insns with vec_merge.  */

static rtx
ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
				    rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (!insn_data[icode].operand[2].predicate (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d,
			 tree exp, rtx target, bool swap)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (swap)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
		      rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;

  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}
/* Subroutines of ix86_expand_args_builtin to take care of round insns.  */

static rtx
ix86_expand_sse_round (const struct builtin_description *d, tree exp,
		       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;

  if (optimize || target == 0
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = GEN_INT (d->comparison);

  pat = GEN_FCN (d->icode) (target, op0, op1);
  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
static rtx
ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
				     tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;

  if (optimize || target == 0
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  op0 = safe_vector_operand (op0, mode0);
  op1 = safe_vector_operand (op1, mode1);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = GEN_INT (d->comparison);

  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of ptest insns.  */

static rtx
ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
		       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;

  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  gen_rtx_REG (CCmode, FLAGS_REG),
					  const0_rtx)));

  return SUBREG_REG (target);
}
/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns.  */

static rtx
ix86_expand_sse_pcmpestr (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  tree arg3 = CALL_EXPR_ARG (exp, 3);
  tree arg4 = CALL_EXPR_ARG (exp, 4);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  rtx op3 = expand_normal (arg3);
  rtx op4 = expand_normal (arg4);
  enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modei3 = insn_data[d->icode].operand[3].mode;
  modev4 = insn_data[d->icode].operand[4].mode;
  modei5 = insn_data[d->icode].operand[5].mode;
  modeimm = insn_data[d->icode].operand[6].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev4))
    op2 = safe_vector_operand (op2, modev4);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
    op1 = copy_to_mode_reg (modei3, op1);
  if ((optimize && !register_operand (op2, modev4))
      || !insn_data[d->icode].operand[4].predicate (op2, modev4))
    op2 = copy_to_mode_reg (modev4, op2);
  if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
    op3 = copy_to_mode_reg (modei5, op3);

  if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
    {
      error ("the fifth argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPESTRI128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode0
	  || !insn_data[d->icode].operand[0].predicate (target, tmode0))
	target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
    }
  else if (d->code == IX86_BUILTIN_PCMPESTRM128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode1
	  || !insn_data[d->icode].operand[1].predicate (target, tmode1))
	target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
	(gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
		      gen_rtx_fmt_ee (EQ, QImode,
				      gen_rtx_REG ((enum machine_mode) d->flag,
						   FLAGS_REG),
				      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}
/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns.  */

static rtx
ix86_expand_sse_pcmpistr (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modev3 = insn_data[d->icode].operand[3].mode;
  modeimm = insn_data[d->icode].operand[4].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev3))
    op1 = safe_vector_operand (op1, modev3);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if ((optimize && !register_operand (op1, modev3))
      || !insn_data[d->icode].operand[3].predicate (op1, modev3))
    op1 = copy_to_mode_reg (modev3, op1);

  if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
    {
      error ("the third argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPISTRI128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode0
	  || !insn_data[d->icode].operand[0].predicate (target, tmode0))
	target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
    }
  else if (d->code == IX86_BUILTIN_PCMPISTRM128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode1
	  || !insn_data[d->icode].operand[1].predicate (target, tmode1))
	target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
	(gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
		      gen_rtx_fmt_ee (EQ, QImode,
				      gen_rtx_REG ((enum machine_mode) d->flag,
						   FLAGS_REG),
				      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}
/* Subroutine of ix86_expand_builtin to take care of insns with
   variable number of operands.  */

static rtx
ix86_expand_args_builtin (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat, real_target;
  unsigned int i, nargs;
  unsigned int nargs_constant = 0;
  int num_memory = 0;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[4];
  bool last_arg_count = false;
  enum insn_code icode = d->icode;
  const struct insn_data_d *insn_p = &insn_data[icode];
  enum machine_mode tmode = insn_p->operand[0].mode;
  enum machine_mode rmode = VOIDmode;
  bool swap = false;
  enum rtx_code comparison = d->comparison;

  switch ((enum ix86_builtin_func_type) d->flag)
    {
    case V2DF_FTYPE_V2DF_ROUND:
    case V4DF_FTYPE_V4DF_ROUND:
    case V4SF_FTYPE_V4SF_ROUND:
    case V8SF_FTYPE_V8SF_ROUND:
    case V4SI_FTYPE_V4SF_ROUND:
    case V8SI_FTYPE_V8SF_ROUND:
      return ix86_expand_sse_round (d, exp, target);
    case V4SI_FTYPE_V2DF_V2DF_ROUND:
    case V8SI_FTYPE_V4DF_V4DF_ROUND:
      return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
    case INT_FTYPE_V8SF_V8SF_PTEST:
    case INT_FTYPE_V4DI_V4DI_PTEST:
    case INT_FTYPE_V4DF_V4DF_PTEST:
    case INT_FTYPE_V4SF_V4SF_PTEST:
    case INT_FTYPE_V2DI_V2DI_PTEST:
    case INT_FTYPE_V2DF_V2DF_PTEST:
      return ix86_expand_sse_ptest (d, exp, target);
    case FLOAT128_FTYPE_FLOAT128:
    case FLOAT_FTYPE_FLOAT:
    case INT_FTYPE_INT:
    case UINT64_FTYPE_INT:
    case UINT16_FTYPE_UINT16:
    case INT64_FTYPE_INT64:
    case INT64_FTYPE_V4SF:
    case INT64_FTYPE_V2DF:
    case INT_FTYPE_V16QI:
    case INT_FTYPE_V8QI:
    case INT_FTYPE_V8SF:
    case INT_FTYPE_V4DF:
    case INT_FTYPE_V4SF:
    case INT_FTYPE_V2DF:
    case INT_FTYPE_V32QI:
    case V16QI_FTYPE_V16QI:
    case V8SI_FTYPE_V8SF:
    case V8SI_FTYPE_V4SI:
    case V8HI_FTYPE_V8HI:
    case V8HI_FTYPE_V16QI:
    case V8QI_FTYPE_V8QI:
    case V8SF_FTYPE_V8SF:
    case V8SF_FTYPE_V8SI:
    case V8SF_FTYPE_V4SF:
    case V8SF_FTYPE_V8HI:
    case V4SI_FTYPE_V4SI:
    case V4SI_FTYPE_V16QI:
    case V4SI_FTYPE_V4SF:
    case V4SI_FTYPE_V8SI:
    case V4SI_FTYPE_V8HI:
    case V4SI_FTYPE_V4DF:
    case V4SI_FTYPE_V2DF:
    case V4HI_FTYPE_V4HI:
    case V4DF_FTYPE_V4DF:
    case V4DF_FTYPE_V4SI:
    case V4DF_FTYPE_V4SF:
    case V4DF_FTYPE_V2DF:
    case V4SF_FTYPE_V4SF:
    case V4SF_FTYPE_V4SI:
    case V4SF_FTYPE_V8SF:
    case V4SF_FTYPE_V4DF:
    case V4SF_FTYPE_V8HI:
    case V4SF_FTYPE_V2DF:
    case V2DI_FTYPE_V2DI:
    case V2DI_FTYPE_V16QI:
    case V2DI_FTYPE_V8HI:
    case V2DI_FTYPE_V4SI:
    case V2DF_FTYPE_V2DF:
    case V2DF_FTYPE_V4SI:
    case V2DF_FTYPE_V4DF:
    case V2DF_FTYPE_V4SF:
    case V2DF_FTYPE_V2SI:
    case V2SI_FTYPE_V2SI:
    case V2SI_FTYPE_V4SF:
    case V2SI_FTYPE_V2SF:
    case V2SI_FTYPE_V2DF:
    case V2SF_FTYPE_V2SF:
    case V2SF_FTYPE_V2SI:
    case V32QI_FTYPE_V32QI:
    case V32QI_FTYPE_V16QI:
    case V16HI_FTYPE_V16HI:
    case V16HI_FTYPE_V8HI:
    case V8SI_FTYPE_V8SI:
    case V16HI_FTYPE_V16QI:
    case V8SI_FTYPE_V16QI:
    case V4DI_FTYPE_V16QI:
    case V8SI_FTYPE_V8HI:
    case V4DI_FTYPE_V8HI:
    case V4DI_FTYPE_V4SI:
    case V4DI_FTYPE_V2DI:
      nargs = 1;
      break;
    case V4SF_FTYPE_V4SF_VEC_MERGE:
    case V2DF_FTYPE_V2DF_VEC_MERGE:
      return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
    case FLOAT128_FTYPE_FLOAT128_FLOAT128:
    case V16QI_FTYPE_V16QI_V16QI:
    case V16QI_FTYPE_V8HI_V8HI:
    case V8QI_FTYPE_V8QI_V8QI:
    case V8QI_FTYPE_V4HI_V4HI:
    case V8HI_FTYPE_V8HI_V8HI:
    case V8HI_FTYPE_V16QI_V16QI:
    case V8HI_FTYPE_V4SI_V4SI:
    case V8SF_FTYPE_V8SF_V8SF:
    case V8SF_FTYPE_V8SF_V8SI:
    case V4SI_FTYPE_V4SI_V4SI:
    case V4SI_FTYPE_V8HI_V8HI:
    case V4SI_FTYPE_V4SF_V4SF:
    case V4SI_FTYPE_V2DF_V2DF:
    case V4HI_FTYPE_V4HI_V4HI:
    case V4HI_FTYPE_V8QI_V8QI:
    case V4HI_FTYPE_V2SI_V2SI:
    case V4DF_FTYPE_V4DF_V4DF:
    case V4DF_FTYPE_V4DF_V4DI:
    case V4SF_FTYPE_V4SF_V4SF:
    case V4SF_FTYPE_V4SF_V4SI:
    case V4SF_FTYPE_V4SF_V2SI:
    case V4SF_FTYPE_V4SF_V2DF:
    case V4SF_FTYPE_V4SF_DI:
    case V4SF_FTYPE_V4SF_SI:
    case V2DI_FTYPE_V2DI_V2DI:
    case V2DI_FTYPE_V16QI_V16QI:
    case V2DI_FTYPE_V4SI_V4SI:
    case V2DI_FTYPE_V2DI_V16QI:
    case V2DI_FTYPE_V2DF_V2DF:
    case V2SI_FTYPE_V2SI_V2SI:
    case V2SI_FTYPE_V4HI_V4HI:
    case V2SI_FTYPE_V2SF_V2SF:
    case V2DF_FTYPE_V2DF_V2DF:
    case V2DF_FTYPE_V2DF_V4SF:
    case V2DF_FTYPE_V2DF_V2DI:
    case V2DF_FTYPE_V2DF_DI:
    case V2DF_FTYPE_V2DF_SI:
    case V2SF_FTYPE_V2SF_V2SF:
    case V1DI_FTYPE_V1DI_V1DI:
    case V1DI_FTYPE_V8QI_V8QI:
    case V1DI_FTYPE_V2SI_V2SI:
    case V32QI_FTYPE_V16HI_V16HI:
    case V16HI_FTYPE_V8SI_V8SI:
    case V32QI_FTYPE_V32QI_V32QI:
    case V16HI_FTYPE_V32QI_V32QI:
    case V16HI_FTYPE_V16HI_V16HI:
    case V8SI_FTYPE_V4DF_V4DF:
    case V8SI_FTYPE_V8SI_V8SI:
    case V8SI_FTYPE_V16HI_V16HI:
    case V4DI_FTYPE_V4DI_V4DI:
    case V4DI_FTYPE_V8SI_V8SI:
      if (comparison == UNKNOWN)
	return ix86_expand_binop_builtin (icode, exp, target);
      nargs = 2;
      break;
    case V4SF_FTYPE_V4SF_V4SF_SWAP:
    case V2DF_FTYPE_V2DF_V2DF_SWAP:
      gcc_assert (comparison != UNKNOWN);
      nargs = 2;
      swap = true;
      break;
    case V16HI_FTYPE_V16HI_V8HI_COUNT:
    case V16HI_FTYPE_V16HI_SI_COUNT:
    case V8SI_FTYPE_V8SI_V4SI_COUNT:
    case V8SI_FTYPE_V8SI_SI_COUNT:
    case V4DI_FTYPE_V4DI_V2DI_COUNT:
    case V4DI_FTYPE_V4DI_INT_COUNT:
    case V8HI_FTYPE_V8HI_V8HI_COUNT:
    case V8HI_FTYPE_V8HI_SI_COUNT:
    case V4SI_FTYPE_V4SI_V4SI_COUNT:
    case V4SI_FTYPE_V4SI_SI_COUNT:
    case V4HI_FTYPE_V4HI_V4HI_COUNT:
    case V4HI_FTYPE_V4HI_SI_COUNT:
    case V2DI_FTYPE_V2DI_V2DI_COUNT:
    case V2DI_FTYPE_V2DI_SI_COUNT:
    case V2SI_FTYPE_V2SI_V2SI_COUNT:
    case V2SI_FTYPE_V2SI_SI_COUNT:
    case V1DI_FTYPE_V1DI_V1DI_COUNT:
    case V1DI_FTYPE_V1DI_SI_COUNT:
      nargs = 2;
      last_arg_count = true;
      break;
    case UINT64_FTYPE_UINT64_UINT64:
    case UINT_FTYPE_UINT_UINT:
    case UINT_FTYPE_UINT_USHORT:
    case UINT_FTYPE_UINT_UCHAR:
    case UINT16_FTYPE_UINT16_INT:
    case UINT8_FTYPE_UINT8_INT:
      nargs = 2;
      break;
    case V2DI_FTYPE_V2DI_INT_CONVERT:
      nargs = 2;
      rmode = V1TImode;
      nargs_constant = 1;
      break;
    case V4DI_FTYPE_V4DI_INT_CONVERT:
      nargs = 2;
      rmode = V2TImode;
      nargs_constant = 1;
      break;
    case V8HI_FTYPE_V8HI_INT:
    case V8HI_FTYPE_V8SF_INT:
    case V8HI_FTYPE_V4SF_INT:
    case V8SF_FTYPE_V8SF_INT:
    case V4SI_FTYPE_V4SI_INT:
    case V4SI_FTYPE_V8SI_INT:
    case V4HI_FTYPE_V4HI_INT:
    case V4DF_FTYPE_V4DF_INT:
    case V4SF_FTYPE_V4SF_INT:
    case V4SF_FTYPE_V8SF_INT:
    case V2DI_FTYPE_V2DI_INT:
    case V2DF_FTYPE_V2DF_INT:
    case V2DF_FTYPE_V4DF_INT:
    case V16HI_FTYPE_V16HI_INT:
    case V8SI_FTYPE_V8SI_INT:
    case V4DI_FTYPE_V4DI_INT:
    case V2DI_FTYPE_V4DI_INT:
      nargs = 2;
      nargs_constant = 1;
      break;
    case V16QI_FTYPE_V16QI_V16QI_V16QI:
    case V8SF_FTYPE_V8SF_V8SF_V8SF:
    case V4DF_FTYPE_V4DF_V4DF_V4DF:
    case V4SF_FTYPE_V4SF_V4SF_V4SF:
    case V2DF_FTYPE_V2DF_V2DF_V2DF:
    case V32QI_FTYPE_V32QI_V32QI_V32QI:
      nargs = 3;
      break;
    case V32QI_FTYPE_V32QI_V32QI_INT:
    case V16HI_FTYPE_V16HI_V16HI_INT:
    case V16QI_FTYPE_V16QI_V16QI_INT:
    case V4DI_FTYPE_V4DI_V4DI_INT:
    case V8HI_FTYPE_V8HI_V8HI_INT:
    case V8SI_FTYPE_V8SI_V8SI_INT:
    case V8SI_FTYPE_V8SI_V4SI_INT:
    case V8SF_FTYPE_V8SF_V8SF_INT:
    case V8SF_FTYPE_V8SF_V4SF_INT:
    case V4SI_FTYPE_V4SI_V4SI_INT:
    case V4DF_FTYPE_V4DF_V4DF_INT:
    case V4DF_FTYPE_V4DF_V2DF_INT:
    case V4SF_FTYPE_V4SF_V4SF_INT:
    case V2DI_FTYPE_V2DI_V2DI_INT:
    case V4DI_FTYPE_V4DI_V2DI_INT:
    case V2DF_FTYPE_V2DF_V2DF_INT:
      nargs = 3;
      nargs_constant = 1;
      break;
    case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
      nargs = 3;
      rmode = V2TImode;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
      nargs = 3;
      rmode = V1TImode;
      nargs_constant = 1;
      break;
    case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
      nargs = 3;
      rmode = DImode;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_UINT_UINT:
      nargs = 3;
      nargs_constant = 2;
      break;
    case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
    case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
    case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
    case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
      nargs = 4;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
      nargs = 4;
      nargs_constant = 2;
      break;
    default:
      gcc_unreachable ();
    }

  gcc_assert (nargs <= ARRAY_SIZE (args));

  if (comparison != UNKNOWN)
    {
      gcc_assert (nargs == 2);
      return ix86_expand_sse_compare (d, exp, target, swap);
    }

  if (rmode == VOIDmode || rmode == tmode)
    {
      if (optimize
	  || target == 0
	  || GET_MODE (target) != tmode
	  || !insn_p->operand[0].predicate (target, tmode))
	target = gen_reg_rtx (tmode);
      real_target = target;
    }
  else
    {
      target = gen_reg_rtx (rmode);
      real_target = simplify_gen_subreg (tmode, target, rmode, 0);
    }

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      enum machine_mode mode = insn_p->operand[i + 1].mode;
      bool match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_count && (i + 1) == nargs)
	{
	  /* SIMD shift insns take either an 8-bit immediate or
	     register as count.  But builtin functions take int as
	     count.  If count doesn't match, we put it in register.  */
	  if (!match)
	    {
	      op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
	      if (!insn_p->operand[i + 1].predicate (op, mode))
		op = copy_to_reg (op);
	    }
	}
      else if ((nargs - i) <= nargs_constant)
	{
	  if (!match)
	    switch (icode)
	      {
	      case CODE_FOR_avx2_inserti128:
	      case CODE_FOR_avx2_extracti128:
		error ("the last argument must be a 1-bit immediate");
		return const0_rtx;

	      case CODE_FOR_sse4_1_roundsd:
	      case CODE_FOR_sse4_1_roundss:

	      case CODE_FOR_sse4_1_roundpd:
	      case CODE_FOR_sse4_1_roundps:
	      case CODE_FOR_avx_roundpd256:
	      case CODE_FOR_avx_roundps256:

	      case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
	      case CODE_FOR_sse4_1_roundps_sfix:
	      case CODE_FOR_avx_roundpd_vec_pack_sfix256:
	      case CODE_FOR_avx_roundps_sfix256:

	      case CODE_FOR_sse4_1_blendps:
	      case CODE_FOR_avx_blendpd256:
	      case CODE_FOR_avx_vpermilv4df:
		error ("the last argument must be a 4-bit immediate");
		return const0_rtx;

	      case CODE_FOR_sse4_1_blendpd:
	      case CODE_FOR_avx_vpermilv2df:
	      case CODE_FOR_xop_vpermil2v2df3:
	      case CODE_FOR_xop_vpermil2v4sf3:
	      case CODE_FOR_xop_vpermil2v4df3:
	      case CODE_FOR_xop_vpermil2v8sf3:
		error ("the last argument must be a 2-bit immediate");
		return const0_rtx;

	      case CODE_FOR_avx_vextractf128v4df:
	      case CODE_FOR_avx_vextractf128v8sf:
	      case CODE_FOR_avx_vextractf128v8si:
	      case CODE_FOR_avx_vinsertf128v4df:
	      case CODE_FOR_avx_vinsertf128v8sf:
	      case CODE_FOR_avx_vinsertf128v8si:
		error ("the last argument must be a 1-bit immediate");
		return const0_rtx;

	      case CODE_FOR_avx_vmcmpv2df3:
	      case CODE_FOR_avx_vmcmpv4sf3:
	      case CODE_FOR_avx_cmpv2df3:
	      case CODE_FOR_avx_cmpv4sf3:
	      case CODE_FOR_avx_cmpv4df3:
	      case CODE_FOR_avx_cmpv8sf3:
		error ("the last argument must be a 5-bit immediate");
		return const0_rtx;

	      default:
		switch (nargs_constant)
		  {
		  case 2:
		    if ((nargs - i) == nargs_constant)
		      {
			error ("the next to last argument must be an 8-bit immediate");
			break;
		      }
		  case 1:
		    error ("the last argument must be an 8-bit immediate");
		    break;
		  default:
		    gcc_unreachable ();
		  }
		return const0_rtx;
	      }
	}
      else
	{
	  if (VECTOR_MODE_P (mode))
	    op = safe_vector_operand (op, mode);

	  /* If we aren't optimizing, only allow one memory operand to
	     be generated.  */
	  if (memory_operand (op, mode))
	    num_memory++;

	  if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
	    {
	      if (optimize || !match || num_memory > 1)
		op = copy_to_mode_reg (mode, op);
	    }
	  else
	    {
	      op = copy_to_reg (op);
	      op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
	    }
	}

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (real_target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
			     args[2].op);
      break;
    case 4:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
			     args[2].op, args[3].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of special insns
   with variable number of operands.  */

static rtx
ix86_expand_special_args_builtin (const struct builtin_description *d,
				  tree exp, rtx target)
{
  tree arg;
  rtx pat, op;
  unsigned int i, nargs, arg_adjust, memory;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[3];
  enum insn_code icode = d->icode;
  bool last_arg_constant = false;
  const struct insn_data_d *insn_p = &insn_data[icode];
  enum machine_mode tmode = insn_p->operand[0].mode;
  enum { load, store } klass;

  switch ((enum ix86_builtin_func_type) d->flag)
    {
    case VOID_FTYPE_VOID:
      if (icode == CODE_FOR_avx_vzeroupper)
	target = GEN_INT (vzeroupper_intrinsic);
      emit_insn (GEN_FCN (icode) (target));
      return 0;
    case VOID_FTYPE_UINT64:
    case VOID_FTYPE_UNSIGNED:
      nargs = 0;
      klass = store;
      memory = 0;
      break;
    case INT_FTYPE_VOID:
    case UINT64_FTYPE_VOID:
    case UNSIGNED_FTYPE_VOID:
      nargs = 0;
      klass = load;
      memory = 0;
      break;
    case UINT64_FTYPE_PUNSIGNED:
    case V2DI_FTYPE_PV2DI:
    case V4DI_FTYPE_PV4DI:
    case V32QI_FTYPE_PCCHAR:
    case V16QI_FTYPE_PCCHAR:
    case V8SF_FTYPE_PCV4SF:
    case V8SF_FTYPE_PCFLOAT:
    case V4SF_FTYPE_PCFLOAT:
    case V4DF_FTYPE_PCV2DF:
    case V4DF_FTYPE_PCDOUBLE:
    case V2DF_FTYPE_PCDOUBLE:
    case VOID_FTYPE_PVOID:
      nargs = 1;
      klass = load;
      memory = 0;
      break;
    case VOID_FTYPE_PV2SF_V4SF:
    case VOID_FTYPE_PV4DI_V4DI:
    case VOID_FTYPE_PV2DI_V2DI:
    case VOID_FTYPE_PCHAR_V32QI:
    case VOID_FTYPE_PCHAR_V16QI:
    case VOID_FTYPE_PFLOAT_V8SF:
    case VOID_FTYPE_PFLOAT_V4SF:
    case VOID_FTYPE_PDOUBLE_V4DF:
    case VOID_FTYPE_PDOUBLE_V2DF:
    case VOID_FTYPE_PLONGLONG_LONGLONG:
    case VOID_FTYPE_PULONGLONG_ULONGLONG:
    case VOID_FTYPE_PINT_INT:
      nargs = 1;
      klass = store;
      /* Reserve memory operand for target.  */
      memory = ARRAY_SIZE (args);
      break;
    case V4SF_FTYPE_V4SF_PCV2SF:
    case V2DF_FTYPE_V2DF_PCDOUBLE:
      nargs = 2;
      klass = load;
      memory = 1;
      break;
    case V8SF_FTYPE_PCV8SF_V8SI:
    case V4DF_FTYPE_PCV4DF_V4DI:
    case V4SF_FTYPE_PCV4SF_V4SI:
    case V2DF_FTYPE_PCV2DF_V2DI:
    case V8SI_FTYPE_PCV8SI_V8SI:
    case V4DI_FTYPE_PCV4DI_V4DI:
    case V4SI_FTYPE_PCV4SI_V4SI:
    case V2DI_FTYPE_PCV2DI_V2DI:
      nargs = 2;
      klass = load;
      memory = 0;
      break;
    case VOID_FTYPE_PV8SF_V8SI_V8SF:
    case VOID_FTYPE_PV4DF_V4DI_V4DF:
    case VOID_FTYPE_PV4SF_V4SI_V4SF:
    case VOID_FTYPE_PV2DF_V2DI_V2DF:
    case VOID_FTYPE_PV8SI_V8SI_V8SI:
    case VOID_FTYPE_PV4DI_V4DI_V4DI:
    case VOID_FTYPE_PV4SI_V4SI_V4SI:
    case VOID_FTYPE_PV2DI_V2DI_V2DI:
      nargs = 2;
      klass = store;
      /* Reserve memory operand for target.  */
      memory = ARRAY_SIZE (args);
      break;
    case VOID_FTYPE_UINT_UINT_UINT:
    case VOID_FTYPE_UINT64_UINT_UINT:
    case UCHAR_FTYPE_UINT_UINT_UINT:
    case UCHAR_FTYPE_UINT64_UINT_UINT:
      nargs = 3;
      klass = load;
      memory = ARRAY_SIZE (args);
      last_arg_constant = true;
      break;
    default:
      gcc_unreachable ();
    }

  gcc_assert (nargs <= ARRAY_SIZE (args));

  if (klass == store)
    {
      arg = CALL_EXPR_ARG (exp, 0);
      op = expand_normal (arg);
      gcc_assert (target == 0);
      if (memory)
	{
	  if (GET_MODE (op) != Pmode)
	    op = convert_to_mode (Pmode, op, 1);
	  target = gen_rtx_MEM (tmode, force_reg (Pmode, op));
	}
      else
	target = force_reg (tmode, op);
      arg_adjust = 1;
    }
  else
    {
      arg_adjust = 0;
      if (optimize
	  || target == 0
	  || GET_MODE (target) != tmode
	  || !insn_p->operand[0].predicate (target, tmode))
	target = gen_reg_rtx (tmode);
    }

  for (i = 0; i < nargs; i++)
    {
      enum machine_mode mode = insn_p->operand[i + 1].mode;
      bool match;

      arg = CALL_EXPR_ARG (exp, i + arg_adjust);
      op = expand_normal (arg);
      match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_constant && (i + 1) == nargs)
	{
	  if (!match)
	    {
	      if (icode == CODE_FOR_lwp_lwpvalsi3
		  || icode == CODE_FOR_lwp_lwpinssi3
		  || icode == CODE_FOR_lwp_lwpvaldi3
		  || icode == CODE_FOR_lwp_lwpinsdi3)
		error ("the last argument must be a 32-bit immediate");
	      else
		error ("the last argument must be an 8-bit immediate");
	      return const0_rtx;
	    }
	}
      else
	{
	  if (i == memory)
	    {
	      /* This must be the memory operand.  */
	      if (GET_MODE (op) != Pmode)
		op = convert_to_mode (Pmode, op, 1);
	      op = gen_rtx_MEM (mode, force_reg (Pmode, op));
	      gcc_assert (GET_MODE (op) == mode
			  || GET_MODE (op) == VOIDmode);
	    }
	  else
	    {
	      /* This must be register.  */
	      if (VECTOR_MODE_P (mode))
		op = safe_vector_operand (op, mode);

	      gcc_assert (GET_MODE (op) == mode
			  || GET_MODE (op) == VOIDmode);
	      op = copy_to_mode_reg (mode, op);
	    }
	}

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 0:
      pat = GEN_FCN (icode) (target);
      break;
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return klass == store ? 0 : target;
}
/* Return the integer constant in ARG.  Constrain it to be in the range
   of the subparts of VEC_TYPE; issue an error if not.  */

static unsigned HOST_WIDE_INT
get_element_number (tree vec_type, tree arg)
{
  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;

  if (!host_integerp (arg, 1)
      || (elt = tree_low_cst (arg, 1), elt > max))
    {
      error ("selector must be an integer constant in the range 0..%wi", max);
      return 0;
    }

  return elt;
}
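/* Illustrative sketch, not part of GCC: the range check above in action.
   A V4SF vector has subparts 0..3, so a constant selector outside that
   range draws the error at compile time.  */
#if 0
typedef float __v4sf __attribute__ ((vector_size (16)));

float
last_lane (__v4sf v)
{
  return __builtin_ia32_vec_ext_v4sf (v, 3);	/* 3 is the largest selector */
  /* __builtin_ia32_vec_ext_v4sf (v, 4) would be rejected here.  */
}
#endif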
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_init.  We DO have language-level syntax for this, in
   the form of (type){ init-list }.  Except that since we can't place emms
   instructions from inside the compiler, we can't allow the use of MMX
   registers unless the user explicitly asks for it.  So we do *not* define
   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
   we have builtins invoked by mmintrin.h that gives us license to emit
   these sorts of instructions.  */

static rtx
ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
{
  enum machine_mode tmode = TYPE_MODE (type);
  enum machine_mode inner_mode = GET_MODE_INNER (tmode);
  int i, n_elt = GET_MODE_NUNITS (tmode);
  rtvec v = rtvec_alloc (n_elt);

  gcc_assert (VECTOR_MODE_P (tmode));
  gcc_assert (call_expr_nargs (exp) == n_elt);

  for (i = 0; i < n_elt; ++i)
    {
      rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
    }

  if (!target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
  return target;
}
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
   had a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_ext_builtin (tree exp, rtx target)
{
  enum machine_mode tmode, mode0;
  tree arg0, arg1;
  int elt;
  rtx op0;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);

  op0 = expand_normal (arg0);
  elt = get_element_number (TREE_TYPE (arg0), arg1);

  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  mode0 = TYPE_MODE (TREE_TYPE (arg0));
  gcc_assert (VECTOR_MODE_P (mode0));

  op0 = force_reg (mode0, op0);

  if (optimize || !target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_extract (true, target, op0, elt);

  return target;
}
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
   a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_set_builtin (tree exp)
{
  enum machine_mode tmode, mode1;
  tree arg0, arg1, arg2;
  int elt;
  rtx op0, op1, target;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);
  arg2 = CALL_EXPR_ARG (exp, 2);

  tmode = TYPE_MODE (TREE_TYPE (arg0));
  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  gcc_assert (VECTOR_MODE_P (tmode));

  op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
  op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
  elt = get_element_number (TREE_TYPE (arg0), arg2);

  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);

  op0 = force_reg (tmode, op0);
  op1 = force_reg (mode1, op1);

  /* OP0 is the source of these builtin functions and shouldn't be
     modified.  Create a copy, use it and return it as target.  */
  target = gen_reg_rtx (tmode);
  emit_move_insn (target, op0);
  ix86_expand_vector_set (true, target, op1, elt);

  return target;
}
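/* Illustrative sketch, not part of GCC: the copy semantics implemented
   above, seen from user code.  _mm_insert_epi16 is assumed to forward to
   __builtin_ia32_vec_set_v8hi; the source vector is left untouched and a
   modified copy is returned.  */
#if 0
#include <emmintrin.h>

__m128i
replace_lane0 (__m128i v, int x)
{
  __m128i w = _mm_insert_epi16 (v, x, 0);	/* v itself is unchanged */
  return w;
}
#endif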
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		     enum machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  tree arg0, arg1, arg2, arg3, arg4;
  rtx op0, op1, op2, op3, op4, pat;
  enum machine_mode mode0, mode1, mode2, mode3, mode4;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  /* Determine whether the builtin function is available under the current ISA.
     Originally the builtin was not created if it wasn't applicable to the
     current ISA based on the command line switches.  With function specific
     options, we need to check in the context of the function making the call
     whether it is supported.  */
  if (ix86_builtins_isa[fcode].isa
      && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
    {
      char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
				       NULL, (enum fpmath_unit) 0, false);

      if (!opts)
	error ("%qE needs unknown isa option", fndecl);
      else
	{
	  gcc_assert (opts != NULL);
	  error ("%qE needs isa option %s", fndecl, opts);
	  free (opts);
	}
      return const0_rtx;
    }

  switch (fcode)
    {
    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
	       ? CODE_FOR_mmx_maskmovq
	       : CODE_FOR_sse2_maskmovdqu);
      /* Note the arg order is different from the operand order.  */
      arg1 = CALL_EXPR_ARG (exp, 0);
      arg2 = CALL_EXPR_ARG (exp, 1);
      arg0 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (GET_MODE (op0) != Pmode)
	op0 = convert_to_mode (Pmode, op0, 1);
      op0 = gen_rtx_MEM (mode1, force_reg (Pmode, op0));

      if (!insn_data[icode].operand[0].predicate (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (!insn_data[icode].operand[1].predicate (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (!insn_data[icode].operand[2].predicate (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
      emit_move_insn (target, op0);
      emit_insn (gen_sse_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
      emit_insn (gen_sse_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_CLFLUSH:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_sse2_clflush;
      if (!insn_data[icode].operand[0].predicate (op0, Pmode))
	{
	  if (GET_MODE (op0) != Pmode)
	    op0 = convert_to_mode (Pmode, op0, 1);
	  op0 = force_reg (Pmode, op0);
	}

      emit_insn (gen_sse2_clflush (op0));
      return 0;

    case IX86_BUILTIN_MONITOR:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      if (!REG_P (op0))
	{
	  if (GET_MODE (op0) != Pmode)
	    op0 = convert_to_mode (Pmode, op0, 1);
	  op0 = force_reg (Pmode, op0);
	}
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      if (!REG_P (op2))
	op2 = copy_to_mode_reg (SImode, op2);
      emit_insn (ix86_gen_monitor (op0, op1, op2));
      return 0;

    case IX86_BUILTIN_MWAIT:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      emit_insn (gen_sse3_mwait (op0, op1));
      return 0;
    case IX86_BUILTIN_VEC_INIT_V2SI:
    case IX86_BUILTIN_VEC_INIT_V4HI:
    case IX86_BUILTIN_VEC_INIT_V8QI:
      return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);

    case IX86_BUILTIN_VEC_EXT_V2DF:
    case IX86_BUILTIN_VEC_EXT_V2DI:
    case IX86_BUILTIN_VEC_EXT_V4SF:
    case IX86_BUILTIN_VEC_EXT_V4SI:
    case IX86_BUILTIN_VEC_EXT_V8HI:
    case IX86_BUILTIN_VEC_EXT_V2SI:
    case IX86_BUILTIN_VEC_EXT_V4HI:
    case IX86_BUILTIN_VEC_EXT_V16QI:
      return ix86_expand_vec_ext_builtin (exp, target);

    case IX86_BUILTIN_VEC_SET_V2DI:
    case IX86_BUILTIN_VEC_SET_V4SF:
    case IX86_BUILTIN_VEC_SET_V4SI:
    case IX86_BUILTIN_VEC_SET_V8HI:
    case IX86_BUILTIN_VEC_SET_V4HI:
    case IX86_BUILTIN_VEC_SET_V16QI:
      return ix86_expand_vec_set_builtin (exp);
    case IX86_BUILTIN_INFQ:
    case IX86_BUILTIN_HUGE_VALQ:
      {
	REAL_VALUE_TYPE inf;
	rtx tmp;

	real_inf (&inf);
	tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);

	tmp = validize_mem (force_const_mem (mode, tmp));

	if (target == 0)
	  target = gen_reg_rtx (mode);

	emit_move_insn (target, tmp);
	return target;
      }
    case IX86_BUILTIN_LLWPCB:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_lwp_llwpcb;
      if (!insn_data[icode].operand[0].predicate (op0, Pmode))
	{
	  if (GET_MODE (op0) != Pmode)
	    op0 = convert_to_mode (Pmode, op0, 1);
	  op0 = force_reg (Pmode, op0);
	}
      emit_insn (gen_lwp_llwpcb (op0));
      return 0;

    case IX86_BUILTIN_SLWPCB:
      icode = CODE_FOR_lwp_slwpcb;
      if (!target
	  || !insn_data[icode].operand[0].predicate (target, Pmode))
	target = gen_reg_rtx (Pmode);
      emit_insn (gen_lwp_slwpcb (target));
      return target;
    case IX86_BUILTIN_BEXTRI32:
    case IX86_BUILTIN_BEXTRI64:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      icode = (fcode == IX86_BUILTIN_BEXTRI32
	       ? CODE_FOR_tbm_bextri_si
	       : CODE_FOR_tbm_bextri_di);
      if (!CONST_INT_P (op1))
	{
	  error ("last argument must be an immediate");
	  return const0_rtx;
	}
      else
	{
	  unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
	  unsigned char lsb_index = INTVAL (op1) & 0xFF;
	  op1 = GEN_INT (length);
	  op2 = GEN_INT (lsb_index);
	  pat = GEN_FCN (icode) (target, op0, op1, op2);
	  if (pat)
	    emit_insn (pat);
	  return target;
	}
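      /* Illustrative sketch, not part of GCC: the immediate decoded above
	 packs the field length in bits 15:8 and the start bit in bits 7:0.
	 The builtin name below is an assumption based on the TBM wrappers;
	 a plain C model of the same extraction is shown alongside.  */
#if 0
unsigned int
extract_bits_11_8 (unsigned int x)
{
  unsigned int by_builtin = __builtin_ia32_bextri_u32 (x, (4 << 8) | 8);
  unsigned int by_hand = (x >> 8) & 0xF;	/* length 4, start bit 8 */
  return by_builtin == by_hand ? by_builtin : 0;
}
#endif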
    case IX86_BUILTIN_RDRAND16_STEP:
      icode = CODE_FOR_rdrandhi_1;
      mode0 = HImode;
      goto rdrand_step;

    case IX86_BUILTIN_RDRAND32_STEP:
      icode = CODE_FOR_rdrandsi_1;
      mode0 = SImode;
      goto rdrand_step;

    case IX86_BUILTIN_RDRAND64_STEP:
      icode = CODE_FOR_rdranddi_1;
      mode0 = DImode;

    rdrand_step:
      op0 = gen_reg_rtx (mode0);
      emit_insn (GEN_FCN (icode) (op0));

      arg0 = CALL_EXPR_ARG (exp, 0);
      op1 = expand_normal (arg0);
      if (!address_operand (op1, VOIDmode))
	{
	  op1 = convert_memory_address (Pmode, op1);
	  op1 = copy_addr_to_reg (op1);
	}
      emit_move_insn (gen_rtx_MEM (mode0, op1), op0);

      op1 = gen_reg_rtx (SImode);
      emit_move_insn (op1, CONST1_RTX (SImode));

      /* Emit SImode conditional move.  */
      if (mode0 == HImode)
	{
	  op2 = gen_reg_rtx (SImode);
	  emit_insn (gen_zero_extendhisi2 (op2, op0));
	}
      else if (mode0 == SImode)
	op2 = op0;
      else
	op2 = gen_rtx_SUBREG (SImode, op0, 0);

      if (target == 0)
	target = gen_reg_rtx (SImode);

      pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
			 const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
      return target;
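      /* Illustrative sketch, not part of GCC: the rdrand step builtins
	 expanded above store the random value through the pointer argument
	 and return the carry-flag success bit, mirroring the
	 _rdrand32_step intrinsic.  */
#if 0
#include <immintrin.h>

int
get_random (unsigned int *out)
{
  int tries = 10;
  while (tries-- > 0)
    if (_rdrand32_step (out))	/* expands to __builtin_ia32_rdrand32_step */
      return 1;
  return 0;			/* hardware entropy not yet available */
}
#endif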
29515 case IX86_BUILTIN_GATHERSIV2DF
:
29516 icode
= CODE_FOR_avx2_gathersiv2df
;
29518 case IX86_BUILTIN_GATHERSIV4DF
:
29519 icode
= CODE_FOR_avx2_gathersiv4df
;
29521 case IX86_BUILTIN_GATHERDIV2DF
:
29522 icode
= CODE_FOR_avx2_gatherdiv2df
;
29524 case IX86_BUILTIN_GATHERDIV4DF
:
29525 icode
= CODE_FOR_avx2_gatherdiv4df
;
29527 case IX86_BUILTIN_GATHERSIV4SF
:
29528 icode
= CODE_FOR_avx2_gathersiv4sf
;
29530 case IX86_BUILTIN_GATHERSIV8SF
:
29531 icode
= CODE_FOR_avx2_gathersiv8sf
;
29533 case IX86_BUILTIN_GATHERDIV4SF
:
29534 icode
= CODE_FOR_avx2_gatherdiv4sf
;
29536 case IX86_BUILTIN_GATHERDIV8SF
:
29537 icode
= CODE_FOR_avx2_gatherdiv8sf
;
29539 case IX86_BUILTIN_GATHERSIV2DI
:
29540 icode
= CODE_FOR_avx2_gathersiv2di
;
29542 case IX86_BUILTIN_GATHERSIV4DI
:
29543 icode
= CODE_FOR_avx2_gathersiv4di
;
29545 case IX86_BUILTIN_GATHERDIV2DI
:
29546 icode
= CODE_FOR_avx2_gatherdiv2di
;
29548 case IX86_BUILTIN_GATHERDIV4DI
:
29549 icode
= CODE_FOR_avx2_gatherdiv4di
;
29551 case IX86_BUILTIN_GATHERSIV4SI
:
29552 icode
= CODE_FOR_avx2_gathersiv4si
;
29554 case IX86_BUILTIN_GATHERSIV8SI
:
29555 icode
= CODE_FOR_avx2_gathersiv8si
;
29557 case IX86_BUILTIN_GATHERDIV4SI
:
29558 icode
= CODE_FOR_avx2_gatherdiv4si
;
29560 case IX86_BUILTIN_GATHERDIV8SI
:
29561 icode
= CODE_FOR_avx2_gatherdiv8si
;
29563 case IX86_BUILTIN_GATHERALTSIV4DF
:
29564 icode
= CODE_FOR_avx2_gathersiv4df
;
29566 case IX86_BUILTIN_GATHERALTDIV8SF
:
29567 icode
= CODE_FOR_avx2_gatherdiv8sf
;
29569 case IX86_BUILTIN_GATHERALTSIV4DI
:
29570 icode
= CODE_FOR_avx2_gathersiv4di
;
29572 case IX86_BUILTIN_GATHERALTDIV8SI
:
29573 icode
= CODE_FOR_avx2_gatherdiv8si
;
29577 arg0
= CALL_EXPR_ARG (exp
, 0);
29578 arg1
= CALL_EXPR_ARG (exp
, 1);
29579 arg2
= CALL_EXPR_ARG (exp
, 2);
29580 arg3
= CALL_EXPR_ARG (exp
, 3);
29581 arg4
= CALL_EXPR_ARG (exp
, 4);
29582 op0
= expand_normal (arg0
);
29583 op1
= expand_normal (arg1
);
29584 op2
= expand_normal (arg2
);
29585 op3
= expand_normal (arg3
);
29586 op4
= expand_normal (arg4
);
29587 /* Note the arg order is different from the operand order. */
29588 mode0
= insn_data
[icode
].operand
[1].mode
;
29589 mode2
= insn_data
[icode
].operand
[3].mode
;
29590 mode3
= insn_data
[icode
].operand
[4].mode
;
29591 mode4
= insn_data
[icode
].operand
[5].mode
;
29593 if (target
== NULL_RTX
29594 || GET_MODE (target
) != insn_data
[icode
].operand
[0].mode
)
29595 subtarget
= gen_reg_rtx (insn_data
[icode
].operand
[0].mode
);
29597 subtarget
= target
;
29599 if (fcode
== IX86_BUILTIN_GATHERALTSIV4DF
29600 || fcode
== IX86_BUILTIN_GATHERALTSIV4DI
)
29602 rtx half
= gen_reg_rtx (V4SImode
);
29603 if (!nonimmediate_operand (op2
, V8SImode
))
29604 op2
= copy_to_mode_reg (V8SImode
, op2
);
29605 emit_insn (gen_vec_extract_lo_v8si (half
, op2
));
29608 else if (fcode
== IX86_BUILTIN_GATHERALTDIV8SF
29609 || fcode
== IX86_BUILTIN_GATHERALTDIV8SI
)
29611 rtx (*gen
) (rtx
, rtx
);
29612 rtx half
= gen_reg_rtx (mode0
);
29613 if (mode0
== V4SFmode
)
29614 gen
= gen_vec_extract_lo_v8sf
;
29616 gen
= gen_vec_extract_lo_v8si
;
29617 if (!nonimmediate_operand (op0
, GET_MODE (op0
)))
29618 op0
= copy_to_mode_reg (GET_MODE (op0
), op0
);
29619 emit_insn (gen (half
, op0
));
29621 if (!nonimmediate_operand (op3
, GET_MODE (op3
)))
29622 op3
= copy_to_mode_reg (GET_MODE (op3
), op3
);
29623 emit_insn (gen (half
, op3
));
29627 /* Force memory operand only with base register here. But we
29628 don't want to do it on memory operand for other builtin
29630 if (GET_MODE (op1
) != Pmode
)
29631 op1
= convert_to_mode (Pmode
, op1
, 1);
29632 op1
= force_reg (Pmode
, op1
);
      if (!insn_data[icode].operand[1].predicate (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (!insn_data[icode].operand[2].predicate (op1, Pmode))
        op1 = copy_to_mode_reg (Pmode, op1);
      if (!insn_data[icode].operand[3].predicate (op2, mode2))
        op2 = copy_to_mode_reg (mode2, op2);
      if (!insn_data[icode].operand[4].predicate (op3, mode3))
        op3 = copy_to_mode_reg (mode3, op3);
      if (!insn_data[icode].operand[5].predicate (op4, mode4))
        {
          error ("last argument must be scale 1, 2, 4, 8");
          return const0_rtx;
        }
      /* Optimize.  If mask is known to have all high bits set,
         replace op0 with pc_rtx to signal that the instruction
         overwrites the whole destination and doesn't use its
         previous contents.  */
      if (optimize)
        {
          if (TREE_CODE (arg3) == VECTOR_CST)
            {
              unsigned int negative = 0;
              for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
                {
                  tree cst = VECTOR_CST_ELT (arg3, i);
                  if (TREE_CODE (cst) == INTEGER_CST
                      && tree_int_cst_sign_bit (cst))
                    negative++;
                  else if (TREE_CODE (cst) == REAL_CST
                           && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
                    negative++;
                }
              if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
                op0 = pc_rtx;
            }
          else if (TREE_CODE (arg3) == SSA_NAME)
            {
              /* Recognize also when mask is like:
                 __v2df src = _mm_setzero_pd ();
                 __v2df mask = _mm_cmpeq_pd (src, src);
                 or
                 __v8sf src = _mm256_setzero_ps ();
                 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
                 as that is a cheaper way to load all ones into
                 a register than having to load a constant from
                 memory.  */
              gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
              if (is_gimple_call (def_stmt))
                {
                  tree fndecl = gimple_call_fndecl (def_stmt);
                  if (fndecl
                      && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
                    switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
                      {
                      case IX86_BUILTIN_CMPPD:
                      case IX86_BUILTIN_CMPPS:
                      case IX86_BUILTIN_CMPPD256:
                      case IX86_BUILTIN_CMPPS256:
                        if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
                          break;
                        /* FALLTHRU */
                      case IX86_BUILTIN_CMPEQPD:
                      case IX86_BUILTIN_CMPEQPS:
                        if (initializer_zerop (gimple_call_arg (def_stmt, 0))
                            && initializer_zerop (gimple_call_arg (def_stmt,
                                                                   1)))
                          op0 = pc_rtx;
                        break;
                      default:
                        break;
                      }
                }
            }
        }
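/* Illustration (editorial, not part of the original file): a user-level
   mask that the SSA_NAME branch above recognizes as all-ones, so the
   expander can replace op0 with pc_rtx.  A minimal sketch assuming the
   usual AVX2 intrinsics; the guard keeps it out of any build.  */
#if 0
#include <immintrin.h>

__m256d
gather_all (const double *base, __m128i idx)
{
  __m256d src = _mm256_setzero_pd ();
  /* cmp_pd (x, x, _CMP_EQ_OQ) with x == x everywhere yields all-ones,
     which is cheaper than loading an all-ones constant from memory.  */
  __m256d mask = _mm256_cmp_pd (src, src, _CMP_EQ_OQ);
  return _mm256_mask_i32gather_pd (src, base, idx, mask, 8);
}
#endif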
      pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
      if (! pat)
        return const0_rtx;
      emit_insn (pat);

      if (fcode == IX86_BUILTIN_GATHERDIV8SF
          || fcode == IX86_BUILTIN_GATHERDIV8SI)
        {
          enum machine_mode tmode = GET_MODE (subtarget) == V8SFmode
                                    ? V4SFmode : V4SImode;
          if (target == NULL_RTX)
            target = gen_reg_rtx (tmode);
          if (tmode == V4SFmode)
            emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
          else
            emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
        }
      else
        target = subtarget;

      return target;
    case IX86_BUILTIN_XABORT:
      icode = CODE_FOR_xabort;
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      mode0 = insn_data[icode].operand[0].mode;
      if (!insn_data[icode].operand[0].predicate (op0, mode0))
        {
          error ("the xabort's argument must be an 8-bit immediate");
          return const0_rtx;
        }
      emit_insn (gen_xabort (op0));
      return 0;

    default:
      break;
    }
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_special_args_builtin (d, exp, target);

  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    if (d->code == fcode)
      switch (fcode)
        {
        case IX86_BUILTIN_FABSQ:
        case IX86_BUILTIN_COPYSIGNQ:
          if (!TARGET_SSE2)
            /* Emit a normal call if SSE2 isn't available.  */
            return expand_call (exp, target, ignore);
        default:
          return ix86_expand_args_builtin (d, exp, target);
        }

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, exp, target);

  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_pcmpestr (d, exp, target);

  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_pcmpistr (d, exp, target);

  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_multi_arg_builtin (d->icode, exp, target,
                                            (enum ix86_builtin_func_type)
                                            d->flag, d->comparison);

  gcc_unreachable ();
}
/* Returns a function decl for a vectorized version of the builtin function
   with builtin function code FN and the result vector type TYPE, or NULL_TREE
   if it is not available.  */

static tree
ix86_builtin_vectorized_function (tree fndecl, tree type_out,
                                  tree type_in)
{
  enum machine_mode in_mode, out_mode;
  int in_n, out_n;
  enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);

  if (TREE_CODE (type_out) != VECTOR_TYPE
      || TREE_CODE (type_in) != VECTOR_TYPE
      || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
    return NULL_TREE;

  out_mode = TYPE_MODE (TREE_TYPE (type_out));
  out_n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);

  switch (fn)
    {
    case BUILT_IN_SQRT:
      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_SQRTPD];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_SQRTPD256];
        }
      break;

    case BUILT_IN_SQRTF:
      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
        }
      break;
    case BUILT_IN_IFLOOR:
    case BUILT_IN_LFLOOR:
    case BUILT_IN_LLFLOOR:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SImode && in_mode == DFmode)
        {
          if (out_n == 4 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX];
          else if (out_n == 8 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256];
        }
      break;

    case BUILT_IN_IFLOORF:
    case BUILT_IN_LFLOORF:
    case BUILT_IN_LLFLOORF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SImode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX256];
        }
      break;

    case BUILT_IN_ICEIL:
    case BUILT_IN_LCEIL:
    case BUILT_IN_LLCEIL:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SImode && in_mode == DFmode)
        {
          if (out_n == 4 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX];
          else if (out_n == 8 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256];
        }
      break;

    case BUILT_IN_ICEILF:
    case BUILT_IN_LCEILF:
    case BUILT_IN_LLCEILF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SImode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX256];
        }
      break;
    case BUILT_IN_IRINT:
    case BUILT_IN_LRINT:
    case BUILT_IN_LLRINT:
      if (out_mode == SImode && in_mode == DFmode)
        {
          if (out_n == 4 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
          else if (out_n == 8 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX256];
        }
      break;

    case BUILT_IN_IRINTF:
    case BUILT_IN_LRINTF:
    case BUILT_IN_LLRINTF:
      if (out_mode == SImode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
        }
      break;

    case BUILT_IN_IROUND:
    case BUILT_IN_LROUND:
    case BUILT_IN_LLROUND:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SImode && in_mode == DFmode)
        {
          if (out_n == 4 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX];
          else if (out_n == 8 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256];
        }
      break;

    case BUILT_IN_IROUNDF:
    case BUILT_IN_LROUNDF:
    case BUILT_IN_LLROUNDF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SImode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX256];
        }
      break;
    case BUILT_IN_COPYSIGN:
      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
        }
      break;

    case BUILT_IN_COPYSIGNF:
      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
        }
      break;
    case BUILT_IN_FLOOR:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_FLOORPD];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_FLOORPD256];
        }
      break;

    case BUILT_IN_FLOORF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_FLOORPS];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_FLOORPS256];
        }
      break;

    case BUILT_IN_CEIL:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_CEILPD];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CEILPD256];
        }
      break;

    case BUILT_IN_CEILF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CEILPS];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_CEILPS256];
        }
      break;
    case BUILT_IN_TRUNC:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_TRUNCPD];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
        }
      break;

    case BUILT_IN_TRUNCF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_TRUNCPS];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
        }
      break;

    case BUILT_IN_RINT:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_RINTPD];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_RINTPD256];
        }
      break;

    case BUILT_IN_RINTF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_RINTPS];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_RINTPS256];
        }
      break;
    case BUILT_IN_ROUND:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ256];
        }
      break;

    case BUILT_IN_ROUNDF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ256];
        }
      break;
    case BUILT_IN_FMA:
      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_VFMADDPD];
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
        }
      break;

    case BUILT_IN_FMAF:
      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_VFMADDPS];
          if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
        }
      break;

    default:
      break;
    }
  /* Dispatch to a handler for a vectorization library.  */
  if (ix86_veclib_handler)
    return ix86_veclib_handler ((enum built_in_function) fn, type_out,
                                type_in);

  return NULL_TREE;
}
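/* Illustration (editorial, not part of the original file): a loop whose
   sqrt calls flow through the hook above, mapping BUILT_IN_SQRT to
   IX86_BUILTIN_SQRTPD when vectorizing two doubles at a time.  A
   hypothetical user-code sketch, compiled with e.g. -O3 -ffast-math so
   sqrt is treated as the builtin.  */
#if 0
#include <math.h>

void
vec_sqrt (double *restrict a, const double *restrict b, int n)
{
  int i;
  for (i = 0; i < n; i++)
    a[i] = sqrt (b[i]);   /* BUILT_IN_SQRT -> sqrtpd under V2DF.  */
}
#endif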
/* Handler for an SVML-style interface to
   a library with vectorized intrinsics.  */

static tree
ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
{
  char name[20];
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  enum machine_mode el_mode, in_mode;
  int n, in_n;

  /* The SVML is suitable for unsafe math only.  */
  if (!flag_unsafe_math_optimizations)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    case BUILT_IN_EXP:
    case BUILT_IN_LOG:
    case BUILT_IN_LOG10:
    case BUILT_IN_POW:
    case BUILT_IN_TANH:
    case BUILT_IN_TAN:
    case BUILT_IN_ATAN:
    case BUILT_IN_ATAN2:
    case BUILT_IN_ATANH:
    case BUILT_IN_CBRT:
    case BUILT_IN_SINH:
    case BUILT_IN_SIN:
    case BUILT_IN_ASINH:
    case BUILT_IN_ASIN:
    case BUILT_IN_COSH:
    case BUILT_IN_COS:
    case BUILT_IN_ACOSH:
    case BUILT_IN_ACOS:
      if (el_mode != DFmode || n != 2)
        return NULL_TREE;
      break;

    case BUILT_IN_EXPF:
    case BUILT_IN_LOGF:
    case BUILT_IN_LOG10F:
    case BUILT_IN_POWF:
    case BUILT_IN_TANHF:
    case BUILT_IN_TANF:
    case BUILT_IN_ATANF:
    case BUILT_IN_ATAN2F:
    case BUILT_IN_ATANHF:
    case BUILT_IN_CBRTF:
    case BUILT_IN_SINHF:
    case BUILT_IN_SINF:
    case BUILT_IN_ASINHF:
    case BUILT_IN_ASINF:
    case BUILT_IN_COSHF:
    case BUILT_IN_COSF:
    case BUILT_IN_ACOSHF:
    case BUILT_IN_ACOSF:
      if (el_mode != SFmode || n != 4)
        return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));

  if (fn == BUILT_IN_LOGF)
    strcpy (name, "vmlsLn4");
  else if (fn == BUILT_IN_LOG)
    strcpy (name, "vmldLn2");
  else if (n == 4)
    {
      sprintf (name, "vmls%s", bname+10);
      name[strlen (name)-1] = '4';
    }
  else
    sprintf (name, "vmld%s2", bname+10);

  /* Convert to uppercase.  */
  name[4] &= ~0x20;

  arity = 0;
  for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
       args;
       args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
                           FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
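/* Worked example (editorial) of the SVML name mangling above, recomputed
   standalone.  The "__builtin_" prefix is 10 characters, so bname + 10 is
   the stem; everything below is an illustrative sketch, not part of the
   original file.  */
#if 0
#include <stdio.h>
#include <string.h>

/* Mirror the n == 4 (4 x SFmode) branch: "__builtin_sinf" becomes
   "vmlssinf", the trailing 'f' is overwritten with '4', and clearing
   bit 0x20 upcases the stem's first letter, giving "vmlsSin4".  */
static void
svml_name4 (const char *bname, char name[20])
{
  sprintf (name, "vmls%s", bname + 10);
  name[strlen (name) - 1] = '4';
  name[4] &= ~0x20;
}

int
main (void)
{
  char name[20];
  svml_name4 ("__builtin_sinf", name);
  printf ("%s\n", name);   /* Prints "vmlsSin4".  */
  return 0;
}
#endif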
/* Handler for an ACML-style interface to
   a library with vectorized intrinsics.  */

static tree
ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
{
  char name[20] = "__vr.._";
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  enum machine_mode el_mode, in_mode;
  int n, in_n;

  /* The ACML is 64bits only and suitable for unsafe math only as
     it does not correctly support parts of IEEE with the required
     precision such as denormals.  */
  if (!TARGET_64BIT
      || !flag_unsafe_math_optimizations)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    case BUILT_IN_SIN:
    case BUILT_IN_COS:
    case BUILT_IN_EXP:
    case BUILT_IN_POW:
    case BUILT_IN_LOG:
    case BUILT_IN_LOG2:
    case BUILT_IN_LOG10:
      name[4] = 'd';
      name[5] = '2';
      if (el_mode != DFmode
          || n != 2)
        return NULL_TREE;
      break;

    case BUILT_IN_SINF:
    case BUILT_IN_COSF:
    case BUILT_IN_EXPF:
    case BUILT_IN_POWF:
    case BUILT_IN_LOGF:
    case BUILT_IN_LOG2F:
    case BUILT_IN_LOG10F:
      name[4] = 's';
      name[5] = '4';
      if (el_mode != SFmode
          || n != 4)
        return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
  sprintf (name + 7, "%s", bname+10);

  arity = 0;
  for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
       args;
       args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
                           FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
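/* Worked example (editorial): for BUILT_IN_SIN the template "__vr.._"
   becomes "__vrd2_" (name[4] = 'd', name[5] = '2') and the sprintf at
   name + 7 appends the stem of "__builtin_sin", giving "__vrd2_sin";
   the float variant comes out the same way as "__vrs4_sinf".  */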
/* Returns a decl of a function that implements gather load with
   memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
   Return NULL_TREE if it is not available.  */

static tree
ix86_vectorize_builtin_gather (const_tree mem_vectype,
                               const_tree index_type, int scale)
{
  bool si;
  enum ix86_builtins code;

  if (! TARGET_AVX2)
    return NULL_TREE;

  if ((TREE_CODE (index_type) != INTEGER_TYPE
       && !POINTER_TYPE_P (index_type))
      || (TYPE_MODE (index_type) != SImode
          && TYPE_MODE (index_type) != DImode))
    return NULL_TREE;

  if (TYPE_PRECISION (index_type) > POINTER_SIZE)
    return NULL_TREE;

  /* v*gather* insn sign extends index to pointer mode.  */
  if (TYPE_PRECISION (index_type) < POINTER_SIZE
      && TYPE_UNSIGNED (index_type))
    return NULL_TREE;

  if (scale <= 0
      || scale > 8
      || (scale & (scale - 1)) != 0)
    return NULL_TREE;

  si = TYPE_MODE (index_type) == SImode;
  switch (TYPE_MODE (mem_vectype))
    {
    case V2DFmode:
      code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
      break;
    case V4DFmode:
      code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
      break;
    case V2DImode:
      code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
      break;
    case V4DImode:
      code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
      break;
    case V4SFmode:
      code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
      break;
    case V8SFmode:
      code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
      break;
    case V4SImode:
      code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
      break;
    case V8SImode:
      code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
      break;
    default:
      return NULL_TREE;
    }

  return ix86_builtins[code];
}
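/* Illustration (editorial, not part of the original file): a loop shape
   that reaches this hook when vectorizing with AVX2.  Gathering doubles
   through SImode indexes selects the SIV4DF path above.  Hypothetical
   user code:  */
#if 0
void
gather_loop (double *restrict out, const double *restrict tab,
             const int *restrict idx, int n)
{
  int i;
  for (i = 0; i < n; i++)
    out[i] = tab[idx[i]];   /* -> vgatherdpd with scale 8.  */
}
#endif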
/* Returns a code for a target-specific builtin that implements
   reciprocal of the function, or NULL_TREE if not available.  */

static tree
ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
                         bool sqrt ATTRIBUTE_UNUSED)
{
  if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
         && flag_finite_math_only && !flag_trapping_math
         && flag_unsafe_math_optimizations))
    return NULL_TREE;

  if (md_fn)
    /* Machine dependent builtins.  */
    switch (fn)
      {
        /* Vectorized version of sqrt to rsqrt conversion.  */
      case IX86_BUILTIN_SQRTPS_NR:
        return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];

      case IX86_BUILTIN_SQRTPS_NR256:
        return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];

      default:
        return NULL_TREE;
      }
  else
    /* Normal builtins.  */
    switch (fn)
      {
        /* Sqrt to rsqrt conversion.  */
      case BUILT_IN_SQRTF:
        return ix86_builtins[IX86_BUILTIN_RSQRTF];

      default:
        return NULL_TREE;
      }
}
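/* Illustration (editorial): under -ffast-math (finite math, no trapping,
   unsafe optimizations) a division by sqrtf is a candidate for the rsqrt
   rewrite enabled through this hook.  A hypothetical user-code sketch:  */
#if 0
#include <math.h>

float
inv_norm (float x, float y)
{
  /* 1/sqrtf may be expanded as an RSQRTSS-based estimate plus a
     Newton-Raphson step instead of SQRTSS followed by DIVSS.  */
  return 1.0f / sqrtf (x * x + y * y);
}
#endif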
/* Helper for avx_vpermilps256_operand et al.  This is also used by
   the expansion functions to turn the parallel back into a mask.
   The return value is 0 for no match and the imm8+1 for a match.  */

int
avx_vpermilp_parallel (rtx par, enum machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode);
  unsigned mask = 0;
  unsigned char ipar[8];

  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
        return 0;
      ei = INTVAL (er);
      if (ei >= nelt)
        return 0;
      ipar[i] = ei;
    }

  switch (mode)
    {
    case V4DFmode:
      /* In the 256-bit DFmode case, we can only move elements within
         a 128-bit lane.  */
      for (i = 0; i < 2; ++i)
        {
          if (ipar[i] >= 2)
            return 0;
          mask |= ipar[i] << i;
        }
      for (i = 2; i < 4; ++i)
        {
          if (ipar[i] < 2)
            return 0;
          mask |= (ipar[i] - 2) << i;
        }
      break;

    case V8SFmode:
      /* In the 256-bit SFmode case, we have full freedom of movement
         within the low 128-bit lane, but the high 128-bit lane must
         mirror the exact same pattern.  */
      for (i = 0; i < 4; ++i)
        if (ipar[i] + 4 != ipar[i + 4])
          return 0;
      nelt = 4;
      /* FALLTHRU */

    case V2DFmode:
    case V4SFmode:
      /* In the 128-bit case, we've full freedom in the placement of
         the elements from the source operand.  */
      for (i = 0; i < nelt; ++i)
        mask |= ipar[i] << (i * (nelt / 2));
      break;

    default:
      gcc_unreachable ();
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
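/* Illustration (editorial, not part of the original file): the V8SF imm8
   encoding above can be recomputed with a small standalone sketch; each
   128-bit lane holds four floats and element i contributes its two-bit
   selector at bit position 2*i.  */
#if 0
#include <stdio.h>

/* Mirror the V8SFmode checks: the high lane must repeat the low lane's
   pattern, then the low-lane selectors pack into an 8-bit immediate.  */
static unsigned
vpermilps_mask (const unsigned char sel[8])
{
  unsigned i, mask = 0;

  for (i = 0; i < 4; ++i)
    if (sel[i] + 4 != sel[i + 4])
      return 0;               /* No match, like the function above.  */
  for (i = 0; i < 4; ++i)
    mask |= (sel[i] & 3) << (i * 2);
  return mask + 1;            /* imm8 + 1, non-zero on success.  */
}

int
main (void)
{
  static const unsigned char sel[8] = { 2, 3, 0, 1, 6, 7, 4, 5 };
  printf ("imm8+1 = 0x%x\n", vpermilps_mask (sel));   /* 0x4f: imm8 0x4e.  */
  return 0;
}
#endif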
/* Helper for avx_vperm2f128_v4df_operand et al.  This is also used by
   the expansion functions to turn the parallel back into a mask.
   The return value is 0 for no match and the imm8+1 for a match.  */

int
avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
  unsigned mask = 0;
  unsigned char ipar[8];

  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
        return 0;
      ei = INTVAL (er);
      if (ei >= 2 * nelt)
        return 0;
      ipar[i] = ei;
    }

  /* Validate that the halves of the permute are halves.  */
  for (i = 0; i < nelt2 - 1; ++i)
    if (ipar[i] + 1 != ipar[i + 1])
      return 0;
  for (i = nelt2; i < nelt - 1; ++i)
    if (ipar[i] + 1 != ipar[i + 1])
      return 0;

  /* Reconstruct the mask.  */
  for (i = 0; i < 2; ++i)
    {
      unsigned e = ipar[i * nelt2];
      if (e % nelt2)
        return 0;
      e /= nelt2;
      mask |= e << (i * 4);
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
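/* Worked example (editorial): for V4DF a lane swap selecting elements
   { 2, 3, 0, 1 } passes both half checks; the low half starts at element
   2 (e = 2 / nelt2 = 1) and the high half at element 0, so the
   reconstructed imm8 is 0x01 and the function returns 0x02.  */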
/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
rtx
ix86_force_to_memory (enum machine_mode mode, rtx operand)
{
  rtx result;

  gcc_assert (reload_completed);
  if (ix86_using_red_zone ())
    {
      result = gen_rtx_MEM (mode,
                            gen_rtx_PLUS (Pmode,
                                          stack_pointer_rtx,
                                          GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (TARGET_64BIT)
    {
      switch (mode)
        {
        case HImode:
        case SImode:
          operand = gen_lowpart (DImode, operand);
          /* FALLTHRU */
        case DImode:
          emit_insn (gen_rtx_SET (VOIDmode,
                                  gen_rtx_MEM (DImode,
                                               gen_rtx_PRE_DEC (DImode,
                                                        stack_pointer_rtx)),
                                  operand));
          break;
        default:
          gcc_unreachable ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
        {
        case DImode:
          {
            rtx operands[2];
            split_double_mode (mode, &operand, 1, operands, operands + 1);
            emit_insn (gen_rtx_SET (VOIDmode,
                                    gen_rtx_MEM (SImode,
                                                 gen_rtx_PRE_DEC (Pmode,
                                                        stack_pointer_rtx)),
                                    operands[1]));
            emit_insn (gen_rtx_SET (VOIDmode,
                                    gen_rtx_MEM (SImode,
                                                 gen_rtx_PRE_DEC (Pmode,
                                                        stack_pointer_rtx)),
                                    operands[0]));
          }
          break;
        case HImode:
          /* Store HImodes as SImodes.  */
          operand = gen_lowpart (SImode, operand);
          /* FALLTHRU */
        case SImode:
          emit_insn (gen_rtx_SET (VOIDmode,
                                  gen_rtx_MEM (GET_MODE (operand),
                                               gen_rtx_PRE_DEC (SImode,
                                                        stack_pointer_rtx)),
                                  operand));
          break;
        default:
          gcc_unreachable ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}
/* Free operand from the memory.  */
void
ix86_free_from_memory (enum machine_mode mode)
{
  if (!ix86_using_red_zone ())
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
        size = 8;
      else
        size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
         to pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                              gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                            GEN_INT (size))));
    }
}
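/* Illustration (editorial): these two helpers pair up around a stack
   temporary after reload, in the style of (a hypothetical sketch, not a
   quote of a real caller):

     mem = ix86_force_to_memory (DImode, operands[1]);
     ... emit an insn that consumes MEM ...
     ix86_free_from_memory (DImode);

   On red-zone targets the store lands below the stack pointer and the
   "free" is a no-op; otherwise the pushes are undone with the LEA above.  */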
/* Implement TARGET_PREFERRED_RELOAD_CLASS.

   Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */

static reg_class_t
ix86_preferred_reload_class (rtx x, reg_class_t regclass)
{
  enum machine_mode mode = GET_MODE (x);

  /* We're only allowed to return a subclass of CLASS.  Many of the
     following checks fail for NO_REGS, so eliminate that early.  */
  if (regclass == NO_REGS)
    return NO_REGS;

  /* All classes can load zeros.  */
  if (x == CONST0_RTX (mode))
    return regclass;

  /* Force constants into memory if we are loading a (nonzero) constant into
     an MMX or SSE register.  This is because there are no MMX/SSE instructions
     to load from a constant.  */
  if (CONSTANT_P (x)
      && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
    return NO_REGS;

  /* Prefer SSE regs only, if we can use them for math.  */
  if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
    return SSE_CLASS_P (regclass) ? regclass : NO_REGS;

  /* Floating-point constants need more complex checks.  */
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* General regs can load everything.  */
      if (reg_class_subset_p (regclass, GENERAL_REGS))
        return regclass;

      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
         zero above.  We only want to wind up preferring 80387 registers if
         we plan on doing computation with them.  */
      if (TARGET_80387
          && standard_80387_constant_p (x) > 0)
        {
          /* Limit class to non-sse.  */
          if (regclass == FLOAT_SSE_REGS)
            return FLOAT_REGS;
          if (regclass == FP_TOP_SSE_REGS)
            return FP_TOP_REG;
          if (regclass == FP_SECOND_SSE_REGS)
            return FP_SECOND_REG;
          if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
            return regclass;
        }

      return NO_REGS;
    }

  /* Generally when we see PLUS here, it's the function invariant
     (plus soft-fp const_int).  Which can only be computed into general
     regs.  */
  if (GET_CODE (x) == PLUS)
    return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;

  /* QImode constants are easy to load, but non-constant QImode data
     must go into Q_REGS.  */
  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
    {
      if (reg_class_subset_p (regclass, Q_REGS))
        return regclass;
      if (reg_class_subset_p (Q_REGS, regclass))
        return Q_REGS;
      return NO_REGS;
    }

  return regclass;
}
/* Discourage putting floating-point values in SSE registers unless
   SSE math is being used, and likewise for the 387 registers.  */
static reg_class_t
ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
{
  enum machine_mode mode = GET_MODE (x);

  /* Restrict the output reload class to the register bank that we are doing
     math on.  If we would like not to return a subset of CLASS, reject this
     alternative: if reload cannot do this, it will still use its choice.  */
  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;

  if (X87_FLOAT_MODE_P (mode))
    {
      if (regclass == FP_TOP_SSE_REGS)
        return FP_TOP_REG;
      else if (regclass == FP_SECOND_SSE_REGS)
        return FP_SECOND_REG;
      else
        return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
    }

  return regclass;
}
static reg_class_t
ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
                       enum machine_mode mode, secondary_reload_info *sri)
{
  /* Double-word spills from general registers to non-offsettable memory
     references (zero-extended addresses) require special handling.  */
  if (TARGET_64BIT
      && MEM_P (x)
      && GET_MODE_SIZE (mode) > UNITS_PER_WORD
      && rclass == GENERAL_REGS
      && !offsettable_memref_p (x))
    {
      sri->icode = (in_p
                    ? CODE_FOR_reload_noff_load
                    : CODE_FOR_reload_noff_store);
      /* Add the cost of moving address to a temporary.  */
      sri->extra_cost = 1;

      return NO_REGS;
    }

  /* QImode spills from non-QI registers require
     intermediate register on 32bit targets.  */
  if (!TARGET_64BIT
      && !in_p && mode == QImode
      && (rclass == GENERAL_REGS
          || rclass == LEGACY_REGS
          || rclass == INDEX_REGS))
    {
      int regno;

      if (REG_P (x))
        regno = REGNO (x);
      else
        regno = -1;

      if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
        regno = true_regnum (x);

      /* Return Q_REGS if the operand is in memory.  */
      if (regno == -1)
        return Q_REGS;
    }

  /* This condition handles corner case where an expression involving
     pointers gets vectorized.  We're trying to use the address of a
     stack slot as a vector initializer.

     (set (reg:V2DI 74 [ vect_cst_.2 ])
          (vec_duplicate:V2DI (reg/f:DI 20 frame)))

     Eventually frame gets turned into sp+offset like this:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
          (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
                                       (const_int 392 [0x188]))))

     That later gets turned into:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
          (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
            (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))

     We'll have the following reload recorded:

     Reload 0: reload_in (DI) =
           (plus:DI (reg/f:DI 7 sp)
            (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
        reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
        SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
        reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
        reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
        reload_reg_rtx: (reg:V2DI 22 xmm1)

     Which isn't going to work since SSE instructions can't handle scalar
     additions.  Returning GENERAL_REGS forces the addition into integer
     register and reload can handle subsequent reloads without problems.  */

  if (in_p && GET_CODE (x) == PLUS
      && SSE_CLASS_P (rclass)
      && SCALAR_INT_MODE_P (mode))
    return GENERAL_REGS;

  return NO_REGS;
}
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.  */

static bool
ix86_class_likely_spilled_p (reg_class_t rclass)
{
  switch (rclass)
    {
      case AREG:
      case DREG:
      case CREG:
      case BREG:
      case AD_REGS:
      case CLOBBERED_REGS:
      case Q_REGS:
      case SSE_FIRST_REG:
      case FP_TOP_REG:
      case FP_SECOND_REG:
        return true;

      default:
        break;
    }

  return false;
}
/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   To optimize register_move_cost performance, allow inline variant.

   The macro can't work reliably when one of the CLASSES is class containing
   registers from multiple units (SSE, MMX, integer).  We avoid this by never
   combining those units in single alternative in the machine description.
   Ensure that this constraint holds to avoid unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
   enforce these sanity checks.  */

static inline bool
inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
                                enum machine_mode mode, int strict)
{
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      gcc_assert (!strict);
      return true;
    }

  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
    return true;

  /* ??? This is a lie.  We do have moves between mmx/general, and for
     mmx/sse2.  But by saying we need secondary memory we discourage the
     register allocator from using the mmx registers unless needed.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
    return true;

  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    {
      /* SSE1 doesn't have any direct moves from other classes.  */
      if (!TARGET_SSE2)
        return true;

      /* If the target says that inter-unit moves are more expensive
         than moving through memory, then don't generate them.  */
      if (!TARGET_INTER_UNIT_MOVES)
        return true;

      /* Between SSE and general, we have moves no larger than word size.  */
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
        return true;
    }

  return false;
}

bool
ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
                              enum machine_mode mode, int strict)
{
  return inline_secondary_memory_needed (class1, class2, mode, strict);
}
/* Implement the TARGET_CLASS_MAX_NREGS hook.

   On the 80386, this is the size of MODE in words,
   except in the FP regs, where a single reg is always enough.  */

static unsigned char
ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
{
  if (MAYBE_INTEGER_CLASS_P (rclass))
    {
      if (mode == XFmode)
        return (TARGET_64BIT ? 2 : 3);
      else if (mode == XCmode)
        return (TARGET_64BIT ? 4 : 6);
      else
        return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
    }
  else
    {
      if (COMPLEX_MODE_P (mode))
        return 2;
      else
        return 1;
    }
}
/* Return true if the registers in CLASS cannot represent the change from
   modes FROM to TO.  */

bool
ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
                               enum reg_class regclass)
{
  if (from == to)
    return false;

  /* x87 registers can't do subreg at all, as all values are reformatted
     to extended precision.  */
  if (MAYBE_FLOAT_CLASS_P (regclass))
    return true;

  if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
    {
      /* Vector registers do not support QI or HImode loads.  If we don't
         disallow a change to these modes, reload will assume it's ok to
         drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
         the vec_dupv4hi pattern.  */
      if (GET_MODE_SIZE (from) < 4)
        return true;

      /* Vector registers do not support subreg with nonzero offsets, which
         are otherwise valid for integer registers.  Since we can't see
         whether we have a nonzero offset from here, prohibit all
         nonparadoxical subregs changing size.  */
      if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
        return true;
    }

  return false;
}
/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   This function is used extensively by register_move_cost that is used to
   build tables at startup.  Make it inline in this case.
   When IN is 2, return maximum of in and out move cost.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Model also increased moving costs of QImode registers in non
   Q_REGS classes.  */

static inline int
inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
                         int in)
{
  int cost;
  if (FLOAT_CLASS_P (regclass))
    {
      int index;
      switch (mode)
        {
          case SFmode:
            index = 0;
            break;
          case DFmode:
            index = 1;
            break;
          case XFmode:
            index = 2;
            break;
          default:
            return 100;
        }
      if (in == 2)
        return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (regclass))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
          case 4:
            index = 0;
            break;
          case 8:
            index = 1;
            break;
          case 16:
            index = 2;
            break;
          default:
            return 100;
        }
      if (in == 2)
        return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (regclass))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
          case 4:
            index = 0;
            break;
          case 8:
            index = 1;
            break;
          default:
            return 100;
        }
      if (in == 2)
        return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
      case 1:
        if (Q_CLASS_P (regclass) || TARGET_64BIT)
          {
            if (!in)
              return ix86_cost->int_store[0];
            if (TARGET_PARTIAL_REG_DEPENDENCY
                && optimize_function_for_speed_p (cfun))
              cost = ix86_cost->movzbl_load;
            else
              cost = ix86_cost->int_load[0];
            if (in == 2)
              return MAX (cost, ix86_cost->int_store[0]);
            return cost;
          }
        else
          {
            if (in == 2)
              return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
            if (in)
              return ix86_cost->movzbl_load;
            else
              return ix86_cost->int_store[0] + 4;
          }
        break;
      case 2:
        if (in == 2)
          return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
        return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
      default:
        /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
        if (mode == TFmode)
          mode = XFmode;
        if (in == 2)
          cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
        else if (in)
          cost = ix86_cost->int_load[2];
        else
          cost = ix86_cost->int_store[2];
        return (cost * (((int) GET_MODE_SIZE (mode)
                        + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
    }
}

static int
ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
                       bool in)
{
  return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
}
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */

static int
ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
                         reg_class_t class2_i)
{
  enum reg_class class1 = (enum reg_class) class1_i;
  enum reg_class class2 = (enum reg_class) class2_i;

  /* In case we require secondary memory, compute cost of the store followed
     by load.  In order to avoid bad register allocation choices, we need
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */

  if (inline_secondary_memory_needed (class1, class2, mode, 0))
    {
      int cost = 1;

      cost += inline_memory_move_cost (mode, class1, 2);
      cost += inline_memory_move_cost (mode, class2, 2);

      /* In case of copying from general_purpose_register we may emit multiple
         stores followed by single load causing memory size mismatch stall.
         Count this as arbitrarily high cost of 20.  */
      if (targetm.class_max_nregs (class1, mode)
          > targetm.class_max_nregs (class2, mode))
        cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
         have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
          || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
        cost += 20;

      return cost;
    }

  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))

    /* ??? By keeping returned value relatively high, we limit the number
       of moves between integer and MMX/SSE registers for all targets.
       Additionally, high value prevents problem with x86_modes_tieable_p(),
       where integer modes in MMX/SSE registers are not tieable
       because of missing QImode and HImode moves to, from or between
       MMX/SSE registers.  */
    return MAX (8, ix86_cost->mmxsse_to_integer);

  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}
/* Return TRUE if hard register REGNO can hold a value of machine-mode
   MODE.  */

int
ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return 0;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    {
      /* We implement the move patterns for all vector modes into and
         out of SSE registers, even when no operation instructions
         are available.  OImode move is available only when AVX is
         enabled.  */
      return ((TARGET_AVX && mode == OImode)
              || VALID_AVX256_REG_MODE (mode)
              || VALID_SSE_REG_MODE (mode)
              || VALID_SSE2_REG_MODE (mode)
              || VALID_MMX_REG_MODE (mode)
              || VALID_MMX_REG_MODE_3DNOW (mode));
    }
  if (MMX_REGNO_P (regno))
    {
      /* We implement the move patterns for 3DNOW modes even in MMX mode,
         so if the register is available at all, then we can move data of
         the given mode into or out of it.  */
      return (VALID_MMX_REG_MODE (mode)
              || VALID_MMX_REG_MODE_3DNOW (mode));
    }

  if (mode == QImode)
    {
      /* Take care for QImode values - they can be in non-QI regs,
         but then they do cause partial register stalls.  */
      if (regno <= BX_REG || TARGET_64BIT)
        return 1;
      if (!TARGET_PARTIAL_REG_STALL)
        return 1;
      return !can_create_pseudo_p ();
    }
  /* We handle both integer and floats in the general purpose registers.  */
  else if (VALID_INT_MODE_P (mode))
    return 1;
  else if (VALID_FP_MODE_P (mode))
    return 1;
  else if (VALID_DFP_MODE_P (mode))
    return 1;
  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
     on to use that value in smaller contexts, this can easily force a
     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
     supporting DImode, allow it.  */
  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
    return 1;

  return 0;
}
/* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
   tieable integer mode.  */

static bool
ix86_tieable_integer_mode_p (enum machine_mode mode)
{
  switch (mode)
    {
    case HImode:
    case SImode:
      return true;

    case QImode:
      return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;

    case DImode:
      return TARGET_64BIT;

    default:
      return false;
    }
}
/* Return true if MODE1 is accessible in a register that can hold MODE2
   without copying.  That is, all register classes that can hold MODE2
   can also hold MODE1.  */

bool
ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
{
  if (mode1 == mode2)
    return true;

  if (ix86_tieable_integer_mode_p (mode1)
      && ix86_tieable_integer_mode_p (mode2))
    return true;

  /* MODE2 being XFmode implies fp stack or general regs, which means we
     can tie any smaller floating point modes to it.  Note that we do not
     tie this with TFmode.  */
  if (mode2 == XFmode)
    return mode1 == SFmode || mode1 == DFmode;

  /* MODE2 being DFmode implies fp stack, general or sse regs, which means
     that we can tie it with SFmode.  */
  if (mode2 == DFmode)
    return mode1 == SFmode;

  /* If MODE2 is only appropriate for an SSE register, then tie with
     any other mode acceptable to SSE registers.  */
  if (GET_MODE_SIZE (mode2) == 16
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 16
            && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));

  /* If MODE2 is appropriate for an MMX register, then tie
     with any other mode acceptable to MMX registers.  */
  if (GET_MODE_SIZE (mode2) == 8
      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 8
            && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));

  return false;
}
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total,
                bool speed)
{
  enum rtx_code outer_code = (enum rtx_code) outer_code_i;
  enum machine_mode mode = GET_MODE (x);
  const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;

  switch (code)
    {
    case CONST_INT:
    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
        *total = 3;
      else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
        *total = 2;
      else if (flag_pic && SYMBOLIC_CONST (x)
               && (!TARGET_64BIT
                   || (GET_CODE (x) != LABEL_REF
                       && (GET_CODE (x) != SYMBOL_REF
                           || !SYMBOL_REF_LOCAL_P (x)))))
        *total = 1;
      else
        *total = 0;
      return true;
    case CONST_DOUBLE:
      if (mode == VOIDmode)
        *total = 0;
      else
        switch (standard_80387_constant_p (x))
          {
          case 1: /* 0.0 */
            *total = 1;
            break;
          default: /* Other constants */
            *total = 2;
            break;
          case 0:
          case -1:
            /* Start with (MEM (SYMBOL_REF)), since that's where
               it'll probably end up.  Add a penalty for size.  */
            *total = (COSTS_N_INSNS (1)
                      + (flag_pic != 0 && !TARGET_64BIT)
                      + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
            break;
          }
      return true;
    case ZERO_EXTEND:
      /* The zero extensions is often completely free on x86_64, so make
         it as cheap as possible.  */
      if (TARGET_64BIT && mode == DImode
          && GET_MODE (XEXP (x, 0)) == SImode)
        *total = 1;
      else if (TARGET_ZERO_EXTEND_WITH_AND)
        *total = cost->add;
      else
        *total = cost->movzx;
      break;

    case SIGN_EXTEND:
      *total = cost->movsx;
      break;

    case ASHIFT:
      if (CONST_INT_P (XEXP (x, 1))
          && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
        {
          HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
          if (value == 1)
            {
              *total = cost->add;
              break;
            }
          if ((value == 2 || value == 3)
              && cost->lea <= cost->shift_const)
            {
              *total = cost->lea;
              break;
            }
        }
      /* FALLTHRU */

    case ROTATE:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
        {
          if (CONST_INT_P (XEXP (x, 1)))
            {
              if (INTVAL (XEXP (x, 1)) > 32)
                *total = cost->shift_const + COSTS_N_INSNS (2);
              else
                *total = cost->shift_const * 2;
            }
          else
            {
              if (GET_CODE (XEXP (x, 1)) == AND)
                *total = cost->shift_var * 2;
              else
                *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
            }
        }
      else
        {
          if (CONST_INT_P (XEXP (x, 1)))
            *total = cost->shift_const;
          else
            *total = cost->shift_var;
        }
      break;

    case FMA:
      {
        rtx sub;
        gcc_assert (FLOAT_MODE_P (mode));
        gcc_assert (TARGET_FMA || TARGET_FMA4);

        /* ??? SSE scalar/vector cost should be used here.  */
        /* ??? Bald assumption that fma has the same cost as fmul.  */
        *total = cost->fmul;
        *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);

        /* Negate in op0 or op2 is free: FMS, FNMA, FNMS.  */
        sub = XEXP (x, 0);
        if (GET_CODE (sub) == NEG)
          sub = XEXP (sub, 0);
        *total += rtx_cost (sub, FMA, 0, speed);

        sub = XEXP (x, 2);
        if (GET_CODE (sub) == NEG)
          sub = XEXP (sub, 0);
        *total += rtx_cost (sub, FMA, 2, speed);
        return true;
      }

    case MULT:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
        {
          /* ??? SSE scalar cost should be used here.  */
          *total = cost->fmul;
          return false;
        }
      else if (X87_FLOAT_MODE_P (mode))
        {
          *total = cost->fmul;
          return false;
        }
      else if (FLOAT_MODE_P (mode))
        {
          /* ??? SSE vector cost should be used here.  */
          *total = cost->fmul;
          return false;
        }
      else
        {
          rtx op0 = XEXP (x, 0);
          rtx op1 = XEXP (x, 1);
          int nbits;
          if (CONST_INT_P (XEXP (x, 1)))
            {
              unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
              for (nbits = 0; value != 0; value &= value - 1)
                nbits++;
            }
          else
            /* This is arbitrary.  */
            nbits = 7;

          /* Compute costs correctly for widening multiplication.  */
          if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
              && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
                 == GET_MODE_SIZE (mode))
            {
              int is_mulwiden = 0;
              enum machine_mode inner_mode = GET_MODE (op0);

              if (GET_CODE (op0) == GET_CODE (op1))
                is_mulwiden = 1, op1 = XEXP (op1, 0);
              else if (CONST_INT_P (op1))
                {
                  if (GET_CODE (op0) == SIGN_EXTEND)
                    is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
                                  == INTVAL (op1);
                  else
                    is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
                }

              if (is_mulwiden)
                op0 = XEXP (op0, 0), mode = GET_MODE (op0);
            }

          *total = (cost->mult_init[MODE_INDEX (mode)]
                    + nbits * cost->mult_bit
                    + rtx_cost (op0, outer_code, opno, speed)
                    + rtx_cost (op1, outer_code, opno, speed));

          return true;
        }

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
) && TARGET_SSE_MATH
)
31563 /* ??? SSE cost should be used here. */
31564 *total
= cost
->fdiv
;
31565 else if (X87_FLOAT_MODE_P (mode
))
31566 *total
= cost
->fdiv
;
31567 else if (FLOAT_MODE_P (mode
))
31568 /* ??? SSE vector cost should be used here. */
31569 *total
= cost
->fdiv
;
31571 *total
= cost
->divide
[MODE_INDEX (mode
)];
      if (GET_MODE_CLASS (mode) == MODE_INT
          && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
        {
          if (GET_CODE (XEXP (x, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
              && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
              && CONSTANT_P (XEXP (x, 1)))
            {
              HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
              if (val == 2 || val == 4 || val == 8)
                {
                  *total = cost->lea;
                  *total += rtx_cost (XEXP (XEXP (x, 0), 1),
                                      outer_code, opno, speed);
                  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
                                      outer_code, opno, speed);
                  *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
                  return true;
                }
            }
          else if (GET_CODE (XEXP (x, 0)) == MULT
                   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
            {
              HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
              if (val == 2 || val == 4 || val == 8)
                {
                  *total = cost->lea;
                  *total += rtx_cost (XEXP (XEXP (x, 0), 0),
                                      outer_code, opno, speed);
                  *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
                  return true;
                }
            }
          else if (GET_CODE (XEXP (x, 0)) == PLUS)
            {
              *total = cost->lea;
              *total += rtx_cost (XEXP (XEXP (x, 0), 0),
                                  outer_code, opno, speed);
              *total += rtx_cost (XEXP (XEXP (x, 0), 1),
                                  outer_code, opno, speed);
              *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
              return true;
            }
        }
      /* FALLTHRU */

    case MINUS:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
        {
          /* ??? SSE cost should be used here.  */
          *total = cost->fadd;
          return false;
        }
      else if (X87_FLOAT_MODE_P (mode))
        {
          *total = cost->fadd;
          return false;
        }
      else if (FLOAT_MODE_P (mode))
        {
          /* ??? SSE vector cost should be used here.  */
          *total = cost->fadd;
          return false;
        }
      /* FALLTHRU */

    case AND:
    case IOR:
    case XOR:
      if (!TARGET_64BIT && mode == DImode)
        {
          *total = (cost->add * 2
                    + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
                       << (GET_MODE (XEXP (x, 0)) != DImode))
                    + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
                       << (GET_MODE (XEXP (x, 1)) != DImode)));
          return true;
        }
      /* FALLTHRU */

    case NEG:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
        {
          /* ??? SSE cost should be used here.  */
          *total = cost->fchs;
          return false;
        }
      else if (X87_FLOAT_MODE_P (mode))
        {
          *total = cost->fchs;
          return false;
        }
      else if (FLOAT_MODE_P (mode))
        {
          /* ??? SSE vector cost should be used here.  */
          *total = cost->fchs;
          return false;
        }
      /* FALLTHRU */

    case NOT:
      if (!TARGET_64BIT && mode == DImode)
        *total = cost->add * 2;
      else
        *total = cost->add;
      break;

    case COMPARE:
      if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
          && XEXP (XEXP (x, 0), 1) == const1_rtx
          && CONST_INT_P (XEXP (XEXP (x, 0), 2))
          && XEXP (x, 1) == const0_rtx)
        {
          /* This kind of construct is implemented using test[bwl].
             Treat it as if we had an AND.  */
          *total = (cost->add
                    + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
                    + rtx_cost (const1_rtx, outer_code, opno, speed));
          return true;
        }
      break;

    case FLOAT_EXTEND:
      if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
        *total = 0;
      break;

    case ABS:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
        /* ??? SSE cost should be used here.  */
        *total = cost->fabs;
      else if (X87_FLOAT_MODE_P (mode))
        *total = cost->fabs;
      else if (FLOAT_MODE_P (mode))
        /* ??? SSE vector cost should be used here.  */
        *total = cost->fabs;
      break;

    case SQRT:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
        /* ??? SSE cost should be used here.  */
        *total = cost->fsqrt;
      else if (X87_FLOAT_MODE_P (mode))
        *total = cost->fsqrt;
      else if (FLOAT_MODE_P (mode))
        /* ??? SSE vector cost should be used here.  */
        *total = cost->fsqrt;
      break;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TP)
        *total = 0;
      return false;

    case VEC_SELECT:
    case VEC_CONCAT:
    case VEC_MERGE:
    case VEC_DUPLICATE:
      /* ??? Assume all of these vector manipulation patterns are
         recognizable.  In which case they all pretty much have the
         same cost.  */
      *total = COSTS_N_INSNS (1);
      return true;

    default:
      return false;
    }
}
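/* Worked example (editorial): the PLUS case above prices a scaled
   address computation as a single LEA.  An RTL expression like

     (plus:SI (plus:SI (mult:SI (reg:SI a) (const_int 4))
                       (reg:SI b))
              (const_int 12))

   matches the first arm (val == 4), so *total becomes cost->lea plus
   the costs of the subexpressions rather than an add/shift chain.  */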
#if TARGET_MACHO

static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* For 64-bit we shouldn't get here.  */
  gcc_assert (!TARGET_64BIT);

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = targetm.strip_name_encoding (symb);

  length = strlen (stub);
  binder_name = XALLOCAVEC (char, length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = XALLOCAVEC (char, length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_ATT_STUB)
    switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
  else if (MACHOPIC_PURE)
    switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
  else
    switch_to_section (darwin_sections[machopic_symbol_stub_section]);

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_ATT_STUB)
    {
      fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
    }
  else if (MACHOPIC_PURE)
    {
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
      output_set_got (tmp, NULL_RTX);	/* "CALL ___<cpu>.get_pc_thunk.cx".  */
      fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
	       label, lazy_ptr_name, label);
      fprintf (file, "\tjmp\t*%%ecx\n");
    }
  else
    fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);

  /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
     it needs no stub-binding-helper.  */
  if (MACHOPIC_ATT_STUB)
    return;

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
      fprintf (file, "\tpushl\t%%ecx\n");
    }
  else
    fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);

  fputs ("\tjmp\tdyld_stub_binding_helper\n", file);

  /* N.B. Keep the correspondence of these
     'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
     old-pic/new-pic/non-pic stubs; altering this will break
     compatibility with existing dylibs.  */
  if (MACHOPIC_PURE)
    {
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
    }
  else
    /* 16-byte -mdynamic-no-pic stub.  */
    switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);

  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, ASM_LONG "%s\n", binder_name);
}
#endif /* TARGET_MACHO */
/* Order the registers for register allocator.  */

void
x86_order_regs_for_local_alloc (void)
{
   int pos = 0;
   int i;

   /* First allocate the local general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && call_used_regs[i])
        reg_alloc_order [pos++] = i;

   /* Global general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && !call_used_regs[i])
        reg_alloc_order [pos++] = i;

   /* x87 registers come first in case we are doing FP math
      using them.  */
   if (!TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
       reg_alloc_order [pos++] = i;

   /* SSE registers.  */
   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
     reg_alloc_order [pos++] = i;
   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
     reg_alloc_order [pos++] = i;

   /* x87 registers.  */
   if (TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
       reg_alloc_order [pos++] = i;

   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
     reg_alloc_order [pos++] = i;

   /* Initialize the rest of array as we do not allocate some registers
      at all.  */
   while (pos < FIRST_PSEUDO_REGISTER)
     reg_alloc_order [pos++] = 0;
}
31881 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
31882 in struct attribute_spec handler. */
31884 ix86_handle_callee_pop_aggregate_return (tree
*node
, tree name
,
31886 int flags ATTRIBUTE_UNUSED
,
31887 bool *no_add_attrs
)
31889 if (TREE_CODE (*node
) != FUNCTION_TYPE
31890 && TREE_CODE (*node
) != METHOD_TYPE
31891 && TREE_CODE (*node
) != FIELD_DECL
31892 && TREE_CODE (*node
) != TYPE_DECL
)
31894 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
31896 *no_add_attrs
= true;
31901 warning (OPT_Wattributes
, "%qE attribute only available for 32-bit",
31903 *no_add_attrs
= true;
31906 if (is_attribute_p ("callee_pop_aggregate_return", name
))
31910 cst
= TREE_VALUE (args
);
31911 if (TREE_CODE (cst
) != INTEGER_CST
)
31913 warning (OPT_Wattributes
,
31914 "%qE attribute requires an integer constant argument",
31916 *no_add_attrs
= true;
31918 else if (compare_tree_int (cst
, 0) != 0
31919 && compare_tree_int (cst
, 1) != 0)
31921 warning (OPT_Wattributes
,
31922 "argument to %qE attribute is neither zero, nor one",
31924 *no_add_attrs
= true;
31933 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
31934 struct attribute_spec.handler. */
31936 ix86_handle_abi_attribute (tree
*node
, tree name
,
31937 tree args ATTRIBUTE_UNUSED
,
31938 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
31940 if (TREE_CODE (*node
) != FUNCTION_TYPE
31941 && TREE_CODE (*node
) != METHOD_TYPE
31942 && TREE_CODE (*node
) != FIELD_DECL
31943 && TREE_CODE (*node
) != TYPE_DECL
)
31945 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
31947 *no_add_attrs
= true;
31951 /* Can combine regparm with all attributes but fastcall. */
31952 if (is_attribute_p ("ms_abi", name
))
31954 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node
)))
31956 error ("ms_abi and sysv_abi attributes are not compatible");
31961 else if (is_attribute_p ("sysv_abi", name
))
31963 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node
)))
31965 error ("ms_abi and sysv_abi attributes are not compatible");
/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
   struct attribute_spec.handler.  */

static tree
ix86_handle_struct_attribute (tree *node, tree name,
			      tree args ATTRIBUTE_UNUSED,
			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree *type = NULL;
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) == TYPE_DECL)
	type = &TREE_TYPE (*node);
    }
  else
    type = node;

  if (!(type && (TREE_CODE (*type) == RECORD_TYPE
		 || TREE_CODE (*type) == UNION_TYPE)))
    {
      warning (OPT_Wattributes, "%qE attribute ignored",
	       name);
      *no_add_attrs = true;
    }

  else if ((is_attribute_p ("ms_struct", name)
	    && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
	   || ((is_attribute_p ("gcc_struct", name)
		&& lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
    {
      warning (OPT_Wattributes, "%qE incompatible attribute ignored",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
static tree
ix86_handle_fndecl_attribute (tree *node, tree name,
			      tree args ATTRIBUTE_UNUSED,
			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }
  return NULL_TREE;
}
static bool
ix86_ms_bitfield_layout_p (const_tree record_type)
{
  return ((TARGET_MS_BITFIELD_LAYOUT
	   && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
	  || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
}
/* Returns an expression indicating where the this parameter is
   located on entry to the FUNCTION.  */

static rtx
x86_this_parameter (tree function)
{
  tree type = TREE_TYPE (function);
  bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
  int nregs;

  if (TARGET_64BIT)
    {
      const int *parm_regs;

      if (ix86_function_type_abi (type) == MS_ABI)
	parm_regs = x86_64_ms_abi_int_parameter_registers;
      else
	parm_regs = x86_64_int_parameter_registers;
      return gen_rtx_REG (Pmode, parm_regs[aggr]);
    }

  nregs = ix86_function_regparm (type, function);

  if (nregs > 0 && !stdarg_p (type))
    {
      int regno;
      unsigned int ccvt = ix86_get_callcvt (type);

      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	regno = aggr ? DX_REG : CX_REG;
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	{
	  regno = CX_REG;
	  if (aggr)
	    return gen_rtx_MEM (SImode,
				plus_constant (stack_pointer_rtx, 4));
	}
      else
	{
	  regno = AX_REG;
	  if (aggr)
	    {
	      regno = DX_REG;
	      if (nregs == 1)
		return gen_rtx_MEM (SImode,
				    plus_constant (stack_pointer_rtx, 4));
	    }
	}
      return gen_rtx_REG (SImode, regno);
    }

  return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx,
					     aggr ? 8 : 4));
}
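/* Illustrative sketch (not part of the original sources): for a plain
   32-bit cdecl method with no register parameters, the code above
   resolves `this' to the stack slot 4(%esp), or 8(%esp) when a hidden
   aggregate-return pointer occupies the first slot; fastcall and
   thiscall conventions instead yield %ecx (or %edx with an aggregate
   return under fastcall), as computed above.  */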
/* Determine whether x86_output_mi_thunk can succeed.  */

static bool
x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
			 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
			 HOST_WIDE_INT vcall_offset, const_tree function)
{
  /* 64-bit can handle anything.  */
  if (TARGET_64BIT)
    return true;

  /* For 32-bit, everything's fine if we have one free register.  */
  if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
    return true;

  /* Need a free register for vcall_offset.  */
  if (vcall_offset)
    return false;

  /* Need a free register for GOT references.  */
  if (flag_pic && !targetm.binds_local_p (function))
    return false;

  /* Otherwise ok.  */
  return true;
}
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
x86_output_mi_thunk (FILE *file,
		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  rtx this_param = x86_this_parameter (function);
  rtx this_reg, tmp, fnaddr;

  emit_note (NOTE_INSN_PROLOGUE_END);

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this_param))
    this_reg = this_param;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      this_reg = gen_rtx_REG (Pmode, AX_REG);
      emit_move_insn (this_reg, this_param);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);
      rtx delta_dst = this_reg ? this_reg : this_param;

      if (TARGET_64BIT)
	{
	  if (!x86_64_general_operand (delta_rtx, Pmode))
	    {
	      tmp = gen_rtx_REG (Pmode, R10_REG);
	      emit_move_insn (tmp, delta_rtx);
	      delta_rtx = tmp;
	    }
	}

      ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      rtx vcall_addr, vcall_mem, this_mem;
      unsigned int tmp_regno;

      if (TARGET_64BIT)
	tmp_regno = R10_REG;
      else
	{
	  unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
	  if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
	    tmp_regno = AX_REG;
	  else
	    tmp_regno = CX_REG;
	}
      tmp = gen_rtx_REG (Pmode, tmp_regno);

      this_mem = gen_rtx_MEM (ptr_mode, this_reg);
      if (Pmode != ptr_mode)
	this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
      emit_move_insn (tmp, this_mem);

      /* Adjust the this parameter.  */
      vcall_addr = plus_constant (tmp, vcall_offset);
      if (TARGET_64BIT
	  && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
	{
	  rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
	  emit_move_insn (tmp2, GEN_INT (vcall_offset));
	  vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
	}

      vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
      if (Pmode != ptr_mode)
	emit_insn (gen_addsi_1_zext (this_reg,
				     gen_rtx_REG (ptr_mode,
						  REGNO (this_reg)),
				     vcall_mem));
      else
	ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this_param)
    emit_move_insn (this_param, this_reg);

  fnaddr = XEXP (DECL_RTL (function), 0);
  if (TARGET_64BIT)
    {
      if (!flag_pic || targetm.binds_local_p (function)
	  || cfun->machine->call_abi == MS_ABI)
	;
      else
	{
	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
	  tmp = gen_rtx_CONST (Pmode, tmp);
	  fnaddr = gen_rtx_MEM (Pmode, tmp);
	}
    }
  else
    {
      if (!flag_pic || targetm.binds_local_p (function))
	;
#if TARGET_MACHO
      else if (TARGET_MACHO)
	{
	  fnaddr = machopic_indirect_call_target (DECL_RTL (function));
	  fnaddr = XEXP (fnaddr, 0);
	}
#endif /* TARGET_MACHO */
      else
	{
	  tmp = gen_rtx_REG (Pmode, CX_REG);
	  output_set_got (tmp, NULL_RTX);

	  fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
	  fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
	  fnaddr = gen_rtx_MEM (Pmode, fnaddr);
	}
    }

  /* Our sibling call patterns do not allow memories, because we have no
     predicate that can distinguish between frame and non-frame memory.
     For our purposes here, we can get away with (ab)using a jump pattern,
     because we're going to do no optimization.  */
  if (MEM_P (fnaddr))
    emit_jump_insn (gen_indirect_jump (fnaddr));
  else
    {
      tmp = gen_rtx_MEM (QImode, fnaddr);
      tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
      tmp = emit_call_insn (tmp);
      SIBLING_CALL_P (tmp) = 1;
    }
  emit_barrier ();

  /* Emit just enough of rest_of_compilation to get the insns emitted.
     Note that use_thunk calls assemble_start_function et al.  */
  tmp = get_insns ();
  insn_locators_alloc ();
  shorten_branches (tmp);
  final_start_function (tmp, file, 1);
  final (tmp, file, 1);
  final_end_function ();
}
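/* Illustrative sketch (not part of the sources): for a 32-bit thunk
   whose `this' is in a register and whose adjustment is a small DELTA
   only, the RTL emitted above typically assembles to something like

	addl	$DELTA, %ecx
	jmp	target

   while the vcall_offset path inserts a load through the vtable
   pointer before the add, and PIC non-local targets go through an
   indirect jump via the GOT instead of the direct jmp.  */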
static void
x86_file_start (void)
{
  default_file_start ();
#if TARGET_MACHO
  darwin_file_start ();
#endif
  if (X86_FILE_START_VERSION_DIRECTIVE)
    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
  if (X86_FILE_START_FLTUSED)
    fputs ("\t.global\t__fltused\n", asm_out_file);
  if (ix86_asm_dialect == ASM_INTEL)
    fputs ("\t.intel_syntax noprefix\n", asm_out_file);
}
int
x86_field_alignment (tree field, int computed)
{
  enum machine_mode mode;
  tree type = TREE_TYPE (field);

  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  mode = TYPE_MODE (strip_array_types (type));
  if (mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return MIN (32, computed);
  return computed;
}
/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */
void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
					 : MCOUNT_NAME);

  if (TARGET_64BIT)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
#endif

      if (DEFAULT_ABI == SYSV_ABI && flag_pic)
	fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
      else
	fprintf (file, "\tcall\t%s\n", mcount_name);
    }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
	       LPREFIX, labelno);
#endif
      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
    }
  else
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
	       LPREFIX, labelno);
#endif
      fprintf (file, "\tcall\t%s\n", mcount_name);
    }
}
/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1 byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   99% of cases.  */

static int
min_insn_size (rtx insn)
{
  int l = 0, len;

  if (!INSN_P (insn) || !active_insn_p (insn))
    return 0;

  /* Discard alignments we've emit and jump instructions.  */
  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
    return 0;
  if (JUMP_TABLE_DATA_P (insn))
    return 0;

  /* Important case - calls are always 5 bytes.
     It is common to have many calls in the row.  */
  if (CALL_P (insn)
      && symbolic_reference_mentioned_p (PATTERN (insn))
      && !SIBLING_CALL_P (insn))
    return 5;
  len = get_attr_length (insn);
  if (len <= 1)
    return 1;

  /* For normal instructions we rely on get_attr_length being exact,
     with a few exceptions.  */
  if (!JUMP_P (insn))
    {
      enum attr_type type = get_attr_type (insn);

      switch (type)
	{
	case TYPE_MULTI:
	  if (GET_CODE (PATTERN (insn)) == ASM_INPUT
	      || asm_noperands (PATTERN (insn)) >= 0)
	    return 0;
	  break;
	case TYPE_OTHER:
	case TYPE_FCMP:
	  break;
	default:
	  /* Otherwise trust get_attr_length.  */
	  return len;
	}

      l = get_attr_length_address (insn);
      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
	l = 4;
    }
  if (l)
    return 1 + l;
  else
    return 2;
}
#ifdef ASM_OUTPUT_MAX_SKIP_PAD

/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
   window.  */

static void
ix86_avoid_jump_mispredicts (void)
{
  rtx insn, start = get_insns ();
  int nbytes = 0, njumps = 0;
  int isjump = 0;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When the NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16byte page.

     The smallest offset in the page INSN can start is the case where START
     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
     */
  for (insn = start; insn; insn = NEXT_INSN (insn))
    {
      int min_size;

      if (LABEL_P (insn))
	{
	  int align = label_to_alignment (insn);
	  int max_skip = label_to_max_skip (insn);

	  if (max_skip > 15)
	    max_skip = 15;
	  /* If align > 3, only up to 16 - max_skip - 1 bytes can be
	     already in the current 16 byte page, because otherwise
	     ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
	     bytes to reach 16 byte boundary.  */
	  if (align <= 0
	      || (align <= 3 && max_skip != (1 << align) - 1))
	    max_skip = 0;
	  if (dump_file)
	    fprintf (dump_file, "Label %i with max_skip %i\n",
		     INSN_UID (insn), max_skip);
	  if (max_skip)
	    {
	      while (nbytes + max_skip >= 16)
		{
		  start = NEXT_INSN (start);
		  if ((JUMP_P (start)
		       && GET_CODE (PATTERN (start)) != ADDR_VEC
		       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
		      || CALL_P (start))
		    njumps--, isjump = 1;
		  else
		    isjump = 0;
		  nbytes -= min_insn_size (start);
		}
	    }
	  continue;
	}

      min_size = min_insn_size (insn);
      nbytes += min_size;
      if (dump_file)
	fprintf (dump_file, "Insn %i estimated to %i bytes\n",
		 INSN_UID (insn), min_size);
      if ((JUMP_P (insn)
	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
	  || CALL_P (insn))
	njumps++;
      else
	continue;

      while (njumps > 3)
	{
	  start = NEXT_INSN (start);
	  if ((JUMP_P (start)
	       && GET_CODE (PATTERN (start)) != ADDR_VEC
	       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
	      || CALL_P (start))
	    njumps--, isjump = 1;
	  else
	    isjump = 0;
	  nbytes -= min_insn_size (start);
	}
      gcc_assert (njumps >= 0);
      if (dump_file)
	fprintf (dump_file, "Interval %i to %i has %i bytes\n",
		 INSN_UID (start), INSN_UID (insn), nbytes);

      if (njumps == 3 && isjump && nbytes < 16)
	{
	  int padsize = 15 - nbytes + min_insn_size (insn);

	  if (dump_file)
	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
		     INSN_UID (insn), padsize);
	  emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
	}
    }
}
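/* Worked example (illustrative only): suppose the sliding window
   [START, INSN] already holds three jumps and INSN is a fourth, with
   NBYTES still below 16.  All four jumps could then share one 16-byte
   fetch block, so the pass pads INSN by 15 - NBYTES + sizeof (INSN)
   bytes, guaranteeing that the window spans at least two blocks and at
   most three of the jumps land in any one of them.  */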
/* AMD Athlon works faster
   when RET is not destination of conditional jump or directly preceded
   by other jump instruction.  We avoid the penalty by inserting NOP just
   before the RET instructions in such cases.  */
static void
ix86_pad_returns (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      basic_block bb = e->src;
      rtx ret = BB_END (bb);
      rtx prev;
      bool replace = false;

      if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
	  || optimize_bb_for_size_p (bb))
	continue;
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
	if (active_insn_p (prev) || LABEL_P (prev))
	  break;
      if (prev && LABEL_P (prev))
	{
	  edge e;
	  edge_iterator ei;

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
		&& !(e->flags & EDGE_FALLTHRU))
	      replace = true;
	}
      if (!replace)
	{
	  prev = prev_active_insn (ret);
	  if (prev
	      && ((JUMP_P (prev) && any_condjump_p (prev))
		  || CALL_P (prev)))
	    replace = true;
	  /* Empty functions get branch mispredict even when
	     the jump destination is not visible to us.  */
	  if (!prev && !optimize_function_for_size_p (cfun))
	    replace = true;
	}
      if (replace)
	{
	  emit_jump_insn_before (gen_simple_return_internal_long (), ret);
	  delete_insn (ret);
	}
    }
}
/* Count the minimum number of instructions in BB.  Return 4 if the
   number of instructions >= 4.  */

static int
ix86_count_insn_bb (basic_block bb)
{
  rtx insn;
  int insn_count = 0;

  /* Count number of instructions in this block.  Return 4 if the number
     of instructions >= 4.  */
  FOR_BB_INSNS (bb, insn)
    {
      /* Only happen in exit blocks.  */
      if (JUMP_P (insn)
	  && ANY_RETURN_P (PATTERN (insn)))
	break;

      if (NONDEBUG_INSN_P (insn)
	  && GET_CODE (PATTERN (insn)) != USE
	  && GET_CODE (PATTERN (insn)) != CLOBBER)
	{
	  insn_count++;
	  if (insn_count >= 4)
	    return insn_count;
	}
    }

  return insn_count;
}
/* Count the minimum number of instructions in code path in BB.
   Return 4 if the number of instructions >= 4.  */

static int
ix86_count_insn (basic_block bb)
{
  edge e;
  edge_iterator ei;
  int min_prev_count;

  /* Only bother counting instructions along paths with no
     more than 2 basic blocks between entry and exit.  Given
     that BB has an edge to exit, determine if a predecessor
     of BB has an edge from entry.  If so, compute the number
     of instructions in the predecessor block.  If there
     happen to be multiple such blocks, compute the minimum.  */
  min_prev_count = 4;
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      edge prev_e;
      edge_iterator prev_ei;

      if (e->src == ENTRY_BLOCK_PTR)
	{
	  min_prev_count = 0;
	  break;
	}
      FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
	{
	  if (prev_e->src == ENTRY_BLOCK_PTR)
	    {
	      int count = ix86_count_insn_bb (e->src);
	      if (count < min_prev_count)
		min_prev_count = count;
	      break;
	    }
	}
    }

  if (min_prev_count < 4)
    min_prev_count += ix86_count_insn_bb (bb);

  return min_prev_count;
}
/* Pad short function to 4 instructions.  */

static void
ix86_pad_short_function (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      rtx ret = BB_END (e->src);
      if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
	{
	  int insn_count = ix86_count_insn (e->src);

	  /* Pad short function.  */
	  if (insn_count < 4)
	    {
	      rtx insn = ret;

	      /* Find epilogue.  */
	      while (insn
		     && (!NOTE_P (insn)
			 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
		insn = PREV_INSN (insn);

	      if (!insn)
		insn = ret;

	      /* Two NOPs count as one instruction.  */
	      insn_count = 2 * (4 - insn_count);
	      emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
	    }
	}
    }
}
/* Implement machine specific optimizations.  We implement padding of returns
   for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.  */
static void
ix86_reorg (void)
{
  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();

  /* Run the vzeroupper optimization if needed.  */
  if (TARGET_VZEROUPPER)
    move_or_delete_vzeroupper ();

  if (optimize && optimize_function_for_speed_p (cfun))
    {
      if (TARGET_PAD_SHORT_FUNCTION)
	ix86_pad_short_function ();
      else if (TARGET_PAD_RETURNS)
	ix86_pad_returns ();
#ifdef ASM_OUTPUT_MAX_SKIP_PAD
      if (TARGET_FOUR_JUMP_LIMIT)
	ix86_avoid_jump_mispredicts ();
#endif
    }
}
/* Return nonzero when QImode register that must be represented via REX prefix
   is used.  */
bool
x86_extended_QIreg_mentioned_p (rtx insn)
{
  int i;
  extract_insn_cached (insn);
  for (i = 0; i < recog_data.n_operands; i++)
    if (REG_P (recog_data.operand[i])
	&& REGNO (recog_data.operand[i]) > BX_REG)
       return true;
  return false;
}
/* Return nonzero when P points to register encoded via REX prefix.
   Called via for_each_rtx.  */
static int
extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
{
  unsigned int regno;
  if (!REG_P (*p))
    return 0;
  regno = REGNO (*p);
  return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
}

/* Return true when INSN mentions register that must be encoded using REX
   prefix.  */
bool
x86_extended_reg_mentioned_p (rtx insn)
{
  return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
		       extended_reg_mentioned_1, NULL);
}
/* If profitable, negate (without causing overflow) integer constant
   of mode MODE at location LOC.  Return true in this case.  */
bool
x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (*loc))
    return false;

  switch (mode)
    {
    case DImode:
      /* DImode x86_64 constants must fit in 32 bits.  */
      gcc_assert (x86_64_immediate_operand (*loc, mode));

      mode = SImode;
      break;

    case SImode:
    case HImode:
    case QImode:
      break;

    default:
      gcc_unreachable ();
    }

  /* Avoid overflows.  */
  if (mode_signbit_p (mode, *loc))
    return false;

  val = INTVAL (*loc);

  /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
     Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
  if ((val < 0 && val != -128)
      || val == 128)
    {
      *loc = GEN_INT (-val);
      return true;
    }

  return false;
}
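/* Illustrative examples (not from the sources): a constant of -4
   is negated so that `addl $-4,%eax' can be emitted as `subl $4,%eax';
   conversely 128 is negated to -128 because -128 fits in a sign-extended
   imm8 while 128 does not, so swapping the sign and the operation
   produces a shorter encoding.  -128 itself is left alone, since
   negating it would overflow the mode.  */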
/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */

void
x86_emit_floatuns (rtx operands[2])
{
  rtx neglab, donelab, i0, i1, f0, in, out;
  enum machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  gcc_assert (inmode == SImode || inmode == DImode);

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  f0 = gen_reg_rtx (mode);

  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);

  expand_float (out, in, 0);

  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);

  expand_float (f0, i0, 0);

  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
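/* In C terms, the sequence emitted above computes (illustrative
   sketch only):

     if ((signed) in >= 0)
       out = (FP) in;                          // signed convert is exact
     else
       out = 2 * (FP) ((in >> 1) | (in & 1));  // halve, convert, double

   ORing the discarded low bit back into the halved value keeps the
   final rounding of the doubled result correct.  */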
/* AVX2 does support 32-byte integer vector operations,
   thus the longest vector we are faced with is V32QImode.  */
#define MAX_VECT_LEN	32

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  enum machine_mode vmode;
  unsigned char nelt;
  bool one_operand_p;
  bool testing_p;
};

static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);

/* Get a vector mode of the same size as the original but with elements
   twice as wide.  This is only guaranteed to apply to integral vectors.  */

static inline enum machine_mode
get_mode_wider_vector (enum machine_mode o)
{
  /* ??? Rely on the ordering that genmodes.c gives to vectors.  */
  enum machine_mode n = GET_MODE_WIDER_MODE (o);
  gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
  gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
  return n;
}
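/* For example (illustrative only), V16QImode yields V8HImode and
   V8HImode yields V4SImode: the total vector size is unchanged while
   the element count halves and the element width doubles, which is
   exactly what the two assertions above verify.  */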
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   with all elements equal to VAR.  Return true if successful.  */

static bool
ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
				   rtx target, rtx val)
{
  bool ok;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
	return false;
      /* FALLTHRU */

    case V4DFmode:
    case V4DImode:
    case V8SFmode:
    case V8SImode:
    case V2DFmode:
    case V2DImode:
    case V4SFmode:
    case V4SImode:
      {
	rtx insn, dup;

	/* First attempt to recognize VAL as-is.  */
	dup = gen_rtx_VEC_DUPLICATE (mode, val);
	insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
	if (recog_memoized (insn) < 0)
	  {
	    rtx seq;
	    /* If that fails, force VAL into a register.  */

	    start_sequence ();
	    XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
	    seq = get_insns ();
	    end_sequence ();
	    if (seq)
	      emit_insn_before (seq, insn);

	    ok = recog_memoized (insn) >= 0;
	    gcc_assert (ok);
	  }
      }
      return true;

    case V4HImode:
      if (!mmx_ok)
	return false;
      if (TARGET_SSE || TARGET_3DNOW_A)
	{
	  rtx x;

	  val = gen_lowpart (SImode, val);
	  x = gen_rtx_TRUNCATE (HImode, val);
	  x = gen_rtx_VEC_DUPLICATE (mode, x);
	  emit_insn (gen_rtx_SET (VOIDmode, target, x));
	  return true;
	}
      goto widen;

    case V8QImode:
      if (!mmx_ok)
	return false;
      goto widen;

    case V8HImode:
      if (TARGET_SSE2)
	{
	  struct expand_vec_perm_d dperm;
	  rtx tmp1, tmp2;

	permute:
	  memset (&dperm, 0, sizeof (dperm));
	  dperm.target = target;
	  dperm.vmode = mode;
	  dperm.nelt = GET_MODE_NUNITS (mode);
	  dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
	  dperm.one_operand_p = true;

	  /* Extend to SImode using a paradoxical SUBREG.  */
	  tmp1 = gen_reg_rtx (SImode);
	  emit_move_insn (tmp1, gen_lowpart (SImode, val));

	  /* Insert the SImode value as low element of a V4SImode vector.  */
	  tmp2 = gen_lowpart (V4SImode, dperm.op0);
	  emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));

	  ok = (expand_vec_perm_1 (&dperm)
		|| expand_vec_perm_broadcast_1 (&dperm));
	  gcc_assert (ok);
	  return ok;
	}
      goto widen;

    case V16QImode:
      if (TARGET_SSE2)
	goto permute;
      goto widen;

    widen:
      /* Replicate the value once into the next wider mode and recurse.  */
      {
	enum machine_mode smode, wsmode, wvmode;
	rtx x;

	smode = GET_MODE_INNER (mode);
	wvmode = get_mode_wider_vector (mode);
	wsmode = GET_MODE_INNER (wvmode);

	val = convert_modes (wsmode, smode, val, true);
	x = expand_simple_binop (wsmode, ASHIFT, val,
				 GEN_INT (GET_MODE_BITSIZE (smode)),
				 NULL_RTX, 1, OPTAB_LIB_WIDEN);
	val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);

	x = gen_lowpart (wvmode, target);
	ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
	gcc_assert (ok);
	return ok;
      }

    case V16HImode:
    case V32QImode:
      {
	enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
	rtx x = gen_reg_rtx (hvmode);

	ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
	gcc_assert (ok);

	x = gen_rtx_VEC_CONCAT (mode, x, x);
	emit_insn (gen_rtx_SET (VOIDmode, target, x));
      }
      return true;

    default:
      return false;
    }
}
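/* Illustrative example (not from the sources): broadcasting a QImode
   value without SSE2 takes the `widen' path above; the byte b is first
   replicated into a halfword as (b << 8) | b, that halfword into the
   still wider element, and so on, after which the broadcast is redone
   in the wider vector mode and the result reinterpreted through
   gen_lowpart.  */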
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   whose ONE_VAR element is VAR, and other elements are zero.  Return true
   if successful.  */

static bool
ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
				     rtx target, rtx var, int one_var)
{
  enum machine_mode vsimode;
  rtx new_target;
  rtx x, tmp;
  bool use_vector_set = false;

  switch (mode)
    {
    case V2DImode:
      /* For SSE4.1, we normally use vector set.  But if the second
	 element is zero and inter-unit moves are OK, we use movq
	 instead.  */
      use_vector_set = (TARGET_64BIT
			&& TARGET_SSE4_1
			&& !(TARGET_INTER_UNIT_MOVES
			     && one_var == 0));
      break;
    case V16QImode:
    case V4SImode:
    case V4SFmode:
      use_vector_set = TARGET_SSE4_1;
      break;
    case V8HImode:
      use_vector_set = TARGET_SSE2;
      break;
    case V4HImode:
      use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
      break;
    case V32QImode:
    case V16HImode:
    case V8SImode:
    case V8SFmode:
    case V4DFmode:
      use_vector_set = TARGET_AVX;
      break;
    case V4DImode:
      /* Use ix86_expand_vector_set in 64bit mode only.  */
      use_vector_set = TARGET_AVX && TARGET_64BIT;
      break;
    default:
      break;
    }

  if (use_vector_set)
    {
      emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
      var = force_reg (GET_MODE_INNER (mode), var);
      ix86_expand_vector_set (mmx_ok, target, var, one_var);
      return true;
    }

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok)
	return false;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      if (one_var != 0)
	return false;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V4SFmode:
    case V4SImode:
      if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
	new_target = gen_reg_rtx (mode);
      else
	new_target = target;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_DUPLICATE (mode, var);
      x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
      if (one_var != 0)
	{
	  /* We need to shuffle the value to the correct position, so
	     create a new pseudo to store the intermediate result.  */

	  /* With SSE2, we can use the integer shuffle insns.  */
	  if (mode != V4SFmode && TARGET_SSE2)
	    {
	      emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
					    const1_rtx,
					    GEN_INT (one_var == 1 ? 0 : 1),
					    GEN_INT (one_var == 2 ? 0 : 1),
					    GEN_INT (one_var == 3 ? 0 : 1)));
	      if (target != new_target)
		emit_move_insn (target, new_target);
	      return true;
	    }

	  /* Otherwise convert the intermediate result to V4SFmode and
	     use the SSE1 shuffle instructions.  */
	  if (mode != V4SFmode)
	    {
	      tmp = gen_reg_rtx (V4SFmode);
	      emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
	    }
	  else
	    tmp = new_target;

	  emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
					  const1_rtx,
					  GEN_INT (one_var == 1 ? 0 : 1),
					  GEN_INT (one_var == 2 ? 0+4 : 1+4),
					  GEN_INT (one_var == 3 ? 0+4 : 1+4)));

	  if (mode != V4SFmode)
	    emit_move_insn (target, gen_lowpart (V4SImode, tmp));
	  else if (tmp != target)
	    emit_move_insn (target, tmp);
	}
      else if (target != new_target)
	emit_move_insn (target, new_target);
      return true;

    case V8HImode:
    case V16QImode:
      vsimode = V4SImode;
      goto widen;
    case V4HImode:
    case V8QImode:
      if (!mmx_ok)
	return false;
      vsimode = V2SImode;
      goto widen;
    widen:
      if (one_var != 0)
	return false;

      /* Zero extend the variable element to SImode and recurse.  */
      var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);

      x = gen_reg_rtx (vsimode);
      if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
						var, one_var))
	gcc_unreachable ();

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   consisting of the values in VALS.  It is known that all elements
   except ONE_VAR are constants.  Return true if successful.  */

static bool
ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
				 rtx target, rtx vals, int one_var)
{
  rtx var = XVECEXP (vals, 0, one_var);
  enum machine_mode wmode;
  rtx const_vec, x;

  const_vec = copy_rtx (vals);
  XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
  const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));

  switch (mode)
    {
    case V2DFmode:
    case V2DImode:
    case V2SFmode:
    case V2SImode:
      /* For the two element vectors, it's just as easy to use
	 the general case.  */
      return false;

    case V4DImode:
      /* Use ix86_expand_vector_set in 64bit mode only.  */
      if (!TARGET_64BIT)
	return false;
    case V4DFmode:
    case V8SFmode:
    case V8SImode:
    case V16HImode:
    case V32QImode:
    case V4SFmode:
    case V4SImode:
    case V8HImode:
    case V4HImode:
      break;

    case V16QImode:
      if (TARGET_SSE4_1)
	break;
      wmode = V8HImode;
      goto widen;
    case V8QImode:
      wmode = V4HImode;
      goto widen;
    widen:
      /* There's no way to set one QImode entry easily.  Combine
	 the variable value with its adjacent constant value, and
	 promote to an HImode set.  */
      x = XVECEXP (vals, 0, one_var ^ 1);
      if (one_var & 1)
	{
	  var = convert_modes (HImode, QImode, var, true);
	  var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
				     NULL_RTX, 1, OPTAB_LIB_WIDEN);
	  x = GEN_INT (INTVAL (x) & 0xff);
	}
      else
	{
	  var = convert_modes (HImode, QImode, var, true);
	  x = gen_int_mode (INTVAL (x) << 8, HImode);
	}
      if (x != const0_rtx)
	var = expand_simple_binop (HImode, IOR, var, x, var,
				   1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wmode);
      emit_move_insn (x, gen_lowpart (wmode, const_vec));
      ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }

  emit_move_insn (target, const_vec);
  ix86_expand_vector_set (mmx_ok, target, var, one_var);
  return true;
}
/* A subroutine of ix86_expand_vector_init_general.  Use vector
   concatenate to handle the most general case: all values variable,
   and none identical.  */

static void
ix86_expand_vector_init_concat (enum machine_mode mode,
				rtx target, rtx *ops, int n)
{
  enum machine_mode cmode, hmode = VOIDmode;
  rtx first[8], second[4];
  rtvec v;
  int i, j;

  switch (n)
    {
    case 2:
      switch (mode)
	{
	case V8SImode:
	  cmode = V4SImode;
	  break;
	case V8SFmode:
	  cmode = V4SFmode;
	  break;
	case V4DImode:
	  cmode = V2DImode;
	  break;
	case V4DFmode:
	  cmode = V2DFmode;
	  break;
	case V4SImode:
	  cmode = V2SImode;
	  break;
	case V4SFmode:
	  cmode = V2SFmode;
	  break;
	case V2DImode:
	  cmode = DImode;
	  break;
	case V2SImode:
	  cmode = SImode;
	  break;
	case V2DFmode:
	  cmode = DFmode;
	  break;
	case V2SFmode:
	  cmode = SFmode;
	  break;
	default:
	  gcc_unreachable ();
	}

      if (!register_operand (ops[1], cmode))
	ops[1] = force_reg (cmode, ops[1]);
      if (!register_operand (ops[0], cmode))
	ops[0] = force_reg (cmode, ops[0]);
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_VEC_CONCAT (mode, ops[0],
						  ops[1])));
      break;

    case 4:
      switch (mode)
	{
	case V4DImode:
	  cmode = V2DImode;
	  break;
	case V4DFmode:
	  cmode = V2DFmode;
	  break;
	case V4SImode:
	  cmode = V2SImode;
	  break;
	case V4SFmode:
	  cmode = V2SFmode;
	  break;
	default:
	  gcc_unreachable ();
	}
      goto half;

    case 8:
      switch (mode)
	{
	case V8SImode:
	  cmode = V2SImode;
	  hmode = V4SImode;
	  break;
	case V8SFmode:
	  cmode = V2SFmode;
	  hmode = V4SFmode;
	  break;
	default:
	  gcc_unreachable ();
	}
      goto half;

half:
      /* FIXME: We process inputs backward to help RA.  PR 36222.  */
      i = n - 1;
      j = (n >> 1) - 1;
      for (; i > 0; i -= 2, j--)
	{
	  first[j] = gen_reg_rtx (cmode);
	  v = gen_rtvec (2, ops[i - 1], ops[i]);
	  ix86_expand_vector_init (false, first[j],
				   gen_rtx_PARALLEL (cmode, v));
	}

      n >>= 1;
      if (n > 2)
	{
	  gcc_assert (hmode != VOIDmode);
	  for (i = j = 0; i < n; i += 2, j++)
	    {
	      second[j] = gen_reg_rtx (hmode);
	      ix86_expand_vector_init_concat (hmode, second[j],
					      &first[i], 2);
	    }
	  n >>= 1;
	  ix86_expand_vector_init_concat (mode, target, second, n);
	}
      else
	ix86_expand_vector_init_concat (mode, target, first, n);
      break;

    default:
      gcc_unreachable ();
    }
}
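/* Illustrative example (not from the sources): building a V8SImode
   vector from eight scalars enters with n == 8, first forms four
   V2SImode pairs, then combines those into two V4SImode halves, and
   finally emits a single VEC_CONCAT of the two halves, matching the
   8 -> 4 -> 2 recursion above.  */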
/* A subroutine of ix86_expand_vector_init_general.  Use vector
   interleave to handle the most general case: all values variable,
   and none identical.  */

static void
ix86_expand_vector_init_interleave (enum machine_mode mode,
				    rtx target, rtx *ops, int n)
{
  enum machine_mode first_imode, second_imode, third_imode, inner_mode;
  int i, j;
  rtx op0, op1;
  rtx (*gen_load_even) (rtx, rtx, rtx);
  rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
  rtx (*gen_interleave_second_low) (rtx, rtx, rtx);

  switch (mode)
    {
    case V8HImode:
      gen_load_even = gen_vec_setv8hi;
      gen_interleave_first_low = gen_vec_interleave_lowv4si;
      gen_interleave_second_low = gen_vec_interleave_lowv2di;
      inner_mode = HImode;
      first_imode = V4SImode;
      second_imode = V2DImode;
      third_imode = VOIDmode;
      break;
    case V16QImode:
      gen_load_even = gen_vec_setv16qi;
      gen_interleave_first_low = gen_vec_interleave_lowv8hi;
      gen_interleave_second_low = gen_vec_interleave_lowv4si;
      inner_mode = QImode;
      first_imode = V8HImode;
      second_imode = V4SImode;
      third_imode = V2DImode;
      break;
    default:
      gcc_unreachable ();
    }

  for (i = 0; i < n; i++)
    {
      /* Extend the odd element to SImode using a paradoxical SUBREG.  */
      op0 = gen_reg_rtx (SImode);
      emit_move_insn (op0, gen_lowpart (SImode, ops[i + i]));

      /* Insert the SImode value as low element of V4SImode vector.  */
      op1 = gen_reg_rtx (V4SImode);
      op0 = gen_rtx_VEC_MERGE (V4SImode,
			       gen_rtx_VEC_DUPLICATE (V4SImode,
						      op0),
			       CONST0_RTX (V4SImode),
			       const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, op1, op0));

      /* Cast the V4SImode vector back to a vector in original mode.  */
      op0 = gen_reg_rtx (mode);
      emit_move_insn (op0, gen_lowpart (mode, op1));

      /* Load even elements into the second position.  */
      emit_insn (gen_load_even (op0,
				force_reg (inner_mode,
					   ops[i + i + 1]),
				const1_rtx));

      /* Cast vector to FIRST_IMODE vector.  */
      ops[i] = gen_reg_rtx (first_imode);
      emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
    }

  /* Interleave low FIRST_IMODE vectors.  */
  for (i = j = 0; i < n; i += 2, j++)
    {
      op0 = gen_reg_rtx (first_imode);
      emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));

      /* Cast FIRST_IMODE vector to SECOND_IMODE vector.  */
      ops[j] = gen_reg_rtx (second_imode);
      emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
    }

  /* Interleave low SECOND_IMODE vectors.  */
  switch (second_imode)
    {
    case V4SImode:
      for (i = j = 0; i < n / 2; i += 2, j++)
	{
	  op0 = gen_reg_rtx (second_imode);
	  emit_insn (gen_interleave_second_low (op0, ops[i],
						ops[i + 1]));

	  /* Cast the SECOND_IMODE vector to the THIRD_IMODE
	     vector.  */
	  ops[j] = gen_reg_rtx (third_imode);
	  emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
	}
      second_imode = V2DImode;
      gen_interleave_second_low = gen_vec_interleave_lowv2di;
      /* FALLTHRU */

    case V2DImode:
      op0 = gen_reg_rtx (second_imode);
      emit_insn (gen_interleave_second_low (op0, ops[0],
					    ops[1]));

      /* Cast the SECOND_IMODE vector back to a vector on original
	 mode.  */
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_lowpart (mode, op0)));
      break;

    default:
      gcc_unreachable ();
    }
}
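/* Illustrative example (not from the sources): for V16QImode the first
   loop above packs each pair of QImode inputs into one partially
   initialized vector (odd element in the low byte lane, even element
   loaded into the second position), and the packed vectors are then
   merged pairwise with low-half interleaves at V8HImode, V4SImode and
   finally V2DImode granularity until a single vector remains.  */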
/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
   all values variable, and none identical.  */

static void
ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
				 rtx target, rtx vals)
{
  rtx ops[32], op0, op1;
  enum machine_mode half_mode = VOIDmode;
  int n, i;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok && !TARGET_SSE)
	break;
      /* FALLTHRU */

    case V8SFmode:
    case V8SImode:
    case V4DFmode:
    case V4DImode:
    case V4SFmode:
    case V4SImode:
    case V2DFmode:
    case V2DImode:
      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
	ops[i] = XVECEXP (vals, 0, i);
      ix86_expand_vector_init_concat (mode, target, ops, n);
      return;

    case V32QImode:
      half_mode = V16QImode;
      goto half;

    case V16HImode:
      half_mode = V8HImode;
      goto half;

half:
      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
	ops[i] = XVECEXP (vals, 0, i);
      op0 = gen_reg_rtx (half_mode);
      op1 = gen_reg_rtx (half_mode);
      ix86_expand_vector_init_interleave (half_mode, op0, ops,
					  n >> 2);
      ix86_expand_vector_init_interleave (half_mode, op1,
					  &ops[n >> 1], n >> 2);
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_VEC_CONCAT (mode, op0, op1)));
      return;

    case V16QImode:
      if (!TARGET_SSE4_1)
	break;
      /* FALLTHRU */

    case V8HImode:
      if (!TARGET_SSE2)
	break;

      /* Don't use ix86_expand_vector_init_interleave if we can't
	 move from GPR to SSE register directly.  */
      if (!TARGET_INTER_UNIT_MOVES)
	break;

      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
	ops[i] = XVECEXP (vals, 0, i);
      ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
      return;

    case V4HImode:
    case V8QImode:
      break;

    default:
      gcc_unreachable ();
    }

    {
      int i, j, n_elts, n_words, n_elt_per_word;
      enum machine_mode inner_mode;
      rtx words[4], shift;

      inner_mode = GET_MODE_INNER (mode);
      n_elts = GET_MODE_NUNITS (mode);
      n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      n_elt_per_word = n_elts / n_words;
      shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));

      for (i = 0; i < n_words; ++i)
	{
	  rtx word = NULL_RTX;

	  for (j = 0; j < n_elt_per_word; ++j)
	    {
	      rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
	      elt = convert_modes (word_mode, inner_mode, elt, true);

	      if (j == 0)
		word = elt;
	      else
		{
		  word = expand_simple_binop (word_mode, ASHIFT, word, shift,
					      word, 1, OPTAB_LIB_WIDEN);
		  word = expand_simple_binop (word_mode, IOR, word, elt,
					      word, 1, OPTAB_LIB_WIDEN);
		}
	    }

	  words[i] = word;
	}

      if (n_words == 1)
	emit_move_insn (target, gen_lowpart (mode, words[0]));
      else if (n_words == 2)
	{
	  rtx tmp = gen_reg_rtx (mode);
	  emit_clobber (tmp);
	  emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
	  emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
	  emit_move_insn (target, tmp);
	}
      else if (n_words == 4)
	{
	  rtx tmp = gen_reg_rtx (V4SImode);
	  gcc_assert (word_mode == SImode);
	  vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
	  ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
	  emit_move_insn (target, gen_lowpart (mode, tmp));
	}
      else
	gcc_unreachable ();
    }
}
/* Initialize vector TARGET via VALS.  Suppress the use of MMX
   instructions unless MMX_OK is true.  */

void
ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true, all_const_zero = true;
  int i;
  rtx x;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!(CONST_INT_P (x)
	    || GET_CODE (x) == CONST_DOUBLE
	    || GET_CODE (x) == CONST_FIXED))
	n_var++, one_var = i;
      else if (x != CONST0_RTX (inner_mode))
	all_const_zero = false;
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  /* Constants are best loaded from the constant pool.  */
  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  /* If all values are identical, broadcast the value.  */
  if (all_same
      && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
					    XVECEXP (vals, 0, 0)))
    return;

  /* Values where only one field is non-constant are best loaded from
     the pool and overwritten via move later.  */
  if (n_var == 1)
    {
      if (all_const_zero
	  && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
						  XVECEXP (vals, 0, one_var),
						  one_var))
	return;

      if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
	return;
    }

  ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
}
void
ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  enum machine_mode half_mode;
  bool use_vec_merge = false;
  rtx tmp;
  static rtx (*gen_extract[6][2]) (rtx, rtx)
    = {
	{ gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
	{ gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
	{ gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
	{ gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
	{ gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
	{ gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
      };
  static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
    = {
	{ gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
	{ gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
	{ gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
	{ gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
	{ gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
	{ gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
      };
  int i, j, n;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (mmx_ok)
	{
	  tmp = gen_reg_rtx (GET_MODE_INNER (mode));
	  ix86_expand_vector_extract (true, tmp, target, 1 - elt);
	  if (elt == 0)
	    tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
	  else
	    tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
	  emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
	  return;
	}
      break;

    case V2DImode:
      use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
      if (use_vec_merge)
	break;

      tmp = gen_reg_rtx (GET_MODE_INNER (mode));
      ix86_expand_vector_extract (false, tmp, target, 1 - elt);
      if (elt == 0)
	tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
      else
	tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      return;

    case V2DFmode:
      {
	rtx op0, op1;

	/* For the two element vectors, we implement a VEC_CONCAT with
	   the extraction of the other element.  */

	tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
	tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);

	if (elt == 0)
	  op0 = val, op1 = tmp;
	else
	  op0 = tmp, op1 = val;

	tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
	emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      }
      return;

    case V4SFmode:
      use_vec_merge = TARGET_SSE4_1;
      if (use_vec_merge)
	break;

      switch (elt)
	{
	case 0:
	  use_vec_merge = true;
	  break;

	case 1:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* target = A A B B */
	  emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
	  /* target = X A B B */
	  ix86_expand_vector_set (false, target, val, 0);
	  /* target = A X C D  */
	  emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
					  const1_rtx, const0_rtx,
					  GEN_INT (2+4), GEN_INT (3+4)));
	  return;

	case 2:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* tmp = X B C D */
	  ix86_expand_vector_set (false, tmp, val, 0);
	  /* target = A B X D */
	  emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
					  const0_rtx, const1_rtx,
					  GEN_INT (0+4), GEN_INT (3+4)));
	  return;

	case 3:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* tmp = X B C D */
	  ix86_expand_vector_set (false, tmp, val, 0);
	  /* target = A B C X */
	  emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
					  const0_rtx, const1_rtx,
					  GEN_INT (2+4), GEN_INT (0+4)));
	  return;

	default:
	  gcc_unreachable ();
	}
      break;

    case V4SImode:
      use_vec_merge = TARGET_SSE4_1;
      if (use_vec_merge)
	break;

      /* Element 0 handled by vec_merge below.  */
      if (elt == 0)
	{
	  use_vec_merge = true;
	  break;
	}

      if (TARGET_SSE2)
	{
	  /* With SSE2, use integer shuffles to swap element 0 and ELT,
	     store into element 0, then shuffle them back.  */

	  rtx order[4];

	  order[0] = GEN_INT (elt);
	  order[1] = const1_rtx;
	  order[2] = const2_rtx;
	  order[3] = GEN_INT (3);
	  order[elt] = const0_rtx;

	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
					order[1], order[2], order[3]));

	  ix86_expand_vector_set (false, target, val, 0);

	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
					order[1], order[2], order[3]));
	}
      else
	{
	  /* For SSE1, we have to reuse the V4SF code.  */
	  ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
				  gen_lowpart (SFmode, val), elt);
	}
      return;

    case V8HImode:
      use_vec_merge = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
      use_vec_merge = TARGET_SSE4_1;
      break;

    case V8QImode:
      break;

    case V32QImode:
      half_mode = V16QImode;
      j = 0;
      n = 16;
      goto half;

    case V16HImode:
      half_mode = V8HImode;
      j = 1;
      n = 8;
      goto half;

    case V8SImode:
      half_mode = V4SImode;
      j = 2;
      n = 4;
      goto half;

    case V4DImode:
      half_mode = V2DImode;
      j = 3;
      n = 2;
      goto half;

    case V8SFmode:
      half_mode = V4SFmode;
      j = 4;
      n = 4;
      goto half;

    case V4DFmode:
      half_mode = V2DFmode;
      j = 5;
      n = 2;
      goto half;

half:
      /* Compute offset.  */
      i = elt / n;
      elt %= n;

      gcc_assert (i <= 1);

      /* Extract the half.  */
      tmp = gen_reg_rtx (half_mode);
      emit_insn (gen_extract[j][i] (tmp, target));

      /* Put val in tmp at elt.  */
      ix86_expand_vector_set (false, tmp, val, elt);

      /* Put it back.  */
      emit_insn (gen_insert[j][i] (target, target, tmp));
      return;

    default:
      break;
    }

  if (use_vec_merge)
    {
      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, target);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (tmp, val);

      emit_move_insn (target, mem);
    }
}
void
ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
{
  enum machine_mode mode = GET_MODE (vec);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_extr = false;
  rtx tmp;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
	break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      use_vec_extr = true;
      break;

    case V4SFmode:
      use_vec_extr = TARGET_SSE4_1;
      if (use_vec_extr)
	break;

      switch (elt)
	{
	case 0:
	  tmp = vec;
	  break;

	case 1:
	case 3:
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
					  GEN_INT (elt), GEN_INT (elt),
					  GEN_INT (elt+4), GEN_INT (elt+4)));
	  break;

	case 2:
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
	  break;

	default:
	  gcc_unreachable ();
	}
      vec = tmp;
      use_vec_extr = true;
      elt = 0;
      break;

    case V4SImode:
      use_vec_extr = TARGET_SSE4_1;
      if (use_vec_extr)
	break;

      if (TARGET_SSE2)
	{
	  switch (elt)
	    {
	    case 0:
	      tmp = vec;
	      break;

	    case 1:
	    case 3:
	      tmp = gen_reg_rtx (mode);
	      emit_insn (gen_sse2_pshufd_1 (tmp, vec,
					    GEN_INT (elt), GEN_INT (elt),
					    GEN_INT (elt), GEN_INT (elt)));
	      break;

	    case 2:
	      tmp = gen_reg_rtx (mode);
	      emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
	      break;

	    default:
	      gcc_unreachable ();
	    }
	  vec = tmp;
	  use_vec_extr = true;
	  elt = 0;
	}
      else
	{
	  /* For SSE1, we have to reuse the V4SF code.  */
	  ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
				      gen_lowpart (V4SFmode, vec), elt);
	  return;
	}
      break;

    case V8HImode:
      use_vec_extr = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
      use_vec_extr = TARGET_SSE4_1;
      break;

    case V8SFmode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V4SFmode);
	  if (elt < 4)
	    emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 3);
	  return;
	}
      break;

    case V4DFmode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V2DFmode);
	  if (elt < 2)
	    emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 1);
	  return;
	}
      break;

    case V32QImode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V16QImode);
	  if (elt < 16)
	    emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 15);
	  return;
	}
      break;

    case V16HImode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V8HImode);
	  if (elt < 8)
	    emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 7);
	  return;
	}
      break;

    case V8SImode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V4SImode);
	  if (elt < 4)
	    emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 3);
	  return;
	}
      break;

    case V4DImode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V2DImode);
	  if (elt < 2)
	    emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 1);
	  return;
	}
      break;

    case V8QImode:
      /* ??? Could extract the appropriate HImode element and shift.  */
    default:
      break;
    }

  if (use_vec_extr)
    {
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);

      /* Let the rtl optimizers know about the zero extension performed.  */
      if (inner_mode == QImode || inner_mode == HImode)
	{
	  tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
	  target = gen_lowpart (SImode, target);
	}

      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, vec);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (target, tmp);
    }
}
/* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
   to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
   The upper bits of DEST are undefined, though they shouldn't cause
   exceptions (some bits from src or all zeros are ok).  */

static void
emit_reduc_half (rtx dest, rtx src, int i)
{
  rtx tem;
  switch (GET_MODE (src))
    {
    case V4SFmode:
      if (i == 128)
	tem = gen_sse_movhlps (dest, src, src);
      else
	tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
				   GEN_INT (1 + 4), GEN_INT (1 + 4));
      break;
    case V2DFmode:
      tem = gen_vec_interleave_highv2df (dest, src, src);
      break;
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
      tem = gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, dest),
				gen_lowpart (V1TImode, src),
				GEN_INT (i / 2));
      break;
    case V8SFmode:
      if (i == 256)
	tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
      else
	tem = gen_avx_shufps256 (dest, src, src,
				 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
      break;
    case V4DFmode:
      if (i == 256)
	tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
      else
	tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
      break;
    case V32QImode:
    case V16HImode:
    case V8SImode:
    case V4DImode:
      if (i == 256)
	tem = gen_avx2_permv2ti (gen_lowpart (V4DImode, dest),
				 gen_lowpart (V4DImode, src),
				 gen_lowpart (V4DImode, src),
				 const1_rtx);
      else
	tem = gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, dest),
				  gen_lowpart (V2TImode, src),
				  GEN_INT (i / 2));
      break;
    default:
      gcc_unreachable ();
    }
  emit_insn (tem);
}
/* Expand a vector reduction.  FN is the binary pattern to reduce;
   DEST is the destination; IN is the input vector.  */

void
ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
{
  rtx half, dst, vec = in;
  enum machine_mode mode = GET_MODE (in);
  int i;

  /* SSE4 has a special instruction for V8HImode UMIN reduction.  */
  if (TARGET_SSE4_1
      && mode == V8HImode
      && fn == gen_uminv8hi3)
    {
      emit_insn (gen_sse4_1_phminposuw (dest, in));
      return;
    }

  for (i = GET_MODE_BITSIZE (mode);
       i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
       i >>= 1)
    {
      half = gen_reg_rtx (mode);
      emit_reduc_half (half, vec, i);
      if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
	dst = dest;
      else
	dst = gen_reg_rtx (mode);
      emit_insn (fn (dst, half, vec));
      vec = dst;
    }
}
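/* Worked example (illustrative only): reducing a V4SFmode vector with
   FN = addition iterates i = 128, 64.  The first step folds the high
   64 bits onto the low 64 via emit_reduc_half and adds; the second
   folds the remaining pair, and since i then equals twice the element
   width, the final add targets DEST directly, leaving the reduced
   scalar in element 0.  */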
/* Target hook for scalar_mode_supported_p.  */
static bool
ix86_scalar_mode_supported_p (enum machine_mode mode)
{
  if (DECIMAL_FLOAT_MODE_P (mode))
    return default_decimal_float_supported_p ();
  else if (mode == TFmode)
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}

/* Implements target hook vector_mode_supported_p.  */
static bool
ix86_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
    return true;
  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
    return true;
  if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
    return true;
  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
    return true;
  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
    return true;
  return false;
}
/* Target hook for c_mode_for_suffix.  */
static enum machine_mode
ix86_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;
  if (suffix == 'w')
    return XFmode;

  return VOIDmode;
}
/* Worker function for TARGET_MD_ASM_CLOBBERS.

   We do this in the new i386 backend to maintain source compatibility
   with the old cc0-based compiler.  */

static tree
ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
		      tree inputs ATTRIBUTE_UNUSED,
		      tree clobbers)
{
  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
			clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
			clobbers);
  return clobbers;
}
/* Implements target vector targetm.asm.encode_section_info.  */

static void ATTRIBUTE_UNUSED
ix86_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == VAR_DECL
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
      && ix86_in_large_data_p (decl))
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
}
/* Worker function for REVERSE_CONDITION.  */

enum rtx_code
ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
{
  return (mode != CCFPmode && mode != CCFPUmode
	  ? reverse_condition (code)
	  : reverse_condition_maybe_unordered (code));
}
/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  */

const char *
output_387_reg_move (rtx insn, rtx *operands)
{
  if (REG_P (operands[0]))
    {
      if (REG_P (operands[1])
	  && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
	  if (REGNO (operands[0]) == FIRST_STACK_REG)
	    return output_387_ffreep (operands, 0);
	  return "fstp\t%y0";
	}
      if (STACK_TOP_P (operands[0]))
	return "fld%Z1\t%y1";
      return "fst\t%y0";
    }
  else if (MEM_P (operands[0]))
    {
      gcc_assert (REG_P (operands[1]));
      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	return "fstp%Z0\t%y0";
      else
	{
	  /* There is no non-popping store to memory for XFmode.
	     So if we need one, follow the store with a load.  */
	  if (GET_MODE (operands[0]) == XFmode)
	    return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
	  else
	    return "fst%Z0\t%y0";
	}
    }
  else
    gcc_unreachable ();
}
/* Output code to perform a conditional jump to LABEL, if C2 flag in
   FP status register is set.  */

void
ix86_emit_fp_unordered_jump (rtx label)
{
  rtx reg = gen_reg_rtx (HImode);
  rtx temp;

  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
    {
      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
    }
  else
    {
      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));

      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
    }

  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
			       gen_rtx_LABEL_REF (VOIDmode, label),
			       pc_rtx);
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);

  emit_jump_insn (temp);
  predict_jump (REG_BR_PROB_BASE * 10 / 100);
}
/* Output code to perform a log1p XFmode calculation.  */

void ix86_emit_i387_log1p (rtx op0, rtx op1)
{
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);
  rtx test;

  emit_insn (gen_absxf2 (tmp, op1));
  test = gen_rtx_GE (VOIDmode, tmp,
    CONST_DOUBLE_FROM_REAL_VALUE (
       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
       XFmode));
  emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));

  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
  emit_jump (label2);

  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));

  emit_label (label2);
}
/* Emit code for round calculation.  */
void ix86_emit_i387_round (rtx op0, rtx op1)
{
  enum machine_mode inmode = GET_MODE (op1);
  enum machine_mode outmode = GET_MODE (op0);
  rtx e1, e2, res, tmp, tmp1, half;
  rtx scratch = gen_reg_rtx (HImode);
  rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
  rtx jump_label = gen_label_rtx ();
  rtx insn;
  rtx (*gen_abs) (rtx, rtx);
  rtx (*gen_neg) (rtx, rtx);

  switch (inmode)
    {
    case SFmode:
      gen_abs = gen_abssf2;
      break;
    case DFmode:
      gen_abs = gen_absdf2;
      break;
    case XFmode:
      gen_abs = gen_absxf2;
      break;
    default:
      gcc_unreachable ();
    }

  switch (outmode)
    {
    case SFmode:
      gen_neg = gen_negsf2;
      break;
    case DFmode:
      gen_neg = gen_negdf2;
      break;
    case XFmode:
      gen_neg = gen_negxf2;
      break;
    case HImode:
      gen_neg = gen_neghi2;
      break;
    case SImode:
      gen_neg = gen_negsi2;
      break;
    case DImode:
      gen_neg = gen_negdi2;
      break;
    default:
      gcc_unreachable ();
    }

  e1 = gen_reg_rtx (inmode);
  e2 = gen_reg_rtx (inmode);
  res = gen_reg_rtx (outmode);

  half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);

  /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */

  /* scratch = fxam(op1) */
  emit_insn (gen_rtx_SET (VOIDmode, scratch,
			  gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
					  UNSPEC_FXAM)));
  /* e1 = fabs(op1) */
  emit_insn (gen_abs (e1, op1));

  /* e2 = e1 + 0.5 */
  half = force_reg (inmode, half);
  emit_insn (gen_rtx_SET (VOIDmode, e2,
			  gen_rtx_PLUS (inmode, e1, half)));

  /* res = floor(e2) */
  if (inmode != XFmode)
    {
      tmp1 = gen_reg_rtx (XFmode);

      emit_insn (gen_rtx_SET (VOIDmode, tmp1,
			      gen_rtx_FLOAT_EXTEND (XFmode, e2)));
    }
  else
    tmp1 = e2;

  switch (outmode)
    {
    case SFmode:
    case DFmode:
      {
	rtx tmp0 = gen_reg_rtx (XFmode);

	emit_insn (gen_frndintxf2_floor (tmp0, tmp1));

	emit_insn (gen_rtx_SET (VOIDmode, res,
				gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
						UNSPEC_TRUNC_NOOP)));
      }
      break;
    case XFmode:
      emit_insn (gen_frndintxf2_floor (res, tmp1));
      break;
    case HImode:
      emit_insn (gen_lfloorxfhi2 (res, tmp1));
      break;
    case SImode:
      emit_insn (gen_lfloorxfsi2 (res, tmp1));
      break;
    case DImode:
      emit_insn (gen_lfloorxfdi2 (res, tmp1));
      break;
    default:
      gcc_unreachable ();
    }

  /* flags = signbit(a) */
  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));

  /* if (flags) then res = -res */
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
			      gen_rtx_EQ (VOIDmode, flags, const0_rtx),
			      gen_rtx_LABEL_REF (VOIDmode, jump_label),
			      pc_rtx);
  insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
  predict_jump (REG_BR_PROB_BASE * 50 / 100);
  JUMP_LABEL (insn) = jump_label;

  emit_insn (gen_neg (res, res));

  emit_label (jump_label);
  LABEL_NUSES (jump_label) = 1;

  emit_move_insn (op0, res);
}
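/* Illustrative examples (not from the sources): with
   round(a) = sgn(a) * floor(fabs(a) + 0.5), round(2.5) becomes
   floor(3.0) = 3 and round(-2.5) becomes -floor(3.0) = -3, so halfway
   cases round away from zero, matching the semantics of the C round()
   family rather than the x87 default round-to-nearest-even.  */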
/* Output code to perform a Newton-Raphson approximation of a single precision
   floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm].  */

void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
{
  rtx x0, x1, e0, e1;

  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  e1 = gen_reg_rtx (mode);
  x1 = gen_reg_rtx (mode);

  /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */

  b = force_reg (mode, b);

  /* x0 = rcp(b) estimate */
  emit_insn (gen_rtx_SET (VOIDmode, x0,
			  gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
					  UNSPEC_RCP)));
  /* e0 = x0 * b */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
			  gen_rtx_MULT (mode, x0, b)));

  /* e0 = x0 * e0 */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
			  gen_rtx_MULT (mode, x0, e0)));

  /* e1 = x0 + x0 */
  emit_insn (gen_rtx_SET (VOIDmode, e1,
			  gen_rtx_PLUS (mode, x0, x0)));

  /* x1 = e1 - e0 */
  emit_insn (gen_rtx_SET (VOIDmode, x1,
			  gen_rtx_MINUS (mode, e1, e0)));

  /* res = a * x1 */
  emit_insn (gen_rtx_SET (VOIDmode, res,
			  gen_rtx_MULT (mode, a, x1)));
}
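/* Derivation (illustrative): with x0 = rcp(b) carrying a relative
   error eps, x1 = 2*x0 - b*x0*x0 = x0 * (2 - b*x0) is one
   Newton-Raphson step for f(x) = 1/x - b, and it squares the error
   (eps -> eps^2).  The ~12-bit rcpss estimate therefore reaches
   roughly single-precision accuracy before the final multiply by a.  */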
/* Output code to perform a Newton-Raphson approximation of a
   single precision floating point [reciprocal] square root.  */

void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
                         bool recip)
{
  rtx x0, e0, e1, e2, e3, mthree, mhalf;
  REAL_VALUE_TYPE r;

  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  e1 = gen_reg_rtx (mode);
  e2 = gen_reg_rtx (mode);
  e3 = gen_reg_rtx (mode);

  real_from_integer (&r, VOIDmode, -3, -1, 0);
  mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);

  real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
  mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);

  if (VECTOR_MODE_P (mode))
    {
      mthree = ix86_build_const_vector (mode, true, mthree);
      mhalf = ix86_build_const_vector (mode, true, mhalf);
    }

  /* sqrt(a)  = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
     rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */

  a = force_reg (mode, a);

  /* x0 = rsqrt(a) estimate */
  emit_insn (gen_rtx_SET (VOIDmode, x0,
                          gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
                                          UNSPEC_RSQRT)));

  /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0).  */
  if (!recip)
    {
      rtx zero, mask;

      zero = gen_reg_rtx (mode);
      mask = gen_reg_rtx (mode);

      zero = force_reg (mode, CONST0_RTX(mode));
      emit_insn (gen_rtx_SET (VOIDmode, mask,
                              gen_rtx_NE (mode, zero, a)));

      emit_insn (gen_rtx_SET (VOIDmode, x0,
                              gen_rtx_AND (mode, x0, mask)));
    }

  /* e0 = x0 * a */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
                          gen_rtx_MULT (mode, x0, a)));
  /* e1 = e0 * x0 */
  emit_insn (gen_rtx_SET (VOIDmode, e1,
                          gen_rtx_MULT (mode, e0, x0)));

  /* e2 = e1 - 3. */
  mthree = force_reg (mode, mthree);
  emit_insn (gen_rtx_SET (VOIDmode, e2,
                          gen_rtx_PLUS (mode, e1, mthree)));

  mhalf = force_reg (mode, mhalf);
  if (recip)
    /* e3 = -.5 * x0 */
    emit_insn (gen_rtx_SET (VOIDmode, e3,
                            gen_rtx_MULT (mode, x0, mhalf)));
  else
    /* e3 = -.5 * e0 */
    emit_insn (gen_rtx_SET (VOIDmode, e3,
                            gen_rtx_MULT (mode, e0, mhalf)));
  /* ret = e2 * e3 */
  emit_insn (gen_rtx_SET (VOIDmode, res,
                          gen_rtx_MULT (mode, e2, e3)));
}
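
/* In scalar terms the sequence above is (a sketch; `rsqrt_estimate' is a
   hypothetical stand-in for the hardware rsqrtss estimate):

     float swsqrt (float a, bool recip)
     {
       float x0 = rsqrt_estimate (a);     // low-precision 1/sqrt(a)
       float e0 = x0 * a;                 // a * rsqrt(a)
       float e1 = e0 * x0;                // a * rsqrt(a)^2, close to 1.0
       float e2 = e1 - 3.0f;              // note mthree above is -3.0
       float e3 = (recip ? x0 : e0) * -0.5f;
       return e2 * e3;                    // Newton-Raphson refinement
     }

   For !recip this converges to sqrt(a) because a * rsqrt(a) == sqrt(a);
   the zero mask emitted above keeps sqrt(0.0) from turning the infinite
   rsqrt estimate into a NaN.  */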
#ifdef TARGET_SOLARIS
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
                                tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
               flags & SECTION_WRITE ? "aw" : "a");
      return;
    }

#ifndef USE_GAS
  if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
    {
      solaris_elf_asm_comdat_section (name, flags, decl);
      return;
    }
#endif

  default_elf_asm_named_section (name, flags, decl);
}
#endif /* TARGET_SOLARIS */
/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ix86_mangle_type (const_tree type)
{
  type = TYPE_MAIN_VARIANT (type);

  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
    return NULL;

  switch (TYPE_MODE (type))
    {
    case TFmode:
      /* __float128 is "g".  */
      return "g";
    case XFmode:
      /* "long double" or __float80 is "e".  */
      return "e";
    default:
      return NULL;
    }
}
/* For 32-bit code we can save PIC register setup by using
   __stack_chk_fail_local hidden function instead of calling
   __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
   register, so it is better to call __stack_chk_fail directly.  */

static tree ATTRIBUTE_UNUSED
ix86_stack_protect_fail (void)
{
  return TARGET_64BIT
         ? default_external_stack_protect_fail ()
         : default_hidden_stack_protect_fail ();
}
/* Select a format to encode pointers in exception handling data.  CODE
   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   true if the symbol may be affected by dynamic relocations.

   ??? All x86 object file formats are capable of representing this.
   After all, the relocation needed is the same as for the call insn.
   Whether or not a particular assembler allows us to enter such, I
   guess we'll have to see.  */
int
asm_preferred_eh_data_format (int code, int global)
{
  if (flag_pic)
    {
      int type = DW_EH_PE_sdata8;
      if (!TARGET_64BIT
          || ix86_cmodel == CM_SMALL_PIC
          || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
        type = DW_EH_PE_sdata4;
      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
    }
  if (ix86_cmodel == CM_SMALL
      || (ix86_cmodel == CM_MEDIUM && code))
    return DW_EH_PE_udata4;
  return DW_EH_PE_absptr;
}
/* Expand copysign from SIGN to the positive value ABS_VALUE
   storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
   the sign-bit.  */
static void
ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
{
  enum machine_mode mode = GET_MODE (sign);
  rtx sgn = gen_reg_rtx (mode);
  if (mask == NULL_RTX)
    {
      enum machine_mode vmode;

      if (mode == SFmode)
        vmode = V4SFmode;
      else if (mode == DFmode)
        vmode = V2DFmode;
      else
        vmode = mode;

      mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
      if (!VECTOR_MODE_P (mode))
        {
          /* We need to generate a scalar mode mask in this case.  */
          rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
          tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
          mask = gen_reg_rtx (mode);
          emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
        }
    }
  else
    mask = gen_rtx_NOT (mode, mask);

  emit_insn (gen_rtx_SET (VOIDmode, sgn,
                          gen_rtx_AND (mode, mask, sign)));
  emit_insn (gen_rtx_SET (VOIDmode, result,
                          gen_rtx_IOR (mode, abs_value, sgn)));
}
/* Expand fabs (OP0) and return a new rtx that holds the result.  The
   mask for masking out the sign-bit is stored in *SMASK, if that is
   non-null.  */
static rtx
ix86_expand_sse_fabs (rtx op0, rtx *smask)
{
  enum machine_mode vmode, mode = GET_MODE (op0);
  rtx xa, mask;

  xa = gen_reg_rtx (mode);
  if (mode == SFmode)
    vmode = V4SFmode;
  else if (mode == DFmode)
    vmode = V2DFmode;
  else
    vmode = mode;
  mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
  if (!VECTOR_MODE_P (mode))
    {
      /* We need to generate a scalar mode mask in this case.  */
      rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
      tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
      mask = gen_reg_rtx (mode);
      emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
    }
  emit_insn (gen_rtx_SET (VOIDmode, xa,
                          gen_rtx_AND (mode, op0, mask)));

  if (smask)
    *smask = mask;

  return xa;
}
/* Expands a comparison of OP0 with OP1 using comparison code CODE,
   swapping the operands if SWAP_OPERANDS is true.  The expanded
   code is a forward jump to a newly created label in case the
   comparison is true.  The generated label rtx is returned.  */
static rtx
ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
                                  bool swap_operands)
{
  rtx label, tmp;

  if (swap_operands)
    {
      tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  label = gen_label_rtx ();
  tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_COMPARE (CCFPUmode, op0, op1)));
  tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                              gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
  JUMP_LABEL (tmp) = label;

  return label;
}
/* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
   using comparison code CODE.  Operands are swapped for the comparison if
   SWAP_OPERANDS is true.  Returns a rtx for the generated mask.  */
static rtx
ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
                              bool swap_operands)
{
  rtx (*insn)(rtx, rtx, rtx, rtx);
  enum machine_mode mode = GET_MODE (op0);
  rtx mask = gen_reg_rtx (mode);

  if (swap_operands)
    {
      rtx tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;

  emit_insn (insn (mask, op0, op1,
                   gen_rtx_fmt_ee (code, mode, op0, op1)));
  return mask;
}
/* Generate and return a rtx of mode MODE for 2**n where n is the number
   of bits of the mantissa of MODE, which must be one of DFmode or SFmode.  */
static rtx
ix86_gen_TWO52 (enum machine_mode mode)
{
  REAL_VALUE_TYPE TWO52r;
  rtx TWO52;

  real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
  TWO52 = const_double_from_real_value (TWO52r, mode);
  TWO52 = force_reg (mode, TWO52);

  return TWO52;
}
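
/* The callers below use TWO52 for the classic round-to-integer trick:
   for 0 <= x < 2**52 in double precision, x + 2**52 has no bits below
   the binary point, so the addition rounds away the fraction under the
   current rounding mode; subtracting 2**52 again leaves the rounded
   value.  A scalar sketch:

     double rint_positive (double x)   // valid only for 0 <= x < 2**52
     {
       const double TWO52 = 4503599627370496.0;   // 2**52
       return (x + TWO52) - TWO52;
     }

   The `if (!isless (xa, TWO52))' guards in the callers skip inputs that
   are already too large to carry a fractional part.  */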
/* Expand SSE sequence for computing lround from OP1 storing
   into OP0.  */
void
ix86_expand_lround (rtx op0, rtx op1)
{
  /* C code for the stuff we're doing below:
       tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
       return (long)tmp;
   */
  enum machine_mode mode = GET_MODE (op1);
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
  rtx adj;

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* adj = copysign (0.5, op1) */
  adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
  ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);

  /* adj = op1 + adj */
  adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);

  /* op0 = (imode)adj */
  expand_fix (op0, adj, 0);
}
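
/* The constant built above is nextafter (0.5, 0.0), i.e. 0.5 minus one
   ulp: pred_half = 0.5 - 2**(-p-1), where p is the precision of MODE.
   Using it instead of exactly 0.5 keeps inputs just below one half,
   e.g. 0.49999997f, from reaching 1.0 after the addition:

     // for SFmode, p == 24:  pred_half == 0.5 - 2**-25 == 0x1.fffffep-2f
     // for DFmode, p == 53:  pred_half == 0.5 - 2**-54
*/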
/* Expand SSE2 sequence for computing lfloor or lceil from OP1 storing
   into OP0.  */
void
ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
{
  /* C code for the stuff we're doing below (for do_floor):
        xi = (long)op1;
        xi -= (double)xi > op1 ? 1 : 0;
        return xi;
   */
  enum machine_mode fmode = GET_MODE (op1);
  enum machine_mode imode = GET_MODE (op0);
  rtx ireg, freg, label, tmp;

  /* reg = (long)op1 */
  ireg = gen_reg_rtx (imode);
  expand_fix (ireg, op1, 0);

  /* freg = (double)reg */
  freg = gen_reg_rtx (fmode);
  expand_float (freg, ireg, 0);

  /* ireg = (freg > op1) ? ireg - 1 : ireg */
  label = ix86_expand_sse_compare_and_jump (UNLE,
                                            freg, op1, !do_floor);
  tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
                             ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (ireg, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (op0, ireg);
}
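
/* For do_floor == false the same skeleton computes lceil, as sketched
   here:

     // xi = (long)op1;
     // xi += (double)xi < op1 ? 1 : 0;

   which the code above obtains by inverting the comparison (via the
   swap_operands argument) and using PLUS instead of MINUS.  */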
/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
   result in OPERAND0.  */
void
ix86_expand_rint (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
        xa = fabs (operand1);
        if (!isless (xa, 2**52))
          return operand1;
        xa = xa + 2**52 - 2**52;
        return copysign (xa, operand1);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, xa, label, TWO52, mask;

  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  TWO52 = ix86_gen_TWO52 (mode);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  ix86_sse_copysign_to_positive (res, xa, res, mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */
void
ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
        double xa = fabs (x), x2;
        if (!isless (xa, TWO52))
          return x;
        xa = xa + TWO52 - TWO52;
        x2 = copysign (xa, x);
     Compensate.  Floor:
        if (x2 > x)
          x2 -= 1;
     Compensate.  Ceil:
        if (x2 < x)
          x2 += 1;
        if (HONOR_SIGNED_ZEROS (mode))
          x2 = copysign (x2, x);
        return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = xa + TWO52 - TWO52; */
  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  /* xa = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (xa, xa, res, mask);

  /* generate 1.0 or -1.0 */
  one = force_reg (mode,
                   const_double_from_real_value (do_floor
                                                 ? dconst1 : dconstm1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  /* We always need to subtract here to preserve signed zero.  */
  tmp = expand_simple_binop (mode, MINUS,
                             xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
        double xa = fabs (x), x2;
        if (!isless (xa, TWO52))
          return x;
        x2 = (double)(long)x;
     Compensate.  Floor:
        if (x2 > x)
          x2 -= 1;
     Compensate.  Ceil:
        if (x2 < x)
          x2 += 1;
        if (HONOR_SIGNED_ZEROS (mode))
          return copysign (x2, x);
        return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (xa, xi, 0);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
                             xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */
void
ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
{
  /* C code for the stuff we expand below.
        double xa = fabs (x), xa2, x2;
        if (!isless (xa, TWO52))
          return x;
     Using the absolute value and copying back sign makes
     -0.0 -> -0.0 correct.
        xa2 = xa + TWO52 - TWO52;
     Compensate.
        dxa = xa2 - xa;
        if (dxa <= -0.5)
          xa2 += 1;
        else if (dxa > 0.5)
          xa2 -= 1;
        x2 = copysign (xa2, x);
        return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa2 = xa + TWO52 - TWO52; */
  xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);

  /* dxa = xa2 - xa; */
  dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);

  /* generate 0.5, 1.0 and -0.5 */
  half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
  one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
  mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
                               0, OPTAB_DIRECT);

  tmp = gen_reg_rtx (mode);
  /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);

  /* res = copysign (xa2, operand1) */
  ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_trunc (rtx operand0, rtx operand1)
{
  /* C code for SSE variant we expand below.
        double xa = fabs (x), x2;
        if (!isless (xa, TWO52))
          return x;
        x2 = (double)(long)x;
        if (HONOR_SIGNED_ZEROS (mode))
          return copysign (x2, x);
        return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, label, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* x = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (res, xi, 0);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */
void
ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
{
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, mask, TWO52, label, one, res, smask, tmp;

  /* C code for SSE variant we expand below.
        double xa = fabs (x), x2;
        if (!isless (xa, TWO52))
          return x;
        xa2 = xa + TWO52 - TWO52;
     Compensate:
        if (xa2 > xa)
          xa2 -= 1.0;
        x2 = copysign (xa2, x);
        return x2;
   */
  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &smask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* res = xa + TWO52 - TWO52; */
  tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
  mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
  emit_insn (gen_rtx_SET (VOIDmode, mask,
                          gen_rtx_AND (mode, mask, one)));
  tmp = expand_simple_binop (mode, MINUS,
                             res, mask, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* res = copysign (res, operand1) */
  ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_round (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
        double xa = fabs (x);
        if (!isless (xa, TWO52))
          return x;
        xa = (double)(long)(xa + nextafter (0.5, 0.0));
        return copysign (xa, x);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, TWO52, xa, label, xi, half, mask;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  TWO52 = ix86_gen_TWO52 (mode);
  xa = ix86_expand_sse_fabs (res, &mask);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* xa = xa + 0.5 */
  half = force_reg (mode, const_double_from_real_value (pred_half, mode));
  xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);

  /* xa = (double)(int64_t)xa */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, xa, 0);
  expand_float (xa, xi, 0);

  /* res = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing round
   from OP1 storing into OP0 using sse4 round insn.  */
void
ix86_expand_round_sse4 (rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx e1, e2, res, half;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
  rtx (*gen_copysign) (rtx, rtx, rtx);
  rtx (*gen_round) (rtx, rtx, rtx);

  switch (mode)
    {
    case SFmode:
      gen_copysign = gen_copysignsf3;
      gen_round = gen_sse4_1_roundsf2;
      break;
    case DFmode:
      gen_copysign = gen_copysigndf3;
      gen_round = gen_sse4_1_rounddf2;
      break;
    default:
      gcc_unreachable ();
    }

  /* round (a) = trunc (a + copysign (0.5, a)) */

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
  half = const_double_from_real_value (pred_half, mode);

  /* e1 = copysign (0.5, op1) */
  e1 = gen_reg_rtx (mode);
  emit_insn (gen_copysign (e1, half, op1));

  /* e2 = op1 + e1 */
  e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);

  /* res = trunc (e2) */
  res = gen_reg_rtx (mode);
  emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));

  emit_move_insn (op0, res);
}
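
/* In scalar terms the sequence above is (a sketch; `trunc' stands for
   the sse4.1 round insn with ROUND_TRUNC):

     double round_sse4 (double x)
     {
       double pred_half = 0.5 - DBL_EPSILON / 4;   // nextafter (0.5, 0.0)
       return trunc (x + copysign (pred_half, x));
     }
*/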
/* Table of valid machine attributes.  */
static const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Thiscall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "thiscall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Cdecl attribute says the callee is a normal C declaration */
  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Sseregparm attribute says we are using x86_64 calling conventions
     for FP arguments.  */
  { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
    true },
  /* The transactional memory builtins are implicitly regparm or fastcall
     depending on the ABI.  Override the generic do-nothing attribute that
     these builtins were declared with.  */
  { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
    true },
  /* force_align_arg_pointer says this function realigns the stack at entry.  */
  { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
    false, true,  true, ix86_handle_cconv_attribute, false },
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute,
    false },
#endif
  { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
    false },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
    false },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  /* ms_abi and sysv_abi calling convention function attributes.  */
  { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
  { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
  { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
    false },
  { "callee_pop_aggregate_return", 1, 1, false, true, true,
    ix86_handle_callee_pop_aggregate_return, true },
  /* End element.  */
  { NULL, 0, 0, false, false, false, NULL, false }
};
/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
                                 tree vectype ATTRIBUTE_UNUSED,
                                 int misalign ATTRIBUTE_UNUSED)
{
  switch (type_of_cost)
    {
      case scalar_stmt:
        return ix86_cost->scalar_stmt_cost;

      case scalar_load:
        return ix86_cost->scalar_load_cost;

      case scalar_store:
        return ix86_cost->scalar_store_cost;

      case vector_stmt:
        return ix86_cost->vec_stmt_cost;

      case vector_load:
        return ix86_cost->vec_align_load_cost;

      case vector_store:
        return ix86_cost->vec_store_cost;

      case vec_to_scalar:
        return ix86_cost->vec_to_scalar_cost;

      case scalar_to_vec:
        return ix86_cost->scalar_to_vec_cost;

      case unaligned_load:
      case unaligned_store:
        return ix86_cost->vec_unalign_load_cost;

      case cond_branch_taken:
        return ix86_cost->cond_taken_branch_cost;

      case cond_branch_not_taken:
        return ix86_cost->cond_not_taken_branch_cost;

      case vec_perm:
      case vec_promote_demote:
        return ix86_cost->vec_stmt_cost;

      default:
        gcc_unreachable ();
    }
}
/* Construct (set target (vec_select op0 (parallel perm))) and
   return true if that's a valid instruction in the active ISA.  */

static bool
expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
{
  rtx rperm[MAX_VECT_LEN], x;
  unsigned i;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (perm[i]);

  x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
  x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
  x = gen_rtx_SET (VOIDmode, target, x);

  x = emit_insn (x);
  if (recog_memoized (x) < 0)
    {
      remove_insn (x);
      return false;
    }
  return true;
}
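
/* For example, a V4SF reversal with perm = { 3, 2, 1, 0 } asks recog
   to match:

     (set (reg:V4SF target)
          (vec_select:V4SF (reg:V4SF op0)
                           (parallel [(const_int 3) (const_int 2)
                                      (const_int 1) (const_int 0)])))

   which, on an SSE target, can be satisfied by e.g. a shufps-based
   pattern in sse.md.  */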
/* Similar, but generate a vec_concat from op0 and op1 as well.  */

static bool
expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
                        const unsigned char *perm, unsigned nelt)
{
  enum machine_mode v2mode;
  rtx x;

  v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
  x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
  return expand_vselect (target, x, perm, nelt);
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of blendp[sd] / pblendw / pblendvb / vpblendd.  */

static bool
expand_vec_perm_blend (struct expand_vec_perm_d *d)
{
  enum machine_mode vmode = d->vmode;
  unsigned i, mask, nelt = d->nelt;
  rtx target, op0, op1, x;
  rtx rperm[32], vperm;

  if (d->op0 == d->op1)
    return false;
  if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
    ;
  else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
    ;
  else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
    ;
  else
    return false;

  /* This is a blend, not a permute.  Elements must stay in their
     respective lanes.  */
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i];
      if (!(e == i || e == i + nelt))
        return false;
    }

  if (d->testing_p)
    return true;

  /* ??? Without SSE4.1, we could implement this with and/andn/or.  This
     decision should be extracted elsewhere, so that we only try that
     sequence once all budget==3 options have been tried.  */
  target = d->target;
  op0 = d->op0;
  op1 = d->op1;
  mask = 0;

  switch (vmode)
    {
    case V4DFmode:
    case V8SFmode:
    case V2DFmode:
    case V4SFmode:
    case V8HImode:
    case V8SImode:
      for (i = 0; i < nelt; ++i)
        mask |= (d->perm[i] >= nelt) << i;
      break;

    case V2DImode:
      for (i = 0; i < 2; ++i)
        mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
      vmode = V8HImode;
      goto do_subreg;

    case V4SImode:
      for (i = 0; i < 4; ++i)
        mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
      vmode = V8HImode;
      goto do_subreg;

    case V16QImode:
      /* See if bytes move in pairs so we can use pblendw with
         an immediate argument, rather than pblendvb with a vector
         argument.  */
      for (i = 0; i < 16; i += 2)
        if (d->perm[i] + 1 != d->perm[i + 1])
          {
          use_pblendvb:
            for (i = 0; i < nelt; ++i)
              rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);

          finish_pblendvb:
            vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
            vperm = force_reg (vmode, vperm);

            if (GET_MODE_SIZE (vmode) == 16)
              emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
            else
              emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
            return true;
          }

      for (i = 0; i < 8; ++i)
        mask |= (d->perm[i * 2] >= 16) << i;
      vmode = V8HImode;
      /* FALLTHRU */

    do_subreg:
      target = gen_lowpart (vmode, target);
      op0 = gen_lowpart (vmode, op0);
      op1 = gen_lowpart (vmode, op1);
      break;

    case V32QImode:
      /* See if bytes move in pairs.  If not, vpblendvb must be used.  */
      for (i = 0; i < 32; i += 2)
        if (d->perm[i] + 1 != d->perm[i + 1])
          goto use_pblendvb;
      /* See if bytes move in quadruplets.  If yes, vpblendd
         with immediate can be used.  */
      for (i = 0; i < 32; i += 4)
        if (d->perm[i] + 2 != d->perm[i + 2])
          break;
      if (i < 32)
        {
          /* See if bytes move the same in both lanes.  If yes,
             vpblendw with immediate can be used.  */
          for (i = 0; i < 16; i += 2)
            if (d->perm[i] + 16 != d->perm[i + 16])
              goto use_pblendvb;

          /* Use vpblendw.  */
          for (i = 0; i < 16; ++i)
            mask |= (d->perm[i * 2] >= 32) << i;
          vmode = V16HImode;
          goto do_subreg;
        }

      /* Use vpblendd.  */
      for (i = 0; i < 8; ++i)
        mask |= (d->perm[i * 4] >= 32) << i;
      vmode = V8SImode;
      goto do_subreg;

    case V16HImode:
      /* See if words move in pairs.  If yes, vpblendd can be used.  */
      for (i = 0; i < 16; i += 2)
        if (d->perm[i] + 1 != d->perm[i + 1])
          break;
      if (i < 16)
        {
          /* See if words move the same in both lanes.  If not,
             vpblendvb must be used.  */
          for (i = 0; i < 8; i++)
            if (d->perm[i] + 8 != d->perm[i + 8])
              {
                /* Use vpblendvb.  */
                for (i = 0; i < 32; ++i)
                  rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);

                vmode = V32QImode;
                nelt = 32;
                target = gen_lowpart (vmode, target);
                op0 = gen_lowpart (vmode, op0);
                op1 = gen_lowpart (vmode, op1);
                goto finish_pblendvb;
              }

          /* Use vpblendw.  */
          for (i = 0; i < 16; ++i)
            mask |= (d->perm[i] >= 16) << i;
          break;
        }

      /* Use vpblendd.  */
      for (i = 0; i < 8; ++i)
        mask |= (d->perm[i * 2] >= 16) << i;
      vmode = V8SImode;
      goto do_subreg;

    case V4DImode:
      /* Use vpblendd.  */
      for (i = 0; i < 4; ++i)
        mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
      vmode = V8SImode;
      goto do_subreg;

    default:
      gcc_unreachable ();
    }

  /* This matches five different patterns with the different modes.  */
  x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
  x = gen_rtx_SET (VOIDmode, target, x);
  emit_insn (x);

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of the variable form of vpermilps.

   Note that we will have already failed the immediate input vpermilps,
   which requires that the high and low part shuffle be identical; the
   variable form doesn't require that.  */

static bool
expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
{
  rtx rperm[8], vperm;
  unsigned i;

  if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
    return false;

  /* We can only permute within the 128-bit lane.  */
  for (i = 0; i < 8; ++i)
    {
      unsigned e = d->perm[i];
      if (i < 4 ? e >= 4 : e < 4)
        return false;
    }

  if (d->testing_p)
    return true;

  for (i = 0; i < 8; ++i)
    {
      unsigned e = d->perm[i];

      /* Within each 128-bit lane, the elements of op0 are numbered
         from 0 and the elements of op1 are numbered from 4.  */
      if (e >= 8 + 4)
        e -= (8 + 4);
      else if (e >= 4)
        e -= 4;

      rperm[i] = GEN_INT (e);
    }

  vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
  vperm = force_reg (V8SImode, vperm);
  emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));

  return true;
}
/* Return true if permutation D can be performed as VMODE permutation
   instead.  */

static bool
valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
{
  unsigned int i, j, chunk;

  if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
      || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
      || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
    return false;

  if (GET_MODE_NUNITS (vmode) >= d->nelt)
    return true;

  chunk = d->nelt / GET_MODE_NUNITS (vmode);
  for (i = 0; i < d->nelt; i += chunk)
    if (d->perm[i] & (chunk - 1))
      return false;
    else
      for (j = 1; j < chunk; ++j)
        if (d->perm[i] + j != d->perm[i + j])
          return false;

  return true;
}
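
/* For example, a V16QImode permutation { 4,5,6,7, 0,1,2,3, ... } is a
   valid V4SImode permutation: with chunk == 4 each group of four byte
   indexes starts on a multiple of 4 (the `& (chunk - 1)' test) and is
   consecutive (the inner j loop), so the shuffle can be performed on
   32-bit words instead of bytes.  */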
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of pshufb, vpperm, vpermq, vpermd or vperm2i128.  */

static bool
expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
{
  unsigned i, nelt, eltsz, mask;
  unsigned char perm[32];
  enum machine_mode vmode = V16QImode;
  rtx rperm[32], vperm, target, op0, op1;

  nelt = d->nelt;

  if (d->op0 != d->op1)
    {
      if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
        {
          if (TARGET_AVX2
              && valid_perm_using_mode_p (V2TImode, d))
            {
              if (d->testing_p)
                return true;

              /* Use vperm2i128 insn.  The pattern uses
                 V4DImode instead of V2TImode.  */
              target = gen_lowpart (V4DImode, d->target);
              op0 = gen_lowpart (V4DImode, d->op0);
              op1 = gen_lowpart (V4DImode, d->op1);
              rperm[0]
                = GEN_INT (((d->perm[0] & (nelt / 2)) ? 1 : 0)
                           | ((d->perm[nelt / 2] & (nelt / 2)) ? 2 : 0));
              emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
              return true;
            }
          return false;
        }
    }
  else
    {
      if (GET_MODE_SIZE (d->vmode) == 16)
        {
          if (!TARGET_SSSE3)
            return false;
        }
      else if (GET_MODE_SIZE (d->vmode) == 32)
        {
          if (!TARGET_AVX2)
            return false;

          /* V4DImode should be already handled through
             expand_vselect by vpermq instruction.  */
          gcc_assert (d->vmode != V4DImode);

          vmode = V32QImode;
          if (d->vmode == V8SImode
              || d->vmode == V16HImode
              || d->vmode == V32QImode)
            {
              /* First see if vpermq can be used for
                 V8SImode/V16HImode/V32QImode.  */
              if (valid_perm_using_mode_p (V4DImode, d))
                {
                  for (i = 0; i < 4; i++)
                    perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
                  if (d->testing_p)
                    return true;
                  return expand_vselect (gen_lowpart (V4DImode, d->target),
                                         gen_lowpart (V4DImode, d->op0),
                                         perm, 4);
                }

              /* Next see if vpermd can be used.  */
              if (valid_perm_using_mode_p (V8SImode, d))
                vmode = V8SImode;
            }

          if (vmode == V32QImode)
            {
              /* vpshufb only works intra lanes, it is not
                 possible to shuffle bytes in between the lanes.  */
              for (i = 0; i < nelt; ++i)
                if ((d->perm[i] ^ i) & (nelt / 2))
                  return false;
            }
        }
      else
        return false;
    }

  if (d->testing_p)
    return true;

  if (vmode == V8SImode)
    for (i = 0; i < 8; ++i)
      rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
  else
    {
      eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
      if (d->op0 != d->op1)
        mask = 2 * nelt - 1;
      else if (vmode == V16QImode)
        mask = nelt - 1;
      else
        mask = nelt / 2 - 1;

      for (i = 0; i < nelt; ++i)
        {
          unsigned j, e = d->perm[i] & mask;
          for (j = 0; j < eltsz; ++j)
            rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
        }
    }

  vperm = gen_rtx_CONST_VECTOR (vmode,
                                gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
  vperm = force_reg (vmode, vperm);

  target = gen_lowpart (vmode, d->target);
  op0 = gen_lowpart (vmode, d->op0);
  if (d->op0 == d->op1)
    {
      if (vmode == V16QImode)
        emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
      else if (vmode == V32QImode)
        emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
      else
        emit_insn (gen_avx2_permvarv8si (target, vperm, op0));
    }
  else
    {
      op1 = gen_lowpart (vmode, d->op1);
      emit_insn (gen_xop_pperm (target, op0, op1, vperm));
    }

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to instantiate D
   in a single instruction.  */

static bool
expand_vec_perm_1 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned char perm2[MAX_VECT_LEN];

  /* Check plain VEC_SELECT first, because AVX has instructions that could
     match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
     input where SEL+CONCAT may not.  */
  if (d->op0 == d->op1)
    {
      int mask = nelt - 1;
      bool identity_perm = true;
      bool broadcast_perm = true;

      for (i = 0; i < nelt; i++)
        {
          perm2[i] = d->perm[i] & mask;
          if (perm2[i] != i)
            identity_perm = false;
          if (perm2[i])
            broadcast_perm = false;
        }

      if (identity_perm)
        {
          if (!d->testing_p)
            emit_move_insn (d->target, d->op0);
          return true;
        }
      else if (broadcast_perm && TARGET_AVX2)
        {
          /* Use vpbroadcast{b,w,d}.  */
          rtx op = d->op0, (*gen) (rtx, rtx) = NULL;
          switch (d->vmode)
            {
            case V32QImode:
              op = gen_lowpart (V16QImode, op);
              gen = gen_avx2_pbroadcastv32qi;
              break;
            case V16HImode:
              op = gen_lowpart (V8HImode, op);
              gen = gen_avx2_pbroadcastv16hi;
              break;
            case V8SImode:
              op = gen_lowpart (V4SImode, op);
              gen = gen_avx2_pbroadcastv8si;
              break;
            case V16QImode:
              gen = gen_avx2_pbroadcastv16qi;
              break;
            case V8HImode:
              gen = gen_avx2_pbroadcastv8hi;
              break;
            /* For other modes prefer other shuffles this function creates.  */
            default: break;
            }
          if (gen != NULL)
            {
              if (!d->testing_p)
                emit_insn (gen (d->target, op));
              return true;
            }
        }

      if (expand_vselect (d->target, d->op0, perm2, nelt))
        return true;

      /* There are plenty of patterns in sse.md that are written for
         SEL+CONCAT and are not replicated for a single op.  Perhaps
         that should be changed, to avoid the nastiness here.  */

      /* Recognize interleave style patterns, which means incrementing
         every other permutation operand.  */
      for (i = 0; i < nelt; i += 2)
        {
          perm2[i] = d->perm[i] & mask;
          perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
        }
      if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
        return true;

      /* Recognize shufps, which means adding {0, 0, nelt, nelt}.  */
      if (nelt >= 4)
        {
          for (i = 0; i < nelt; i += 4)
            {
              perm2[i + 0] = d->perm[i + 0] & mask;
              perm2[i + 1] = d->perm[i + 1] & mask;
              perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
              perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
            }

          if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
            return true;
        }
    }

  /* Finally, try the fully general two operand permute.  */
  if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
    return true;

  /* Recognize interleave style patterns with reversed operands.  */
  if (d->op0 != d->op1)
    {
      for (i = 0; i < nelt; ++i)
        {
          unsigned e = d->perm[i];
          if (e >= nelt)
            e -= nelt;
          else
            e += nelt;
          perm2[i] = e;
        }

      if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
        return true;
    }

  /* Try the SSE4.1 blend variable merge instructions.  */
  if (expand_vec_perm_blend (d))
    return true;

  /* Try one of the AVX vpermil variable permutations.  */
  if (expand_vec_perm_vpermil (d))
    return true;

  /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
     vpshufb, vpermd or vpermq variable permutation.  */
  if (expand_vec_perm_pshufb (d))
    return true;

  return false;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of a pair of pshuflw + pshufhw instructions.  */

static bool
expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
{
  unsigned char perm2[MAX_VECT_LEN];
  unsigned i;
  bool ok;

  if (d->vmode != V8HImode || d->op0 != d->op1)
    return false;

  /* The two permutations only operate in 64-bit lanes.  */
  for (i = 0; i < 4; ++i)
    if (d->perm[i] >= 4)
      return false;
  for (i = 4; i < 8; ++i)
    if (d->perm[i] < 4)
      return false;

  if (d->testing_p)
    return true;

  /* Emit the pshuflw.  */
  memcpy (perm2, d->perm, 4);
  for (i = 4; i < 8; ++i)
    perm2[i] = i;
  ok = expand_vselect (d->target, d->op0, perm2, 8);
  gcc_assert (ok);

  /* Emit the pshufhw.  */
  memcpy (perm2 + 4, d->perm + 4, 4);
  for (i = 0; i < 4; ++i)
    perm2[i] = i;
  ok = expand_vselect (d->target, d->target, perm2, 8);
  gcc_assert (ok);

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   the permutation using the SSSE3 palignr instruction.  This succeeds
   when all of the elements in PERM fit within one vector and we merely
   need to shift them down so that a single vector permutation has a
   chance to succeed.  */

static bool
expand_vec_perm_palignr (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned min, max;
  bool in_order, ok;
  rtx shift;

  /* Even with AVX, palignr only operates on 128-bit vectors.  */
  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
    return false;

  min = nelt, max = 0;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i];
      if (e < min)
        min = e;
      if (e > max)
        max = e;
    }
  if (min == 0 || max - min >= nelt)
    return false;

  /* Given that we have SSSE3, we know we'll be able to implement the
     single operand permutation after the palignr with pshufb.  */
  if (d->testing_p)
    return true;

  shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
  emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
                                  gen_lowpart (TImode, d->op1),
                                  gen_lowpart (TImode, d->op0), shift));

  d->op0 = d->op1 = d->target;

  in_order = true;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i] - min;
      if (e != i)
        in_order = false;
      d->perm[i] = e;
    }

  /* Test for the degenerate case where the alignment by itself
     produces the desired permutation.  */
  if (in_order)
    return true;

  ok = expand_vec_perm_1 (d);
  gcc_assert (ok);

  return ok;
}
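
/* For example, with nelt == 8 and perm = { 3 4 5 6 7 8 9 10 } we get
   min == 3: the palignr above shifts the op1:op0 pair down by three
   elements, after which the adjusted permutation is the identity
   { 0 1 2 3 4 5 6 7 } and the degenerate in_order exit is taken.  */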
static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);

/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a two vector permutation into a single vector permutation by using
   an interleave operation to merge the vectors.  */

static bool
expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
  unsigned HOST_WIDE_INT contents;
  unsigned char remap[2 * MAX_VECT_LEN];
  rtx seq;
  bool ok, same_halves = false;

  if (GET_MODE_SIZE (d->vmode) == 16)
    {
      if (d->op0 == d->op1)
        return false;
    }
  else if (GET_MODE_SIZE (d->vmode) == 32)
    {
      if (!TARGET_AVX)
        return false;
      /* For 32-byte modes allow even d->op0 == d->op1.
         The lack of cross-lane shuffling in some instructions
         might prevent a single insn shuffle.  */
      dfinal = *d;
      dfinal.testing_p = true;
      /* If expand_vec_perm_interleave3 can expand this into
         a 3 insn sequence, give up and let it be expanded as
         3 insn sequence.  While that is one insn longer,
         it doesn't need a memory operand and in the common
         case that both interleave low and high permutations
         with the same operands are adjacent needs 4 insns
         for both after CSE.  */
      if (expand_vec_perm_interleave3 (&dfinal))
        return false;
    }
  else
    return false;

  /* Examine from whence the elements come.  */
  contents = 0;
  for (i = 0; i < nelt; ++i)
    contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];

  memset (remap, 0xff, sizeof (remap));
  dremap = *d;

  if (GET_MODE_SIZE (d->vmode) == 16)
    {
      unsigned HOST_WIDE_INT h1, h2, h3, h4;

      /* Split the two input vectors into 4 halves.  */
      h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
      h2 = h1 << nelt2;
      h3 = h2 << nelt2;
      h4 = h3 << nelt2;

      /* If the elements from the low halves use interleave low, and similarly
         for interleave high.  If the elements are from mis-matched halves, we
         can use shufps for V4SF/V4SI or do a DImode shuffle.  */
      if ((contents & (h1 | h3)) == contents)
        {
          /* punpckl* */
          for (i = 0; i < nelt2; ++i)
            {
              remap[i] = i * 2;
              remap[i + nelt] = i * 2 + 1;
              dremap.perm[i * 2] = i;
              dremap.perm[i * 2 + 1] = i + nelt;
            }
          if (!TARGET_SSE2 && d->vmode == V4SImode)
            dremap.vmode = V4SFmode;
        }
      else if ((contents & (h2 | h4)) == contents)
        {
          /* punpckh* */
          for (i = 0; i < nelt2; ++i)
            {
              remap[i + nelt2] = i * 2;
              remap[i + nelt + nelt2] = i * 2 + 1;
              dremap.perm[i * 2] = i + nelt2;
              dremap.perm[i * 2 + 1] = i + nelt + nelt2;
            }
          if (!TARGET_SSE2 && d->vmode == V4SImode)
            dremap.vmode = V4SFmode;
        }
      else if ((contents & (h1 | h4)) == contents)
        {
          /* shufps */
          for (i = 0; i < nelt2; ++i)
            {
              remap[i] = i;
              remap[i + nelt + nelt2] = i + nelt2;
              dremap.perm[i] = i;
              dremap.perm[i + nelt2] = i + nelt + nelt2;
            }
          if (nelt != 4)
            {
              /* shufpd */
              dremap.vmode = V2DImode;
              dremap.nelt = 2;
              dremap.perm[0] = 0;
              dremap.perm[1] = 3;
            }
        }
      else if ((contents & (h2 | h3)) == contents)
        {
          /* shufps */
          for (i = 0; i < nelt2; ++i)
            {
              remap[i + nelt2] = i;
              remap[i + nelt] = i + nelt2;
              dremap.perm[i] = i + nelt2;
              dremap.perm[i + nelt2] = i + nelt;
            }
          if (nelt != 4)
            {
              /* shufpd */
              dremap.vmode = V2DImode;
              dremap.nelt = 2;
              dremap.perm[0] = 1;
              dremap.perm[1] = 2;
            }
        }
      else
        return false;
    }
  else
    {
      unsigned int nelt4 = nelt / 4, nzcnt = 0;
      unsigned HOST_WIDE_INT q[8];
      unsigned int nonzero_halves[4];

      /* Split the two input vectors into 8 quarters.  */
      q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
      for (i = 1; i < 8; ++i)
        q[i] = q[0] << (nelt4 * i);
      for (i = 0; i < 4; ++i)
        if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
          {
            nonzero_halves[nzcnt] = i;
            ++nzcnt;
          }

      if (nzcnt == 1)
        {
          gcc_assert (d->op0 == d->op1);
          nonzero_halves[1] = nonzero_halves[0];
          same_halves = true;
        }
      else if (d->op0 == d->op1)
        {
          gcc_assert (nonzero_halves[0] == 0);
          gcc_assert (nonzero_halves[1] == 1);
        }

      if (nzcnt <= 2)
        {
          if (d->perm[0] / nelt2 == nonzero_halves[1])
            {
              /* Attempt to increase the likelihood that dfinal
                 shuffle will be intra-lane.  */
              char tmph = nonzero_halves[0];
              nonzero_halves[0] = nonzero_halves[1];
              nonzero_halves[1] = tmph;
            }

          /* vperm2f128 or vperm2i128.  */
          for (i = 0; i < nelt2; ++i)
            {
              remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
              remap[i + nonzero_halves[0] * nelt2] = i;
              dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
              dremap.perm[i] = i + nonzero_halves[0] * nelt2;
            }

          if (d->vmode != V8SFmode
              && d->vmode != V4DFmode
              && d->vmode != V8SImode)
            {
              dremap.vmode = V8SImode;
              dremap.nelt = 8;
              for (i = 0; i < 4; ++i)
                {
                  dremap.perm[i] = i + nonzero_halves[0] * 4;
                  dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
                }
            }
        }
      else if (d->op0 == d->op1)
        return false;
      else if (TARGET_AVX2
               && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
        {
          /* vpunpckl* */
          for (i = 0; i < nelt4; ++i)
            {
              remap[i] = i * 2;
              remap[i + nelt] = i * 2 + 1;
              remap[i + nelt2] = i * 2 + nelt2;
              remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
              dremap.perm[i * 2] = i;
              dremap.perm[i * 2 + 1] = i + nelt;
              dremap.perm[i * 2 + nelt2] = i + nelt2;
              dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
            }
        }
      else if (TARGET_AVX2
               && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
        {
          /* vpunpckh* */
          for (i = 0; i < nelt4; ++i)
            {
              remap[i + nelt4] = i * 2;
              remap[i + nelt + nelt4] = i * 2 + 1;
              remap[i + nelt2 + nelt4] = i * 2 + nelt2;
              remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
              dremap.perm[i * 2] = i + nelt4;
              dremap.perm[i * 2 + 1] = i + nelt + nelt4;
              dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
              dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
            }
        }
      else
        return false;
    }

  /* Use the remapping array set up above to move the elements from their
     swizzled locations into their final destinations.  */
  dfinal = *d;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = remap[d->perm[i]];
      gcc_assert (e < nelt);
      /* If same_halves is true, both halves of the remapped vector are the
         same.  Avoid cross-lane accesses if possible.  */
      if (same_halves && i >= nelt2)
        {
          gcc_assert (e < nelt2);
          dfinal.perm[i] = e + nelt2;
        }
      else
        dfinal.perm[i] = e;
    }
  dfinal.op0 = gen_reg_rtx (dfinal.vmode);
  dfinal.op1 = dfinal.op0;
  dremap.target = dfinal.op0;

  /* Test if the final remap can be done with a single insn.  For V4SFmode or
     V4SImode this *will* succeed.  For V8HImode or V16QImode it may not.  */
  start_sequence ();
  ok = expand_vec_perm_1 (&dfinal);
  seq = get_insns ();
  end_sequence ();

  if (!ok)
    return false;

  if (d->testing_p)
    return true;

  if (dremap.vmode != dfinal.vmode)
    {
      dremap.target = gen_lowpart (dremap.vmode, dremap.target);
      dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
      dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
    }

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  emit_insn (seq);
  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a single vector cross-lane permutation into vpermq followed
   by any of the single insn permutations.  */

static bool
expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
  unsigned contents[2];
  bool ok;

  if (!(TARGET_AVX2
        && (d->vmode == V32QImode || d->vmode == V16HImode)
        && d->op0 == d->op1))
    return false;

  contents[0] = 0;
  contents[1] = 0;
  for (i = 0; i < nelt2; ++i)
    {
      contents[0] |= 1u << (d->perm[i] / nelt4);
      contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
    }

  for (i = 0; i < 2; ++i)
    {
      unsigned int cnt = 0;
      for (j = 0; j < 4; ++j)
        if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
          return false;
    }

  if (d->testing_p)
    return true;

  dremap = *d;
  dremap.vmode = V4DImode;
  dremap.nelt = 4;
  dremap.target = gen_reg_rtx (V4DImode);
  dremap.op0 = gen_lowpart (V4DImode, d->op0);
  dremap.op1 = dremap.op0;
  for (i = 0; i < 2; ++i)
    {
      unsigned int cnt = 0;
      for (j = 0; j < 4; ++j)
        if ((contents[i] & (1u << j)) != 0)
          dremap.perm[2 * i + cnt++] = j;
      for (; cnt < 2; ++cnt)
        dremap.perm[2 * i + cnt] = 0;
    }

  dfinal = *d;
  dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
  dfinal.op1 = dfinal.op0;
  for (i = 0, j = 0; i < nelt; ++i)
    {
      if (i == nelt2)
        j = 2;
      dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
      if ((d->perm[i] / nelt4) == dremap.perm[j])
        ;
      else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
        dfinal.perm[i] |= nelt4;
      else
        gcc_unreachable ();
    }

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  ok = expand_vec_perm_1 (&dfinal);
  gcc_assert (ok);

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a two vector permutation using 2 intra-lane interleave insns
   and cross-lane shuffle for 32-byte vectors.  */

static bool
expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt;
  rtx (*gen) (rtx, rtx, rtx);

  if (d->op0 == d->op1)
    return false;
  if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
    ;
  else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
    ;
  else
    return false;

  nelt = d->nelt;
  if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
    return false;
  for (i = 0; i < nelt; i += 2)
    if (d->perm[i] != d->perm[0] + i / 2
        || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
      return false;

  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V32QImode:
      if (d->perm[0])
        gen = gen_vec_interleave_highv32qi;
      else
        gen = gen_vec_interleave_lowv32qi;
      break;
    case V16HImode:
      if (d->perm[0])
        gen = gen_vec_interleave_highv16hi;
      else
        gen = gen_vec_interleave_lowv16hi;
      break;
    case V8SImode:
      if (d->perm[0])
        gen = gen_vec_interleave_highv8si;
      else
        gen = gen_vec_interleave_lowv8si;
      break;
    case V4DImode:
      if (d->perm[0])
        gen = gen_vec_interleave_highv4di;
      else
        gen = gen_vec_interleave_lowv4di;
      break;
    case V8SFmode:
      if (d->perm[0])
        gen = gen_vec_interleave_highv8sf;
      else
        gen = gen_vec_interleave_lowv8sf;
      break;
    case V4DFmode:
      if (d->perm[0])
        gen = gen_vec_interleave_highv4df;
      else
        gen = gen_vec_interleave_lowv4df;
      break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (d->target, d->op0, d->op1));
  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement
   a single vector permutation using a single intra-lane vector
   permutation, vperm2f128 swapping the lanes and vblend* insn blending
   the non-swapped and swapped vectors together.  */

static bool
expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dfirst, dsecond;
  unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
  rtx seq;
  bool ok;
  rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;

  if (!TARGET_AVX
      || TARGET_AVX2
      || (d->vmode != V8SFmode && d->vmode != V4DFmode)
      || d->op0 != d->op1)
    return false;

  dfirst = *d;
  for (i = 0; i < nelt; i++)
    dfirst.perm[i] = 0xff;
  for (i = 0, msk = 0; i < nelt; i++)
    {
      j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
      if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
        return false;
      dfirst.perm[j] = d->perm[i];
      if (j != i)
        msk |= (1 << i);
    }
  for (i = 0; i < nelt; i++)
    if (dfirst.perm[i] == 0xff)
      dfirst.perm[i] = i;

  dfirst.target = gen_reg_rtx (dfirst.vmode);

  start_sequence ();
  ok = expand_vec_perm_1 (&dfirst);
  seq = get_insns ();
  end_sequence ();

  if (!ok)
    return false;

  if (d->testing_p)
    return true;

  emit_insn (seq);

  dsecond = *d;
  dsecond.op0 = dfirst.target;
  dsecond.op1 = dfirst.target;
  dsecond.target = gen_reg_rtx (dsecond.vmode);
  for (i = 0; i < nelt; i++)
    dsecond.perm[i] = i ^ nelt2;

  ok = expand_vec_perm_1 (&dsecond);
  gcc_assert (ok);

  blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
  emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
  return true;
}
/* A subroutine of expand_vec_perm_even_odd_1.  Implement the double-word
   permutation with two pshufb insns and an ior.  We should have already
   failed all two instruction sequences.  */

static bool
expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
{
  rtx rperm[2][16], vperm, l, h, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
    return false;
  gcc_assert (d->op0 != d->op1);

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate two permutation masks.  If the required element is within
     the given vector it is shuffled into the proper lane.  If the required
     element is in the other vector, force a zero into the lane by setting
     bit 7 in the permutation mask.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i];
      unsigned which = (e >= nelt);
      if (e >= nelt)
        e -= nelt;

      for (j = 0; j < eltsz; ++j)
        {
          rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
          rperm[1-which][i*eltsz + j] = m128;
        }
    }

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
  vperm = force_reg (V16QImode, vperm);

  l = gen_reg_rtx (V16QImode);
  op = gen_lowpart (V16QImode, d->op0);
  emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
  vperm = force_reg (V16QImode, vperm);

  h = gen_reg_rtx (V16QImode);
  op = gen_lowpart (V16QImode, d->op1);
  emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));

  op = gen_lowpart (V16QImode, d->target);
  emit_insn (gen_iorv16qi3 (op, l, h));

  return true;
}
/* Implement arbitrary permutation of one V32QImode and V16HImode operand
   with two vpshufb insns, vpermq and vpor.  We should have already failed
   all two or three instruction sequences.  */

static bool
expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
{
  rtx rperm[2][32], vperm, l, h, hp, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_AVX2
      || d->op0 != d->op1
      || (d->vmode != V32QImode && d->vmode != V16HImode))
    return false;

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate two permutation masks.  If the required element is within
     the same lane, it is shuffled in.  If the required element from the
     other lane, force a zero by setting bit 7 in the permutation mask.
     In the other mask the mask has non-negative elements if element
     is requested from the other lane, but also moved to the other lane,
     so that the result of vpshufb can have the two V2TImode halves
     swapped.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
      unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;

      for (j = 0; j < eltsz; ++j)
        {
          rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
          rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
        }
    }

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
  vperm = force_reg (V32QImode, vperm);

  h = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));

  /* Swap the 128-bit lanes of h into hp.  */
  hp = gen_reg_rtx (V4DImode);
  op = gen_lowpart (V4DImode, h);
  emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
                                  const1_rtx));

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
  vperm = force_reg (V32QImode, vperm);

  l = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));

  op = gen_lowpart (V32QImode, d->target);
  emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));

  return true;
}
/* A subroutine of expand_vec_perm_even_odd_1.  Implement extract-even
   and extract-odd permutations of two V32QImode or V16HImode operands
   with two vpshufb insns, vpor and vpermq.  We should have already
   failed all two or three instruction sequences.  */

static bool
expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
{
  rtx rperm[2][32], vperm, l, h, ior, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_AVX2
      || d->op0 == d->op1
      || (d->vmode != V32QImode && d->vmode != V16HImode))
    return false;

  for (i = 0; i < d->nelt; ++i)
    if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
      return false;

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate two permutation masks.  In the first permutation mask
     the first quarter will contain indexes for the first half
     of the op0, the second quarter will contain bit 7 set, third quarter
     will contain indexes for the second half of the op0 and the
     last quarter bit 7 set.  In the second permutation mask
     the first quarter will contain bit 7 set, the second quarter
     indexes for the first half of the op1, the third quarter bit 7 set
     and last quarter indexes for the second half of the op1.
     I.e. the first mask e.g. for V32QImode extract even will be:
     0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
     (all values masked with 0xf except for -128) and second mask
     for extract even will be
     -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
      unsigned which = d->perm[i] >= nelt;
      unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;

      for (j = 0; j < eltsz; ++j)
        {
          rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
          rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
        }
    }

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
  vperm = force_reg (V32QImode, vperm);

  l = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
  vperm = force_reg (V32QImode, vperm);

  h = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op1);
  emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));

  ior = gen_reg_rtx (V32QImode);
  emit_insn (gen_iorv32qi3 (ior, l, h));

  /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation.  */
  op = gen_lowpart (V4DImode, d->target);
  ior = gen_lowpart (V4DImode, ior);
  emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
                                  const1_rtx, GEN_INT (3)));

  return true;
}
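
/* Concrete example (added for illustration): an even extraction from
   two V32QImode operands has d->perm[i] == 2 * i, so the check above
   passes.  For i == 8, xorv == 24 and the mask entry for that element
   is stored at position 8 ^ 24 == 16: indexes for the second half of
   op0 land in the third quarter, exactly the layout that the final
   { 0, 2, 1, 3 } vpermq expects.  */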
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement extract-even
   and extract-odd permutations.  */

static bool
expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
{
  rtx t1, t2, t3;

  switch (d->vmode)
    {
    case V4DFmode:
      t1 = gen_reg_rtx (V4DFmode);
      t2 = gen_reg_rtx (V4DFmode);

      /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
      emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
      emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));

      /* Now an unpck[lh]pd will produce the result required.  */
      if (odd)
        t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
      else
        t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
      emit_insn (t3);
      break;

    case V8SFmode:
      {
        int mask = odd ? 0xdd : 0x88;

        t1 = gen_reg_rtx (V8SFmode);
        t2 = gen_reg_rtx (V8SFmode);
        t3 = gen_reg_rtx (V8SFmode);

        /* Shuffle within the 128-bit lanes to produce:
           { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }.  */
        emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
                                      GEN_INT (mask)));

        /* Shuffle the lanes around to produce:
           { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }.  */
        emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
                                            GEN_INT (0x3)));

        /* Shuffle within the 128-bit lanes to produce:
           { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }.  */
        emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));

        /* Shuffle within the 128-bit lanes to produce:
           { 8 a c e c e 8 a } | { 9 b d f d f 9 b }.  */
        emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));

        /* Shuffle the lanes around to produce:
           { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }.  */
        emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
                                            GEN_INT (0x20)));
      }
      break;

    case V2DFmode:
    case V4SFmode:
    case V2DImode:
    case V4SImode:
      /* These are always directly implementable by expand_vec_perm_1.  */
      gcc_unreachable ();

    case V8HImode:
      if (TARGET_SSSE3)
        return expand_vec_perm_pshufb2 (d);
      else
        {
          /* We need 2*log2(N)-1 operations to achieve odd/even
             with interleave.  */
          t1 = gen_reg_rtx (V8HImode);
          t2 = gen_reg_rtx (V8HImode);
          emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
          emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
          emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
          emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
          if (odd)
            t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
          else
            t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
          emit_insn (t3);
        }
      break;

    case V16QImode:
      if (TARGET_SSSE3)
        return expand_vec_perm_pshufb2 (d);
      else
        {
          t1 = gen_reg_rtx (V16QImode);
          t2 = gen_reg_rtx (V16QImode);
          t3 = gen_reg_rtx (V16QImode);
          emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
          emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
          emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
          emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
          emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
          emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
          if (odd)
            t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
          else
            t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
          emit_insn (t3);
        }
      break;

    case V16HImode:
    case V32QImode:
      return expand_vec_perm_vpshufb2_vpermq_even_odd (d);

    case V4DImode:
      if (!TARGET_AVX2)
        {
          struct expand_vec_perm_d d_copy = *d;
          d_copy.vmode = V4DFmode;
          d_copy.target = gen_lowpart (V4DFmode, d->target);
          d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
          d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
          return expand_vec_perm_even_odd_1 (&d_copy, odd);
        }

      t1 = gen_reg_rtx (V4DImode);
      t2 = gen_reg_rtx (V4DImode);

      /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
      emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
      emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));

      /* Now a vpunpck[lh]qdq will produce the result required.  */
      if (odd)
        t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
      else
        t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
      emit_insn (t3);
      break;

    case V8SImode:
      if (!TARGET_AVX2)
        {
          struct expand_vec_perm_d d_copy = *d;
          d_copy.vmode = V8SFmode;
          d_copy.target = gen_lowpart (V8SFmode, d->target);
          d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
          d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
          return expand_vec_perm_even_odd_1 (&d_copy, odd);
        }

      t1 = gen_reg_rtx (V8SImode);
      t2 = gen_reg_rtx (V8SImode);

      /* Shuffle the lanes around into
         { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }.  */
      emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t1),
                                    gen_lowpart (V4DImode, d->op0),
                                    gen_lowpart (V4DImode, d->op1),
                                    GEN_INT (0x20)));
      emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t2),
                                    gen_lowpart (V4DImode, d->op0),
                                    gen_lowpart (V4DImode, d->op1),
                                    GEN_INT (0x31)));

      /* Swap the 2nd and 3rd position in each lane into
         { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }.  */
      emit_insn (gen_avx2_pshufdv3 (t1, t1,
                                    GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
      emit_insn (gen_avx2_pshufdv3 (t2, t2,
                                    GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));

      /* Now a vpunpck[lh]qdq will produce
         { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }.  */
      if (odd)
        t3 = gen_avx2_interleave_highv4di (gen_lowpart (V4DImode, d->target),
                                           gen_lowpart (V4DImode, t1),
                                           gen_lowpart (V4DImode, t2));
      else
        t3 = gen_avx2_interleave_lowv4di (gen_lowpart (V4DImode, d->target),
                                          gen_lowpart (V4DImode, t1),
                                          gen_lowpart (V4DImode, t2));
      emit_insn (t3);
      break;

    default:
      gcc_unreachable ();
    }

  return true;
}
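
/* Note on the V8SFmode constants above (explanatory, added for
   clarity): the shufps immediate 0x88 encodes the four 2-bit element
   selectors { 0, 2, 0, 2 } and 0xdd encodes { 1, 3, 1, 3 }, i.e. the
   even resp. odd elements within each 128-bit lane pair, which is why
   mask is chosen as odd ? 0xdd : 0x88.  */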
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
   extract-even and extract-odd permutations.  */

static bool
expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
{
  unsigned i, odd, nelt = d->nelt;

  odd = d->perm[0];
  if (odd != 0 && odd != 1)
    return false;

  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != 2 * i + odd)
      return false;

  return expand_vec_perm_even_odd_1 (d, odd);
}
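
/* Illustration (added; hypothetical mask values): a V4DImode
   odd-element extraction arrives here with d->perm == { 1, 3, 5, 7 },
   so d->perm[0] sets odd to 1 and each later element satisfies
   d->perm[i] == 2 * i + odd.  A standalone sketch of the same check:  */
#if 0
/* Not built; mirrors the pattern match above for a 4-element mask.  */
static bool
is_even_odd_extract_4 (const unsigned char *perm)
{
  unsigned i, odd = perm[0];
  if (odd != 0 && odd != 1)
    return false;
  for (i = 1; i < 4; ++i)
    if (perm[i] != 2 * i + odd)
      return false;
  return true;
}
#endif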
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement broadcast
   permutations.  We assume that expand_vec_perm_1 has already failed.  */

static bool
expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
{
  unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
  enum machine_mode vmode = d->vmode;
  unsigned char perm2[4];
  rtx op0 = d->op0;
  bool ok;

  switch (vmode)
    {
    case V4DFmode:
    case V8SFmode:
      /* These are special-cased in sse.md so that we can optionally
         use the vbroadcast instruction.  They expand to two insns
         if the input happens to be in a register.  */
      gcc_unreachable ();

    case V2DFmode:
    case V2DImode:
    case V4SFmode:
    case V4SImode:
      /* These are always implementable using standard shuffle patterns.  */
      gcc_unreachable ();

    case V8HImode:
    case V16QImode:
      /* These can be implemented via interleave.  We save one insn by
         stopping once we have promoted to V4SImode and then use pshufd.  */
      do
        {
          rtx dest;
          rtx (*gen) (rtx, rtx, rtx)
            = vmode == V16QImode ? gen_vec_interleave_lowv16qi
                                 : gen_vec_interleave_lowv8hi;

          if (elt >= nelt2)
            {
              gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
                                       : gen_vec_interleave_highv8hi;
              elt -= nelt2;
            }
          nelt2 /= 2;

          dest = gen_reg_rtx (vmode);
          emit_insn (gen (dest, op0, op0));
          vmode = get_mode_wider_vector (vmode);
          op0 = gen_lowpart (vmode, dest);
        }
      while (vmode != V4SImode);

      memset (perm2, elt, 4);
      ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
      gcc_assert (ok);
      return true;

    case V32QImode:
    case V16HImode:
    case V8SImode:
    case V4DImode:
      /* For AVX2 broadcasts of the first element vpbroadcast* or
         vpermq should be used by expand_vec_perm_1.  */
      gcc_assert (!TARGET_AVX2 || d->perm[0]);
      return false;

    default:
      gcc_unreachable ();
    }
}
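
/* Worked example (added for illustration): broadcasting element 3 of
   a V8HImode register.  nelt2 starts at 4 and elt == 3 < 4, so one
   vec_interleave_lowv8hi of op0 with itself yields
   { x0 x0 x1 x1 x2 x2 x3 x3 }; viewed as V4SImode, dword 3 now holds
   two copies of x3, and the final pshufd with perm2 == { 3, 3, 3, 3 }
   replicates it across the whole vector.  */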
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
   broadcast permutations.  */

static bool
expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
{
  unsigned i, elt, nelt = d->nelt;

  if (d->op0 != d->op1)
    return false;

  elt = d->perm[0];
  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != elt)
      return false;

  return expand_vec_perm_broadcast_1 (d);
}
/* Implement arbitrary permutation of two V32QImode or V16HImode operands
   with 4 vpshufb insns, 2 vpermq and 3 vpor.  We should have already failed
   all the shorter instruction sequences.  */

static bool
expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
{
  rtx rperm[4][32], vperm, l[2], h[2], op, m128;
  unsigned int i, nelt, eltsz;
  bool used[4];

  if (!TARGET_AVX2
      || d->op0 == d->op1
      || (d->vmode != V32QImode && d->vmode != V16HImode))
    return false;

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate 4 permutation masks.  If the required element is within
     the same lane, it is shuffled in.  If the required element is from
     the other lane, force a zero by setting bit 7 in the permutation
     mask.  The other mask has non-negative elements if the element is
     requested from the other lane, but also moved to the other lane,
     so that the result of vpshufb can have the two V2TImode halves
     swapped.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < 32; ++i)
    {
      rperm[0][i] = m128;
      rperm[1][i] = m128;
      rperm[2][i] = m128;
      rperm[3][i] = m128;
    }
  used[0] = false;
  used[1] = false;
  used[2] = false;
  used[3] = false;
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
      unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
      unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);

      for (j = 0; j < eltsz; ++j)
        rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
      used[which] = true;
    }

  for (i = 0; i < 2; ++i)
    {
      if (!used[2 * i + 1])
        {
          h[i] = NULL_RTX;
          continue;
        }
      vperm = gen_rtx_CONST_VECTOR (V32QImode,
                                    gen_rtvec_v (32, rperm[2 * i + 1]));
      vperm = force_reg (V32QImode, vperm);
      h[i] = gen_reg_rtx (V32QImode);
      op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
      emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
    }

  /* Swap the 128-bit lanes of h[X].  */
  for (i = 0; i < 2; ++i)
    {
      if (h[i] == NULL_RTX)
        continue;
      op = gen_reg_rtx (V4DImode);
      emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
                                      const2_rtx, GEN_INT (3), const0_rtx,
                                      const1_rtx));
      h[i] = gen_lowpart (V32QImode, op);
    }

  for (i = 0; i < 2; ++i)
    {
      if (!used[2 * i])
        {
          l[i] = NULL_RTX;
          continue;
        }
      vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
      vperm = force_reg (V32QImode, vperm);
      l[i] = gen_reg_rtx (V32QImode);
      op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
      emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
    }

  for (i = 0; i < 2; ++i)
    {
      if (h[i] && l[i])
        {
          op = gen_reg_rtx (V32QImode);
          emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
          l[i] = op;
        }
      else if (h[i])
        l[i] = h[i];
    }

  gcc_assert (l[0] && l[1]);
  op = gen_lowpart (V32QImode, d->target);
  emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
  return true;
}
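
/* Classification used above, spelled out with a hypothetical value:
   each element falls into one of four masks,
   which == 2 * (comes from op1) + (crosses a 128-bit lane).  E.g. for
   V32QImode, d->perm[3] == 56 selects byte 24 of op1 (lane 1) for
   position 3 (lane 0), so which == 2 + 1 == 3; the in-lane index
   56 & 15 == 8 is stored at position 3 ^ 16 == 19 and the later lane
   swap moves it into place.  */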
/* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
   With all of the interface bits taken care of, perform the expansion
   in D and return true on success.  */

static bool
ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* Try a single instruction expansion.  */
  if (expand_vec_perm_1 (d))
    return true;

  /* Try sequences of two instructions.  */

  if (expand_vec_perm_pshuflw_pshufhw (d))
    return true;

  if (expand_vec_perm_palignr (d))
    return true;

  if (expand_vec_perm_interleave2 (d))
    return true;

  if (expand_vec_perm_broadcast (d))
    return true;

  if (expand_vec_perm_vpermq_perm_1 (d))
    return true;

  /* Try sequences of three instructions.  */

  if (expand_vec_perm_pshufb2 (d))
    return true;

  if (expand_vec_perm_interleave3 (d))
    return true;

  if (expand_vec_perm_vperm2f128_vblend (d))
    return true;

  /* Try sequences of four instructions.  */

  if (expand_vec_perm_vpshufb2_vpermq (d))
    return true;

  if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
    return true;

  /* ??? Look for narrow permutations whose element orderings would
     allow the promotion to a wider mode.  */

  /* ??? Look for sequences of interleave or a wider permute that place
     the data into the correct lanes for a half-vector shuffle like
     pshuf[lh]w or vpermilps.  */

  /* ??? Look for sequences of interleave that produce the desired results.
     The combinatorics of punpck[lh] get pretty ugly...  */

  if (expand_vec_perm_even_odd (d))
    return true;

  /* Even longer sequences.  */
  if (expand_vec_perm_vpshufb4_vpermq2 (d))
    return true;

  return false;
}
bool
ix86_expand_vec_perm_const (rtx operands[4])
{
  struct expand_vec_perm_d d;
  unsigned char perm[MAX_VECT_LEN];
  int i, nelt, which;
  rtx sel;

  d.target = operands[0];
  d.op0 = operands[1];
  d.op1 = operands[2];
  sel = operands[3];

  d.vmode = GET_MODE (d.target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  gcc_assert (GET_CODE (sel) == CONST_VECTOR);
  gcc_assert (XVECLEN (sel, 0) == nelt);
  gcc_checking_assert (sizeof (d.perm) == sizeof (perm));

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);

      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
      perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      if (!rtx_equal_p (d.op0, d.op1))
        break;

      /* The elements of PERM do not suggest that only the first operand
         is used, but both operands are identical.  Allow easier matching
         of the permutation by folding the permutation into the single
         operand.  */
      for (i = 0; i < nelt; ++i)
        if (d.perm[i] >= nelt)
          d.perm[i] -= nelt;
      d.op1 = d.op0;
      break;

    case 1:
      d.op1 = d.op0;
      break;

    case 2:
      for (i = 0; i < nelt; ++i)
        d.perm[i] -= nelt;
      d.op0 = d.op1;
      break;
    }

  if (ix86_expand_vec_perm_const_1 (&d))
    return true;

  /* If the mask says both arguments are needed, but they are the same,
     the above tried to expand with d.op0 == d.op1.  If that didn't work,
     retry with d.op0 != d.op1 as that is what testing has been done with.  */
  if (which == 3 && d.op0 == d.op1)
    {
      rtx seq;
      bool ok;

      memcpy (d.perm, perm, sizeof (perm));
      d.op1 = gen_reg_rtx (d.vmode);
      start_sequence ();
      ok = ix86_expand_vec_perm_const_1 (&d);
      seq = get_insns ();
      end_sequence ();
      if (ok)
        {
          emit_move_insn (d.op1, d.op0);
          emit_insn (seq);
          return true;
        }
    }

  return false;
}
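
/* Example of the WHICH computation above (illustrative, hypothetical
   selector): for nelt == 4 and a constant selector { 0, 5, 2, 7 },
   elements 0 and 2 reference the first operand and 5 and 7 the second,
   so which ends up as 1 | 2 == 3 and both operands are kept (or the
   permutation is folded onto one operand when they are identical).  */
#if 0
/* Not built; mirrors the WHICH bookkeeping for the selector above.  */
static int
classify_sel_4 (void)
{
  static const int sel[4] = { 0, 5, 2, 7 };
  int i, which = 0;
  for (i = 0; i < 4; ++i)
    which |= ((sel[i] & 7) < 4 ? 1 : 2);
  return which;  /* Yields 3: both operands referenced.  */
}
#endif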
/* Implement targetm.vectorize.vec_perm_const_ok.  */

static bool
ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
                                  const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret, one_vec;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;

  /* Given sufficient ISA support we can just return true here
     for selected vector modes.  */
  if (GET_MODE_SIZE (d.vmode) == 16)
    {
      /* All implementable with a single vpperm insn.  */
      if (TARGET_XOP)
        return true;
      /* All implementable with 2 pshufb + 1 ior.  */
      if (TARGET_SSSE3)
        return true;
      /* All implementable with shufpd or unpck[lh]pd.  */
      if (d.nelt == 2)
        return true;
    }

  /* Extract the values from the vector CST into the permutation
     array in D.  */
  memcpy (d.perm, sel, nelt);
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* For all elements from second vector, fold the elements to first.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to the vector type.  */
  one_vec = (which != 3);

  /* Implementable with shufps or pshufd.  */
  if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
    return true;

  /* Otherwise we have to go through the motions and see if we can
     figure out how to generate the requested permutation.  */
  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!one_vec)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = ix86_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
void
ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
{
  struct expand_vec_perm_d d;
  unsigned i, nelt;

  d.target = targ;
  d.op0 = op0;
  d.op1 = op1;
  d.vmode = GET_MODE (targ);
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = 0; i < nelt; ++i)
    d.perm[i] = i * 2 + odd;

  /* We'll either be able to implement the permutation directly...  */
  if (expand_vec_perm_1 (&d))
    return;

  /* ... or we use the special-case patterns.  */
  expand_vec_perm_even_odd_1 (&d, odd);
}
/* Expand an insert into a vector register through pinsr insn.
   Return true if successful.  */

bool
ix86_expand_pinsr (rtx *operands)
{
  rtx dst = operands[0];
  rtx src = operands[3];

  unsigned int size = INTVAL (operands[1]);
  unsigned int pos = INTVAL (operands[2]);

  if (GET_CODE (dst) == SUBREG)
    {
      pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
      dst = SUBREG_REG (dst);
    }

  if (GET_CODE (src) == SUBREG)
    src = SUBREG_REG (src);

  switch (GET_MODE (dst))
    {
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
      {
        enum machine_mode srcmode, dstmode;
        rtx (*pinsr)(rtx, rtx, rtx, rtx);

        srcmode = mode_for_size (size, MODE_INT, 0);

        switch (srcmode)
          {
          case QImode:
            if (!TARGET_SSE4_1)
              return false;
            dstmode = V16QImode;
            pinsr = gen_sse4_1_pinsrb;
            break;

          case HImode:
            if (!TARGET_SSE2)
              return false;
            dstmode = V8HImode;
            pinsr = gen_sse2_pinsrw;
            break;

          case SImode:
            if (!TARGET_SSE4_1)
              return false;
            dstmode = V4SImode;
            pinsr = gen_sse4_1_pinsrd;
            break;

          case DImode:
            gcc_assert (TARGET_64BIT);
            if (!TARGET_SSE4_1)
              return false;
            dstmode = V2DImode;
            pinsr = gen_sse4_1_pinsrq;
            break;

          default:
            return false;
          }

        dst = gen_lowpart (dstmode, dst);
        src = gen_lowpart (srcmode, src);

        pos /= size;

        emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
        return true;
      }

    default:
      return false;
    }
}
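
/* Usage illustration (hypothetical operand values, added for clarity):
   inserting a 16-bit value at bit position 32 of a V8HImode register
   arrives here with size == 16 and pos == 32; pos /= size yields
   element index 2, and GEN_INT (1 << 2) selects that element in the
   pinsrw pattern emitted above.  */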
/* This function returns the calling abi specific va_list type node.
   It returns the FNDECL specific va_list type.  */

static tree
ix86_fn_abi_va_list (tree fndecl)
{
  if (!TARGET_64BIT)
    return va_list_type_node;
  gcc_assert (fndecl != NULL_TREE);

  if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
    return ms_va_list_type_node;
  else
    return sysv_va_list_type_node;
}
/* Returns the canonical va_list type specified by TYPE.  If there
   is no valid TYPE provided, it returns NULL_TREE.  */

static tree
ix86_canonical_va_list_type (tree type)
{
  tree wtype, htype;

  /* Resolve references and pointers to va_list type.  */
  if (TREE_CODE (type) == MEM_REF)
    type = TREE_TYPE (type);
  else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
    type = TREE_TYPE (type);
  else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  if (TARGET_64BIT && va_list_type_node != NULL_TREE)
    {
      wtype = va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
        {
          /* If va_list is an array type, the argument may have decayed
             to a pointer type, e.g. by being passed to another function.
             In that case, unwrap both types so that we can compare the
             underlying records.  */
          if (TREE_CODE (htype) == ARRAY_TYPE
              || POINTER_TYPE_P (htype))
            {
              wtype = TREE_TYPE (wtype);
              htype = TREE_TYPE (htype);
            }
        }
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
        return va_list_type_node;
      wtype = sysv_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
        {
          /* If va_list is an array type, the argument may have decayed
             to a pointer type, e.g. by being passed to another function.
             In that case, unwrap both types so that we can compare the
             underlying records.  */
          if (TREE_CODE (htype) == ARRAY_TYPE
              || POINTER_TYPE_P (htype))
            {
              wtype = TREE_TYPE (wtype);
              htype = TREE_TYPE (htype);
            }
        }
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
        return sysv_va_list_type_node;
      wtype = ms_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
        {
          /* If va_list is an array type, the argument may have decayed
             to a pointer type, e.g. by being passed to another function.
             In that case, unwrap both types so that we can compare the
             underlying records.  */
          if (TREE_CODE (htype) == ARRAY_TYPE
              || POINTER_TYPE_P (htype))
            {
              wtype = TREE_TYPE (wtype);
              htype = TREE_TYPE (htype);
            }
        }
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
        return ms_va_list_type_node;
      return NULL_TREE;
    }

  return std_canonical_va_list_type (type);
}
/* Iterate through the target-specific builtin types for va_list.
   IDX denotes the iterator, *PTREE is set to the result type of
   the va_list builtin, and *PNAME to its internal type.
   Returns zero if there is no element for this index, otherwise
   IDX should be increased upon the next call.
   Note, do not iterate a base builtin's name like __builtin_va_list.
   Used from c_common_nodes_and_builtins.  */

static int
ix86_enum_va_list (int idx, const char **pname, tree *ptree)
{
  if (!TARGET_64BIT)
    return 0;

  switch (idx)
    {
    default:
      break;

    case 0:
      *ptree = ms_va_list_type_node;
      *pname = "__builtin_ms_va_list";
      return 1;

    case 1:
      *ptree = sysv_va_list_type_node;
      *pname = "__builtin_sysv_va_list";
      return 1;
    }

  return 0;
}
#undef TARGET_SCHED_DISPATCH
#define TARGET_SCHED_DISPATCH has_dispatch
#undef TARGET_SCHED_DISPATCH_DO
#define TARGET_SCHED_DISPATCH_DO do_dispatch
#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width

/* The size of the dispatch window is the total number of bytes of
   object code allowed in a window.  */
#define DISPATCH_WINDOW_SIZE 16

/* Number of dispatch windows considered for scheduling.  */
#define MAX_DISPATCH_WINDOWS 3

/* Maximum number of instructions in a window.  */
#define MAX_INSN 4

/* Maximum number of immediate operands in a window.  */
#define MAX_IMM 4

/* Maximum number of immediate bits allowed in a window.  */
#define MAX_IMM_SIZE 128

/* Maximum number of 32 bit immediates allowed in a window.  */
#define MAX_IMM_32 4

/* Maximum number of 64 bit immediates allowed in a window.  */
#define MAX_IMM_64 2

/* Maximum total of loads or prefetches allowed in a window.  */
#define MAX_LOAD 2

/* Maximum total of stores allowed in a window.  */
#define MAX_STORE 1

#undef BIG
#define BIG 100
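
/* Derived sizing note (added for clarity): the dispatch code allots
   MAX_DISPATCH_WINDOWS * DISPATCH_WINDOW_SIZE == 3 * 16 == 48 bytes
   across a window pair, which is where the 48-byte bound asserted in
   process_end_window and tested in fits_dispatch_window comes from.  */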
/* Dispatch groups.  Instructions that affect the mix in a dispatch window.  */
enum dispatch_group
{
  disp_no_group = 0,
  disp_load,
  disp_store,
  disp_load_store,
  disp_prefetch,
  disp_imm,
  disp_imm_32,
  disp_imm_64,
  disp_branch,
  disp_cmp,
  disp_jcc,
  disp_last
};

/* Number of allowable groups in a dispatch window.  It is an array
   indexed by dispatch_group enum.  100 is used as a big number,
   because the number of these kinds of operations does not have any
   effect in dispatch window, but we need them for other reasons in
   a window.  */
static unsigned int num_allowable_groups[disp_last] = {
  0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
};

static char group_name[disp_last + 1][16] = {
  "disp_no_group", "disp_load", "disp_store", "disp_load_store",
  "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
  "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
};

/* Instruction path.  */
enum insn_path
{
  no_path = 0,
  path_single, /* Single micro op.  */
  path_double, /* Double micro op.  */
  path_multi,  /* Instructions with more than 2 micro ops.  */
  last_path
};

/* sched_insn_info defines a window to the instructions scheduled in
   the basic block.  It contains a pointer to the insn_info table and
   the instruction scheduled.

   Windows are allocated for each basic block and are linked
   together.  */
typedef struct sched_insn_info_s
{
  rtx insn;
  enum dispatch_group group;
  enum insn_path path;
  int byte_len;
  int imm_bytes;
} sched_insn_info;

/* Linked list of dispatch windows.  This is a two-way list of
   dispatch windows of a basic block.  It contains information about
   the number of uops in the window and the total number of
   instructions and of bytes in the object code for this dispatch
   window.  */
typedef struct dispatch_windows_s
{
  int num_insn;            /* Number of insns in the window.  */
  int num_uops;            /* Number of uops in the window.  */
  int window_size;         /* Number of bytes in the window.  */
  int window_num;          /* Window number, 0 or 1.  */
  int num_imm;             /* Number of immediates in an insn.  */
  int num_imm_32;          /* Number of 32 bit immediates in an insn.  */
  int num_imm_64;          /* Number of 64 bit immediates in an insn.  */
  int imm_size;            /* Total immediates in the window.  */
  int num_loads;           /* Total memory loads in the window.  */
  int num_stores;          /* Total memory stores in the window.  */
  int violation;           /* Violation exists in window.  */
  sched_insn_info *window; /* Pointer to the window.  */
  struct dispatch_windows_s *next;
  struct dispatch_windows_s *prev;
} dispatch_windows;

/* Immediate values used in an insn.  */
typedef struct imm_info_s
{
  int imm;
  int imm32;
  int imm64;
} imm_info;

static dispatch_windows *dispatch_window_list;
static dispatch_windows *dispatch_window_list1;
/* Get dispatch group of insn.  */

static enum dispatch_group
get_mem_group (rtx insn)
{
  enum attr_memory memory;

  if (INSN_CODE (insn) < 0)
    return disp_no_group;
  memory = get_attr_memory (insn);
  if (memory == MEMORY_STORE)
    return disp_store;

  if (memory == MEMORY_LOAD)
    return disp_load;

  if (memory == MEMORY_BOTH)
    return disp_load_store;

  return disp_no_group;
}
/* Return true if insn is a compare instruction.  */

static bool
is_cmp (rtx insn)
{
  enum attr_type type;

  type = get_attr_type (insn);
  return (type == TYPE_TEST
          || type == TYPE_ICMP
          || type == TYPE_FCMP
          || GET_CODE (PATTERN (insn)) == COMPARE);
}

/* Return true if a dispatch violation was encountered.  */

static bool
dispatch_violation (void)
{
  if (dispatch_window_list->next)
    return dispatch_window_list->next->violation;
  return dispatch_window_list->violation;
}

/* Return true if insn is a branch instruction.  */

static bool
is_branch (rtx insn)
{
  return (CALL_P (insn) || JUMP_P (insn));
}

/* Return true if insn is a prefetch instruction.  */

static bool
is_prefetch (rtx insn)
{
  return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
}
/* This function initializes a dispatch window and the list container holding a
   pointer to the window.  */

static void
init_window (int window_num)
{
  int i;
  dispatch_windows *new_list;

  if (window_num == 0)
    new_list = dispatch_window_list;
  else
    new_list = dispatch_window_list1;

  new_list->num_insn = 0;
  new_list->num_uops = 0;
  new_list->window_size = 0;
  new_list->next = NULL;
  new_list->prev = NULL;
  new_list->window_num = window_num;
  new_list->num_imm = 0;
  new_list->num_imm_32 = 0;
  new_list->num_imm_64 = 0;
  new_list->imm_size = 0;
  new_list->num_loads = 0;
  new_list->num_stores = 0;
  new_list->violation = false;

  for (i = 0; i < MAX_INSN; i++)
    {
      new_list->window[i].insn = NULL;
      new_list->window[i].group = disp_no_group;
      new_list->window[i].path = no_path;
      new_list->window[i].byte_len = 0;
      new_list->window[i].imm_bytes = 0;
    }
}
/* This function allocates and initializes a dispatch window and the
   list container holding a pointer to the window.  */

static dispatch_windows *
allocate_window (void)
{
  dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
  new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);

  return new_list;
}

/* This routine initializes the dispatch scheduling information.  It
   initiates building dispatch scheduler tables and constructs the
   first dispatch window.  */

static void
init_dispatch_sched (void)
{
  /* Allocate a dispatch list and a window.  */
  dispatch_window_list = allocate_window ();
  dispatch_window_list1 = allocate_window ();
  init_window (0);
  init_window (1);
}
/* This function returns true if a branch is detected.  End of a basic block
   does not have to be a branch, but here we assume only branches end a
   window.  */

static bool
is_end_basic_block (enum dispatch_group group)
{
  return group == disp_branch;
}

/* This function is called when the end of a window processing is reached.  */

static void
process_end_window (void)
{
  gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
  if (dispatch_window_list->next)
    {
      gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
      gcc_assert (dispatch_window_list->window_size
                  + dispatch_window_list1->window_size <= 48);
      init_window (1);
    }
  init_window (0);
}

/* Allocates a new dispatch window and adds it to WINDOW_LIST.
   WINDOW_NUM is either 0 or 1.  A maximum of two windows are generated
   for 48 bytes of instructions.  Note that these windows are not dispatch
   windows whose sizes are DISPATCH_WINDOW_SIZE.  */

static dispatch_windows *
allocate_next_window (int window_num)
{
  if (window_num == 0)
    {
      if (dispatch_window_list->next)
        init_window (1);
      init_window (0);
      return dispatch_window_list;
    }

  dispatch_window_list->next = dispatch_window_list1;
  dispatch_window_list1->prev = dispatch_window_list;

  return dispatch_window_list1;
}
/* Increment the number of immediate operands of an instruction.  */

static int
find_constant_1 (rtx *in_rtx, imm_info *imm_values)
{
  if (*in_rtx == 0)
    return 0;

  switch (GET_CODE (*in_rtx))
    {
    case CONST:
    case SYMBOL_REF:
    case CONST_INT:
      (imm_values->imm)++;
      if (x86_64_immediate_operand (*in_rtx, SImode))
        (imm_values->imm32)++;
      else
        (imm_values->imm64)++;
      break;

    case CONST_DOUBLE:
      (imm_values->imm)++;
      (imm_values->imm64)++;
      break;

    case CODE_LABEL:
      if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
        {
          (imm_values->imm)++;
          (imm_values->imm32)++;
        }
      break;

    default:
      break;
    }

  return 0;
}

/* Compute number of immediate operands of an instruction.  */

static void
find_constant (rtx in_rtx, imm_info *imm_values)
{
  for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
                (rtx_function) find_constant_1, (void *) imm_values);
}

/* Return total size of immediate operands of an instruction along with number
   of corresponding immediate-operands.  It initializes its parameters to zero
   before calling FIND_CONSTANT.
   INSN is the input instruction.  IMM is the total of immediates.
   IMM32 is the number of 32 bit immediates.  IMM64 is the number of 64
   bit immediates.  */

static int
get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
{
  imm_info imm_values = {0, 0, 0};

  find_constant (insn, &imm_values);
  *imm = imm_values.imm;
  *imm32 = imm_values.imm32;
  *imm64 = imm_values.imm64;
  return imm_values.imm32 * 4 + imm_values.imm64 * 8;
}

/* This function indicates if an operand of an instruction is an
   immediate.  */

static bool
has_immediate (rtx insn)
{
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (insn)
    return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                               &num_imm64_operand);
  return false;
}
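
/* Worked example (illustrative): for an insn such as
   mov $0x123456789a, %rax the constant does not fit a sign-extended
   SImode immediate, so find_constant_1 counts imm == 1 and imm64 == 1,
   and get_num_immediates reports a total immediate size of
   1 * 8 == 8 bytes.  */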
/* Return single or double path for instructions.  */

static enum insn_path
get_insn_path (rtx insn)
{
  enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);

  if ((int)path == 0)
    return path_single;

  if ((int)path == 1)
    return path_double;

  return path_multi;
}

/* Return insn dispatch group.  */

static enum dispatch_group
get_insn_group (rtx insn)
{
  enum dispatch_group group = get_mem_group (insn);
  if (group)
    return group;

  if (is_branch (insn))
    return disp_branch;

  if (is_cmp (insn))
    return disp_cmp;

  if (has_immediate (insn))
    return disp_imm;

  if (is_prefetch (insn))
    return disp_prefetch;

  return disp_no_group;
}
/* Count number of GROUP restricted instructions in a dispatch
   window WINDOW_LIST.  */

static int
count_num_restricted (rtx insn, dispatch_windows *window_list)
{
  enum dispatch_group group = get_insn_group (insn);
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (group == disp_no_group)
    return 0;

  if (group == disp_imm)
    {
      imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                                     &num_imm64_operand);
      if (window_list->imm_size + imm_size > MAX_IMM_SIZE
          || num_imm_operand + window_list->num_imm > MAX_IMM
          || (num_imm32_operand > 0
              && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
                  || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
          || (num_imm64_operand > 0
              && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
                  || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
          || (window_list->imm_size + imm_size == MAX_IMM_SIZE
              && num_imm64_operand > 0
              && ((window_list->num_imm_64 > 0
                   && window_list->num_insn >= 2)
                  || window_list->num_insn >= 3)))
        return BIG;

      return 1;
    }

  if ((group == disp_load_store
       && (window_list->num_loads >= MAX_LOAD
           || window_list->num_stores >= MAX_STORE))
      || ((group == disp_load
           || group == disp_prefetch)
          && window_list->num_loads >= MAX_LOAD)
      || (group == disp_store
          && window_list->num_stores >= MAX_STORE))
    return BIG;

  return 1;
}
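
/* Immediate-accounting example (added for illustration): with two
   64-bit immediates already in the window (num_imm_64 == 2), an insn
   carrying one 32-bit immediate is classed as over-restricted, since
   num_imm_64 * 2 + num_imm32_operand == 5 exceeds MAX_IMM_32 == 4,
   so count_num_restricted returns BIG and the insn will not fit.  */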
/* This function returns true if insn satisfies dispatch rules on the
   last window scheduled.  */

static bool
fits_dispatch_window (rtx insn)
{
  dispatch_windows *window_list = dispatch_window_list;
  dispatch_windows *window_list_next = dispatch_window_list->next;
  unsigned int num_restrict;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int sum;

  /* Make disp_cmp and disp_jcc get scheduled at the latest.  These
     instructions should be given the lowest priority in the
     scheduling process in Haifa scheduler to make sure they will be
     scheduled in the same dispatch window as the reference to them.  */
  if (group == disp_jcc || group == disp_cmp)
    return false;

  /* Check nonrestricted.  */
  if (group == disp_no_group || group == disp_branch)
    return true;

  /* Get last dispatch window.  */
  if (window_list_next)
    window_list = window_list_next;

  if (window_list->window_num == 1)
    {
      sum = window_list->prev->window_size + window_list->window_size;

      if (sum == 32
          || (min_insn_size (insn) + sum) >= 48)
        /* Window 1 is full.  Go for next window.  */
        return true;
    }

  num_restrict = count_num_restricted (insn, window_list);

  if (num_restrict > num_allowable_groups[group])
    return false;

  /* See if it fits in the first window.  */
  if (window_list->window_num == 0)
    {
      /* The first window should have only single and double path
         uops.  */
      if (path == path_double
          && (window_list->num_uops + 2) > MAX_INSN)
        return false;
      else if (path != path_single)
        return false;
    }
  return true;
}
/* Add an instruction INSN with NUM_UOPS micro-operations to the
   dispatch window WINDOW_LIST.  */

static void
add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
{
  int byte_len = min_insn_size (insn);
  int num_insn = window_list->num_insn;
  int imm_size;
  sched_insn_info *window = window_list->window;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (!window_list->violation && group != disp_cmp
      && !fits_dispatch_window (insn))
    window_list->violation = true;

  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                                 &num_imm64_operand);

  /* Initialize window with new instruction.  */
  window[num_insn].insn = insn;
  window[num_insn].byte_len = byte_len;
  window[num_insn].group = group;
  window[num_insn].path = path;
  window[num_insn].imm_bytes = imm_size;

  window_list->window_size += byte_len;
  window_list->num_insn = num_insn + 1;
  window_list->num_uops = window_list->num_uops + num_uops;
  window_list->imm_size += imm_size;
  window_list->num_imm += num_imm_operand;
  window_list->num_imm_32 += num_imm32_operand;
  window_list->num_imm_64 += num_imm64_operand;

  if (group == disp_store)
    window_list->num_stores += 1;
  else if (group == disp_load
           || group == disp_prefetch)
    window_list->num_loads += 1;
  else if (group == disp_load_store)
    {
      window_list->num_stores += 1;
      window_list->num_loads += 1;
    }
}
/* Adds a scheduled instruction, INSN, to the current dispatch window.
   If the total bytes of instructions or the number of instructions in
   the window exceed allowable, it allocates a new window.  */

static void
add_to_dispatch_window (rtx insn)
{
  int byte_len;
  dispatch_windows *window_list;
  dispatch_windows *next_list;
  dispatch_windows *window0_list;
  enum insn_path path;
  enum dispatch_group insn_group;
  bool insn_fits;
  int num_insn;
  int num_uops;
  int window_num;
  int insn_num_uops;
  int sum;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  window_list = dispatch_window_list;
  next_list = window_list->next;
  path = get_insn_path (insn);
  insn_group = get_insn_group (insn);

  /* Get the last dispatch window.  */
  if (next_list)
    window_list = dispatch_window_list->next;

  if (path == path_single)
    insn_num_uops = 1;
  else if (path == path_double)
    insn_num_uops = 2;
  else
    insn_num_uops = (int) path;

  /* If current window is full, get a new window.
     Window number zero is full, if MAX_INSN uops are scheduled in it.
     Window number one is full, if window zero's bytes plus window
     one's bytes is 32, or if the bytes of the new instruction added
     to the total makes it greater than 48, or it has already MAX_INSN
     instructions in it.  */
  num_insn = window_list->num_insn;
  num_uops = window_list->num_uops;
  window_num = window_list->window_num;
  insn_fits = fits_dispatch_window (insn);

  if (num_insn >= MAX_INSN
      || num_uops + insn_num_uops > MAX_INSN
      || !(insn_fits))
    {
      window_num = ~window_num & 1;
      window_list = allocate_next_window (window_num);
    }

  if (window_num == 0)
    {
      add_insn_window (insn, window_list, insn_num_uops);
      if (window_list->num_insn >= MAX_INSN
          && insn_group == disp_branch)
        {
          process_end_window ();
          return;
        }
    }
  else if (window_num == 1)
    {
      window0_list = window_list->prev;
      sum = window0_list->window_size + window_list->window_size;
      if (sum == 32
          || (byte_len + sum) >= 48)
        {
          process_end_window ();
          window_list = dispatch_window_list;
        }

      add_insn_window (insn, window_list, insn_num_uops);
    }
  else
    gcc_unreachable ();

  if (is_end_basic_block (insn_group))
    {
      /* End of basic block is reached; do end-basic-block processing.  */
      process_end_window ();
    }
}
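
/* Walk-through (illustrative): window 0 accepts insns until MAX_INSN
   uops are scheduled; the next insn flips window_num and moves to
   window 1, which in turn is closed once the two windows together
   reach 32 bytes, or adding the insn would reach the 48-byte limit,
   at which point process_end_window resets both windows.  */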
/* Print the dispatch window, WINDOW_NUM, to FILE.  */

DEBUG_FUNCTION static void
debug_dispatch_window_file (FILE *file, int window_num)
{
  dispatch_windows *list;
  int i;

  if (window_num == 0)
    list = dispatch_window_list;
  else
    list = dispatch_window_list1;

  fprintf (file, "Window #%d:\n", list->window_num);
  fprintf (file, "  num_insn = %d, num_uops = %d, window_size = %d\n",
           list->num_insn, list->num_uops, list->window_size);
  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
           list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);

  fprintf (file, "  num_loads = %d, num_stores = %d\n", list->num_loads,
           list->num_stores);
  fprintf (file, " insn info:\n");

  for (i = 0; i < MAX_INSN; i++)
    {
      if (!list->window[i].insn)
        break;
      fprintf (file, "    group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
               i, group_name[list->window[i].group],
               i, (void *)list->window[i].insn,
               i, list->window[i].path,
               i, list->window[i].byte_len,
               i, list->window[i].imm_bytes);
    }
}

/* Print to stdout a dispatch window.  */

DEBUG_FUNCTION void
debug_dispatch_window (int window_num)
{
  debug_dispatch_window_file (stdout, window_num);
}

/* Print INSN dispatch information to FILE.  */

DEBUG_FUNCTION static void
debug_insn_dispatch_info_file (FILE *file, rtx insn)
{
  int byte_len;
  enum insn_path path;
  enum dispatch_group group;
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  path = get_insn_path (insn);
  group = get_insn_group (insn);
  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                                 &num_imm64_operand);

  fprintf (file, " insn info:\n");
  fprintf (file, "  group = %s, path = %d, byte_len = %d\n",
           group_name[group], path, byte_len);
  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
           num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
}

/* Print to STDERR the status of the ready list with respect to
   dispatch windows.  */

DEBUG_FUNCTION void
debug_ready_dispatch (void)
{
  int i;
  int no_ready = number_in_ready ();

  fprintf (stdout, "Number of ready: %d\n", no_ready);

  for (i = 0; i < no_ready; i++)
    debug_insn_dispatch_info_file (stdout, get_ready_element (i));
}
/* This routine is the driver of the dispatch scheduler.  */

static void
do_dispatch (rtx insn, int mode)
{
  if (mode == DISPATCH_INIT)
    init_dispatch_sched ();
  else if (mode == ADD_TO_DISPATCH_WINDOW)
    add_to_dispatch_window (insn);
}

/* Return TRUE if Dispatch Scheduling is supported.  */

static bool
has_dispatch (rtx insn, int action)
{
  if ((ix86_tune == PROCESSOR_BDVER1 || ix86_tune == PROCESSOR_BDVER2)
      && flag_dispatch_scheduler)
    switch (action)
      {
      default:
        return false;

      case IS_DISPATCH_ON:
        return true;

      case IS_CMP:
        return is_cmp (insn);

      case DISPATCH_VIOLATION:
        return dispatch_violation ();

      case FITS_DISPATCH_WINDOW:
        return fits_dispatch_window (insn);
      }

  return false;
}
/* Implementation of reassociation_width target hook used by
   reassoc phase to identify parallelism level in reassociated
   tree.  Statements tree_code is passed in OPC.  Arguments type
   is passed in MODE.

   Currently parallel reassociation is enabled for Atom
   processors only and we set reassociation width to be 2
   because Atom may issue up to 2 instructions per cycle.

   Return value should be fixed if parallel reassociation is
   enabled for other processors.  */

static int
ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
                          enum machine_mode mode)
{
  int res = 1;

  if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
    res = 2;
  else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
    res = 2;

  return res;
}
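
/* Example of the effect (illustrative): with a width of 2 the reassoc
   pass may rewrite

     t = a + b + c + d;   =>   t = (a + b) + (c + d);

   so the two inner additions can issue in the same cycle instead of
   forming a serial dependence chain.  */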
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
   place emms and femms instructions.  */

static enum machine_mode
ix86_preferred_simd_mode (enum machine_mode mode)
{
  if (!TARGET_SSE)
    return word_mode;

  switch (mode)
    {
    case QImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
    case HImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
    case SImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
    case DImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;

    case SFmode:
      if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V8SFmode;
      else
        return V4SFmode;

    case DFmode:
      if (!TARGET_VECTORIZE_DOUBLE)
        return word_mode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V4DFmode;
      else if (TARGET_SSE2)
        return V2DFmode;
      /* FALLTHRU */

    default:
      return word_mode;
    }
}
/* If AVX is enabled then try vectorizing with both 256bit and 128bit
   vectors.  */

static unsigned int
ix86_autovectorize_vector_sizes (void)
{
  return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
}
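
/* Explanatory note (added): the return value is a bitmask of vector
   sizes in bytes; 32 | 16 == 48 advertises both 32-byte (AVX) and
   16-byte (SSE) vectors, while 0 tells the vectorizer to use only the
   preferred SIMD mode reported above.  */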
/* Initialize the GCC target structure.  */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ix86_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
#define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load

#undef TARGET_VECTORIZE_BUILTIN_TM_STORE
#define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store

#undef TARGET_VECTORIZE_BUILTIN_GATHER
#define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP ASM_BYTE

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_PROFILE_BEFORE_PROLOGUE
#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ix86_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
#endif

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"