/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "cgraph.h"
#include "gimple.h"
#include "dwarf2.h"
#include "df.h"
#include "tm-constrs.h"
#include "params.h"
#include "cselib.h"
#include "debug.h"
#include "sched-int.h"
#include "sbitmap.h"
#include "fibheap.h"
#include "opts.h"
#include "diagnostic.h"
enum upper_128bits_state
{
  unknown = 0,
  unused,
  used
};
typedef struct block_info_def
{
  /* State of the upper 128bits of AVX registers at exit.  */
  enum upper_128bits_state state;
  /* TRUE if state of the upper 128bits of AVX registers is unchanged
     in this block.  */
  bool unchanged;
  /* TRUE if block has been processed.  */
  bool processed;
  /* TRUE if block has been scanned.  */
  bool scanned;
  /* Previous state of the upper 128bits of AVX registers at entry.  */
  enum upper_128bits_state prev;
} *block_info;

#define BLOCK_INFO(B)  ((block_info) (B)->aux)
enum call_avx256_state
{
  /* Callee returns 256bit AVX register.  */
  callee_return_avx256 = -1,
  /* Callee returns and passes 256bit AVX register.  */
  callee_return_pass_avx256,
  /* Callee passes 256bit AVX register.  */
  callee_pass_avx256,
  /* Callee doesn't return nor pass 256bit AVX register, or no
     256bit AVX register in function return.  */
  call_no_avx256,
  /* vzeroupper intrinsic.  */
  vzeroupper_intrinsic
};
/* Check if a 256bit AVX register is referenced in stores.  */

static void
check_avx256_stores (rtx dest, const_rtx set, void *data)
{
  if ((REG_P (dest)
       && VALID_AVX256_REG_MODE (GET_MODE (dest)))
      || (GET_CODE (set) == SET
	  && REG_P (SET_SRC (set))
	  && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
    {
      enum upper_128bits_state *state
	= (enum upper_128bits_state *) data;
      *state = used;
    }
}
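
/* Illustrative sketch, not part of the original file: note_stores
   invokes the callback above once per store in an insn pattern,
   passing the store destination, the enclosing SET or CLOBBER, and
   the opaque data pointer.  A hypothetical wrapper asking "does this
   pattern write any 256bit AVX register?" would look like:

     static bool
     refs_avx256_store_p (rtx pat)
     {
       enum upper_128bits_state state = unused;
       note_stores (pat, check_avx256_stores, &state);
       return state == used;
     }

   The pass below inlines this idiom directly instead of using such a
   wrapper.  */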
/* Helper function for move_or_delete_vzeroupper_1.  Look for vzeroupper
   in basic block BB.  Delete it if upper 128bit AVX registers are
   unused.  If it isn't deleted, move it to just before a jump insn.

   STATE is state of the upper 128bits of AVX registers at entry.  */

static void
move_or_delete_vzeroupper_2 (basic_block bb,
			     enum upper_128bits_state state)
{
  rtx insn, bb_end;
  rtx vzeroupper_insn = NULL_RTX;
  rtx pat;
  int avx256;
  bool unchanged;

  if (BLOCK_INFO (bb)->unchanged)
    {
      if (dump_file)
	fprintf (dump_file, " [bb %i] unchanged: upper 128bits: %d\n",
		 bb->index, state);

      BLOCK_INFO (bb)->state = state;
      return;
    }

  if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
    {
      if (dump_file)
	fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
		 bb->index, BLOCK_INFO (bb)->state);
      return;
    }

  BLOCK_INFO (bb)->prev = state;

  if (dump_file)
    fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
	     bb->index, state);

  unchanged = true;

  /* BB_END changes when it is deleted.  */
  bb_end = BB_END (bb);
  insn = BB_HEAD (bb);
  while (insn != bb_end)
    {
      insn = NEXT_INSN (insn);

      if (!NONDEBUG_INSN_P (insn))
	continue;

      /* Move vzeroupper before jump/call.  */
      if (JUMP_P (insn) || CALL_P (insn))
	{
	  if (!vzeroupper_insn)
	    continue;

	  if (PREV_INSN (insn) != vzeroupper_insn)
	    {
	      if (dump_file)
		{
		  fprintf (dump_file, "Move vzeroupper after:\n");
		  print_rtl_single (dump_file, PREV_INSN (insn));
		  fprintf (dump_file, "before:\n");
		  print_rtl_single (dump_file, insn);
		}
	      reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
				  PREV_INSN (insn));
	    }
	  vzeroupper_insn = NULL_RTX;
	  continue;
	}

      pat = PATTERN (insn);

      /* Check insn for vzeroupper intrinsic.  */
      if (GET_CODE (pat) == UNSPEC_VOLATILE
	  && XINT (pat, 1) == UNSPECV_VZEROUPPER)
	{
	  if (dump_file)
	    {
	      /* Found vzeroupper intrinsic.  */
	      fprintf (dump_file, "Found vzeroupper:\n");
	      print_rtl_single (dump_file, insn);
	    }
	}
      else
	{
	  /* Check insn for vzeroall intrinsic.  */
	  if (GET_CODE (pat) == PARALLEL
	      && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
	      && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
	    {
	      state = unused;
	      unchanged = false;

	      /* Delete pending vzeroupper insertion.  */
	      if (vzeroupper_insn)
		{
		  delete_insn (vzeroupper_insn);
		  vzeroupper_insn = NULL_RTX;
		}
	    }
	  else if (state != used)
	    {
	      note_stores (pat, check_avx256_stores, &state);
	      if (state == used)
		unchanged = false;
	    }
	  continue;
	}

      /* Process vzeroupper intrinsic.  */
      avx256 = INTVAL (XVECEXP (pat, 0, 0));

      if (state == unused)
	{
	  /* Since the upper 128bits are cleared, callee must not pass
	     256bit AVX register.  We only need to check if callee
	     returns 256bit AVX register.  */
	  if (avx256 == callee_return_avx256)
	    {
	      state = used;
	      unchanged = false;
	    }

	  /* Remove unnecessary vzeroupper since upper 128bits are
	     cleared.  */
	  if (dump_file)
	    {
	      fprintf (dump_file, "Delete redundant vzeroupper:\n");
	      print_rtl_single (dump_file, insn);
	    }
	  delete_insn (insn);
	}
      else
	{
	  /* Set state to UNUSED if callee doesn't return 256bit AVX
	     register.  */
	  if (avx256 != callee_return_pass_avx256)
	    state = unused;

	  if (avx256 == callee_return_pass_avx256
	      || avx256 == callee_pass_avx256)
	    {
	      /* Must remove vzeroupper since callee passes in 256bit
		 AVX register.  */
	      if (dump_file)
		{
		  fprintf (dump_file, "Delete callee pass vzeroupper:\n");
		  print_rtl_single (dump_file, insn);
		}
	      delete_insn (insn);
	    }
	  else
	    {
	      vzeroupper_insn = insn;
	      unchanged = false;
	    }
	}
    }

  BLOCK_INFO (bb)->state = state;
  BLOCK_INFO (bb)->unchanged = unchanged;
  BLOCK_INFO (bb)->scanned = true;

  if (dump_file)
    fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
	     bb->index, unchanged ? "unchanged" : "changed",
	     state);
}
/* Helper function for move_or_delete_vzeroupper.  Process vzeroupper
   in BLOCK and check its predecessor blocks.  Treat UNKNOWN state
   as USED if UNKNOWN_IS_UNUSED is true.  Return TRUE if the exit
   state is changed.  */

static bool
move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
{
  edge e;
  edge_iterator ei;
  enum upper_128bits_state state, old_state, new_state;
  bool seen_unknown;

  if (dump_file)
    fprintf (dump_file, " Process [bb %i]: status: %d\n",
	     block->index, BLOCK_INFO (block)->processed);

  if (BLOCK_INFO (block)->processed)
    return false;

  state = unused;

  /* Check all predecessor edges of this block.  */
  seen_unknown = false;
  FOR_EACH_EDGE (e, ei, block->preds)
    {
      if (e->src == block)
	continue;
      switch (BLOCK_INFO (e->src)->state)
	{
	case unknown:
	  if (!unknown_is_unused)
	    seen_unknown = true;
	case unused:
	  break;
	case used:
	  state = used;
	  goto done;
	}
    }

  if (seen_unknown)
    state = unknown;

done:
  old_state = BLOCK_INFO (block)->state;
  move_or_delete_vzeroupper_2 (block, state);
  new_state = BLOCK_INFO (block)->state;

  if (state != unknown || new_state == used)
    BLOCK_INFO (block)->processed = true;

  /* Need to rescan if the upper 128bits of AVX registers are changed
     to USED at exit.  */
  if (new_state != old_state)
    {
      if (new_state == used)
	cfun->machine->rescan_vzeroupper_p = 1;
      return true;
    }
  else
    return false;
}
/* Go through the instruction stream looking for vzeroupper.  Delete
   it if upper 128bit AVX registers are unused.  If it isn't deleted,
   move it to just before a jump insn.  */

static void
move_or_delete_vzeroupper (void)
{
  edge e;
  edge_iterator ei;
  basic_block bb;
  fibheap_t worklist, pending, fibheap_swap;
  sbitmap visited, in_worklist, in_pending, sbitmap_swap;
  int *bb_order;
  int *rc_order;
  int i;

  /* Set up block info for each basic block.  */
  alloc_aux_for_blocks (sizeof (struct block_info_def));

  /* Process outgoing edges of entry point.  */
  if (dump_file)
    fprintf (dump_file, "Process outgoing edges of entry point\n");

  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    {
      move_or_delete_vzeroupper_2 (e->dest,
				   cfun->machine->caller_pass_avx256_p
				   ? used : unused);
      BLOCK_INFO (e->dest)->processed = true;
    }

  /* Compute reverse completion order of depth first search of the CFG
     so that the data-flow runs faster.  */
  rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
  bb_order = XNEWVEC (int, last_basic_block);
  pre_and_rev_post_order_compute (NULL, rc_order, false);
  for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
    bb_order[rc_order[i]] = i;
  free (rc_order);

  worklist = fibheap_new ();
  pending = fibheap_new ();
  visited = sbitmap_alloc (last_basic_block);
  in_worklist = sbitmap_alloc (last_basic_block);
  in_pending = sbitmap_alloc (last_basic_block);
  sbitmap_zero (in_worklist);

  /* Don't check outgoing edges of entry point.  */
  sbitmap_ones (in_pending);
  FOR_EACH_BB (bb)
    if (BLOCK_INFO (bb)->processed)
      RESET_BIT (in_pending, bb->index);
    else
      {
	move_or_delete_vzeroupper_1 (bb, false);
	fibheap_insert (pending, bb_order[bb->index], bb);
      }

  if (dump_file)
    fprintf (dump_file, "Check remaining basic blocks\n");

  while (!fibheap_empty (pending))
    {
      fibheap_swap = pending;
      pending = worklist;
      worklist = fibheap_swap;
      sbitmap_swap = in_pending;
      in_pending = in_worklist;
      in_worklist = sbitmap_swap;

      sbitmap_zero (visited);

      cfun->machine->rescan_vzeroupper_p = 0;

      while (!fibheap_empty (worklist))
	{
	  bb = (basic_block) fibheap_extract_min (worklist);
	  RESET_BIT (in_worklist, bb->index);
	  gcc_assert (!TEST_BIT (visited, bb->index));
	  if (!TEST_BIT (visited, bb->index))
	    {
	      edge_iterator ei;

	      SET_BIT (visited, bb->index);

	      if (move_or_delete_vzeroupper_1 (bb, false))
		FOR_EACH_EDGE (e, ei, bb->succs)
		  {
		    if (e->dest == EXIT_BLOCK_PTR
			|| BLOCK_INFO (e->dest)->processed)
		      continue;

		    if (TEST_BIT (visited, e->dest->index))
		      {
			if (!TEST_BIT (in_pending, e->dest->index))
			  {
			    /* Send E->DEST to next round.  */
			    SET_BIT (in_pending, e->dest->index);
			    fibheap_insert (pending,
					    bb_order[e->dest->index],
					    e->dest);
			  }
		      }
		    else if (!TEST_BIT (in_worklist, e->dest->index))
		      {
			/* Add E->DEST to current round.  */
			SET_BIT (in_worklist, e->dest->index);
			fibheap_insert (worklist, bb_order[e->dest->index],
					e->dest);
		      }
		  }
	    }
	}

      if (!cfun->machine->rescan_vzeroupper_p)
	break;
    }

  free (bb_order);
  fibheap_delete (worklist);
  fibheap_delete (pending);
  sbitmap_free (visited);
  sbitmap_free (in_worklist);
  sbitmap_free (in_pending);

  if (dump_file)
    fprintf (dump_file, "Process remaining basic blocks\n");

  FOR_EACH_BB (bb)
    move_or_delete_vzeroupper_1 (bb, true);

  free_aux_for_blocks ();
}
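
/* Illustrative sketch, not part of GCC: the driver above is an
   instance of the classic two-queue iterative data-flow scheme.  Each
   round drains WORKLIST in reverse completion order; a block whose
   exit state changed pushes its unprocessed successors either into
   the current round (if not yet visited) or into PENDING for the next
   round.  Stripped of the vzeroupper specifics, the pattern is:

     while (!fibheap_empty (pending))
       {
	 swap (worklist, pending);
	 swap (in_worklist, in_pending);
	 sbitmap_zero (visited);
	 while (!fibheap_empty (worklist))
	   {
	     bb = (basic_block) fibheap_extract_min (worklist);
	     SET_BIT (visited, bb->index);
	     if (transfer_function (bb))
	       FOR_EACH_EDGE (e, ei, bb->succs)
		 enqueue (e->dest);	/* into worklist or pending */
	   }
       }

   `swap', `transfer_function' and `enqueue' are hypothetical helpers
   standing in for the inline code above.  */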
static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)				\
  ((mode) == QImode ? 0					\
   : (mode) == HImode ? 1				\
   : (mode) == SImode ? 2				\
   : (mode) == DImode ? 3				\
   : 4)
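
/* Illustrative sketch, not part of this excerpt: the mult and divide
   entries of the cost tables below are five-element arrays indexed by
   MODE_INDEX, so a cost lookup for a multiply in MODE is typically of
   the form

     cost = ix86_cost->mult_init[MODE_INDEX (mode)]
	    + nbits * ix86_cost->mult_bit;

   where `nbits' (the number of set bits in the constant multiplier)
   is a hypothetical name used only in this sketch.  */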
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)

#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
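
/* A reading aid for the stringop_algs initializers below (the struct
   itself lives in i386.h): each cost table carries two entries for
   memcpy and two for memset, the first of each pair for 32bit mode
   and the second for 64bit mode.  The leading algorithm is used when
   the block size is unknown at compile time; the {max, alg} pairs
   select an algorithm for known sizes up to MAX bytes, with MAX == -1
   terminating the list and covering all larger sizes.  For example,

     {libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}

   means: unknown size -> libcall, size <= 256 -> rep movsl, anything
   bigger -> libcall.  DUMMY_STRINGOP_ALGS fills the 64bit slot for
   processors where 64bit mode does not apply.  */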
const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),			/* cost of an add instruction */
  COSTS_N_BYTES (3),			/* cost of a lea instruction */
  COSTS_N_BYTES (2),			/* variable shift costs */
  COSTS_N_BYTES (3),			/* constant shift costs */
  {COSTS_N_BYTES (3),			/* cost of starting multiply for QI */
   COSTS_N_BYTES (3),			/* HI */
   COSTS_N_BYTES (3),			/* SI */
   COSTS_N_BYTES (3),			/* DI */
   COSTS_N_BYTES (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),			/* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),			/* HI */
   COSTS_N_BYTES (3),			/* SI */
   COSTS_N_BYTES (3),			/* DI */
   COSTS_N_BYTES (5)},			/* other */
  COSTS_N_BYTES (3),			/* cost of movsx */
  COSTS_N_BYTES (3),			/* cost of movzx */
  0,					/* "large" insn */
  2,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {2, 2, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 2},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {2, 2, 2},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  3,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {3, 3},				/* cost of storing MMX registers
					   in SImode and DImode */
  3,					/* cost of moving SSE register */
  {3, 3, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {3, 3, 3},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of l1 cache */
  0,					/* size of l2 cache */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_BYTES (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),			/* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),			/* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),			/* cost of FABS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  1,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  1,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (6),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (6),			/* HI */
   COSTS_N_INSNS (6),			/* SI */
   COSTS_N_INSNS (6),			/* DI */
   COSTS_N_INSNS (6)},			/* other */
  COSTS_N_INSNS (1),			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/* HI */
   COSTS_N_INSNS (23),			/* SI */
   COSTS_N_INSNS (23),			/* DI */
   COSTS_N_INSNS (23)},			/* other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of l1 cache */
  0,					/* size of l2 cache */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (23),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (24),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),			/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (12),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (12),			/* HI */
   COSTS_N_INSNS (12),			/* SI */
   COSTS_N_INSNS (12),			/* DI */
   COSTS_N_INSNS (12)},			/* other */
  1,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),			/* HI */
   COSTS_N_INSNS (40),			/* SI */
   COSTS_N_INSNS (40),			/* DI */
   COSTS_N_INSNS (40)},			/* other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  4,					/* size of l1 cache.  486 has 8kB cache
					   shared for code and data, so 4kB is
					   not really precise.  */
  4,					/* size of l2 cache */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),			/* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (11),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (11),			/* HI */
   COSTS_N_INSNS (11),			/* SI */
   COSTS_N_INSNS (11),			/* DI */
   COSTS_N_INSNS (11)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),			/* HI */
   COSTS_N_INSNS (25),			/* SI */
   COSTS_N_INSNS (25),			/* DI */
   COSTS_N_INSNS (25)},			/* other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  8,					/* size of l2 cache */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (4),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (4)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),			/* HI */
   COSTS_N_INSNS (17),			/* SI */
   COSTS_N_INSNS (17),			/* DI */
   COSTS_N_INSNS (17)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  256,					/* size of l2 cache */
  32,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks inline loop is still a
     noticeable win, for bigger blocks either rep movsl or rep movsb is
     way to go.  Rep movsb has apparently more expensive startup time in CPU,
     but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (2),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (7),			/* SI */
   COSTS_N_INSNS (7),			/* DI */
   COSTS_N_INSNS (7)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/* HI */
   COSTS_N_INSNS (39),			/* SI */
   COSTS_N_INSNS (39),			/* DI */
   COSTS_N_INSNS (39)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* MOVE_RATIO */
  1,					/* cost for loading QImode using movzbl */
  {1, 1, 1},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {1, 1, 1},				/* cost of storing integer registers */
  1,					/* cost of reg,reg fld/fst */
  {1, 1, 1},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 6, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */

  1,					/* cost of moving MMX register */
  {1, 1},				/* cost of loading MMX registers
					   in SImode and DImode */
  {1, 1},				/* cost of storing MMX registers
					   in SImode and DImode */
  1,					/* cost of moving SSE register */
  {1, 1, 1},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {1, 1, 1},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  1,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  128,					/* size of l2 cache.  */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (3),			/* DI */
   COSTS_N_INSNS (3)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),			/* HI */
   COSTS_N_INSNS (18),			/* SI */
   COSTS_N_INSNS (18),			/* DI */
   COSTS_N_INSNS (18)},			/* other */
  COSTS_N_INSNS (2),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* MOVE_RATIO */
  3,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache.  */
  32,					/* size of l2 cache.  Some models
					   have integrated l2 cache, but
					   optimizing for k6 is not important
					   enough to worry about that.  */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (5),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (5),			/* HI */
   COSTS_N_INSNS (5),			/* SI */
   COSTS_N_INSNS (5),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  256,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  5,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with REP prefix (relative to loops)
     compared to K8.  Alignment becomes important after 8 bytes for memcpy and
     128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 3, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,					/* number of parallel prefetches */
  3,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
  /* K8 has optimized REP instruction for medium sized blocks, but for very
     small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,					/* scalar_stmt_cost.  */
  2,					/* scalar load_cost.  */
  2,					/* scalar_store_cost.  */
  5,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  2,					/* vec_align_load_cost.  */
  3,					/* vec_unalign_load_cost.  */
  3,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  2,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/* HI */
   COSTS_N_INSNS (51),			/* SI */
   COSTS_N_INSNS (83),			/* DI */
   COSTS_N_INSNS (83)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
					/* On K8:
					    MOVD reg64, xmmreg Double FSTORE 4
					    MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg Double FADD 3
							       1/1  1/1
					    MOVD reg32, xmmreg Double FADD 3
							       1/1  1/1 */
  64,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */

  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,					/* scalar_stmt_cost.  */
  2,					/* scalar load_cost.  */
  2,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  2,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  2,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (4),			/* SI */
   COSTS_N_INSNS (6),			/* DI */
   COSTS_N_INSNS (6)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/* HI */
   COSTS_N_INSNS (51),			/* SI */
   COSTS_N_INSNS (83),			/* DI */
   COSTS_N_INSNS (83)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {5, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {5, 5, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 4},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 4},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  2,					/* MMX or SSE register to integer */
					/* On K8:
					    MOVD reg64, xmmreg Double FSTORE 4
					    MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg Double FADD 3
							       1/1  1/1
					    MOVD reg32, xmmreg Double FADD 3
							       1/1  1/1 */
  16,					/* size of l1 cache.  */
  2048,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),			/* cost of FSQRT instruction.  */

  /* BDVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,					/* scalar_stmt_cost.  */
  4,					/* scalar load_cost.  */
  4,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  4,					/* vec_align_load_cost.  */
  4,					/* vec_unalign_load_cost.  */
  4,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs bdver2_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (4),			/* SI */
   COSTS_N_INSNS (6),			/* DI */
   COSTS_N_INSNS (6)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/* HI */
   COSTS_N_INSNS (51),			/* SI */
   COSTS_N_INSNS (83),			/* DI */
   COSTS_N_INSNS (83)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {5, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {5, 5, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 4},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 4},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  2,					/* MMX or SSE register to integer */
					/* On K8:
					    MOVD reg64, xmmreg Double FSTORE 4
					    MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg Double FADD 3
							       1/1  1/1
					    MOVD reg32, xmmreg Double FADD 3
							       1/1  1/1 */
  16,					/* size of l1 cache.  */
  2048,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),			/* cost of FSQRT instruction.  */

  /* BDVER2 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,					/* scalar_stmt_cost.  */
  4,					/* scalar load_cost.  */
  4,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  4,					/* vec_align_load_cost.  */
  4,					/* vec_unalign_load_cost.  */
  4,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs btver1_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/* HI */
   COSTS_N_INSNS (51),			/* SI */
   COSTS_N_INSNS (83),			/* DI */
   COSTS_N_INSNS (83)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
					/* On K8:
					    MOVD reg64, xmmreg Double FSTORE 4
					    MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg Double FADD 3
							       1/1  1/1
					    MOVD reg32, xmmreg Double FADD 3
							       1/1  1/1 */
  32,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */

  /* BTVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,					/* scalar_stmt_cost.  */
  2,					/* scalar load_cost.  */
  2,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  2,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  2,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (3),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (4),			/* constant shift costs */
  {COSTS_N_INSNS (15),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (15),			/* HI */
   COSTS_N_INSNS (15),			/* SI */
   COSTS_N_INSNS (15),			/* DI */
   COSTS_N_INSNS (15)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),			/* HI */
   COSTS_N_INSNS (56),			/* SI */
   COSTS_N_INSNS (56),			/* DI */
   COSTS_N_INSNS (56)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  16,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  12,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  10,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  256,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (5),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),			/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
   {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (10),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (10),			/* HI */
   COSTS_N_INSNS (10),			/* SI */
   COSTS_N_INSNS (10),			/* DI */
   COSTS_N_INSNS (10)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),			/* HI */
   COSTS_N_INSNS (66),			/* SI */
   COSTS_N_INSNS (66),			/* DI */
   COSTS_N_INSNS (66)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  16,					/* "large" insn */
  17,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  3,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  6,					/* cost of moving MMX register */
  {12, 12},				/* cost of loading MMX registers
					   in SImode and DImode */
  {12, 12},				/* cost of storing MMX registers
					   in SImode and DImode */
  6,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {12, 12, 12},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  8,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  1024,					/* size of l2 cache.  */
  128,					/* size of prefetch block */
  8,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),			/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
	      {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
   {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (2)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  17,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {8, 8, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {8, 8, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache.  */
  256,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  3,					/* Branch cost */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
/* Generic64 should produce code tuned for Nocona and K8.  */

static const
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  /* On all chips taken into consideration lea is 2 cycles and more.  With
     this cost however our current implementation of synth_mult results in
     use of unnecessary temporary registers causing regression on several
     SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,    /* cost of a lea instruction */
  COSTS_N_INSNS (1),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (3),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),       /*                               HI */
   COSTS_N_INSNS (3),       /*                               SI */
   COSTS_N_INSNS (4),       /*                               DI */
   COSTS_N_INSNS (2)},      /*                            other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),      /*                          HI */
   COSTS_N_INSNS (42),      /*                          SI */
   COSTS_N_INSNS (74),      /*                          DI */
   COSTS_N_INSNS (74)},     /*                       other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  8,                        /* "large" insn */
  17,                       /* MOVE_RATIO */
  4,                        /* cost for loading QImode using movzbl */
  {4, 4, 4},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {4, 4, 4},                /* cost of storing integer registers */
  4,                        /* cost of reg,reg fld/fst */
  {12, 12, 12},             /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {6, 6, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {8, 8},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {8, 8},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {8, 8, 8},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {8, 8, 8},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  5,                        /* MMX or SSE register to integer */
  32,                       /* size of l1 cache.  */
  512,                      /* size of l2 cache.  */
  64,                       /* size of prefetch block */
  6,                        /* number of parallel prefetches */
  /* Benchmarks shows large regressions on K8 sixtrack benchmark when this
     value is increased to perhaps more appropriate value of 5.  */
  3,                        /* Branch cost */
  COSTS_N_INSNS (8),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),       /* cost of FSQRT instruction.  */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,    /* scalar_stmt_cost.  */
  1,    /* scalar load_cost.  */
  1,    /* scalar_store_cost.  */
  1,    /* vec_stmt_cost.  */
  1,    /* vec_to_scalar_cost.  */
  1,    /* scalar_to_vec_cost.  */
  1,    /* vec_align_load_cost.  */
  2,    /* vec_unalign_load_cost.  */
  1,    /* vec_store_cost.  */
  3,    /* cond_taken_branch_cost.  */
  1,    /* cond_not_taken_branch_cost.  */
};
/* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
   Core 2 and K8.  */

static const
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,    /* cost of a lea instruction */
  COSTS_N_INSNS (1),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (3),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),       /*                               HI */
   COSTS_N_INSNS (3),       /*                               SI */
   COSTS_N_INSNS (4),       /*                               DI */
   COSTS_N_INSNS (2)},      /*                            other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),      /*                          HI */
   COSTS_N_INSNS (42),      /*                          SI */
   COSTS_N_INSNS (74),      /*                          DI */
   COSTS_N_INSNS (74)},     /*                       other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  8,                        /* "large" insn */
  17,                       /* MOVE_RATIO */
  4,                        /* cost for loading QImode using movzbl */
  {4, 4, 4},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {4, 4, 4},                /* cost of storing integer registers */
  4,                        /* cost of reg,reg fld/fst */
  {12, 12, 12},             /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {6, 6, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {8, 8},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {8, 8},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {8, 8, 8},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {8, 8, 8},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  5,                        /* MMX or SSE register to integer */
  32,                       /* size of l1 cache.  */
  256,                      /* size of l2 cache.  */
  64,                       /* size of prefetch block */
  6,                        /* number of parallel prefetches */
  3,                        /* Branch cost */
  COSTS_N_INSNS (8),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),       /* cost of FSQRT instruction.  */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,    /* scalar_stmt_cost.  */
  1,    /* scalar load_cost.  */
  1,    /* scalar_store_cost.  */
  1,    /* vec_stmt_cost.  */
  1,    /* vec_to_scalar_cost.  */
  1,    /* scalar_to_vec_cost.  */
  1,    /* vec_align_load_cost.  */
  2,    /* vec_unalign_load_cost.  */
  1,    /* vec_store_cost.  */
  3,    /* cond_taken_branch_cost.  */
  1,    /* cond_not_taken_branch_cost.  */
};
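/* All the COSTS_N_INSNS values in the tables above are in the units
   defined by rtl.h, which scales an instruction count so it can be
   compared against sub-instruction costs:

     #define COSTS_N_INSNS(N) ((N) * 4)

   so COSTS_N_INSNS (40) for FDIV reads as "about as expensive as forty
   single-cycle instructions".  */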
const struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_P4_NOCONA (m_PENT4 | m_NOCONA)
#define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
#define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
#define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
#define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
#define m_COREI7 (m_COREI7_32 | m_COREI7_64)
#define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
#define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
#define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
#define m_ATOM (1<<PROCESSOR_ATOM)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (1<<PROCESSOR_BDVER1)
#define m_BDVER2 (1<<PROCESSOR_BDVER2)
#define m_BDVER (m_BDVER1 | m_BDVER2)
#define m_BTVER1 (1<<PROCESSOR_BTVER1)
#define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
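/* Since each m_* macro is a single bit keyed by enum processor_type,
   testing whether the current tuning target belongs to any set of CPUs
   is a single AND.  A minimal sketch, illustrative only (is_amd_tuning
   is a hypothetical name):  */
#if 0
  bool is_amd_tuning = ((1u << ix86_tune) & m_AMD_MULTIPLE) != 0;
#endif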
/* Feature tests against the various tunings.  */
unsigned char ix86_tune_features[X86_TUNE_LAST];
/* Feature tests against the various tunings used to create ix86_tune_features
   based on the processor mask.  */
static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
  /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
     negatively, so enabling for Generic64 seems like a good code size
     tradeoff.  We can't enable it for 32bit generic because it does not
     work well with PPro base chips.  */
  m_386 | m_CORE2I7_64 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC64,

  /* X86_TUNE_PUSH_MEMORY */
  m_386 | m_P4_NOCONA | m_CORE2I7 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_ZERO_EXTEND_WITH_AND */
  m_486 | m_PENT,

  /* X86_TUNE_UNROLL_STRLEN */
  m_486 | m_PENT | m_PPRO | m_ATOM | m_CORE2I7 | m_K6 | m_AMD_MULTIPLE
  | m_GENERIC,

  /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
     on simulation results.  But after P4 was made, no performance benefit
     was observed with branch hints.  It also increases the code size.
     As a result, icc never generates branch hints.  */
  0,

  /* X86_TUNE_DOUBLE_WITH_ADD */
  ~m_386,

  /* X86_TUNE_USE_SAHF */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_K8 | m_AMDFAM10
  | m_BDVER | m_BTVER1 | m_GENERIC,

  /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
     partial dependencies.  */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE
  | m_GENERIC,

  /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
     register stalls on the Generic32 compilation setting as well.  However
     in the current implementation the partial register stalls are not
     eliminated very well - they can be introduced via subregs synthesized
     by combine and can happen in caller/callee saving sequences.  Because
     this option pays back little on PPro based chips and is in conflict
     with partial reg dependencies used by Athlon/P4 based chips, it is
     better to leave it off for generic32 for now.  */
  m_PPRO,

  /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
  m_CORE2I7 | m_GENERIC,

  /* X86_TUNE_USE_HIMODE_FIOP */
  m_386 | m_486 | m_K6_GEODE,

  /* X86_TUNE_USE_SIMODE_FIOP */
  ~(m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC),

  /* X86_TUNE_USE_MOV0 */
  m_K6,

  /* X86_TUNE_USE_CLTD */
  ~(m_PENT | m_CORE2I7 | m_ATOM | m_K6 | m_GENERIC),
  /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx.  */
  m_PENT4,

  /* X86_TUNE_SPLIT_LONG_MOVES */
  m_PPRO,

  /* X86_TUNE_READ_MODIFY_WRITE */
  ~m_PENT,

  /* X86_TUNE_READ_MODIFY */
  ~(m_PENT | m_PPRO),

  /* X86_TUNE_PROMOTE_QIMODE */
  m_386 | m_486 | m_PENT | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE
  | m_GENERIC,

  /* X86_TUNE_FAST_PREFIX */
  ~(m_386 | m_486 | m_PENT),

  /* X86_TUNE_SINGLE_STRINGOP */
  m_386 | m_P4_NOCONA,

  /* X86_TUNE_QIMODE_MATH */
  ~0,

  /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
     register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL this option
     might be considered for Generic32 if our scheme for avoiding partial
     stalls was more effective.  */
  ~m_PPRO,

  /* X86_TUNE_PROMOTE_QI_REGS */
  0,

  /* X86_TUNE_PROMOTE_HI_REGS */
  m_PPRO,

  /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
     over esp addition.  */
  m_386 | m_486 | m_PENT | m_PPRO,

  /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
     over esp addition.  */
  m_PENT,

  /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
     over esp subtraction.  */
  m_386 | m_486 | m_PENT | m_K6_GEODE,

  /* X86_TUNE_DOUBLE_PUSH: Enable if double push insn is preferred
     over esp subtraction.  */
  m_PENT | m_K6_GEODE,

  /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
     for DFmode copies */
  ~(m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE
    | m_GENERIC),

  /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
  m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
     conflict here in between PPro/Pentium4 based chips that treat 128bit
     SSE registers as single units versus K8 based chips that divide SSE
     registers into two 64bit halves.  This knob promotes all store
     destinations to be 128bit to allow register renaming on 128bit SSE
     units, but usually results in one extra microop on 64bit SSE units.
     Experimental results show that disabling this option on P4 brings
     over 20% SPECfp regression, while enabling it on K8 brings roughly
     2.4% regression that can be partly masked by careful scheduling of
     moves.  */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMDFAM10 | m_BDVER
  | m_GENERIC,

  /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
  m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER1,

  /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
  m_COREI7 | m_BDVER,

  /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
  m_BDVER,

  /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
     are resolved on SSE register parts instead of whole registers, so we may
     maintain just the lower part of scalar values in proper format leaving
     the upper part undefined.  */
  m_ATHLON_K8,

  /* X86_TUNE_SSE_TYPELESS_STORES */
  m_AMD_MULTIPLE,

  /* X86_TUNE_SSE_LOAD0_BY_PXOR */
  m_PPRO | m_P4_NOCONA,

  /* X86_TUNE_MEMORY_MISMATCH_STALL */
  m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
  /* X86_TUNE_PROLOGUE_USING_MOVE */
  m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,

  /* X86_TUNE_EPILOGUE_USING_MOVE */
  m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,

  /* X86_TUNE_SHIFT1 */
  ~m_486,

  /* X86_TUNE_USE_FFREEP */
  m_AMD_MULTIPLE,

  /* X86_TUNE_INTER_UNIT_MOVES */
  ~(m_AMD_MULTIPLE | m_GENERIC),

  /* X86_TUNE_INTER_UNIT_CONVERSIONS */
  ~(m_AMDFAM10 | m_BDVER),

  /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
     than 4 branch instructions in the 16 byte window.  */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_SCHEDULE */
  m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE
  | m_GENERIC,

  /* X86_TUNE_USE_BT */
  m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_USE_INCDEC */
  ~(m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GENERIC),

  /* X86_TUNE_PAD_RETURNS */
  m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short function.  */
  m_ATOM,

  /* X86_TUNE_EXT_80387_CONSTANTS */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_ATHLON_K8
  | m_GENERIC,

  /* X86_TUNE_SHORTEN_X87_SSE */
  ~m_K8,

  /* X86_TUNE_AVOID_VECTOR_DECODE */
  m_CORE2I7_64 | m_K8 | m_GENERIC64,

  /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for
     HImode and SImode multiply, but 386 and 486 do HImode multiply
     faster.  */
  ~(m_386 | m_486),

  /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
     vector path on AMD machines.  */
  m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,

  /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
     machines.  */
  m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,

  /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
     than a MOV.  */
  m_PENT,

  /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
     but one byte longer.  */
  m_PENT,

  /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
     operand that cannot be represented using a modRM byte.  The XOR
     replacement is long decoded, so this split helps here as well.  */
  m_K6,

  /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
     from FP to FP.  */
  m_CORE2I7 | m_AMDFAM10 | m_GENERIC,

  /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
     from integer to FP.  */
  m_AMDFAM10,

  /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
     with a subsequent conditional jump instruction into a single
     compare-and-branch uop.  */
  m_BDVER,

  /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit.  This flag
     will impact LEA instruction selection.  */
  m_ATOM,

  /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
     instructions.  */
  ~m_ATOM,

  /* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching
     at -O3.  For the moment, the prefetching seems badly tuned for Intel
     chips.  */
  m_K6_GEODE | m_AMD_MULTIPLE,

  /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
     the auto-vectorizer.  */
  m_BDVER,

  /* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations
     during reassociation of integer computation.  */
  m_ATOM,

  /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
     during reassociation of fp computation.  */
  m_ATOM,
};
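/* This table is folded into the boolean ix86_tune_features[] array at
   option-override time by masking every entry against the bit of the
   selected processor; a sketch mirroring the loop that appears in
   ix86_option_override_internal later in this file:  */
#if 0
  unsigned int tune_mask = 1u << ix86_tune;
  for (i = 0; i < X86_TUNE_LAST; ++i)
    ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & tune_mask);
#endif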
/* Feature tests against the various architecture variations.  */
unsigned char ix86_arch_features[X86_ARCH_LAST];

/* Feature tests against the various architecture variations, used to create
   ix86_arch_features based on the processor mask.  */
static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
  /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro.  */
  ~(m_386 | m_486 | m_PENT | m_K6),

  /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486.  */
  ~m_386,

  /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium.  */
  ~(m_386 | m_486),

  /* X86_ARCH_XADD: Exchange and add was added for 80486.  */
  ~m_386,

  /* X86_ARCH_BSWAP: Byteswap was added for 80486.  */
  ~m_386,
};
static const unsigned int x86_accumulate_outgoing_args
  = m_PPRO | m_P4_NOCONA | m_ATOM | m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC;

static const unsigned int x86_arch_always_fancy_math_387
  = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE
    | m_GENERIC;

static const unsigned int x86_avx256_split_unaligned_load
  = m_COREI7 | m_GENERIC;

static const unsigned int x86_avx256_split_unaligned_store
  = m_COREI7 | m_BDVER | m_GENERIC;
/* In case the average insn count for a single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, fpcr, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  /* SSE registers */
  SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  /* MMX registers */
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  /* REX registers */
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  /* SSE REX registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
2256 /* The "default" register map used in 32bit mode. */
2258 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
2260 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2261 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2262 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2263 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2264 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2265 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2266 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2269 /* The "default" register map used in 64bit mode. */
2271 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
2273 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2274 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2275 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2276 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2277 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2278 8,9,10,11,12,13,14,15, /* extended integer registers */
2279 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8 for %eip (no gcc equivalent)
	9 for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to
   understand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)  */

int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,           /* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,   /* fp regs */
  -1, 9, -1, -1, -1,                /* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,   /* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,   /* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,   /* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,   /* extended SSE registers */
};
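/* Given these maps, translating a GCC hard register number into the
   register number emitted in debug info is a single array lookup.
   Illustrative only (dwarf_regno is a hypothetical name):  */
#if 0
  /* %ecx is gcc regno 2 and DWARF regno 1 under the SVR4 numbering.  */
  int dwarf_regno = svr4_dbx_register_map[2];   /* == 1 */
#endif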
/* Define parameter passing and return registers.  */

static int const x86_64_int_parameter_registers[6] =
{
  DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
};

static int const x86_64_ms_abi_int_parameter_registers[4] =
{
  CX_REG, DX_REG, R8_REG, R9_REG
};

static int const x86_64_int_return_registers[4] =
{
  AX_REG, DX_REG, DI_REG, SI_REG
};
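/* Under the SysV x86-64 ABI the first six integral arguments are passed
   in %rdi, %rsi, %rdx, %rcx, %r8 and %r9, in that order, so the hard
   register for integer argument slot N is simply
   x86_64_int_parameter_registers[N].  Illustrative only (arg2_regno is
   a hypothetical name):  */
#if 0
  /* Register holding the third (0-based index 2) integer argument: %rdx.  */
  int arg2_regno = x86_64_int_parameter_registers[2];   /* DX_REG */
#endif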
/* Define the structure for the machine field in struct function.  */

struct GTY(()) stack_local_entry {
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};
/* Structure describing stack frame layout.
   Stack grows downward:

   saved static chain			if ix86_static_chain_on_stack

   saved frame pointer			if frame_pointer_needed
					<- HARD_FRAME_POINTER
   [saved SSE regs]
					<- sse_regs_save_offset
   [va_arg registers]			|

   [padding2]				| = to_allocate
*/
struct ix86_frame
{
  int outgoing_arguments_size;
  HOST_WIDE_INT frame;

  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;
  HOST_WIDE_INT hfp_save_offset;
  HOST_WIDE_INT reg_save_offset;
  HOST_WIDE_INT sse_reg_save_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};
/* Which cpu are we scheduling for.  */
enum attr_cpu ix86_schedule;

/* Which cpu are we optimizing for.  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;

/* -mstackrealign option */
static const char ix86_force_align_arg_pointer_string[]
  = "force_align_arg_pointer";
static rtx (*ix86_gen_leave) (void);
static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Alignment for incoming stack boundary in bits specified at
   command line.  */
static unsigned int ix86_user_incoming_stack_boundary;

/* Default alignment for incoming stack boundary in bits.  */
static unsigned int ix86_default_incoming_stack_boundary;

/* Alignment for incoming stack boundary in bits.  */
unsigned int ix86_incoming_stack_boundary;

/* Calling abi specific va_list type nodes.  */
static GTY(()) tree sysv_va_list_type_node;
static GTY(()) tree ms_va_list_type_node;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;
/* Fence to use after loop using movnt.  */
tree x86_mfence;

/* Register class used for passing a given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of the SSESF and SSEDF classes, which are basically SSE class; gcc will
   just use an SF or DFmode move instead of a DImode one to avoid
   reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };

#define MAX_CLASSES 4
/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table[5];
static bool ext_80387_constants_init = 0;

static struct machine_function * ix86_init_machine_status (void);
static rtx ix86_function_value (const_tree, const_tree, bool);
static bool ix86_function_value_regno_p (const unsigned int);
static unsigned int ix86_function_arg_boundary (enum machine_mode,
						const_tree);
static rtx ix86_static_chain (const_tree, bool);
static int ix86_function_regparm (const_tree, const_tree);
static void ix86_compute_frame_layout (struct ix86_frame *);
static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
						 rtx, rtx, int);
static void ix86_add_new_builtins (HOST_WIDE_INT);
static tree ix86_canonical_va_list_type (tree);
static void predict_jump (int);
static unsigned int split_stack_prologue_scratch_regno (void);
static bool i386_asm_output_addr_const_extra (FILE *, rtx);
enum ix86_function_specific_strings
{
  IX86_FUNCTION_SPECIFIC_ARCH,
  IX86_FUNCTION_SPECIFIC_TUNE,
  IX86_FUNCTION_SPECIFIC_MAX
};

static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
				 const char *, enum fpmath_unit, bool);
static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
static void ix86_function_specific_save (struct cl_target_option *);
static void ix86_function_specific_restore (struct cl_target_option *);
static void ix86_function_specific_print (FILE *, int,
					  struct cl_target_option *);
static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
static bool ix86_valid_target_attribute_inner_p (tree, char *[],
						 struct gcc_options *);
static bool ix86_can_inline_p (tree, tree);
static void ix86_set_current_function (tree);
static unsigned int ix86_minimum_incoming_stack_boundary (bool);

static enum calling_abi ix86_function_abi (const_tree);
#ifndef SUBTARGET32_DEFAULT_CPU
#define SUBTARGET32_DEFAULT_CPU "i386"
#endif

/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif
/* Whether -mtune= or -march= were specified */
static int ix86_tune_defaulted;
static int ix86_arch_specified;

/* Vectorization library interface and handlers.  */
static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);

static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
/* Processor target table, indexed by processor number */
struct ptt
{
  const struct processor_costs *cost;   /* Processor costs */
  const int align_loop;                 /* Default alignments.  */
  const int align_loop_max_skip;
  const int align_jump;
  const int align_jump_max_skip;
  const int align_func;
};

static const struct ptt processor_target_table[PROCESSOR_max] =
{
2574 {&i386_cost
, 4, 3, 4, 3, 4},
2575 {&i486_cost
, 16, 15, 16, 15, 16},
2576 {&pentium_cost
, 16, 7, 16, 7, 16},
2577 {&pentiumpro_cost
, 16, 15, 16, 10, 16},
2578 {&geode_cost
, 0, 0, 0, 0, 0},
2579 {&k6_cost
, 32, 7, 32, 7, 32},
2580 {&athlon_cost
, 16, 7, 16, 7, 16},
2581 {&pentium4_cost
, 0, 0, 0, 0, 0},
2582 {&k8_cost
, 16, 7, 16, 7, 16},
2583 {&nocona_cost
, 0, 0, 0, 0, 0},
2584 /* Core 2 32-bit. */
2585 {&generic32_cost
, 16, 10, 16, 10, 16},
2586 /* Core 2 64-bit. */
2587 {&generic64_cost
, 16, 10, 16, 10, 16},
2588 /* Core i7 32-bit. */
2589 {&generic32_cost
, 16, 10, 16, 10, 16},
2590 /* Core i7 64-bit. */
2591 {&generic64_cost
, 16, 10, 16, 10, 16},
2592 {&generic32_cost
, 16, 7, 16, 7, 16},
2593 {&generic64_cost
, 16, 10, 16, 10, 16},
2594 {&amdfam10_cost
, 32, 24, 32, 7, 32},
2595 {&bdver1_cost
, 32, 24, 32, 7, 32},
2596 {&bdver2_cost
, 32, 24, 32, 7, 32},
2597 {&btver1_cost
, 32, 24, 32, 7, 32},
2598 {&atom_cost
, 16, 15, 16, 7, 16}
static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
/* Return true if a red-zone is in use.  */

static inline bool
ix86_using_red_zone (void)
{
  return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
}
/* Return a string that documents the current -m options.  The caller is
   responsible for freeing the string.  */

static char *
ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
		    const char *tune, enum fpmath_unit fpmath,
		    bool add_nl_p)
{
  struct ix86_target_opts
  {
    const char *option;       /* option string */
    HOST_WIDE_INT mask;       /* isa mask options */
  };
  /* This table is ordered so that options like -msse4.2 that imply
     preceding options match those first.  */
  static struct ix86_target_opts isa_opts[] =
  {
    { "-m64",          OPTION_MASK_ISA_64BIT },
    { "-mfma4",        OPTION_MASK_ISA_FMA4 },
    { "-mfma",         OPTION_MASK_ISA_FMA },
    { "-mxop",         OPTION_MASK_ISA_XOP },
    { "-mlwp",         OPTION_MASK_ISA_LWP },
    { "-msse4a",       OPTION_MASK_ISA_SSE4A },
    { "-msse4.2",      OPTION_MASK_ISA_SSE4_2 },
    { "-msse4.1",      OPTION_MASK_ISA_SSE4_1 },
    { "-mssse3",       OPTION_MASK_ISA_SSSE3 },
    { "-msse3",        OPTION_MASK_ISA_SSE3 },
    { "-msse2",        OPTION_MASK_ISA_SSE2 },
    { "-msse",         OPTION_MASK_ISA_SSE },
    { "-m3dnow",       OPTION_MASK_ISA_3DNOW },
    { "-m3dnowa",      OPTION_MASK_ISA_3DNOW_A },
    { "-mmmx",         OPTION_MASK_ISA_MMX },
    { "-mabm",         OPTION_MASK_ISA_ABM },
    { "-mbmi",         OPTION_MASK_ISA_BMI },
    { "-mbmi2",        OPTION_MASK_ISA_BMI2 },
    { "-mlzcnt",       OPTION_MASK_ISA_LZCNT },
    { "-mtbm",         OPTION_MASK_ISA_TBM },
    { "-mpopcnt",      OPTION_MASK_ISA_POPCNT },
    { "-mmovbe",       OPTION_MASK_ISA_MOVBE },
    { "-mcrc32",       OPTION_MASK_ISA_CRC32 },
    { "-maes",         OPTION_MASK_ISA_AES },
    { "-mpclmul",      OPTION_MASK_ISA_PCLMUL },
    { "-mfsgsbase",    OPTION_MASK_ISA_FSGSBASE },
    { "-mrdrnd",       OPTION_MASK_ISA_RDRND },
    { "-mf16c",        OPTION_MASK_ISA_F16C },
  };
  static struct ix86_target_opts flag_opts[] =
  {
    { "-m128bit-long-double",             MASK_128BIT_LONG_DOUBLE },
    { "-m80387",                          MASK_80387 },
    { "-maccumulate-outgoing-args",       MASK_ACCUMULATE_OUTGOING_ARGS },
    { "-malign-double",                   MASK_ALIGN_DOUBLE },
    { "-mcld",                            MASK_CLD },
    { "-mfp-ret-in-387",                  MASK_FLOAT_RETURNS },
    { "-mieee-fp",                        MASK_IEEE_FP },
    { "-minline-all-stringops",           MASK_INLINE_ALL_STRINGOPS },
    { "-minline-stringops-dynamically",   MASK_INLINE_STRINGOPS_DYNAMICALLY },
    { "-mms-bitfields",                   MASK_MS_BITFIELD_LAYOUT },
    { "-mno-align-stringops",             MASK_NO_ALIGN_STRINGOPS },
    { "-mno-fancy-math-387",              MASK_NO_FANCY_MATH_387 },
    { "-mno-push-args",                   MASK_NO_PUSH_ARGS },
    { "-mno-red-zone",                    MASK_NO_RED_ZONE },
    { "-momit-leaf-frame-pointer",        MASK_OMIT_LEAF_FRAME_POINTER },
    { "-mrecip",                          MASK_RECIP },
    { "-mrtd",                            MASK_RTD },
    { "-msseregparm",                     MASK_SSEREGPARM },
    { "-mstack-arg-probe",                MASK_STACK_PROBE },
    { "-mtls-direct-seg-refs",            MASK_TLS_DIRECT_SEG_REFS },
    { "-mvect8-ret-in-mem",               MASK_VECT8_RETURNS },
    { "-m8bit-idiv",                      MASK_USE_8BIT_IDIV },
    { "-mvzeroupper",                     MASK_VZEROUPPER },
    { "-mavx256-split-unaligned-load",    MASK_AVX256_SPLIT_UNALIGNED_LOAD },
    { "-mavx256-split-unaligned-store",   MASK_AVX256_SPLIT_UNALIGNED_STORE },
    { "-mprefer-avx128",                  MASK_PREFER_AVX128 },
  };
  const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
  char isa_other[40];
  char target_other[40];
  unsigned num = 0;
  unsigned i, j;
  char *ret;
  char *ptr;
  size_t len;
  size_t line_len;
  size_t sep_len;

  memset (opts, '\0', sizeof (opts));
  /* Add -march= option.  */
  if (arch)
    {
      opts[num][0] = "-march=";
      opts[num++][1] = arch;
    }

  /* Add -mtune= option.  */
  if (tune)
    {
      opts[num][0] = "-mtune=";
      opts[num++][1] = tune;
    }

  /* Pick out the options in isa options.  */
  for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
    {
      if ((isa & isa_opts[i].mask) != 0)
	{
	  opts[num++][0] = isa_opts[i].option;
	  isa &= ~ isa_opts[i].mask;
	}
    }

  if (isa && add_nl_p)
    {
      opts[num++][0] = isa_other;
      sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
	       isa);
    }

  /* Add flag options.  */
  for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
    {
      if ((flags & flag_opts[i].mask) != 0)
	{
	  opts[num++][0] = flag_opts[i].option;
	  flags &= ~ flag_opts[i].mask;
	}
    }

  if (flags && add_nl_p)
    {
      opts[num++][0] = target_other;
      sprintf (target_other, "(other flags: %#x)", flags);
    }

  /* Add -fpmath= option.  */
  if (fpmath)
    {
      opts[num][0] = "-mfpmath=";
      switch ((int) fpmath)
	{
	case FPMATH_387:
	  opts[num++][1] = "387";
	  break;

	case FPMATH_SSE:
	  opts[num++][1] = "sse";
	  break;

	case FPMATH_387 | FPMATH_SSE:
	  opts[num++][1] = "sse+387";
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  gcc_assert (num < ARRAY_SIZE (opts));
  /* Size the string.  */
  len = 0;
  sep_len = (add_nl_p) ? 3 : 1;
  for (i = 0; i < num; i++)
    {
      len += sep_len;
      for (j = 0; j < 2; j++)
	if (opts[i][j])
	  len += strlen (opts[i][j]);
    }

  /* Build the string.  */
  ret = ptr = (char *) xmalloc (len);
  line_len = 0;

  for (i = 0; i < num; i++)
    {
      size_t len2[2];

      for (j = 0; j < 2; j++)
	len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;

      if (i != 0)
	{
	  *ptr++ = ' ';
	  line_len++;

	  if (add_nl_p && line_len + len2[0] + len2[1] > 70)
	    {
	      *ptr++ = '\\';
	      *ptr++ = '\n';
	      line_len = 0;
	    }
	}

      for (j = 0; j < 2; j++)
	if (opts[i][j])
	  {
	    memcpy (ptr, opts[i][j], len2[j]);
	    ptr += len2[j];
	    line_len += len2[j];
	  }
    }

  *ptr = '\0';
  gcc_assert (ret + len >= ptr);

  return ret;
}
/* Return true, if profiling code should be emitted before
   the prologue.  Otherwise it returns false.
   Note: For x86 with "hotfix" a sorry is issued.  */
static bool
ix86_profile_before_prologue (void)
{
  return flag_fentry != 0;
}
/* Function that is callable from the debugger to print the current
   options.  */
void ATTRIBUTE_UNUSED
ix86_debug_options (void)
{
  char *opts = ix86_target_string (ix86_isa_flags, target_flags,
				   ix86_arch_string, ix86_tune_string,
				   ix86_fpmath, true);

  if (opts)
    {
      fprintf (stderr, "%s\n\n", opts);
      free (opts);
    }
  else
    fputs ("<no options>\n\n", stderr);

  return;
}
/* Override various settings based on options.  If MAIN_ARGS_P, the
   options are from the command line, otherwise they are from
   attributes.  */

static void
ix86_option_override_internal (bool main_args_p)
{
  int i;
  unsigned int ix86_arch_mask, ix86_tune_mask;
  const bool ix86_tune_specified = (ix86_tune_string != NULL);
  const char *prefix;
  const char *suffix;
  const char *sw;
#define PTA_3DNOW		(HOST_WIDE_INT_1 << 0)
#define PTA_3DNOW_A		(HOST_WIDE_INT_1 << 1)
#define PTA_64BIT		(HOST_WIDE_INT_1 << 2)
#define PTA_ABM			(HOST_WIDE_INT_1 << 3)
#define PTA_AES			(HOST_WIDE_INT_1 << 4)
#define PTA_AVX			(HOST_WIDE_INT_1 << 5)
#define PTA_BMI			(HOST_WIDE_INT_1 << 6)
#define PTA_CX16		(HOST_WIDE_INT_1 << 7)
#define PTA_F16C		(HOST_WIDE_INT_1 << 8)
#define PTA_FMA			(HOST_WIDE_INT_1 << 9)
#define PTA_FMA4		(HOST_WIDE_INT_1 << 10)
#define PTA_FSGSBASE		(HOST_WIDE_INT_1 << 11)
#define PTA_LWP			(HOST_WIDE_INT_1 << 12)
#define PTA_LZCNT		(HOST_WIDE_INT_1 << 13)
#define PTA_MMX			(HOST_WIDE_INT_1 << 14)
#define PTA_MOVBE		(HOST_WIDE_INT_1 << 15)
#define PTA_NO_SAHF		(HOST_WIDE_INT_1 << 16)
#define PTA_PCLMUL		(HOST_WIDE_INT_1 << 17)
#define PTA_POPCNT		(HOST_WIDE_INT_1 << 18)
#define PTA_PREFETCH_SSE	(HOST_WIDE_INT_1 << 19)
#define PTA_RDRND		(HOST_WIDE_INT_1 << 20)
#define PTA_SSE			(HOST_WIDE_INT_1 << 21)
#define PTA_SSE2		(HOST_WIDE_INT_1 << 22)
#define PTA_SSE3		(HOST_WIDE_INT_1 << 23)
#define PTA_SSE4_1		(HOST_WIDE_INT_1 << 24)
#define PTA_SSE4_2		(HOST_WIDE_INT_1 << 25)
#define PTA_SSE4A		(HOST_WIDE_INT_1 << 26)
#define PTA_SSSE3		(HOST_WIDE_INT_1 << 27)
#define PTA_TBM			(HOST_WIDE_INT_1 << 28)
#define PTA_XOP			(HOST_WIDE_INT_1 << 29)
#define PTA_AVX2		(HOST_WIDE_INT_1 << 30)
#define PTA_BMI2		(HOST_WIDE_INT_1 << 31)
/* if this reaches 64, need to widen struct pta flags below */
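/* Each PTA_* bit records one ISA capability of a -march target.  The
   selection loop below turns those bits into OPTION_MASK_ISA_* flags
   unless the user already set the ISA explicitly; the recurring pattern
   (taken from that loop) looks like:  */
#if 0
  if (processor_alias_table[i].flags & PTA_SSE2
      && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
    ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
#endif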
  static const struct pta
    {
      const char *const name;           /* processor name or nickname.  */
      const enum processor_type processor;
      const enum attr_cpu schedule;
      const unsigned HOST_WIDE_INT flags;
    }
  const processor_alias_table[] =
    {
2944 {"i386", PROCESSOR_I386
, CPU_NONE
, 0},
2945 {"i486", PROCESSOR_I486
, CPU_NONE
, 0},
2946 {"i586", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
2947 {"pentium", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
2948 {"pentium-mmx", PROCESSOR_PENTIUM
, CPU_PENTIUM
, PTA_MMX
},
2949 {"winchip-c6", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
},
2950 {"winchip2", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
},
2951 {"c3", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
},
2952 {"c3-2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
| PTA_SSE
},
2953 {"i686", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
2954 {"pentiumpro", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
2955 {"pentium2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
},
2956 {"pentium3", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2958 {"pentium3m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2960 {"pentium-m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2961 PTA_MMX
| PTA_SSE
| PTA_SSE2
},
2962 {"pentium4", PROCESSOR_PENTIUM4
, CPU_NONE
,
2963 PTA_MMX
|PTA_SSE
| PTA_SSE2
},
2964 {"pentium4m", PROCESSOR_PENTIUM4
, CPU_NONE
,
2965 PTA_MMX
| PTA_SSE
| PTA_SSE2
},
2966 {"prescott", PROCESSOR_NOCONA
, CPU_NONE
,
2967 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
},
2968 {"nocona", PROCESSOR_NOCONA
, CPU_NONE
,
2969 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2970 | PTA_CX16
| PTA_NO_SAHF
},
2971 {"core2", PROCESSOR_CORE2_64
, CPU_CORE2
,
2972 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2973 | PTA_SSSE3
| PTA_CX16
},
2974 {"corei7", PROCESSOR_COREI7_64
, CPU_COREI7
,
2975 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2976 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_CX16
},
2977 {"corei7-avx", PROCESSOR_COREI7_64
, CPU_COREI7
,
2978 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2979 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
2980 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
},
2981 {"core-avx-i", PROCESSOR_COREI7_64
, CPU_COREI7
,
2982 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2983 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
2984 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
2985 | PTA_RDRND
| PTA_F16C
},
2986 {"core-avx2", PROCESSOR_COREI7_64
, CPU_COREI7
,
2987 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2988 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
| PTA_AVX2
2989 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
2990 | PTA_RDRND
| PTA_F16C
| PTA_BMI
| PTA_BMI2
| PTA_LZCNT
2991 | PTA_FMA
| PTA_MOVBE
},
2992 {"atom", PROCESSOR_ATOM
, CPU_ATOM
,
2993 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2994 | PTA_SSSE3
| PTA_CX16
| PTA_MOVBE
},
2995 {"geode", PROCESSOR_GEODE
, CPU_GEODE
,
2996 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
|PTA_PREFETCH_SSE
},
2997 {"k6", PROCESSOR_K6
, CPU_K6
, PTA_MMX
},
2998 {"k6-2", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
},
2999 {"k6-3", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
},
3000 {"athlon", PROCESSOR_ATHLON
, CPU_ATHLON
,
3001 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
3002 {"athlon-tbird", PROCESSOR_ATHLON
, CPU_ATHLON
,
3003 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
3004 {"athlon-4", PROCESSOR_ATHLON
, CPU_ATHLON
,
3005 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
3006 {"athlon-xp", PROCESSOR_ATHLON
, CPU_ATHLON
,
3007 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
3008 {"athlon-mp", PROCESSOR_ATHLON
, CPU_ATHLON
,
3009 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
3010 {"x86-64", PROCESSOR_K8
, CPU_K8
,
3011 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_NO_SAHF
},
3012 {"k8", PROCESSOR_K8
, CPU_K8
,
3013 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3014 | PTA_SSE2
| PTA_NO_SAHF
},
3015 {"k8-sse3", PROCESSOR_K8
, CPU_K8
,
3016 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3017 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
3018 {"opteron", PROCESSOR_K8
, CPU_K8
,
3019 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3020 | PTA_SSE2
| PTA_NO_SAHF
},
3021 {"opteron-sse3", PROCESSOR_K8
, CPU_K8
,
3022 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3023 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
3024 {"athlon64", PROCESSOR_K8
, CPU_K8
,
3025 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3026 | PTA_SSE2
| PTA_NO_SAHF
},
3027 {"athlon64-sse3", PROCESSOR_K8
, CPU_K8
,
3028 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3029 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
3030 {"athlon-fx", PROCESSOR_K8
, CPU_K8
,
3031 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3032 | PTA_SSE2
| PTA_NO_SAHF
},
3033 {"amdfam10", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
3034 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3035 | PTA_SSE2
| PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
},
3036 {"barcelona", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
3037 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3038 | PTA_SSE2
| PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
},
3039 {"bdver1", PROCESSOR_BDVER1
, CPU_BDVER1
,
3040 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3041 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3042 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
3043 | PTA_XOP
| PTA_LWP
},
3044 {"bdver2", PROCESSOR_BDVER2
, CPU_BDVER2
,
3045 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3046 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3047 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
3048 | PTA_XOP
| PTA_LWP
| PTA_BMI
| PTA_TBM
| PTA_F16C
3050 {"btver1", PROCESSOR_BTVER1
, CPU_GENERIC64
,
3051 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3052 | PTA_SSSE3
| PTA_SSE4A
|PTA_ABM
| PTA_CX16
},
3053 {"generic32", PROCESSOR_GENERIC32
, CPU_PENTIUMPRO
,
3054 0 /* flags are only used for -march switch. */ },
3055 {"generic64", PROCESSOR_GENERIC64
, CPU_GENERIC64
,
3056 PTA_64BIT
/* flags are only used for -march switch. */ },
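  /* -march= and -mtune= strings are resolved by a linear scan of this
     alias table; a minimal sketch of the lookup shape used twice
     below:  */
#if 0
  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
	ix86_arch = processor_alias_table[i].processor;
	break;
      }
#endif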
  /* -mrecip options.  */
  static struct
    {
      const char *string;       /* option name */
      unsigned int mask;        /* mask bits to set */
    }
  const recip_options[] =
    {
      { "all",       RECIP_MASK_ALL },
      { "none",      RECIP_MASK_NONE },
      { "div",       RECIP_MASK_DIV },
      { "sqrt",      RECIP_MASK_SQRT },
      { "vec-div",   RECIP_MASK_VEC_DIV },
      { "vec-sqrt",  RECIP_MASK_VEC_SQRT },
    };

  int const pta_size = ARRAY_SIZE (processor_alias_table);
  /* Set up prefix/suffix so the error messages refer to either the command
     line argument, or the attribute(target).  */
  if (main_args_p)
    {
      prefix = "-m";
      suffix = "";
      sw = "switch";
    }
  else
    {
      prefix = "option(\"";
      suffix = "\")";
      sw = "attribute";
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
  SUBSUBTARGET_OVERRIDE_OPTIONS;
#endif
  if (TARGET_X32)
    ix86_isa_flags |= OPTION_MASK_ISA_64BIT;

  /* -fPIC is the default for x86_64.  */
  if (TARGET_MACHO && TARGET_64BIT)
    flag_pic = 2;
  /* Need to check -mtune=generic first.  */
  if (ix86_tune_string)
    {
      if (!strcmp (ix86_tune_string, "generic")
	  || !strcmp (ix86_tune_string, "i686")
	  /* As special support for cross compilers we read -mtune=native
	     as -mtune=generic.  With native compilers we won't see the
	     -mtune=native, as it was changed by the driver.  */
	  || !strcmp (ix86_tune_string, "native"))
	{
	  if (TARGET_64BIT)
	    ix86_tune_string = "generic64";
	  else
	    ix86_tune_string = "generic32";
	}
      /* If this call is for setting the option attribute, allow the
	 generic32/generic64 that was previously set.  */
      else if (!main_args_p
	       && (!strcmp (ix86_tune_string, "generic32")
		   || !strcmp (ix86_tune_string, "generic64")))
	;
      else if (!strncmp (ix86_tune_string, "generic", 7))
	error ("bad value (%s) for %stune=%s %s",
	       ix86_tune_string, prefix, suffix, sw);
      else if (!strcmp (ix86_tune_string, "x86-64"))
	warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
		 "%stune=k8%s or %stune=generic%s instead as appropriate",
		 prefix, suffix, prefix, suffix, prefix, suffix);
    }
  else
    {
      if (ix86_arch_string)
	ix86_tune_string = ix86_arch_string;
      if (!ix86_tune_string)
	{
	  ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
	  ix86_tune_defaulted = 1;
	}

      /* ix86_tune_string is set to ix86_arch_string or defaulted.  We
	 need to use a sensible tune option.  */
      if (!strcmp (ix86_tune_string, "generic")
	  || !strcmp (ix86_tune_string, "x86-64")
	  || !strcmp (ix86_tune_string, "i686"))
	{
	  if (TARGET_64BIT)
	    ix86_tune_string = "generic64";
	  else
	    ix86_tune_string = "generic32";
	}
    }
  if (ix86_stringop_alg == rep_prefix_8_byte && !TARGET_64BIT)
    {
      /* rep; movq isn't available in 32-bit code.  */
      error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
      ix86_stringop_alg = no_stringop;
    }
  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
  else
    ix86_arch_specified = 1;

  if (!global_options_set.x_ix86_abi)
    ix86_abi = DEFAULT_ABI;
  if (global_options_set.x_ix86_cmodel)
    {
      switch (ix86_cmodel)
	{
	case CM_SMALL:
	case CM_SMALL_PIC:
	  if (flag_pic)
	    ix86_cmodel = CM_SMALL_PIC;
	  if (!TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "small", "32");
	  break;

	case CM_MEDIUM:
	case CM_MEDIUM_PIC:
	  if (flag_pic)
	    ix86_cmodel = CM_MEDIUM_PIC;
	  if (!TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "medium", "32");
	  else if (TARGET_X32)
	    error ("code model %qs not supported in x32 mode",
		   "medium");
	  break;

	case CM_LARGE:
	case CM_LARGE_PIC:
	  if (flag_pic)
	    ix86_cmodel = CM_LARGE_PIC;
	  if (!TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "large", "32");
	  else if (TARGET_X32)
	    error ("code model %qs not supported in x32 mode",
		   "large");
	  break;

	case CM_32:
	  if (flag_pic)
	    error ("code model %s does not support PIC mode", "32");
	  if (TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "32", "64");
	  break;

	case CM_KERNEL:
	  if (flag_pic)
	    {
	      error ("code model %s does not support PIC mode", "kernel");
	      ix86_cmodel = CM_32;
	    }
	  if (!TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "kernel", "32");
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
	 use of rip-relative addressing.  This eliminates fixups that
	 would otherwise be needed if this object is to be placed in a
	 DLL, and is essentially just as efficient as direct addressing.  */
      if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
	ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
      else if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else
	ix86_cmodel = CM_32;
    }
  if (TARGET_MACHO && ix86_asm_dialect == ASM_INTEL)
    {
      error ("-masm=intel not supported in this configuration");
      ix86_asm_dialect = ASM_ATT;
    }
  if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3256 for (i
= 0; i
< pta_size
; i
++)
3257 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
3259 ix86_schedule
= processor_alias_table
[i
].schedule
;
3260 ix86_arch
= processor_alias_table
[i
].processor
;
3261 /* Default cpu tuning to the architecture. */
3262 ix86_tune
= ix86_arch
;
3264 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
3265 error ("CPU you selected does not support x86-64 "
3268 if (processor_alias_table
[i
].flags
& PTA_MMX
3269 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_MMX
))
3270 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
;
3271 if (processor_alias_table
[i
].flags
& PTA_3DNOW
3272 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW
))
3273 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW
;
3274 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
3275 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW_A
))
3276 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW_A
;
3277 if (processor_alias_table
[i].flags & PTA_SSE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
	if (processor_alias_table[i].flags & PTA_SSE3
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
	if (processor_alias_table[i].flags & PTA_SSSE3
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
	if (processor_alias_table[i].flags & PTA_SSE4_1
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
	if (processor_alias_table[i].flags & PTA_SSE4_2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
	if (processor_alias_table[i].flags & PTA_AVX
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
	  ix86_isa_flags |= OPTION_MASK_ISA_AVX;
	if (processor_alias_table[i].flags & PTA_AVX2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
	  ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
	if (processor_alias_table[i].flags & PTA_FMA
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
	  ix86_isa_flags |= OPTION_MASK_ISA_FMA;
	if (processor_alias_table[i].flags & PTA_SSE4A
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
	if (processor_alias_table[i].flags & PTA_FMA4
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
	  ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
	if (processor_alias_table[i].flags & PTA_XOP
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
	  ix86_isa_flags |= OPTION_MASK_ISA_XOP;
	if (processor_alias_table[i].flags & PTA_LWP
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
	  ix86_isa_flags |= OPTION_MASK_ISA_LWP;
	if (processor_alias_table[i].flags & PTA_ABM
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
	  ix86_isa_flags |= OPTION_MASK_ISA_ABM;
	if (processor_alias_table[i].flags & PTA_BMI
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
	  ix86_isa_flags |= OPTION_MASK_ISA_BMI;
	if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
	  ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
	if (processor_alias_table[i].flags & PTA_TBM
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
	  ix86_isa_flags |= OPTION_MASK_ISA_TBM;
	if (processor_alias_table[i].flags & PTA_BMI2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
	  ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
	if (processor_alias_table[i].flags & PTA_CX16
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
	  ix86_isa_flags |= OPTION_MASK_ISA_CX16;
	if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
	  ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
	if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
	  ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
	if (processor_alias_table[i].flags & PTA_MOVBE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
	  ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
	if (processor_alias_table[i].flags & PTA_AES
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
	  ix86_isa_flags |= OPTION_MASK_ISA_AES;
	if (processor_alias_table[i].flags & PTA_PCLMUL
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
	  ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
	if (processor_alias_table[i].flags & PTA_FSGSBASE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
	  ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
	if (processor_alias_table[i].flags & PTA_RDRND
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
	  ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
	if (processor_alias_table[i].flags & PTA_F16C
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
	  ix86_isa_flags |= OPTION_MASK_ISA_F16C;
	if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
	  x86_prefetch_sse = true;

	break;
      }
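  /* The chain above implements "-march implies its ISAs unless the user
     said otherwise".  Illustrative example: with "-march=corei7
     -mno-sse4.2" the alias table entry carries PTA_SSE4_2, but -mno-sse4.2
     already set OPTION_MASK_ISA_SSE4_2 in ix86_isa_flags_explicit, so the
     corresponding |= is skipped and SSE4.2 stays disabled.  */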
  if (!strcmp (ix86_arch_string, "generic"))
    error ("generic CPU can be used only for %stune=%s %s",
	   prefix, suffix, sw);
  else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
    error ("bad value (%s) for %sarch=%s %s",
	   ix86_arch_string, prefix, suffix, sw);

  ix86_arch_mask = 1u << ix86_arch;
  for (i = 0; i < X86_ARCH_LAST; ++i)
    ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
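  /* The array just filled turns per-architecture questions into O(1)
     lookups: entry I is true iff bit (1u << ix86_arch) was set in
     initial_ix86_arch_features[I].  Illustrative use (assuming an
     X86_ARCH_* index as declared in i386.h):

       if (ix86_arch_features[X86_ARCH_CMOVE])
	 ... the selected -march guarantees cmov ...  */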
  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
      {
	ix86_schedule = processor_alias_table[i].schedule;
	ix86_tune = processor_alias_table[i].processor;
	if (TARGET_64BIT)
	  {
	    if (!(processor_alias_table[i].flags & PTA_64BIT))
	      {
		if (ix86_tune_defaulted)
		  {
		    ix86_tune_string = "x86-64";
		    for (i = 0; i < pta_size; i++)
		      if (! strcmp (ix86_tune_string,
				    processor_alias_table[i].name))
			break;
		    ix86_schedule = processor_alias_table[i].schedule;
		    ix86_tune = processor_alias_table[i].processor;
		  }
		else
		  error ("CPU you selected does not support x86-64 "
			 "instruction set");
	      }
	  }
	else
	  {
	    /* Adjust tuning when compiling for 32-bit ABI.  */
	    switch (ix86_tune)
	      {
	      case PROCESSOR_GENERIC64:
		ix86_tune = PROCESSOR_GENERIC32;
		ix86_schedule = CPU_PENTIUMPRO;
		break;

	      case PROCESSOR_CORE2_64:
		ix86_tune = PROCESSOR_CORE2_32;
		break;

	      case PROCESSOR_COREI7_64:
		ix86_tune = PROCESSOR_COREI7_32;
		break;

	      default:
		break;
	      }
	  }
	/* Intel CPUs have always interpreted SSE prefetch instructions as
	   NOPs; so, we can enable SSE prefetch instructions even when
	   -mtune (rather than -march) points us to a processor that has them.
	   However, the VIA C3 gives a SIGILL, so we only do that for i686 and
	   higher processors.  */
	if (TARGET_CMOVE
	    && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
	  x86_prefetch_sse = true;

	break;
      }
  if (ix86_tune_specified && i == pta_size)
    error ("bad value (%s) for %stune=%s %s",
	   ix86_tune_string, prefix, suffix, sw);

  ix86_tune_mask = 1u << ix86_tune;
  for (i = 0; i < X86_TUNE_LAST; ++i)
    ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
#ifndef USE_IX86_FRAME_POINTER
#define USE_IX86_FRAME_POINTER 0
#endif

#ifndef USE_X86_64_FRAME_POINTER
#define USE_X86_64_FRAME_POINTER 0
#endif

  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overwritten by command line options.  */
  if (TARGET_64BIT)
    {
      if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
	flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
      if (flag_asynchronous_unwind_tables == 2)
	flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = 0;
    }
  else
    {
      if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
	flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
    }
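  /* Net effect of the defaults above (illustrative): plain "-O2" on x86-64
     omits the frame pointer, because USE_X86_64_FRAME_POINTER defaults to 0
     and nothing set global_options_set.x_flag_omit_frame_pointer; an
     explicit -f(no-)omit-frame-pointer sets that flag and bypasses both
     branches.  The magic value 2 for the unwind/pcc flags likewise means
     "not specified on the command line".  */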
  if (optimize_size)
    ix86_cost = &ix86_size_cost;
  else
    ix86_cost = processor_target_table[ix86_tune].cost;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;

  /* Validate -mregparm= value.  */
  if (global_options_set.x_ix86_regparm)
    {
      if (TARGET_64BIT)
	warning (0, "-mregparm is ignored in 64-bit mode");
      if (ix86_regparm > REGPARM_MAX)
	{
	  error ("-mregparm=%d is not between 0 and %d",
		 ix86_regparm, REGPARM_MAX);
	  ix86_regparm = 0;
	}
    }
  if (TARGET_64BIT)
    ix86_regparm = REGPARM_MAX;

  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_tune].align_loop;
      align_loops_max_skip
	= processor_target_table[ix86_tune].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_tune].align_jump;
      align_jumps_max_skip
	= processor_target_table[ix86_tune].align_jump_max_skip;
    }
  if (align_functions == 0)
    align_functions = processor_target_table[ix86_tune].align_func;
  /* Provide default for -mbranch-cost= value.  */
  if (!global_options_set.x_ix86_branch_cost)
    ix86_branch_cost = ix86_cost->branch_cost;

  if (TARGET_64BIT)
    {
      target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;

      /* Enable by default the SSE and MMX builtins.  Do allow the user to
	 explicitly disable any of these.  In particular, disabling SSE and
	 MMX for kernel code is extremely useful.  */
      if (!ix86_arch_specified)
	ix86_isa_flags
	  |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
	       | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);

      if (TARGET_RTD)
	warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
    }
  else
    {
      target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;

      if (!ix86_arch_specified)
	ix86_isa_flags
	  |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;

      /* i386 ABI does not specify red zone.  It still makes sense to use it
	 when the programmer takes care to keep the stack from being
	 destroyed.  */
      if (!(target_flags_explicit & MASK_NO_RED_ZONE))
	target_flags |= MASK_NO_RED_ZONE;
    }
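  /* Illustrative consequence of the 32-bit branch above: the red zone (the
     128 bytes below the stack pointer that x86-64 leaf code may use without
     adjusting the stack pointer) exists only in the x86-64 psABI, so 32-bit
     compilations behave as if -mno-red-zone were given unless the user
     explicitly passed -mred-zone, which records the bit in
     target_flags_explicit.  */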
  /* Keep nonleaf frame pointers.  */
  if (flag_omit_frame_pointer)
    target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
  else if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_finite_math_only)
    target_flags &= ~MASK_IEEE_FP;

  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
    target_flags &= ~MASK_NO_FANCY_MATH_387;

  /* Likewise, if the target doesn't have a 387, or we've specified
     software floating point, don't use 387 inline intrinsics.  */
  if (!TARGET_80387)
    target_flags |= MASK_NO_FANCY_MATH_387;

  /* Turn on MMX builtins for -msse.  */
  if (TARGET_SSE)
    {
      ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
      x86_prefetch_sse = true;
    }

  /* Turn on popcnt instruction for -msse4.2 or -mabm.  */
  if (TARGET_SSE4_2 || TARGET_ABM)
    ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;

  /* Turn on lzcnt instruction for -mabm.  */
  if (TARGET_ABM)
    ix86_isa_flags |= OPTION_MASK_ISA_LZCNT & ~ix86_isa_flags_explicit;
  /* Validate -mpreferred-stack-boundary= value or default it to
     PREFERRED_STACK_BOUNDARY_DEFAULT.  */
  ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
  if (global_options_set.x_ix86_preferred_stack_boundary_arg)
    {
      int min = (TARGET_64BIT ? 4 : 2);
      int max = (TARGET_SEH ? 4 : 12);

      if (ix86_preferred_stack_boundary_arg < min
	  || ix86_preferred_stack_boundary_arg > max)
	{
	  if (min == max)
	    error ("-mpreferred-stack-boundary is not supported "
		   "for this target");
	  else
	    error ("-mpreferred-stack-boundary=%d is not between %d and %d",
		   ix86_preferred_stack_boundary_arg, min, max);
	}
      else
	ix86_preferred_stack_boundary
	  = (1 << ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
    }

  /* Set the default value for -mstackrealign.  */
  if (ix86_force_align_arg_pointer == -1)
    ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
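  /* Worked example for the conversion above: -mpreferred-stack-boundary=4
     stores 4 in ix86_preferred_stack_boundary_arg, so the boundary becomes
     (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. 16-byte stack
     alignment; the 64-bit minimum of 4 matches the psABI requirement.  */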
  ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;

  /* Validate -mincoming-stack-boundary= value or default it to
     MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY.  */
  ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
  if (global_options_set.x_ix86_incoming_stack_boundary_arg)
    {
      if (ix86_incoming_stack_boundary_arg < (TARGET_64BIT ? 4 : 2)
	  || ix86_incoming_stack_boundary_arg > 12)
	error ("-mincoming-stack-boundary=%d is not between %d and 12",
	       ix86_incoming_stack_boundary_arg, TARGET_64BIT ? 4 : 2);
      else
	{
	  ix86_user_incoming_stack_boundary
	    = (1 << ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
	  ix86_incoming_stack_boundary
	    = ix86_user_incoming_stack_boundary;
	}
    }
  /* Accept -msseregparm only if at least SSE support is enabled.  */
  if (TARGET_SSEREGPARM
      && ! TARGET_SSE)
    error ("%ssseregparm%s used without SSE enabled", prefix, suffix);

  if (global_options_set.x_ix86_fpmath)
    {
      if (ix86_fpmath & FPMATH_SSE)
	{
	  if (!TARGET_SSE)
	    {
	      warning (0, "SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else if ((ix86_fpmath & FPMATH_387) && !TARGET_80387)
	    {
	      warning (0, "387 instruction set disabled, using SSE arithmetics");
	      ix86_fpmath = FPMATH_SSE;
	    }
	}
    }
  else
    ix86_fpmath = TARGET_FPMATH_DEFAULT;

  /* If the i387 is disabled, then do not return values in it. */
  if (!TARGET_80387)
    target_flags &= ~MASK_FLOAT_RETURNS;

  /* Use external vectorized library in vectorizing intrinsics.  */
  if (global_options_set.x_ix86_veclibabi_type)
    switch (ix86_veclibabi_type)
      {
      case ix86_veclibabi_type_svml:
	ix86_veclib_handler = ix86_veclibabi_svml;
	break;

      case ix86_veclibabi_type_acml:
	ix86_veclib_handler = ix86_veclibabi_acml;
	break;

      default:
	gcc_unreachable ();
      }
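  /* Illustrative usage: "-mveclibabi=svml -O2 -ftree-vectorize" makes the
     auto-vectorizer query ix86_veclib_handler for a vectorized replacement
     of a scalar libm call and emit a call to the SVML (or, with acml, the
     ACML) entry point returned by the handler.  The authoritative routine
     name tables live in ix86_veclibabi_svml and ix86_veclibabi_acml and
     are not repeated here.  */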
  if ((!USE_IX86_FRAME_POINTER
       || (x86_accumulate_outgoing_args & ix86_tune_mask))
      && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

  /* ??? Unwind info is not correct around the CFG unless either a frame
     pointer is present or M_A_O_A is set.  Fixing this requires rewriting
     unwind info generation to be aware of the CFG and propagating states
     around edges.  */
  if ((flag_unwind_tables || flag_asynchronous_unwind_tables
       || flag_exceptions || flag_non_call_exceptions)
      && flag_omit_frame_pointer
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
    {
      if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
	warning (0, "unwind tables currently require either a frame pointer "
		 "or %saccumulate-outgoing-args%s for correctness",
		 prefix, suffix);
      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
    }

  /* If stack probes are required, the space used for large function
     arguments on the stack must also be probed, so enable
     -maccumulate-outgoing-args so this happens in the prologue.  */
  if (TARGET_STACK_PROBE
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
    {
      if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
	warning (0, "stack probing requires %saccumulate-outgoing-args%s "
		 "for correctness", prefix, suffix);
      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
    }
  /* For sane SSE instruction set generation we need fcomi instruction.
     It is safe to enable all CMOVE instructions.  Also, RDRAND intrinsic
     expands to a sequence that includes conditional move. */
  if (TARGET_SSE || TARGET_RDRND)
    target_flags |= MASK_CMOVE;

  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }
  /* When scheduling description is not available, disable scheduler pass
     so it won't slow down the compilation and make x87 code slower.  */
  if (!TARGET_SCHEDULE)
    flag_schedule_insns_after_reload = flag_schedule_insns = 0;

  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			 ix86_cost->simultaneous_prefetches,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
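  /* maybe_set_param_value only installs these CPU-derived values as
     defaults; a parameter the user pinned on the command line, e.g.
     "--param l1-cache-size=16" (illustrative), is recorded in
     global_options_set.x_param_values and is left untouched here.  */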
  /* Enable sw prefetching at -O3 for CPUs where prefetching is helpful.  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && optimize >= 3
      && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
    flag_prefetch_loop_arrays = 1;

  /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
     can be optimized to ap = __builtin_next_arg (0).  */
  if (!TARGET_64BIT && !flag_split_stack)
    targetm.expand_builtin_va_start = NULL;
  if (TARGET_64BIT)
    {
      ix86_gen_leave = gen_leave_rex64;
      ix86_gen_add3 = gen_adddi3;
      ix86_gen_sub3 = gen_subdi3;
      ix86_gen_sub3_carry = gen_subdi3_carry;
      ix86_gen_one_cmpl2 = gen_one_cmpldi2;
      ix86_gen_monitor = gen_sse3_monitor64;
      ix86_gen_andsp = gen_anddi3;
      ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
      ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
      ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
    }
  else
    {
      ix86_gen_leave = gen_leave;
      ix86_gen_add3 = gen_addsi3;
      ix86_gen_sub3 = gen_subsi3;
      ix86_gen_sub3_carry = gen_subsi3_carry;
      ix86_gen_one_cmpl2 = gen_one_cmplsi2;
      ix86_gen_monitor = gen_sse3_monitor;
      ix86_gen_andsp = gen_andsi3;
      ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
      ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
      ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
    }
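  /* These hooks let word-size-independent expander code emit the right
     pattern without testing TARGET_64BIT at each call site.  Illustrative
     use: "emit_insn (ix86_gen_add3 (dst, src1, src2));" builds an adddi3
     insn under -m64 and an addsi3 insn under -m32.  */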
#ifdef USE_IX86_CLD
  /* Use -mcld by default for 32-bit code if configured with --enable-cld.  */
  if (!TARGET_64BIT)
    target_flags |= MASK_CLD & ~target_flags_explicit;
#endif

  if (!TARGET_64BIT && flag_pic)
    {
      if (flag_fentry > 0)
	sorry ("-mfentry isn%'t supported for 32-bit in combination "
	       "with -fpic");
      flag_fentry = 0;
    }
  else if (TARGET_SEH)
    {
      if (flag_fentry == 0)
	sorry ("-mno-fentry isn%'t compatible with SEH");
      flag_fentry = 1;
    }
  else if (flag_fentry < 0)
    {
#if defined(PROFILE_BEFORE_PROLOGUE)
      flag_fentry = 1;
#else
      flag_fentry = 0;
#endif
    }
  if (TARGET_AVX)
    {
      /* When not optimizing for size, enable vzeroupper optimization for
	 TARGET_AVX with -fexpensive-optimizations and split 32-byte
	 AVX unaligned load/store.  */
      if (!optimize_size)
	{
	  if (flag_expensive_optimizations
	      && !(target_flags_explicit & MASK_VZEROUPPER))
	    target_flags |= MASK_VZEROUPPER;
	  if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
	      && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
	    target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
	  if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
	      && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
	    target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
	  /* Enable 128-bit AVX instruction generation for the
	     auto-vectorizer.  */
	  if (TARGET_AVX128_OPTIMAL
	      && !(target_flags_explicit & MASK_PREFER_AVX128))
	    target_flags |= MASK_PREFER_AVX128;
	}
    }
  else
    {
      /* Disable vzeroupper pass if TARGET_AVX is disabled.  */
      target_flags &= ~MASK_VZEROUPPER;
    }
  if (ix86_recip_name)
    {
      char *p = ASTRDUP (ix86_recip_name);
      char *q;
      unsigned int mask, i;
      bool invert;

      while ((q = strtok (p, ",")) != NULL)
	{
	  p = NULL;
	  if (*q == '!')
	    {
	      invert = true;
	      q++;
	    }
	  else
	    invert = false;

	  if (!strcmp (q, "default"))
	    mask = RECIP_MASK_ALL;
	  else
	    {
	      for (i = 0; i < ARRAY_SIZE (recip_options); i++)
		if (!strcmp (q, recip_options[i].string))
		  {
		    mask = recip_options[i].mask;
		    break;
		  }

	      if (i == ARRAY_SIZE (recip_options))
		{
		  error ("unknown option for -mrecip=%s", q);
		  invert = false;
		  mask = RECIP_MASK_NONE;
		}
	    }

	  recip_mask_explicit |= mask;
	  if (invert)
	    recip_mask &= ~mask;
	  else
	    recip_mask |= mask;
	}
    }

  if (TARGET_RECIP)
    recip_mask |= RECIP_MASK_ALL & ~recip_mask_explicit;
  else if (target_flags_explicit & MASK_RECIP)
    recip_mask &= ~(RECIP_MASK_ALL & ~recip_mask_explicit);
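  /* Parsing example (illustrative): "-mrecip=all,!sqrt" first ORs in
     RECIP_MASK_ALL via the "all" table entry, then the "!"-prefixed
     "sqrt" token clears its bit from recip_mask; both tokens also
     accumulate into recip_mask_explicit, so the TARGET_RECIP defaulting
     just above cannot override the user's explicit choices.  */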
  /* Save the initial options in case the user does function specific
     optimization.  */
  target_option_default_node = target_option_current_node
    = build_target_option_node ();
}

/* Return TRUE if VAL is passed in register with 256bit AVX modes.  */

static bool
function_pass_avx256_p (const_rtx val)
{
  if (!val)
    return false;

  if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
    return true;

  if (GET_CODE (val) == PARALLEL)
    {
      int i;
      rtx r;

      for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
	{
	  r = XVECEXP (val, 0, i);
	  if (GET_CODE (r) == EXPR_LIST
	      && XEXP (r, 0)
	      && REG_P (XEXP (r, 0))
	      && (GET_MODE (XEXP (r, 0)) == OImode
		  || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
	    return true;
	}
    }

  return false;
}

/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
ix86_option_override (void)
{
  ix86_option_override_internal (true);
}
/* Update register usage after having seen the compiler flags.  */

void
ix86_conditional_register_usage (void)
{
  int i;
  unsigned int j;

  /* Values 2 and 3 in the static register tables mean "fixed on 32-bit
     only" and "fixed on 64-bit only" respectively; collapse them to plain
     booleans for the current target.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    {
      if (fixed_regs[i] > 1)
	fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
      if (call_used_regs[i] > 1)
	call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
    }

  /* The PIC register, if it exists, is fixed.  */
  j = PIC_OFFSET_TABLE_REGNUM;
  if (j != INVALID_REGNUM)
    fixed_regs[j] = call_used_regs[j] = 1;

  /* The 64-bit MS_ABI changes the set of call-used registers.  */
  if (TARGET_64BIT_MS_ABI)
    {
      call_used_regs[SI_REG] = 0;
      call_used_regs[DI_REG] = 0;
      call_used_regs[XMM6_REG] = 0;
      call_used_regs[XMM7_REG] = 0;
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
	call_used_regs[i] = 0;
    }

  /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
     other call-clobbered regs for 64-bit.  */
  if (TARGET_64BIT)
    {
      CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);

      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
	if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
	    && call_used_regs[i])
	  SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
    }

  /* If MMX is disabled, squash the registers.  */
  if (! TARGET_MMX)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If SSE is disabled, squash the registers.  */
  if (! TARGET_SSE)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If the FPU is disabled, squash the registers.  */
  if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If 32-bit, squash the 64-bit registers.  */
  if (! TARGET_64BIT)
    {
      for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
	reg_names[i] = "";
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
	reg_names[i] = "";
    }
}
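/* Once squashed above, a register is fixed, call-used and has an empty
   assembler name, so it is effectively invisible: e.g. under -m32 a
   register variable declared with asm("r8") is rejected by the front end
   instead of silently producing 64-bit-only code (illustrative
   consequence, not a case handled in this function).  */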
/* Save the current options */

static void
ix86_function_specific_save (struct cl_target_option *ptr)
{
  ptr->arch = ix86_arch;
  ptr->schedule = ix86_schedule;
  ptr->tune = ix86_tune;
  ptr->branch_cost = ix86_branch_cost;
  ptr->tune_defaulted = ix86_tune_defaulted;
  ptr->arch_specified = ix86_arch_specified;
  ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
  ptr->ix86_target_flags_explicit = target_flags_explicit;
  ptr->x_recip_mask_explicit = recip_mask_explicit;

  /* The fields are char but the variables are not; make sure the
     values fit in the fields.  */
  gcc_assert (ptr->arch == ix86_arch);
  gcc_assert (ptr->schedule == ix86_schedule);
  gcc_assert (ptr->tune == ix86_tune);
  gcc_assert (ptr->branch_cost == ix86_branch_cost);
}

/* Restore the current options */

static void
ix86_function_specific_restore (struct cl_target_option *ptr)
{
  enum processor_type old_tune = ix86_tune;
  enum processor_type old_arch = ix86_arch;
  unsigned int ix86_arch_mask, ix86_tune_mask;
  int i;

  ix86_arch = (enum processor_type) ptr->arch;
  ix86_schedule = (enum attr_cpu) ptr->schedule;
  ix86_tune = (enum processor_type) ptr->tune;
  ix86_branch_cost = ptr->branch_cost;
  ix86_tune_defaulted = ptr->tune_defaulted;
  ix86_arch_specified = ptr->arch_specified;
  ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
  target_flags_explicit = ptr->ix86_target_flags_explicit;
  recip_mask_explicit = ptr->x_recip_mask_explicit;

  /* Recreate the arch feature tests if the arch changed */
  if (old_arch != ix86_arch)
    {
      ix86_arch_mask = 1u << ix86_arch;
      for (i = 0; i < X86_ARCH_LAST; ++i)
	ix86_arch_features[i]
	  = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
    }

  /* Recreate the tune optimization tests */
  if (old_tune != ix86_tune)
    {
      ix86_tune_mask = 1u << ix86_tune;
      for (i = 0; i < X86_TUNE_LAST; ++i)
	ix86_tune_features[i]
	  = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
    }
}

/* Print the current options */

static void
ix86_function_specific_print (FILE *file, int indent,
			      struct cl_target_option *ptr)
{
  char *target_string
    = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
			  NULL, NULL, ptr->x_ix86_fpmath, false);

  fprintf (file, "%*sarch = %d (%s)\n",
	   indent, "",
	   ptr->arch,
	   ((ptr->arch < TARGET_CPU_DEFAULT_max)
	    ? cpu_names[ptr->arch]
	    : "<unknown>"));

  fprintf (file, "%*stune = %d (%s)\n",
	   indent, "",
	   ptr->tune,
	   ((ptr->tune < TARGET_CPU_DEFAULT_max)
	    ? cpu_names[ptr->tune]
	    : "<unknown>"));

  fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);

  if (target_string)
    {
      fprintf (file, "%*s%s\n", indent, "", target_string);
      free (target_string);
    }
}
/* Inner function to process the attribute((target(...))), take an argument
   and set the current options from the argument.  If we have a list,
   recursively go over the list.  */

static bool
ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
				     struct gcc_options *enum_opts_set)
{
  char *next_optstr;
  bool ret = true;

#define IX86_ATTR_ISA(S,O)   { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
#define IX86_ATTR_STR(S,O)   { S, sizeof (S)-1, ix86_opt_str, O, 0 }
#define IX86_ATTR_ENUM(S,O)  { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
#define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
#define IX86_ATTR_NO(S,O,M)  { S, sizeof (S)-1, ix86_opt_no, O, M }

  enum ix86_opt_type
  {
    ix86_opt_unknown,
    ix86_opt_yes,
    ix86_opt_no,
    ix86_opt_str,
    ix86_opt_enum,
    ix86_opt_isa
  };

  static const struct
  {
    const char *string;		/* option string */
    size_t len;			/* length of option string */
    enum ix86_opt_type type;	/* type of option */
    int opt;			/* option number */
    int mask;			/* mask whether option is to be set or cleared */
  } attrs[] = {
    /* isa options */
    IX86_ATTR_ISA ("3dnow",	OPT_m3dnow),
    IX86_ATTR_ISA ("abm",	OPT_mabm),
    IX86_ATTR_ISA ("bmi",	OPT_mbmi),
    IX86_ATTR_ISA ("bmi2",	OPT_mbmi2),
    IX86_ATTR_ISA ("lzcnt",	OPT_mlzcnt),
    IX86_ATTR_ISA ("tbm",	OPT_mtbm),
    IX86_ATTR_ISA ("aes",	OPT_maes),
    IX86_ATTR_ISA ("avx",	OPT_mavx),
    IX86_ATTR_ISA ("avx2",	OPT_mavx2),
    IX86_ATTR_ISA ("mmx",	OPT_mmmx),
    IX86_ATTR_ISA ("pclmul",	OPT_mpclmul),
    IX86_ATTR_ISA ("popcnt",	OPT_mpopcnt),
    IX86_ATTR_ISA ("sse",	OPT_msse),
    IX86_ATTR_ISA ("sse2",	OPT_msse2),
    IX86_ATTR_ISA ("sse3",	OPT_msse3),
    IX86_ATTR_ISA ("sse4",	OPT_msse4),
    IX86_ATTR_ISA ("sse4.1",	OPT_msse4_1),
    IX86_ATTR_ISA ("sse4.2",	OPT_msse4_2),
    IX86_ATTR_ISA ("sse4a",	OPT_msse4a),
    IX86_ATTR_ISA ("ssse3",	OPT_mssse3),
    IX86_ATTR_ISA ("fma4",	OPT_mfma4),
    IX86_ATTR_ISA ("fma",	OPT_mfma),
    IX86_ATTR_ISA ("xop",	OPT_mxop),
    IX86_ATTR_ISA ("lwp",	OPT_mlwp),
    IX86_ATTR_ISA ("fsgsbase",	OPT_mfsgsbase),
    IX86_ATTR_ISA ("rdrnd",	OPT_mrdrnd),
    IX86_ATTR_ISA ("f16c",	OPT_mf16c),

    /* enum options */
    IX86_ATTR_ENUM ("fpmath=",	OPT_mfpmath_),

    /* string options */
    IX86_ATTR_STR ("arch=",	IX86_FUNCTION_SPECIFIC_ARCH),
    IX86_ATTR_STR ("tune=",	IX86_FUNCTION_SPECIFIC_TUNE),

    /* flag options */
    IX86_ATTR_YES ("cld",
		   OPT_mcld,
		   MASK_CLD),

    IX86_ATTR_NO ("fancy-math-387",
		  OPT_mfancy_math_387,
		  MASK_NO_FANCY_MATH_387),

    IX86_ATTR_YES ("ieee-fp",
		   OPT_mieee_fp,
		   MASK_IEEE_FP),

    IX86_ATTR_YES ("inline-all-stringops",
		   OPT_minline_all_stringops,
		   MASK_INLINE_ALL_STRINGOPS),

    IX86_ATTR_YES ("inline-stringops-dynamically",
		   OPT_minline_stringops_dynamically,
		   MASK_INLINE_STRINGOPS_DYNAMICALLY),

    IX86_ATTR_NO ("align-stringops",
		  OPT_mno_align_stringops,
		  MASK_NO_ALIGN_STRINGOPS),

    IX86_ATTR_YES ("recip",
		   OPT_mrecip,
		   MASK_RECIP),
  };
  /* If this is a list, recurse to get the options.  */
  if (TREE_CODE (args) == TREE_LIST)
    {
      bool ret = true;

      for (; args; args = TREE_CHAIN (args))
	if (TREE_VALUE (args)
	    && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
						     p_strings, enum_opts_set))
	  ret = false;

      return ret;
    }

  else if (TREE_CODE (args) != STRING_CST)
    gcc_unreachable ();

  /* Handle multiple arguments separated by commas.  */
  next_optstr = ASTRDUP (TREE_STRING_POINTER (args));

  while (next_optstr && *next_optstr != '\0')
    {
      char *p = next_optstr;
      char *orig_p = p;
      char *comma = strchr (next_optstr, ',');
      const char *opt_string;
      size_t len, opt_len;
      int opt;
      bool opt_set_p;
      char ch;
      unsigned i;
      enum ix86_opt_type type = ix86_opt_unknown;
      int mask = 0;

      if (comma)
	{
	  *comma = '\0';
	  len = comma - next_optstr;
	  next_optstr = comma + 1;
	}
      else
	{
	  len = strlen (p);
	  next_optstr = NULL;
	}

      /* Recognize no-xxx.  */
      if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
	{
	  opt_set_p = false;
	  p += 3;
	  len -= 3;
	}
      else
	opt_set_p = true;

      /* Find the option.  */
      ch = *p;
      opt = N_OPTS;
      for (i = 0; i < ARRAY_SIZE (attrs); i++)
	{
	  type = attrs[i].type;
	  opt_len = attrs[i].len;
	  if (ch == attrs[i].string[0]
	      && ((type != ix86_opt_str && type != ix86_opt_enum)
		  ? len == opt_len
		  : len > opt_len)
	      && memcmp (p, attrs[i].string, opt_len) == 0)
	    {
	      opt = attrs[i].opt;
	      mask = attrs[i].mask;
	      opt_string = attrs[i].string;
	      break;
	    }
	}

      /* Process the option.  */
      if (opt == N_OPTS)
	{
	  error ("attribute(target(\"%s\")) is unknown", orig_p);
	  ret = false;
	}

      else if (type == ix86_opt_isa)
	{
	  struct cl_decoded_option decoded;

	  generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
	  ix86_handle_option (&global_options, &global_options_set,
			      &decoded, input_location);
	}

      else if (type == ix86_opt_yes || type == ix86_opt_no)
	{
	  if (type == ix86_opt_no)
	    opt_set_p = !opt_set_p;

	  if (opt_set_p)
	    target_flags |= mask;
	  else
	    target_flags &= ~mask;
	}

      else if (type == ix86_opt_str)
	{
	  if (p_strings[opt])
	    {
	      error ("option(\"%s\") was already specified", opt_string);
	      ret = false;
	    }
	  else
	    p_strings[opt] = xstrdup (p + opt_len);
	}

      else if (type == ix86_opt_enum)
	{
	  bool arg_ok;
	  int value;

	  arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
	  if (arg_ok)
	    set_option (&global_options, enum_opts_set, opt, value,
			p + opt_len, DK_UNSPECIFIED, input_location,
			global_dc);
	  else
	    {
	      error ("attribute(target(\"%s\")) is unknown", orig_p);
	      ret = false;
	    }
	}

      else
	gcc_unreachable ();
    }

  return ret;
}
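/* Illustrative input this parser accepts:

     __attribute__((target("no-sse3,arch=core2"))) int foo (void);

   "no-sse3" takes the ix86_opt_isa path with opt_set_p cleared by the
   "no-" prefix check, while "arch=core2" is an ix86_opt_str option whose
   argument is stashed in p_strings[IX86_FUNCTION_SPECIFIC_ARCH] for
   ix86_valid_target_attribute_tree to apply.  */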
/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */

tree
ix86_valid_target_attribute_tree (tree args)
{
  const char *orig_arch_string = ix86_arch_string;
  const char *orig_tune_string = ix86_tune_string;
  enum fpmath_unit orig_fpmath_set = global_options_set.x_ix86_fpmath;
  int orig_tune_defaulted = ix86_tune_defaulted;
  int orig_arch_specified = ix86_arch_specified;
  char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
  tree t = NULL_TREE;
  int i;
  struct cl_target_option *def
    = TREE_TARGET_OPTION (target_option_default_node);
  struct gcc_options enum_opts_set;

  memset (&enum_opts_set, 0, sizeof (enum_opts_set));

  /* Process each of the options on the chain.  */
  if (! ix86_valid_target_attribute_inner_p (args, option_strings,
					     &enum_opts_set))
    return NULL_TREE;

  /* If the changed options are different from the default, rerun
     ix86_option_override_internal, and then save the options away.
     The string options are attribute options, and will be undone
     when we copy the save structure.  */
  if (ix86_isa_flags != def->x_ix86_isa_flags
      || target_flags != def->x_target_flags
      || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
      || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
      || enum_opts_set.x_ix86_fpmath)
    {
      /* If we are using the default tune= or arch=, undo the string assigned,
	 and use the default.  */
      if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
	ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
      else if (!orig_arch_specified)
	ix86_arch_string = NULL;

      if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
	ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
      else if (orig_tune_defaulted)
	ix86_tune_string = NULL;

      /* If fpmath= is not set, and we now have sse2 on 32-bit, use it.  */
      if (enum_opts_set.x_ix86_fpmath)
	global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
      else if (!TARGET_64BIT && TARGET_SSE)
	{
	  ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
	  global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
	}

      /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
      ix86_option_override_internal (false);

      /* Add any builtin functions with the new isa if any.  */
      ix86_add_new_builtins (ix86_isa_flags);

      /* Save the current options unless we are validating options for
	 #pragma.  */
      t = build_target_option_node ();

      ix86_arch_string = orig_arch_string;
      ix86_tune_string = orig_tune_string;
      global_options_set.x_ix86_fpmath = orig_fpmath_set;

      /* Free up memory allocated to hold the strings */
      for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
	free (option_strings[i]);
    }

  return t;
}
4412 /* Hook to validate attribute((target("string"))). */
4415 ix86_valid_target_attribute_p (tree fndecl
,
4416 tree
ARG_UNUSED (name
),
4418 int ARG_UNUSED (flags
))
4420 struct cl_target_option cur_target
;
4422 tree old_optimize
= build_optimization_node ();
4423 tree new_target
, new_optimize
;
4424 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
4426 /* If the function changed the optimization levels as well as setting target
4427 options, start with the optimizations specified. */
4428 if (func_optimize
&& func_optimize
!= old_optimize
)
4429 cl_optimization_restore (&global_options
,
4430 TREE_OPTIMIZATION (func_optimize
));
4432 /* The target attributes may also change some optimization flags, so update
4433 the optimization options if necessary. */
4434 cl_target_option_save (&cur_target
, &global_options
);
4435 new_target
= ix86_valid_target_attribute_tree (args
);
4436 new_optimize
= build_optimization_node ();
4443 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = new_target
;
4445 if (old_optimize
!= new_optimize
)
4446 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
4449 cl_target_option_restore (&global_options
, &cur_target
);
4451 if (old_optimize
!= new_optimize
)
4452 cl_optimization_restore (&global_options
,
4453 TREE_OPTIMIZATION (old_optimize
));
/* Hook to determine if one function can safely inline another.  */

static bool
ix86_can_inline_p (tree caller, tree callee)
{
  bool ret = false;
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);

  /* If callee has no option attributes, then it is ok to inline.  */
  if (!callee_tree)
    ret = true;

  /* If caller has no option attributes, but callee does then it is not ok to
     inline.  */
  else if (!caller_tree)
    ret = false;

  else
    {
      struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
      struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);

      /* Callee's isa options should be a subset of the caller's, i.e. an
	 SSE4 function can inline an SSE2 function but an SSE2 function
	 can't inline an SSE4 function.  */
      if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
	  != callee_opts->x_ix86_isa_flags)
	ret = false;

      /* See if we have the same non-isa options.  */
      else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
	ret = false;

      /* See if arch, tune, etc. are the same.  */
      else if (caller_opts->arch != callee_opts->arch)
	ret = false;

      else if (caller_opts->tune != callee_opts->tune)
	ret = false;

      else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
	ret = false;

      else if (caller_opts->branch_cost != callee_opts->branch_cost)
	ret = false;

      else
	ret = true;
    }

  return ret;
}
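/* Consequence of the subset test above (illustrative): a caller compiled
   as __attribute__((target("sse4.2"))) may inline a callee marked
   target("sse2"), since the caller's ISA flag set contains the callee's;
   the reverse inlining is refused because the SSE4.2 bits are missing
   from the caller.  */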
/* Remember the last target of ix86_set_current_function.  */
static GTY(()) tree ix86_previous_fndecl;

/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */
static void
ix86_set_current_function (tree fndecl)
{
  /* Only change the context if the function changes.  This hook is called
     several times in the course of compiling a function, and we don't want to
     slow things down too much or call target_reinit when it isn't safe.  */
  if (fndecl && fndecl != ix86_previous_fndecl)
    {
      tree old_tree = (ix86_previous_fndecl
		       ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
		       : NULL_TREE);

      tree new_tree = (fndecl
		       ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
		       : NULL_TREE);

      ix86_previous_fndecl = fndecl;
      if (old_tree == new_tree)
	;

      else if (new_tree)
	{
	  cl_target_option_restore (&global_options,
				    TREE_TARGET_OPTION (new_tree));
	  target_reinit ();
	}

      else if (old_tree)
	{
	  struct cl_target_option *def
	    = TREE_TARGET_OPTION (target_option_current_node);

	  cl_target_option_restore (&global_options, def);
	  target_reinit ();
	}
    }
}

/* Return true if this goes in large data/bss.  */

static bool
ix86_in_large_data_p (tree exp)
{
  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
    return false;

  /* Functions are never large data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
      if (strcmp (section, ".ldata") == 0
	  || strcmp (section, ".lbss") == 0)
	return true;
      return false;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
	 in data because it might be too big when completed.  */
      if (!size || size > ix86_section_threshold)
	return true;
    }

  return false;
}
/* Switch to the appropriate section for output of DECL.
   DECL is either a `VAR_DECL' node or a constant of some sort.
   RELOC indicates whether forming the initial value of DECL requires
   link-time relocations.  */

static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
	ATTRIBUTE_UNUSED;

static section *
x86_64_elf_select_section (tree decl, int reloc,
			   unsigned HOST_WIDE_INT align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *sname = NULL;
      unsigned int flags = SECTION_WRITE;
      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	  sname = ".ldata";
	  break;
	case SECCAT_DATA_REL:
	  sname = ".ldata.rel";
	  break;
	case SECCAT_DATA_REL_LOCAL:
	  sname = ".ldata.rel.local";
	  break;
	case SECCAT_DATA_REL_RO:
	  sname = ".ldata.rel.ro";
	  break;
	case SECCAT_DATA_REL_RO_LOCAL:
	  sname = ".ldata.rel.ro.local";
	  break;
	case SECCAT_BSS:
	  sname = ".lbss";
	  flags |= SECTION_BSS;
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  sname = ".lrodata";
	  flags = 0;
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (sname)
	{
	  /* We might get called with string constants, but get_named_section
	     doesn't like them as they are not DECLs.  Also, we need to set
	     flags in that case.  */
	  if (!DECL_P (decl))
	    return get_section (sname, flags, NULL);
	  return get_named_section (decl, sname, reloc);
	}
    }
  return default_elf_select_section (decl, reloc, align);
}

/* Build up a unique section name, expressed as a
   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
   RELOC indicates whether the initial value of EXP requires
   link-time relocations.  */

static void ATTRIBUTE_UNUSED
x86_64_elf_unique_section (tree decl, int reloc)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *prefix = NULL;
      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
      bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;

      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	case SECCAT_DATA_REL:
	case SECCAT_DATA_REL_LOCAL:
	case SECCAT_DATA_REL_RO:
	case SECCAT_DATA_REL_RO_LOCAL:
	  prefix = one_only ? ".ld" : ".ldata";
	  break;
	case SECCAT_BSS:
	  prefix = one_only ? ".lb" : ".lbss";
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  prefix = one_only ? ".lr" : ".lrodata";
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (prefix)
	{
	  const char *name, *linkonce;
	  char *string;

	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
	  name = targetm.strip_name_encoding (name);

	  /* If we're using one_only, then there needs to be a .gnu.linkonce
	     prefix to the section name.  */
	  linkonce = one_only ? ".gnu.linkonce" : "";

	  string = ACONCAT ((linkonce, prefix, ".", name, NULL));

	  DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
	  return;
	}
    }
  default_unique_section (decl, reloc);
}
#ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use .largecomm opcode for
   large objects.  */
void
x86_elf_aligned_common (FILE *file,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    fputs (".largecomm\t", file);
  else
    fputs (COMMON_ASM_OP, file);
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
	   size, align / BITS_PER_UNIT);
}
#endif

/* Utility function for targets to use in implementing
   ASM_OUTPUT_ALIGNED_BSS.  */

void
x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    switch_to_section (get_named_section (decl, ".lbss", 0));
  else
    switch_to_section (bss_section);
  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
#ifdef ASM_DECLARE_OBJECT_NAME
  last_assemble_variable_decl = decl;
  ASM_DECLARE_OBJECT_NAME (file, name, decl);
#else
  /* Standard thing is just output label for the object.  */
  ASM_OUTPUT_LABEL (file, name);
#endif /* ASM_DECLARE_OBJECT_NAME */
  ASM_OUTPUT_SKIP (file, size ? size : 1);
}

/* Decide whether we must probe the stack before any space allocation
   on this target.  It's essentially TARGET_STACK_PROBE except when
   -fstack-check causes the stack to be already probed differently.  */

bool
ix86_target_stack_probe (void)
{
  /* Do not probe the stack twice if static stack checking is enabled.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    return false;

  return TARGET_STACK_PROBE;
}
/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */

static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  tree type, decl_or_type;
  rtx a, b;

  /* If we are generating position-independent code, we cannot sibcall
     optimize any indirect call, or a direct call to a global function,
     as the PLT requires %ebx be live. (Darwin does not have a PLT.)  */
  if (!TARGET_MACHO
      && !TARGET_64BIT
      && flag_pic
      && (!decl || !targetm.binds_local_p (decl)))
    return false;

  /* If we need to align the outgoing stack, then sibcalling would
     unalign the stack, which may break the called function.  */
  if (ix86_minimum_incoming_stack_boundary (true)
      < PREFERRED_STACK_BOUNDARY)
    return false;

  if (decl)
    {
      decl_or_type = decl;
      type = TREE_TYPE (decl);
    }
  else
    {
      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = CALL_EXPR_FN (exp);	/* pointer expression */
      type = TREE_TYPE (type);		/* pointer type */
      type = TREE_TYPE (type);		/* function type */
      decl_or_type = type;
    }

  /* Check that the return value locations are the same.  For example, if
     we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  This is also the place we notice
     differences in the return value ABI.  Note that it is ok for one
     of the functions to have void return type as long as the return
     value of the other is passed in a register.  */
  a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			   cfun->decl, false);
  if (STACK_REG_P (a) || STACK_REG_P (b))
    {
      if (!rtx_equal_p (a, b))
	return false;
    }
  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    {
      /* Disable sibcall if we need to generate vzeroupper after
	 callee returns.  */
      if (TARGET_VZEROUPPER
	  && cfun->machine->callee_return_avx256_p
	  && !cfun->machine->caller_return_avx256_p)
	return false;
    }
  else if (!rtx_equal_p (a, b))
    return false;

  if (TARGET_64BIT)
    {
      /* The SYSV ABI has more call-clobbered registers;
	 disallow sibcalls from MS to SYSV.  */
      if (cfun->machine->call_abi == MS_ABI
	  && ix86_function_type_abi (type) == SYSV_ABI)
	return false;
    }
  else
    {
      /* If this call is indirect, we'll need to be able to use a
	 call-clobbered register for the address of the target function.
	 Make sure that all such registers are not used for passing
	 parameters.  Note that DLLIMPORT functions are indirect.  */
      if (!decl
	  || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
	{
	  if (ix86_function_regparm (type, NULL) >= 3)
	    {
	      /* ??? Need to count the actual number of registers to be used,
		 not the possible number of registers.  Fix later.  */
	      return false;
	    }
	}
    }

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}
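/* Illustrative rejection by the checks above: under "-m32 -fpic", a call
   to a global function that may be overridden at link time cannot become
   a sibcall, because it goes through the PLT and the PLT stub needs %ebx
   to hold the GOT pointer at the call site.  */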
4884 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4885 and "sseregparm" calling convention attributes;
4886 arguments as in struct attribute_spec.handler. */
4889 ix86_handle_cconv_attribute (tree
*node
, tree name
,
4891 int flags ATTRIBUTE_UNUSED
,
4894 if (TREE_CODE (*node
) != FUNCTION_TYPE
4895 && TREE_CODE (*node
) != METHOD_TYPE
4896 && TREE_CODE (*node
) != FIELD_DECL
4897 && TREE_CODE (*node
) != TYPE_DECL
)
4899 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
4901 *no_add_attrs
= true;
4905 /* Can combine regparm with all attributes but fastcall, and thiscall. */
4906 if (is_attribute_p ("regparm", name
))
4910 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
4912 error ("fastcall and regparm attributes are not compatible");
4915 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
4917 error ("regparam and thiscall attributes are not compatible");
4920 cst
= TREE_VALUE (args
);
4921 if (TREE_CODE (cst
) != INTEGER_CST
)
4923 warning (OPT_Wattributes
,
4924 "%qE attribute requires an integer constant argument",
4926 *no_add_attrs
= true;
4928 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
4930 warning (OPT_Wattributes
, "argument to %qE attribute larger than %d",
4932 *no_add_attrs
= true;
4940 /* Do not warn when emulating the MS ABI. */
4941 if ((TREE_CODE (*node
) != FUNCTION_TYPE
4942 && TREE_CODE (*node
) != METHOD_TYPE
)
4943 || ix86_function_type_abi (*node
) != MS_ABI
)
4944 warning (OPT_Wattributes
, "%qE attribute ignored",
4946 *no_add_attrs
= true;
4950 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4951 if (is_attribute_p ("fastcall", name
))
4953 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
4955 error ("fastcall and cdecl attributes are not compatible");
4957 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
4959 error ("fastcall and stdcall attributes are not compatible");
4961 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
4963 error ("fastcall and regparm attributes are not compatible");
4965 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
4967 error ("fastcall and thiscall attributes are not compatible");
4971 /* Can combine stdcall with fastcall (redundant), regparm and
4973 else if (is_attribute_p ("stdcall", name
))
4975 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
4977 error ("stdcall and cdecl attributes are not compatible");
4979 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
4981 error ("stdcall and fastcall attributes are not compatible");
4983 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
4985 error ("stdcall and thiscall attributes are not compatible");
4989 /* Can combine cdecl with regparm and sseregparm. */
4990 else if (is_attribute_p ("cdecl", name
))
4992 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
4994 error ("stdcall and cdecl attributes are not compatible");
4996 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
4998 error ("fastcall and cdecl attributes are not compatible");
5000 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5002 error ("cdecl and thiscall attributes are not compatible");
5005 else if (is_attribute_p ("thiscall", name
))
5007 if (TREE_CODE (*node
) != METHOD_TYPE
&& pedantic
)
5008 warning (OPT_Wattributes
, "%qE attribute is used for none class-method",
5010 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5012 error ("stdcall and thiscall attributes are not compatible");
5014 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5016 error ("fastcall and thiscall attributes are not compatible");
5018 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5020 error ("cdecl and thiscall attributes are not compatible");
5024 /* Can combine sseregparm with all attributes. */
5029 /* The transactional memory builtins are implicitly regparm or fastcall
5030 depending on the ABI. Override the generic do-nothing attribute that
5031 these builtins were declared with, and replace it with one of the two
5032 attributes that we expect elsewhere. */
5035 ix86_handle_tm_regparm_attribute (tree
*node
, tree name ATTRIBUTE_UNUSED
,
5036 tree args ATTRIBUTE_UNUSED
,
5037 int flags ATTRIBUTE_UNUSED
,
5042 /* In no case do we want to add the placeholder attribute. */
5043 *no_add_attrs
= true;
5045 /* The 64-bit ABI is unchanged for transactional memory. */
5049 /* ??? Is there a better way to validate 32-bit windows? We have
5050 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5051 if (CHECK_STACK_LIMIT
> 0)
5052 alt
= tree_cons (get_identifier ("fastcall"), NULL
, NULL
);
5055 alt
= tree_cons (NULL
, build_int_cst (NULL
, 2), NULL
);
5056 alt
= tree_cons (get_identifier ("regparm"), alt
, NULL
);
5058 decl_attributes (node
, alt
, flags
);
/* This function determines from TYPE the calling-convention.  */

unsigned int
ix86_get_callcvt (const_tree type)
{
  unsigned int ret = 0;
  bool is_stdarg;
  tree attrs;

  if (TARGET_64BIT)
    return IX86_CALLCVT_CDECL;

  attrs = TYPE_ATTRIBUTES (type);
  if (attrs != NULL_TREE)
    {
      if (lookup_attribute ("cdecl", attrs))
	ret |= IX86_CALLCVT_CDECL;
      else if (lookup_attribute ("stdcall", attrs))
	ret |= IX86_CALLCVT_STDCALL;
      else if (lookup_attribute ("fastcall", attrs))
	ret |= IX86_CALLCVT_FASTCALL;
      else if (lookup_attribute ("thiscall", attrs))
	ret |= IX86_CALLCVT_THISCALL;

      /* Regparam isn't allowed for thiscall and fastcall.  */
      if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
	{
	  if (lookup_attribute ("regparm", attrs))
	    ret |= IX86_CALLCVT_REGPARM;
	  if (lookup_attribute ("sseregparm", attrs))
	    ret |= IX86_CALLCVT_SSEREGPARM;
	}

      if (IX86_BASE_CALLCVT(ret) != 0)
	return ret;
    }

  is_stdarg = stdarg_p (type);
  if (TARGET_RTD && !is_stdarg)
    return IX86_CALLCVT_STDCALL | ret;

  if (ret != 0
      || is_stdarg
      || TREE_CODE (type) != METHOD_TYPE
      || ix86_function_type_abi (type) != MS_ABI)
    return IX86_CALLCVT_CDECL | ret;

  return IX86_CALLCVT_THISCALL;
}
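/* Typical results (illustrative): on 32-bit,
   "void __attribute__((stdcall)) f (int);" yields IX86_CALLCVT_STDCALL;
   a plain non-variadic function compiled with -mrtd also reports
   IX86_CALLCVT_STDCALL via the TARGET_RTD path above; with TARGET_64BIT
   everything collapses to IX86_CALLCVT_CDECL.  */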
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (const_tree type1, const_tree type2)
{
  unsigned int ccvt1, ccvt2;

  if (TREE_CODE (type1) != FUNCTION_TYPE
      && TREE_CODE (type1) != METHOD_TYPE)
    return 1;

  ccvt1 = ix86_get_callcvt (type1);
  ccvt2 = ix86_get_callcvt (type2);
  if (ccvt1 != ccvt2)
    return 0;
  if (ix86_function_regparm (type1, NULL)
      != ix86_function_regparm (type2, NULL))
    return 0;

  return 1;
}

/* Return the regparm value for a function with the indicated TYPE and DECL.
   DECL may be NULL when calling function indirectly
   or considering a libcall.  */

static int
ix86_function_regparm (const_tree type, const_tree decl)
{
  tree attr;
  int regparm;
  unsigned int ccvt;

  if (TARGET_64BIT)
    return (ix86_function_type_abi (type) == SYSV_ABI
	    ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
  ccvt = ix86_get_callcvt (type);
  regparm = ix86_regparm;

  if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
    {
      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
      if (attr)
	{
	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
	  return regparm;
	}
    }
  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
    return 2;
  else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
    return 1;

  /* Use register calling convention for local functions when possible.  */
  if (decl
      && TREE_CODE (decl) == FUNCTION_DECL
      && optimize
      && !(profile_flag && !flag_fentry))
    {
      /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
      struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
      if (i && i->local && i->can_change_signature)
	{
	  int local_regparm, globals = 0, regno;

	  /* Make sure no regparm register is taken by a
	     fixed register variable.  */
	  for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
	    if (fixed_regs[local_regparm])
	      break;

	  /* We don't want to use regparm(3) for nested functions as
	     these use a static chain pointer in the third argument.  */
	  if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
	    local_regparm = 2;

	  /* In 32-bit mode save a register for the split stack.  */
	  if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
	    local_regparm = 2;

	  /* Each fixed register usage increases register pressure,
	     so fewer registers should be used for argument passing.
	     This functionality can be overridden by an explicit
	     regparm value.  */
	  for (regno = 0; regno <= DI_REG; regno++)
	    if (fixed_regs[regno])
	      globals++;

	  local_regparm
	    = globals < local_regparm ? local_regparm - globals : 0;

	  if (local_regparm > regparm)
	    regparm = local_regparm;
	}
    }

  return regparm;
}
/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
   DFmode (2) arguments in SSE registers for a function with the
   indicated TYPE and DECL.  DECL may be NULL when calling function
   indirectly or considering a libcall.  Otherwise return 0.  */

static int
ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
{
  gcc_assert (!TARGET_64BIT);

  /* Use SSE registers to pass SFmode and DFmode arguments if requested
     by the sseregparm attribute.  */
  if (TARGET_SSEREGPARM
      || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
    {
      if (!TARGET_SSE)
	{
	  if (warn)
	    {
	      if (decl)
		error ("calling %qD with attribute sseregparm without "
		       "SSE/SSE2 enabled", decl);
	      else
		error ("calling %qT with attribute sseregparm without "
		       "SSE/SSE2 enabled", type);
	    }
	  return 0;
	}

      return 2;
    }

  /* For local functions, pass up to SSE_REGPARM_MAX SFmode
     (and DFmode for SSE2) arguments in SSE registers.  */
  if (decl && TARGET_SSE_MATH && optimize
      && !(profile_flag && !flag_fentry))
    {
      /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
      struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
      if (i && i->local && i->can_change_signature)
	return TARGET_SSE2 ? 2 : 1;
    }

  return 0;
}

/* Return true if EAX is live at the start of the function.  Used by
   ix86_expand_prologue to determine if we need special help before
   calling allocate_stack_worker.  */

static bool
ix86_eax_live_at_start_p (void)
{
  /* Cheat.  Don't bother working forward from ix86_function_regparm
     to the function type to whether an actual argument is located in
     eax.  Instead just look at cfg info, which is still close enough
     to correct at this point.  This gives false positives for broken
     functions that might use uninitialized data that happens to be
     allocated in eax, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
}

static bool
ix86_keep_aggregate_return_pointer (tree fntype)
{
  tree attr;

  if (!TARGET_64BIT)
    {
      attr = lookup_attribute ("callee_pop_aggregate_return",
			       TYPE_ATTRIBUTES (fntype));
      if (attr)
	return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);

      /* For 32-bit MS-ABI the default is to keep aggregate
	 return pointer.  */
      if (ix86_function_type_abi (fntype) == MS_ABI)
	return true;
    }
  return KEEP_AGGREGATE_RETURN_POINTER != 0;
}
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

static int
ix86_return_pops_args (tree fundecl, tree funtype, int size)
{
  unsigned int ccvt;

  /* None of the 64-bit ABIs pop arguments.  */
  if (TARGET_64BIT)
    return 0;

  ccvt = ix86_get_callcvt (funtype);

  if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
	       | IX86_CALLCVT_THISCALL)) != 0
      && ! stdarg_p (funtype))
    return size;

  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
      && !ix86_keep_aggregate_return_pointer (funtype))
    {
      int nregs = ix86_function_regparm (funtype, fundecl);
      if (nregs == 0)
	return GET_MODE_SIZE (Pmode);
    }

  return 0;
}
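/* Worked example (illustrative): a non-variadic 32-bit stdcall function
   taking two ints has SIZE == 8 at its return, so this hook returns 8 and
   the epilogue pops the arguments with "ret $8"; declare the same function
   variadic and the hook returns 0, leaving the caller to pop.  */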
/* Argument support functions.  */

/* Return true when register may be used to pass function parameters.  */

bool
ix86_function_arg_regno_p (int regno)
{
  int i;
  const int *parm_regs;

  if (!TARGET_64BIT)
    {
      if (TARGET_MACHO)
	return (regno < REGPARM_MAX
		|| (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
      else
	return (regno < REGPARM_MAX
		|| (TARGET_MMX && MMX_REGNO_P (regno)
		    && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
		|| (TARGET_SSE && SSE_REGNO_P (regno)
		    && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
    }

  if (TARGET_MACHO)
    {
      if (SSE_REGNO_P (regno) && TARGET_SSE)
	return true;
    }
  else
    {
      if (TARGET_SSE && SSE_REGNO_P (regno)
	  && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
	return true;
    }

  /* TODO: The function should depend on current function ABI but
     builtins.c would need updating then.  Therefore we use the
     default ABI.  */

  /* RAX is used as hidden argument to va_arg functions.  */
  if (ix86_abi == SYSV_ABI && regno == AX_REG)
    return true;

  if (ix86_abi == MS_ABI)
    parm_regs = x86_64_ms_abi_int_parameter_registers;
  else
    parm_regs = x86_64_int_parameter_registers;
  for (i = 0; i < (ix86_abi == MS_ABI
		   ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
    if (regno == parm_regs[i])
      return true;
  return false;
}

/* Return true if we do not know how to pass TYPE solely in registers.  */

static bool
ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
{
  if (must_pass_in_stack_var_size_or_pad (mode, type))
    return true;

  /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
     The layout_type routine is crafty and tries to trick us into passing
     currently unsupported vector types on the stack by using TImode.  */
  return (!TARGET_64BIT && mode == TImode
	  && type && TREE_CODE (type) != VECTOR_TYPE);
}
5409 /* It returns the size, in bytes, of the area reserved for arguments passed
5410 in registers for the function represented by fndecl dependent to the used
5413 ix86_reg_parm_stack_space (const_tree fndecl
)
5415 enum calling_abi call_abi
= SYSV_ABI
;
5416 if (fndecl
!= NULL_TREE
&& TREE_CODE (fndecl
) == FUNCTION_DECL
)
5417 call_abi
= ix86_function_abi (fndecl
);
5419 call_abi
= ix86_function_type_abi (fndecl
);
5420 if (TARGET_64BIT
&& call_abi
== MS_ABI
)
/* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
   call abi used.  */

enum calling_abi
ix86_function_type_abi (const_tree fntype)
{
  if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
    {
      enum calling_abi abi = ix86_abi;
      if (abi == SYSV_ABI)
	{
	  if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
	    abi = MS_ABI;
	}
      else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
	abi = SYSV_ABI;
      return abi;
    }
  return ix86_abi;
}
static bool
ix86_function_ms_hook_prologue (const_tree fn)
{
  if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
    {
      if (decl_function_context (fn) != NULL_TREE)
	error_at (DECL_SOURCE_LOCATION (fn),
		  "ms_hook_prologue is not compatible with nested function");
      else
	return true;
    }
  return false;
}
/* Returns value SYSV_ABI, MS_ABI dependent on fndecl, specifying the
   call abi used.  */

static enum calling_abi
ix86_function_abi (const_tree fndecl)
{
  if (fndecl == NULL)
    return ix86_abi;
  return ix86_function_type_abi (TREE_TYPE (fndecl));
}

/* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
   call abi used.  */

enum calling_abi
ix86_cfun_abi (void)
{
  if (! cfun)
    return ix86_abi;
  return cfun->machine->call_abi;
}
/* Write the extra assembler code needed to declare a function properly.  */

void
ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
				tree decl)
{
  bool is_ms_hook = ix86_function_ms_hook_prologue (decl);

  if (is_ms_hook)
    {
      int i, filler_count = (TARGET_64BIT ? 32 : 16);
      unsigned int filler_cc = 0xcccccccc;

      for (i = 0; i < filler_count; i += 4)
	fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
    }

#ifdef SUBTARGET_ASM_UNWIND_INIT
  SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
#endif

  ASM_OUTPUT_LABEL (asm_out_file, fname);

  /* Output magic byte marker, if hot-patch attribute is set.  */
  if (is_ms_hook)
    {
      if (TARGET_64BIT)
	{
	  /* leaq [%rsp + 0], %rsp  */
	  asm_fprintf (asm_out_file, ASM_BYTE
		       "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
	}
      else
	{
	  /* movl.s %edi, %edi
	     push   %ebp
	     movl.s %esp, %ebp */
	  asm_fprintf (asm_out_file, ASM_BYTE
		       "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
	}
    }
}
extern void init_regs (void);

/* Implementation of call abi switching target hook.  Specific to FNDECL
   the specific call register sets are set.  See also
   ix86_conditional_register_usage for more details.  */

void
ix86_call_abi_override (const_tree fndecl)
{
  if (fndecl == NULL_TREE)
    cfun->machine->call_abi = ix86_abi;
  else
    cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
}

/* 64-bit MS and SYSV ABI have different set of call used registers.  Avoid
   expensive re-initialization of init_regs each time we switch function
   context since this is needed only during RTL expansion.  */

static void
ix86_maybe_switch_abi (void)
{
  if (TARGET_64BIT
      && call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
    reinit_regs ();
}
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
		      tree fntype,	/* tree ptr for function decl */
		      rtx libname,	/* SYMBOL_REF of library name or 0 */
		      tree fndecl,
		      int caller)
{
  struct cgraph_local_info *i;
  tree fnret_type;

  memset (cum, 0, sizeof (*cum));

  /* Initialize for the current callee.  */
  if (caller)
    {
      cfun->machine->callee_pass_avx256_p = false;
      cfun->machine->callee_return_avx256_p = false;
    }

  if (fndecl)
    {
      i = cgraph_local_info (fndecl);
      cum->call_abi = ix86_function_abi (fndecl);
      fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
    }
  else
    {
      i = NULL;
      cum->call_abi = ix86_function_type_abi (fntype);
      if (fntype)
	fnret_type = TREE_TYPE (fntype);
      else
	fnret_type = NULL;
    }

  if (TARGET_VZEROUPPER && fnret_type)
    {
      rtx fnret_value = ix86_function_value (fnret_type, fntype, false);
      if (function_pass_avx256_p (fnret_value))
	{
	  /* The return value of this function uses 256bit AVX modes.  */
	  if (caller)
	    cfun->machine->callee_return_avx256_p = true;
	  else
	    cfun->machine->caller_return_avx256_p = true;
	}
    }

  cum->caller = caller;

  /* Set up the number of registers to use for passing arguments.  */

  if (TARGET_64BIT && cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
    sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
	   "or subtarget optimization implying it");
  cum->nregs = ix86_regparm;
  if (TARGET_64BIT)
    cum->nregs = (cum->call_abi == SYSV_ABI
		  ? X86_64_REGPARM_MAX
		  : X86_64_MS_REGPARM_MAX);
  if (TARGET_SSE)
    {
      cum->sse_nregs = SSE_REGPARM_MAX;
      if (TARGET_64BIT)
	cum->sse_nregs = (cum->call_abi == SYSV_ABI
			  ? X86_64_SSE_REGPARM_MAX
			  : X86_64_MS_SSE_REGPARM_MAX);
    }
  if (TARGET_MMX)
    cum->mmx_nregs = MMX_REGPARM_MAX;
  cum->warn_avx = true;
  cum->warn_sse = true;
  cum->warn_mmx = true;

  /* Because type might mismatch in between caller and callee, we need to
     use actual type of function for local calls.
     FIXME: cgraph_analyze can be told to actually record if function uses
     va_start so for local functions maybe_vaarg can be made aggressive.
     FIXME: once the type system is fixed, we won't need this code anymore.  */
  if (i && i->local && i->can_change_signature)
    fntype = TREE_TYPE (fndecl);
  cum->maybe_vaarg = (fntype
		      ? (!prototype_p (fntype) || stdarg_p (fntype))
		      : !libname);

  if (!TARGET_64BIT)
    {
      /* If there are variable arguments, then we won't pass anything
	 in registers in 32-bit mode.  */
      if (stdarg_p (fntype))
	{
	  cum->nregs = 0;
	  cum->sse_nregs = 0;
	  cum->mmx_nregs = 0;
	  cum->warn_avx = 0;
	  cum->warn_sse = 0;
	  cum->warn_mmx = 0;
	  return;
	}

      /* Use ecx and edx registers if function has fastcall attribute,
	 else look for regparm information.  */
      if (fntype)
	{
	  unsigned int ccvt = ix86_get_callcvt (fntype);
	  if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	    {
	      cum->nregs = 1;
	      cum->fastcall = 1; /* Same first register as in fastcall.  */
	    }
	  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	    {
	      cum->nregs = 2;
	      cum->fastcall = 1;
	    }
	  else
	    cum->nregs = ix86_function_regparm (fntype, fndecl);
	}

      /* Set up the number of SSE registers used for passing SFmode
	 and DFmode arguments.  Warn for mismatching ABI.  */
      cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
    }
}
/* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
   But in the case of vector types, it is some vector mode.

   When we have only some of our vector isa extensions enabled, then there
   are some modes for which vector_mode_supported_p is false.  For these
   modes, the generic vector support in gcc will choose some non-vector mode
   in order to implement the type.  By computing the natural mode, we'll
   select the proper ABI location for the operand and not depend on whatever
   the middle-end decides to do with these vector types.

   The middle-end can't deal with the vector types > 16 bytes.  In this
   case, we return the original mode and warn ABI change if CUM isn't
   NULL.  */

static enum machine_mode
type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
{
  enum machine_mode mode = TYPE_MODE (type);

  if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if ((size == 8 || size == 16 || size == 32)
	  /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
	  && TYPE_VECTOR_SUBPARTS (type) > 1)
	{
	  enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));

	  if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
	    mode = MIN_MODE_VECTOR_FLOAT;
	  else
	    mode = MIN_MODE_VECTOR_INT;

	  /* Get the mode which has this inner mode and number of units.  */
	  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
	    if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
		&& GET_MODE_INNER (mode) == innermode)
	      {
		if (size == 32 && !TARGET_AVX)
		  {
		    static bool warnedavx;

		    if (cum && !warnedavx && cum->warn_avx)
		      {
			warnedavx = true;
			warning (0, "AVX vector argument without AVX "
				 "enabled changes the ABI");
		      }
		    return TYPE_MODE (type);
		  }
		else
		  return mode;
	      }

	  gcc_unreachable ();
	}
    }

  return mode;
}
/* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
   this may not agree with the mode that the type system has chosen for the
   register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
   go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */

static rtx
gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
		     unsigned int regno)
{
  rtx tmp;

  if (orig_mode != BLKmode)
    tmp = gen_rtx_REG (orig_mode, regno);
  else
    {
      tmp = gen_rtx_REG (mode, regno);
      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
    }

  return tmp;
}
/* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
   of this code is to classify each 8 bytes of incoming argument by the
   register class and assign registers accordingly.  */

/* Return the union class of CLASS1 and CLASS2.
   See the x86-64 PS ABI for details.  */

static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
{
  /* Rule #1: If both classes are equal, this is the resulting class.  */
  if (class1 == class2)
    return class1;

  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     the other class.  */
  if (class1 == X86_64_NO_CLASS)
    return class2;
  if (class2 == X86_64_NO_CLASS)
    return class1;

  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
    return X86_64_INTEGERSI_CLASS;
  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    return X86_64_INTEGER_CLASS;

  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
     MEMORY is used.  */
  if (class1 == X86_64_X87_CLASS
      || class1 == X86_64_X87UP_CLASS
      || class1 == X86_64_COMPLEX_X87_CLASS
      || class2 == X86_64_X87_CLASS
      || class2 == X86_64_X87UP_CLASS
      || class2 == X86_64_COMPLEX_X87_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #6: Otherwise class SSE is used.  */
  return X86_64_SSE_CLASS;
}
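
/* Worked example (illustration only): merging INTEGERSI with SSESF yields
   INTEGERSI by rule #4's special case, while merging SSEDF with X87 yields
   MEMORY by rule #5.  So a union of float and int classifies its single
   eightbyte as INTEGERSI and is passed in a general purpose register.  */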
/* Classify the argument of type TYPE and mode MODE.
   CLASSES will be filled by the register class used to pass each word
   of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned.  As a special case for zero
   sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records and specifies offset
   of the offset in bits modulo 256 to avoid overflow cases.

   See the x86-64 PS ABI for details.  */

static int
classify_argument (enum machine_mode mode, const_tree type,
		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
{
  HOST_WIDE_INT bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words
    = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Variable sized entities are always passed/returned in memory.  */
  if (bytes < 0)
    return 0;

  if (mode != VOIDmode
      && targetm.calls.must_pass_in_stack (mode, type))
    return 0;

  if (type && AGGREGATE_TYPE_P (type))
    {
      int i;
      tree field;
      enum x86_64_reg_class subclasses[MAX_CLASSES];

      /* On x86-64 we pass structures larger than 32 bytes on the stack.  */
      if (bytes > 32)
	return 0;

      for (i = 0; i < words; i++)
	classes[i] = X86_64_NO_CLASS;

      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
	 signalize memory class, so handle it as special case.  */
      if (!words)
	{
	  classes[0] = X86_64_NO_CLASS;
	  return 1;
	}

      /* Classify each field of record and merge classes.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	  /* And now merge the fields of structure.  */
	  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  if (TREE_TYPE (field) == error_mark_node)
		    continue;

		  /* Bitfields are always classified as integer.  Handle them
		     early, since later code would consider them to be
		     misaligned integers.  */
		  if (DECL_BIT_FIELD (field))
		    {
		      for (i = (int_bit_position (field)
				+ (bit_offset % 64)) / 8 / 8;
			   i < ((int_bit_position (field) + (bit_offset % 64))
				+ tree_low_cst (DECL_SIZE (field), 0)
				+ 63) / 8 / 8; i++)
			classes[i]
			  = merge_classes (X86_64_INTEGER_CLASS, classes[i]);
		    }
		  else
		    {
		      int pos;

		      type = TREE_TYPE (field);

		      /* Flexible array member is ignored.  */
		      if (TYPE_MODE (type) == BLKmode
			  && TREE_CODE (type) == ARRAY_TYPE
			  && TYPE_SIZE (type) == NULL_TREE
			  && TYPE_DOMAIN (type) != NULL_TREE
			  && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
			      == NULL_TREE))
			{
			  static bool warned;

			  if (!warned && warn_psabi)
			    {
			      warned = true;
			      inform (input_location,
				      "the ABI of passing struct with"
				      " a flexible array member has"
				      " changed in GCC 4.4");
			    }
			  continue;
			}
		      num = classify_argument (TYPE_MODE (type), type,
					       subclasses,
					       (int_bit_position (field)
						+ bit_offset) % 256);
		      if (!num)
			return 0;
		      pos = (int_bit_position (field)
			     + (bit_offset % 64)) / 8 / 8;
		      for (i = 0; i < num && (i + pos) < words; i++)
			classes[i + pos]
			  = merge_classes (subclasses[i], classes[i + pos]);
		    }
		}
	    }
	  break;

	case ARRAY_TYPE:
	  /* Arrays are handled as small records.  */
	  {
	    int num;
	    num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
				     TREE_TYPE (type), subclasses, bit_offset);
	    if (!num)
	      return 0;

	    /* The partial classes are now full classes.  */
	    if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
	      subclasses[0] = X86_64_SSE_CLASS;
	    if (subclasses[0] == X86_64_INTEGERSI_CLASS
		&& !((bit_offset % 64) == 0 && bytes == 4))
	      subclasses[0] = X86_64_INTEGER_CLASS;

	    for (i = 0; i < words; i++)
	      classes[i] = subclasses[i % num];

	    break;
	  }
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  /* Unions are similar to RECORD_TYPE but offset is always 0.  */
	  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  if (TREE_TYPE (field) == error_mark_node)
		    continue;

		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
					   TREE_TYPE (field), subclasses,
					   bit_offset);
		  if (!num)
		    return 0;
		  for (i = 0; i < num; i++)
		    classes[i] = merge_classes (subclasses[i], classes[i]);
		}
	    }
	  break;

	default:
	  gcc_unreachable ();
	}

      if (words > 2)
	{
	  /* When size > 16 bytes, if the first one isn't
	     X86_64_SSE_CLASS or any other ones aren't
	     X86_64_SSEUP_CLASS, everything should be passed in
	     memory.  */
	  if (classes[0] != X86_64_SSE_CLASS)
	    return 0;

	  for (i = 1; i < words; i++)
	    if (classes[i] != X86_64_SSEUP_CLASS)
	      return 0;
	}

      /* Final merger cleanup.  */
      for (i = 0; i < words; i++)
	{
	  /* If one class is MEMORY, everything should be passed in
	     memory.  */
	  if (classes[i] == X86_64_MEMORY_CLASS)
	    return 0;

	  /* The X86_64_SSEUP_CLASS should be always preceded by
	     X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
	  if (classes[i] == X86_64_SSEUP_CLASS
	      && classes[i - 1] != X86_64_SSE_CLASS
	      && classes[i - 1] != X86_64_SSEUP_CLASS)
	    {
	      /* The first one should never be X86_64_SSEUP_CLASS.  */
	      gcc_assert (i != 0);
	      classes[i] = X86_64_SSE_CLASS;
	    }

	  /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
	     everything should be passed in memory.  */
	  if (classes[i] == X86_64_X87UP_CLASS
	      && (classes[i - 1] != X86_64_X87_CLASS))
	    {
	      static bool warned;

	      /* The first one should never be X86_64_X87UP_CLASS.  */
	      gcc_assert (i != 0);
	      if (!warned && warn_psabi)
		{
		  warned = true;
		  inform (input_location,
			  "the ABI of passing union with long double"
			  " has changed in GCC 4.4");
		}
	      return 0;
	    }
	}
      return words;
    }

  /* Compute alignment needed.  We align all types to natural boundaries with
     exception of XFmode that is aligned to 64bits.  */
  if (mode != VOIDmode && mode != BLKmode)
    {
      int mode_alignment = GET_MODE_BITSIZE (mode);

      if (mode == XFmode)
	mode_alignment = 128;
      else if (mode == XCmode)
	mode_alignment = 256;
      if (COMPLEX_MODE_P (mode))
	mode_alignment /= 2;
      /* Misaligned fields are always returned in memory.  */
      if (bit_offset % mode_alignment)
	return 0;
    }

  /* for V1xx modes, just use the base mode */
  if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
      && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
    mode = GET_MODE_INNER (mode);

  /* Classification of atomic types.  */
  switch (mode)
    {
    case SDmode:
    case DDmode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case TDmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case DImode:
    case SImode:
    case HImode:
    case QImode:
    case CSImode:
    case CHImode:
    case CQImode:
      {
	int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);

	if (size <= 32)
	  {
	    classes[0] = X86_64_INTEGERSI_CLASS;
	    return 1;
	  }
	else if (size <= 64)
	  {
	    classes[0] = X86_64_INTEGER_CLASS;
	    return 1;
	  }
	else if (size <= 64+32)
	  {
	    classes[0] = X86_64_INTEGER_CLASS;
	    classes[1] = X86_64_INTEGERSI_CLASS;
	    return 2;
	  }
	else if (size <= 64+64)
	  {
	    classes[0] = classes[1] = X86_64_INTEGER_CLASS;
	    return 2;
	  }
	else
	  gcc_unreachable ();
      }
    case CDImode:
    case TImode:
      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
      return 2;
    case COImode:
    case OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();
    case CTImode:
      return 0;
    case SFmode:
      if (!(bit_offset % 64))
	classes[0] = X86_64_SSESF_CLASS;
      else
	classes[0] = X86_64_SSE_CLASS;
      return 1;
    case DFmode:
      classes[0] = X86_64_SSEDF_CLASS;
      return 1;
    case XFmode:
      classes[0] = X86_64_X87_CLASS;
      classes[1] = X86_64_X87UP_CLASS;
      return 2;
    case TFmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case SCmode:
      classes[0] = X86_64_SSE_CLASS;
      if (!(bit_offset % 64))
	return 1;
      else
	{
	  static bool warned;

	  if (!warned && warn_psabi)
	    {
	      warned = true;
	      inform (input_location,
		      "the ABI of passing structure with complex float"
		      " member has changed in GCC 4.4");
	    }
	  classes[1] = X86_64_SSESF_CLASS;
	  return 2;
	}
    case DCmode:
      classes[0] = X86_64_SSEDF_CLASS;
      classes[1] = X86_64_SSEDF_CLASS;
      return 2;
    case XCmode:
      classes[0] = X86_64_COMPLEX_X87_CLASS;
      return 1;
    case TCmode:
      /* This mode is larger than 16 bytes.  */
      return 0;
    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      classes[2] = X86_64_SSEUP_CLASS;
      classes[3] = X86_64_SSEUP_CLASS;
      return 4;
    case V4SFmode:
    case V4SImode:
    case V16QImode:
    case V8HImode:
    case V2DFmode:
    case V2DImode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case V1TImode:
    case V1DImode:
    case V2SFmode:
    case V2SImode:
    case V4HImode:
    case V8QImode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case BLKmode:
    case VOIDmode:
      return 0;

    default:
      gcc_assert (VECTOR_MODE_P (mode));

      if (bytes > 16)
	return 0;

      gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);

      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
	classes[0] = X86_64_INTEGERSI_CLASS;
      else
	classes[0] = X86_64_INTEGER_CLASS;
      classes[1] = X86_64_INTEGER_CLASS;
      return 1 + (bytes > 8);
    }
}
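
/* Worked example (illustration only, following the SysV x86-64 ABI):

     struct s { double d; long l; };

   classifies as two eightbytes, classes[0] = SSEDF (the double) and
   classes[1] = INTEGER (the long), so S is passed in one SSE and one
   general purpose register.  A struct larger than 32 bytes, or one whose
   long double member merges a word to MEMORY, gets 0 here and is passed
   on the stack instead.  */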
/* Examine the argument and return set number of register required in each
   class.  Return 0 iff parameter should be passed in memory.  */

static int
examine_argument (enum machine_mode mode, const_tree type, int in_return,
		  int *int_nregs, int *sse_nregs)
{
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n = classify_argument (mode, type, regclass, 0);

  *int_nregs = 0;
  *sse_nregs = 0;
  if (!n)
    return 0;
  for (n--; n >= 0; n--)
    switch (regclass[n])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	(*int_nregs)++;
	break;
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	(*sse_nregs)++;
	break;
      case X86_64_NO_CLASS:
      case X86_64_SSEUP_CLASS:
	break;
      case X86_64_X87_CLASS:
      case X86_64_X87UP_CLASS:
	if (!in_return)
	  return 0;
	break;
      case X86_64_COMPLEX_X87_CLASS:
	return in_return ? 2 : 0;
      case X86_64_MEMORY_CLASS:
	gcc_unreachable ();
      }
  return 1;
}
/* Construct container for the argument used by GCC interface.  See
   FUNCTION_ARG for the detailed description.  */

static rtx
construct_container (enum machine_mode mode, enum machine_mode orig_mode,
		     const_tree type, int in_return, int nintregs,
		     int nsseregs, const int *intreg, int sse_regno)
{
  /* The following variables hold the static issued_error state.  */
  static bool issued_sse_arg_error;
  static bool issued_sse_ret_error;
  static bool issued_x87_ret_error;

  enum machine_mode tmpmode;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n;
  int i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  n = classify_argument (mode, type, regclass, 0);
  if (!n)
    return NULL;
  if (!examine_argument (mode, type, in_return, &needed_intregs,
			 &needed_sseregs))
    return NULL;
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;

  /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
     some less clueful developer tries to use floating-point anyway.  */
  if (needed_sseregs && !TARGET_SSE)
    {
      if (in_return)
	{
	  if (!issued_sse_ret_error)
	    {
	      error ("SSE register return with SSE disabled");
	      issued_sse_ret_error = true;
	    }
	}
      else if (!issued_sse_arg_error)
	{
	  error ("SSE register argument with SSE disabled");
	  issued_sse_arg_error = true;
	}
      return NULL;
    }

  /* Likewise, error if the ABI requires us to return values in the
     x87 registers and the user specified -mno-80387.  */
  if (!TARGET_80387 && in_return)
    for (i = 0; i < n; i++)
      if (regclass[i] == X86_64_X87_CLASS
	  || regclass[i] == X86_64_X87UP_CLASS
	  || regclass[i] == X86_64_COMPLEX_X87_CLASS)
	{
	  if (!issued_x87_ret_error)
	    {
	      error ("x87 register return with x87 disabled");
	      issued_x87_ret_error = true;
	    }
	  return NULL;
	}

  /* First construct simple cases.  Avoid SCmode, since we want to use
     single register to pass this type.  */
  if (n == 1 && mode != SCmode)
    switch (regclass[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	if (mode != BLKmode)
	  return gen_reg_or_parallel (mode, orig_mode,
				      SSE_REGNO (sse_regno));
	break;
      case X86_64_X87_CLASS:
      case X86_64_COMPLEX_X87_CLASS:
	return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
	/* Zero sized array, struct or class.  */
	return NULL;
      default:
	gcc_unreachable ();
      }
  if (n == 2
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				SSE_REGNO (sse_regno));
  if (n == 4
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && regclass[2] == X86_64_SSEUP_CLASS
      && regclass[3] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				SSE_REGNO (sse_regno));
  if (n == 2
      && regclass[0] == X86_64_X87_CLASS
      && regclass[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (XFmode, FIRST_STACK_REG);
  if (n == 2
      && regclass[0] == X86_64_INTEGER_CLASS
      && regclass[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode || mode == TFmode)
      && intreg[0] + 1 == intreg[1])
    return gen_rtx_REG (mode, intreg[0]);

  /* Otherwise figure out the entries of the PARALLEL.  */
  for (i = 0; i < n; i++)
    {
      int pos;

      switch (regclass[i])
	{
	case X86_64_NO_CLASS:
	  break;
	case X86_64_INTEGER_CLASS:
	case X86_64_INTEGERSI_CLASS:
	  /* Merge TImodes on aligned occasions here too.  */
	  if (i * 8 + 8 > bytes)
	    tmpmode
	      = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
	  else if (regclass[i] == X86_64_INTEGERSI_CLASS)
	    tmpmode = SImode;
	  else
	    tmpmode = DImode;
	  /* We've requested 24 bytes we
	     don't have mode for.  Use DImode.  */
	  if (tmpmode == BLKmode)
	    tmpmode = DImode;
	  exp [nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (tmpmode, *intreg),
				 GEN_INT (i*8));
	  intreg++;
	  break;
	case X86_64_SSESF_CLASS:
	  exp [nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (SFmode,
					      SSE_REGNO (sse_regno)),
				 GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSEDF_CLASS:
	  exp [nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (DFmode,
					      SSE_REGNO (sse_regno)),
				 GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSE_CLASS:
	  pos = i;
	  switch (n)
	    {
	    case 1:
	      tmpmode = DImode;
	      break;
	    case 2:
	      if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
		{
		  tmpmode = TImode;
		  i++;
		}
	      else
		tmpmode = DImode;
	      break;
	    case 4:
	      gcc_assert (i == 0
			  && regclass[1] == X86_64_SSEUP_CLASS
			  && regclass[2] == X86_64_SSEUP_CLASS
			  && regclass[3] == X86_64_SSEUP_CLASS);
	      tmpmode = OImode;
	      i += 3;
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  exp [nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (tmpmode,
					      SSE_REGNO (sse_regno)),
				 GEN_INT (pos*8));
	  sse_regno++;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* Empty aligned struct, union or class.  */
  if (nexps == 0)
    return NULL;

  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp [i];
  return ret;
}
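
/* For the two-eightbyte struct in the example above, the PARALLEL built
   here looks roughly like this (illustration only):

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
		(expr_list (reg:DI di)  (const_int 8))])

   i.e. one EXPR_LIST per eightbyte, each carrying the register and the
   byte offset of that piece within the argument.  */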
/* Update the data in CUM to advance over an argument of mode MODE
   and data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)  */

static void
function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			 const_tree type, HOST_WIDE_INT bytes,
			 HOST_WIDE_INT words)
{
  switch (mode)
    {
    default:
      break;

    case BLKmode:
      if (bytes < 0)
	break;
      /* FALLTHRU */

    case DImode:
    case SImode:
    case HImode:
    case QImode:
      cum->words += words;
      cum->nregs -= words;
      cum->regno += words;

      if (cum->nregs <= 0)
	{
	  cum->nregs = 0;
	  cum->regno = 0;
	}
      break;

    case OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();

    case DFmode:
      if (cum->float_in_sse < 2)
	break;
    case SFmode:
      if (cum->float_in_sse < 1)
	break;
      /* FALLTHRU */

    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
    case TImode:
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
    case V4SFmode:
    case V2DFmode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->sse_words += words;
	  cum->sse_nregs -= 1;
	  cum->sse_regno += 1;
	  if (cum->sse_nregs <= 0)
	    {
	      cum->sse_nregs = 0;
	      cum->sse_regno = 0;
	    }
	}
      break;

    case V8QImode:
    case V4HImode:
    case V2SImode:
    case V2SFmode:
    case V1TImode:
    case V1DImode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->mmx_words += words;
	  cum->mmx_nregs -= 1;
	  cum->mmx_regno += 1;
	  if (cum->mmx_nregs <= 0)
	    {
	      cum->mmx_nregs = 0;
	      cum->mmx_regno = 0;
	    }
	}
      break;
    }
}
static void
function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			 const_tree type, HOST_WIDE_INT words, bool named)
{
  int int_nregs, sse_nregs;

  /* Unnamed 256bit vector mode parameters are passed on stack.  */
  if (!named && VALID_AVX256_REG_MODE (mode))
    return;

  if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
      && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
    {
      cum->nregs -= int_nregs;
      cum->sse_nregs -= sse_nregs;
      cum->regno += int_nregs;
      cum->sse_regno += sse_nregs;
    }
  else
    {
      int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
      cum->words = (cum->words + align - 1) & ~(align - 1);
      cum->words += words;
    }
}
static void
function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
			    HOST_WIDE_INT words)
{
  /* Otherwise, this should be passed indirect.  */
  gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);

  cum->words += words;
  if (cum->nregs > 0)
    {
      cum->nregs -= 1;
      cum->regno += 1;
    }
}
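
/* Note for illustration: the Windows x64 ABI assigns argument slots
   positionally, so advancing past any argument consumes exactly one of the
   four slots (RCX/XMM0 through R9/XMM3) regardless of its type; a double in
   the second slot leaves R8/XMM2 for the next argument.  */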
/* Update the data in CUM to advance over an argument of mode MODE and
   data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)  */

static void
ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
			   const_tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  HOST_WIDE_INT bytes, words;

  if (mode == BLKmode)
    bytes = int_size_in_bytes (type);
  else
    bytes = GET_MODE_SIZE (mode);
  words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (type)
    mode = type_natural_mode (type, NULL);

  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
    function_arg_advance_ms_64 (cum, bytes, words);
  else if (TARGET_64BIT)
    function_arg_advance_64 (cum, mode, type, words, named);
  else
    function_arg_advance_32 (cum, mode, type, bytes, words);
}
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).  */

static rtx
function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
		 enum machine_mode orig_mode, const_tree type,
		 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
{
  static bool warnedsse, warnedmmx;

  /* Avoid the AL settings for the Unix64 ABI.  */
  if (mode == VOIDmode)
    return constm1_rtx;

  switch (mode)
    {
    default:
      break;

    case BLKmode:
      if (bytes < 0)
	break;
      /* FALLTHRU */
    case DImode:
    case SImode:
    case HImode:
    case QImode:
      if (words <= cum->nregs)
	{
	  int regno = cum->regno;

	  /* Fastcall allocates the first two DWORD (SImode) or
	     smaller arguments to ECX and EDX if it isn't an
	     aggregate type.  */
	  if (cum->fastcall)
	    {
	      if (mode == BLKmode
		  || mode == DImode
		  || (type && AGGREGATE_TYPE_P (type)))
		break;

	      /* ECX not EAX is the first allocated register.  */
	      if (regno == AX_REG)
		regno = CX_REG;
	    }
	  return gen_rtx_REG (mode, regno);
	}
      break;

    case DFmode:
      if (cum->float_in_sse < 2)
	break;
    case SFmode:
      if (cum->float_in_sse < 1)
	break;
      /* FALLTHRU */
    case TImode:
      /* In 32bit, we pass TImode in xmm registers.  */
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
    case V4SFmode:
    case V2DFmode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (!TARGET_SSE && !warnedsse && cum->warn_sse)
	    {
	      warnedsse = true;
	      warning (0, "SSE vector argument without SSE enabled "
		       "changes the ABI");
	    }
	  if (cum->sse_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->sse_regno + FIRST_SSE_REG);
	}
      break;

    case OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();

    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (cum->sse_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->sse_regno + FIRST_SSE_REG);
	}
      break;

    case V8QImode:
    case V4HImode:
    case V2SImode:
    case V2SFmode:
    case V1TImode:
    case V1DImode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
	    {
	      warnedmmx = true;
	      warning (0, "MMX vector argument without MMX enabled "
		       "changes the ABI");
	    }
	  if (cum->mmx_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->mmx_regno + FIRST_MMX_REG);
	}
      break;
    }

  return NULL_RTX;
}
static rtx
function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
		 enum machine_mode orig_mode, const_tree type, bool named)
{
  /* Handle a hidden AL argument containing number of registers
     for varargs x86-64 functions.  */
  if (mode == VOIDmode)
    return GEN_INT (cum->maybe_vaarg
		    ? (cum->sse_nregs < 0
		       ? X86_64_SSE_REGPARM_MAX
		       : cum->sse_regno)
		    : -1);

  switch (mode)
    {
    default:
      break;

    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      /* Unnamed 256bit vector mode parameters are passed on stack.  */
      if (!named)
	return NULL;
      break;
    }

  return construct_container (mode, orig_mode, type, 0, cum->nregs,
			      cum->sse_nregs,
			      &x86_64_int_parameter_registers [cum->regno],
			      cum->sse_regno);
}
static rtx
function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
		    enum machine_mode orig_mode, bool named,
		    HOST_WIDE_INT bytes)
{
  unsigned int regno;

  /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
     We use value of -2 to specify that current function call is MSABI.  */
  if (mode == VOIDmode)
    return GEN_INT (-2);

  /* If we've run out of registers, it goes on the stack.  */
  if (cum->nregs == 0)
    return NULL_RTX;

  regno = x86_64_ms_abi_int_parameter_registers[cum->regno];

  /* Only floating point modes are passed in anything but integer regs.  */
  if (TARGET_SSE && (mode == SFmode || mode == DFmode))
    {
      if (named)
	regno = cum->regno + FIRST_SSE_REG;
      else
	{
	  rtx t1, t2;

	  /* Unnamed floating parameters are passed in both the
	     SSE and integer registers.  */
	  t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
	  t2 = gen_rtx_REG (mode, regno);
	  t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
	  t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
	  return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
	}
    }
  /* Handle aggregated types passed in register.  */
  if (orig_mode == BLKmode)
    {
      if (bytes > 0 && bytes <= 8)
	mode = (bytes > 4 ? DImode : SImode);
      if (mode == BLKmode)
	mode = DImode;
    }

  return gen_reg_or_parallel (mode, orig_mode, regno);
}
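
/* Illustrative example (not drawn from the Windows x64 document itself): a
   third named double argument lands in XMM2; for an unnamed double in a
   varargs call the PARALLEL above makes the caller load both XMM2 and R8,
   since the callee may fetch the value from either register bank.  */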
/* Return where to put the arguments to a function.
   Return zero to push the argument on the stack, or a hard register in
   which to store the argument.

   MODE is the argument's machine mode.  TYPE is the data type of the
   argument.  It is null for libcalls where that information may not be
   available.  CUM gives information about the preceding args and about
   the function being called.  NAMED is nonzero if this argument is a
   named parameter (otherwise it is an extra parameter matching an
   ellipsis).  */

static rtx
ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
		   const_tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  enum machine_mode mode = omode;
  HOST_WIDE_INT bytes, words;
  rtx arg;

  if (mode == BLKmode)
    bytes = int_size_in_bytes (type);
  else
    bytes = GET_MODE_SIZE (mode);
  words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* To simplify the code below, represent vector types with a vector mode
     even if MMX/SSE are not active.  */
  if (type && TREE_CODE (type) == VECTOR_TYPE)
    mode = type_natural_mode (type, cum);

  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
    arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
  else if (TARGET_64BIT)
    arg = function_arg_64 (cum, mode, omode, type, named);
  else
    arg = function_arg_32 (cum, mode, omode, type, bytes, words);

  if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
    {
      /* This argument uses 256bit AVX modes.  */
      if (cum->caller)
	cfun->machine->callee_pass_avx256_p = true;
      else
	cfun->machine->caller_pass_avx256_p = true;
    }

  return arg;
}
/* A C expression that indicates when an argument must be passed by
   reference.  If nonzero for an argument, a copy of that argument is
   made in memory and a pointer to the argument is passed instead of
   the argument itself.  The pointer is passed in whatever way is
   appropriate for passing a pointer to that type.  */

static bool
ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
			enum machine_mode mode ATTRIBUTE_UNUSED,
			const_tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  /* See Windows x64 Software Convention.  */
  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
    {
      int msize = (int) GET_MODE_SIZE (mode);
      if (type)
	{
	  /* Arrays are passed by reference.  */
	  if (TREE_CODE (type) == ARRAY_TYPE)
	    return true;

	  if (AGGREGATE_TYPE_P (type))
	    {
	      /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
		 are passed by reference.  */
	      msize = int_size_in_bytes (type);
	    }
	}

      /* __m128 is passed by reference.  */
      switch (msize)
	{
	case 1: case 2: case 4: case 8:
	  break;
	default:
	  return true;
	}
    }
  else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
    return true;

  return false;
}
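
/* Illustrative example: under the MS ABI a 12-byte struct is passed by
   reference (its size is not 1, 2, 4 or 8 bytes), while an 8-byte struct
   travels by value in a general purpose register.  The SysV side only
   forces by-reference passing for variable sized types.  */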
/* Return true when TYPE should be 128bit aligned for 32bit argument
   passing ABI.  XXX: This function is obsolete and is only used for
   checking psABI compatibility with previous versions of GCC.  */

static bool
ix86_compat_aligned_value_p (const_tree type)
{
  enum machine_mode mode = TYPE_MODE (type);
  if (((TARGET_SSE && SSE_REG_MODE_P (mode))
       || mode == TDmode
       || mode == TFmode
       || mode == TCmode)
      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
    return true;
  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  {
	    tree field;

	    /* Walk all the structure fields.  */
	    for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	      {
		if (TREE_CODE (field) == FIELD_DECL
		    && ix86_compat_aligned_value_p (TREE_TYPE (field)))
		  return true;
	      }
	    break;
	  }

	case ARRAY_TYPE:
	  /* Just for use if some languages passes arrays by value.  */
	  if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
	    return true;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  return false;
}
/* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
   XXX: This function is obsolete and is only used for checking psABI
   compatibility with previous versions of GCC.  */

static unsigned int
ix86_compat_function_arg_boundary (enum machine_mode mode,
				   const_tree type, unsigned int align)
{
  /* In 32bit, only _Decimal128 and __float128 are aligned to their
     natural boundaries.  */
  if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
    {
      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
	 make an exception for SSE modes since these require 128bit
	 alignment.

	 The handling here differs from field_alignment.  ICC aligns MMX
	 arguments to 4 byte boundaries, while structure fields are aligned
	 to 8 byte boundaries.  */
      if (!type)
	{
	  if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
	    align = PARM_BOUNDARY;
	}
      else
	{
	  if (!ix86_compat_aligned_value_p (type))
	    align = PARM_BOUNDARY;
	}
    }
  if (align > BIGGEST_ALIGNMENT)
    align = BIGGEST_ALIGNMENT;
  return align;
}
/* Return true when TYPE should be 128bit aligned for 32bit argument
   passing ABI.  */

static bool
ix86_contains_aligned_value_p (const_tree type)
{
  enum machine_mode mode = TYPE_MODE (type);

  if (mode == XFmode || mode == XCmode)
    return false;

  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  {
	    tree field;

	    /* Walk all the structure fields.  */
	    for (field = TYPE_FIELDS (type);
		 field;
		 field = DECL_CHAIN (field))
	      {
		if (TREE_CODE (field) == FIELD_DECL
		    && ix86_contains_aligned_value_p (TREE_TYPE (field)))
		  return true;
	      }
	    break;
	  }

	case ARRAY_TYPE:
	  /* Just for use if some languages passes arrays by value.  */
	  if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
	    return true;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    return TYPE_ALIGN (type) >= 128;

  return false;
}
/* Gives the alignment boundary, in bits, of an argument with the
   specified mode and type.  */

static unsigned int
ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
{
  unsigned int align;
  if (type)
    {
      /* Since the main variant type is used for call, we convert it to
	 the main variant type.  */
      type = TYPE_MAIN_VARIANT (type);
      align = TYPE_ALIGN (type);
    }
  else
    align = GET_MODE_ALIGNMENT (mode);
  if (align < PARM_BOUNDARY)
    align = PARM_BOUNDARY;
  else
    {
      static bool warned;
      unsigned int saved_align = align;

      if (!TARGET_64BIT)
	{
	  /* i386 ABI defines XFmode arguments to be 4 byte aligned.  */
	  if (!type)
	    {
	      if (mode == XFmode || mode == XCmode)
		align = PARM_BOUNDARY;
	    }
	  else if (!ix86_contains_aligned_value_p (type))
	    align = PARM_BOUNDARY;

	  if (align < 128)
	    align = PARM_BOUNDARY;
	}

      if (warn_psabi
	  && !warned
	  && align != ix86_compat_function_arg_boundary (mode, type,
							 saved_align))
	{
	  warned = true;
	  inform (input_location,
		  "The ABI for passing parameters with %d-byte"
		  " alignment has changed in GCC 4.6",
		  align / BITS_PER_UNIT);
	}
    }

  return align;
}
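
/* Example (illustration only): a 32-bit __m128 argument reports a 128-bit
   boundary here, while a plain double reports PARM_BOUNDARY (32 bits),
   matching the traditional 4-byte i386 argument layout.  */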
/* Return true if N is a possible register number of function value.  */

static bool
ix86_function_value_regno_p (const unsigned int regno)
{
  switch (regno)
    {
    case AX_REG:
      return true;

    case FIRST_FLOAT_REG:
      /* TODO: The function should depend on current function ABI but
	 builtins.c would need updating then.  Therefore we use the
	 default ABI.  */
      if (TARGET_64BIT && ix86_abi == MS_ABI)
	return false;
      return TARGET_FLOAT_RETURNS_IN_80387;

    case FIRST_SSE_REG:
      return TARGET_SSE;

    case FIRST_MMX_REG:
      if (TARGET_MACHO || TARGET_64BIT)
	return false;
      return TARGET_MMX;
    }

  return false;
}
/* Define how to find the value returned by a function.
   VALTYPE is the data type of the value (as a tree).
   If the precise function being called is known, FUNC is its FUNCTION_DECL;
   otherwise, FUNC is 0.  */

static rtx
function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
		   const_tree fntype, const_tree fn)
{
  unsigned int regno;

  /* 8-byte vector modes in %mm0.  See ix86_return_in_memory for where
     we normally prevent this case when mmx is not available.  However
     some ABIs may require the result to be returned like DImode.  */
  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
    regno = FIRST_MMX_REG;

  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
     we prevent this case when sse is not available.  However some ABIs
     may require the result to be returned like integer TImode.  */
  else if (mode == TImode
	   || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
    regno = FIRST_SSE_REG;

  /* 32-byte vector modes in %ymm0.  */
  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
    regno = FIRST_SSE_REG;

  /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387).  */
  else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
    regno = FIRST_FLOAT_REG;
  else
    /* Most things go in %eax.  */
    regno = AX_REG;

  /* Override FP return register with %xmm0 for local functions when
     SSE math is enabled or for functions with sseregparm attribute.  */
  if ((fn || fntype) && (mode == SFmode || mode == DFmode))
    {
      int sse_level = ix86_function_sseregparm (fntype, fn, false);
      if ((sse_level >= 1 && mode == SFmode)
	  || (sse_level == 2 && mode == DFmode))
	regno = FIRST_SSE_REG;
    }

  /* OImode shouldn't be used directly.  */
  gcc_assert (mode != OImode);

  return gen_rtx_REG (orig_mode, regno);
}

static rtx
function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
		   const_tree valtype)
{
  rtx ret;

  /* Handle libcalls, which don't provide a type node.  */
  if (valtype == NULL)
    {
      unsigned int regno;

      switch (mode)
	{
	case SFmode:
	case SCmode:
	case DFmode:
	case DCmode:
	case TFmode:
	case SDmode:
	case DDmode:
	case TDmode:
	  regno = FIRST_SSE_REG;
	  break;
	case XFmode:
	case XCmode:
	  regno = FIRST_FLOAT_REG;
	  break;
	case TCmode:
	  return NULL;
	default:
	  regno = AX_REG;
	}

      return gen_rtx_REG (mode, regno);
    }
  else if (POINTER_TYPE_P (valtype))
    {
      /* Pointers are always returned in word_mode.  */
      mode = word_mode;
    }

  ret = construct_container (mode, orig_mode, valtype, 1,
			     X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
			     x86_64_int_return_registers, 0);

  /* For zero sized structures, construct_container returns NULL, but we
     need to keep rest of compiler happy by returning meaningful value.  */
  if (!ret)
    ret = gen_rtx_REG (orig_mode, AX_REG);

  return ret;
}

static rtx
function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
{
  unsigned int regno = AX_REG;

  if (TARGET_SSE)
    {
      switch (GET_MODE_SIZE (mode))
	{
	case 16:
	  if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
	      && !COMPLEX_MODE_P (mode))
	    regno = FIRST_SSE_REG;
	  break;
	case 8:
	case 4:
	  if (mode == SFmode || mode == DFmode)
	    regno = FIRST_SSE_REG;
	  break;
	default:
	  break;
	}
    }
  return gen_rtx_REG (orig_mode, regno);
}

static rtx
ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
		       enum machine_mode orig_mode, enum machine_mode mode)
{
  const_tree fn, fntype;

  fn = NULL_TREE;
  if (fntype_or_decl && DECL_P (fntype_or_decl))
    fn = fntype_or_decl;
  fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;

  if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
    return function_value_ms_64 (orig_mode, mode);
  else if (TARGET_64BIT)
    return function_value_64 (orig_mode, mode, valtype);
  else
    return function_value_32 (orig_mode, mode, fntype, fn);
}

static rtx
ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
		     bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode mode, orig_mode;

  orig_mode = TYPE_MODE (valtype);
  mode = type_natural_mode (valtype, NULL);
  return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
}
/* Pointer function arguments and return values are promoted to
   word_mode.  */

static enum machine_mode
ix86_promote_function_mode (const_tree type, enum machine_mode mode,
			    int *punsignedp, const_tree fntype,
			    int for_return)
{
  if (type != NULL_TREE && POINTER_TYPE_P (type))
    {
      *punsignedp = POINTERS_EXTEND_UNSIGNED;
      return word_mode;
    }
  return default_promote_function_mode (type, mode, punsignedp, fntype,
					for_return);
}

static rtx
ix86_libcall_value (enum machine_mode mode)
{
  return ix86_function_value_1 (NULL, NULL, mode, mode);
}
/* Return true iff type is returned in memory.  */

static bool ATTRIBUTE_UNUSED
return_in_memory_32 (const_tree type, enum machine_mode mode)
{
  HOST_WIDE_INT size;

  if (mode == BLKmode)
    return true;

  size = int_size_in_bytes (type);

  if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
    return false;

  if (VECTOR_MODE_P (mode) || mode == TImode)
    {
      /* User-created vectors small enough to fit in EAX.  */
      if (size < 8)
	return false;

      /* MMX/3dNow values are returned in MM0,
	 except when it doesn't exist or the ABI prescribes otherwise.  */
      if (size == 8)
	return !TARGET_MMX || TARGET_VECT8_RETURNS;

      /* SSE values are returned in XMM0, except when it doesn't exist.  */
      if (size == 16)
	return !TARGET_SSE;

      /* AVX values are returned in YMM0, except when it doesn't exist.  */
      if (size == 32)
	return !TARGET_AVX;
    }

  if (mode == XFmode)
    return false;

  if (size > 12)
    return true;

  /* OImode shouldn't be used directly.  */
  gcc_assert (mode != OImode);

  return false;
}

static bool ATTRIBUTE_UNUSED
return_in_memory_64 (const_tree type, enum machine_mode mode)
{
  int needed_intregs, needed_sseregs;
  return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
}

static bool ATTRIBUTE_UNUSED
return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
{
  HOST_WIDE_INT size = int_size_in_bytes (type);

  /* __m128 is returned in xmm0.  */
  if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
      && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
    return false;

  /* Otherwise, the size must be exactly in [1248].  */
  return size != 1 && size != 2 && size != 4 && size != 8;
}
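
/* Example (illustration only): under the MS ABI a 16-byte __m128 comes back
   in XMM0 and an 8-byte struct in RAX, while a 12-byte struct fails the
   [1248] size test and is returned through a hidden pointer supplied by
   the caller.  */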
static bool
ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
#ifdef SUBTARGET_RETURN_IN_MEMORY
  return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
#else
  const enum machine_mode mode = type_natural_mode (type, NULL);

  if (TARGET_64BIT)
    {
      if (ix86_function_type_abi (fntype) == MS_ABI)
	return return_in_memory_ms_64 (type, mode);
      else
	return return_in_memory_64 (type, mode);
    }
  else
    return return_in_memory_32 (type, mode);
#endif
}
/* When returning SSE vector types, we have a choice of either
     (1) being abi incompatible with a -march switch, or
     (2) generating an error.
   Given no good solution, I think the safest thing is one warning.
   The user won't be able to use -Werror, but....

   Choose the STRUCT_VALUE_RTX hook because that's (at present) only
   called in response to actually generating a caller or callee that
   uses such a type.  As opposed to TARGET_RETURN_IN_MEMORY, which is called
   via aggregate_value_p for general type probing from tree-ssa.  */

static rtx
ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
{
  static bool warnedsse, warnedmmx;

  if (!TARGET_64BIT && type)
    {
      /* Look at the return type of the function, not the function type.  */
      enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));

      if (!TARGET_SSE && !warnedsse)
	{
	  if (mode == TImode
	      || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
	    {
	      warnedsse = true;
	      warning (0, "SSE vector return without SSE enabled "
		       "changes the ABI");
	    }
	}

      if (!TARGET_MMX && !warnedmmx)
	{
	  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
	    {
	      warnedmmx = true;
	      warning (0, "MMX vector return without MMX enabled "
		       "changes the ABI");
	    }
	}
    }

  return NULL;
}
/* Create the va_list data type.  */

/* Returns the calling convention specific va_list data type.
   The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI.  */

static tree
ix86_build_builtin_va_list_abi (enum calling_abi abi)
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  /* For i386 we use plain pointer to argument area.  */
  if (!TARGET_64BIT || abi == MS_ABI)
    return build_pointer_type (char_type_node);

  record = lang_hooks.types.make_type (RECORD_TYPE);
  type_decl = build_decl (BUILTINS_LOCATION,
			  TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("gp_offset"),
		      unsigned_type_node);
  f_fpr = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("fp_offset"),
		      unsigned_type_node);
  f_ovf = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("overflow_arg_area"),
		      ptr_type_node);
  f_sav = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("reg_save_area"),
		      ptr_type_node);

  va_list_gpr_counter_field = f_gpr;
  va_list_fpr_counter_field = f_fpr;

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  TYPE_STUB_DECL (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  DECL_CHAIN (f_gpr) = f_fpr;
  DECL_CHAIN (f_fpr) = f_ovf;
  DECL_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}
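
/* The record built above corresponds to the layout mandated by the SysV
   x86-64 ABI, shown here as C for illustration:

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } va_list[1];
*/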
/* Setup the builtin va_list data type and for 64-bit the additional
   calling convention specific va_list data types.  */

static tree
ix86_build_builtin_va_list (void)
{
  tree ret = ix86_build_builtin_va_list_abi (ix86_abi);

  /* Initialize abi specific va_list builtin types.  */
  if (TARGET_64BIT)
    {
      tree t;
      if (ix86_abi == MS_ABI)
	{
	  t = ix86_build_builtin_va_list_abi (SYSV_ABI);
	  if (TREE_CODE (t) != RECORD_TYPE)
	    t = build_variant_type_copy (t);
	  sysv_va_list_type_node = t;
	}
      else
	{
	  t = ret;
	  if (TREE_CODE (t) != RECORD_TYPE)
	    t = build_variant_type_copy (t);
	  sysv_va_list_type_node = t;
	}
      if (ix86_abi != MS_ABI)
	{
	  t = ix86_build_builtin_va_list_abi (MS_ABI);
	  if (TREE_CODE (t) != RECORD_TYPE)
	    t = build_variant_type_copy (t);
	  ms_va_list_type_node = t;
	}
      else
	{
	  t = ret;
	  if (TREE_CODE (t) != RECORD_TYPE)
	    t = build_variant_type_copy (t);
	  ms_va_list_type_node = t;
	}
    }

  return ret;
}
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */

static void
setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
{
  rtx save_area, mem;
  alias_set_type set;
  int i, max;

  /* GPR size of varargs save area.  */
  if (cfun->va_list_gpr_size)
    ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
  else
    ix86_varargs_gpr_size = 0;

  /* FPR size of varargs save area.  We don't need it if we don't pass
     anything in SSE registers.  */
  if (TARGET_SSE && cfun->va_list_fpr_size)
    ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
  else
    ix86_varargs_fpr_size = 0;

  if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
    return;

  save_area = frame_pointer_rtx;
  set = get_varargs_alias_set ();

  max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
  if (max > X86_64_REGPARM_MAX)
    max = X86_64_REGPARM_MAX;

  for (i = cum->regno; i < max; i++)
    {
      mem = gen_rtx_MEM (word_mode,
			 plus_constant (save_area, i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      emit_move_insn (mem,
		      gen_rtx_REG (word_mode,
				   x86_64_int_parameter_registers[i]));
    }

  if (ix86_varargs_fpr_size)
    {
      enum machine_mode smode;
      rtx label, test;

      /* Now emit code to save SSE registers.  The AX parameter contains number
	 of SSE parameter registers used to call this function, though all we
	 actually check here is the zero/non-zero status.  */

      label = gen_label_rtx ();
      test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
      emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
				      label));

      /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
	 we used movdqa (i.e. TImode) instead?  Perhaps even better would
	 be if we could determine the real mode of the data, via a hook
	 into pass_stdarg.  Ignore all that for now.  */
      smode = V4SFmode;
      if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
	crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);

      max = cum->sse_regno + cfun->va_list_fpr_size / 16;
      if (max > X86_64_SSE_REGPARM_MAX)
	max = X86_64_SSE_REGPARM_MAX;

      for (i = cum->sse_regno; i < max; ++i)
	{
	  mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
	  mem = gen_rtx_MEM (smode, mem);
	  MEM_NOTRAP_P (mem) = 1;
	  set_mem_alias_set (mem, set);
	  set_mem_align (mem, GET_MODE_ALIGNMENT (smode));

	  emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
	}

      emit_label (label);
    }
}
static void
setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
{
  alias_set_type set = get_varargs_alias_set ();
  int i;

  /* Reset to zero, as there might be a sysv vaarg used
     before.  */
  ix86_varargs_gpr_size = 0;
  ix86_varargs_fpr_size = 0;

  for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
    {
      rtx reg, mem;

      mem = gen_rtx_MEM (Pmode,
			 plus_constant (virtual_incoming_args_rtx,
					i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);

      reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
      emit_move_insn (mem, reg);
    }
}
static void
ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
			     tree type, int *pretend_size ATTRIBUTE_UNUSED,
			     int no_rtl)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  CUMULATIVE_ARGS next_cum;
  tree fntype;

  /* This argument doesn't appear to be used anymore.  Which is good,
     because the old code here didn't suppress rtl generation.  */
  gcc_assert (!no_rtl);

  if (!TARGET_64BIT)
    return;

  fntype = TREE_TYPE (current_function_decl);

  /* For varargs, we do not want to skip the dummy va_dcl argument.
     For stdargs, we do want to skip the last named argument.  */
  next_cum = *cum;
  if (stdarg_p (fntype))
    ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
			       true);

  if (cum->call_abi == MS_ABI)
    setup_incoming_varargs_ms_64 (&next_cum);
  else
    setup_incoming_varargs_64 (&next_cum);
}
/* Checks if TYPE is of kind va_list char *.  */

static bool
is_va_list_char_pointer (tree type)
{
  tree canonic;

  /* For 32-bit it is always true.  */
  if (!TARGET_64BIT)
    return true;
  canonic = ix86_canonical_va_list_type (type);
  return (canonic == ms_va_list_type_node
	  || (ix86_abi == MS_ABI && canonic == va_list_type_node));
}
/* Implement va_start.  */

static void
ix86_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  tree type;
  rtx ovf_rtx;

  if (flag_split_stack
      && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    {
      unsigned int scratch_regno;

      /* When we are splitting the stack, we can't refer to the stack
	 arguments using internal_arg_pointer, because they may be on
	 the old stack.  The split stack prologue will arrange to
	 leave a pointer to the old stack arguments in a scratch
	 register, which we here copy to a pseudo-register.  The split
	 stack prologue can't set the pseudo-register directly because
	 it (the prologue) runs before any registers have been saved.  */

      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno != INVALID_REGNUM)
	{
	  rtx reg, seq;

	  reg = gen_reg_rtx (Pmode);
	  cfun->machine->split_stack_varargs_pointer = reg;

	  start_sequence ();
	  emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
	  seq = get_insns ();
	  end_sequence ();

	  push_topmost_sequence ();
	  emit_insn_after (seq, entry_of_function ());
	  pop_topmost_sequence ();
	}
    }

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
    {
      if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
	std_expand_builtin_va_start (valist, nextarg);
      else
	{
	  rtx va_r, next;

	  va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
	  next = expand_binop (ptr_mode, add_optab,
			       cfun->machine->split_stack_varargs_pointer,
			       crtl->args.arg_offset_rtx,
			       NULL_RTX, 0, OPTAB_LIB_WIDEN);
	  convert_move (va_r, next, 0);
	}
      return;
    }

  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  valist = build_simple_mem_ref (valist);
  TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
  /* The following should be folded into the MEM_REF offset.  */
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
		f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
		f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
		f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
		f_sav, NULL_TREE);

  /* Count number of gp and fp argument registers used.  */
  words = crtl->args.info.words;
  n_gpr = crtl->args.info.regno;
  n_fpr = crtl->args.info.sse_regno;

  if (cfun->va_list_gpr_size)
    {
      type = TREE_TYPE (gpr);
      t = build2 (MODIFY_EXPR, type,
		  gpr, build_int_cst (type, n_gpr * 8));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  if (TARGET_SSE && cfun->va_list_fpr_size)
    {
      type = TREE_TYPE (fpr);
      t = build2 (MODIFY_EXPR, type, fpr,
		  build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  /* Find the overflow area.  */
  type = TREE_TYPE (ovf);
  if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    ovf_rtx = crtl->args.internal_arg_pointer;
  else
    ovf_rtx = cfun->machine->split_stack_varargs_pointer;
  t = make_tree (type, ovf_rtx);
  if (words != 0)
    t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
  t = build2 (MODIFY_EXPR, type, ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
    {
      /* Find the register save area.
	 Prologue of the function save it right above stack frame.  */
      type = TREE_TYPE (sav);
      t = make_tree (type, frame_pointer_rtx);
      if (!ix86_varargs_gpr_size)
	t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
      t = build2 (MODIFY_EXPR, type, sav, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }
}
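
/* Example of the values stored above (illustration only): in a function
   declared f (int a, ...), one GP register is consumed by A, so va_start
   sets gp_offset = 8 and fp_offset = 48 + 0*16 = 48, i.e. offsets into the
   register save area just past the named arguments.  */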
7855 /* Implement va_arg. */
7858 ix86_gimplify_va_arg (tree valist
, tree type
, gimple_seq
*pre_p
,
7861 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
7862 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
7863 tree gpr
, fpr
, ovf
, sav
, t
;
7865 tree lab_false
, lab_over
= NULL_TREE
;
7870 enum machine_mode nat_mode
;
7871 unsigned int arg_boundary
;
7873 /* Only 64bit target needs something special. */
7874 if (!TARGET_64BIT
|| is_va_list_char_pointer (TREE_TYPE (valist
)))
7875 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
7877 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
7878 f_fpr
= DECL_CHAIN (f_gpr
);
7879 f_ovf
= DECL_CHAIN (f_fpr
);
7880 f_sav
= DECL_CHAIN (f_ovf
);
7882 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
),
7883 build_va_arg_indirect_ref (valist
), f_gpr
, NULL_TREE
);
7884 valist
= build_va_arg_indirect_ref (valist
);
7885 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
7886 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
7887 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
7889 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
7891 type
= build_pointer_type (type
);
7892 size
= int_size_in_bytes (type
);
7893 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
7895 nat_mode
= type_natural_mode (type
, NULL
);
7904 /* Unnamed 256bit vector mode parameters are passed on stack. */
7905 if (!TARGET_64BIT_MS_ABI
)
7912 container
= construct_container (nat_mode
, TYPE_MODE (type
),
7913 type
, 0, X86_64_REGPARM_MAX
,
7914 X86_64_SSE_REGPARM_MAX
, intreg
,
7919 /* Pull the value out of the saved registers. */
7921 addr
= create_tmp_var (ptr_type_node
, "addr");
  if (container)
    {
      int needed_intregs, needed_sseregs;
      bool need_temp;
      tree int_addr, sse_addr;

      lab_false = create_artificial_label (UNKNOWN_LOCATION);
      lab_over = create_artificial_label (UNKNOWN_LOCATION);

      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);

      need_temp = (!REG_P (container)
                   && ((needed_intregs && TYPE_ALIGN (type) > 64)
                       || TYPE_ALIGN (type) > 128));

      /* In case we are passing a structure, verify that it is a consecutive
         block on the register save area.  If not, we need to do moves.  */
      if (!need_temp && !REG_P (container))
        {
          /* Verify that all registers are strictly consecutive.  */
          if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
            {
              int i;

              for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
                {
                  rtx slot = XVECEXP (container, 0, i);
                  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
                      || INTVAL (XEXP (slot, 1)) != i * 16)
                    need_temp = true;
                }
            }
          else
            {
              int i;

              for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
                {
                  rtx slot = XVECEXP (container, 0, i);
                  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
                      || INTVAL (XEXP (slot, 1)) != i * 8)
                    need_temp = true;
                }
            }
        }

      int_addr = create_tmp_var (ptr_type_node, "int_addr");
      sse_addr = create_tmp_var (ptr_type_node, "sse_addr");

      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
        {
          t = build_int_cst (TREE_TYPE (gpr),
                             (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
          t = build2 (GE_EXPR, boolean_type_node, gpr, t);
          t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
          t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
          gimplify_and_add (t, pre_p);
        }
      if (needed_sseregs)
        {
          t = build_int_cst (TREE_TYPE (fpr),
                             (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
                             + X86_64_REGPARM_MAX * 8);
          t = build2 (GE_EXPR, boolean_type_node, fpr, t);
          t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
          t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
          gimplify_and_add (t, pre_p);
        }
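      /* Worked example (illustrative only, not from the original source):
         with X86_64_REGPARM_MAX == 6, an argument needing 2 integer
         registers jumps to lab_false once gpr >= (6 - 2 + 1) * 8 == 40,
         i.e. when fewer than 2 of the 6 8-byte GP slots remain unread.
         The SSE test is analogous, except that the 16-byte SSE slots
         start after the 6 * 8 == 48-byte GP area, hence the
         "+ X86_64_REGPARM_MAX * 8" bias.  */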
      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
        {
          /* int_addr = gpr + sav; */
          t = fold_build_pointer_plus (sav, gpr);
          gimplify_assign (int_addr, t, pre_p);
        }
      if (needed_sseregs)
        {
          /* sse_addr = fpr + sav; */
          t = fold_build_pointer_plus (sav, fpr);
          gimplify_assign (sse_addr, t, pre_p);
        }
      if (need_temp)
        {
          int i, prev_size = 0;
          tree temp = create_tmp_var (type, "va_arg_tmp");

          /* addr = &temp; */
          t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
          gimplify_assign (addr, t, pre_p);

          for (i = 0; i < XVECLEN (container, 0); i++)
            {
              rtx slot = XVECEXP (container, 0, i);
              rtx reg = XEXP (slot, 0);
              enum machine_mode mode = GET_MODE (reg);
              tree piece_type;
              tree addr_type;
              tree daddr_type;
              tree src_addr, src;
              int src_offset;
              tree dest_addr, dest;
              int cur_size = GET_MODE_SIZE (mode);

              gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
              prev_size = INTVAL (XEXP (slot, 1));
              if (prev_size + cur_size > size)
                {
                  cur_size = size - prev_size;
                  mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
                  if (mode == BLKmode)
                    mode = QImode;
                }
              piece_type = lang_hooks.types.type_for_mode (mode, 1);
              if (mode == GET_MODE (reg))
                addr_type = build_pointer_type (piece_type);
              else
                addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
                                                         true);
              daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
                                                        true);

              if (SSE_REGNO_P (REGNO (reg)))
                {
                  src_addr = sse_addr;
                  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
                }
              else
                {
                  src_addr = int_addr;
                  src_offset = REGNO (reg) * 8;
                }
              src_addr = fold_convert (addr_type, src_addr);
              src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);

              dest_addr = fold_convert (daddr_type, addr);
              dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
              if (cur_size == GET_MODE_SIZE (mode))
                {
                  src = build_va_arg_indirect_ref (src_addr);
                  dest = build_va_arg_indirect_ref (dest_addr);

                  gimplify_assign (dest, src, pre_p);
                }
              else
                {
                  tree copy
                    = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
                                       3, dest_addr, src_addr,
                                       size_int (cur_size));
                  gimplify_and_add (copy, pre_p);
                }
              prev_size += cur_size;
            }
        }

      if (needed_intregs)
        {
          t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
                      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
          gimplify_assign (gpr, t, pre_p);
        }

      if (needed_sseregs)
        {
          t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
                      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
          gimplify_assign (fpr, t, pre_p);
        }

      gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));

      gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
    }

  /* ... otherwise out of the overflow area.  */

  /* When we align parameter on stack for caller, if the parameter
     alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
     aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We will match callee
     here with caller.  */
  arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
  if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
    arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;

  /* Care for on-stack alignment if needed.  */
  if (arg_boundary <= 64 || size == 0)
    t = ovf;
  else
    {
      HOST_WIDE_INT align = arg_boundary / 8;
      t = fold_build_pointer_plus_hwi (ovf, align - 1);
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
                  build_int_cst (TREE_TYPE (t), -align));
    }

  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
  gimplify_assign (addr, t, pre_p);

  t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
  gimplify_assign (unshare_expr (ovf), t, pre_p);

  if (container)
    gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));

  ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
  addr = fold_convert (ptrtype, addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);
  return build_va_arg_indirect_ref (addr);
}
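/* A minimal standalone sketch (an assumed helper, not GCC code) of the
   rounding idiom used for the overflow area above: for a power-of-two
   ALIGN, (p + align - 1) & -align rounds P up to the next multiple of
   ALIGN.  Kept under #if 0 purely as illustration.  */
#if 0
#include <stdint.h>

static uintptr_t
round_up (uintptr_t p, uintptr_t align)
{
  /* E.g. round_up (33, 16) == 48 and round_up (48, 16) == 48.  */
  return (p + align - 1) & -align;
}
#endif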
/* Return true if OPNUM's MEM should be matched
   in movabs* patterns.  */

int
ix86_check_movabs (rtx insn, int opnum)
{
  rtx set, mem;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  mem = XEXP (set, opnum);
  while (GET_CODE (mem) == SUBREG)
    mem = SUBREG_REG (mem);
  gcc_assert (MEM_P (mem));
  return volatile_ok || !MEM_VOLATILE_P (mem);
}
/* Initialize the table of extra 80387 mathematical constants.  */

static void
init_ext_80387_constants (void)
{
  static const char * cst[5] =
  {
    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
    "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
  };
  int i;

  for (i = 0; i < 5; i++)
    {
      real_from_string (&ext_80387_constants_table[i], cst[i]);
      /* Ensure each constant is rounded to XFmode precision.  */
      real_convert (&ext_80387_constants_table[i],
                    XFmode, &ext_80387_constants_table[i]);
    }

  ext_80387_constants_init = 1;
}
/* Return non-zero if the constant is something that
   can be loaded with a special instruction.  */

int
standard_80387_constant_p (rtx x)
{
  enum machine_mode mode = GET_MODE (x);

  REAL_VALUE_TYPE r;

  if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
    return -1;

  if (x == CONST0_RTX (mode))
    return 1;
  if (x == CONST1_RTX (mode))
    return 2;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  /* For XFmode constants, try to find a special 80387 instruction when
     optimizing for size or on those CPUs that benefit from them.  */
  if (mode == XFmode
      && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
    {
      int i;

      if (! ext_80387_constants_init)
        init_ext_80387_constants ();

      for (i = 0; i < 5; i++)
        if (real_identical (&r, &ext_80387_constants_table[i]))
          return i + 3;
    }

  /* Load of the constant -0.0 or -1.0 will be split as
     fldz;fchs or fld1;fchs sequence.  */
  if (real_isnegzero (&r))
    return 8;
  if (real_identical (&r, &dconstm1))
    return 9;

  return 0;
}
/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_80387_constant_opcode (rtx x)
{
  switch (standard_80387_constant_p (x))
    {
    case 1: return "fldz";
    case 2: return "fld1";
    case 3: return "fldlg2";
    case 4: return "fldln2";
    case 5: return "fldl2e";
    case 6: return "fldl2t";
    case 7: return "fldpi";
    case 8:
    case 9: return "#";
    default: gcc_unreachable ();
    }
}
/* Return the CONST_DOUBLE representing the 80387 constant that is
   loaded by the specified special instruction.  The argument IDX
   matches the return value from standard_80387_constant_p.  */

rtx
standard_80387_constant_rtx (int idx)
{
  int i;

  if (! ext_80387_constants_init)
    init_ext_80387_constants ();

  switch (idx)
    {
    case 3:
    case 4:
    case 5:
    case 6:
    case 7:
      i = idx - 3;
      break;

    default:
      gcc_unreachable ();
    }

  return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
                                       XFmode);
}
/* Return 1 if X is all 0s and 2 if X is all 1s
   in supported SSE/AVX vector mode.  */

int
standard_sse_constant_p (rtx x)
{
  enum machine_mode mode = GET_MODE (x);

  if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (vector_all_ones_operand (x, mode))
    switch (mode)
      {
      case V16QImode:
      case V8HImode:
      case V4SImode:
      case V2DImode:
        if (TARGET_SSE2)
          return 2;
      case V32QImode:
      case V16HImode:
      case V8SImode:
      case V4DImode:
        if (TARGET_AVX2)
          return 2;
      default:
        break;
      }

  return 0;
}
/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_sse_constant_opcode (rtx insn, rtx x)
{
  switch (standard_sse_constant_p (x))
    {
    case 1:
      switch (get_attr_mode (insn))
        {
        case MODE_TI:
          if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
            return "%vpxor\t%0, %d0";
        case MODE_V2DF:
          if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
            return "%vxorpd\t%0, %d0";
        case MODE_V4SF:
          return "%vxorps\t%0, %d0";

        case MODE_OI:
          if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
            return "vpxor\t%x0, %x0, %x0";
        case MODE_V4DF:
          if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
            return "vxorpd\t%x0, %x0, %x0";
        case MODE_V8SF:
          return "vxorps\t%x0, %x0, %x0";

        default:
          break;
        }

    case 2:
      if (TARGET_AVX)
        return "vpcmpeqd\t%0, %0, %0";
      else
        return "pcmpeqd\t%0, %0";

    default:
      break;
    }
  gcc_unreachable ();
}
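/* Illustrative note: the all-ones case works because pcmpeqd compares
   a register with itself, so every element compares equal and each
   destination element becomes all 1s; the xor forms likewise clear a
   register without loading a constant from memory.  */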
/* Returns true if OP contains a symbol reference.  */

bool
symbolic_reference_mentioned_p (rtx op)
{
  const char *fmt;
  int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return true;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
        {
          int j;

          for (j = XVECLEN (op, i) - 1; j >= 0; j--)
            if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
              return true;
        }

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
        return true;
    }

  return false;
}
/* Return true if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return false then.  Return false if there is no frame
   marker to de-allocate.  */

bool
ix86_can_use_return_insn_p (void)
{
  struct ix86_frame frame;

  if (! reload_completed || frame_pointer_needed)
    return false;

  /* Don't allow more than 32k pop, since that's all we can do
     with one instruction.  */
  if (crtl->args.pops_args && crtl->args.size >= 32768)
    return false;

  ix86_compute_frame_layout (&frame);
  return (frame.stack_pointer_offset == UNITS_PER_WORD
          && (frame.nregs + frame.nsseregs) == 0);
}
/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

static bool
ix86_frame_pointer_required (void)
{
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)
    return true;

  /* Several x86 OSes need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return true;

  /* For older 32-bit runtimes setjmp requires a valid frame pointer.  */
  if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
    return true;

  /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
     turns off the frame pointer by default.  Turn it back on now if
     we've not got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER
      && (!current_function_is_leaf
          || ix86_current_function_calls_tls_descriptor))
    return true;

  if (crtl->profile && !flag_fentry)
    return true;

  return false;
}
/* Record that the current function accesses previous call frames.  */

void
ix86_setup_frame_addresses (void)
{
  cfun->machine->accesses_prev_frame = 1;
}

#ifndef USE_HIDDEN_LINKONCE
# if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
#  define USE_HIDDEN_LINKONCE 1
# else
#  define USE_HIDDEN_LINKONCE 0
# endif
#endif
static int pic_labels_used;

/* Fills in the label name that should be used for a pc thunk for
   the given register.  */

static void
get_pc_thunk_name (char name[32], unsigned int regno)
{
  gcc_assert (!TARGET_64BIT);

  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
  else
    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
}
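/* For illustration (names assume USE_HIDDEN_LINKONCE): the thunk for
   %ebx is called __x86.get_pc_thunk.bx and its body, emitted by
   ix86_code_end below, is just

        movl    (%esp), %ebx
        ret

   copying the return address -- the address of the insn right after
   the call -- into the register, which output_set_got then fixes up
   into a GOT pointer with an add.  */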
/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  */

static void
ix86_code_end (void)
{
  rtx xops[2];
  int regno;

  for (regno = AX_REG; regno <= SP_REG; regno++)
    {
      char name[32];
      tree decl;

      if (!(pic_labels_used & (1 << regno)))
        continue;

      get_pc_thunk_name (name, regno);

      decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                         get_identifier (name),
                         build_function_type_list (void_type_node, NULL_TREE));
      DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
                                       NULL_TREE, void_type_node);
      TREE_PUBLIC (decl) = 1;
      TREE_STATIC (decl) = 1;

#if TARGET_MACHO
      if (TARGET_MACHO)
        {
          switch_to_section (darwin_sections[text_coal_section]);
          fputs ("\t.weak_definition\t", asm_out_file);
          assemble_name (asm_out_file, name);
          fputs ("\n\t.private_extern\t", asm_out_file);
          assemble_name (asm_out_file, name);
          putc ('\n', asm_out_file);
          ASM_OUTPUT_LABEL (asm_out_file, name);
          DECL_WEAK (decl) = 1;
        }
      else
#endif
      if (USE_HIDDEN_LINKONCE)
        {
          DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);

          targetm.asm_out.unique_section (decl, 0);
          switch_to_section (get_named_section (decl, NULL, 0));

          targetm.asm_out.globalize_label (asm_out_file, name);
          fputs ("\t.hidden\t", asm_out_file);
          assemble_name (asm_out_file, name);
          putc ('\n', asm_out_file);
          ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
        }
      else
        {
          switch_to_section (text_section);
          ASM_OUTPUT_LABEL (asm_out_file, name);
        }

      DECL_INITIAL (decl) = make_node (BLOCK);
      current_function_decl = decl;
      init_function_start (decl);
      first_function_block_is_cold = false;
      /* Make sure unwind info is emitted for the thunk if needed.  */
      final_start_function (emit_barrier (), asm_out_file, 1);

      /* Pad stack IP move with 4 instructions (two NOPs count
         as one instruction).  */
      if (TARGET_PAD_SHORT_FUNCTION)
        {
          int i = 8;

          while (i--)
            fputs ("\tnop\n", asm_out_file);
        }

      xops[0] = gen_rtx_REG (Pmode, regno);
      xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
      output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
      fputs ("\tret\n", asm_out_file);
      final_end_function ();
      init_insn_lengths ();
      free_after_compilation (cfun);
      set_cfun (NULL);
      current_function_decl = NULL;
    }

  if (flag_split_stack)
    file_end_indicate_split_stack ();
}
/* Emit code for the SET_GOT patterns.  */

const char *
output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
{
  rtx xops[3];

  xops[0] = dest;

  if (TARGET_VXWORKS_RTP && flag_pic)
    {
      /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
      xops[2] = gen_rtx_MEM (Pmode,
                             gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);

      /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
         Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
         an unadorned address.  */
      xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
      output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
      return "";
    }

  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (!flag_pic)
    {
      xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());

      output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);

#if TARGET_MACHO
      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
         is what will be referenced by the Mach-O PIC subsystem.  */
      if (!label)
        ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
#endif

      targetm.asm_out.internal_label (asm_out_file, "L",
                                      CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
    }
  else
    {
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("call\t%X2", xops);
      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
         is what will be referenced by the Mach-O PIC subsystem.  */
#if TARGET_MACHO
      if (!label)
        ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
      else
        targetm.asm_out.internal_label (asm_out_file, "L",
                                        CODE_LABEL_NUMBER (label));
#endif
    }

  if (!TARGET_MACHO)
    output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);

  return "";
}
/* Generate a "push" pattern for input ARG.  */

static rtx
gen_push (rtx arg)
{
  struct machine_function *m = cfun->machine;

  if (m->fs.cfa_reg == stack_pointer_rtx)
    m->fs.cfa_offset += UNITS_PER_WORD;
  m->fs.sp_offset += UNITS_PER_WORD;

  if (REG_P (arg) && GET_MODE (arg) != word_mode)
    arg = gen_rtx_REG (word_mode, REGNO (arg));

  return gen_rtx_SET (VOIDmode,
                      gen_rtx_MEM (word_mode,
                                   gen_rtx_PRE_DEC (Pmode,
                                                    stack_pointer_rtx)),
                      arg);
}

/* Generate a "pop" pattern for input ARG.  */

static rtx
gen_pop (rtx arg)
{
  if (REG_P (arg) && GET_MODE (arg) != word_mode)
    arg = gen_rtx_REG (word_mode, REGNO (arg));

  return gen_rtx_SET (VOIDmode,
                      arg,
                      gen_rtx_MEM (word_mode,
                                   gen_rtx_POST_INC (Pmode,
                                                     stack_pointer_rtx)));
}
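/* Illustrative sketch: on 32-bit targets gen_push (eax) builds

     (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI ax))

   and gen_pop builds the matching post_inc load, so a single SET
   carries both the register move and the stack adjustment.  */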
/* Return >= 0 if there is an unused call-clobbered register available
   for the entire function.  */

static unsigned int
ix86_select_alt_pic_regnum (void)
{
  if (current_function_is_leaf
      && !df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
      && !ix86_current_function_calls_tls_descriptor)
    {
      int i, drap;

      /* Can't use the same register for both PIC and DRAP.  */
      if (crtl->drap_reg)
        drap = REGNO (crtl->drap_reg);
      else
        drap = -1;

      for (i = 2; i >= 0; --i)
        if (i != drap && !df_regs_ever_live_p (i))
          return i;
    }

  return INVALID_REGNUM;
}
/* Return TRUE if we need to save REGNO.  */

static bool
ix86_save_reg (unsigned int regno, bool maybe_eh_return)
{
  if (pic_offset_table_rtx
      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
          || crtl->profile
          || crtl->calls_eh_return
          || crtl->uses_const_pool))
    return ix86_select_alt_pic_regnum () == INVALID_REGNUM;

  if (crtl->calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      for (i = 0; ; i++)
        {
          unsigned test = EH_RETURN_DATA_REGNO (i);
          if (test == INVALID_REGNUM)
            break;
          if (test == regno)
            return true;
        }
    }

  if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
    return true;

  return (df_regs_ever_live_p (regno)
          && !call_used_regs[regno]
          && !fixed_regs[regno]
          && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}
/* Return number of saved general purpose registers.  */

static int
ix86_nsaved_regs (void)
{
  int nregs = 0;
  int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      nregs ++;
  return nregs;
}
/* Return number of saved SSE registers.  */

static int
ix86_nsaved_sseregs (void)
{
  int nregs = 0;
  int regno;

  if (!TARGET_64BIT_MS_ABI)
    return 0;
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      nregs ++;
  return nregs;
}
/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  If stack alignment is needed, we can only replace argument
   pointer with hard frame pointer, or replace frame pointer with stack
   pointer.  Otherwise, frame pointer elimination is automatically
   handled and all other eliminations are valid.  */

static bool
ix86_can_eliminate (const int from, const int to)
{
  if (stack_realign_fp)
    return ((from == ARG_POINTER_REGNUM
             && to == HARD_FRAME_POINTER_REGNUM)
            || (from == FRAME_POINTER_REGNUM
                && to == STACK_POINTER_REGNUM));
  else
    return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
}
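/* Example: without dynamic realignment, eliminating either pointer to
   the stack pointer is allowed whenever the frame pointer is not
   needed.  With stack_realign_fp, only argument pointer -> hard frame
   pointer and (soft) frame pointer -> stack pointer remain valid,
   since the argument area and the locals sit on opposite sides of
   the realignment gap.  */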
/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (int from, int to)
{
  struct ix86_frame frame;
  ix86_compute_frame_layout (&frame);

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset;
  else if (from == FRAME_POINTER_REGNUM
           && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
  else
    {
      gcc_assert (to == STACK_POINTER_REGNUM);

      if (from == ARG_POINTER_REGNUM)
        return frame.stack_pointer_offset;

      gcc_assert (from == FRAME_POINTER_REGNUM);
      return frame.stack_pointer_offset - frame.frame_pointer_offset;
    }
}
/* In a dynamically-aligned function, we can't know the offset from
   stack pointer to frame pointer, so we must ensure that setjmp
   eliminates fp against the hard fp (%ebp) rather than trying to
   index from %esp up to the top of the frame across a gap that is
   of unknown (at compile-time) size.  */
static rtx
ix86_builtin_setjmp_frame_value (void)
{
  return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
}

/* When using -fsplit-stack, the allocation routines set a field in
   the TCB to the bottom of the stack plus this much space, measured
   in bytes.  */

#define SPLIT_STACK_AVAILABLE 256
/* Fill structure ix86_frame about frame of currently computed function.  */

static void
ix86_compute_frame_layout (struct ix86_frame *frame)
{
  unsigned int stack_alignment_needed;
  HOST_WIDE_INT offset;
  unsigned int preferred_alignment;
  HOST_WIDE_INT size = get_frame_size ();
  HOST_WIDE_INT to_allocate;

  frame->nregs = ix86_nsaved_regs ();
  frame->nsseregs = ix86_nsaved_sseregs ();

  stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
  preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;

  /* The 64-bit MS ABI seems to require stack alignment to be always 16,
     except in function prologues and in leaf functions.  */
  if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
      && (!current_function_is_leaf || cfun->calls_alloca != 0
          || ix86_current_function_calls_tls_descriptor))
    {
      preferred_alignment = 16;
      stack_alignment_needed = 16;
      crtl->preferred_stack_boundary = 128;
      crtl->stack_alignment_needed = 128;
    }

  gcc_assert (!size || stack_alignment_needed);
  gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
  gcc_assert (preferred_alignment <= stack_alignment_needed);

  /* For SEH we have to limit the amount of code movement into the prologue.
     At present we do this via a BLOCKAGE, at which point there's very little
     scheduling that can be done, which means that there's very little point
     in doing anything except PUSHs.  */
  if (TARGET_SEH)
    cfun->machine->use_fast_prologue_epilogue = false;

  /* During reload iteration the amount of registers saved can change.
     Recompute the value as needed.  Do not recompute when amount of registers
     didn't change as reload does multiple calls to the function and does not
     expect the decision to change within single iteration.  */
  else if (!optimize_function_for_size_p (cfun)
           && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
    {
      int count = frame->nregs;
      struct cgraph_node *node = cgraph_get_node (current_function_decl);

      cfun->machine->use_fast_prologue_epilogue_nregs = count;

      /* The fast prologue uses move instead of push to save registers.  This
         is significantly longer, but also executes faster as modern hardware
         can execute the moves in parallel, but can't do that for push/pop.

         Be careful about choosing what prologue to emit:  When function takes
         many instructions to execute we may use slow version as well as in
         case function is known to be outside hot spot (this is known with
         feedback only).  Weight the size of function by number of registers
         to save as it is cheap to use one or two push instructions but very
         slow to use many of them.  */
      if (count)
        count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
      if (node->frequency < NODE_FREQUENCY_NORMAL
          || (flag_branch_probabilities
              && node->frequency < NODE_FREQUENCY_HOT))
        cfun->machine->use_fast_prologue_epilogue = false;
      else
        cfun->machine->use_fast_prologue_epilogue
          = !expensive_function_p (count);
    }

  frame->save_regs_using_mov
    = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
       /* If static stack checking is enabled and done with probes,
          the registers need to be saved before allocating the frame.  */
       && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);

  /* Skip return address.  */
  offset = UNITS_PER_WORD;

  /* Skip pushed static chain.  */
  if (ix86_static_chain_on_stack)
    offset += UNITS_PER_WORD;

  /* Skip saved base pointer.  */
  if (frame_pointer_needed)
    offset += UNITS_PER_WORD;
  frame->hfp_save_offset = offset;

  /* The traditional frame pointer location is at the top of the frame.  */
  frame->hard_frame_pointer_offset = offset;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;
  frame->reg_save_offset = offset;

  /* Align and set SSE register save area.  */
  if (frame->nsseregs)
    {
      /* The only ABI that has saved SSE registers (Win64) also has a
         16-byte aligned default stack, and thus we don't need to be
         within the re-aligned local stack frame to save them.  */
      gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
      offset = (offset + 16 - 1) & -16;
      offset += frame->nsseregs * 16;
    }
  frame->sse_reg_save_offset = offset;

  /* The re-aligned stack starts here.  Values before this point are not
     directly comparable with values below this point.  In order to make
     sure that no value happens to be the same before and after, force
     the alignment computation below to add a non-zero value.  */
  if (stack_realign_fp)
    offset = (offset + stack_alignment_needed) & -stack_alignment_needed;

  /* Va-arg area */
  frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
  offset += frame->va_arg_size;

  /* Align start of frame for local function.  */
  if (stack_realign_fp
      || offset != frame->sse_reg_save_offset
      || size != 0
      || !current_function_is_leaf
      || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;

  /* Add outgoing arguments area.  Can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when function calls alloca.  Alloca
     expander assumes that last crtl->outgoing_args_size
     of stack frame are unused.  */
  if (ACCUMULATE_OUTGOING_ARGS
      && (!current_function_is_leaf || cfun->calls_alloca
          || ix86_current_function_calls_tls_descriptor))
    {
      offset += crtl->outgoing_args_size;
      frame->outgoing_arguments_size = crtl->outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  Only needed if we're calling another function
     or using alloca.  */
  if (!current_function_is_leaf || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = (offset + preferred_alignment - 1) & -preferred_alignment;

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size prologue needs to allocate.  */
  to_allocate = offset - frame->sse_reg_save_offset;

  if ((!to_allocate && frame->nregs <= 1)
      || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
    frame->save_regs_using_mov = false;

  if (ix86_using_red_zone ()
      && current_function_sp_is_unchanging
      && current_function_is_leaf
      && !ix86_current_function_calls_tls_descriptor)
    {
      frame->red_zone_size = to_allocate;
      if (frame->save_regs_using_mov)
        frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
        frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->stack_pointer_offset -= frame->red_zone_size;

  /* The SEH frame pointer location is near the bottom of the frame.
     This is enforced by the fact that the difference between the
     stack pointer and the frame pointer is limited to 240 bytes in
     the unwind data structure.  */
  if (TARGET_SEH)
    {
      HOST_WIDE_INT diff;

      /* If we can leave the frame pointer where it is, do so.  */
      diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
      if (diff > 240 || (diff & 15) != 0)
        {
          /* Ideally we'd determine what portion of the local stack frame
             (within the constraint of the lowest 240) is most heavily used.
             But without that complication, simply bias the frame pointer
             by 128 bytes so as to maximize the amount of the local stack
             frame that is addressable with 8-bit offsets.  */
          frame->hard_frame_pointer_offset
            = frame->stack_pointer_offset - 128;
        }
    }
}
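/* Worked example (illustrative, 64-bit, leaf, no realignment, red zone
   ignored): 2 saved GP registers, 40 bytes of locals, 16-byte
   stack_alignment_needed.  Skipping the return address gives offset 8;
   the register save area ends at 24; aligning the frame start yields
   frame_pointer_offset == 32; adding the locals gives
   stack_pointer_offset == 72; so to_allocate == 72 - 24 == 48 bytes on
   top of the two pushes.  */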
/* This is semi-inlined memory_address_length, but simplified
   since we know that we're always dealing with reg+offset, and
   to avoid having to create and discard all that rtl.  */

static inline int
choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
{
  int len = 4;

  if (offset == 0)
    {
      /* EBP and R13 cannot be encoded without an offset.  */
      len = (regno == BP_REG || regno == R13_REG);
    }
  else if (IN_RANGE (offset, -128, 127))
    len = 1;

  /* ESP and R12 must be encoded with a SIB byte.  */
  if (regno == SP_REG || regno == R12_REG)
    len++;

  return len;
}
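/* Examples: (%esp) costs 1 (SIB byte only); (%ebp) costs 1 (forced
   disp8 of zero); 16(%ebp) costs 1 (disp8); 16(%esp) costs 2 (SIB +
   disp8); 1024(%ebx) costs 4 (disp32).  The count covers only the
   bytes beyond the opcode and modrm.  */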
/* Return an RTX that points to CFA_OFFSET within the stack frame.
   The valid base registers are taken from CFUN->MACHINE->FS.  */

static rtx
choose_baseaddr (HOST_WIDE_INT cfa_offset)
{
  const struct machine_function *m = cfun->machine;
  rtx base_reg = NULL;
  HOST_WIDE_INT base_offset = 0;

  if (m->use_fast_prologue_epilogue)
    {
      /* Choose the base register most likely to allow the most scheduling
         opportunities.  Generally FP is valid throughout the function,
         while DRAP must be reloaded within the epilogue.  But choose either
         over the SP due to increased encoding size.  */

      if (m->fs.fp_valid)
        {
          base_reg = hard_frame_pointer_rtx;
          base_offset = m->fs.fp_offset - cfa_offset;
        }
      else if (m->fs.drap_valid)
        {
          base_reg = crtl->drap_reg;
          base_offset = 0 - cfa_offset;
        }
      else if (m->fs.sp_valid)
        {
          base_reg = stack_pointer_rtx;
          base_offset = m->fs.sp_offset - cfa_offset;
        }
    }
  else
    {
      HOST_WIDE_INT toffset;
      int len = 16, tlen;

      /* Choose the base register with the smallest address encoding.
         With a tie, choose FP > DRAP > SP.  */
      if (m->fs.sp_valid)
        {
          base_reg = stack_pointer_rtx;
          base_offset = m->fs.sp_offset - cfa_offset;
          len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
        }
      if (m->fs.drap_valid)
        {
          toffset = 0 - cfa_offset;
          tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
          if (tlen <= len)
            {
              base_reg = crtl->drap_reg;
              base_offset = toffset;
              len = tlen;
            }
        }
      if (m->fs.fp_valid)
        {
          toffset = m->fs.fp_offset - cfa_offset;
          tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
          if (tlen <= len)
            {
              base_reg = hard_frame_pointer_rtx;
              base_offset = toffset;
              len = tlen;
            }
        }
    }
  gcc_assert (base_reg != NULL);

  return plus_constant (base_reg, base_offset);
}
/* Emit code to save registers in the prologue.  */

static void
ix86_emit_save_regs (void)
{
  unsigned int regno;
  rtx insn;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
        insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
        RTX_FRAME_RELATED_P (insn) = 1;
      }
}
/* Emit a single register save at CFA - CFA_OFFSET.  */

static void
ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
                              HOST_WIDE_INT cfa_offset)
{
  struct machine_function *m = cfun->machine;
  rtx reg = gen_rtx_REG (mode, regno);
  rtx mem, addr, base, insn;

  addr = choose_baseaddr (cfa_offset);
  mem = gen_frame_mem (mode, addr);

  /* For SSE saves, we need to indicate the 128-bit alignment.  */
  set_mem_align (mem, GET_MODE_ALIGNMENT (mode));

  insn = emit_move_insn (mem, reg);
  RTX_FRAME_RELATED_P (insn) = 1;

  base = addr;
  if (GET_CODE (base) == PLUS)
    base = XEXP (base, 0);
  gcc_checking_assert (REG_P (base));

  /* When saving registers into a re-aligned local stack frame, avoid
     any tricky guessing by dwarf2out.  */
  if (m->fs.realigned)
    {
      gcc_checking_assert (stack_realign_drap);

      if (regno == REGNO (crtl->drap_reg))
        {
          /* A bit of a hack.  We force the DRAP register to be saved in
             the re-aligned stack frame, which provides us with a copy
             of the CFA that will last past the prologue.  Install it.  */
          gcc_checking_assert (cfun->machine->fs.fp_valid);
          addr = plus_constant (hard_frame_pointer_rtx,
                                cfun->machine->fs.fp_offset - cfa_offset);
          mem = gen_rtx_MEM (mode, addr);
          add_reg_note (insn, REG_CFA_DEF_CFA, mem);
        }
      else
        {
          /* The frame pointer is a stable reference within the
             aligned frame.  Use it.  */
          gcc_checking_assert (cfun->machine->fs.fp_valid);
          addr = plus_constant (hard_frame_pointer_rtx,
                                cfun->machine->fs.fp_offset - cfa_offset);
          mem = gen_rtx_MEM (mode, addr);
          add_reg_note (insn, REG_CFA_EXPRESSION,
                        gen_rtx_SET (VOIDmode, mem, reg));
        }
    }

  /* The memory may not be relative to the current CFA register,
     which means that we may need to generate a new pattern for
     use by the unwind info.  */
  else if (base != m->fs.cfa_reg)
    {
      addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
      mem = gen_rtx_MEM (mode, addr);
      add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
    }
}
/* Emit code to save registers using MOV insns.
   First register is stored at CFA - CFA_OFFSET.  */
static void
ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
        ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
        cfa_offset -= UNITS_PER_WORD;
      }
}

/* Emit code to save SSE registers using MOV insns.
   First register is stored at CFA - CFA_OFFSET.  */
static void
ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
        ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
        cfa_offset -= 16;
      }
}
static GTY(()) rtx queued_cfa_restores;

/* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
   manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
   Don't add the note if the previously saved value will be left untouched
   within the stack red-zone till return, as unwinders can find the same
   value in the register and on the stack.  */

static void
ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
{
  if (!crtl->shrink_wrapped
      && cfa_offset <= cfun->machine->fs.red_zone_offset)
    return;

  if (insn)
    {
      add_reg_note (insn, REG_CFA_RESTORE, reg);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    queued_cfa_restores
      = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
}

/* Add queued REG_CFA_RESTORE notes if any to INSN.  */

static void
ix86_add_queued_cfa_restore_notes (rtx insn)
{
  rtx last;
  if (!queued_cfa_restores)
    return;
  for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
    ;
  XEXP (last, 1) = REG_NOTES (insn);
  REG_NOTES (insn) = queued_cfa_restores;
  queued_cfa_restores = NULL_RTX;
  RTX_FRAME_RELATED_P (insn) = 1;
}
/* Expand prologue or epilogue stack adjustment.
   The pattern exists to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame related,
   zero if %r11 register is live and cannot be freely used and positive
   otherwise.  */

static void
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
                           int style, bool set_cfa)
{
  struct machine_function *m = cfun->machine;
  rtx insn;
  bool add_frame_related_expr = false;

  if (Pmode == SImode)
    insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
  else if (x86_64_immediate_operand (offset, DImode))
    insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
  else
    {
      rtx tmp;
      /* r11 is used by indirect sibcall return as well, set before the
         epilogue and used after the epilogue.  */
      if (style)
        tmp = gen_rtx_REG (DImode, R11_REG);
      else
        {
          gcc_assert (src != hard_frame_pointer_rtx
                      && dest != hard_frame_pointer_rtx);
          tmp = hard_frame_pointer_rtx;
        }
      insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
      if (style < 0)
        add_frame_related_expr = true;

      insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
    }

  insn = emit_insn (insn);

  if (style >= 0)
    ix86_add_queued_cfa_restore_notes (insn);

  if (set_cfa)
    {
      rtx r;

      gcc_assert (m->fs.cfa_reg == src);
      m->fs.cfa_offset += INTVAL (offset);
      m->fs.cfa_reg = dest;

      r = gen_rtx_PLUS (Pmode, src, offset);
      r = gen_rtx_SET (VOIDmode, dest, r);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else if (style < 0)
    {
      RTX_FRAME_RELATED_P (insn) = 1;
      if (add_frame_related_expr)
        {
          rtx r = gen_rtx_PLUS (Pmode, src, offset);
          r = gen_rtx_SET (VOIDmode, dest, r);
          add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
        }
    }

  if (dest == stack_pointer_rtx)
    {
      HOST_WIDE_INT ooffset = m->fs.sp_offset;
      bool valid = m->fs.sp_valid;

      if (src == hard_frame_pointer_rtx)
        {
          valid = m->fs.fp_valid;
          ooffset = m->fs.fp_offset;
        }
      else if (src == crtl->drap_reg)
        {
          valid = m->fs.drap_valid;
          ooffset = 0;
        }
      else
        {
          /* Else there are two possibilities: SP itself, which we set
             up as the default above.  Or EH_RETURN_STACKADJ_RTX, which
             is taken care of by hand along the eh_return path.  */
          gcc_checking_assert (src == stack_pointer_rtx
                               || offset == const0_rtx);
        }

      m->fs.sp_offset = ooffset - INTVAL (offset);
      m->fs.sp_valid = valid;
    }
}
/* Find an available register to be used as dynamic realign argument
   pointer register.  Such a register will be written in prologue and
   used in begin of body, so it must not be
        1. parameter passing register.
        2. GOT pointer.
   We reuse static-chain register if it is available.  Otherwise, we
   use DI for i386 and R13 for x86-64.

   Return: the regno of chosen register.  */

static unsigned int
find_drap_reg (void)
{
  tree decl = cfun->decl;

  if (TARGET_64BIT)
    {
      /* Use R13 for nested function or function need static chain.
         Since function with tail call may use any caller-saved
         registers in epilogue, DRAP must not use caller-saved
         register in such case.  */
      if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
        return R13_REG;

      return R10_REG;
    }
  else
    {
      /* Use DI for nested function or function need static chain.
         Since function with tail call may use any caller-saved
         registers in epilogue, DRAP must not use caller-saved
         register in such case.  */
      if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
        return DI_REG;

      /* Reuse static chain register if it isn't used for parameter
         passing.  */
      if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
        {
          unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
          if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
            return CX_REG;
        }
      return DI_REG;
    }
}
/* Return minimum incoming stack alignment.  */

static unsigned int
ix86_minimum_incoming_stack_boundary (bool sibcall)
{
  unsigned int incoming_stack_boundary;

  /* Prefer the one specified at command line.  */
  if (ix86_user_incoming_stack_boundary)
    incoming_stack_boundary = ix86_user_incoming_stack_boundary;
  /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
     if -mstackrealign is used, it isn't used for sibcall check and
     estimated stack alignment is 128bit.  */
  else if (!sibcall
           && !TARGET_64BIT
           && ix86_force_align_arg_pointer
           && crtl->stack_alignment_estimated == 128)
    incoming_stack_boundary = MIN_STACK_BOUNDARY;
  else
    incoming_stack_boundary = ix86_default_incoming_stack_boundary;

  /* Incoming stack alignment can be changed on individual functions
     via force_align_arg_pointer attribute.  We use the smallest
     incoming stack boundary.  */
  if (incoming_stack_boundary > MIN_STACK_BOUNDARY
      && lookup_attribute (ix86_force_align_arg_pointer_string,
                           TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    incoming_stack_boundary = MIN_STACK_BOUNDARY;

  /* The incoming stack frame has to be aligned at least at
     parm_stack_boundary.  */
  if (incoming_stack_boundary < crtl->parm_stack_boundary)
    incoming_stack_boundary = crtl->parm_stack_boundary;

  /* Stack at entrance of main is aligned by runtime.  We use the
     smallest incoming stack boundary.  */
  if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
      && DECL_NAME (current_function_decl)
      && MAIN_NAME_P (DECL_NAME (current_function_decl))
      && DECL_FILE_SCOPE_P (current_function_decl))
    incoming_stack_boundary = MAIN_STACK_BOUNDARY;

  return incoming_stack_boundary;
}
/* Update incoming stack boundary and estimated stack alignment.  */

static void
ix86_update_stack_boundary (void)
{
  ix86_incoming_stack_boundary
    = ix86_minimum_incoming_stack_boundary (false);

  /* x86_64 vararg needs 16byte stack alignment for register save
     area.  */
  if (TARGET_64BIT
      && cfun->stdarg
      && crtl->stack_alignment_estimated < 128)
    crtl->stack_alignment_estimated = 128;
}
/* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
   needed or an rtx for DRAP otherwise.  */

static rtx
ix86_get_drap_rtx (void)
{
  if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
    crtl->need_drap = true;

  if (stack_realign_drap)
    {
      /* Assign DRAP to vDRAP and returns vDRAP.  */
      unsigned int regno = find_drap_reg ();
      rtx drap_vreg;
      rtx arg_ptr;
      rtx seq, insn;

      arg_ptr = gen_rtx_REG (Pmode, regno);
      crtl->drap_reg = arg_ptr;

      start_sequence ();
      drap_vreg = copy_to_reg (arg_ptr);
      seq = get_insns ();
      end_sequence ();

      insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
      if (!optimize)
        {
          add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
          RTX_FRAME_RELATED_P (insn) = 1;
        }
      return drap_vreg;
    }
  else
    return NULL;
}

/* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */

static rtx
ix86_internal_arg_pointer (void)
{
  return virtual_incoming_args_rtx;
}
struct scratch_reg
{
  rtx reg;
  bool saved;
};
/* Return a short-lived scratch register for use on function entry.
   In 32-bit mode, it is valid only after the registers are saved
   in the prologue.  This register must be released by means of
   release_scratch_register_on_entry once it is dead.  */

static void
get_scratch_register_on_entry (struct scratch_reg *sr)
{
  int regno;

  sr->saved = false;

  /* We always use R11 in 64-bit mode.  */
  if (TARGET_64BIT)
    regno = R11_REG;
  else
    {
      tree decl = current_function_decl, fntype = TREE_TYPE (decl);
      bool fastcall_p
        = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
      bool static_chain_p = DECL_STATIC_CHAIN (decl);
      int regparm = ix86_function_regparm (fntype, decl);
      int drap_regno
        = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;

      /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
          for the static chain register.  */
      if ((regparm < 1 || (fastcall_p && !static_chain_p))
          && drap_regno != AX_REG)
        regno = AX_REG;
      else if (regparm < 2 && drap_regno != DX_REG)
        regno = DX_REG;
      /* ecx is the static chain register.  */
      else if (regparm < 3 && !fastcall_p && !static_chain_p
               && drap_regno != CX_REG)
        regno = CX_REG;
      else if (ix86_save_reg (BX_REG, true))
        regno = BX_REG;
      /* esi is the static chain register.  */
      else if (!(regparm == 3 && static_chain_p)
               && ix86_save_reg (SI_REG, true))
        regno = SI_REG;
      else if (ix86_save_reg (DI_REG, true))
        regno = DI_REG;
      else
        {
          regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
          sr->saved = true;
        }
    }

  sr->reg = gen_rtx_REG (Pmode, regno);
  if (sr->saved)
    {
      rtx insn = emit_insn (gen_push (sr->reg));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
}
/* Release a scratch register obtained from the preceding function.  */

static void
release_scratch_register_on_entry (struct scratch_reg *sr)
{
  if (sr->saved)
    {
      rtx x, insn = emit_insn (gen_pop (sr->reg));

      /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop.  */
      RTX_FRAME_RELATED_P (insn) = 1;
      x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
    }
}
#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
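/* With the default STACK_CHECK_PROBE_INTERVAL_EXP of 12 this is one
   4096-byte page, so e.g. a 20 KB allocation still takes the unrolled
   path below (20480 <= 5 * 4096) and emits 5 probes.  */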
/* Emit code to adjust the stack pointer by SIZE bytes while probing it.  */

static void
ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
{
  /* We skip the probe for the first interval + a small dope of 4 words and
     probe that many bytes past the specified size to maintain a protection
     area at the bottom of the stack.  */
  const int dope = 4 * UNITS_PER_WORD;
  rtx size_rtx = GEN_INT (size), last;

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  The run-time loop is made up of 11 insns in the
     generic case while the compile-time loop is made up of 3+2*(n-1) insns
     for n # of intervals.  */
  if (size <= 5 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i, adjust;
      bool first_probe = true;

      /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
         values of N from 1 until it exceeds SIZE.  If only one probe is
         needed, this will not generate any code.  Then adjust and probe
         to PROBE_INTERVAL + SIZE.  */
      for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
        {
          if (first_probe)
            {
              adjust = 2 * PROBE_INTERVAL + dope;
              first_probe = false;
            }
          else
            adjust = PROBE_INTERVAL;

          emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                  plus_constant (stack_pointer_rtx,
                                                 -adjust)));
          emit_stack_probe (stack_pointer_rtx);
        }

      if (first_probe)
        adjust = size + PROBE_INTERVAL + dope;
      else
        adjust = size + PROBE_INTERVAL - i;

      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                              plus_constant (stack_pointer_rtx, -adjust)));
      emit_stack_probe (stack_pointer_rtx);

      /* Adjust back to account for the additional first interval.  */
      last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                     plus_constant (stack_pointer_rtx,
                                                    PROBE_INTERVAL + dope)));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr);


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;


      /* Step 2: compute initial and final value of the loop counter.  */

      /* SP = SP_0 + PROBE_INTERVAL.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                              plus_constant (stack_pointer_rtx,
                                             - (PROBE_INTERVAL + dope))));

      /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE.  */
      emit_move_insn (sr.reg, GEN_INT (-rounded_size));
      emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
                              gen_rtx_PLUS (Pmode, sr.reg,
                                            stack_pointer_rtx)));


      /* Step 3: the loop

         while (SP != LAST_ADDR)
           {
             SP = SP + PROBE_INTERVAL
             probe at SP
           }

         adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
         values of N from 1 until it is equal to ROUNDED_SIZE.  */

      emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));


      /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
         assert at compile-time that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
        {
          emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                  plus_constant (stack_pointer_rtx,
                                                 rounded_size - size)));
          emit_stack_probe (stack_pointer_rtx);
        }

      /* Adjust back to account for the additional first interval.  */
      last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                     plus_constant (stack_pointer_rtx,
                                                    PROBE_INTERVAL + dope)));

      release_scratch_register_on_entry (&sr);
    }

  gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);

  /* Even if the stack pointer isn't the CFA register, we need to correctly
     describe the adjustments made to it, in particular differentiate the
     frame-related ones from the frame-unrelated ones.  */
  if (size > 0)
    {
      rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
      XVECEXP (expr, 0, 0)
        = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                       plus_constant (stack_pointer_rtx, -size));
      XVECEXP (expr, 0, 1)
        = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                       plus_constant (stack_pointer_rtx,
                                      PROBE_INTERVAL + dope + size));
      add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
      RTX_FRAME_RELATED_P (last) = 1;

      cfun->machine->fs.sp_offset += size;
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
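/* Illustrative sketch of the unrolled case (registers and dope value
   assume 64-bit, where dope == 4 * 8 == 32): for SIZE == 8192 the
   sequence above is equivalent to

        subq    $8224, %rsp     # 2*4096 + 32
        orq     $0, (%rsp)      # probe
        subq    $4096, %rsp
        orq     $0, (%rsp)      # probe at PROBE_INTERVAL + SIZE
        addq    $4128, %rsp     # give back PROBE_INTERVAL + dope

   for a net adjustment of exactly SIZE bytes.  */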
/* Adjust the stack pointer up to REG while probing it.  */

const char *
output_adjust_stack_and_probe (rtx reg)
{
  static int labelno = 0;
  char loop_lab[32], end_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
  ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* Jump to END_LAB if SP == LAST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
  fputs ("\tje\t", asm_out_file);
  assemble_name_raw (asm_out_file, end_lab);
  fputc ('\n', asm_out_file);

  /* SP = SP + PROBE_INTERVAL.  */
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at SP.  */
  xops[1] = const0_rtx;
  output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);

  fprintf (asm_out_file, "\tjmp\t");
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);

  return "";
}
/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.  */

static void
ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
{
  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  The run-time loop is made up of 7 insns in the
     generic case while the compile-time loop is made up of n insns for n #
     of intervals.  */
  if (size <= 7 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i;

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
         it exceeds SIZE.  If only one probe is needed, this will not
         generate any code.  Then probe at FIRST + SIZE.  */
      for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
        emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));

      emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size, last;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr);


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;


      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_OFFSET = FIRST.  */
      emit_move_insn (sr.reg, GEN_INT (-first));

      /* LAST_OFFSET = FIRST + ROUNDED_SIZE.  */
      last = first + rounded_size;


      /* Step 3: the loop

         while (TEST_ADDR != LAST_ADDR)
           {
             TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
             probe at TEST_ADDR
           }

         probes at FIRST + N * PROBE_INTERVAL for values of N from 1
         until it is equal to ROUNDED_SIZE.  */

      emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
         that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
        emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
                                                       stack_pointer_rtx,
                                                       sr.reg),
                                         rounded_size - size));

      release_scratch_register_on_entry (&sr);
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
/* Probe a range of stack addresses from REG to END, inclusive.  These are
   offsets from the current stack pointer.  */

const char *
output_probe_stack_range (rtx reg, rtx end)
{
  static int labelno = 0;
  char loop_lab[32], end_lab[32];
  rtx xops[3];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
  ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* Jump to END_LAB if TEST_ADDR == LAST_ADDR.  */
  xops[0] = reg;
  xops[1] = end;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
  fputs ("\tje\t", asm_out_file);
  assemble_name_raw (asm_out_file, end_lab);
  fputc ('\n', asm_out_file);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at TEST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  xops[2] = const0_rtx;
  output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);

  fprintf (asm_out_file, "\tjmp\t");
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);

  return "";
}
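/* Schematically, the emitted loop looks like (32-bit, with %eax
   standing in for the scratch register and %edx for the end value):

        .LPSRL0:
                cmpl    %edx, %eax      # TEST_ADDR == LAST_ADDR ?
                je      .LPSRE0
                subl    $4096, %eax     # TEST_ADDR -= PROBE_INTERVAL
                orl     $0, (%esp,%eax) # probe at SP + TEST_ADDR
                jmp     .LPSRL0
        .LPSRE0:

   where the offsets in the scratch register are negated so that the
   probe address can be formed with a single base+index mode.  */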
/* Finalize stack_realign_needed flag, which will guide prologue/epilogue
   to be generated in correct form.  */

static void
ix86_finalize_stack_realign_flags (void)
{
  /* Check if stack realign is really needed after reload, and
     store the result in cfun.  */
  unsigned int incoming_stack_boundary
    = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
       ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
  unsigned int stack_realign = (incoming_stack_boundary
                                < (current_function_is_leaf
                                   ? crtl->max_used_stack_slot_alignment
                                   : crtl->stack_alignment_needed));

  if (crtl->stack_realign_finalized)
    {
      /* After stack_realign_needed is finalized, we can no longer
         change it.  */
      gcc_assert (crtl->stack_realign_needed == stack_realign);
      return;
    }

  /* If the only reason for frame_pointer_needed is that we conservatively
     assumed stack realignment might be needed, but in the end nothing that
     needed the stack alignment had been spilled, clear frame_pointer_needed
     and say we don't need stack realignment.  */
  if (stack_realign
      && frame_pointer_needed
      && current_function_is_leaf
      && flag_omit_frame_pointer
      && current_function_sp_is_unchanging
      && !ix86_current_function_calls_tls_descriptor
      && !crtl->accesses_prior_frames
      && !cfun->calls_alloca
      && !crtl->calls_eh_return
      && !(flag_stack_check && STACK_CHECK_MOVING_SP)
      && !ix86_frame_pointer_required ()
      && get_frame_size () == 0
      && ix86_nsaved_sseregs () == 0
      && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
    {
      HARD_REG_SET set_up_by_prologue, prologue_used;
      basic_block bb;

      CLEAR_HARD_REG_SET (prologue_used);
      CLEAR_HARD_REG_SET (set_up_by_prologue);
      add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
      add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
      add_to_hard_reg_set (&set_up_by_prologue, Pmode,
                           HARD_FRAME_POINTER_REGNUM);
      FOR_EACH_BB (bb)
        {
          rtx insn;
          FOR_BB_INSNS (bb, insn)
            if (NONDEBUG_INSN_P (insn)
                && requires_stack_frame_p (insn, prologue_used,
                                           set_up_by_prologue))
              {
                crtl->stack_realign_needed = stack_realign;
                crtl->stack_realign_finalized = true;
                return;
              }
        }

      frame_pointer_needed = false;
      stack_realign = false;
      crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
      crtl->stack_alignment_needed = incoming_stack_boundary;
      crtl->stack_alignment_estimated = incoming_stack_boundary;
      if (crtl->preferred_stack_boundary > incoming_stack_boundary)
        crtl->preferred_stack_boundary = incoming_stack_boundary;
      df_finish_pass (true);
      df_scan_alloc (NULL);
      df_scan_blocks ();
      df_compute_regs_ever_live (true);
      df_analyze ();
    }

  crtl->stack_realign_needed = stack_realign;
  crtl->stack_realign_finalized = true;
}
/* Expand the prologue into a bunch of separate insns.  */

void
ix86_expand_prologue (void)
{
  struct machine_function *m = cfun->machine;
  rtx insn, t;
  bool pic_reg_used;
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;
  bool int_registers_saved;

  ix86_finalize_stack_realign_flags ();

  /* DRAP should not coexist with stack_realign_fp.  */
  gcc_assert (!(crtl->drap_reg && stack_realign_fp));

  memset (&m->fs, 0, sizeof (m->fs));

  /* Initialize CFA state for before the prologue.  */
  m->fs.cfa_reg = stack_pointer_rtx;
  m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;

  /* Track SP offset to the CFA.  We continue tracking this after we've
     swapped the CFA register away from SP.  In the case of re-alignment
     this is fudged; we're interested in offsets within the local frame.  */
  m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
  m->fs.sp_valid = true;

  ix86_compute_frame_layout (&frame);

  if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
    {
      /* We should have already generated an error for any use of
         ms_hook on a nested function.  */
      gcc_checking_assert (!ix86_static_chain_on_stack);

      /* Check if profiling is active and whether we shall use the
         profiling-before-prologue variant.  If so, sorry.  */
      if (crtl->profile && flag_fentry != 0)
        sorry ("ms_hook_prologue attribute isn%'t compatible "
               "with -mfentry for 32-bit");

      /* In ix86_asm_output_function_label we emitted:
         8b ff     movl.s %edi,%edi
         55        push   %ebp
         8b ec     movl.s %esp,%ebp

         This matches the hookable function prologue in Win32 API
         functions in Microsoft Windows XP Service Pack 2 and newer.
         Wine uses this to enable Windows apps to hook the Win32 API
         functions provided by Wine.

         What that means is that we've already set up the frame pointer.  */

      if (frame_pointer_needed
          && !(crtl->drap_reg && crtl->stack_realign_needed))
        {
          rtx push, mov;

          /* We've decided to use the frame pointer already set up.
             Describe this to the unwinder by pretending that both
             push and mov insns happen right here.

             Putting the unwind info here at the end of the ms_hook
             is done so that we can make absolutely certain we get
             the required byte sequence at the start of the function,
             rather than relying on an assembler that can produce
             the exact encoding required.

             However it does mean (in the unpatched case) that we have
             a 1 insn window where the asynchronous unwind info is
             incorrect.  However, if we placed the unwind info at
             its correct location we would have incorrect unwind info
             in the patched case.  Which is probably all moot since
             I don't expect Wine generates dwarf2 unwind info for the
             system libraries that use this feature.  */

          insn = emit_insn (gen_blockage ());

          push = gen_push (hard_frame_pointer_rtx);
          mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
                             stack_pointer_rtx);
          RTX_FRAME_RELATED_P (push) = 1;
          RTX_FRAME_RELATED_P (mov) = 1;

          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_FRAME_RELATED_EXPR,
                        gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));

          /* Note that gen_push incremented m->fs.cfa_offset, even
             though we didn't emit the push insn here.  */
          m->fs.cfa_reg = hard_frame_pointer_rtx;
          m->fs.fp_offset = m->fs.cfa_offset;
          m->fs.fp_valid = true;
        }
      else
        {
          /* The frame pointer is not needed so pop %ebp again.
             This leaves us with a pristine state.  */
          emit_insn (gen_pop (hard_frame_pointer_rtx));
        }
    }

  /* The first insn of a function that accepts its static chain on the
     stack is to push the register that would be filled in by a direct
     call.  This insn will be skipped by the trampoline.  */
  else if (ix86_static_chain_on_stack)
    {
      insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
      emit_insn (gen_blockage ());

      /* We don't want to interpret this push insn as a register save,
         only as a stack adjustment.  The real copy of the register as
         a save will be done later, if needed.  */
      t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
      t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* Emit prologue code to adjust stack alignment and set up DRAP, in case
     DRAP is needed and stack realignment is really needed after reload.  */
  if (stack_realign_drap)
    {
      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;

      /* Only need to push parameter pointer reg if it is caller saved.  */
      if (!call_used_regs[REGNO (crtl->drap_reg)])
        {
          /* Push arg pointer reg.  */
          insn = emit_insn (gen_push (crtl->drap_reg));
          RTX_FRAME_RELATED_P (insn) = 1;
        }

      /* Grab the argument pointer.  */
      t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
      insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
      RTX_FRAME_RELATED_P (insn) = 1;
      m->fs.cfa_reg = crtl->drap_reg;
      m->fs.cfa_offset = 0;

      /* Align the stack.  */
      insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
                                        stack_pointer_rtx,
                                        GEN_INT (-align_bytes)));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Replicate the return address on the stack so that the return
         address can be reached via the (argp - 1) slot.  This is needed
         to implement macro RETURN_ADDR_RTX and intrinsic function
         expand_builtin_return_addr etc.  */
      t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
      t = gen_frame_mem (word_mode, t);
      insn = emit_insn (gen_push (t));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* For the purposes of frame and register save area addressing,
         we've started over with a new frame.  */
      m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
      m->fs.realigned = true;
    }

  if (frame_pointer_needed && !m->fs.fp_valid)
    {
      /* Note: AT&T enter does NOT have reversed args.  Enter is probably
         slower on all targets.  Also sdb doesn't like it.  */
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
        {
          insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
          RTX_FRAME_RELATED_P (insn) = 1;

          if (m->fs.cfa_reg == stack_pointer_rtx)
            m->fs.cfa_reg = hard_frame_pointer_rtx;
          m->fs.fp_offset = m->fs.sp_offset;
          m->fs.fp_valid = true;
        }
    }

  int_registers_saved = (frame.nregs == 0);

  if (!int_registers_saved)
    {
      /* If saving registers via PUSH, do so now.  */
      if (!frame.save_regs_using_mov)
        {
          ix86_emit_save_regs ();
          int_registers_saved = true;
          gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
        }

      /* When using the red zone we may start register saving before
         allocating the stack frame, saving one cycle of the prologue.
         However, avoid doing this if we have to probe the stack; at least
         on x86_64 the stack probe can turn into a call that clobbers a
         red zone location.  */
      else if (ix86_using_red_zone ()
               && (! TARGET_STACK_PROBE
                   || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
        {
          ix86_emit_save_regs_using_mov (frame.reg_save_offset);
          int_registers_saved = true;
        }
    }

  if (stack_realign_fp)
    {
      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
      gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);

      /* The computation of the size of the re-aligned stack frame means
         that we must allocate the size of the register save area before
         performing the actual alignment.  Otherwise we cannot guarantee
         that there's enough storage above the realignment point.  */
      if (m->fs.sp_offset != frame.sse_reg_save_offset)
        pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                   GEN_INT (m->fs.sp_offset
                                            - frame.sse_reg_save_offset),
                                   -1, false);

      /* Align the stack.  */
      insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
                                        stack_pointer_rtx,
                                        GEN_INT (-align_bytes)));

      /* For the purposes of register save area addressing, the stack
         pointer is no longer valid.  As for the value of sp_offset,
         see ix86_compute_frame_layout, which we need to match in order
         to pass verification of stack_pointer_offset at the end.  */
      m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
      m->fs.sp_valid = false;
    }

  allocate = frame.stack_pointer_offset - m->fs.sp_offset;

  if (flag_stack_usage_info)
    {
      /* We start to count from ARG_POINTER.  */
      HOST_WIDE_INT stack_size = frame.stack_pointer_offset;

      /* If it was realigned, take into account the fake frame.  */
      if (stack_realign_drap)
        {
          if (ix86_static_chain_on_stack)
            stack_size += UNITS_PER_WORD;

          if (!call_used_regs[REGNO (crtl->drap_reg)])
            stack_size += UNITS_PER_WORD;

          /* This over-estimates by 1 minimal-stack-alignment-unit but
             mitigates that by counting in the new return address slot.  */
          current_function_dynamic_stack_size
            += crtl->stack_alignment_needed / BITS_PER_UNIT;
        }

      current_function_static_stack_size = stack_size;
    }

  /* The stack has already been decremented by the instruction calling us,
     so probe if the size is non-negative to preserve the protection area.  */
  if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    {
      /* We expect the registers to be saved when probes are used.  */
      gcc_assert (int_registers_saved);

      if (STACK_CHECK_MOVING_SP)
        {
          ix86_adjust_stack_and_probe (allocate);
          allocate = 0;
        }
      else
        {
          HOST_WIDE_INT size = allocate;

          if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
            size = 0x80000000 - STACK_CHECK_PROTECT - 1;

          if (TARGET_STACK_PROBE)
            ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
          else
            ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
        }
    }

  if (allocate == 0)
    ;
  else if (!ix86_target_stack_probe ()
           || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                 GEN_INT (-allocate), -1,
                                 m->fs.cfa_reg == stack_pointer_rtx);
    }
  else
    {
      rtx eax = gen_rtx_REG (Pmode, AX_REG);
      rtx r10 = NULL;
      rtx (*adjust_stack_insn) (rtx, rtx, rtx);

      bool eax_live = false;
      bool r10_live = false;

      if (TARGET_64BIT)
        r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
      if (!TARGET_64BIT_MS_ABI)
        eax_live = ix86_eax_live_at_start_p ();

      if (eax_live)
        {
          emit_insn (gen_push (eax));
          allocate -= UNITS_PER_WORD;
        }
      if (r10_live)
        {
          r10 = gen_rtx_REG (Pmode, R10_REG);
          emit_insn (gen_push (r10));
          allocate -= UNITS_PER_WORD;
        }

      emit_move_insn (eax, GEN_INT (allocate));
      emit_insn (ix86_gen_allocate_stack_worker (eax, eax));

      /* Use the fact that AX still contains ALLOCATE.  */
      adjust_stack_insn = (Pmode == DImode
                           ? gen_pro_epilogue_adjust_stack_di_sub
                           : gen_pro_epilogue_adjust_stack_si_sub);

      insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
                                           stack_pointer_rtx, eax));

      /* Note that SEH directives need to continue tracking the stack
         pointer even after the frame pointer has been set up.  */
      if (m->fs.cfa_reg == stack_pointer_rtx || TARGET_SEH)
        {
          if (m->fs.cfa_reg == stack_pointer_rtx)
            m->fs.cfa_offset += allocate;

          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_FRAME_RELATED_EXPR,
                        gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                     plus_constant (stack_pointer_rtx,
                                                    -allocate)));
        }
      m->fs.sp_offset += allocate;

      if (r10_live && eax_live)
        {
          t = choose_baseaddr (m->fs.sp_offset - allocate);
          emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
                          gen_frame_mem (word_mode, t));
          t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
          emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
                          gen_frame_mem (word_mode, t));
        }
      else if (eax_live || r10_live)
        {
          t = choose_baseaddr (m->fs.sp_offset - allocate);
          emit_move_insn (gen_rtx_REG (word_mode,
                                       (eax_live ? AX_REG : R10_REG)),
                          gen_frame_mem (word_mode, t));
        }
    }
  gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);

  /* If we haven't already set up the frame pointer, do so now.  */
  if (frame_pointer_needed && !m->fs.fp_valid)
    {
      insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
                            GEN_INT (frame.stack_pointer_offset
                                     - frame.hard_frame_pointer_offset));
      insn = emit_insn (insn);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);

      if (m->fs.cfa_reg == stack_pointer_rtx)
        m->fs.cfa_reg = hard_frame_pointer_rtx;
      m->fs.fp_offset = frame.hard_frame_pointer_offset;
      m->fs.fp_valid = true;
    }

  if (!int_registers_saved)
    ix86_emit_save_regs_using_mov (frame.reg_save_offset);
  if (frame.nsseregs)
    ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);

  pic_reg_used = false;
  if (pic_offset_table_rtx
      && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
          || crtl->profile))
    {
      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();

      if (alt_pic_reg_used != INVALID_REGNUM)
        SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);

      pic_reg_used = true;
    }

  if (pic_reg_used)
    {
      if (TARGET_64BIT)
        {
          if (ix86_cmodel == CM_LARGE_PIC)
            {
              rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
              rtx label = gen_label_rtx ();
              emit_label (label);
              LABEL_PRESERVE_P (label) = 1;
              gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
              insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
                                                   label));
              insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
              insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
                                            pic_offset_table_rtx, tmp_reg));
            }
          else
            insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
        }
      else
        {
          insn = emit_insn (gen_set_got (pic_offset_table_rtx));
          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
        }
    }

  /* In the pic_reg_used case, make sure that the got load isn't deleted
     when mcount needs it.  Blockage to avoid call movement across the
     mcount call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
     note.  */
  if (crtl->profile && !flag_fentry && pic_reg_used)
    emit_insn (gen_prologue_use (pic_offset_table_rtx));

  if (crtl->drap_reg && !crtl->stack_realign_needed)
    {
      /* vDRAP is set up, but after reload it turns out stack realign
         isn't necessary; here we emit prologue code to set up DRAP
         without a stack realign adjustment.  */
      t = choose_baseaddr (0);
      emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
    }

  /* Prevent instructions from being scheduled into the register save push
     sequence when access to the redzone area is done through the frame
     pointer.  The offset between the frame pointer and the stack pointer
     is calculated relative to the value of the stack pointer at the end of
     the function prologue, and moving instructions that access the redzone
     area via the frame pointer inside the push sequence violates this
     assumption.  */
  if (frame_pointer_needed && frame.red_zone_size)
    emit_insn (gen_memory_blockage ());

  /* Emit cld instruction if stringops are used in the function.  */
  if (TARGET_CLD && ix86_current_function_needs_cld)
    emit_insn (gen_cld ());

  /* SEH requires that the prologue end within 256 bytes of the start of
     the function.  Prevent instruction schedules that would extend that.
     Further, prevent alloca modifications to the stack pointer from being
     combined with prologue modifications.  */
  if (TARGET_SEH)
    emit_insn (gen_prologue_use (stack_pointer_rtx));
}
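
/* For illustration only (not part of the original source): with a frame
   pointer and push-saved registers, the expansion above typically yields
   something like

	push	%ebp
	mov	%esp, %ebp
	push	%ebx			# ix86_emit_save_regs
	sub	$LOCALS, %esp		# stack allocation

   on ia32; the exact register set and allocation strategy depend on the
   frame layout computed by ix86_compute_frame_layout.  */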
/* Emit code to restore REG using a POP insn.  */

static void
ix86_emit_restore_reg_using_pop (rtx reg)
{
  struct machine_function *m = cfun->machine;
  rtx insn = emit_insn (gen_pop (reg));

  ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
  m->fs.sp_offset -= UNITS_PER_WORD;

  if (m->fs.cfa_reg == crtl->drap_reg
      && REGNO (reg) == REGNO (crtl->drap_reg))
    {
      /* Previously we'd represented the CFA as an expression
         like *(%ebp - 8).  We've just popped that value from
         the stack, which means we need to reset the CFA to
         the drap register.  This will remain until we restore
         the stack pointer.  */
      add_reg_note (insn, REG_CFA_DEF_CFA, reg);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* This means that the DRAP register is valid for addressing too.  */
      m->fs.drap_valid = true;
      return;
    }

  if (m->fs.cfa_reg == stack_pointer_rtx)
    {
      rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
      RTX_FRAME_RELATED_P (insn) = 1;

      m->fs.cfa_offset -= UNITS_PER_WORD;
    }

  /* When the frame pointer is the CFA, and we pop it, we are
     swapping back to the stack pointer as the CFA.  This happens
     for stack frames that don't allocate other data, so we assume
     the stack pointer is now pointing at the return address, i.e.
     the function entry state, which makes the offset be 1 word.  */
  if (reg == hard_frame_pointer_rtx)
    {
      m->fs.fp_valid = false;
      if (m->fs.cfa_reg == hard_frame_pointer_rtx)
        {
          m->fs.cfa_reg = stack_pointer_rtx;
          m->fs.cfa_offset -= UNITS_PER_WORD;

          add_reg_note (insn, REG_CFA_DEF_CFA,
                        gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                      GEN_INT (m->fs.cfa_offset)));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }
}
/* Emit code to restore saved registers using POP insns.  */

static void
ix86_emit_restore_regs_using_pop (void)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
      ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
}
/* Emit code and notes for the LEAVE instruction.  */

static void
ix86_emit_leave (void)
{
  struct machine_function *m = cfun->machine;
  rtx insn = emit_insn (ix86_gen_leave ());

  ix86_add_queued_cfa_restore_notes (insn);

  gcc_assert (m->fs.fp_valid);
  m->fs.sp_valid = true;
  m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
  m->fs.fp_valid = false;

  if (m->fs.cfa_reg == hard_frame_pointer_rtx)
    {
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = m->fs.sp_offset;

      add_reg_note (insn, REG_CFA_DEF_CFA,
                    plus_constant (stack_pointer_rtx, m->fs.sp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
      ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
                                 m->fs.fp_offset);
    }
}
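
/* Illustrative note (not part of the original source): "leave" is
   architecturally equivalent to

	mov	%ebp, %esp
	pop	%ebp

   which is why sp_offset becomes fp_offset - UNITS_PER_WORD above: after
   the implicit pop, the stack pointer sits one word above the slot the
   frame pointer value was stored in.  */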
/* Emit code to restore saved registers using MOV insns.
   The first register is restored from CFA - CFA_OFFSET.  */
static void
ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
                                  bool maybe_eh_return)
{
  struct machine_function *m = cfun->machine;
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
      {
        rtx reg = gen_rtx_REG (word_mode, regno);
        rtx insn, mem;

        mem = choose_baseaddr (cfa_offset);
        mem = gen_frame_mem (word_mode, mem);
        insn = emit_move_insn (reg, mem);

        if (m->fs.cfa_reg == crtl->drap_reg
            && regno == REGNO (crtl->drap_reg))
          {
            /* Previously we'd represented the CFA as an expression
               like *(%ebp - 8).  We've just popped that value from
               the stack, which means we need to reset the CFA to
               the drap register.  This will remain until we restore
               the stack pointer.  */
            add_reg_note (insn, REG_CFA_DEF_CFA, reg);
            RTX_FRAME_RELATED_P (insn) = 1;

            /* This means that the DRAP register is valid for addressing.  */
            m->fs.drap_valid = true;
          }
        else
          ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);

        cfa_offset -= UNITS_PER_WORD;
      }
}
/* Emit code to restore saved SSE registers using MOV insns.
   The first register is restored from CFA - CFA_OFFSET.  */
static void
ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
                                      bool maybe_eh_return)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
      {
        rtx reg = gen_rtx_REG (V4SFmode, regno);
        rtx mem;

        mem = choose_baseaddr (cfa_offset);
        mem = gen_rtx_MEM (V4SFmode, mem);
        set_mem_align (mem, 128);
        emit_move_insn (reg, mem);

        ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);

        cfa_offset -= 16;
      }
}
/* Emit vzeroupper if needed.  */

static void
ix86_maybe_emit_epilogue_vzeroupper (void)
{
  if (TARGET_VZEROUPPER
      && !TREE_THIS_VOLATILE (cfun->decl)
      && !cfun->machine->caller_return_avx256_p)
    emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
}
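
/* Illustrative note (not part of the original source): vzeroupper clears
   the upper 128 bits of all YMM registers, avoiding the costly AVX/SSE
   transition penalty when the caller subsequently executes legacy SSE
   code.  The call_no_avx256 argument records that no 256-bit AVX value
   is live across this point, so the zeroing is safe.  */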
/* Restore function stack, frame, and registers.  */

void
ix86_expand_epilogue (int style)
{
  struct machine_function *m = cfun->machine;
  struct machine_frame_state frame_state_save = m->fs;
  struct ix86_frame frame;
  bool restore_regs_via_mov;
  bool using_drap;

  ix86_finalize_stack_realign_flags ();
  ix86_compute_frame_layout (&frame);

  m->fs.sp_valid = (!frame_pointer_needed
                    || (current_function_sp_is_unchanging
                        && !stack_realign_fp));
  gcc_assert (!m->fs.sp_valid
              || m->fs.sp_offset == frame.stack_pointer_offset);

  /* The FP must be valid if the frame pointer is present.  */
  gcc_assert (frame_pointer_needed == m->fs.fp_valid);
  gcc_assert (!m->fs.fp_valid
              || m->fs.fp_offset == frame.hard_frame_pointer_offset);

  /* We must have *some* valid pointer to the stack frame.  */
  gcc_assert (m->fs.sp_valid || m->fs.fp_valid);

  /* The DRAP is never valid at this point.  */
  gcc_assert (!m->fs.drap_valid);

  /* See the comment about red zone and frame
     pointer usage in ix86_expand_prologue.  */
  if (frame_pointer_needed && frame.red_zone_size)
    emit_insn (gen_memory_blockage ());

  using_drap = crtl->drap_reg && crtl->stack_realign_needed;
  gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);

  /* Determine the CFA offset of the end of the red-zone.  */
  m->fs.red_zone_offset = 0;
  if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
    {
      /* The red-zone begins below the return address.  */
      m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;

      /* When the register save area is in the aligned portion of
         the stack, determine the maximum runtime displacement that
         matches up with the aligned frame.  */
      if (stack_realign_drap)
        m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
                                  + UNITS_PER_WORD);
    }

  /* Special care must be taken for the normal return case of a function
     using eh_return: the eax and edx registers are marked as saved, but
     not restored along this path.  Adjust the save location to match.  */
  if (crtl->calls_eh_return && style != 2)
    frame.reg_save_offset -= 2 * UNITS_PER_WORD;

  /* EH_RETURN requires the use of moves to function properly.  */
  if (crtl->calls_eh_return)
    restore_regs_via_mov = true;
  /* SEH requires the use of pops to identify the epilogue.  */
  else if (TARGET_SEH)
    restore_regs_via_mov = false;
  /* If we're only restoring one register and sp is not valid then
     use a move instruction to restore the register, since it's
     less work than reloading sp and popping the register.  */
  else if (!m->fs.sp_valid && frame.nregs <= 1)
    restore_regs_via_mov = true;
  else if (TARGET_EPILOGUE_USING_MOVE
           && cfun->machine->use_fast_prologue_epilogue
           && (frame.nregs > 1
               || m->fs.sp_offset != frame.reg_save_offset))
    restore_regs_via_mov = true;
  else if (frame_pointer_needed
           && !frame.nregs
           && m->fs.sp_offset != frame.reg_save_offset)
    restore_regs_via_mov = true;
  else if (frame_pointer_needed
           && TARGET_USE_LEAVE
           && cfun->machine->use_fast_prologue_epilogue
           && frame.nregs == 1)
    restore_regs_via_mov = true;
  else
    restore_regs_via_mov = false;

  if (restore_regs_via_mov || frame.nsseregs)
    {
      /* Ensure that the entire register save area is addressable via
         the stack pointer, if we will restore via sp.  */
      if (TARGET_64BIT
          && m->fs.sp_offset > 0x7fffffff
          && !(m->fs.fp_valid || m->fs.drap_valid)
          && (frame.nsseregs + frame.nregs) != 0)
        pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                   GEN_INT (m->fs.sp_offset
                                            - frame.sse_reg_save_offset),
                                   style,
                                   m->fs.cfa_reg == stack_pointer_rtx);
    }

  /* If there are any SSE registers to restore, then we have to do it
     via moves, since there's obviously no pop for SSE regs.  */
  if (frame.nsseregs)
    ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
                                          style == 2);

  if (restore_regs_via_mov)
    {
      rtx t;

      if (frame.nregs)
        ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
        {
          rtx insn, sa = EH_RETURN_STACKADJ_RTX;

          /* Stack align doesn't work with eh_return.  */
          gcc_assert (!stack_realign_drap);
          /* Neither do regparm nested functions.  */
          gcc_assert (!ix86_static_chain_on_stack);

          if (frame_pointer_needed)
            {
              t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
              t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
              emit_insn (gen_rtx_SET (VOIDmode, sa, t));

              t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
              insn = emit_move_insn (hard_frame_pointer_rtx, t);

              /* Note that we use SA as a temporary CFA, as the return
                 address is at the proper place relative to it.  We
                 pretend this happens at the FP restore insn because
                 prior to this insn the FP would be stored at the wrong
                 offset relative to SA, and after this insn we have no
                 other reasonable register to use for the CFA.  We don't
                 bother resetting the CFA to the SP for the duration of
                 the return insn.  */
              add_reg_note (insn, REG_CFA_DEF_CFA,
                            plus_constant (sa, UNITS_PER_WORD));
              ix86_add_queued_cfa_restore_notes (insn);
              add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
              RTX_FRAME_RELATED_P (insn) = 1;

              m->fs.cfa_reg = sa;
              m->fs.cfa_offset = UNITS_PER_WORD;
              m->fs.fp_valid = false;

              pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
                                         const0_rtx, style, false);
            }
          else
            {
              t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
              t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
              insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
              ix86_add_queued_cfa_restore_notes (insn);

              gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
              if (m->fs.cfa_offset != UNITS_PER_WORD)
                {
                  m->fs.cfa_offset = UNITS_PER_WORD;
                  add_reg_note (insn, REG_CFA_DEF_CFA,
                                plus_constant (stack_pointer_rtx,
                                               UNITS_PER_WORD));
                  RTX_FRAME_RELATED_P (insn) = 1;
                }
            }
          m->fs.sp_offset = UNITS_PER_WORD;
          m->fs.sp_valid = true;
        }
    }
  else
    {
      /* SEH requires that the function end with (1) a stack adjustment
         if necessary, (2) a sequence of pops, and (3) a return or
         jump instruction.  Prevent insns from the function body from
         being scheduled into this sequence.  */
      if (TARGET_SEH)
        {
          /* Prevent a catch region from being adjacent to the standard
             epilogue sequence.  Unfortunately, neither crtl->uses_eh_lsda
             nor several other flags that would be interesting to test
             are set up yet.  */
          if (flag_non_call_exceptions)
            emit_insn (gen_nops (const1_rtx));
          else
            emit_insn (gen_blockage ());
        }

      /* First step is to deallocate the stack frame so that we can
         pop the registers.  */
      if (!m->fs.sp_valid)
        {
          pro_epilogue_adjust_stack (stack_pointer_rtx,
                                     hard_frame_pointer_rtx,
                                     GEN_INT (m->fs.fp_offset
                                              - frame.reg_save_offset),
                                     style, false);
        }
      else if (m->fs.sp_offset != frame.reg_save_offset)
        {
          pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                     GEN_INT (m->fs.sp_offset
                                              - frame.reg_save_offset),
                                     style,
                                     m->fs.cfa_reg == stack_pointer_rtx);
        }

      ix86_emit_restore_regs_using_pop ();
    }

  /* If we used a frame pointer and haven't already got rid of it,
     then do so now.  */
  if (m->fs.fp_valid)
    {
      /* If the stack pointer is valid and pointing at the frame
         pointer store address, then we only need a pop.  */
      if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
        ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
      /* Leave results in shorter dependency chains on CPUs that are
         able to grok it fast.  */
      else if (TARGET_USE_LEAVE
               || optimize_function_for_size_p (cfun)
               || !cfun->machine->use_fast_prologue_epilogue)
        ix86_emit_leave ();
      else
        {
          pro_epilogue_adjust_stack (stack_pointer_rtx,
                                     hard_frame_pointer_rtx,
                                     const0_rtx, style, !using_drap);
          ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
        }
    }

  if (using_drap)
    {
      int param_ptr_offset = UNITS_PER_WORD;
      rtx insn;

      gcc_assert (stack_realign_drap);

      if (ix86_static_chain_on_stack)
        param_ptr_offset += UNITS_PER_WORD;
      if (!call_used_regs[REGNO (crtl->drap_reg)])
        param_ptr_offset += UNITS_PER_WORD;

      insn = emit_insn (gen_rtx_SET
                        (VOIDmode, stack_pointer_rtx,
                         gen_rtx_PLUS (Pmode,
                                       crtl->drap_reg,
                                       GEN_INT (-param_ptr_offset))));
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = param_ptr_offset;
      m->fs.sp_offset = param_ptr_offset;
      m->fs.realigned = false;

      add_reg_note (insn, REG_CFA_DEF_CFA,
                    gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                  GEN_INT (param_ptr_offset)));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (!call_used_regs[REGNO (crtl->drap_reg)])
        ix86_emit_restore_reg_using_pop (crtl->drap_reg);
    }

  /* At this point the stack pointer must be valid, and we must have
     restored all of the registers.  We may not have deallocated the
     entire stack frame.  We've delayed this until now because it may
     be possible to merge the local stack deallocation with the
     deallocation forced by ix86_static_chain_on_stack.  */
  gcc_assert (m->fs.sp_valid);
  gcc_assert (!m->fs.fp_valid);
  gcc_assert (!m->fs.realigned);
  if (m->fs.sp_offset != UNITS_PER_WORD)
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
                                 style, true);
    }
  else
    ix86_add_queued_cfa_restore_notes (get_last_insn ());

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    {
      m->fs = frame_state_save;
      return;
    }

  /* Emit vzeroupper if needed.  */
  ix86_maybe_emit_epilogue_vzeroupper ();

  if (crtl->args.pops_args && crtl->args.size)
    {
      rtx popc = GEN_INT (crtl->args.pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop the return
         address, do an explicit add, and jump indirectly to the caller.  */

      if (crtl->args.pops_args >= 65536)
        {
          rtx ecx = gen_rtx_REG (SImode, CX_REG);
          rtx insn;

          /* There is no "pascal" calling convention in any 64bit ABI.  */
          gcc_assert (!TARGET_64BIT);

          insn = emit_insn (gen_pop (ecx));
          m->fs.cfa_offset -= UNITS_PER_WORD;
          m->fs.sp_offset -= UNITS_PER_WORD;

          add_reg_note (insn, REG_CFA_ADJUST_CFA,
                        copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
          add_reg_note (insn, REG_CFA_REGISTER,
                        gen_rtx_SET (VOIDmode, ecx, pc_rtx));
          RTX_FRAME_RELATED_P (insn) = 1;

          pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                     popc, -1, true);
          emit_jump_insn (gen_simple_return_indirect_internal (ecx));
        }
      else
        emit_jump_insn (gen_simple_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_simple_return_internal ());

  /* Restore the state back to the state from the prologue,
     so that it's correct for the next epilogue.  */
  m->fs = frame_state_save;
}
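
/* For illustration only (not part of the original source): the three
   return shapes emitted above correspond roughly to

	leave				# or explicit pops
	ret				# pops_args == 0

	ret	$N			# 0 < N < 64K, "pascal" convention

	pop	%ecx			# N >= 64K: ret imm16 cannot
	add	$N, %esp		#  encode it, so pop the return
	jmp	*%ecx			#  address and jump indirectly.  */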
/* Reset from the function's potential modifications.  */

static void
ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
                               HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  if (pic_offset_table_rtx)
    SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
#if TARGET_MACHO
  /* Mach-O doesn't support labels at the end of objects, so if
     it looks like we might want one, insert a NOP.  */
  {
    rtx insn = get_last_insn ();
    rtx deleted_debug_label = NULL_RTX;
    while (insn
           && NOTE_P (insn)
           && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
      {
        /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
           notes; instead set their CODE_LABEL_NUMBER to -1,
           otherwise there would be code generation differences
           between -g and -g0.  */
        if (NOTE_P (insn)
            && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
          deleted_debug_label = insn;
        insn = PREV_INSN (insn);
      }
    if (insn
        && (LABEL_P (insn)
            || (NOTE_P (insn)
                && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
      fputs ("\tnop\n", file);
    else if (deleted_debug_label)
      for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
        if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
          CODE_LABEL_NUMBER (insn) = -1;
  }
#endif
}
/* Return a scratch register to use in the split stack prologue.  The
   split stack prologue is used for -fsplit-stack.  It is the first
   instructions in the function, even before the regular prologue.
   The scratch register can be any caller-saved register which is not
   used for parameters or for the static chain.  */

static unsigned int
split_stack_prologue_scratch_regno (void)
{
  if (TARGET_64BIT)
    return R11_REG;
  else
    {
      bool is_fastcall;
      int regparm;

      is_fastcall = (lookup_attribute ("fastcall",
                                       TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
                     != NULL);
      regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);

      if (is_fastcall)
        {
          if (DECL_STATIC_CHAIN (cfun->decl))
            {
              sorry ("-fsplit-stack does not support fastcall with "
                     "nested function");
              return INVALID_REGNUM;
            }
          return AX_REG;
        }
      else if (regparm < 3)
        {
          if (!DECL_STATIC_CHAIN (cfun->decl))
            return CX_REG;
          else
            {
              if (regparm >= 2)
                {
                  sorry ("-fsplit-stack does not support 2 register "
                         "parameters for a nested function");
                  return INVALID_REGNUM;
                }
              return DX_REG;
            }
        }
      else
        {
          /* FIXME: We could make this work by pushing a register
             around the addition and comparison.  */
          sorry ("-fsplit-stack does not support 3 register parameters");
          return INVALID_REGNUM;
        }
    }
}
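
/* Summary for illustration (derived from the function above, not a
   separate table in the original source): 64-bit always uses %r11;
   32-bit fastcall uses %eax; plain regparm < 3 uses %ecx, or %edx for
   a nested function, whose static chain occupies %ecx.  */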
/* A SYMBOL_REF for the function which allocates new stack space for
   -fsplit-stack.  */

static GTY(()) rtx split_stack_fn;

/* A SYMBOL_REF for the more-stack function when using the large
   model.  */

static GTY(()) rtx split_stack_fn_large;
/* Handle -fsplit-stack.  These are the first instructions in the
   function, even before the regular prologue.  */

void
ix86_expand_split_stack_prologue (void)
{
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;
  unsigned HOST_WIDE_INT args_size;
  rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
  rtx scratch_reg = NULL_RTX;
  rtx varargs_label = NULL_RTX;
  rtx fn;

  gcc_assert (flag_split_stack && reload_completed);

  ix86_finalize_stack_realign_flags ();
  ix86_compute_frame_layout (&frame);
  allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;

  /* This is the label we will branch to if we have enough stack
     space.  We expect the basic block reordering pass to reverse this
     branch if optimizing, so that we branch in the unlikely case.  */
  label = gen_label_rtx ();

  /* We need to compare the stack pointer minus the frame size with
     the stack boundary in the TCB.  The stack boundary always gives
     us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
     can compare directly.  Otherwise we need to do an addition.  */

  limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                          UNSPEC_STACK_CHECK);
  limit = gen_rtx_CONST (Pmode, limit);
  limit = gen_rtx_MEM (Pmode, limit);
  if (allocate < SPLIT_STACK_AVAILABLE)
    current = stack_pointer_rtx;
  else
    {
      unsigned int scratch_regno;
      rtx offset;

      /* We need a scratch register to hold the stack pointer minus
         the required frame size.  Since this is the very start of the
         function, the scratch register can be any caller-saved
         register which is not used for parameters.  */
      offset = GEN_INT (- allocate);
      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno == INVALID_REGNUM)
        return;
      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
        {
          /* We don't use ix86_gen_add3 in this case because it will
             want to split to lea, but when not optimizing the insn
             will not be split after this point.  */
          emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
                                  gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                                offset)));
        }
      else
        {
          emit_move_insn (scratch_reg, offset);
          emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
                                 stack_pointer_rtx));
        }
      current = scratch_reg;
    }

  ix86_expand_branch (GEU, current, limit, label);
  jump_insn = get_last_insn ();
  JUMP_LABEL (jump_insn) = label;

  /* Mark the jump as very likely to be taken.  */
  add_reg_note (jump_insn, REG_BR_PROB,
                GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));

  if (split_stack_fn == NULL_RTX)
    split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
  fn = split_stack_fn;

  /* Get more stack space.  We pass in the desired stack space and the
     size of the arguments to copy to the new stack.  In 32-bit mode
     we push the parameters; __morestack will return on a new stack
     anyhow.  In 64-bit mode we pass the parameters in r10 and
     r11.  */
  allocate_rtx = GEN_INT (allocate);
  args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
  call_fusage = NULL_RTX;
  if (TARGET_64BIT)
    {
      rtx reg10, reg11;

      reg10 = gen_rtx_REG (Pmode, R10_REG);
      reg11 = gen_rtx_REG (Pmode, R11_REG);

      /* If this function uses a static chain, it will be in %r10.
         Preserve it across the call to __morestack.  */
      if (DECL_STATIC_CHAIN (cfun->decl))
        {
          rtx rax;

          rax = gen_rtx_REG (word_mode, AX_REG);
          emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
          use_reg (&call_fusage, rax);
        }

      if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
        {
          HOST_WIDE_INT argval;

          /* When using the large model we need to load the address
             into a register, and we've run out of registers.  So we
             switch to a different calling convention, and we call a
             different function: __morestack_large.  We pass the
             argument size in the upper 32 bits of r10 and pass the
             frame size in the lower 32 bits.  */
          gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
          gcc_assert ((args_size & 0xffffffff) == args_size);

          if (split_stack_fn_large == NULL_RTX)
            split_stack_fn_large =
              gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");

          if (ix86_cmodel == CM_LARGE_PIC)
            {
              rtx label, x;

              label = gen_label_rtx ();
              emit_label (label);
              LABEL_PRESERVE_P (label) = 1;
              emit_insn (gen_set_rip_rex64 (reg10, label));
              emit_insn (gen_set_got_offset_rex64 (reg11, label));
              emit_insn (gen_adddi3 (reg10, reg10, reg11));
              x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
                                  UNSPEC_GOT);
              x = gen_rtx_CONST (Pmode, x);
              emit_move_insn (reg11, x);
              x = gen_rtx_PLUS (Pmode, reg10, reg11);
              x = gen_const_mem (Pmode, x);
              emit_move_insn (reg11, x);
            }
          else
            emit_move_insn (reg11, split_stack_fn_large);

          fn = reg11;

          argval = ((args_size << 16) << 16) + allocate;
          emit_move_insn (reg10, GEN_INT (argval));
        }
      else
        {
          emit_move_insn (reg10, allocate_rtx);
          emit_move_insn (reg11, GEN_INT (args_size));
          use_reg (&call_fusage, reg11);
        }

      use_reg (&call_fusage, reg10);
    }
  else
    {
      emit_insn (gen_push (GEN_INT (args_size)));
      emit_insn (gen_push (allocate_rtx));
    }
  call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
                                GEN_INT (UNITS_PER_WORD), constm1_rtx,
                                NULL_RTX, false);
  add_function_usage_to (call_insn, call_fusage);

  /* In order to make call/return prediction work right, we now need
     to execute a return instruction.  See
     libgcc/config/i386/morestack.S for the details on how this works.

     For flow purposes gcc must not see this as a return
     instruction--we need control flow to continue at the subsequent
     label.  Therefore, we use an unspec.  */
  gcc_assert (crtl->args.pops_args < 65536);
  emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));

  /* If we are in 64-bit mode and this function uses a static chain,
     we saved %r10 in %rax before calling __morestack.  */
  if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
    emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
                    gen_rtx_REG (word_mode, AX_REG));

  /* If this function calls va_start, we need to store a pointer to
     the arguments on the old stack, because they may not have been
     all copied to the new stack.  At this point the old stack can be
     found at the frame pointer value used by __morestack, because
     __morestack has set that up before calling back to us.  Here we
     store that pointer in a scratch register, and in
     ix86_expand_prologue we store the scratch register in a stack
     slot.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      unsigned int scratch_regno;
      rtx frame_reg;
      int words;

      scratch_regno = split_stack_prologue_scratch_regno ();
      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      frame_reg = gen_rtx_REG (Pmode, BP_REG);

      /* 64-bit:
         fp -> old fp value
               return address within this function
               return address of caller of this function
               stack arguments
         So we add three words to get to the stack arguments.

         32-bit:
         fp -> old fp value
               return address within this function
               first argument to __morestack
               second argument to __morestack
               return address of caller of this function
               stack arguments
         So we add five words to get to the stack arguments.  */
      words = TARGET_64BIT ? 3 : 5;
      emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
                              gen_rtx_PLUS (Pmode, frame_reg,
                                            GEN_INT (words
                                                     * UNITS_PER_WORD))));

      varargs_label = gen_label_rtx ();
      emit_jump_insn (gen_jump (varargs_label));
      JUMP_LABEL (get_last_insn ()) = varargs_label;

      emit_barrier ();
    }

  emit_label (label);
  LABEL_NUSES (label) = 1;

  /* If this function calls va_start, we now have to set the scratch
     register for the case where we do not call __morestack.  In this
     case we need to set it based on the stack pointer.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
                              gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                            GEN_INT (UNITS_PER_WORD))));

      emit_label (varargs_label);
      LABEL_NUSES (varargs_label) = 1;
    }
}
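
/* For illustration only (not part of the original source): on x86-64
   Linux the expansion above produces roughly

	cmp	%fs:0x70, %rsp		# stack limit from the TCB
	jae	1f			# enough stack: skip the call
	mov	$FRAME, %r10d		# frame size wanted
	mov	$ARGS, %r11d		# argument bytes to copy
	callq	__morestack
	retq				# consumed by __morestack's
1:					#  call/return balancing

   The 0x70 TCB offset shown here is illustrative; the actual slot is
   defined by libgcc's generic-morestack support.  */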
/* We may have to tell the dataflow pass that the split stack prologue
   is initializing a scratch register.  */

static void
ix86_live_on_entry (bitmap regs)
{
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      gcc_assert (flag_split_stack);
      bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
    }
}
/* Determine if OP is a suitable SUBREG RTX for an address.  */

static bool
ix86_address_subreg_operand (rtx op)
{
  enum machine_mode mode;

  if (!REG_P (op))
    return false;

  mode = GET_MODE (op);

  if (GET_MODE_CLASS (mode) != MODE_INT)
    return false;

  /* Don't allow SUBREGs that span more than a word.  It can lead to spill
     failures when the register is one word out of a two word structure.  */
  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
    return false;

  /* Allow only SUBREGs of non-eliminable hard registers.  */
  return register_no_elim_operand (op, mode);
}
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but is still used for computing the length of a lea
   instruction.  */

int
ix86_decompose_address (rtx addr, struct ix86_address *out)
{
  rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
  rtx base_reg, index_reg;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  rtx tmp;
  int retval = 1;
  enum ix86_address_seg seg = SEG_DEFAULT;

  /* Allow zero-extended SImode addresses;
     they will be emitted with the addr32 prefix.  */
  if (TARGET_64BIT && GET_MODE (addr) == DImode)
    {
      if (GET_CODE (addr) == ZERO_EXTEND
          && GET_MODE (XEXP (addr, 0)) == SImode)
        addr = XEXP (addr, 0);
      else if (GET_CODE (addr) == AND
               && const_32bit_mask (XEXP (addr, 1), DImode))
        {
          addr = XEXP (addr, 0);

          /* Strip subreg.  */
          if (GET_CODE (addr) == SUBREG
              && GET_MODE (SUBREG_REG (addr)) == SImode)
            addr = SUBREG_REG (addr);
        }
    }

  if (REG_P (addr))
    base = addr;
  else if (GET_CODE (addr) == SUBREG)
    {
      if (ix86_address_subreg_operand (SUBREG_REG (addr)))
        base = addr;
      else
        return 0;
    }
  else if (GET_CODE (addr) == PLUS)
    {
      rtx addends[4], op;
      int n = 0, i;

      op = addr;
      do
        {
          if (n >= 4)
            return 0;
          addends[n++] = XEXP (op, 1);
          op = XEXP (op, 0);
        }
      while (GET_CODE (op) == PLUS);
      if (n >= 4)
        return 0;
      addends[n] = op;

      for (i = n; i >= 0; --i)
        {
          op = addends[i];
          switch (GET_CODE (op))
            {
            case MULT:
              if (index)
                return 0;
              index = XEXP (op, 0);
              scale_rtx = XEXP (op, 1);
              break;

            case ASHIFT:
              if (index)
                return 0;
              index = XEXP (op, 0);
              tmp = XEXP (op, 1);
              if (!CONST_INT_P (tmp))
                return 0;
              scale = INTVAL (tmp);
              if ((unsigned HOST_WIDE_INT) scale > 3)
                return 0;
              scale = 1 << scale;
              break;

            case UNSPEC:
              if (XINT (op, 1) == UNSPEC_TP
                  && TARGET_TLS_DIRECT_SEG_REFS
                  && seg == SEG_DEFAULT)
                seg = TARGET_64BIT ? SEG_FS : SEG_GS;
              else
                return 0;
              break;

            case SUBREG:
              if (!ix86_address_subreg_operand (SUBREG_REG (op)))
                return 0;
              /* FALLTHRU */

            case REG:
              if (!base)
                base = op;
              else if (!index)
                index = op;
              else
                return 0;
              break;

            case CONST:
            case CONST_INT:
            case SYMBOL_REF:
            case LABEL_REF:
              if (disp)
                return 0;
              disp = op;
              break;

            default:
              return 0;
            }
        }
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);           /* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (!CONST_INT_P (tmp))
        return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
        return 0;
      scale = 1 << scale;
      retval = -1;
    }
  else
    disp = addr;                        /* displacement */

  /* Since the address override works only on the (reg32) part in
     fs:(reg32), we can't use it as a memory operand.  */
  if (Pmode != word_mode && seg == SEG_FS && (base || index))
    return 0;

  if (index)
    {
      if (REG_P (index))
        ;
      else if (GET_CODE (index) == SUBREG
               && ix86_address_subreg_operand (SUBREG_REG (index)))
        ;
      else
        return 0;
    }

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (!CONST_INT_P (scale_rtx))
        return 0;
      scale = INTVAL (scale_rtx);
    }

  base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
  index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;

  /* Avoid useless 0 displacement.  */
  if (disp == const0_rtx && (base || index))
    disp = NULL_RTX;

  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
  if (base_reg && index_reg && scale == 1
      && (index_reg == arg_pointer_rtx
          || index_reg == frame_pointer_rtx
          || (REG_P (index_reg)
              && REGNO (index_reg) == STACK_POINTER_REGNUM)))
    {
      tmp = base, base = index, index = tmp;
      tmp = base_reg, base_reg = index_reg, index_reg = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.
     Similarly %r13.  */
  if (!disp
      && base_reg
      && (base_reg == hard_frame_pointer_rtx
          || base_reg == frame_pointer_rtx
          || base_reg == arg_pointer_rtx
          || (REG_P (base_reg)
              && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
                  || REGNO (base_reg) == R13_REG))))
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].
     Reload calls address legitimization without cfun defined, so we need
     to test cfun for being non-NULL.  */
  if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
      && base_reg && !index_reg && !disp
      && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale == 2)
    base = index, base_reg = index_reg, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;
  out->seg = seg;

  return retval;
}
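
/* For illustration only (not part of the original source): the address
   "12(%eax,%ebx,4)" arrives here as

     (plus:SI (plus:SI (mult:SI (reg:SI bx) (const_int 4))
                       (reg:SI ax))
              (const_int 12))

   and decomposes into base = %eax, index = %ebx, scale = 4, disp = 12,
   with seg = SEG_DEFAULT and a return value of 1.  */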
/* Return the cost of the memory address X.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
static int
ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
{
  struct ix86_address parts;
  int cost = 1;
  int ok = ix86_decompose_address (x, &parts);

  gcc_assert (ok);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  /* Attempt to minimize number of registers in the address.  */
  if ((parts.base
       && (!REG_P (parts.base)
           || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
      || (parts.index
          && (!REG_P (parts.index)
              || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
    cost++;

  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)
    cost++;

  /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.  Increase the cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be
     worthwhile to split such addresses or even refuse such addresses
     at all.

     The following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last case may be avoidable by explicitly coding the zero
     in the memory address, but I don't have an AMD-K6 machine handy to
     check this theory.  */

  if (TARGET_K6
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
          || (parts.disp && !parts.base && parts.index && parts.scale != 1)
          || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}
/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
   this is used to form addresses to local data when -fPIC is in
   use.  */

static bool
darwin_local_data_pic (rtx disp)
{
  return (GET_CODE (disp) == UNSPEC
          && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
}
/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  */

static bool
ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      x = XEXP (x, 0);

      if (GET_CODE (x) == PLUS)
        {
          if (!CONST_INT_P (XEXP (x, 1)))
            return false;
          x = XEXP (x, 0);
        }

      if (TARGET_MACHO && darwin_local_data_pic (x))
        return true;

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (x) == UNSPEC)
        switch (XINT (x, 1))
          {
          case UNSPEC_GOT:
          case UNSPEC_GOTOFF:
          case UNSPEC_PLTOFF:
            return TARGET_64BIT;
          case UNSPEC_TPOFF:
          case UNSPEC_NTPOFF:
            x = XVECEXP (x, 0, 0);
            return (GET_CODE (x) == SYMBOL_REF
                    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
          case UNSPEC_DTPOFF:
            x = XVECEXP (x, 0, 0);
            return (GET_CODE (x) == SYMBOL_REF
                    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
          default:
            return false;
          }

      /* We must have drilled down to a symbol.  */
      if (GET_CODE (x) == LABEL_REF)
        return true;
      if (GET_CODE (x) != SYMBOL_REF)
        return false;
      /* FALLTHRU */

    case SYMBOL_REF:
      /* TLS symbols are never valid.  */
      if (SYMBOL_REF_TLS_MODEL (x))
        return false;

      /* DLLIMPORT symbols are never valid.  */
      if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
          && SYMBOL_REF_DLLIMPORT_P (x))
        return false;

#if TARGET_MACHO
      /* mdynamic-no-pic */
      if (MACHO_DYNAMIC_NO_PIC_P)
        return machopic_symbol_defined_p (x);
#endif
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == TImode
          && x != CONST0_RTX (TImode)
          && !TARGET_64BIT)
        return false;
      break;

    case CONST_VECTOR:
      if (!standard_sse_constant_p (x))
        return false;

    default:
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}
/* Determine if it's legal to put X into the constant pool.  This
   is not possible for the address of thread-local symbols, which
   is checked above.  */

static bool
ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
{
  /* We can always put integral constants and vectors in memory.  */
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case CONST_DOUBLE:
    case CONST_VECTOR:
      return false;

    default:
      break;
    }
  return !ix86_legitimate_constant_p (mode, x);
}
/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */

bool
legitimate_pic_operand_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      inner = XEXP (x, 0);
      if (GET_CODE (inner) == PLUS
          && CONST_INT_P (XEXP (inner, 1)))
        inner = XEXP (inner, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
        switch (XINT (inner, 1))
          {
          case UNSPEC_GOT:
          case UNSPEC_GOTOFF:
          case UNSPEC_PLTOFF:
            return TARGET_64BIT;
          case UNSPEC_TPOFF:
            x = XVECEXP (inner, 0, 0);
            return (GET_CODE (x) == SYMBOL_REF
                    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
          case UNSPEC_MACHOPIC_OFFSET:
            return legitimate_pic_address_disp_p (x);
          default:
            return false;
          }
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      return legitimate_pic_address_disp_p (x);

    default:
      return true;
    }
}
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

bool
legitimate_pic_address_disp_p (rtx disp)
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      rtx op0 = disp, op1;

      switch (GET_CODE (disp))
        {
        case LABEL_REF:
          return true;

        case CONST:
          if (GET_CODE (XEXP (disp, 0)) != PLUS)
            break;
          op0 = XEXP (XEXP (disp, 0), 0);
          op1 = XEXP (XEXP (disp, 0), 1);
          if (!CONST_INT_P (op1)
              || INTVAL (op1) >= 16*1024*1024
              || INTVAL (op1) < -16*1024*1024)
            break;
          if (GET_CODE (op0) == LABEL_REF)
            return true;
          if (GET_CODE (op0) == CONST
              && GET_CODE (XEXP (op0, 0)) == UNSPEC
              && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
            return true;
          if (GET_CODE (op0) == UNSPEC
              && XINT (op0, 1) == UNSPEC_PCREL)
            return true;
          if (GET_CODE (op0) != SYMBOL_REF)
            break;
          /* FALLTHRU */

        case SYMBOL_REF:
          /* TLS references should always be enclosed in UNSPEC.  */
          if (SYMBOL_REF_TLS_MODEL (op0))
            return false;
          if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
              && ix86_cmodel != CM_LARGE_PIC)
            return true;
          break;

        default:
          break;
        }
    }
  if (GET_CODE (disp) != CONST)
    return false;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* It is unsafe to allow PLUS expressions; this limited the allowed
         distance of GOT table references.  We should not need these
         anyway.  */
      if (GET_CODE (disp) != UNSPEC
          || (XINT (disp, 1) != UNSPEC_GOTPCREL
              && XINT (disp, 1) != UNSPEC_GOTOFF
              && XINT (disp, 1) != UNSPEC_PCREL
              && XINT (disp, 1) != UNSPEC_PLTOFF))
        return false;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
          && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
        return false;
      return true;
    }

  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (!CONST_INT_P (XEXP (disp, 1)))
        return false;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  if (TARGET_MACHO && darwin_local_data_pic (disp))
    return true;

  if (GET_CODE (disp) != UNSPEC)
    return false;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      if (saw_plus)
        return false;
      /* We need to check for both symbols and labels because VxWorks loads
         text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
         details.  */
      return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
              || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
    case UNSPEC_GOTOFF:
      /* Refuse GOTOFF in 64bit mode, since it is always 64bit when used.
         While the ABI also specifies a 32bit relocation, we don't produce
         it in the small PIC model at all.  */
      if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
           || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
          && !TARGET_64BIT)
        return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
      return false;
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      if (saw_plus)
        return false;
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
              && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
    case UNSPEC_NTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
              && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
    case UNSPEC_DTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
              && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
    default:
      return false;
    }
}
11958 /* Recognizes RTL expressions that are valid memory addresses for an
11959 instruction. The MODE argument is the machine mode for the MEM
11960 expression that wants to use this address.
11962 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
11963 convert common non-canonical forms to canonical form so that they will
11967 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED
,
11968 rtx addr
, bool strict
)
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;

  /* Since a constant address in x32 is sign-extended to 64bit,
     we have to prevent addresses from 0x80000000 to 0xffffffff.  */
  if (TARGET_X32
      && CONST_INT_P (addr)
      && INTVAL (addr) < 0)
    return false;

  if (ix86_decompose_address (addr, &parts) <= 0)
    /* Decomposition failed.  */
    return false;

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.  */
  if (base)
    {
      rtx reg;

      if (REG_P (base))
	reg = base;
      else if (GET_CODE (base) == SUBREG && REG_P (SUBREG_REG (base)))
	reg = SUBREG_REG (base);
      else
	/* Base is not a register.  */
	return false;

      if (GET_MODE (base) != SImode && GET_MODE (base) != DImode)
	return false;

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
	/* Base is not valid.  */
	return false;
    }

  /* Validate index register.  */
  if (index)
    {
      rtx reg;

      if (REG_P (index))
	reg = index;
      else if (GET_CODE (index) == SUBREG && REG_P (SUBREG_REG (index)))
	reg = SUBREG_REG (index);
      else
	/* Index is not a register.  */
	return false;

      if (GET_MODE (index) != SImode && GET_MODE (index) != DImode)
	return false;

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
	/* Index is not valid.  */
	return false;
    }

  /* Index and base should have the same mode.  */
  if (base && index
      && GET_MODE (base) != GET_MODE (index))
    return false;

  /* Validate scale factor.  */
  if (scale != 1)
    {
      if (!index)
	/* Scale without index.  */
	return false;

      if (scale != 2 && scale != 4 && scale != 8)
	/* Scale is not a valid multiplier.  */
	return false;
    }

  /* Validate displacement.  */
  if (disp)
    {
      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == UNSPEC
	  && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
	switch (XINT (XEXP (disp, 0), 1))
	  {
	  /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
	     when used.  While the ABI also specifies 32bit relocations, we
	     don't produce them at all and use IP relative instead.  */
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	    gcc_assert (flag_pic);
	    if (!TARGET_64BIT)
	      goto is_legitimate_pic;

	    /* 64bit address unspec.  */
	    return false;

	  case UNSPEC_GOTPCREL:
	  case UNSPEC_PCREL:
	    gcc_assert (flag_pic);
	    goto is_legitimate_pic;

	  case UNSPEC_GOTTPOFF:
	  case UNSPEC_GOTNTPOFF:
	  case UNSPEC_INDNTPOFF:
	  case UNSPEC_NTPOFF:
	  case UNSPEC_DTPOFF:
	    break;

	  case UNSPEC_STACK_CHECK:
	    gcc_assert (flag_split_stack);
	    break;

	  default:
	    /* Invalid address unspec.  */
	    return false;
	  }

      else if (SYMBOLIC_CONST (disp)
	       && (flag_pic
		   || (TARGET_MACHO
#if TARGET_MACHO
		       && MACHOPIC_INDIRECT
		       && !machopic_operand_p (disp)
#endif
	       )))
	{

	is_legitimate_pic:
	  if (TARGET_64BIT && (index || base))
	    {
	      /* foo@dtpoff(%rX) is ok.  */
	      if (GET_CODE (disp) != CONST
		  || GET_CODE (XEXP (disp, 0)) != PLUS
		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
		  || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
		/* Non-constant pic memory reference.  */
		return false;
	    }
	  else if ((!TARGET_MACHO || flag_pic)
		   && ! legitimate_pic_address_disp_p (disp))
	    /* Displacement is an invalid pic construct.  */
	    return false;
#if TARGET_MACHO
	  else if (MACHO_DYNAMIC_NO_PIC_P
		   && !ix86_legitimate_constant_p (Pmode, disp))
	    /* Displacement must be referenced via non_lazy_pointer.  */
	    return false;
#endif

	  /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is a good idea, unfortunately these constructs may
	     be created by "adds using lea" optimization for incorrect
	     code like:

	     int a;
	     int foo (int i)
	       {
		 return *(&a + i);
	       }

	     This code is nonsensical, but results in addressing
	     GOT table with pic_offset_table_rtx base.  We can't
	     just refuse it easily, since it gets matched by
	     "addsi3" pattern, that later gets split to lea in the
	     case output register differs from input.  While this
	     can be handled by separate addsi pattern for this case
	     that never results in lea, this seems to be easier and
	     correct fix for crash to disable this test.  */
	}
      else if (GET_CODE (disp) != LABEL_REF
	       && !CONST_INT_P (disp)
	       && (GET_CODE (disp) != CONST
		   || !ix86_legitimate_constant_p (Pmode, disp))
	       && (GET_CODE (disp) != SYMBOL_REF
		   || !ix86_legitimate_constant_p (Pmode, disp)))
	/* Displacement is not constant.  */
	return false;
      else if (TARGET_64BIT
	       && !x86_64_immediate_operand (disp, VOIDmode))
	/* Displacement is out of range.  */
	return false;
    }

  /* Everything looks valid.  */
  return true;
}
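/* Illustrative sketch (not part of GCC): every address accepted above
   fits the x86 effective-address shape base + index*scale + disp with
   scale in {1, 2, 4, 8}.  The hypothetical helper below restates the
   scale rule the function checks.  */

static inline bool
sketch_x86_scale_ok_p (HOST_WIDE_INT scale, bool have_index)
{
  if (scale == 1)
    return true;
  /* A scale other than 1 is meaningless without an index register.  */
  return have_index && (scale == 2 || scale == 4 || scale == 8);
}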
/* Determine if a given RTX is a valid constant address.  */

bool
constant_address_p (rtx x)
{
  return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
}
/* Return a unique alias set for the GOT.  */

static alias_set_type
ix86_GOT_alias_set (void)
{
  static alias_set_type set = -1;
  if (set == -1)
    set = new_alias_set ();
  return set;
}
/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
   reg also appears in the address.  */
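/* For illustration (assembly sketch, 32-bit AT&T syntax with %ebx
   holding the PIC base), the two cases above usually come out as:

     1. Global data -- address loaded from the GOT:
	  movl  foo@GOT(%ebx), %eax	# eax = &foo
	  movl  (%eax), %edx		# edx = foo

     2. Static data -- GOT-relative offset, no load:
	  leal  foo@GOTOFF(%ebx), %eax	# eax = &foo directly  */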
static rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  rtx addr = orig;
  rtx new_rtx = orig;
  rtx base;

#if TARGET_MACHO
  if (TARGET_MACHO && !TARGET_64BIT)
    {
      if (reg == 0)
	reg = gen_reg_rtx (Pmode);
      /* Use the generic Mach-O PIC machinery.  */
      return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
    }
#endif

  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
    new_rtx = addr;
  else if (TARGET_64BIT
	   && ix86_cmodel != CM_SMALL_PIC
	   && gotoff_operand (addr, Pmode))
    {
      rtx tmpreg;

      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  */

      if (reload_in_progress)
	df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
      if (GET_CODE (addr) == CONST)
	addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
				    UNSPEC_GOTOFF);
	  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
	}
      else
	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new_rtx = gen_rtx_CONST (Pmode, new_rtx);
      if (!reg)
	tmpreg = gen_reg_rtx (Pmode);
      else
	tmpreg = reg;
      emit_move_insn (tmpreg, new_rtx);

      if (reg != 0)
	{
	  new_rtx = expand_simple_binop (Pmode, PLUS, reg,
					 pic_offset_table_rtx,
					 tmpreg, 1, OPTAB_DIRECT);
	  new_rtx = reg;
	}
      else
	new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
    }
  else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
    {
      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  */

      if (reload_in_progress)
	df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
      if (GET_CODE (addr) == CONST)
	addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
				    UNSPEC_GOTOFF);
	  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
	}
      else
	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new_rtx = gen_rtx_CONST (Pmode, new_rtx);
      new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);

      if (reg != 0)
	{
	  emit_move_insn (reg, new_rtx);
	  new_rtx = reg;
	}
    }
  else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
	   /* We can't use @GOTOFF for text labels on VxWorks;
	      see gotoff_operand.  */
	   || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
    {
      if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
	{
	  if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
	    return legitimize_dllimport_symbol (addr, true);
	  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
	      && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
	    {
	      rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0),
						   true);
	      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
	    }
	}

      /* For x64 PE-COFF there is no GOT table.  So we use address
	 directly.  */
      if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  emit_move_insn (reg, new_rtx);
	  new_rtx = reg;
	}
      else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
				    UNSPEC_GOTPCREL);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	  new_rtx = gen_const_mem (Pmode, new_rtx);
	  set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  /* Use gen_movsi directly; otherwise the address is loaded into a
	     register for CSE.  We don't want to CSE this address, instead
	     we CSE addresses from the GOT table, so skip this.  */
	  emit_insn (gen_movsi (reg, new_rtx));
	  new_rtx = reg;
	}
      else
	{
	  /* This symbol must be referenced via a load from the
	     Global Offset Table (@GOT).  */

	  if (reload_in_progress)
	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	  if (TARGET_64BIT)
	    new_rtx = force_reg (Pmode, new_rtx);
	  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
	  new_rtx = gen_const_mem (Pmode, new_rtx);
	  set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  emit_move_insn (reg, new_rtx);
	  new_rtx = reg;
	}
    }
  else
    {
      if (CONST_INT_P (addr)
	  && !x86_64_immediate_operand (addr, VOIDmode))
	{
	  if (reg)
	    {
	      emit_move_insn (reg, addr);
	      new_rtx = reg;
	    }
	  else
	    new_rtx = force_reg (Pmode, addr);
	}
      else if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);

	  /* We must match stuff we generate before.  Assume the only
	     unspecs that can get here are ours.  Not that we could do
	     anything with them anyway....  */
	  if (GET_CODE (addr) == UNSPEC
	      || (GET_CODE (addr) == PLUS
		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
	    return orig;
	  gcc_assert (GET_CODE (addr) == PLUS);
	}
      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant offset from a @GOTOFF
	     symbol reference.  */
	  if (gotoff_operand (op0, Pmode)
	      && CONST_INT_P (op1))
	    {
	      if (!TARGET_64BIT)
		{
		  if (reload_in_progress)
		    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
		  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
					    UNSPEC_GOTOFF);
		  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
		  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
		  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
					  new_rtx);

		  if (reg != 0)
		    {
		      emit_move_insn (reg, new_rtx);
		      new_rtx = reg;
		    }
		}
	      else
		{
		  if (INTVAL (op1) < -16*1024*1024
		      || INTVAL (op1) >= 16*1024*1024)
		    {
		      if (!x86_64_immediate_operand (op1, Pmode))
			op1 = force_reg (Pmode, op1);
		      new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0),
					      op1);
		    }
		}
	    }
	  else
	    {
	      base = legitimize_pic_address (XEXP (addr, 0), reg);
	      new_rtx = legitimize_pic_address (XEXP (addr, 1),
						base == reg ? NULL_RTX : reg);

	      if (CONST_INT_P (new_rtx))
		new_rtx = plus_constant (base, INTVAL (new_rtx));
	      else
		{
		  if (GET_CODE (new_rtx) == PLUS
		      && CONSTANT_P (XEXP (new_rtx, 1)))
		    {
		      base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
		      new_rtx = XEXP (new_rtx, 1);
		    }
		  new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
		}
	    }
	}
    }
  return new_rtx;
}
/* Load the thread pointer.  If TO_REG is true, force it into a register.  */

static rtx
get_thread_pointer (bool to_reg)
{
  rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);

  if (GET_MODE (tp) != Pmode)
    tp = convert_to_mode (Pmode, tp, 1);

  if (to_reg)
    tp = copy_addr_to_reg (tp);

  return tp;
}
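/* For illustration: the UNSPEC_TP above ends up as a load of the TCB
   self-pointer through the thread segment register, conventionally

	movl	%gs:0, %eax		# 32-bit
	movq	%fs:0, %rax		# 64-bit

   and the TLS offsets built by the callers below are added to it.  */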
/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx ix86_tls_symbol;

static rtx
ix86_tls_get_addr (void)
{
  if (!ix86_tls_symbol)
    {
      const char *sym
	= ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
	   ? "___tls_get_addr" : "__tls_get_addr");

      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
    }

  return ix86_tls_symbol;
}
/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */

static GTY(()) rtx ix86_tls_module_base_symbol;

static rtx
ix86_tls_module_base (void)
{
  if (!ix86_tls_module_base_symbol)
    {
      ix86_tls_module_base_symbol
	= gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");

      SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
	|= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
    }

  return ix86_tls_module_base_symbol;
}
/* A subroutine of ix86_legitimize_address and ix86_expand_move.  FOR_MOV is
   false if we expect this to be used for a memory address and true if
   we expect to load the address into a register.  */

static rtx
legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
{
  rtx dest, base, off;
  rtx pic = NULL_RTX, tp = NULL_RTX;
  int type;

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      dest = gen_reg_rtx (Pmode);

      if (!TARGET_64BIT)
	{
	  if (flag_pic)
	    pic = pic_offset_table_rtx;
	  else
	    {
	      pic = gen_reg_rtx (Pmode);
	      emit_insn (gen_set_got (pic));
	    }
	}

      if (TARGET_GNU2_TLS)
	{
	  if (TARGET_64BIT)
	    emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
	  else
	    emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));

	  tp = get_thread_pointer (true);
	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));

	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
	}
      else
	{
	  rtx caddr = ix86_tls_get_addr ();

	  if (TARGET_64BIT)
	    {
	      rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;

	      start_sequence ();
	      emit_call_insn (gen_tls_global_dynamic_64 (rax, x, caddr));
	      insns = get_insns ();
	      end_sequence ();

	      RTL_CONST_CALL_P (insns) = 1;
	      emit_libcall_block (insns, dest, rax, x);
	    }
	  else
	    emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
	}
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      base = gen_reg_rtx (Pmode);

      if (!TARGET_64BIT)
	{
	  if (flag_pic)
	    pic = pic_offset_table_rtx;
	  else
	    {
	      pic = gen_reg_rtx (Pmode);
	      emit_insn (gen_set_got (pic));
	    }
	}

      if (TARGET_GNU2_TLS)
	{
	  rtx tmp = ix86_tls_module_base ();

	  if (TARGET_64BIT)
	    emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
	  else
	    emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));

	  tp = get_thread_pointer (true);
	  set_unique_reg_note (get_last_insn (), REG_EQUAL,
			       gen_rtx_MINUS (Pmode, tmp, tp));
	}
      else
	{
	  rtx caddr = ix86_tls_get_addr ();

	  if (TARGET_64BIT)
	    {
	      rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, eqv;

	      start_sequence ();
	      emit_call_insn (gen_tls_local_dynamic_base_64 (rax, caddr));
	      insns = get_insns ();
	      end_sequence ();

	      /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
		 share the LD_BASE result with other LD model accesses.  */
	      eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
				    UNSPEC_TLS_LD_BASE);

	      RTL_CONST_CALL_P (insns) = 1;
	      emit_libcall_block (insns, base, rax, eqv);
	    }
	  else
	    emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
	}

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));

      if (TARGET_GNU2_TLS)
	{
	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));

	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
	}
      break;

    case TLS_MODEL_INITIAL_EXEC:
      if (TARGET_64BIT)
	{
	  if (TARGET_SUN_TLS)
	    {
	      /* The Sun linker took the AMD64 TLS spec literally
		 and can only handle %rax as destination of the
		 initial executable code sequence.  */

	      dest = gen_reg_rtx (Pmode);
	      emit_insn (gen_tls_initial_exec_64_sun (dest, x));
	      return dest;
	    }
	  else if (Pmode == SImode)
	    {
	      /* Always generate
			movl %fs:0, %reg32
			addl x@gottpoff(%rip), %reg32
		 to support linker IE->LE optimization and avoid
		 fs:(%reg32) as memory operand.  */
	      dest = gen_reg_rtx (Pmode);
	      emit_insn (gen_tls_initial_exec_x32 (dest, x));
	      return dest;
	    }

	  type = UNSPEC_GOTNTPOFF;
	}
      else if (flag_pic)
	{
	  if (reload_in_progress)
	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
	  pic = pic_offset_table_rtx;
	  type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
	}
      else if (!TARGET_ANY_GNU_TLS)
	{
	  pic = gen_reg_rtx (Pmode);
	  emit_insn (gen_set_got (pic));
	  type = UNSPEC_GOTTPOFF;
	}
      else
	{
	  pic = NULL;
	  type = UNSPEC_INDNTPOFF;
	}

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (Pmode, off);
      if (pic)
	off = gen_rtx_PLUS (Pmode, pic, off);
      off = gen_const_mem (Pmode, off);
      set_mem_alias_set (off, ix86_GOT_alias_set ());

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	{
	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  off = force_reg (Pmode, off);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  base = get_thread_pointer (true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_subsi3 (dest, base, off));
	}
      break;

    case TLS_MODEL_LOCAL_EXEC:
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
			    (TARGET_64BIT || TARGET_ANY_GNU_TLS)
			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	{
	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  base = get_thread_pointer (true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_subsi3 (dest, base, off));
	}
      break;

    default:
      gcc_unreachable ();
    }

  return dest;
}
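/* For illustration (GNU/Linux, 32-bit AT&T syntax), the four TLS
   models expanded above roughly correspond to these well-known
   sequences:

     global dynamic:  leal  x@tlsgd(,%ebx,1), %eax
		      call  ___tls_get_addr
     local dynamic:   leal  x@tlsldm(%ebx), %eax
		      call  ___tls_get_addr
		      leal  x@dtpoff(%eax), %edx
     initial exec:    movl  %gs:0, %eax
		      addl  x@gotntpoff(%ebx), %eax
     local exec:      movl  %gs:0, %eax
		      leal  x@ntpoff(%eax), %eax  */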
/* Create or return the unique __imp_DECL dllimport symbol corresponding
   to symbol DECL.  */

static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
  htab_t dllimport_map;

static tree
get_dllimport_decl (tree decl)
{
  struct tree_map *h, in;
  void **loc;
  const char *name;
  const char *prefix;
  size_t namelen, prefixlen;
  char *imp_name;
  tree to;
  rtx rtl;

  if (!dllimport_map)
    dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);

  in.hash = htab_hash_pointer (decl);
  in.base.from = decl;
  loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
  h = (struct tree_map *) *loc;
  if (h)
    return h->to;

  *loc = h = ggc_alloc_tree_map ();
  h->hash = in.hash;
  h->base.from = decl;
  h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
			   VAR_DECL, NULL, ptr_type_node);
  DECL_ARTIFICIAL (to) = 1;
  DECL_IGNORED_P (to) = 1;
  DECL_EXTERNAL (to) = 1;
  TREE_READONLY (to) = 1;

  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
  name = targetm.strip_name_encoding (name);
  prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
    ? "*__imp_" : "*__imp__";
  namelen = strlen (name);
  prefixlen = strlen (prefix);
  imp_name = (char *) alloca (namelen + prefixlen + 1);
  memcpy (imp_name, prefix, prefixlen);
  memcpy (imp_name + prefixlen, name, namelen + 1);

  name = ggc_alloc_string (imp_name, namelen + prefixlen);
  rtl = gen_rtx_SYMBOL_REF (Pmode, name);
  SET_SYMBOL_REF_DECL (rtl, to);
  SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;

  rtl = gen_const_mem (Pmode, rtl);
  set_mem_alias_set (rtl, ix86_GOT_alias_set ());

  SET_DECL_RTL (to, rtl);
  SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));

  return to;
}
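/* Illustrative sketch (not part of GCC) of the name mangling above: a
   dllimport reference to NAME goes through a pointer named
   "__imp_NAME", with one leading underscore fewer when the symbol is
   fastcall-decorated or the target uses no user label prefix.  The
   helper below is hypothetical.  */

static void
sketch_imp_name (char *buf, size_t bufsz, const char *name, bool no_uscore)
{
  snprintf (buf, bufsz, "%s%s", no_uscore ? "*__imp_" : "*__imp__", name);
}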
/* Expand SYMBOL into its corresponding dllimport symbol.  WANT_REG is
   true if we require the result be a register.  */

static rtx
legitimize_dllimport_symbol (rtx symbol, bool want_reg)
{
  tree imp_decl;
  rtx x;

  gcc_assert (SYMBOL_REF_DECL (symbol));
  imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));

  x = DECL_RTL (imp_decl);
  if (want_reg)
    x = force_reg (Pmode, x);
  return x;
}
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */
static rtx
ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			 enum machine_mode mode)
{
  int changed = 0;
  unsigned log;

  log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
  if (log)
    return legitimize_tls_address (x, (enum tls_model) log, false);
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
      && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
    {
      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
				      (enum tls_model) log, false);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
    }

  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
      if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
	return legitimize_dllimport_symbol (x, true);
      if (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
	  && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
	{
	  rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
	  return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
	}
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

#if TARGET_MACHO
  if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
    return machopic_indirect_data_reference (x, 0);
#endif

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
  if (GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1))
      && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
    {
      changed = 1;
      log = INTVAL (XEXP (x, 1));
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
	{
	  changed = 1;
	  log = INTVAL (XEXP (XEXP (x, 0), 1));
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
	{
	  changed = 1;
	  log = INTVAL (XEXP (XEXP (x, 1), 1));
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  rtx tmp = XEXP (x, 0);
	  XEXP (x, 0) = XEXP (x, 1);
	  XEXP (x, 1) = tmp;
	  changed = 1;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = 1;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = 1;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (other, INTVAL (constant)));
	    }
	}

      if (changed && ix86_legitimate_address_p (mode, x, false))
	return x;

      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
	}

      if (changed
	  && REG_P (XEXP (x, 1))
	  && REG_P (XEXP (x, 0)))
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = 1;
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && ix86_legitimate_address_p (mode, x, false))
	return x;

      if (REG_P (XEXP (x, 0)))
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val  = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    {
	      if (GET_MODE (val) != Pmode)
		val = convert_to_mode (Pmode, val, 1);
	      emit_move_insn (temp, val);
	    }

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (REG_P (XEXP (x, 1)))
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val  = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    {
	      if (GET_MODE (val) != Pmode)
		val = convert_to_mode (Pmode, val, 1);
	      emit_move_insn (temp, val);
	    }

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (FILE *file, rtx x, int code)
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      gcc_assert (flag_pic);
      putc ('.', file);
      break;

    case SYMBOL_REF:
      if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
	output_addr_const (file, x);
      else
	{
	  const char *name = XSTR (x, 0);

	  /* Mark the decl as referenced so that cgraph will
	     output the function.  */
	  if (SYMBOL_REF_DECL (x))
	    mark_decl_referenced (SYMBOL_REF_DECL (x));

#if TARGET_MACHO
	  if (MACHOPIC_INDIRECT
	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
	    name = machopic_indirection_name (x, /*stub_p=*/true);
#endif
	  assemble_name (file, name);
	}
      if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
	  && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
	fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
	{
	  /* We can use %d if the number is <32 bits and positive.  */
	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
	    fprintf (file, "0x%lx%08lx",
		     (unsigned long) CONST_DOUBLE_HIGH (x),
		     (unsigned long) CONST_DOUBLE_LOW (x));
	  else
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
	}
      else
	/* We can't handle floating point constants;
	   TARGET_PRINT_OPERAND must handle them.  */
	output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (CONST_INT_P (XEXP (x, 0)))
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else
	{
	  gcc_assert (CONST_INT_P (XEXP (x, 1)));
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      break;

    case MINUS:
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      break;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_STACK_CHECK)
	{
	  bool f = i386_asm_output_addr_const_extra (file, x);
	  gcc_assert (f);
	  break;
	}

      gcc_assert (XVECLEN (x, 0) == 1);
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
	{
	case UNSPEC_GOT:
	  fputs ("@GOT", file);
	  break;
	case UNSPEC_GOTOFF:
	  fputs ("@GOTOFF", file);
	  break;
	case UNSPEC_PLTOFF:
	  fputs ("@PLTOFF", file);
	  break;
	case UNSPEC_PCREL:
	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		 "(%rip)" : "[rip]", file);
	  break;
	case UNSPEC_GOTPCREL:
	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
	  break;
	case UNSPEC_GOTTPOFF:
	  /* FIXME: This might be @TPOFF in Sun ld too.  */
	  fputs ("@gottpoff", file);
	  break;
	case UNSPEC_TPOFF:
	  fputs ("@tpoff", file);
	  break;
	case UNSPEC_NTPOFF:
	  if (TARGET_64BIT)
	    fputs ("@tpoff", file);
	  else
	    fputs ("@ntpoff", file);
	  break;
	case UNSPEC_DTPOFF:
	  fputs ("@dtpoff", file);
	  break;
	case UNSPEC_GOTNTPOFF:
	  if (TARGET_64BIT)
	    fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		   "@gottpoff(%rip)": "@gottpoff[rip]", file);
	  else
	    fputs ("@gotntpoff", file);
	  break;
	case UNSPEC_INDNTPOFF:
	  fputs ("@indntpoff", file);
	  break;
#if TARGET_MACHO
	case UNSPEC_MACHOPIC_OFFSET:
	  putc ('-', file);
	  machopic_output_function_base_name (file);
	  break;
#endif
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

static void ATTRIBUTE_UNUSED
i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  fputs (ASM_LONG, file);
  output_addr_const (file, x);
  fputs ("@dtpoff", file);
  switch (size)
    {
    case 4:
      break;
    case 8:
      fputs (", 0", file);
      break;
    default:
      gcc_unreachable ();
   }
}
/* Return true if X is a representation of the PIC register.  This copes
   with calls from ix86_find_base_term, where the register might have
   been replaced by a cselib value.  */

static bool
ix86_pic_register_p (rtx x)
{
  if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
    return (pic_offset_table_rtx
	    && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
  else
    return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
}
/* Helper function for ix86_delegitimize_address.
   Attempt to delegitimize TLS local-exec accesses.  */

static rtx
ix86_delegitimize_tls_address (rtx orig_x)
{
  rtx x = orig_x, unspec;
  struct ix86_address addr;

  if (!TARGET_TLS_DIRECT_SEG_REFS)
    return orig_x;
  if (MEM_P (x))
    x = XEXP (x, 0);
  if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
    return orig_x;
  if (ix86_decompose_address (x, &addr) == 0
      || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
      || addr.disp == NULL_RTX
      || GET_CODE (addr.disp) != CONST)
    return orig_x;
  unspec = XEXP (addr.disp, 0);
  if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
    unspec = XEXP (unspec, 0);
  if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
    return orig_x;
  x = XVECEXP (unspec, 0, 0);
  gcc_assert (GET_CODE (x) == SYMBOL_REF);
  if (unspec != XEXP (addr.disp, 0))
    x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
  if (addr.index)
    {
      rtx idx = addr.index;
      if (addr.scale != 1)
	idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
      x = gen_rtx_PLUS (Pmode, idx, x);
    }
  if (addr.base)
    x = gen_rtx_PLUS (Pmode, addr.base, x);
  if (MEM_P (orig_x))
    x = replace_equiv_address_nv (orig_x, x);
  return x;
}
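/* For illustration: the access shape undone here is the local-exec
   direct segment reference, e.g.

	movl	%gs:x@ntpoff, %eax	# 32-bit
	movq	%fs:x@tpoff, %rax	# 64-bit

   which is folded back into plain X (plus any offset) for debug
   output.  */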
/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.

   On Darwin, this is necessary to avoid a crash, because Darwin
   has a different PIC label for each routine but the DWARF debugging
   information is not associated with any particular routine, so it's
   necessary to remove references to the PIC label from RTL stored by
   the DWARF output code.  */

static rtx
ix86_delegitimize_address (rtx x)
{
  rtx orig_x = delegitimize_mem_from_attrs (x);
  /* addend is NULL or some rtx if x is something+GOTOFF where
     something doesn't include the PIC register.  */
  rtx addend = NULL_RTX;
  /* reg_addend is NULL or a multiple of some register.  */
  rtx reg_addend = NULL_RTX;
  /* const_addend is NULL or a const_int.  */
  rtx const_addend = NULL_RTX;
  /* This is the result, or NULL.  */
  rtx result = NULL_RTX;

  x = orig_x;

  if (MEM_P (x))
    x = XEXP (x, 0);

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_MODE (XEXP (x, 0)) == Pmode
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
	  && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
	{
	  rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
	  x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
	  if (MEM_P (orig_x))
	    x = replace_equiv_address_nv (orig_x, x);
	  return x;
	}
      if (GET_CODE (x) != CONST
	  || GET_CODE (XEXP (x, 0)) != UNSPEC
	  || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
	      && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
	  || (!MEM_P (orig_x) && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL))
	return ix86_delegitimize_tls_address (orig_x);
      x = XVECEXP (XEXP (x, 0), 0, 0);
      if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
	{
	  x = simplify_gen_subreg (GET_MODE (orig_x), x,
				   GET_MODE (x), 0);
	  if (x == NULL_RTX)
	    return orig_x;
	}
      return x;
    }

  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return ix86_delegitimize_tls_address (orig_x);

  if (ix86_pic_register_p (XEXP (x, 0)))
    /* %ebx + GOT/GOTOFF */
    ;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF */
      reg_addend = XEXP (x, 0);
      if (ix86_pic_register_p (XEXP (reg_addend, 0)))
	reg_addend = XEXP (reg_addend, 1);
      else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
	reg_addend = XEXP (reg_addend, 0);
      else
	{
	  reg_addend = NULL_RTX;
	  addend = XEXP (x, 0);
	}
    }
  else
    addend = XEXP (x, 0);

  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1)))
    {
      const_addend = XEXP (x, 1);
      x = XEXP (x, 0);
    }

  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
	  || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
    result = XVECEXP (x, 0, 0);

  if (TARGET_MACHO && darwin_local_data_pic (x)
      && !MEM_P (orig_x))
    result = XVECEXP (x, 0, 0);

  if (! result)
    return ix86_delegitimize_tls_address (orig_x);

  if (const_addend)
    result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result,
						 const_addend));
  if (reg_addend)
    result = gen_rtx_PLUS (Pmode, reg_addend, result);
  if (addend)
    {
      /* If the rest of original X doesn't involve the PIC register, add
	 addend and subtract pic_offset_table_rtx.  This can happen e.g.
	 for code like:
	 leal (%ebx, %ecx, 4), %ecx
	 ...
	 movl foo@GOTOFF(%ecx), %edx
	 in which case we return (%ecx - %ebx) + foo.  */
      if (pic_offset_table_rtx)
	result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
						     pic_offset_table_rtx),
			       result);
      else
	return orig_x;
    }
  if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
    {
      result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
      if (result == NULL_RTX)
	return orig_x;
    }
  return result;
}
/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */

rtx
ix86_find_base_term (rtx x)
{
  rtx term;

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) != CONST)
	return x;
      term = XEXP (x, 0);
      if (GET_CODE (term) == PLUS
	  && (CONST_INT_P (XEXP (term, 1))
	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
	term = XEXP (term, 0);
      if (GET_CODE (term) != UNSPEC
	  || (XINT (term, 1) != UNSPEC_GOTPCREL
	      && XINT (term, 1) != UNSPEC_PCREL))
	return x;

      return XVECEXP (term, 0, 0);
    }

  return ix86_delegitimize_address (x);
}
static void
put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
		    int fp, FILE *file)
{
  const char *suffix;

  if (mode == CCFPmode || mode == CCFPUmode)
    {
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case GT:
      gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
      suffix = "g";
      break;

    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
	 Those same assemblers have the same but opposite lossage on cmov.  */
      if (mode == CCmode)
	suffix = fp ? "nbe" : "a";
      else if (mode == CCCmode)
	suffix = "b";
      else
	gcc_unreachable ();
      break;

    case LT:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "s";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "l";
      else
	gcc_unreachable ();
      break;

    case LTU:
      gcc_assert (mode == CCmode || mode == CCCmode);
      suffix = "b";
      break;

    case GE:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "ns";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "ge";
      else
	gcc_unreachable ();
      break;

    case GEU:
      /* ??? As above.  */
      gcc_assert (mode == CCmode || mode == CCCmode);
      suffix = fp ? "nb" : "ae";
      break;

    case LE:
      gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
      suffix = "le";
      break;

    case LEU:
      /* ??? As above.  */
      if (mode == CCmode)
	suffix = fp ? "nbe" : "be";
      else if (mode == CCCmode)
	suffix = fp ? "nb" : "ae";
      else
	gcc_unreachable ();
      break;

    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;

    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;

    default:
      gcc_unreachable ();
    }
  fputs (suffix, file);
}
/* Print the name of register X to FILE based on its machine mode and number.
   If CODE is 'w', pretend the mode is HImode.
   If CODE is 'b', pretend the mode is QImode.
   If CODE is 'k', pretend the mode is SImode.
   If CODE is 'q', pretend the mode is DImode.
   If CODE is 'x', pretend the mode is V4SFmode.
   If CODE is 't', pretend the mode is V8SFmode.
   If CODE is 'h', pretend the reg is the 'high' byte register.
   If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
   If CODE is 'd', duplicate the operand for AVX instruction.
 */

void
print_reg (rtx x, int code, FILE *file)
{
  const char *reg;
  bool duplicated = code == 'd' && TARGET_AVX;

  gcc_assert (x == pc_rtx
	      || (REGNO (x) != ARG_POINTER_REGNUM
		  && REGNO (x) != FRAME_POINTER_REGNUM
		  && REGNO (x) != FLAGS_REG
		  && REGNO (x) != FPSR_REG
		  && REGNO (x) != FPCR_REG));

  if (ASSEMBLER_DIALECT == ASM_ATT)
    putc ('%', file);

  if (x == pc_rtx)
    {
      gcc_assert (TARGET_64BIT);
      fputs ("rip", file);
      return;
    }

  if (code == 'w' || MMX_REG_P (x))
    code = 2;
  else if (code == 'b')
    code = 1;
  else if (code == 'k')
    code = 4;
  else if (code == 'q')
    code = 8;
  else if (code == 'y')
    code = 3;
  else if (code == 'h')
    code = 0;
  else if (code == 'x')
    code = 16;
  else if (code == 't')
    code = 32;
  else
    code = GET_MODE_SIZE (GET_MODE (x));

  /* Irritatingly, AMD extended registers use a different naming convention
     from the normal registers: "r%d[bwd]".  */
  if (REX_INT_REG_P (x))
    {
      gcc_assert (TARGET_64BIT);
      putc ('r', file);
      fprint_ul (file, REGNO (x) - FIRST_REX_INT_REG + 8);
      switch (code)
	{
	case 0:
	  error ("extended registers have no high halves");
	  break;
	case 1:
	  putc ('b', file);
	  break;
	case 2:
	  putc ('w', file);
	  break;
	case 4:
	  putc ('d', file);
	  break;
	case 8:
	  /* no suffix */
	  break;
	default:
	  error ("unsupported operand size for extended register");
	  break;
	}
      return;
    }

  reg = NULL;
  switch (code)
    {
    case 3:
      if (STACK_TOP_P (x))
	{
	  reg = "st(0)";
	  break;
	}
      /* FALLTHRU */
    case 8:
    case 4:
    case 12:
      if (! ANY_FP_REG_P (x))
	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
      /* FALLTHRU */
    case 16:
    case 2:
    normal:
      reg = hi_reg_name[REGNO (x)];
      break;
    case 1:
      if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
	goto normal;
      reg = qi_reg_name[REGNO (x)];
      break;
    case 0:
      if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
	goto normal;
      reg = qi_high_reg_name[REGNO (x)];
      break;
    case 32:
      if (SSE_REG_P (x))
	{
	  gcc_assert (!duplicated);
	  putc ('y', file);
	  fputs (hi_reg_name[REGNO (x)] + 1, file);
	  return;
	}
      break;
    default:
      gcc_unreachable ();
    }

  fputs (reg, file);
  if (duplicated)
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
	fprintf (file, ", %%%s", reg);
      else
	fprintf (file, ", %s", reg);
    }
}
/* Locate some local-dynamic symbol still in use by this function
   so that we can print its name in some tls_local_dynamic_base
   pattern.  */

static int
get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
{
  rtx x = *px;

  if (GET_CODE (x) == SYMBOL_REF
      && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
    {
      cfun->machine->some_ld_name = XSTR (x, 0);
      return 1;
    }

  return 0;
}

static const char *
get_some_local_dynamic_name (void)
{
  rtx insn;

  if (cfun->machine->some_ld_name)
    return cfun->machine->some_ld_name;

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    if (NONDEBUG_INSN_P (insn)
	&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
      return cfun->machine->some_ld_name;

  return NULL;
}
/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   F,f -- likewise, but for floating-point.
   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
	otherwise nothing
   R -- print the prefix for register names.
   z -- print the opcode suffix for the size of the current operand.
   Z -- likewise, with special suffixes for x87 instructions.
   * -- print a star (in certain assembler syntax)
   A -- print an absolute memory reference.
   E -- print address with DImode register names if TARGET_64BIT.
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assembler's argument
	delimiter.
   b -- print the QImode name of the register for the indicated operand.
	%b0 would print %al if operands[0] is reg 0.
   w -- likewise, print the HImode name of the register.
   k -- likewise, print the SImode name of the register.
   q -- likewise, print the DImode name of the register.
   x -- likewise, print the V4SFmode name of the register.
   t -- likewise, print the V8SFmode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   d -- print duplicated register operand for AVX instruction.
   D -- print condition for SSE cmp instruction.
   P -- if PIC, print an @PLT suffix.
   p -- print raw symbol name.
   X -- don't print any sort of PIC '@' suffix for a symbol.
   & -- print some in-use local-dynamic symbol name.
   H -- print a memory address offset by 8; used for sse high-parts
   Y -- print condition for XOP pcom* instruction.
   + -- print a branch hint as 'cs' or 'ds' prefix
   ; -- print a semicolon (after prefixes due to bug in older gas).
   ~ -- print "i" if TARGET_AVX2, "f" otherwise.
   @ -- print a segment register of thread base pointer load
   ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
 */
static void
ix86_print_operand (FILE *file, rtx x, int code)
{
  if (code)
    {
      switch (code)
	{
	case '*':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('*', file);
	  return;

	case '&':
	  {
	    const char *name = get_some_local_dynamic_name ();
	    if (name == NULL)
	      output_operand_lossage ("'%%&' used without any "
				      "local dynamic TLS references");
	    else
	      assemble_name (file, name);
	    return;
	  }

	case 'A':
	  switch (ASSEMBLER_DIALECT)
	    {
	    case ASM_ATT:
	      putc ('*', file);
	      break;

	    case ASM_INTEL:
	      /* Intel syntax.  For absolute addresses, registers should not
		 be surrounded by braces.  */
	      if (!REG_P (x))
		{
		  putc ('[', file);
		  ix86_print_operand (file, x, 0);
		  putc (']', file);
		  return;
		}
	      break;

	    default:
	      gcc_unreachable ();
	    }

	  ix86_print_operand (file, x, 0);
	  return;

	case 'E':
	  /* Wrap address in an UNSPEC to declare special handling.  */
	  if (TARGET_64BIT)
	    x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);

	  output_address (x);
	  return;

	case 'L':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('l', file);
	  return;

	case 'W':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('w', file);
	  return;

	case 'B':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('b', file);
	  return;

	case 'Q':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('l', file);
	  return;

	case 'S':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('s', file);
	  return;

	case 'T':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('t', file);
	  return;

	case 'z':
	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	    {
	      /* Opcodes don't get size suffixes if using Intel opcodes.  */
	      if (ASSEMBLER_DIALECT == ASM_INTEL)
		return;

	      switch (GET_MODE_SIZE (GET_MODE (x)))
		{
		case 1:
		  putc ('b', file);
		  return;

		case 2:
		  putc ('w', file);
		  return;

		case 4:
		  putc ('l', file);
		  return;

		case 8:
		  putc ('q', file);
		  return;

		default:
		  output_operand_lossage
		    ("invalid operand size for operand code '%c'", code);
		  return;
		}
	    }

	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	    warning
	      (0, "non-integer operand used with operand code '%c'", code);
	  /* FALLTHRU */

	case 'Z':
	  /* 387 opcodes don't get size suffixes if using Intel opcodes.  */
	  if (ASSEMBLER_DIALECT == ASM_INTEL)
	    return;

	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	    {
	      switch (GET_MODE_SIZE (GET_MODE (x)))
		{
		case 2:
#ifdef HAVE_AS_IX86_FILDS
		  putc ('s', file);
#endif
		  return;

		case 4:
		  putc ('l', file);
		  return;

		case 8:
#ifdef HAVE_AS_IX86_FILDQ
		  putc ('q', file);
#else
		  fputs ("ll", file);
#endif
		  return;

		default:
		  break;
		}
	    }
	  else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	    {
	      /* 387 opcodes don't get size suffixes
		 if the operands are registers.  */
	      if (STACK_REG_P (x))
		return;

	      switch (GET_MODE_SIZE (GET_MODE (x)))
		{
		case 4:
		  putc ('s', file);
		  return;

		case 8:
		  putc ('l', file);
		  return;

		case 12:
		case 16:
		  putc ('t', file);
		  return;

		default:
		  break;
		}
	    }
	  else
	    {
	      output_operand_lossage
		("invalid operand type used with operand code '%c'", code);
	      return;
	    }

	  output_operand_lossage
	    ("invalid operand size for operand code '%c'", code);
	  return;

	case 'd':
	case 'b':
	case 'w':
	case 'k':
	case 'q':
	case 'h':
	case 't':
	case 'y':
	case 'x':
	case 'X':
	case 'P':
	case 'p':
	  break;

	case 's':
	  if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
	    {
	      ix86_print_operand (file, x, 0);
	      fputs (", ", file);
	    }
	  return;

	case 'D':
	  /* Little bit of braindamage here.  The SSE compare instructions
	     use completely different names for the comparisons than the fp
	     conditional moves.  */
	  if (TARGET_AVX)
	    {
	      switch (GET_CODE (x))
		{
		case EQ:
		  fputs ("eq", file);
		  break;
		case UNEQ:
		  fputs ("eq_us", file);
		  break;
		case LT:
		  fputs ("lt", file);
		  break;
		case UNLT:
		  fputs ("nge", file);
		  break;
		case LE:
		  fputs ("le", file);
		  break;
		case UNLE:
		  fputs ("ngt", file);
		  break;
		case UNORDERED:
		  fputs ("unord", file);
		  break;
		case NE:
		  fputs ("neq", file);
		  break;
		case LTGT:
		  fputs ("neq_oq", file);
		  break;
		case GE:
		  fputs ("ge", file);
		  break;
		case UNGE:
		  fputs ("nlt", file);
		  break;
		case GT:
		  fputs ("gt", file);
		  break;
		case UNGT:
		  fputs ("nle", file);
		  break;
		case ORDERED:
		  fputs ("ord", file);
		  break;
		default:
		  output_operand_lossage ("operand is not a condition code, "
					  "invalid operand code 'D'");
		  return;
		}
	    }
	  else
	    {
	      switch (GET_CODE (x))
		{
		case EQ:
		case UNEQ:
		  fputs ("eq", file);
		  break;
		case LT:
		case UNLT:
		  fputs ("lt", file);
		  break;
		case LE:
		case UNLE:
		  fputs ("le", file);
		  break;
		case UNORDERED:
		  fputs ("unord", file);
		  break;
		case NE:
		case LTGT:
		  fputs ("neq", file);
		  break;
		case UNGE:
		case GE:
		  fputs ("nlt", file);
		  break;
		case UNGT:
		case GT:
		  fputs ("nle", file);
		  break;
		case ORDERED:
		  fputs ("ord", file);
		  break;
		default:
		  output_operand_lossage ("operand is not a condition code, "
					  "invalid operand code 'D'");
		  return;
		}
	    }
	  return;

	case 'O':
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    {
	      switch (GET_MODE (x))
		{
		case HImode: putc ('w', file); break;
		case SImode:
		case SFmode: putc ('l', file); break;
		case DImode:
		case DFmode: putc ('q', file); break;
		default: gcc_unreachable ();
		}
	      putc ('.', file);
	    }
#endif
	  return;

	case 'C':
	  if (!COMPARISON_P (x))
	    {
	      output_operand_lossage ("operand is neither a constant nor a "
				      "condition code, invalid operand code "
				      "'C'");
	      return;
	    }
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
			      0, 0, file);
	  return;

	case 'F':
	  if (!COMPARISON_P (x))
	    {
	      output_operand_lossage ("operand is neither a constant nor a "
				      "condition code, invalid operand code "
				      "'F'");
	      return;
	    }
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('.', file);
#endif
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
			      0, 1, file);
	  return;

	  /* Like above, but reverse condition */
	case 'c':
	  /* Check to see if the argument to %c is really a constant
	     and not a condition code which needs to be reversed.  */
	  if (!COMPARISON_P (x))
	    {
	      output_operand_lossage ("operand is neither a constant nor a "
				      "condition code, invalid operand "
				      "code 'c'");
	      return;
	    }
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
			      1, 0, file);
	  return;

	case 'f':
	  if (!COMPARISON_P (x))
	    {
	      output_operand_lossage ("operand is neither a constant nor a "
				      "condition code, invalid operand "
				      "code 'f'");
	      return;
	    }
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('.', file);
#endif
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
			      1, 1, file);
	  return;

	case 'H':
	  if (!offsettable_memref_p (x))
	    {
	      output_operand_lossage ("operand is not an offsettable memory "
				      "reference, invalid operand "
				      "code 'H'");
	      return;
	    }
	  /* It doesn't actually matter what mode we use here, as we're
	     only going to use this for printing.  */
	  x = adjust_address_nv (x, DImode, 8);
	  break;

	case '+':
	  {
	    rtx x;

	    if (!optimize
		|| optimize_function_for_size_p (cfun)
		|| !TARGET_BRANCH_PREDICTION_HINTS)
	      return;

	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
	    if (x)
	      {
		int pred_val = INTVAL (XEXP (x, 0));

		if (pred_val < REG_BR_PROB_BASE * 45 / 100
		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
		  {
		    bool taken = pred_val > REG_BR_PROB_BASE / 2;
		    bool cputaken
		      = final_forward_branch_p (current_output_insn) == 0;

		    /* Emit hints only in the case default branch prediction
		       heuristics would fail.  */
		    if (taken != cputaken)
		      {
			/* We use 3e (DS) prefix for taken branches and
			   2e (CS) prefix for not taken branches.  */
			if (taken)
			  fputs ("ds ; ", file);
			else
			  fputs ("cs ; ", file);
		      }
		  }
	      }
	    return;
	  }

	case 'Y':
	  switch (GET_CODE (x))
	    {
	    case NE:
	      fputs ("neq", file);
	      break;
	    case EQ:
	      fputs ("eq", file);
	      break;
	    case GE:
	    case GEU:
	      fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
	      break;
	    case GT:
	    case GTU:
	      fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
	      break;
	    case LE:
	    case LEU:
	      fputs ("le", file);
	      break;
	    case LT:
	    case LTU:
	      fputs ("lt", file);
	      break;
	    case UNORDERED:
	      fputs ("unord", file);
	      break;
	    case ORDERED:
	      fputs ("ord", file);
	      break;
	    case UNEQ:
	      fputs ("ueq", file);
	      break;
	    case UNGE:
	      fputs ("nlt", file);
	      break;
	    case UNGT:
	      fputs ("nle", file);
	      break;
	    case UNLE:
	      fputs ("ule", file);
	      break;
	    case UNLT:
	      fputs ("ult", file);
	      break;
	    case LTGT:
	      fputs ("une", file);
	      break;
	    default:
	      output_operand_lossage ("operand is not a condition code, "
				      "invalid operand code 'Y'");
	      return;
	    }
	  return;

	case ';':
#ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
	  putc (';', file);
#endif
	  return;

	case '@':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('%', file);

	  /* The kernel uses a different segment register for performance
	     reasons; a system call would not have to trash the userspace
	     segment register, which would be expensive.  */
	  if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
	    fputs ("fs", file);
	  else
	    fputs ("gs", file);
	  return;

	case '~':
	  putc (TARGET_AVX2 ? 'i' : 'f', file);
	  return;

	case '^':
	  if (TARGET_64BIT && Pmode != word_mode)
	    fputs ("addr32 ", file);
	  return;

	default:
	  output_operand_lossage ("invalid operand code '%c'", code);
	}
    }

  if (REG_P (x))
    print_reg (x, code, file);

  else if (MEM_P (x))
    {
      /* No `byte ptr' prefix for call instructions or BLKmode operands.  */
      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
	  && GET_MODE (x) != BLKmode)
	{
	  const char *size;

	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 1: size = "BYTE"; break;
	    case 2: size = "WORD"; break;
	    case 4: size = "DWORD"; break;
	    case 8: size = "QWORD"; break;
	    case 12: size = "TBYTE"; break;
	    case 16:
	      if (GET_MODE (x) == XFmode)
		size = "TBYTE";
	      else
		size = "XMMWORD";
	      break;
	    case 32: size = "YMMWORD"; break;
	    default:
	      gcc_unreachable ();
	    }

	  /* Check for explicit size override (codes 'b', 'w', 'k',
	     'q' and 'x').  */
	  if (code == 'b')
	    size = "BYTE";
	  else if (code == 'w')
	    size = "WORD";
	  else if (code == 'k')
	    size = "DWORD";
	  else if (code == 'q')
	    size = "QWORD";
	  else if (code == 'x')
	    size = "XMMWORD";

	  fputs (size, file);
	  fputs (" PTR ", file);
	}

      x = XEXP (x, 0);
      /* Avoid (%rip) for call operands.  */
      if (CONSTANT_ADDRESS_P (x) && code == 'P'
	  && !CONST_INT_P (x))
	output_addr_const (file, x);
      else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
	output_operand_lossage ("invalid constraints for operand");
      else
	output_address (x);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
    {
      REAL_VALUE_TYPE r;
      long l;

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_SINGLE (r, l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
	putc ('$', file);
      /* Sign extend 32bit SFmode immediate to 8 bytes.  */
      if (code == 'q')
	fprintf (file, "0x%08llx", (unsigned long long) (int) l);
      else
	fprintf (file, "0x%08x", (unsigned int) l);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
    {
      REAL_VALUE_TYPE r;
      long l[2];

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_DOUBLE (r, l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
	putc ('$', file);
      fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
    }

  /* These float cases don't actually occur as immediate operands.  */
  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
    {
      char dstr[30];

      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr),
		       0, 1);
      fputs (dstr, file);
    }

  else
    {
      /* We have patterns that allow zero sets of memory, for instance.
	 In 64-bit mode, we should probably support all 8-byte vectors,
	 since we can in fact encode that into an immediate.  */
      if (GET_CODE (x) == CONST_VECTOR)
	{
	  gcc_assert (x == CONST0_RTX (GET_MODE (x)));
	  x = const0_rtx;
	}

      if (code != 'P' && code != 'p')
	{
	  if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
	    {
	      if (ASSEMBLER_DIALECT == ASM_ATT)
		putc ('$', file);
	    }
	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
		   || GET_CODE (x) == LABEL_REF)
	    {
	      if (ASSEMBLER_DIALECT == ASM_ATT)
		putc ('$', file);
	      else
		fputs ("OFFSET FLAT:", file);
	    }
	}
      if (CONST_INT_P (x))
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      else if (flag_pic || MACHOPIC_INDIRECT)
	output_pic_addr_const (file, x, code);
      else
	output_addr_const (file, x);
    }
}
static bool
ix86_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '@' || code == '*' || code == '+' || code == '&'
	  || code == ';' || code == '~' || code == '^');
}
/* Print a memory operand whose address is ADDR.  */

static void
ix86_print_operand_address (FILE *file, rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;
  int ok;
  bool vsib = false;
  int code = 0;

  if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
    {
      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
      gcc_assert (parts.index == NULL_RTX);
      parts.index = XVECEXP (addr, 0, 1);
      parts.scale = INTVAL (XVECEXP (addr, 0, 2));
      addr = XVECEXP (addr, 0, 0);
      vsib = true;
    }
  else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
    {
      gcc_assert (TARGET_64BIT);
      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
      code = 'q';
    }
  else
    ok = ix86_decompose_address (addr, &parts);

  gcc_assert (ok);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    {
      rtx tmp = SUBREG_REG (parts.base);
      parts.base = simplify_subreg (GET_MODE (parts.base),
				    tmp, GET_MODE (tmp), 0);
    }

  if (parts.index && GET_CODE (parts.index) == SUBREG)
    {
      rtx tmp = SUBREG_REG (parts.index);
      parts.index = simplify_subreg (GET_MODE (parts.index),
				     tmp, GET_MODE (tmp), 0);
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  switch (parts.seg)
    {
    case SEG_DEFAULT:
      break;
    case SEG_FS:
    case SEG_GS:
      if (ASSEMBLER_DIALECT == ASM_ATT)
	putc ('%', file);
      fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
      break;
    default:
      gcc_unreachable ();
    }

  /* Use one byte shorter RIP relative addressing for 64bit mode.  */
  if (TARGET_64BIT && !base && !index)
    {
      rtx symbol = disp;

      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == PLUS
	  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
	symbol = XEXP (XEXP (disp, 0), 0);

      if (GET_CODE (symbol) == LABEL_REF
	  || (GET_CODE (symbol) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (symbol) == 0))
	base = pc_rtx;
    }
  if (!base && !index)
    {
      /* Displacement only requires special attention.  */

      if (CONST_INT_P (disp))
	{
	  if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
	    fputs ("ds:", file);
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
	}
      else if (flag_pic)
	output_pic_addr_const (file, disp, 0);
      else
	output_addr_const (file, disp);
    }
  else
    {
      /* Print SImode register names for zero-extended
	 addresses to force addr32 prefix.  */
      if (TARGET_64BIT
	  && (GET_CODE (addr) == ZERO_EXTEND
	      || GET_CODE (addr) == AND))
	{
	  gcc_assert (!code);
	  code = 'l';
	}

      if (ASSEMBLER_DIALECT == ASM_ATT)
	{
	  if (disp)
	    {
	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else
		output_addr_const (file, disp);
	    }

	  putc ('(', file);
	  if (base)
	    print_reg (base, code, file);
	  if (index)
	    {
	      putc (',', file);
	      print_reg (index, vsib ? 0 : code, file);
	      if (scale != 1 || vsib)
		fprintf (file, ",%d", scale);
	    }
	  putc (')', file);
	}
      else
	{
	  rtx offset = NULL_RTX;

	  if (disp)
	    {
	      /* Pull out the offset of a symbol; print any symbol itself.  */
	      if (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
		{
		  offset = XEXP (XEXP (disp, 0), 1);
		  disp = gen_rtx_CONST (VOIDmode,
					XEXP (XEXP (disp, 0), 0));
		}

	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else if (CONST_INT_P (disp))
		offset = disp;
	      else
		output_addr_const (file, disp);
	    }

	  putc ('[', file);
	  if (base)
	    {
	      print_reg (base, code, file);
	      if (offset)
		{
		  if (INTVAL (offset) >= 0)
		    putc ('+', file);
		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
		}
	    }
	  else if (offset)
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
	  else
	    putc ('0', file);

	  if (index)
	    {
	      putc ('+', file);
	      print_reg (index, vsib ? 0 : code, file);
	      if (scale != 1 || vsib)
		fprintf (file, "*%d", scale);
	    }
	  putc (']', file);
	}
    }
}
/* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

static bool
i386_asm_output_addr_const_extra (FILE *file, rtx x)
{
  rtx op;

  if (GET_CODE (x) != UNSPEC)
    return false;

  op = XVECEXP (x, 0, 0);
  switch (XINT (x, 1))
    {
    case UNSPEC_GOTTPOFF:
      output_addr_const (file, op);
      /* FIXME: This might be @TPOFF in Sun ld.  */
      fputs ("@gottpoff", file);
      break;
    case UNSPEC_TPOFF:
      output_addr_const (file, op);
      fputs ("@tpoff", file);
      break;
    case UNSPEC_NTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
	fputs ("@tpoff", file);
      else
	fputs ("@ntpoff", file);
      break;
    case UNSPEC_DTPOFF:
      output_addr_const (file, op);
      fputs ("@dtpoff", file);
      break;
    case UNSPEC_GOTNTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
	fputs (ASSEMBLER_DIALECT == ASM_ATT ?
	       "@gottpoff(%rip)" : "@gottpoff[rip]", file);
      else
	fputs ("@gotntpoff", file);
      break;
    case UNSPEC_INDNTPOFF:
      output_addr_const (file, op);
      fputs ("@indntpoff", file);
      break;
#if TARGET_MACHO
    case UNSPEC_MACHOPIC_OFFSET:
      output_addr_const (file, op);
      putc ('-', file);
      machopic_output_function_base_name (file);
      break;
#endif

    case UNSPEC_STACK_CHECK:
      {
	int offset;

	gcc_assert (flag_split_stack);

#ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
	offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
#else
	gcc_unreachable ();
#endif

	fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
      }
      break;

    default:
      return false;
    }

  return true;
}
/* Split one or more double-mode RTL references into pairs of half-mode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of double-mode RTLs to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_double_mode (enum machine_mode mode, rtx operands[],
		   int num, rtx lo_half[], rtx hi_half[])
{
  enum machine_mode half_mode;
  unsigned int byte;

  switch (mode)
    {
    case TImode:
      half_mode = DImode;
      break;
    case DImode:
      half_mode = SImode;
      break;
    default:
      gcc_unreachable ();
    }

  byte = GET_MODE_SIZE (half_mode);

  while (num--)
    {
      rtx op = operands[num];

      /* simplify_subreg refuses to split volatile memory addresses,
	 but we still have to handle it.  */
      if (MEM_P (op))
	{
	  lo_half[num] = adjust_address (op, half_mode, 0);
	  hi_half[num] = adjust_address (op, half_mode, byte);
	}
      else
	{
	  lo_half[num] = simplify_gen_subreg (half_mode, op,
					      GET_MODE (op) == VOIDmode
					      ? mode : GET_MODE (op), 0);
	  hi_half[num] = simplify_gen_subreg (half_mode, op,
					      GET_MODE (op) == VOIDmode
					      ? mode : GET_MODE (op), byte);
	}
    }
}
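/* Illustrative sketch added for exposition, not part of GCC: for a constant
   DImode operand the split above amounts to plain masking and shifting.
   A minimal stand-alone analogue, assuming little-endian layout as on x86
   (function name is hypothetical):  */
#if 0
#include <stdint.h>

static void
split_di_value (uint64_t op, uint32_t *lo_half, uint32_t *hi_half)
{
  *lo_half = (uint32_t) (op & 0xffffffffu);	/* bytes 0..3, offset 0 */
  *hi_half = (uint32_t) (op >> 32);		/* bytes 4..7, offset "byte" */
}
#endif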
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */

#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1
#endif

const char *
output_387_binary_op (rtx insn, rtx *operands)
{
  static char buf[40];
  const char *p;
  const char *ssep;
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);

#ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (STACK_REG_P (operands[0])
      && ((REG_P (operands[1])
	   && REGNO (operands[0]) == REGNO (operands[1])
	   && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
	  || (REG_P (operands[2])
	      && REGNO (operands[0]) == REGNO (operands[2])
	      && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
    ; /* ok */
  else
    gcc_assert (is_sse);
#endif

  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fiadd";
      else
	p = "fadd";
      ssep = "vadd";
      break;

    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fisub";
      else
	p = "fsub";
      ssep = "vsub";
      break;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fimul";
      else
	p = "fmul";
      ssep = "vmul";
      break;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fidiv";
      else
	p = "fdiv";
      ssep = "vdiv";
      break;

    default:
      gcc_unreachable ();
    }

  if (is_sse)
    {
      if (TARGET_AVX)
	{
	  strcpy (buf, ssep);
	  if (GET_MODE (operands[0]) == SFmode)
	    strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
	  else
	    strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
	}
      else
	{
	  strcpy (buf, ssep + 1);
	  if (GET_MODE (operands[0]) == SFmode)
	    strcat (buf, "ss\t{%2, %0|%0, %2}");
	  else
	    strcat (buf, "sd\t{%2, %0|%0, %2}");
	}
      return buf;
    }
  strcpy (buf, p);

  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
	{
	  rtx temp = operands[2];
	  operands[2] = operands[1];
	  operands[1] = temp;
	}

      /* know operands[0] == operands[1].  */

      if (MEM_P (operands[2]))
	{
	  p = "%Z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
	  if (STACK_TOP_P (operands[0]))
	    /* How is it that we are storing to a dead operand[2]?
	       Well, presumably operands[1] is dead too.  We can't
	       store the result to st(0) as st(0) gets popped on this
	       instruction.  Instead store to operands[2] (which I
	       think has to be st(1)).  st(1) will be popped later.
	       gcc <= 2.8.1 didn't have this check and generated
	       assembly code that the Unixware assembler rejected.  */
	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
      else
	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      if (MEM_P (operands[1]))
	{
	  p = "r%Z1\t%1";
	  break;
	}

      if (MEM_P (operands[2]))
	{
	  p = "%Z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
#if SYSV386_COMPAT
	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
	     derived assemblers, confusingly reverse the direction of
	     the operation for fsub{r} and fdiv{r} when the
	     destination register is not st(0).  The Intel assembler
	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
	     figure out what the hardware really does.  */
	  if (STACK_TOP_P (operands[0]))
	    p = "{p\t%0, %2|rp\t%2, %0}";
	  else
	    p = "{rp\t%2, %0|p\t%0, %2}";
#else
	  if (STACK_TOP_P (operands[0]))
	    /* As above for fmul/fadd, we can't store to st(0).  */
	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
#endif
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
#if SYSV386_COMPAT
	  if (STACK_TOP_P (operands[0]))
	    p = "{rp\t%0, %1|p\t%1, %0}";
	  else
	    p = "{p\t%1, %0|rp\t%0, %1}";
#else
	  if (STACK_TOP_P (operands[0]))
	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
	  else
	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
#endif
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	{
	  if (STACK_TOP_P (operands[1]))
	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
	  else
	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
	  break;
	}
      else if (STACK_TOP_P (operands[1]))
	{
#if SYSV386_COMPAT
	  p = "{\t%1, %0|r\t%0, %1}";
#else
	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
#endif
	}
      else
	{
#if SYSV386_COMPAT
	  p = "{r\t%2, %0|\t%0, %2}";
#else
	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
#endif
	}
      break;

    default:
      gcc_unreachable ();
    }

  strcat (buf, p);
  return buf;
}
/* Return needed mode for entity in optimize_mode_switching pass.  */

int
ix86_mode_needed (int entity, rtx insn)
{
  enum attr_i387_cw mode;

  /* The mode UNINITIALIZED is used to store control word after a
     function call or ASM pattern.  The mode ANY specifies that the
     function has no requirements on the control word and makes no
     changes in the bits we are interested in.  */
  if (CALL_P (insn)
      || (NONJUMP_INSN_P (insn)
	  && (asm_noperands (PATTERN (insn)) >= 0
	      || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
    return I387_CW_UNINITIALIZED;

  if (recog_memoized (insn) < 0)
    return I387_CW_ANY;

  mode = get_attr_i387_cw (insn);

  switch (entity)
    {
    case I387_TRUNC:
      if (mode == I387_CW_TRUNC)
	return mode;
      break;

    case I387_FLOOR:
      if (mode == I387_CW_FLOOR)
	return mode;
      break;

    case I387_CEIL:
      if (mode == I387_CW_CEIL)
	return mode;
      break;

    case I387_MASK_PM:
      if (mode == I387_CW_MASK_PM)
	return mode;
      break;

    default:
      gcc_unreachable ();
    }

  return I387_CW_ANY;
}
/* Output code to initialize control word copies used by trunc?f?i and
   rounding patterns.  CURRENT_MODE is set to current control word,
   while NEW_MODE is set to new control word.  */

void
emit_i387_cw_initialization (int mode)
{
  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
  rtx new_mode;

  enum ix86_stack_slot slot;

  rtx reg = gen_reg_rtx (HImode);

  emit_insn (gen_x86_fnstcw_1 (stored_mode));
  emit_move_insn (reg, copy_rtx (stored_mode));

  if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
      || optimize_function_for_size_p (cfun))
    {
      switch (mode)
	{
	case I387_CW_TRUNC:
	  /* round toward zero (truncate) */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
	  slot = SLOT_CW_TRUNC;
	  break;

	case I387_CW_FLOOR:
	  /* round down toward -oo */
	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
	  slot = SLOT_CW_FLOOR;
	  break;

	case I387_CW_CEIL:
	  /* round up toward +oo */
	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
	  slot = SLOT_CW_CEIL;
	  break;

	case I387_CW_MASK_PM:
	  /* mask precision exception for nearbyint() */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
	  slot = SLOT_CW_MASK_PM;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case I387_CW_TRUNC:
	  /* round toward zero (truncate) */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
	  slot = SLOT_CW_TRUNC;
	  break;

	case I387_CW_FLOOR:
	  /* round down toward -oo */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
	  slot = SLOT_CW_FLOOR;
	  break;

	case I387_CW_CEIL:
	  /* round up toward +oo */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
	  slot = SLOT_CW_CEIL;
	  break;

	case I387_CW_MASK_PM:
	  /* mask precision exception for nearbyint() */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
	  slot = SLOT_CW_MASK_PM;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  gcc_assert (slot < MAX_386_STACK_LOCALS);

  new_mode = assign_386_stack_local (HImode, slot);
  emit_move_insn (new_mode, reg);
}
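/* Illustrative sketch added for exposition, not part of GCC: the
   rounding-control field of the x87 control word is bits 10-11 (mask
   0x0c00) and bit 5 (0x0020) masks the precision exception.  The ior/and
   pairs emitted above compute exactly these adjustments; in plain C
   (function name and mode encoding are hypothetical):  */
#if 0
#include <stdint.h>

static uint16_t
adjust_cw (uint16_t cw, int mode)
{
  switch (mode)
    {
    case 0: return cw | 0x0c00;			/* trunc: RC = 11 */
    case 1: return (cw & ~0x0c00) | 0x0400;	/* floor: RC = 01 */
    case 2: return (cw & ~0x0c00) | 0x0800;	/* ceil:  RC = 10 */
    case 3: return cw | 0x0020;			/* mask precision exception */
    }
  return cw;
}
#endif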
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */

const char *
output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
{
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;
  int round_mode = get_attr_i387_cw (insn);

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if ((dimode_p || fisttp) && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  gcc_assert (STACK_TOP_P (operands[1]));
  gcc_assert (MEM_P (operands[0]));
  gcc_assert (GET_MODE (operands[1]) != TFmode);

  if (fisttp)
    output_asm_insn ("fisttp%Z0\t%0", operands);
  else
    {
      if (round_mode != I387_CW_ANY)
	output_asm_insn ("fldcw\t%3", operands);
      if (stack_top_dies || dimode_p)
	output_asm_insn ("fistp%Z0\t%0", operands);
      else
	output_asm_insn ("fist%Z0\t%0", operands);
      if (round_mode != I387_CW_ANY)
	output_asm_insn ("fldcw\t%2", operands);
    }

  return "";
}
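/* Illustrative sketch added for exposition, not part of GCC: the
   fldcw/fistp/fldcw sequence emitted above is the assembly-level version
   of temporarily switching the rounding mode to truncation, which C99
   exposes portably via <fenv.h> (function name is hypothetical; strict
   conformance would also want #pragma STDC FENV_ACCESS ON):  */
#if 0
#include <fenv.h>
#include <math.h>

static long
trunc_convert (double x)
{
  int old = fegetround ();
  fesetround (FE_TOWARDZERO);	/* analogue of "fldcw %3" */
  long r = lrint (x);		/* analogue of "fistp"    */
  fesetround (old);		/* analogue of "fldcw %2" */
  return r;
}
#endif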
/* Output code for x87 ffreep insn.  The OPNO argument, which may only
   have the values zero or one, indicates the ffreep insn's operand
   from the OPERANDS array.  */

static const char *
output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
{
  if (TARGET_USE_FFREEP)
#ifdef HAVE_AS_IX86_FFREEP
    return opno ? "ffreep\t%y1" : "ffreep\t%y0";
#else
    {
      static char retval[32];
      int regno = REGNO (operands[opno]);

      gcc_assert (FP_REGNO_P (regno));

      regno -= FIRST_STACK_REG;

      snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
      return retval;
    }
#endif

  return opno ? "fstp\t%y1" : "fstp\t%y0";
}
/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used.  UNORDERED_P is true when fucom should be used.  */

const char *
output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
{
  int stack_top_dies;
  rtx cmp_op0, cmp_op1;
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);

  if (eflags_p)
    {
      cmp_op0 = operands[0];
      cmp_op1 = operands[1];
    }
  else
    {
      cmp_op0 = operands[1];
      cmp_op1 = operands[2];
    }

  if (is_sse)
    {
      if (GET_MODE (operands[0]) == SFmode)
	if (unordered_p)
	  return "%vucomiss\t{%1, %0|%0, %1}";
	else
	  return "%vcomiss\t{%1, %0|%0, %1}";
      else
	if (unordered_p)
	  return "%vucomisd\t{%1, %0|%0, %1}";
	else
	  return "%vcomisd\t{%1, %0|%0, %1}";
    }

  gcc_assert (STACK_TOP_P (cmp_op0));

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
    {
      if (stack_top_dies)
	{
	  output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
	  return output_387_ffreep (operands, 1);
	}
      else
	return "ftst\n\tfnstsw\t%0";
    }

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If both the top of the 387 stack dies, and the other operand
	 is also a stack register that dies, then this must be a
	 `fcompp' float compare.  */

      if (eflags_p)
	{
	  /* There is no double popping fcomi variant.  Fortunately,
	     eflags is immune from the fstp's cc clobbering.  */
	  if (unordered_p)
	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
	  else
	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
	  return output_387_ffreep (operands, 0);
	}
      else
	{
	  if (unordered_p)
	    return "fucompp\n\tfnstsw\t%0";
	  else
	    return "fcompp\n\tfnstsw\t%0";
	}
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */

      static const char * const alt[16] =
      {
	"fcom%Z2\t%y2\n\tfnstsw\t%0",
	"fcomp%Z2\t%y2\n\tfnstsw\t%0",
	"fucom%Z2\t%y2\n\tfnstsw\t%0",
	"fucomp%Z2\t%y2\n\tfnstsw\t%0",

	"ficom%Z2\t%y2\n\tfnstsw\t%0",
	"ficomp%Z2\t%y2\n\tfnstsw\t%0",
	NULL,
	NULL,

	"fcomi\t{%y1, %0|%0, %y1}",
	"fcomip\t{%y1, %0|%0, %y1}",
	"fucomi\t{%y1, %0|%0, %y1}",
	"fucomip\t{%y1, %0|%0, %y1}",

	NULL,
	NULL,
	NULL,
	NULL
      };

      int mask;
      const char *ret;

      mask  = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      gcc_assert (mask < 16);
      ret = alt[mask];
      gcc_assert (ret);

      return ret;
    }
}
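/* Illustrative sketch added for exposition, not part of GCC: the 4-bit
   MASK above just packs the four predicates into an index for the alt[]
   table (function name is hypothetical):  */
#if 0
static int
fp_compare_index (int eflags_p, int intmode, int unordered_p, int top_dies)
{
  /* bit 3 = eflags_p, bit 2 = integer operand, bit 1 = unordered,
     bit 0 = stack top dies; e.g. "fucomip" is 8 | 0 | 2 | 1 = 11.  */
  return (eflags_p << 3) | (intmode << 2) | (unordered_p << 1) | top_dies;
}
#endif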
void
ix86_output_addr_vec_elt (FILE *file, int value)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_LP64)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif

  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
}

void
ix86_output_addr_diff_elt (FILE *file, int value, int rel)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif
  /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
  if (TARGET_64BIT || TARGET_VXWORKS_RTP)
    fprintf (file, "%s%s%d-%s%d\n",
	     directive, LPREFIX, value, LPREFIX, rel);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    {
      fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
      machopic_output_function_base_name (file);
      putc ('\n', file);
    }
#endif
  else
    asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
		 GOT_SYMBOL_NAME, LPREFIX, value);
}
/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
   for the target.  */

void
ix86_expand_clear (rtx dest)
{
  rtx tmp;

  /* We play register width games, which are only valid after reload.  */
  gcc_assert (reload_completed);

  /* Avoid HImode and its attendant prefix byte.  */
  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
    dest = gen_rtx_REG (SImode, REGNO (dest));
  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);

  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
  if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
    {
      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
    }

  emit_insn (tmp);
}
/* X is an unchanging MEM.  If it is a constant pool reference, return
   the constant pool rtx, else NULL.  */

rtx
maybe_get_pool_constant (rtx x)
{
  x = ix86_delegitimize_address (XEXP (x, 0));

  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
    return get_pool_constant (x);

  return NULL_RTX;
}
void
ix86_expand_move (enum machine_mode mode, rtx operands[])
{
  rtx op0, op1;
  enum tls_model model;

  op0 = operands[0];
  op1 = operands[1];

  if (GET_CODE (op1) == SYMBOL_REF)
    {
      model = SYMBOL_REF_TLS_MODEL (op1);
      if (model)
	{
	  op1 = legitimize_tls_address (op1, model, true);
	  op1 = force_operand (op1, op0);
	  if (op1 == op0)
	    return;
	  if (GET_MODE (op1) != mode)
	    op1 = convert_to_mode (mode, op1, 1);
	}
      else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
	       && SYMBOL_REF_DLLIMPORT_P (op1))
	op1 = legitimize_dllimport_symbol (op1, false);
    }
  else if (GET_CODE (op1) == CONST
	   && GET_CODE (XEXP (op1, 0)) == PLUS
	   && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
    {
      rtx addend = XEXP (XEXP (op1, 0), 1);
      rtx symbol = XEXP (XEXP (op1, 0), 0);
      rtx tmp = NULL;

      model = SYMBOL_REF_TLS_MODEL (symbol);
      if (model)
	tmp = legitimize_tls_address (symbol, model, true);
      else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
	       && SYMBOL_REF_DLLIMPORT_P (symbol))
	tmp = legitimize_dllimport_symbol (symbol, true);

      if (tmp)
	{
	  tmp = force_operand (tmp, NULL);
	  tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
				     op0, 1, OPTAB_DIRECT);
	  if (tmp == op0)
	    return;
	  if (GET_MODE (tmp) != mode)
	    op1 = convert_to_mode (mode, tmp, 1);
	  else
	    op1 = tmp;
	}
    }

  if ((flag_pic || MACHOPIC_INDIRECT)
      && symbolic_operand (op1, mode))
    {
      if (TARGET_MACHO && !TARGET_64BIT)
	{
#if TARGET_MACHO
	  /* dynamic-no-pic */
	  if (MACHOPIC_INDIRECT)
	    {
	      rtx temp = ((reload_in_progress
			   || ((op0 && REG_P (op0))
			       && mode == Pmode))
			  ? op0 : gen_reg_rtx (Pmode));
	      op1 = machopic_indirect_data_reference (op1, temp);
	      if (MACHOPIC_PURE)
		op1 = machopic_legitimize_pic_address (op1, mode,
						       temp == op1 ? 0 : temp);
	    }
	  if (op0 != op1 && GET_CODE (op0) != MEM)
	    {
	      rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
	      emit_insn (insn);
	      return;
	    }
	  if (GET_CODE (op0) == MEM)
	    op1 = force_reg (Pmode, op1);
	  else
	    {
	      rtx temp = op0;
	      if (GET_CODE (temp) != REG)
		temp = gen_reg_rtx (Pmode);
	      temp = legitimize_pic_address (op1, temp);
	      if (temp == op0)
		return;
	      op1 = temp;
	    }
	  /* dynamic-no-pic */
#endif
	}
      else
	{
	  if (MEM_P (op0))
	    op1 = force_reg (mode, op1);
	  else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
	    {
	      rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
	      op1 = legitimize_pic_address (op1, reg);
	      if (op0 == op1)
		return;
	      if (GET_MODE (op1) != mode)
		op1 = convert_to_mode (mode, op1, 1);
	    }
	}
    }
  else
    {
      if (MEM_P (op0)
	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
	      || !push_operand (op0, mode))
	  && MEM_P (op1))
	op1 = force_reg (mode, op1);

      if (push_operand (op0, mode)
	  && ! general_no_elim_operand (op1, mode))
	op1 = copy_to_mode_reg (mode, op1);

      /* Force large constants in 64bit compilation into register
	 to get them CSEed.  */
      if (can_create_pseudo_p ()
	  && (mode == DImode) && TARGET_64BIT
	  && immediate_operand (op1, mode)
	  && !x86_64_zext_immediate_operand (op1, VOIDmode)
	  && !register_operand (op0, mode)
	  && optimize)
	op1 = copy_to_mode_reg (mode, op1);

      if (can_create_pseudo_p ()
	  && FLOAT_MODE_P (mode)
	  && GET_CODE (op1) == CONST_DOUBLE)
	{
	  /* If we are loading a floating point constant to a register,
	     force the value to memory now, since we'll get better code
	     out the back end.  */

	  op1 = validize_mem (force_const_mem (mode, op1));
	  if (!register_operand (op0, mode))
	    {
	      rtx temp = gen_reg_rtx (mode);
	      emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
	      emit_move_insn (op0, temp);
	      return;
	    }
	}
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}
void
ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
{
  rtx op0 = operands[0], op1 = operands[1];
  unsigned int align = GET_MODE_ALIGNMENT (mode);

  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register, once we have that information we may be able
     to handle some of them more efficiently.  */
  if (can_create_pseudo_p ()
      && register_operand (op0, mode)
      && (CONSTANT_P (op1)
	  || (GET_CODE (op1) == SUBREG
	      && CONSTANT_P (SUBREG_REG (op1))))
      && !standard_sse_constant_p (op1))
    op1 = validize_mem (force_const_mem (mode, op1));

  /* We need to check memory alignment for SSE mode since attribute
     can make operands unaligned.  */
  if (can_create_pseudo_p ()
      && SSE_REG_MODE_P (mode)
      && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
	  || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
    {
      rtx tmp[2];

      /* ix86_expand_vector_move_misalign() does not like constants ... */
      if (CONSTANT_P (op1)
	  || (GET_CODE (op1) == SUBREG
	      && CONSTANT_P (SUBREG_REG (op1))))
	op1 = validize_mem (force_const_mem (mode, op1));

      /* ... nor both arguments in memory.  */
      if (!register_operand (op0, mode)
	  && !register_operand (op1, mode))
	op1 = force_reg (mode, op1);

      tmp[0] = op0; tmp[1] = op1;
      ix86_expand_vector_move_misalign (mode, tmp);
      return;
    }

  /* Make operand1 a register if it isn't already.  */
  if (can_create_pseudo_p ()
      && !register_operand (op0, mode)
      && !register_operand (op1, mode))
    {
      emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
      return;
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}
/* Split 32-byte AVX unaligned load and store if needed.  */

static void
ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
{
  rtx m;
  rtx (*extract) (rtx, rtx, rtx);
  rtx (*move_unaligned) (rtx, rtx);
  enum machine_mode mode;

  switch (GET_MODE (op0))
    {
    default:
      gcc_unreachable ();
    case V32QImode:
      extract = gen_avx_vextractf128v32qi;
      move_unaligned = gen_avx_movdqu256;
      mode = V16QImode;
      break;
    case V8SFmode:
      extract = gen_avx_vextractf128v8sf;
      move_unaligned = gen_avx_movups256;
      mode = V4SFmode;
      break;
    case V4DFmode:
      extract = gen_avx_vextractf128v4df;
      move_unaligned = gen_avx_movupd256;
      mode = V2DFmode;
      break;
    }

  if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
    {
      rtx r = gen_reg_rtx (mode);
      m = adjust_address (op1, mode, 0);
      emit_move_insn (r, m);
      m = adjust_address (op1, mode, 16);
      r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
      emit_move_insn (op0, r);
    }
  else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE)
    {
      m = adjust_address (op0, mode, 0);
      emit_insn (extract (m, op1, const0_rtx));
      m = adjust_address (op0, mode, 16);
      emit_insn (extract (m, op1, const1_rtx));
    }
  else
    emit_insn (move_unaligned (op0, op1));
}
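/* Illustrative sketch added for exposition, not part of GCC: splitting one
   32-byte unaligned access into two independent 16-byte halves, which is
   what the vextractf128/vec_concat based code above arranges at the RTL
   level (function name is hypothetical):  */
#if 0
#include <string.h>

static void
copy32_split (unsigned char *dst, const unsigned char *src)
{
  memcpy (dst, src, 16);		/* low xmm half, offset 0   */
  memcpy (dst + 16, src + 16, 16);	/* high xmm half, offset 16 */
}
#endif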
/* Implement the movmisalign patterns for SSE.  Non-SSE modes go
   straight to ix86_expand_vector_move.  */
/* Code generation for scalar reg-reg moves of single and double precision data:
     if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
       movaps reg, reg
     else
       movss reg, reg
     if (x86_sse_partial_reg_dependency == true)
       movapd reg, reg
     else
       movsd reg, reg

   Code generation for scalar loads of double precision data:
     if (x86_sse_split_regs == true)
       movlpd mem, reg      (gas syntax)
     else
       movsd mem, reg

   Code generation for unaligned packed loads of single precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
     if (x86_sse_unaligned_move_optimal)
       movups mem, reg

     if (x86_sse_partial_reg_dependency == true)
       {
         xorps  reg, reg
         movlps mem, reg
         movhps mem+8, reg
       }
     else
       {
         movlps mem, reg
         movhps mem+8, reg
       }

   Code generation for unaligned packed loads of double precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
     if (x86_sse_unaligned_move_optimal)
       movupd mem, reg

     if (x86_sse_split_regs == true)
       {
         movlpd mem, reg
         movhpd mem+8, reg
       }
     else
       {
         movsd  mem, reg
         movhpd mem+8, reg
       }
 */

void
ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
{
  rtx op0, op1, m;

  op0 = operands[0];
  op1 = operands[1];

  if (TARGET_AVX)
    {
      switch (GET_MODE_CLASS (mode))
	{
	case MODE_VECTOR_INT:
	case MODE_INT:
	  switch (GET_MODE_SIZE (mode))
	    {
	    case 16:
	      /* If we're optimizing for size, movups is the smallest.  */
	      if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
		{
		  op0 = gen_lowpart (V4SFmode, op0);
		  op1 = gen_lowpart (V4SFmode, op1);
		  emit_insn (gen_sse_movups (op0, op1));
		  return;
		}
	      op0 = gen_lowpart (V16QImode, op0);
	      op1 = gen_lowpart (V16QImode, op1);
	      emit_insn (gen_sse2_movdqu (op0, op1));
	      break;
	    case 32:
	      op0 = gen_lowpart (V32QImode, op0);
	      op1 = gen_lowpart (V32QImode, op1);
	      ix86_avx256_split_vector_move_misalign (op0, op1);
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  break;
	case MODE_VECTOR_FLOAT:
	  op0 = gen_lowpart (mode, op0);
	  op1 = gen_lowpart (mode, op1);

	  switch (mode)
	    {
	    case V4SFmode:
	      emit_insn (gen_sse_movups (op0, op1));
	      break;
	    case V8SFmode:
	      ix86_avx256_split_vector_move_misalign (op0, op1);
	      break;
	    case V2DFmode:
	      if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
		{
		  op0 = gen_lowpart (V4SFmode, op0);
		  op1 = gen_lowpart (V4SFmode, op1);
		  emit_insn (gen_sse_movups (op0, op1));
		  return;
		}
	      emit_insn (gen_sse2_movupd (op0, op1));
	      break;
	    case V4DFmode:
	      ix86_avx256_split_vector_move_misalign (op0, op1);
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  break;

	default:
	  gcc_unreachable ();
	}

      return;
    }

  if (MEM_P (op1))
    {
      /* If we're optimizing for size, movups is the smallest.  */
      if (optimize_insn_for_size_p ()
	  || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
	{
	  op0 = gen_lowpart (V4SFmode, op0);
	  op1 = gen_lowpart (V4SFmode, op1);
	  emit_insn (gen_sse_movups (op0, op1));
	  return;
	}

      /* ??? If we have typed data, then it would appear that using
	 movdqu is the only way to get unaligned data loaded with
	 integer type.  */
      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  op0 = gen_lowpart (V16QImode, op0);
	  op1 = gen_lowpart (V16QImode, op1);
	  emit_insn (gen_sse2_movdqu (op0, op1));
	  return;
	}

      if (TARGET_SSE2 && mode == V2DFmode)
	{
	  rtx zero;

	  if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
	    {
	      op0 = gen_lowpart (V2DFmode, op0);
	      op1 = gen_lowpart (V2DFmode, op1);
	      emit_insn (gen_sse2_movupd (op0, op1));
	      return;
	    }

	  /* When SSE registers are split into halves, we can avoid
	     writing to the top half twice.  */
	  if (TARGET_SSE_SPLIT_REGS)
	    {
	      emit_clobber (op0);
	      zero = op0;
	    }
	  else
	    {
	      /* ??? Not sure about the best option for the Intel chips.
		 The following would seem to satisfy; the register is
		 entirely cleared, breaking the dependency chain.  We
		 then store to the upper half, with a dependency depth
		 of one.  A rumor has it that Intel recommends two movsd
		 followed by an unpacklpd, but this is unconfirmed.  And
		 given that the dependency depth of the unpacklpd would
		 still be one, I'm not sure why this would be better.  */
	      zero = CONST0_RTX (V2DFmode);
	    }

	  m = adjust_address (op1, DFmode, 0);
	  emit_insn (gen_sse2_loadlpd (op0, zero, m));
	  m = adjust_address (op1, DFmode, 8);
	  emit_insn (gen_sse2_loadhpd (op0, op0, m));
	}
      else
	{
	  if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
	    {
	      op0 = gen_lowpart (V4SFmode, op0);
	      op1 = gen_lowpart (V4SFmode, op1);
	      emit_insn (gen_sse_movups (op0, op1));
	      return;
	    }

	  if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
	    emit_move_insn (op0, CONST0_RTX (mode));
	  else
	    emit_clobber (op0);

	  if (mode != V4SFmode)
	    op0 = gen_lowpart (V4SFmode, op0);
	  m = adjust_address (op1, V2SFmode, 0);
	  emit_insn (gen_sse_loadlps (op0, op0, m));
	  m = adjust_address (op1, V2SFmode, 8);
	  emit_insn (gen_sse_loadhps (op0, op0, m));
	}
    }
  else if (MEM_P (op0))
    {
      /* If we're optimizing for size, movups is the smallest.  */
      if (optimize_insn_for_size_p ()
	  || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
	{
	  op0 = gen_lowpart (V4SFmode, op0);
	  op1 = gen_lowpart (V4SFmode, op1);
	  emit_insn (gen_sse_movups (op0, op1));
	  return;
	}

      /* ??? Similar to above, only less clear because of quote
	 typeless stores unquote.  */
      if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
	  && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  op0 = gen_lowpart (V16QImode, op0);
	  op1 = gen_lowpart (V16QImode, op1);
	  emit_insn (gen_sse2_movdqu (op0, op1));
	  return;
	}

      if (TARGET_SSE2 && mode == V2DFmode)
	{
	  if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
	    {
	      op0 = gen_lowpart (V2DFmode, op0);
	      op1 = gen_lowpart (V2DFmode, op1);
	      emit_insn (gen_sse2_movupd (op0, op1));
	    }
	  else
	    {
	      m = adjust_address (op0, DFmode, 0);
	      emit_insn (gen_sse2_storelpd (m, op1));
	      m = adjust_address (op0, DFmode, 8);
	      emit_insn (gen_sse2_storehpd (m, op1));
	    }
	}
      else
	{
	  if (mode != V4SFmode)
	    op1 = gen_lowpart (V4SFmode, op1);

	  if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
	    {
	      op0 = gen_lowpart (V4SFmode, op0);
	      emit_insn (gen_sse_movups (op0, op1));
	    }
	  else
	    {
	      m = adjust_address (op0, V2SFmode, 0);
	      emit_insn (gen_sse_storelps (m, op1));
	      m = adjust_address (op0, V2SFmode, 8);
	      emit_insn (gen_sse_storehps (m, op1));
	    }
	}
    }
  else
    gcc_unreachable ();
}
/* Expand a push in MODE.  This is some mode for which we do not support
   proper push instructions, at least from the registers that we expect
   the value to live in.  */

void
ix86_expand_push (enum machine_mode mode, rtx x)
{
  rtx tmp;

  tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
			     GEN_INT (-GET_MODE_SIZE (mode)),
			     stack_pointer_rtx, 1, OPTAB_DIRECT);
  if (tmp != stack_pointer_rtx)
    emit_move_insn (stack_pointer_rtx, tmp);

  tmp = gen_rtx_MEM (mode, stack_pointer_rtx);

  /* When we push an operand onto stack, it has to be aligned at least
     at the function argument boundary.  However since we don't have
     the argument type, we can't determine the actual argument
     boundary.  */
  emit_move_insn (tmp, x);
}
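/* Illustrative sketch added for exposition, not part of GCC: the expansion
   above is the generic "decrement stack pointer, then store" form of a
   push (function name is hypothetical):  */
#if 0
static void
emulated_push (unsigned char **sp, const void *val, unsigned size)
{
  *sp -= size;				/* sp -= GET_MODE_SIZE (mode) */
  __builtin_memcpy (*sp, val, size);	/* *(mode *) sp = x */
}
#endif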
/* Helper function of ix86_fixup_binary_operands to canonicalize
   operand order.  Returns true if the operands should be swapped.  */

static bool
ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
			     rtx operands[])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* If the operation is not commutative, we can't do anything.  */
  if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
    return false;

  /* Highest priority is that src1 should match dst.  */
  if (rtx_equal_p (dst, src1))
    return false;
  if (rtx_equal_p (dst, src2))
    return true;

  /* Next highest priority is that immediate constants come second.  */
  if (immediate_operand (src2, mode))
    return false;
  if (immediate_operand (src1, mode))
    return true;

  /* Lowest priority is that memory references should come second.  */
  if (MEM_P (src2))
    return false;
  if (MEM_P (src1))
    return true;

  return false;
}
/* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
   destination to use for the operation.  If different from the true
   destination in operands[0], a copy operation will be required.  */

rtx
ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
			    rtx operands[])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* Canonicalize operand order.  */
  if (ix86_swap_binary_operands_p (code, mode, operands))
    {
      rtx temp;

      /* It is invalid to swap operands of different modes.  */
      gcc_assert (GET_MODE (src1) == GET_MODE (src2));

      temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* Both source operands cannot be in memory.  */
  if (MEM_P (src1) && MEM_P (src2))
    {
      /* Optimization: Only read from memory once.  */
      if (rtx_equal_p (src1, src2))
	{
	  src2 = force_reg (mode, src2);
	  src1 = src2;
	}
      else
	src2 = force_reg (mode, src2);
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  if (MEM_P (dst) && !rtx_equal_p (dst, src1))
    dst = gen_reg_rtx (mode);

  /* Source 1 cannot be a constant.  */
  if (CONSTANT_P (src1))
    src1 = force_reg (mode, src1);

  /* Source 1 cannot be a non-matching memory.  */
  if (MEM_P (src1) && !rtx_equal_p (dst, src1))
    src1 = force_reg (mode, src1);

  /* Improve address combine.  */
  if (code == PLUS
      && GET_MODE_CLASS (mode) == MODE_INT
      && MEM_P (src2))
    src2 = force_reg (mode, src2);

  operands[1] = src1;
  operands[2] = src2;
  return dst;
}
/* Similarly, but assume that the destination has already been
   set up properly.  */

void
ix86_fixup_binary_operands_no_copy (enum rtx_code code,
				    enum machine_mode mode, rtx operands[])
{
  rtx dst = ix86_fixup_binary_operands (code, mode, operands);
  gcc_assert (dst == operands[0]);
}
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine than just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
			     rtx operands[])
{
  rtx src1, src2, dst, op, clob;

  dst = ix86_fixup_binary_operands (code, mode, operands);
  src1 = operands[1];
  src2 = operands[2];

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  We can only do this with PLUS.  */
      gcc_assert (code == PLUS);
      emit_insn (op);
    }
  else if (reload_completed
	   && code == PLUS
	   && !rtx_equal_p (dst, src1))
    {
      /* This is going to be an LEA; avoid splitting it later.  */
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  */

bool
ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
			 rtx operands[3])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* Both source operands cannot be in memory.  */
  if (MEM_P (src1) && MEM_P (src2))
    return false;

  /* Canonicalize operand order for commutative operators.  */
  if (ix86_swap_binary_operands_p (code, mode, operands))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, we must have a matching source operand.  */
  if (MEM_P (dst) && !rtx_equal_p (dst, src1))
    return false;

  /* Source 1 cannot be a constant.  */
  if (CONSTANT_P (src1))
    return false;

  /* Source 1 cannot be a non-matching memory.  */
  if (MEM_P (src1) && !rtx_equal_p (dst, src1))
    /* Support "andhi/andsi/anddi" as a zero-extending move.  */
    return (code == AND
	    && (mode == HImode
		|| mode == SImode
		|| (TARGET_64BIT && mode == DImode))
	    && satisfies_constraint_L (src2));

  return true;
}
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine than just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */

void
ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
			    rtx operands[])
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (MEM_P (dst))
    {
      if (rtx_equal_p (dst, src))
	matching_memory = 1;
      else
	dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (MEM_P (src) && !matching_memory)
    src = force_reg (mode, src);

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  */
      gcc_assert (code == NOT);
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
/* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
   divisor are within the range [0-255].  */

void
ix86_split_idivmod (enum machine_mode mode, rtx operands[],
		    bool signed_p)
{
  rtx end_label, qimode_label;
  rtx insn, div, mod;
  rtx scratch, tmp0, tmp1, tmp2;
  rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
  rtx (*gen_zero_extend) (rtx, rtx);
  rtx (*gen_test_ccno_1) (rtx, rtx);

  switch (mode)
    {
    case SImode:
      gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
      gen_test_ccno_1 = gen_testsi_ccno_1;
      gen_zero_extend = gen_zero_extendqisi2;
      break;
    case DImode:
      gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
      gen_test_ccno_1 = gen_testdi_ccno_1;
      gen_zero_extend = gen_zero_extendqidi2;
      break;
    default:
      gcc_unreachable ();
    }

  end_label = gen_label_rtx ();
  qimode_label = gen_label_rtx ();

  scratch = gen_reg_rtx (mode);

  /* Use 8bit unsigned divmod if dividend and divisor are within
     the range [0-255].  */
  emit_move_insn (scratch, operands[2]);
  scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
				 scratch, 1, OPTAB_DIRECT);
  emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
  tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
  tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
  tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
			       gen_rtx_LABEL_REF (VOIDmode, qimode_label),
			       pc_rtx);
  insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
  predict_jump (REG_BR_PROB_BASE * 50 / 100);
  JUMP_LABEL (insn) = qimode_label;

  /* Generate original signed/unsigned divmod.  */
  div = gen_divmod4_1 (operands[0], operands[1],
		       operands[2], operands[3]);
  emit_insn (div);

  /* Branch to the end.  */
  emit_jump_insn (gen_jump (end_label));
  emit_barrier ();

  /* Generate 8bit unsigned divide.  */
  emit_label (qimode_label);
  /* Don't use operands[0] for result of 8bit divide since not all
     registers support QImode ZERO_EXTRACT.  */
  tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
  tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
  tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
  emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));

  if (signed_p)
    {
      div = gen_rtx_DIV (SImode, operands[2], operands[3]);
      mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
    }
  else
    {
      div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
      mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
    }

  /* Extract remainder from AH.  */
  tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
  if (REG_P (operands[1]))
    insn = emit_move_insn (operands[1], tmp1);
  else
    {
      /* Need a new scratch register since the old one has result
	 of 8bit divide.  */
      scratch = gen_reg_rtx (mode);
      emit_move_insn (scratch, tmp1);
      insn = emit_move_insn (operands[1], scratch);
    }
  set_unique_reg_note (insn, REG_EQUAL, mod);

  /* Zero extend quotient from AL.  */
  tmp1 = gen_lowpart (QImode, tmp0);
  insn = emit_insn (gen_zero_extend (operands[0], tmp1));
  set_unique_reg_note (insn, REG_EQUAL, div);

  emit_label (end_label);
}
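/* Illustrative sketch added for exposition, not part of GCC: the IOR and
   test against -0x100 above check that both operands fit in 8 bits, in
   which case the cheap 8-bit divide can be used (function name is
   hypothetical; b is assumed non-zero, as for the hardware divide):  */
#if 0
#include <stdint.h>

static uint32_t
div_maybe_8bit (uint32_t a, uint32_t b, uint32_t *rem)
{
  if (((a | b) & ~0xffu) == 0)	/* both in [0-255]: the "test $-0x100" path */
    {
      uint8_t q = (uint8_t) a / (uint8_t) b;	/* AL = quotient  */
      *rem = (uint8_t) a % (uint8_t) b;		/* AH = remainder */
      return q;
    }
  *rem = a % b;			/* full-width divide otherwise */
  return a / b;
}
#endif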
#define LEA_MAX_STALL (3)
#define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)

/* Increase given DISTANCE in half-cycles according to
   dependencies between PREV and NEXT instructions.
   Add 1 half-cycle if there is no dependency and
   go to next cycle if there is some dependency.  */

static unsigned int
increase_distance (rtx prev, rtx next, unsigned int distance)
{
  df_ref *use_rec;
  df_ref *def_rec;

  if (!prev || !next)
    return distance + (distance & 1) + 2;

  if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
    return distance + 1;

  for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
    for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
      if (!DF_REF_IS_ARTIFICIAL (*def_rec)
	  && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
	return distance + (distance & 1) + 2;

  return distance + 1;
}
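/* Illustrative sketch added for exposition, not part of GCC: distances are
   counted in half-cycles, so "distance + (distance & 1) + 2" rounds an odd
   count up to the next cycle boundary and then skips one full cycle
   (function name is hypothetical):  */
#if 0
static unsigned int
next_full_cycle (unsigned int distance)
{
  return distance + (distance & 1) + 2;	/* e.g. 3 -> 6, 4 -> 6 */
}
#endif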
/* Function checks if instruction INSN defines register number
   REGNO1 or REGNO2.  */

static bool
insn_defines_reg (unsigned int regno1, unsigned int regno2,
		  rtx insn)
{
  df_ref *def_rec;

  for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
    if (DF_REF_REG_DEF_P (*def_rec)
	&& !DF_REF_IS_ARTIFICIAL (*def_rec)
	&& (regno1 == DF_REF_REGNO (*def_rec)
	    || regno2 == DF_REF_REGNO (*def_rec)))
      return true;

  return false;
}
/* Function checks if instruction INSN uses register number
   REGNO as a part of address expression.  */

static bool
insn_uses_reg_mem (unsigned int regno, rtx insn)
{
  df_ref *use_rec;

  for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
    if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec))
      return true;

  return false;
}
/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in basic block starting from instruction
   START up to head of basic block or instruction INSN.

   Function puts true value into *FOUND var if definition was found
   and false otherwise.

   Distance in half-cycles between START and found instruction or head
   of BB is added to DISTANCE and returned.  */

static int
distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
			       rtx insn, int distance,
			       rtx start, bool *found)
{
  basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
  rtx prev = start;
  rtx next = NULL;

  *found = false;

  while (prev
	 && prev != insn
	 && distance < LEA_SEARCH_THRESHOLD)
    {
      if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
	{
	  distance = increase_distance (prev, next, distance);
	  if (insn_defines_reg (regno1, regno2, prev))
	    {
	      if (recog_memoized (prev) < 0
		  || get_attr_type (prev) != TYPE_LEA)
		{
		  *found = true;
		  return distance;
		}
	    }

	  next = prev;
	}
      if (prev == BB_HEAD (bb))
	break;

      prev = PREV_INSN (prev);
    }

  return distance;
}
/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in INSN's basic block until
   1. Pass LEA_SEARCH_THRESHOLD instructions, or
   2. Reach neighbour BBs boundary, or
   3. Reach agu definition.
   Returns the distance between the non-agu definition point and INSN.
   If no definition point, returns -1.  */

static int
distance_non_agu_define (unsigned int regno1, unsigned int regno2,
			 rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;

  if (insn != BB_HEAD (bb))
    distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
					      distance, PREV_INSN (insn),
					      &found);

  if (!found && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      FOR_EACH_EDGE (e, ei, bb->preds)
	if (e->src == bb)
	  {
	    simple_loop = true;
	    break;
	  }

      if (simple_loop)
	distance = distance_non_agu_define_in_bb (regno1, regno2,
						  insn, distance,
						  BB_END (bb), &found);
      else
	{
	  int shortest_dist = -1;
	  bool found_in_bb = false;

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    {
	      int bb_dist
		= distance_non_agu_define_in_bb (regno1, regno2,
						 insn, distance,
						 BB_END (e->src),
						 &found_in_bb);
	      if (found_in_bb)
		{
		  if (shortest_dist < 0)
		    shortest_dist = bb_dist;
		  else if (bb_dist > 0)
		    shortest_dist = MIN (bb_dist, shortest_dist);

		  found = true;
		}
	    }

	  distance = shortest_dist;
	}
    }

  /* get_attr_type may modify recog data.  We want to make sure
     that recog data is valid for instruction INSN, on which
     distance_non_agu_define is called.  INSN is unchanged here.  */
  extract_insn_cached (insn);

  if (!found)
    return -1;

  return distance >> 1;
}
/* Return the distance in half-cycles between INSN and the next
   insn that uses register number REGNO in memory address, added
   to DISTANCE.  Return -1 if REGNO is set.

   Put true value into *FOUND if register usage was found and
   false otherwise.
   Put true value into *REDEFINED if register redefinition was
   found and false otherwise.  */

static int
distance_agu_use_in_bb (unsigned int regno,
			rtx insn, int distance, rtx start,
			bool *found, bool *redefined)
{
  basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
  rtx next = start;
  rtx prev = NULL;

  *found = false;
  *redefined = false;

  while (next
	 && next != insn
	 && distance < LEA_SEARCH_THRESHOLD)
    {
      if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
	{
	  distance = increase_distance(prev, next, distance);
	  if (insn_uses_reg_mem (regno, next))
	    {
	      /* Return DISTANCE if OP0 is used in memory
		 address in NEXT.  */
	      *found = true;
	      return distance;
	    }

	  if (insn_defines_reg (regno, INVALID_REGNUM, next))
	    {
	      /* Return -1 if OP0 is set in NEXT.  */
	      *redefined = true;
	      return -1;
	    }

	  prev = next;
	}

      if (next == BB_END (bb))
	break;

      next = NEXT_INSN (next);
    }

  return distance;
}
/* Return the distance between INSN and the next insn that uses
   register number REGNO0 in memory address.  Return -1 if no such
   a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */

static int
distance_agu_use (unsigned int regno0, rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;
  bool redefined = false;

  if (insn != BB_END (bb))
    distance = distance_agu_use_in_bb (regno0, insn, distance,
				       NEXT_INSN (insn),
				       &found, &redefined);

  if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest == bb)
	  {
	    simple_loop = true;
	    break;
	  }

      if (simple_loop)
	distance = distance_agu_use_in_bb (regno0, insn,
					   distance, BB_HEAD (bb),
					   &found, &redefined);
      else
	{
	  int shortest_dist = -1;
	  bool found_in_bb = false;
	  bool redefined_in_bb = false;

	  FOR_EACH_EDGE (e, ei, bb->succs)
	    {
	      int bb_dist
		= distance_agu_use_in_bb (regno0, insn,
					  distance, BB_HEAD (e->dest),
					  &found_in_bb, &redefined_in_bb);
	      if (found_in_bb)
		{
		  if (shortest_dist < 0)
		    shortest_dist = bb_dist;
		  else if (bb_dist > 0)
		    shortest_dist = MIN (bb_dist, shortest_dist);

		  found = true;
		}
	    }

	  distance = shortest_dist;
	}
    }

  if (!found || redefined)
    return -1;

  return distance >> 1;
}
/* Define this macro to tune LEA priority vs ADD; it takes effect when
   there is a dilemma of choosing LEA or ADD.
   Negative value: ADD is more preferred than LEA
   Zero: Neutral
   Positive value: LEA is more preferred than ADD.  */
#define IX86_LEA_PRIORITY 0

/* Return true if usage of lea INSN has performance advantage
   over a sequence of instructions.  Instructions sequence has
   SPLIT_COST cycles higher latency than lea latency.  */

static bool
ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
		      unsigned int regno2, unsigned int split_cost)
{
  int dist_define, dist_use;

  dist_define = distance_non_agu_define (regno1, regno2, insn);
  dist_use = distance_agu_use (regno0, insn);

  if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
    {
      /* If there is no non-AGU operand definition, no AGU
	 operand usage and split cost is 0 then both lea
	 and non-lea variants have same priority.  Currently
	 we prefer lea for 64 bit code and non-lea on 32 bit
	 code.  */
      if (dist_use < 0 && split_cost == 0)
	return TARGET_64BIT || IX86_LEA_PRIORITY;
      else
	return true;
    }

  /* With longer definition distance lea is more preferable.
     Here we change it to take into account splitting cost and
     lea priority.  */
  dist_define += split_cost + IX86_LEA_PRIORITY;

  /* If there is no use in memory address then we just check
     that split cost does not exceed AGU stall.  */
  if (dist_use < 0)
    return dist_define >= LEA_MAX_STALL;

  /* If this insn has both backward non-agu dependence and forward
     agu dependence, the one with short distance takes effect.  */
  return dist_define >= dist_use;
}
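/* Illustrative sketch added for exposition, not part of GCC: a simplified
   view of the decision above -- once the split cost is folded into the
   definition distance, the nearer dependency wins.  This deliberately
   omits the 64-bit preference of the real code; all names are
   hypothetical:  */
#if 0
static int
lea_wins (int dist_define, int dist_use, int split_cost, int lea_priority)
{
  if (dist_define < 0 || dist_define >= 3 /* LEA_MAX_STALL */)
    return 1;			/* no close non-AGU def: lea is fine */
  dist_define += split_cost + lea_priority;
  if (dist_use < 0)
    return dist_define >= 3;	/* only the AGU stall matters */
  return dist_define >= dist_use; /* nearer dependency decides */
}
#endif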
/* Return true if it is legal to clobber flags by INSN and
   false otherwise.  */

static bool
ix86_ok_to_clobber_flags (rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  df_ref *use;
  bitmap live;

  while (insn)
    {
      if (NONDEBUG_INSN_P (insn))
	{
	  for (use = DF_INSN_USES (insn); *use; use++)
	    if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG)
	      return false;

	  if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
	    return true;
	}

      if (insn == BB_END (bb))
	break;

      insn = NEXT_INSN (insn);
    }

  live = df_get_live_out(bb);
  return !REGNO_REG_SET_P (live, FLAGS_REG);
}
/* Return true if we need to split op0 = op1 + op2 into a sequence of
   move and add to avoid AGU stalls.  */

bool
ix86_avoid_lea_for_add (rtx insn, rtx operands[])
{
  unsigned int regno0 = true_regnum (operands[0]);
  unsigned int regno1 = true_regnum (operands[1]);
  unsigned int regno2 = true_regnum (operands[2]);

  /* Check if we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Check it is correct to split here.  */
  if (!ix86_ok_to_clobber_flags(insn))
    return false;

  /* We need to split only adds with non destructive
     destination operand.  */
  if (regno0 == regno1 || regno0 == regno2)
    return false;

  return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1);
}
/* Return true if we should emit lea instruction instead of mov
   instruction.  */

bool
ix86_use_lea_for_mov (rtx insn, rtx operands[])
{
  unsigned int regno0;
  unsigned int regno1;

  /* Check if we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Use lea for reg to reg moves only.  */
  if (!REG_P (operands[0]) || !REG_P (operands[1]))
    return false;

  regno0 = true_regnum (operands[0]);
  regno1 = true_regnum (operands[1]);

  return ix86_lea_outperforms (insn, regno0, regno1, -1, 0);
}
/* Return true if we need to split lea into a sequence of
   instructions to avoid AGU stalls.  */

bool
ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
{
  unsigned int regno0 = true_regnum (operands[0]);
  unsigned int regno1 = -1;
  unsigned int regno2 = -1;
  unsigned int split_cost = 0;
  struct ix86_address parts;
  int ok;

  /* Check we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Check it is correct to split here.  */
  if (!ix86_ok_to_clobber_flags(insn))
    return false;

  ok = ix86_decompose_address (operands[1], &parts);
  gcc_assert (ok);

  /* We should not split into add if non legitimate pic
     operand is used as displacement.  */
  if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
    return false;

  if (parts.base)
    regno1 = true_regnum (parts.base);
  if (parts.index)
    regno2 = true_regnum (parts.index);

  /* Compute how many cycles we will add to execution time
     if we split the lea into a sequence of instructions.  */
  if (parts.base || parts.index)
    {
      /* Have to use mov instruction if non destructive
	 destination form is used.  */
      if (regno1 != regno0 && regno2 != regno0)
	split_cost += 1;

      /* Have to add index to base if both exist.  */
      if (parts.base && parts.index)
	split_cost += 1;

      /* Have to use shift and adds if scale is 2 or greater.  */
      if (parts.scale > 1)
	{
	  if (regno0 != regno1)
	    split_cost += 1;
	  else if (regno2 == regno0)
	    split_cost += 4;
	  else
	    split_cost += parts.scale;
	}

      /* Have to use add instruction with immediate if
	 disp is non zero.  */
      if (parts.disp && parts.disp != const0_rtx)
	split_cost += 1;

      /* Subtract the price of lea.  */
      split_cost -= 1;
    }

  return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost);
}
/* Emit x86 binary operand CODE in mode MODE, where the first operand
   matches destination.  RTX includes clobber of FLAGS_REG.  */

static void
ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
		 rtx dst, rtx src)
{
  rtx op, clob;

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));

  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
}
/* Split lea instructions into a sequence of instructions
   which are executed on ALU to avoid AGU stalls.
   It is assumed that it is allowed to clobber flags register
   at lea position.  */

void
ix86_split_lea_for_addr (rtx operands[], enum machine_mode mode)
{
  unsigned int regno0 = true_regnum (operands[0]);
  unsigned int regno1 = INVALID_REGNUM;
  unsigned int regno2 = INVALID_REGNUM;
  struct ix86_address parts;
  rtx tmp;
  int ok, adds;

  ok = ix86_decompose_address (operands[1], &parts);
  gcc_assert (ok);

  if (parts.base)
    {
      if (GET_MODE (parts.base) != mode)
	parts.base = gen_rtx_SUBREG (mode, parts.base, 0);
      regno1 = true_regnum (parts.base);
    }

  if (parts.index)
    {
      if (GET_MODE (parts.index) != mode)
	parts.index = gen_rtx_SUBREG (mode, parts.index, 0);
      regno2 = true_regnum (parts.index);
    }

  if (parts.scale > 1)
    {
      /* Case r1 = r1 + ...  */
      if (regno1 == regno0)
	{
	  /* If we have a case r1 = r1 + C * r1 then we
	     should use multiplication which is very
	     expensive.  Assume cost model is wrong if we
	     have such case here.  */
	  gcc_assert (regno2 != regno0);

	  for (adds = parts.scale; adds > 0; adds--)
	    ix86_emit_binop (PLUS, mode, operands[0], parts.index);
	}
      else
	{
	  /* r1 = r2 + r3 * C case.  Need to move r3 into r1.  */
	  if (regno0 != regno2)
	    emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));

	  /* Use shift for scaling.  */
	  ix86_emit_binop (ASHIFT, mode, operands[0],
			   GEN_INT (exact_log2 (parts.scale)));

	  if (parts.base)
	    ix86_emit_binop (PLUS, mode, operands[0], parts.base);

	  if (parts.disp && parts.disp != const0_rtx)
	    ix86_emit_binop (PLUS, mode, operands[0], parts.disp);
	}
    }
  else if (!parts.base && !parts.index)
    {
      gcc_assert(parts.disp);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.disp));
    }
  else
    {
      if (!parts.base)
	{
	  if (regno0 != regno2)
	    emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));
	}
      else if (!parts.index)
	{
	  if (regno0 != regno1)
	    emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
	}
      else
	{
	  if (regno0 == regno1)
	    tmp = parts.index;
	  else if (regno0 == regno2)
	    tmp = parts.base;
	  else
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
	      tmp = parts.index;
	    }

	  ix86_emit_binop (PLUS, mode, operands[0], tmp);
	}

      if (parts.disp && parts.disp != const0_rtx)
	ix86_emit_binop (PLUS, mode, operands[0], parts.disp);
    }
}
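/* Illustrative sketch added for exposition, not part of GCC: the split
   turns the one-instruction address computation dst = base + index*scale
   + disp into plain ALU steps, using a shift when the scale is a power of
   two (function name is hypothetical):  */
#if 0
static unsigned long
lea_by_alu (unsigned long base, unsigned long index,
	    int log2_scale, unsigned long disp)
{
  unsigned long r = index;	/* mov  index, r        */
  r <<= log2_scale;		/* shl  log2(scale), r  */
  r += base;			/* add  base, r         */
  r += disp;			/* add  disp, r         */
  return r;
}
#endif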
/* Return true if it is ok to optimize an ADD operation to LEA
   operation to avoid flag register consumption.  For most processors,
   ADD is faster than LEA.  For the processors like ATOM, if the
   destination register of LEA holds an actual address which will be
   used soon, LEA is better and otherwise ADD is better.  */

bool
ix86_lea_for_add_ok (rtx insn, rtx operands[])
{
  unsigned int regno0 = true_regnum (operands[0]);
  unsigned int regno1 = true_regnum (operands[1]);
  unsigned int regno2 = true_regnum (operands[2]);

  /* If a = b + c, (a!=b && a!=c), must use lea form.  */
  if (regno0 != regno1 && regno0 != regno2)
    return true;

  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0);
}
/* Return true if destination reg of SET_BODY is shift count of
   USE_BODY.  */

static bool
ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
{
  rtx set_dest;
  rtx shift_rtx;
  int i;

  /* Retrieve destination of SET_BODY.  */
  switch (GET_CODE (set_body))
    {
    case SET:
      set_dest = SET_DEST (set_body);
      if (!set_dest || !REG_P (set_dest))
	return false;
      break;
    case PARALLEL:
      for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
	if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
					  use_body))
	  return true;
    default:
      return false;
      break;
    }

  /* Retrieve shift count of USE_BODY.  */
  switch (GET_CODE (use_body))
    {
    case SET:
      shift_rtx = XEXP (use_body, 1);
      break;
    case PARALLEL:
      for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
	if (ix86_dep_by_shift_count_body (set_body,
					  XVECEXP (use_body, 0, i)))
	  return true;
    default:
      return false;
      break;
    }

  if (shift_rtx
      && (GET_CODE (shift_rtx) == ASHIFT
	  || GET_CODE (shift_rtx) == LSHIFTRT
	  || GET_CODE (shift_rtx) == ASHIFTRT
	  || GET_CODE (shift_rtx) == ROTATE
	  || GET_CODE (shift_rtx) == ROTATERT))
    {
      rtx shift_count = XEXP (shift_rtx, 1);

      /* Return true if shift count is dest of SET_BODY.  */
      if (REG_P (shift_count)
	  && true_regnum (set_dest) == true_regnum (shift_count))
	return true;
    }

  return false;
}

/* Return true if destination reg of SET_INSN is shift count of
   USE_INSN.  */

bool
ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
{
  return ix86_dep_by_shift_count_body (PATTERN (set_insn),
				       PATTERN (use_insn));
}
/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

bool
ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
			enum machine_mode mode ATTRIBUTE_UNUSED,
			rtx operands[2] ATTRIBUTE_UNUSED)
{
  /* If one of operands is memory, source and destination must match.  */
  if ((MEM_P (operands[0])
       || MEM_P (operands[1]))
      && ! rtx_equal_p (operands[0], operands[1]))
    return false;
  return true;
}
/* Return TRUE if the operands to a vec_interleave_{high,low}v2df
   are ok, keeping in mind the possible movddup alternative.  */

bool
ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
{
  if (MEM_P (operands[0]))
    return rtx_equal_p (operands[0], operands[1 + high]);
  if (MEM_P (operands[1]) && MEM_P (operands[2]))
    return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
  return true;
}
/* Post-reload splitter for converting an SF or DFmode value in an
   SSE register into an unsigned SImode.  */

void
ix86_split_convert_uns_si_sse (rtx operands[])
{
  enum machine_mode vecmode;
  rtx value, large, zero_or_two31, input, two31, x;

  large = operands[1];
  zero_or_two31 = operands[2];
  input = operands[3];
  two31 = operands[4];
  vecmode = GET_MODE (large);
  value = gen_rtx_REG (vecmode, REGNO (operands[0]));

  /* Load up the value into the low element.  We must ensure that the other
     elements are valid floats -- zero is the easiest such value.  */
  if (MEM_P (input))
    {
      if (vecmode == V4SFmode)
	emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
      else
	emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
    }
  else
    {
      input = gen_rtx_REG (vecmode, REGNO (input));
      emit_move_insn (value, CONST0_RTX (vecmode));
      if (vecmode == V4SFmode)
	emit_insn (gen_sse_movss (value, value, input));
      else
	emit_insn (gen_sse2_movsd (value, value, input));
    }

  emit_move_insn (large, two31);
  emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);

  x = gen_rtx_fmt_ee (LE, vecmode, large, value);
  emit_insn (gen_rtx_SET (VOIDmode, large, x));

  x = gen_rtx_AND (vecmode, zero_or_two31, large);
  emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));

  x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
  emit_insn (gen_rtx_SET (VOIDmode, value, x));

  large = gen_rtx_REG (V4SImode, REGNO (large));
  emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));

  x = gen_rtx_REG (V4SImode, REGNO (value));
  if (vecmode == V4SFmode)
    emit_insn (gen_fix_truncv4sfv4si2 (x, value));
  else
    emit_insn (gen_sse2_cvttpd2dq (x, value));
  value = x;

  emit_insn (gen_xorv4si3 (value, value, large));
}
/* Convert an unsigned DImode value into a DFmode, using only SSE.
   Expects the 64-bit DImode to be supplied in a pair of integral
   registers.  Requires SSE2; will use SSE3 if available.  For x86_32,
   -mfpmath=sse, !optimize_size only.  */

void
ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
  rtx int_xmm, fp_xmm;
  rtx biases, exponents;
  rtx x;

  int_xmm = gen_reg_rtx (V4SImode);
  if (TARGET_INTER_UNIT_MOVES)
    emit_insn (gen_movdi_to_sse (int_xmm, input));
  else if (TARGET_SSE_SPLIT_REGS)
    {
      emit_clobber (int_xmm);
      emit_move_insn (gen_lowpart (DImode, int_xmm), input);
    }
  else
    {
      x = gen_reg_rtx (V2DImode);
      ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
      emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
    }

  x = gen_rtx_CONST_VECTOR (V4SImode,
			    gen_rtvec (4, GEN_INT (0x43300000UL),
				       GEN_INT (0x45300000UL),
				       const0_rtx, const0_rtx));
  exponents = validize_mem (force_const_mem (V4SImode, x));

  /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
  emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));

  /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
     yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
     Similarly (0x45300000UL ## fp_value_hi_xmm) yields
     (0x1.0p84 + double(fp_value_hi_xmm)).
     Note these exponents differ by 32.  */

  fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));

  /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
     in [0,2**32-1] and [0]+[2**32,2**64-1] respectively.  */
  real_ldexp (&bias_lo_rvt, &dconst1, 52);
  real_ldexp (&bias_hi_rvt, &dconst1, 84);
  biases = const_double_from_real_value (bias_lo_rvt, DFmode);
  x = const_double_from_real_value (bias_hi_rvt, DFmode);
  biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
  biases = validize_mem (force_const_mem (V2DFmode, biases));
  emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));

  /* Add the upper and lower DFmode values together.  */
  if (TARGET_SSE3)
    emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
  else
    {
      x = copy_to_mode_reg (V2DFmode, fp_xmm);
      emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
      emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
    }

  ix86_expand_vector_extract (false, target, fp_xmm, 0);
}

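/* Editor's illustration (not part of GCC): the exponent-juxtaposition
   trick above can be reproduced in plain C.  A minimal sketch, assuming
   little-endian IEEE-754 doubles and C99 hex-float literals; the helper
   name is hypothetical.

     #include <stdint.h>
     #include <string.h>

     static double uns64_to_double_sketch (uint64_t v)
     {
       // 0x433 biased exponent: bits encode 0x1.0p52 + (double) low32.
       uint64_t lo_bits = ((uint64_t) 0x43300000 << 32) | (uint32_t) v;
       // 0x453 biased exponent: 0x1.0p84 + (double) high32 * 0x1.0p32.
       uint64_t hi_bits = ((uint64_t) 0x45300000 << 32) | (uint32_t) (v >> 32);
       double lo, hi;
       memcpy (&lo, &lo_bits, sizeof lo);
       memcpy (&hi, &hi_bits, sizeof hi);
       // Subtract the biases and add the halves, as the RTL above does.
       return (hi - 0x1.0p84) + (lo - 0x1.0p52);
     }
*/
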
/* Not used, but eases macroization of patterns.  */
void
ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
				  rtx input ATTRIBUTE_UNUSED)
{
  gcc_unreachable ();
}

/* Convert an unsigned SImode value into a DFmode.  Only currently used
   for SSE, but applicable anywhere.  */

void
ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO31r;
  rtx x, fp;

  x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
			   NULL, 1, OPTAB_DIRECT);

  fp = gen_reg_rtx (DFmode);
  emit_insn (gen_floatsidf2 (fp, x));

  real_ldexp (&TWO31r, &dconst1, 31);
  x = const_double_from_real_value (TWO31r, DFmode);

  x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}

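/* Editor's illustration (not part of GCC): a scalar C sketch of the
   add-INT_MIN-then-rebias trick above.  The name is hypothetical, and
   the signed cast assumes the usual two's-complement wraparound.

     #include <stdint.h>

     static double uns32_to_double_sketch (uint32_t u)
     {
       // u + (-2147483647 - 1) wraps to the signed value u - 2**31 ...
       int32_t biased = (int32_t) (u + 0x80000000u);
       // ... which converts exactly; adding 0x1.0p31 back restores u.
       return (double) biased + 0x1.0p31;
     }
*/
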
/* Convert a signed DImode value into a DFmode.  Only used for SSE in
   32-bit mode; otherwise we have a direct convert instruction.  */

void
ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO32r;
  rtx fp_lo, fp_hi, x;

  fp_lo = gen_reg_rtx (DFmode);
  fp_hi = gen_reg_rtx (DFmode);

  emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));

  real_ldexp (&TWO32r, &dconst1, 32);
  x = const_double_from_real_value (TWO32r, DFmode);
  fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);

  ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));

  x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
			   0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}

/* Convert an unsigned SImode value into a SFmode, using only SSE.
   For x86_32, -mfpmath=sse, !optimize_size only.  */

void
ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE ONE16r;
  rtx fp_hi, fp_lo, int_hi, int_lo, x;

  real_ldexp (&ONE16r, &dconst1, 16);
  x = const_double_from_real_value (ONE16r, SFmode);
  int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
				NULL, 0, OPTAB_DIRECT);
  int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
				NULL, 0, OPTAB_DIRECT);
  fp_hi = gen_reg_rtx (SFmode);
  fp_lo = gen_reg_rtx (SFmode);
  emit_insn (gen_floatsisf2 (fp_hi, int_hi));
  emit_insn (gen_floatsisf2 (fp_lo, int_lo));
  fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
			       0, OPTAB_DIRECT);
  fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
			       0, OPTAB_DIRECT);
  if (!rtx_equal_p (target, fp_hi))
    emit_move_insn (target, fp_hi);
}

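/* Editor's illustration (not part of GCC): the 16-bit split above in
   plain C; a sketch with a hypothetical name.

     #include <stdint.h>

     static float uns32_to_float_sketch (uint32_t u)
     {
       float hi = (float) (u >> 16);     // both halves fit in 16 bits,
       float lo = (float) (u & 0xffff);  // so each converts exactly
       // hi * 2**16 is exact (pure exponent shift); only the final
       // addition rounds, matching the MULT/PLUS sequence above.
       return hi * 0x1.0p16f + lo;
     }
*/
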
/* floatunsv{4,8}siv{4,8}sf2 expander.  Expand code to convert
   a vector of unsigned ints VAL to vector of floats TARGET.  */

void
ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
{
  rtx tmp[8];
  REAL_VALUE_TYPE TWO16r;
  enum machine_mode intmode = GET_MODE (val);
  enum machine_mode fltmode = GET_MODE (target);
  rtx (*cvt) (rtx, rtx);

  if (intmode == V4SImode)
    cvt = gen_floatv4siv4sf2;
  else
    cvt = gen_floatv8siv8sf2;
  tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
  tmp[0] = force_reg (intmode, tmp[0]);
  tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
				OPTAB_DIRECT);
  tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
				NULL_RTX, 1, OPTAB_DIRECT);
  tmp[3] = gen_reg_rtx (fltmode);
  emit_insn (cvt (tmp[3], tmp[1]));
  tmp[4] = gen_reg_rtx (fltmode);
  emit_insn (cvt (tmp[4], tmp[2]));
  real_ldexp (&TWO16r, &dconst1, 16);
  tmp[5] = const_double_from_real_value (TWO16r, SFmode);
  tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
  tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
				OPTAB_DIRECT);
  tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
				OPTAB_DIRECT);
  if (tmp[7] != target)
    emit_move_insn (target, tmp[7]);
}

/* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
   pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
   This is done by doing just signed conversion if < 0x1p31, and otherwise by
   subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards.  */

rtx
ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
{
  REAL_VALUE_TYPE TWO31r;
  rtx two31r, tmp[4];
  enum machine_mode mode = GET_MODE (val);
  enum machine_mode scalarmode = GET_MODE_INNER (mode);
  enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
  rtx (*cmp) (rtx, rtx, rtx, rtx);
  int i;

  for (i = 0; i < 3; i++)
    tmp[i] = gen_reg_rtx (mode);
  real_ldexp (&TWO31r, &dconst1, 31);
  two31r = const_double_from_real_value (TWO31r, scalarmode);
  two31r = ix86_build_const_vector (mode, 1, two31r);
  two31r = force_reg (mode, two31r);
  switch (mode)
    {
    case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
    case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
    case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
    case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
    default: gcc_unreachable ();
    }
  tmp[3] = gen_rtx_LE (mode, two31r, val);
  emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
  tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
				0, OPTAB_DIRECT);
  if (intmode == V4SImode || TARGET_AVX2)
    *xorp = expand_simple_binop (intmode, ASHIFT,
				 gen_lowpart (intmode, tmp[0]),
				 GEN_INT (31), NULL_RTX, 0,
				 OPTAB_DIRECT);
  else
    {
      rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
      two31 = ix86_build_const_vector (intmode, 1, two31);
      *xorp = expand_simple_binop (intmode, AND,
				   gen_lowpart (intmode, tmp[0]),
				   two31, NULL_RTX, 0,
				   OPTAB_DIRECT);
    }
  return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
			      0, OPTAB_DIRECT);
}

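/* Editor's illustration (not part of GCC): the adjustment above, as a
   scalar C sketch with a hypothetical name (round-toward-zero semantics).

     #include <stdint.h>

     static uint32_t double_to_uns32_sketch (double d)
     {
       if (d < 0x1.0p31)                 // small enough: signed convert
         return (uint32_t) (int32_t) d;
       // Otherwise subtract 0x1p31 first and xor 0x80000000 afterwards.
       return (uint32_t) (int32_t) (d - 0x1.0p31) ^ 0x80000000u;
     }
*/
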
/* A subroutine of ix86_build_signbit_mask.  If VECT is true,
   then replicate the value for all elements of the vector
   register.  */

rtx
ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
{
  int i, n_elt;
  rtvec v;
  enum machine_mode scalar_mode;

  switch (mode)
    {
    case V32QImode:
    case V16QImode:
    case V16HImode:
    case V8HImode:
    case V8SImode:
    case V4SImode:
    case V4DImode:
    case V2DImode:
      gcc_assert (vect);
    case V8SFmode:
    case V4SFmode:
    case V4DFmode:
    case V2DFmode:
      n_elt = GET_MODE_NUNITS (mode);
      v = rtvec_alloc (n_elt);
      scalar_mode = GET_MODE_INNER (mode);

      RTVEC_ELT (v, 0) = value;

      for (i = 1; i < n_elt; ++i)
	RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);

      return gen_rtx_CONST_VECTOR (mode, v);

    default:
      gcc_unreachable ();
    }
}

/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
   and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
   for an SSE register.  If VECT is true, then replicate the mask for
   all elements of the vector register.  If INVERT is true, then create
   a mask excluding the sign bit.  */

rtx
ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
{
  enum machine_mode vec_mode, imode;
  HOST_WIDE_INT hi, lo;
  int shift = 63;
  rtx v;
  rtx mask;

  /* Find the sign bit, sign extended to 2*HWI.  */
  switch (mode)
    {
    case V8SImode:
    case V4SImode:
    case V8SFmode:
    case V4SFmode:
      vec_mode = mode;
      mode = GET_MODE_INNER (mode);
      imode = SImode;
      lo = 0x80000000, hi = lo < 0;
      break;

    case V4DImode:
    case V2DImode:
    case V4DFmode:
    case V2DFmode:
      vec_mode = mode;
      mode = GET_MODE_INNER (mode);
      imode = DImode;
      if (HOST_BITS_PER_WIDE_INT >= 64)
	lo = (HOST_WIDE_INT)1 << shift, hi = -1;
      else
	lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
      break;

    case TImode:
    case TFmode:
      vec_mode = VOIDmode;
      if (HOST_BITS_PER_WIDE_INT >= 64)
	{
	  imode = TImode;
	  lo = 0, hi = (HOST_WIDE_INT)1 << shift;
	}
      else
	{
	  rtvec vec;

	  imode = DImode;
	  lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);

	  if (invert)
	    {
	      lo = ~lo, hi = ~hi;
	      v = constm1_rtx;
	    }
	  else
	    v = const0_rtx;

	  mask = immed_double_const (lo, hi, imode);

	  vec = gen_rtvec (2, v, mask);
	  v = gen_rtx_CONST_VECTOR (V2DImode, vec);
	  v = copy_to_mode_reg (mode, gen_lowpart (mode, v));

	  return v;
	}
      break;

    default:
      gcc_unreachable ();
    }

  if (invert)
    lo = ~lo, hi = ~hi;

  /* Force this value into the low part of a fp vector constant.  */
  mask = immed_double_const (lo, hi, imode);
  mask = gen_lowpart (mode, mask);

  if (vec_mode == VOIDmode)
    return force_reg (mode, mask);

  v = ix86_build_const_vector (vec_mode, vect, mask);
  return force_reg (vec_mode, v);
}

/* Generate code for floating point ABS or NEG.  */

void
ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
				rtx operands[])
{
  rtx mask, set, dst, src;
  bool use_sse = false;
  bool vector_mode = VECTOR_MODE_P (mode);
  enum machine_mode vmode = mode;

  if (vector_mode)
    use_sse = true;
  else if (mode == TFmode)
    use_sse = true;
  else if (TARGET_SSE_MATH)
    {
      use_sse = SSE_FLOAT_MODE_P (mode);
      if (mode == SFmode)
	vmode = V4SFmode;
      else if (mode == DFmode)
	vmode = V2DFmode;
    }

  /* NEG and ABS performed with SSE use bitwise mask operations.
     Create the appropriate mask now.  */
  if (use_sse)
    mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
  else
    mask = NULL_RTX;

  dst = operands[0];
  src = operands[1];

  set = gen_rtx_fmt_e (code, mode, src);
  set = gen_rtx_SET (VOIDmode, dst, set);

  if (mask)
    {
      rtx use, clob;
      rtvec par;

      use = gen_rtx_USE (VOIDmode, mask);
      if (vector_mode)
	par = gen_rtvec (2, set, use);
      else
	{
	  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
	  par = gen_rtvec (3, set, use, clob);
	}
      emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
    }
  else
    emit_insn (set);
}

/* Expand a copysign operation.  Special case operand 0 being a constant.  */

void
ix86_expand_copysign (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, op1, mask, nmask;

  dest = operands[0];
  op0 = operands[1];
  op1 = operands[2];

  mode = GET_MODE (dest);

  if (mode == SFmode)
    vmode = V4SFmode;
  else if (mode == DFmode)
    vmode = V2DFmode;
  else
    vmode = mode;

  if (GET_CODE (op0) == CONST_DOUBLE)
    {
      rtx (*copysign_insn) (rtx, rtx, rtx, rtx);

      if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
	op0 = simplify_unary_operation (ABS, mode, op0, mode);

      if (mode == SFmode || mode == DFmode)
	{
	  if (op0 == CONST0_RTX (mode))
	    op0 = CONST0_RTX (vmode);
	  else
	    {
	      rtx v = ix86_build_const_vector (vmode, false, op0);

	      op0 = force_reg (vmode, v);
	    }
	}
      else if (op0 != CONST0_RTX (mode))
	op0 = force_reg (mode, op0);

      mask = ix86_build_signbit_mask (vmode, 0, 0);

      if (mode == SFmode)
	copysign_insn = gen_copysignsf3_const;
      else if (mode == DFmode)
	copysign_insn = gen_copysigndf3_const;
      else
	copysign_insn = gen_copysigntf3_const;

      emit_insn (copysign_insn (dest, op0, op1, mask));
    }
  else
    {
      rtx (*copysign_insn) (rtx, rtx, rtx, rtx, rtx, rtx);

      nmask = ix86_build_signbit_mask (vmode, 0, 1);
      mask = ix86_build_signbit_mask (vmode, 0, 0);

      if (mode == SFmode)
	copysign_insn = gen_copysignsf3_var;
      else if (mode == DFmode)
	copysign_insn = gen_copysigndf3_var;
      else
	copysign_insn = gen_copysigntf3_var;

      emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
    }
}

/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
   be a constant, and so has already been expanded into a vector constant.  */

void
ix86_split_copysign_const (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, mask, x;

  dest = operands[0];
  op0 = operands[1];
  mask = operands[3];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  dest = simplify_gen_subreg (vmode, dest, mode, 0);
  x = gen_rtx_AND (vmode, dest, mask);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  if (op0 != CONST0_RTX (vmode))
    {
      x = gen_rtx_IOR (vmode, dest, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
}

/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
   so we have to do two masks.  */

void
ix86_split_copysign_var (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, scratch, op0, op1, mask, nmask, x;

  dest = operands[0];
  scratch = operands[1];
  op0 = operands[2];
  op1 = operands[3];
  nmask = operands[4];
  mask = operands[5];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  if (rtx_equal_p (op0, op1))
    {
      /* Shouldn't happen often (it's useless, obviously), but when it does
	 we'd generate incorrect code if we continue below.  */
      emit_move_insn (dest, op0);
      return;
    }

  if (REG_P (mask) && REGNO (dest) == REGNO (mask))	/* alternative 0 */
    {
      gcc_assert (REGNO (op1) == REGNO (scratch));

      x = gen_rtx_AND (vmode, scratch, mask);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      dest = mask;
      op0 = simplify_gen_subreg (vmode, op0, mode, 0);
      x = gen_rtx_NOT (vmode, dest);
      x = gen_rtx_AND (vmode, x, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else
    {
      if (REGNO (op1) == REGNO (scratch))		/* alternative 1,3 */
	{
	  x = gen_rtx_AND (vmode, scratch, mask);
	}
      else						/* alternative 2,4 */
	{
	  gcc_assert (REGNO (mask) == REGNO (scratch));
	  op1 = simplify_gen_subreg (vmode, op1, mode, 0);
	  x = gen_rtx_AND (vmode, scratch, op1);
	}
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      if (REGNO (op0) == REGNO (dest))			/* alternative 1,2 */
	{
	  dest = simplify_gen_subreg (vmode, op0, mode, 0);
	  x = gen_rtx_AND (vmode, dest, nmask);
	}
      else						/* alternative 3,4 */
	{
	  gcc_assert (REGNO (nmask) == REGNO (dest));
	  dest = nmask;
	  op0 = simplify_gen_subreg (vmode, op0, mode, 0);
	  x = gen_rtx_AND (vmode, dest, op0);
	}
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }

  x = gen_rtx_IOR (vmode, dest, scratch);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
}

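/* Editor's illustration (not part of GCC): the two-mask copysign above
   boils down to this bit manipulation, sketched for scalar doubles with
   a hypothetical name.

     #include <stdint.h>
     #include <string.h>

     static double copysign_sketch (double mag, double sgn)
     {
       uint64_t m, s;
       memcpy (&m, &mag, sizeof m);
       memcpy (&s, &sgn, sizeof s);
       // nmask keeps everything but the sign bit; mask keeps only it.
       m = (m & ~0x8000000000000000ull) | (s & 0x8000000000000000ull);
       memcpy (&mag, &m, sizeof mag);
       return mag;
     }
*/
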
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

bool
ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return false;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
	return false;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return false;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
	return false;
      /* FALLTHRU */
    case CCZmode:
      break;

    case CCAmode:
    case CCCmode:
    case CCOmode:
    case CCSmode:
      if (set_mode != req_mode)
	return false;
      break;

    default:
      gcc_unreachable ();
    }

  return GET_MODE (SET_SRC (set)) == set_mode;
}

/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}

/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

enum machine_mode
ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms of trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
}

enum machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);

  if (SCALAR_FLOAT_MODE_P (mode))
    {
      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
      return ix86_fp_compare_mode (code);
    }

  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:			/* CF=0 */
    case LTU:			/* CF=1 */
      /* Detect overflow checks.  They need just the carry flag.  */
      if (GET_CODE (op0) == PLUS
	  && rtx_equal_p (op1, XEXP (op0, 0)))
	return CCCmode;
      else
	return CCmode;
    case GTU:			/* CF=0 & ZF=0 */
    case LEU:			/* CF=1 | ZF=1 */
      /* Detect overflow checks.  They need just the carry flag.  */
      if (GET_CODE (op0) == MINUS
	  && rtx_equal_p (op1, XEXP (op0, 0)))
	return CCCmode;
      else
	return CCmode;
      /* Codes possibly doable only with sign flag when
	 comparing against zero.  */
    case GE:			/* SF=OF   or   SF=0 */
    case LT:			/* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
	return CCGOCmode;
      else
	/* For other cases Carry flag is not required.  */
	return CCGCmode;
      /* Codes doable only with sign flag when comparing
	 against zero, but we miss jump instruction for it
	 so we need to use relational tests against overflow
	 that thus needs to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
	return CCNOmode;
      else
	return CCGCmode;
      /* The strcmp pattern does (use flags), and combine may ask us
	 for a proper mode.  */
    case USE:
      return CCmode;
    default:
      gcc_unreachable ();
    }
}

/* Return the fixed registers used for condition codes.  */

static bool
ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = FLAGS_REG;
  *p2 = FPSR_REG;
  return true;
}

/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
{
  if (m1 == m2)
    return m1;

  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGCmode;

  if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
    return m2;
  else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
    return m1;

  switch (m1)
    {
    default:
      gcc_unreachable ();

    case CCmode:
    case CCGCmode:
    case CCGOCmode:
    case CCNOmode:
    case CCAmode:
    case CCCmode:
    case CCOmode:
    case CCSmode:
    case CCZmode:
      switch (m2)
	{
	default:
	  return VOIDmode;

	case CCmode:
	case CCGCmode:
	case CCGOCmode:
	case CCNOmode:
	case CCAmode:
	case CCCmode:
	case CCOmode:
	case CCSmode:
	case CCZmode:
	  return CCmode;
	}

    case CCFPmode:
    case CCFPUmode:
      /* These are only compatible with themselves, which we already
	 checked above.  */
      return VOIDmode;
    }
}

/* Return a comparison we can do and that is equivalent to
   swap_condition (code) apart possibly from orderedness.
   But, never change orderedness if TARGET_IEEE_FP, returning
   UNKNOWN in that case if necessary.  */

static enum rtx_code
ix86_fp_swap_condition (enum rtx_code code)
{
  switch (code)
    {
    case GT:			/* GTU - CF=0 & ZF=0 */
      return TARGET_IEEE_FP ? UNKNOWN : UNLT;
    case GE:			/* GEU - CF=0 */
      return TARGET_IEEE_FP ? UNKNOWN : UNLE;
    case UNLT:			/* LTU - CF=1 */
      return TARGET_IEEE_FP ? UNKNOWN : GT;
    case UNLE:			/* LEU - CF=1 | ZF=1 */
      return TARGET_IEEE_FP ? UNKNOWN : GE;
    default:
      return swap_condition (code);
    }
}

/* Return cost of comparison CODE using the best strategy for performance.
   All following functions use the number of instructions as the cost metric.
   In future this should be tweaked to compute bytes for optimize_size and
   take into account performance of various instructions on various CPUs.  */

static int
ix86_fp_comparison_cost (enum rtx_code code)
{
  int arith_cost;

  /* The cost of code using bit-twiddling on %ah.  */
  switch (code)
    {
    case UNLE:
    case UNLT:
    case LTGT:
    case GT:
    case GE:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
      arith_cost = 4;
      break;
    case LT:
    case NE:
    case EQ:
    case UNGE:
      arith_cost = TARGET_IEEE_FP ? 5 : 4;
      break;
    case LE:
    case UNGT:
      arith_cost = TARGET_IEEE_FP ? 6 : 4;
      break;
    default:
      gcc_unreachable ();
    }

  switch (ix86_fp_comparison_strategy (code))
    {
    case IX86_FPCMP_COMI:
      return arith_cost > 4 ? 3 : 2;
    case IX86_FPCMP_SAHF:
      return arith_cost > 4 ? 4 : 3;
    default:
      return arith_cost;
    }
}

/* Return strategy to use for floating-point.  We assume that fcomi is always
   preferable where available, since that is also true when looking at size
   (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test).  */

enum ix86_fpcmp_strategy
ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* Do fcomi/sahf based test when profitable.  */

  if (TARGET_CMOVE)
    return IX86_FPCMP_COMI;

  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
    return IX86_FPCMP_SAHF;

  return IX86_FPCMP_ARITH;
}

/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the fcomi compare instructions.  The XFmode
     compare instructions require registers except when comparing
     against zero or when converting operand 1 from fixed point to
     floating point.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
	  || (op_mode == XFmode
	      && ! (standard_80387_constant_p (op0) == 1
		    || standard_80387_constant_p (op1) == 1)
	      && GET_CODE (op1) != FLOAT)
	  || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
	 things around if they appear profitable, otherwise force op0
	 into a register.  */

      if (standard_80387_constant_p (op0) == 0
	  || (MEM_P (op0)
	      && ! (standard_80387_constant_p (op1) == 0
		    || MEM_P (op1))))
	{
	  enum rtx_code new_code = ix86_fp_swap_condition (code);
	  if (new_code != UNKNOWN)
	    {
	      rtx tmp;
	      tmp = op0, op0 = op1, op1 = tmp;
	      code = new_code;
	    }
	}

      if (!REG_P (op0))
	op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
	{
	  int tmp = standard_80387_constant_p (op1);
	  if (tmp == 0)
	    op1 = validize_mem (force_const_mem (op_mode, op1));
	  else if (tmp == 1)
	    {
	      if (TARGET_CMOVE)
		op1 = force_reg (op_mode, op1);
	    }
	  else
	    op1 = force_reg (op_mode, op1);
	}
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (REG_P (op1) || can_create_pseudo_p ()))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (!REG_P (op0))
	op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}

/* Convert comparison codes we use to represent FP comparison to integer
   code that will result in proper branch.  Return UNKNOWN if no such code
   is available.  */

enum rtx_code
ix86_fp_compare_code_to_integer (enum rtx_code code)
{
  switch (code)
    {
    case GT:
      return GTU;
    case GE:
      return GEU;
    case ORDERED:
    case UNORDERED:
      return code;
    case UNEQ:
      return EQ;
    case UNLT:
      return LTU;
    case UNLE:
      return LEU;
    case LTGT:
      return NE;
    default:
      return UNKNOWN;
    }
}

/* Generate insn patterns to do a floating point compare of OPERANDS.  */

static rtx
ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  /* Do fcomi/sahf based test when profitable.  */
  switch (ix86_fp_comparison_strategy (code))
    {
    case IX86_FPCMP_COMI:
      intcmp_mode = fpcmp_mode;
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
			 tmp);
      emit_insn (tmp);
      break;

    case IX86_FPCMP_SAHF:
      intcmp_mode = fpcmp_mode;
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
			 tmp);

      if (!scratch)
	scratch = gen_reg_rtx (HImode);
      tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
      break;

    case IX86_FPCMP_ARITH:
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
      if (!scratch)
	scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
	 doesn't happen to work out to anything nice combination-wise.
	 So do some bit twiddling on the value we've got in AH to come
	 up with an appropriate set of condition codes.  */

      intcmp_mode = CCNOmode;
      switch (code)
	{
	case GT:
	case UNGT:
	  if (code == GT || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
	      intcmp_mode = CCmode;
	      code = GEU;
	    }
	  break;
	case LT:
	case UNLT:
	  if (code == LT && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
	      code = NE;
	    }
	  break;
	case GE:
	case UNGE:
	  if (code == GE || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
	      code = NE;
	    }
	  break;
	case LE:
	case UNLE:
	  if (code == LE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = LTU;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = NE;
	    }
	  break;
	case EQ:
	case UNEQ:
	  if (code == EQ && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = NE;
	    }
	  break;
	case NE:
	case LTGT:
	  if (code == NE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x40)));
	      code = NE;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = EQ;
	    }
	  break;

	case UNORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = NE;
	  break;
	case ORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = EQ;
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
			 const0_rtx);
}

rtx
ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
{
  rtx ret;

  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
    ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
  else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
    {
      gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
      ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
    }
  else
    ret = ix86_expand_int_compare (code, op0, op1);

  return ret;
}

void
ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx tmp;

  switch (mode)
    {
    case SFmode:
    case DFmode:
    case XFmode:
    case QImode:
    case HImode:
    case SImode:
      simple:
      tmp = ix86_expand_compare (code, op0, op1);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case DImode:
      if (TARGET_64BIT)
	goto simple;
    case TImode:
      /* Expand DImode branch into multiple compare+branch.  */
      {
	rtx lo[2], hi[2], label2;
	enum rtx_code code1, code2, code3;
	enum machine_mode submode;

	if (CONSTANT_P (op0) && !CONSTANT_P (op1))
	  {
	    tmp = op0, op0 = op1, op1 = tmp;
	    code = swap_condition (code);
	  }

	split_double_mode (mode, &op0, 1, lo+0, hi+0);
	split_double_mode (mode, &op1, 1, lo+1, hi+1);

	submode = mode == DImode ? SImode : DImode;

	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
	   avoid two branches.  This costs one extra insn, so disable when
	   optimizing for size.  */

	if ((code == EQ || code == NE)
	    && (!optimize_insn_for_size_p ()
		|| hi[1] == const0_rtx || lo[1] == const0_rtx))
	  {
	    rtx xor0, xor1;

	    xor1 = hi[0];
	    if (hi[1] != const0_rtx)
	      xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    xor0 = lo[0];
	    if (lo[1] != const0_rtx)
	      xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    tmp = expand_binop (submode, ior_optab, xor1, xor0,
				NULL_RTX, 0, OPTAB_WIDEN);

	    ix86_expand_branch (code, tmp, const0_rtx, label);
	    return;
	  }

	/* Otherwise, if we are doing less-than or greater-or-equal-than,
	   op1 is a constant and the low word is zero, then we can just
	   examine the high word.  Similarly for low word -1 and
	   less-or-equal-than or greater-than.  */

	if (CONST_INT_P (hi[1]))
	  switch (code)
	    {
	    case LT: case LTU: case GE: case GEU:
	      if (lo[1] == const0_rtx)
		{
		  ix86_expand_branch (code, hi[0], hi[1], label);
		  return;
		}
	      break;
	    case LE: case LEU: case GT: case GTU:
	      if (lo[1] == constm1_rtx)
		{
		  ix86_expand_branch (code, hi[0], hi[1], label);
		  return;
		}
	      break;
	    default:
	      break;
	    }

	/* Otherwise, we need two or three jumps.  */

	label2 = gen_label_rtx ();

	code1 = code;
	code2 = swap_condition (code);
	code3 = unsigned_condition (code);

	switch (code)
	  {
	  case LT: case GT: case LTU: case GTU:
	    break;

	  case LE:   code1 = LT;  code2 = GT;  break;
	  case GE:   code1 = GT;  code2 = LT;  break;
	  case LEU:  code1 = LTU; code2 = GTU; break;
	  case GEU:  code1 = GTU; code2 = LTU; break;

	  case EQ:   code1 = UNKNOWN; code2 = NE;  break;
	  case NE:   code2 = UNKNOWN; break;

	  default:
	    gcc_unreachable ();
	  }

	/*
	 * a < b =>
	 *    if (hi(a) < hi(b)) goto true;
	 *    if (hi(a) > hi(b)) goto false;
	 *    if (lo(a) < lo(b)) goto true;
	 *  false:
	 */

	if (code1 != UNKNOWN)
	  ix86_expand_branch (code1, hi[0], hi[1], label);
	if (code2 != UNKNOWN)
	  ix86_expand_branch (code2, hi[0], hi[1], label2);

	ix86_expand_branch (code3, lo[0], lo[1], label);

	if (code2 != UNKNOWN)
	  emit_label (label2);
	return;
      }

    default:
      gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
      goto simple;
    }
}

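/* Editor's illustration (not part of GCC): the (hi0^hi1)|(lo0^lo1)
   equality trick used above, as a C sketch with hypothetical names.

     #include <stdint.h>

     static int eq64_sketch (uint32_t lo0, uint32_t hi0,
                             uint32_t lo1, uint32_t hi1)
     {
       // One combined test against zero instead of two compare+branch
       // pairs: the OR is zero iff both halves match.
       return ((hi0 ^ hi1) | (lo0 ^ lo1)) == 0;
     }
*/
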
/* Split branch based on floating point condition.  */
void
ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
		      rtx target1, rtx target2, rtx tmp, rtx pushed)
{
  rtx condition;
  rtx i;

  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
				      tmp);

  /* Remove pushed operand from stack.  */
  if (pushed)
    ix86_free_from_memory (GET_MODE (pushed));

  i = emit_jump_insn (gen_rtx_SET
		      (VOIDmode, pc_rtx,
		       gen_rtx_IF_THEN_ELSE (VOIDmode,
					     condition, target1, target2)));
  if (split_branch_probability >= 0)
    add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
}

void
ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
{
  rtx ret;

  gcc_assert (GET_MODE (dest) == QImode);

  ret = ix86_expand_compare (code, op0, op1);
  PUT_MODE (ret, QImode);
  emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
}

/* Expand comparison setting or clearing carry flag.  Return true when
   successful and set pop for the operation.  */
static bool
ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
{
  enum machine_mode mode =
    GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);

  /* Do not handle double-mode compares that go through special path.  */
  if (mode == (TARGET_64BIT ? TImode : DImode))
    return false;

  if (SCALAR_FLOAT_MODE_P (mode))
    {
      rtx compare_op, compare_seq;

      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));

      /* Shortcut:  following common codes never translate
	 into carry flag compares.  */
      if (code == EQ || code == NE || code == UNEQ || code == LTGT
	  || code == ORDERED || code == UNORDERED)
	return false;

      /* These comparisons require zero flag; swap operands so they won't.  */
      if ((code == GT || code == UNLE || code == LE || code == UNGT)
	  && !TARGET_IEEE_FP)
	{
	  rtx tmp = op0;
	  op0 = op1;
	  op1 = tmp;
	  code = swap_condition (code);
	}

      /* Try to expand the comparison and verify that we end up with
	 carry flag based comparison.  This fails to be true only when
	 we decide to expand comparison using arithmetic, which is not
	 a common scenario.  */
      start_sequence ();
      compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
      compare_seq = get_insns ();
      end_sequence ();

      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
	  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
	code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
      else
	code = GET_CODE (compare_op);

      if (code != LTU && code != GEU)
	return false;

      emit_insn (compare_seq);
      *pop = compare_op;
      return true;
    }

  if (!INTEGRAL_MODE_P (mode))
    return false;

  switch (code)
    {
    case LTU:
    case GEU:
      break;

    /* Convert a==0 into (unsigned)a<1.  */
    case EQ:
    case NE:
      if (op1 != const0_rtx)
	return false;
      op1 = const1_rtx;
      code = (code == EQ ? LTU : GEU);
      break;

    /* Convert a>b into b<a or a>=b-1.  */
    case GTU:
    case LEU:
      if (CONST_INT_P (op1))
	{
	  op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
	  /* Bail out on overflow.  We still can swap operands but that
	     would force loading of the constant into register.  */
	  if (op1 == const0_rtx
	      || !x86_64_immediate_operand (op1, GET_MODE (op1)))
	    return false;
	  code = (code == GTU ? GEU : LTU);
	}
      else
	{
	  rtx tmp = op1;
	  op1 = op0;
	  op0 = tmp;
	  code = (code == GTU ? LTU : GEU);
	}
      break;

    /* Convert a>=0 into (unsigned)a<0x80000000.  */
    case LT:
    case GE:
      if (mode == DImode || op1 != const0_rtx)
	return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LT ? GEU : LTU);
      break;
    case LE:
    case GT:
      if (mode == DImode || op1 != constm1_rtx)
	return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LE ? GEU : LTU);
      break;

    default:
      return false;
    }
  /* Swapping operands may cause constant to appear as first operand.  */
  if (!nonimmediate_operand (op0, VOIDmode))
    {
      if (!can_create_pseudo_p ())
	return false;
      op0 = force_reg (mode, op0);
    }
  *pop = ix86_expand_compare (code, op0, op1);
  gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
  return true;
}

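/* Editor's illustration (not part of GCC): the conversions above rewrite
   comparisons so that only the carry flag decides them; each right-hand
   form below is an LTU/GEU test, i.e. a single cmp whose carry flag can
   feed sbb or adc with no setcc or branch.  Names are hypothetical.

     #include <stdint.h>

     static int eq0_as_ltu (uint32_t a)   // a == 0
     { return a < 1u; }

     static int ge0_as_ltu (uint32_t a)   // (int32_t) a >= 0
     { return a < 0x80000000u; }
*/
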
bool
ix86_expand_int_movcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]), compare_code;
  rtx compare_seq, compare_op;
  enum machine_mode mode = GET_MODE (operands[0]);
  bool sign_bit_compare_p = false;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  start_sequence ();
  compare_op = ix86_expand_compare (code, op0, op1);
  compare_seq = get_insns ();
  end_sequence ();

  compare_code = GET_CODE (compare_op);

  if ((op1 == const0_rtx && (code == GE || code == LT))
      || (op1 == constm1_rtx && (code == GT || code == LE)))
    sign_bit_compare_p = true;

  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
     HImode insns, we'd be swallowed in word prefix ops.  */

  if ((mode != HImode || TARGET_FAST_PREFIX)
      && (mode != (TARGET_64BIT ? TImode : DImode))
      && CONST_INT_P (operands[2])
      && CONST_INT_P (operands[3]))
    {
      rtx out = operands[0];
      HOST_WIDE_INT ct = INTVAL (operands[2]);
      HOST_WIDE_INT cf = INTVAL (operands[3]);
      HOST_WIDE_INT diff;

      diff = ct - cf;
      /* Sign bit compares are better done using shifts than using
	 the sbb idiom.  */
      if (sign_bit_compare_p
	  || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
	{
	  /* Detect overlap between destination and compare sources.  */
	  rtx tmp = out;

	  if (!sign_bit_compare_p)
	    {
	      rtx flags;
	      bool fpcmp = false;

	      compare_code = GET_CODE (compare_op);

	      flags = XEXP (compare_op, 0);

	      if (GET_MODE (flags) == CCFPmode
		  || GET_MODE (flags) == CCFPUmode)
		{
		  fpcmp = true;
		  compare_code
		    = ix86_fp_compare_code_to_integer (compare_code);
		}

	      /* To simplify rest of code, restrict to the GEU case.  */
	      if (compare_code == LTU)
		{
		  HOST_WIDE_INT tmp = ct;
		  ct = cf;
		  cf = tmp;
		  compare_code = reverse_condition (compare_code);
		  code = reverse_condition (code);
		}
	      else
		{
		  if (fpcmp)
		    PUT_CODE (compare_op,
			      reverse_condition_maybe_unordered
			      (GET_CODE (compare_op)));
		  else
		    PUT_CODE (compare_op,
			      reverse_condition (GET_CODE (compare_op)));
		}
	      diff = ct - cf;

	      if (reg_overlap_mentioned_p (out, op0)
		  || reg_overlap_mentioned_p (out, op1))
		tmp = gen_reg_rtx (mode);

	      if (mode == DImode)
		emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
	      else
		emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
						 flags, compare_op));
	    }
	  else
	    {
	      if (code == GT || code == GE)
		code = reverse_condition (code);
	      else
		{
		  HOST_WIDE_INT tmp = ct;
		  ct = cf;
		  cf = tmp;
		  diff = ct - cf;
		}
	      tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
	    }

	  if (diff == 1)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * [addl dest, ct]
	       *
	       * Size 5 - 8.
	       */
	      if (ct)
		tmp = expand_simple_binop (mode, PLUS,
					   tmp, GEN_INT (ct),
					   copy_rtx (tmp), 1, OPTAB_DIRECT);
	    }
	  else if (cf == -1)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * orl $ct, dest
	       *
	       * Size 8.
	       */
	      tmp = expand_simple_binop (mode, IOR,
					 tmp, GEN_INT (ct),
					 copy_rtx (tmp), 1, OPTAB_DIRECT);
	    }
	  else if (diff == -1 && ct)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * notl dest
	       * [addl dest, cf]
	       *
	       * Size 8 - 11.
	       */
	      tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
	      if (cf)
		tmp = expand_simple_binop (mode, PLUS,
					   copy_rtx (tmp), GEN_INT (cf),
					   copy_rtx (tmp), 1, OPTAB_DIRECT);
	    }
	  else
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * [notl dest]
	       * andl cf - ct, dest
	       * [addl dest, ct]
	       *
	       * Size 8 - 11.
	       */

	      if (cf == 0)
		{
		  cf = ct;
		  ct = 0;
		  tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
		}

	      tmp = expand_simple_binop (mode, AND,
					 copy_rtx (tmp),
					 gen_int_mode (cf - ct, mode),
					 copy_rtx (tmp), 1, OPTAB_DIRECT);
	      if (ct)
		tmp = expand_simple_binop (mode, PLUS,
					   copy_rtx (tmp), GEN_INT (ct),
					   copy_rtx (tmp), 1, OPTAB_DIRECT);
	    }

	  if (!rtx_equal_p (tmp, out))
	    emit_move_insn (copy_rtx (out), copy_rtx (tmp));

	  return true;
	}

      if (diff < 0)
	{
	  enum machine_mode cmp_mode = GET_MODE (op0);
	  HOST_WIDE_INT tmp;

	  tmp = ct, ct = cf, cf = tmp;
	  diff = -diff;

	  if (SCALAR_FLOAT_MODE_P (cmp_mode))
	    {
	      gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));

	      /* We may be reversing unordered compare to normal compare, that
		 is not valid in general (we may convert non-trapping condition
		 to trapping one), however on i386 we currently emit all
		 comparisons unordered.  */
	      compare_code = reverse_condition_maybe_unordered (compare_code);
	      code = reverse_condition_maybe_unordered (code);
	    }
	  else
	    {
	      compare_code = reverse_condition (compare_code);
	      code = reverse_condition (code);
	    }
	}

      compare_code = UNKNOWN;
      if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
	  && CONST_INT_P (op1))
	{
	  if (op1 == const0_rtx
	      && (code == LT || code == GE))
	    compare_code = code;
	  else if (op1 == constm1_rtx)
	    {
	      if (code == LE)
		compare_code = LT;
	      else if (code == GT)
		compare_code = GE;
	    }
	}

      /* Optimize dest = (op0 < 0) ? -1 : cf.  */
      if (compare_code != UNKNOWN
	  && GET_MODE (op0) == GET_MODE (out)
	  && (cf == -1 || ct == -1))
	{
	  /* If lea code below could be used, only optimize
	     if it results in a 2 insn sequence.  */

	  if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
		 || diff == 3 || diff == 5 || diff == 9)
	      || (compare_code == LT && ct == -1)
	      || (compare_code == GE && cf == -1))
	    {
	      /*
	       * notl op1	(if necessary)
	       * sarl $31, op1
	       * orl cf, op1
	       */
	      if (ct != -1)
		{
		  cf = ct;
		  ct = -1;
		  code = reverse_condition (code);
		}

	      out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);

	      out = expand_simple_binop (mode, IOR,
					 out, GEN_INT (cf),
					 out, 1, OPTAB_DIRECT);
	      if (out != operands[0])
		emit_move_insn (operands[0], out);

	      return true;
	    }
	}

      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
	   || diff == 3 || diff == 5 || diff == 9)
	  && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
	  && (mode != DImode
	      || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
	{
	  /*
	   * xorl dest,dest
	   * cmpl op1,op2
	   * setcc dest
	   * lea cf(dest*(ct-cf)),dest
	   *
	   * Size 14.
	   *
	   * This also catches the degenerate setcc-only case.
	   */

	  rtx tmp;
	  int nops;

	  out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);

	  nops = 0;
	  /* On x86_64 the lea instruction operates on Pmode, so we need
	     to get arithmetics done in proper mode to match.  */
	  if (diff == 1)
	    tmp = copy_rtx (out);
	  else
	    {
	      rtx out1;
	      out1 = copy_rtx (out);
	      tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
	      nops++;
	      if (diff & 1)
		{
		  tmp = gen_rtx_PLUS (mode, tmp, out1);
		  nops++;
		}
	    }
	  if (cf != 0)
	    {
	      tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
	      nops++;
	    }
	  if (!rtx_equal_p (tmp, out))
	    {
	      if (nops == 1)
		out = force_operand (tmp, copy_rtx (out));
	      else
		emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out),
					copy_rtx (tmp)));
	    }
	  if (!rtx_equal_p (out, operands[0]))
	    emit_move_insn (operands[0], copy_rtx (out));

	  return true;
	}

      /*
       * General case:			Jumpful:
       *   xorl dest,dest		cmpl op1, op2
       *   cmpl op1, op2		movl ct, dest
       *   setcc dest			jcc 1f
       *   decl dest			movl cf, dest
       *   andl (cf-ct),dest		1:
       *   addl ct,dest
       *
       * Size 20.			Size 14.
       *
       * This is reasonably steep, but branch mispredict costs are
       * high on modern cpus, so consider failing only if optimizing
       * for space.
       */

      if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
	  && BRANCH_COST (optimize_insn_for_speed_p (),
			  false) >= 2)
	{
	  if (cf == 0)
	    {
	      enum machine_mode cmp_mode = GET_MODE (op0);

	      cf = ct;
	      ct = 0;

	      if (SCALAR_FLOAT_MODE_P (cmp_mode))
		{
		  gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));

		  /* We may be reversing unordered compare to normal compare,
		     that is not valid in general (we may convert non-trapping
		     condition to trapping one), however on i386 we currently
		     emit all comparisons unordered.  */
		  code = reverse_condition_maybe_unordered (code);
		}
	      else
		{
		  code = reverse_condition (code);
		  if (compare_code != UNKNOWN)
		    compare_code = reverse_condition (compare_code);
		}
	    }

	  if (compare_code != UNKNOWN)
	    {
	      /* notl op1	(if needed)
		 sarl $31, op1
		 andl (cf-ct), op1
		 addl ct, op1

		 For x < 0 (resp. x <= -1) there will be no notl,
		 so if possible swap the constants to get rid of the
		 complement.
		 True/false will be -1/0 while code below (store flag
		 followed by decrement) is 0/-1, so the constants need
		 to be exchanged once more.  */

	      if (compare_code == GE || !cf)
		{
		  code = reverse_condition (code);
		  compare_code = LT;
		}
	      else
		{
		  HOST_WIDE_INT tmp = cf;
		  cf = ct;
		  ct = tmp;
		}

	      out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
	    }
	  else
	    {
	      out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);

	      out = expand_simple_binop (mode, PLUS, copy_rtx (out),
					 constm1_rtx,
					 copy_rtx (out), 1, OPTAB_DIRECT);
	    }

	  out = expand_simple_binop (mode, AND, copy_rtx (out),
				     gen_int_mode (cf - ct, mode),
				     copy_rtx (out), 1, OPTAB_DIRECT);
	  if (ct)
	    out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
				       copy_rtx (out), 1, OPTAB_DIRECT);
	  if (!rtx_equal_p (out, operands[0]))
	    emit_move_insn (operands[0], copy_rtx (out));

	  return true;
	}
    }

  if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
    {
      /* Try a few things more with specific constants and a variable.  */

      optab op;
      rtx var, orig_out, out, tmp;

      if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
	return false;

      /* If one of the two operands is an interesting constant, load a
	 constant with the above and mask it in with a logical operation.  */

      if (CONST_INT_P (operands[2]))
	{
	  var = operands[3];
	  if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
	    operands[3] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
	    operands[3] = const0_rtx, op = ior_optab;
	  else
	    return false;
	}
      else if (CONST_INT_P (operands[3]))
	{
	  var = operands[2];
	  if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
	    operands[2] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
	    operands[2] = const0_rtx, op = ior_optab;
	  else
	    return false;
	}
      else
	return false;

      orig_out = operands[0];
      tmp = gen_reg_rtx (mode);
      operands[0] = tmp;

      /* Recurse to get the constant loaded.  */
      if (ix86_expand_int_movcc (operands) == 0)
	return false;

      /* Mask in the interesting variable.  */
      out = expand_binop (mode, op, var, tmp, orig_out, 0,
			  OPTAB_WIDEN);
      if (!rtx_equal_p (out, orig_out))
	emit_move_insn (copy_rtx (orig_out), copy_rtx (out));

      return true;
    }

  /*
   * For comparison with above,
   *
   * movl cf,dest
   * movl ct,tmp
   * cmpl op1,op2
   * cmovcc tmp,dest
   *
   * Size 15.
   */

  if (! nonimmediate_operand (operands[2], mode))
    operands[2] = force_reg (mode, operands[2]);
  if (! nonimmediate_operand (operands[3], mode))
    operands[3] = force_reg (mode, operands[3]);

  if (! register_operand (operands[2], VOIDmode)
      && (mode == QImode
	  || ! register_operand (operands[3], VOIDmode)))
    operands[2] = force_reg (mode, operands[2]);

  if (mode == QImode
      && ! register_operand (operands[3], VOIDmode))
    operands[3] = force_reg (mode, operands[3]);

  emit_insn (compare_seq);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (mode,
						compare_op, operands[2],
						operands[3])));
  return true;
}

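/* Editor's illustration (not part of GCC): the cmp/sbb idiom used above
   in C.  After an unsigned compare, sbb materializes -(a < b); AND and
   ADD then pick one of the two constants branch-free.  The helper name
   is hypothetical.

     #include <stdint.h>

     static uint32_t movcc_sketch (uint32_t a, uint32_t b,
                                   uint32_t ct, uint32_t cf)
     {
       uint32_t m = (a < b) ? 0xffffffffu : 0;  // cmp a,b ; sbb m,m
       return (m & (ct - cf)) + cf;             // m ? ct : cf, no branch
     }
*/
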
/* Swap, force into registers, or otherwise massage the two operands
   to an sse comparison with a mask result.  Thus we differ a bit from
   ix86_prepare_fp_compare_args which expects to produce a flags result.

   The DEST operand exists to help determine whether to commute commutative
   operators.  The POP0/POP1 operands are updated in place.  The new
   comparison code is returned, or UNKNOWN if not implementable.  */

static enum rtx_code
ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
				  rtx *pop0, rtx *pop1)
{
  rtx tmp;

  switch (code)
    {
    case LTGT:
    case UNEQ:
      /* AVX supports all the needed comparisons.  */
      if (TARGET_AVX)
	break;
      /* We have no LTGT as an operator.  We could implement it with
	 NE & ORDERED, but this requires an extra temporary.  It's
	 not clear that it's worth it.  */
      return UNKNOWN;

    case LT:
    case LE:
    case UNGT:
    case UNGE:
      /* These are supported directly.  */
      break;

    case EQ:
    case NE:
    case UNORDERED:
    case ORDERED:
      /* AVX has 3 operand comparisons, no need to swap anything.  */
      if (TARGET_AVX)
	break;
      /* For commutative operators, try to canonicalize the destination
	 operand to be first in the comparison - this helps reload to
	 avoid extra moves.  */
      if (!dest || !rtx_equal_p (dest, *pop1))
	break;
      /* FALLTHRU */

    case GE:
    case GT:
    case UNLE:
    case UNLT:
      /* These are not supported directly before AVX, and furthermore
	 ix86_expand_sse_fp_minmax only optimizes LT/UNGE.  Swap the
	 comparison operands to transform into something that is
	 supported.  */
      tmp = *pop0;
      *pop0 = *pop1;
      *pop1 = tmp;
      code = swap_condition (code);
      break;

    default:
      gcc_unreachable ();
    }

  return code;
}

/* Detect conditional moves that exactly match min/max operational
   semantics.  Note that this is IEEE safe, as long as we don't
   interchange the operands.

   Returns FALSE if this conditional move doesn't match a MIN/MAX,
   and TRUE if the operation is successful and instructions are emitted.  */

static bool
ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
			   rtx cmp_op1, rtx if_true, rtx if_false)
{
  enum machine_mode mode;
  bool is_min;
  rtx tmp;

  if (code == LT)
    ;
  else if (code == UNGE)
    {
      tmp = if_true;
      if_true = if_false;
      if_false = tmp;
    }
  else
    return false;

  if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
    is_min = true;
  else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
    is_min = false;
  else
    return false;

  mode = GET_MODE (dest);

  /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
     but MODE may be a vector mode and thus not appropriate.  */
  if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
    {
      int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
      rtvec v;

      if_true = force_reg (mode, if_true);
      v = gen_rtvec (2, if_true, if_false);
      tmp = gen_rtx_UNSPEC (mode, v, u);
    }
  else
    {
      code = is_min ? SMIN : SMAX;
      tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
    }

  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
  return true;
}

/* Expand an sse vector comparison.  Return the register with the result.  */

static rtx
ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
		     rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);
  enum machine_mode cmp_mode = GET_MODE (cmp_op0);
  rtx x;

  cmp_op0 = force_reg (cmp_mode, cmp_op0);
  if (!nonimmediate_operand (cmp_op1, cmp_mode))
    cmp_op1 = force_reg (cmp_mode, cmp_op1);

  if (optimize
      || reg_overlap_mentioned_p (dest, op_true)
      || reg_overlap_mentioned_p (dest, op_false))
    dest = gen_reg_rtx (mode);

  x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
  if (cmp_mode != mode)
    {
      x = force_reg (cmp_mode, x);
      convert_move (dest, x, false);
    }
  else
    emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  return dest;
}

/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
   operations.  This is used for both scalar and vector conditional moves.  */

static void
ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);
  rtx t2, t3, x;

  if (vector_all_ones_operand (op_true, mode)
      && rtx_equal_p (op_false, CONST0_RTX (mode)))
    {
      emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
    }
  else if (op_false == CONST0_RTX (mode))
    {
      op_true = force_reg (mode, op_true);
      x = gen_rtx_AND (mode, cmp, op_true);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (op_true == CONST0_RTX (mode))
    {
      op_false = force_reg (mode, op_false);
      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode))
    {
      op_false = force_reg (mode, op_false);
      x = gen_rtx_IOR (mode, cmp, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (TARGET_XOP)
    {
      op_true = force_reg (mode, op_true);

      if (!nonimmediate_operand (op_false, mode))
	op_false = force_reg (mode, op_false);

      emit_insn (gen_rtx_SET (mode, dest,
			      gen_rtx_IF_THEN_ELSE (mode, cmp,
						    op_true,
						    op_false)));
    }
  else
    {
      rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;

      if (!nonimmediate_operand (op_true, mode))
	op_true = force_reg (mode, op_true);

      op_false = force_reg (mode, op_false);

      switch (mode)
	{
	case V4SFmode:
	  if (TARGET_SSE4_1)
	    gen = gen_sse4_1_blendvps;
	  break;
	case V2DFmode:
	  if (TARGET_SSE4_1)
	    gen = gen_sse4_1_blendvpd;
	  break;
	case V16QImode:
	case V8HImode:
	case V4SImode:
	case V2DImode:
	  if (TARGET_SSE4_1)
	    {
	      gen = gen_sse4_1_pblendvb;
	      dest = gen_lowpart (V16QImode, dest);
	      op_false = gen_lowpart (V16QImode, op_false);
	      op_true = gen_lowpart (V16QImode, op_true);
	      cmp = gen_lowpart (V16QImode, cmp);
	    }
	  break;
	case V8SFmode:
	  if (TARGET_AVX)
	    gen = gen_avx_blendvps256;
	  break;
	case V4DFmode:
	  if (TARGET_AVX)
	    gen = gen_avx_blendvpd256;
	  break;
	case V32QImode:
	case V16HImode:
	case V8SImode:
	case V4DImode:
	  if (TARGET_AVX2)
	    {
	      gen = gen_avx2_pblendvb;
	      dest = gen_lowpart (V32QImode, dest);
	      op_false = gen_lowpart (V32QImode, op_false);
	      op_true = gen_lowpart (V32QImode, op_true);
	      cmp = gen_lowpart (V32QImode, cmp);
	    }
	  break;
	default:
	  break;
	}

      if (gen != NULL)
	emit_insn (gen (dest, op_false, op_true, cmp));
      else
	{
	  op_true = force_reg (mode, op_true);

	  t2 = gen_reg_rtx (mode);
	  if (optimize)
	    t3 = gen_reg_rtx (mode);
	  else
	    t3 = dest;

	  x = gen_rtx_AND (mode, op_true, cmp);
	  emit_insn (gen_rtx_SET (VOIDmode, t2, x));

	  x = gen_rtx_NOT (mode, cmp);
	  x = gen_rtx_AND (mode, x, op_false);
	  emit_insn (gen_rtx_SET (VOIDmode, t3, x));

	  x = gen_rtx_IOR (mode, t3, t2);
	  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
	}
    }
}

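/* Editor's illustration (not part of GCC): when no blendv instruction
   is available, the fallback above is the classic AND/ANDN/OR select.
   Per-bit in C, with a hypothetical name:

     #include <stdint.h>

     static uint32_t blend_sketch (uint32_t cmp, uint32_t t, uint32_t f)
     {
       // cmp is all-ones or all-zeros per lane, so this selects t or f.
       return (cmp & t) | (~cmp & f);
     }
*/
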
/* Expand a floating-point conditional move.  Return true if successful.  */

bool
ix86_expand_fp_movcc (rtx operands[])
{
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[1]);
  rtx tmp, compare_op;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    {
      enum machine_mode cmode;

      /* Since we've no cmove for sse registers, don't force bad register
	 allocation just to gain access to it.  Deny movcc when the
	 comparison mode doesn't match the move mode.  */
      cmode = GET_MODE (op0);
      if (cmode == VOIDmode)
	cmode = GET_MODE (op1);
      if (cmode != mode)
	return false;

      code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
      if (code == UNKNOWN)
	return false;

      if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
				     operands[2], operands[3]))
	return true;

      tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
				 operands[2], operands[3]);
      ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
      return true;
    }

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  compare_op = ix86_expand_compare (code, op0, op1);
  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (tmp, code, op0, op1);

      compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (mode, compare_op,
						operands[2], operands[3])));

  return true;
}

/* Expand a floating-point vector conditional move; a vcond operation
   rather than a movcc operation.  */

bool
ix86_expand_fp_vcond (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[3]);
  rtx cmp;

  code = ix86_prepare_sse_fp_compare_args (operands[0], code,
					   &operands[4], &operands[5]);
  if (code == UNKNOWN)
    {
      rtx temp;
      switch (GET_CODE (operands[3]))
	{
	case LTGT:
	  temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
				      operands[5], operands[0], operands[0]);
	  cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
				     operands[5], operands[1], operands[2]);
	  code = AND;
	  break;
	case UNEQ:
	  temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
				      operands[5], operands[0], operands[0]);
	  cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
				     operands[5], operands[1], operands[2]);
	  code = IOR;
	  break;
	default:
	  gcc_unreachable ();
	}
      cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
				 OPTAB_DIRECT);
      ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
      return true;
    }

  if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
				 operands[5], operands[1], operands[2]))
    return true;

  cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
			     operands[1], operands[2]);
  ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
  return true;
}


/* Expand a signed/unsigned integral vector conditional move.  */

bool
ix86_expand_int_vcond (rtx operands[])
{
  enum machine_mode data_mode = GET_MODE (operands[0]);
  enum machine_mode mode = GET_MODE (operands[4]);
  enum rtx_code code = GET_CODE (operands[3]);
  bool negate = false;
  rtx x, cop0, cop1;

  cop0 = operands[4];
  cop1 = operands[5];

  /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
     and x < 0 ? 1 : 0 into (unsigned) x >> 31.  */
  if ((code == LT || code == GE)
      && data_mode == mode
      && cop1 == CONST0_RTX (mode)
      && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
      && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
      && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
      && (GET_MODE_SIZE (data_mode) == 16
          || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
    {
      rtx negop = operands[2 - (code == LT)];
      int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
      if (negop == CONST1_RTX (data_mode))
        {
          rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
                                         operands[0], 1, OPTAB_DIRECT);
          if (res != operands[0])
            emit_move_insn (operands[0], res);
          return true;
        }
      else if (GET_MODE_INNER (data_mode) != DImode
               && vector_all_ones_operand (negop, data_mode))
        {
          rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
                                         operands[0], 0, OPTAB_DIRECT);
          if (res != operands[0])
            emit_move_insn (operands[0], res);
          return true;
        }
    }

  if (!nonimmediate_operand (cop1, mode))
    cop1 = force_reg (mode, cop1);
  if (!general_operand (operands[1], data_mode))
    operands[1] = force_reg (data_mode, operands[1]);
  if (!general_operand (operands[2], data_mode))
    operands[2] = force_reg (data_mode, operands[2]);

  /* XOP supports all of the comparisons on all 128-bit vector int types.  */
  if (TARGET_XOP
      && (mode == V16QImode || mode == V8HImode
          || mode == V4SImode || mode == V2DImode))
    ;
  else
    {
      /* Canonicalize the comparison to EQ, GT, GTU.  */
      switch (code)
        {
        case EQ:
        case GT:
        case GTU:
          break;

        case NE:
        case LE:
        case LEU:
          code = reverse_condition (code);
          negate = true;
          break;

        case GE:
        case GEU:
          code = reverse_condition (code);
          negate = true;
          /* FALLTHRU */

        case LT:
        case LTU:
          code = swap_condition (code);
          x = cop0, cop0 = cop1, cop1 = x;
          break;

        default:
          gcc_unreachable ();
        }

      /* Only SSE4.1/SSE4.2 supports V2DImode.  */
      if (mode == V2DImode)
        {
          switch (code)
            {
            case EQ:
              /* SSE4.1 supports EQ.  */
              if (!TARGET_SSE4_1)
                return false;
              break;

            case GT:
            case GTU:
              /* SSE4.2 supports GT/GTU.  */
              if (!TARGET_SSE4_2)
                return false;
              break;

            default:
              gcc_unreachable ();
            }
        }

      /* Unsigned parallel compare is not supported by the hardware.
         Play some tricks to turn this into a signed comparison
         against 0.  */
      if (code == GTU)
        {
          cop0 = force_reg (mode, cop0);

          switch (mode)
            {
            case V8SImode:
            case V4DImode:
            case V4SImode:
            case V2DImode:
              {
                rtx t1, t2, mask;
                rtx (*gen_sub3) (rtx, rtx, rtx);

                switch (mode)
                  {
                  case V8SImode: gen_sub3 = gen_subv8si3; break;
                  case V4DImode: gen_sub3 = gen_subv4di3; break;
                  case V4SImode: gen_sub3 = gen_subv4si3; break;
                  case V2DImode: gen_sub3 = gen_subv2di3; break;
                  default:
                    gcc_unreachable ();
                  }
                /* Subtract (-(INT MAX) - 1) from both operands to make
                   them signed.  */
                mask = ix86_build_signbit_mask (mode, true, false);
                t1 = gen_reg_rtx (mode);
                emit_insn (gen_sub3 (t1, cop0, mask));

                t2 = gen_reg_rtx (mode);
                emit_insn (gen_sub3 (t2, cop1, mask));

                cop0 = t1;
                cop1 = t2;
                code = GT;
              }
              break;

            case V32QImode:
            case V16HImode:
            case V16QImode:
            case V8HImode:
              /* Perform a parallel unsigned saturating subtraction.  */
              x = gen_reg_rtx (mode);
              emit_insn (gen_rtx_SET (VOIDmode, x,
                                      gen_rtx_US_MINUS (mode, cop0, cop1)));

              cop0 = x;
              cop1 = CONST0_RTX (mode);
              code = EQ;
              negate = !negate;
              break;

            default:
              gcc_unreachable ();
            }
        }
    }

  /* Allow the comparison to be done in one mode, but the movcc to
     happen in another mode.  */
  if (data_mode == mode)
    {
      x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
                               operands[1+negate], operands[2-negate]);
    }
  else
    {
      gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
      x = ix86_expand_sse_cmp (gen_lowpart (mode, operands[0]),
                               code, cop0, cop1,
                               operands[1+negate], operands[2-negate]);
      x = gen_lowpart (data_mode, x);
    }

  ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
                         operands[2-negate]);
  return true;
}
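
/* The GTU trick above relies on x >u y being equivalent to
   (x - 0x80...0) >s (y - 0x80...0): subtracting the sign-bit mask
   biases both operands so the unsigned comparison becomes a signed
   one.  For the narrow modes, x >u y is instead computed as
   (x -us y) != 0 using unsigned saturating subtraction, with the
   result negated via EQ.  */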

/* Expand a variable vector permutation.  */

void
ix86_expand_vec_perm (rtx operands[])
{
  rtx target = operands[0];
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  rtx mask = operands[3];
  rtx t1, t2, t3, t4, vt, vt2, vec[32];
  enum machine_mode mode = GET_MODE (op0);
  enum machine_mode maskmode = GET_MODE (mask);
  int w, e, i;
  bool one_operand_shuffle = rtx_equal_p (op0, op1);

  /* Number of elements in the vector.  */
  w = GET_MODE_NUNITS (mode);
  e = GET_MODE_UNIT_SIZE (mode);
  gcc_assert (w <= 32);

  if (TARGET_AVX2)
    {
      if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
        {
          /* Unfortunately, the VPERMQ and VPERMPD instructions only support
             a constant shuffle operand.  With a tiny bit of effort we can
             use VPERMD instead.  A re-interpretation stall for V4DFmode is
             unfortunate but there's no avoiding it.
             Similarly, V16HImode has no instruction for variable shuffling,
             while for V32QImode we can, after preparing suitable masks,
             use vpshufb; vpshufb; vpermq; vpor.  */

          if (mode == V16HImode)
            {
              maskmode = mode = V32QImode;
              w = 32;
              e = 1;
            }
          else
            {
              maskmode = mode = V8SImode;
              w = 8;
              e = 4;
            }
          t1 = gen_reg_rtx (maskmode);

          /* Replicate the low bits of the V4DImode mask into V8SImode:
               mask = { A B C D }
               t1 = { A A B B C C D D }.  */
          for (i = 0; i < w / 2; ++i)
            vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
          vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
          vt = force_reg (maskmode, vt);
          mask = gen_lowpart (maskmode, mask);
          if (maskmode == V8SImode)
            emit_insn (gen_avx2_permvarv8si (t1, vt, mask));
          else
            emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));

          /* Multiply the shuffle indices by two.  */
          t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
                                    OPTAB_DIRECT);

          /* Add one to the odd shuffle indices:
               t1 = { A*2, A*2+1, B*2, B*2+1, ... }.  */
          for (i = 0; i < w / 2; ++i)
            {
              vec[i * 2] = const0_rtx;
              vec[i * 2 + 1] = const1_rtx;
            }
          vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
          vt = force_const_mem (maskmode, vt);
          t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
                                    OPTAB_DIRECT);

          /* Continue as if V8SImode (resp. V32QImode) was used initially.  */
          operands[3] = mask = t1;
          target = gen_lowpart (mode, target);
          op0 = gen_lowpart (mode, op0);
          op1 = gen_lowpart (mode, op1);
        }

      switch (mode)
        {
        case V8SImode:
          /* The VPERMD and VPERMPS instructions already properly ignore
             the high bits of the shuffle elements.  No need for us to
             perform an AND ourselves.  */
          if (one_operand_shuffle)
            emit_insn (gen_avx2_permvarv8si (target, mask, op0));
          else
            {
              t1 = gen_reg_rtx (V8SImode);
              t2 = gen_reg_rtx (V8SImode);
              emit_insn (gen_avx2_permvarv8si (t1, mask, op0));
              emit_insn (gen_avx2_permvarv8si (t2, mask, op1));
              goto merge_two;
            }
          return;

        case V8SFmode:
          mask = gen_lowpart (V8SFmode, mask);
          if (one_operand_shuffle)
            emit_insn (gen_avx2_permvarv8sf (target, mask, op0));
          else
            {
              t1 = gen_reg_rtx (V8SFmode);
              t2 = gen_reg_rtx (V8SFmode);
              emit_insn (gen_avx2_permvarv8sf (t1, mask, op0));
              emit_insn (gen_avx2_permvarv8sf (t2, mask, op1));
              goto merge_two;
            }
          return;

        case V4SImode:
          /* By combining the two 128-bit input vectors into one 256-bit
             input vector, we can use VPERMD and VPERMPS for the full
             two-operand shuffle.  */
          t1 = gen_reg_rtx (V8SImode);
          t2 = gen_reg_rtx (V8SImode);
          emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
          emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
          emit_insn (gen_avx2_permvarv8si (t1, t2, t1));
          emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
          return;

        case V4SFmode:
          t1 = gen_reg_rtx (V8SFmode);
          t2 = gen_reg_rtx (V8SFmode);
          mask = gen_lowpart (V4SFmode, mask);
          emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
          emit_insn (gen_avx_vec_concatv8sf (t2, mask, mask));
          emit_insn (gen_avx2_permvarv8sf (t1, t2, t1));
          emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
          return;

        case V32QImode:
          t1 = gen_reg_rtx (V32QImode);
          t2 = gen_reg_rtx (V32QImode);
          t3 = gen_reg_rtx (V32QImode);
          vt2 = GEN_INT (128);
          for (i = 0; i < 32; i++)
            vec[i] = vt2;
          vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
          vt = force_reg (V32QImode, vt);
          for (i = 0; i < 32; i++)
            vec[i] = i < 16 ? vt2 : const0_rtx;
          vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
          vt2 = force_reg (V32QImode, vt2);
          /* From mask create two adjusted masks, which contain the same
             bits as mask in the low 7 bits of each vector element.
             The first mask will have the most significant bit clear
             if it requests element from the same 128-bit lane
             and MSB set if it requests element from the other 128-bit lane.
             The second mask will have the opposite values of the MSB,
             and additionally will have its 128-bit lanes swapped.
             E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
             t1   { 07 92 9e 09 ... | 17 19 85 1f ... } and
             t3   { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
             stands for other 12 bytes.  */
          /* The bit whether element is from the same lane or the other
             lane is bit 4, so shift it up by 3 to the MSB position.  */
          emit_insn (gen_ashlv4di3 (gen_lowpart (V4DImode, t1),
                                    gen_lowpart (V4DImode, mask),
                                    GEN_INT (3)));
          /* Clear MSB bits from the mask just in case it had them set.  */
          emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
          /* After this t1 will have MSB set for elements from other lane.  */
          emit_insn (gen_xorv32qi3 (t1, t1, vt2));
          /* Clear bits other than MSB.  */
          emit_insn (gen_andv32qi3 (t1, t1, vt));
          /* Or in the lower bits from mask into t3.  */
          emit_insn (gen_iorv32qi3 (t3, t1, t2));
          /* And invert MSB bits in t1, so MSB is set for elements from the
             same lane.  */
          emit_insn (gen_xorv32qi3 (t1, t1, vt));
          /* Swap 128-bit lanes in t3.  */
          emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
                                          gen_lowpart (V4DImode, t3),
                                          const2_rtx, GEN_INT (3),
                                          const0_rtx, const1_rtx));
          /* And or in the lower bits from mask into t1.  */
          emit_insn (gen_iorv32qi3 (t1, t1, t2));
          if (one_operand_shuffle)
            {
              /* Each of these shuffles will put 0s in places where
                 element from the other 128-bit lane is needed, otherwise
                 will shuffle in the requested value.  */
              emit_insn (gen_avx2_pshufbv32qi3 (t3, op0, t3));
              emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
              /* For t3 the 128-bit lanes are swapped again.  */
              emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
                                              gen_lowpart (V4DImode, t3),
                                              const2_rtx, GEN_INT (3),
                                              const0_rtx, const1_rtx));
              /* And oring both together leads to the result.  */
              emit_insn (gen_iorv32qi3 (target, t1, t3));
              return;
            }

          t4 = gen_reg_rtx (V32QImode);
          /* Similarly to the one_operand_shuffle code above, just
             repeated twice for each operand; the merge_two: code
             below will merge the two results together.  */
          emit_insn (gen_avx2_pshufbv32qi3 (t4, op0, t3));
          emit_insn (gen_avx2_pshufbv32qi3 (t3, op1, t3));
          emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
          emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
          emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t4),
                                          gen_lowpart (V4DImode, t4),
                                          const2_rtx, GEN_INT (3),
                                          const0_rtx, const1_rtx));
          emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
                                          gen_lowpart (V4DImode, t3),
                                          const2_rtx, GEN_INT (3),
                                          const0_rtx, const1_rtx));
          emit_insn (gen_iorv32qi3 (t4, t2, t4));
          emit_insn (gen_iorv32qi3 (t3, t1, t3));
          t1 = t4;
          t2 = t3;
          goto merge_two;

        default:
          gcc_assert (GET_MODE_SIZE (mode) <= 16);
          break;
        }
    }

  if (TARGET_XOP)
    {
      /* The XOP VPPERM insn supports three inputs.  By ignoring the
         one_operand_shuffle special case, we avoid creating another
         set of constant vectors in memory.  */
      one_operand_shuffle = false;

      /* mask = mask & {2*w-1, ...} */
      vt = GEN_INT (2*w - 1);
    }
  else
    {
      /* mask = mask & {w-1, ...} */
      vt = GEN_INT (w - 1);
    }

  for (i = 0; i < w; i++)
    vec[i] = vt;
  vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
  mask = expand_simple_binop (maskmode, AND, mask, vt,
                              NULL_RTX, 0, OPTAB_DIRECT);

  /* For non-QImode operations, convert the word permutation control
     into a byte permutation control.  */
  if (mode != V16QImode)
    {
      mask = expand_simple_binop (maskmode, ASHIFT, mask,
                                  GEN_INT (exact_log2 (e)),
                                  NULL_RTX, 0, OPTAB_DIRECT);

      /* Convert mask to vector of chars.  */
      mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));

      /* Replicate each of the input bytes into byte positions:
         (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
         (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
         (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}.  */
      for (i = 0; i < 16; ++i)
        vec[i] = GEN_INT (i/e * e);
      vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
      vt = force_const_mem (V16QImode, vt);
      if (TARGET_XOP)
        emit_insn (gen_xop_pperm (mask, mask, mask, vt));
      else
        emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));

      /* Convert it into the byte positions by doing
         mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...}  */
      for (i = 0; i < 16; ++i)
        vec[i] = GEN_INT (i % e);
      vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
      vt = force_const_mem (V16QImode, vt);
      emit_insn (gen_addv16qi3 (mask, mask, vt));
    }

  /* The actual shuffle operations all operate on V16QImode.  */
  op0 = gen_lowpart (V16QImode, op0);
  op1 = gen_lowpart (V16QImode, op1);
  target = gen_lowpart (V16QImode, target);

  if (TARGET_XOP)
    emit_insn (gen_xop_pperm (target, op0, op1, mask));
  else if (one_operand_shuffle)
    emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
  else
    {
      rtx xops[6];
      bool ok;

      /* Shuffle the two input vectors independently.  */
      t1 = gen_reg_rtx (V16QImode);
      t2 = gen_reg_rtx (V16QImode);
      emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
      emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));

 merge_two:
      /* Then merge them together.  The key is whether any given control
         element contained a bit set that indicates the second word.  */
      mask = operands[3];
      vt = GEN_INT (w);
      if (maskmode == V2DImode && !TARGET_SSE4_1)
        {
          /* Without SSE4.1, we don't have V2DImode EQ.  Perform one
             more shuffle to convert the V2DI input mask into a V4SI
             input mask, at which point the masking that
             ix86_expand_int_vcond performs will work as desired.  */
          rtx t3 = gen_reg_rtx (V4SImode);
          emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
                                        const0_rtx, const0_rtx,
                                        const2_rtx, const2_rtx));
          mask = t3;
          maskmode = V4SImode;
          e = w = 4;
        }

      for (i = 0; i < w; i++)
        vec[i] = vt;
      vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
      vt = force_reg (maskmode, vt);
      mask = expand_simple_binop (maskmode, AND, mask, vt,
                                  NULL_RTX, 0, OPTAB_DIRECT);

      xops[0] = gen_lowpart (mode, operands[0]);
      xops[1] = gen_lowpart (mode, t2);
      xops[2] = gen_lowpart (mode, t1);
      xops[3] = gen_rtx_EQ (maskmode, mask, vt);
      xops[4] = mask;
      xops[5] = vt;
      ok = ix86_expand_int_vcond (xops);
      gcc_assert (ok);
    }
}
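
/* Recap of the mask arithmetic above: a V4DImode shuffle with mask
   { A B C D } is rewritten as the V8SImode VPERMD control
   { 2A 2A+1 2B 2B+1 2C 2C+1 2D 2D+1 }, fetching each 64-bit element
   as its two 32-bit halves.  */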

/* Unpack OP[1] into the next wider integer vector type.  UNSIGNED_P is
   true if we should do zero extension, else sign extension.  HIGH_P is
   true if we want the N/2 high elements, else the low elements.  */

void
ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
{
  enum machine_mode imode = GET_MODE (operands[1]);
  rtx tmp, dest;

  if (TARGET_SSE4_1)
    {
      rtx (*unpack)(rtx, rtx);
      rtx (*extract)(rtx, rtx) = NULL;
      enum machine_mode halfmode = BLKmode;

      switch (imode)
        {
        case V32QImode:
          if (unsigned_p)
            unpack = gen_avx2_zero_extendv16qiv16hi2;
          else
            unpack = gen_avx2_sign_extendv16qiv16hi2;
          halfmode = V16QImode;
          extract
            = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
          break;
        case V16HImode:
          if (unsigned_p)
            unpack = gen_avx2_zero_extendv8hiv8si2;
          else
            unpack = gen_avx2_sign_extendv8hiv8si2;
          halfmode = V8HImode;
          extract
            = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
          break;
        case V8SImode:
          if (unsigned_p)
            unpack = gen_avx2_zero_extendv4siv4di2;
          else
            unpack = gen_avx2_sign_extendv4siv4di2;
          halfmode = V4SImode;
          extract
            = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
          break;
        case V16QImode:
          if (unsigned_p)
            unpack = gen_sse4_1_zero_extendv8qiv8hi2;
          else
            unpack = gen_sse4_1_sign_extendv8qiv8hi2;
          break;
        case V8HImode:
          if (unsigned_p)
            unpack = gen_sse4_1_zero_extendv4hiv4si2;
          else
            unpack = gen_sse4_1_sign_extendv4hiv4si2;
          break;
        case V4SImode:
          if (unsigned_p)
            unpack = gen_sse4_1_zero_extendv2siv2di2;
          else
            unpack = gen_sse4_1_sign_extendv2siv2di2;
          break;
        default:
          gcc_unreachable ();
        }

      if (GET_MODE_SIZE (imode) == 32)
        {
          tmp = gen_reg_rtx (halfmode);
          emit_insn (extract (tmp, operands[1]));
        }
      else if (high_p)
        {
          /* Shift higher 8 bytes to lower 8 bytes.  */
          tmp = gen_reg_rtx (imode);
          emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
                                         gen_lowpart (V1TImode, operands[1]),
                                         GEN_INT (64)));
        }
      else
        tmp = operands[1];

      emit_insn (unpack (operands[0], tmp));
    }
  else
    {
      rtx (*unpack)(rtx, rtx, rtx);

      switch (imode)
        {
        case V16QImode:
          if (high_p)
            unpack = gen_vec_interleave_highv16qi;
          else
            unpack = gen_vec_interleave_lowv16qi;
          break;
        case V8HImode:
          if (high_p)
            unpack = gen_vec_interleave_highv8hi;
          else
            unpack = gen_vec_interleave_lowv8hi;
          break;
        case V4SImode:
          if (high_p)
            unpack = gen_vec_interleave_highv4si;
          else
            unpack = gen_vec_interleave_lowv4si;
          break;
        default:
          gcc_unreachable ();
        }

      dest = gen_lowpart (imode, operands[0]);

      if (unsigned_p)
        tmp = force_reg (imode, CONST0_RTX (imode));
      else
        tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
                                   operands[1], pc_rtx, pc_rtx);

      emit_insn (unpack (dest, operands[1], tmp));
    }
}
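
/* Without SSE4.1 there is no extension instruction, so the code above
   interleaves the input either with a zero vector (zero extension) or
   with a vector of sign-bit copies produced by the GT comparison
   against zero (sign extension).  */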

/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */
bool
ix86_expand_int_addcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx flags;
  rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
  rtx compare_op;
  rtx val = const0_rtx;
  bool fpcmp = false;
  enum machine_mode mode;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (operands[3] != const1_rtx
      && operands[3] != constm1_rtx)
    return false;
  if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
    return false;
  code = GET_CODE (compare_op);

  flags = XEXP (compare_op, 0);

  if (GET_MODE (flags) == CCFPmode
      || GET_MODE (flags) == CCFPUmode)
    {
      fpcmp = true;
      code = ix86_fp_compare_code_to_integer (code);
    }

  if (code != LTU)
    {
      val = constm1_rtx;
      if (fpcmp)
        PUT_CODE (compare_op,
                  reverse_condition_maybe_unordered
                    (GET_CODE (compare_op)));
      else
        PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
    }

  mode = GET_MODE (operands[0]);

  /* Construct either adc or sbb insn.  */
  if ((code == LTU) == (operands[3] == constm1_rtx))
    {
      switch (mode)
        {
        case QImode:
          insn = gen_subqi3_carry;
          break;
        case HImode:
          insn = gen_subhi3_carry;
          break;
        case SImode:
          insn = gen_subsi3_carry;
          break;
        case DImode:
          insn = gen_subdi3_carry;
          break;
        default:
          gcc_unreachable ();
        }
    }
  else
    {
      switch (mode)
        {
        case QImode:
          insn = gen_addqi3_carry;
          break;
        case HImode:
          insn = gen_addhi3_carry;
          break;
        case SImode:
          insn = gen_addsi3_carry;
          break;
        case DImode:
          insn = gen_adddi3_carry;
          break;
        default:
          gcc_unreachable ();
        }
    }
  emit_insn (insn (operands[0], operands[2], val, flags, compare_op));

  return true;
}
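
/* E.g. "x = (a < b) ? x + 1 : x" with an unsigned comparison becomes,
   in the simplest case,
       cmp  a, b      ; CF set iff a < b
       adc  $0, x
   using VAL == 0; non-LTU conditions are first reversed so that the
   carry flag can still drive the adc/sbb.  */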

/* Split operands 0 and 1 into half-mode parts.  Similar to split_double_mode,
   but works for floating point parameters and nonoffsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally four parts are generated.  */

static int
ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
{
  int size;

  if (!TARGET_64BIT)
    size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
  gcc_assert (size >= 2 && size <= 4);

  /* Optimize constant pool reference to immediates.  This is used by fp
     moves, that force all constants to memory to allow combining.  */
  if (MEM_P (operand) && MEM_READONLY_P (operand))
    {
      rtx tmp = maybe_get_pool_constant (operand);
      if (tmp)
        operand = tmp;
    }

  if (MEM_P (operand) && !offsettable_memref_p (operand))
    {
      /* The only non-offsetable memories we handle are pushes.  */
      int ok = push_operand (operand, VOIDmode);

      gcc_assert (ok);

      operand = copy_rtx (operand);
      PUT_MODE (operand, word_mode);
      parts[0] = parts[1] = parts[2] = parts[3] = operand;
      return size;
    }

  if (GET_CODE (operand) == CONST_VECTOR)
    {
      enum machine_mode imode = int_mode_for_mode (mode);
      /* Caution: if we looked through a constant pool memory above,
         the operand may actually have a different mode now.  That's
         ok, since we want to pun this all the way back to an integer.  */
      operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
      gcc_assert (operand != NULL);
      mode = imode;
    }

  if (!TARGET_64BIT)
    {
      if (mode == DImode)
        split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
      else
        {
          int i;

          if (REG_P (operand))
            {
              gcc_assert (reload_completed);
              for (i = 0; i < size; i++)
                parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
            }
          else if (offsettable_memref_p (operand))
            {
              operand = adjust_address (operand, SImode, 0);
              parts[0] = operand;
              for (i = 1; i < size; i++)
                parts[i] = adjust_address (operand, SImode, 4 * i);
            }
          else if (GET_CODE (operand) == CONST_DOUBLE)
            {
              REAL_VALUE_TYPE r;
              long l[4];

              REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
              switch (mode)
                {
                case TFmode:
                  real_to_target (l, &r, mode);
                  parts[3] = gen_int_mode (l[3], SImode);
                  parts[2] = gen_int_mode (l[2], SImode);
                  break;
                case XFmode:
                  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
                  parts[2] = gen_int_mode (l[2], SImode);
                  break;
                case DFmode:
                  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
                  break;
                default:
                  gcc_unreachable ();
                }
              parts[1] = gen_int_mode (l[1], SImode);
              parts[0] = gen_int_mode (l[0], SImode);
            }
          else
            gcc_unreachable ();
        }
    }
  else
    {
      if (mode == TImode)
        split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
        {
          enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
          if (REG_P (operand))
            {
              gcc_assert (reload_completed);
              parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
              parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
            }
          else if (offsettable_memref_p (operand))
            {
              operand = adjust_address (operand, DImode, 0);
              parts[0] = operand;
              parts[1] = adjust_address (operand, upper_mode, 8);
            }
          else if (GET_CODE (operand) == CONST_DOUBLE)
            {
              REAL_VALUE_TYPE r;
              long l[4];

              REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
              real_to_target (l, &r, mode);

              /* Do not use shift by 32 to avoid warning on 32bit systems.  */
              if (HOST_BITS_PER_WIDE_INT >= 64)
                parts[0]
                  = gen_int_mode
                      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
                       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
                       DImode);
              else
                parts[0] = immed_double_const (l[0], l[1], DImode);

              if (upper_mode == SImode)
                parts[1] = gen_int_mode (l[2], SImode);
              else if (HOST_BITS_PER_WIDE_INT >= 64)
                parts[1]
                  = gen_int_mode
                      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
                       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
                       DImode);
              else
                parts[1] = immed_double_const (l[2], l[3], DImode);
            }
          else
            gcc_unreachable ();
        }
    }

  return size;
}
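
/* For instance, on !TARGET_64BIT an XFmode operand yields three SImode
   parts and TFmode four, while on TARGET_64BIT XFmode splits into one
   DImode part plus an SImode upper part.  */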

/* Emit insns to perform a move or push of DI, DF, XF, and TF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-5 contain the input values
   in the correct order; operands 6-9 contain the output values.  */

void
ix86_split_long_move (rtx operands[])
{
  rtx part[2][4];
  int nparts, i, j;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);
  bool collisionparts[4];

  /* The DFmode expanders may ask us to move a double.
     For a 64-bit target this is a single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
    {
      /* Optimize constant pool reference to immediates.  This is used by
         fp moves, that force all constants to memory to allow combining.  */

      if (MEM_P (operands[1])
          && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
          && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
        operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
        {
          operands[0] = copy_rtx (operands[0]);
          PUT_MODE (operands[0], word_mode);
        }
      else
        operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else
    gcc_assert (!MEM_P (operands[0])
                || offsettable_memref_p (operands[0]));

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.  */
  if (push && MEM_P (operands[1])
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      rtx src_base = XEXP (part[1][nparts - 1], 0);

      /* Compensate for the stack decrement by 4.  */
      if (!TARGET_64BIT && nparts == 3
          && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
        src_base = plus_constant (src_base, 4);

      /* src_base refers to the stack pointer and is
         automatically decreased by emitted push.  */
      for (i = 0; i < nparts; i++)
        part[1][i] = change_address (part[1][i],
                                     GET_MODE (part[1][i]), src_base);
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && MEM_P (part[1][0]))
    {
      rtx tmp;

      for (i = 0; i < nparts; i++)
        {
          collisionparts[i]
            = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
          if (collisionparts[i])
            collisions++;
        }

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3 && collisionparts[1])
        {
          tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
          tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
        }
      else if (collisions == 1
               && nparts == 4
               && (collisionparts[1] || collisionparts[2]))
        {
          if (collisionparts[1])
            {
              tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
              tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
            }
          else
            {
              tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
              tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
            }
        }

      /* If there are more collisions, we can't handle it by reordering.
         Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
        {
          rtx base;

          collisions = 1;

          base = part[0][nparts - 1];

          /* Handle the case when the last part isn't valid for lea.
             Happens in 64-bit mode storing the 12-byte XFmode.  */
          if (GET_MODE (base) != Pmode)
            base = gen_rtx_REG (Pmode, REGNO (base));

          emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
          part[1][0] = replace_equiv_address (part[1][0], base);
          for (i = 1; i < nparts; i++)
            {
              tmp = plus_constant (base, UNITS_PER_WORD * i);
              part[1][i] = replace_equiv_address (part[1][i], tmp);
            }
        }
    }

  if (push)
    {
      if (!TARGET_64BIT)
        {
          if (nparts == 3)
            {
              if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
                emit_insn (gen_addsi3 (stack_pointer_rtx,
                                       stack_pointer_rtx, GEN_INT (-4)));
              emit_move_insn (part[0][2], part[1][2]);
            }
          else if (nparts == 4)
            {
              emit_move_insn (part[0][3], part[1][3]);
              emit_move_insn (part[0][2], part[1][2]);
            }
        }
      else
        {
          /* In 64bit mode we don't have 32bit push available.  In case this
             is a register, it is OK - we will just use the larger
             counterpart.  We also retype memory - these come from an attempt
             to avoid REX prefix on moving of second half of TFmode value.  */
          if (GET_MODE (part[1][1]) == SImode)
            {
              switch (GET_CODE (part[1][1]))
                {
                case MEM:
                  part[1][1] = adjust_address (part[1][1], DImode, 0);
                  break;

                case REG:
                  part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
                  break;

                default:
                  gcc_unreachable ();
                }

              if (GET_MODE (part[1][0]) == SImode)
                part[1][0] = part[1][1];
            }
        }
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
           || (nparts == 3
               && REGNO (part[0][0]) == REGNO (part[1][2]))
           || (nparts == 4
               && REGNO (part[0][0]) == REGNO (part[1][3]))))
      || (collisions > 0
          && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      for (i = 0, j = nparts - 1; i < nparts; i++, j--)
        {
          operands[2 + i] = part[0][j];
          operands[6 + i] = part[1][j];
        }
    }
  else
    {
      for (i = 0; i < nparts; i++)
        {
          operands[2 + i] = part[0][i];
          operands[6 + i] = part[1][i];
        }
    }

  /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
  if (optimize_insn_for_size_p ())
    {
      for (j = 0; j < nparts - 1; j++)
        if (CONST_INT_P (operands[6 + j])
            && operands[6 + j] != const0_rtx
            && REG_P (operands[2 + j]))
          for (i = j; i < nparts - 1; i++)
            if (CONST_INT_P (operands[7 + i])
                && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
              operands[7 + i] = operands[2 + j];
    }

  for (i = 0; i < nparts; i++)
    emit_move_insn (operands[2 + i], operands[6 + i]);
}
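
/* The collision logic above matters when a memory source is addressed
   through a register that is also a destination part: the parts are
   reordered when a single middle collision allows it, and otherwise
   the source address is precomputed with an lea so only one colliding
   move remains.  */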

/* Helper function of ix86_split_ashl used to generate an SImode/DImode
   left shift by a constant, either using a single shift or
   a sequence of add instructions.  */

static void
ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
{
  rtx (*insn)(rtx, rtx, rtx);

  if (count == 1
      || (count * ix86_cost->add <= ix86_cost->shift_const
          && !optimize_insn_for_size_p ()))
    {
      insn = mode == DImode ? gen_addsi3 : gen_adddi3;
      while (count-- > 0)
        emit_insn (insn (operand, operand, operand));
    }
  else
    {
      insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
      emit_insn (insn (operand, operand, GEN_INT (count)));
    }
}
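
/* E.g. a left shift by 2 becomes two "add reg, reg" instructions
   whenever twice the add cost does not exceed the constant-shift cost
   and we are not optimizing for size.  */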

void
ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_ashl3)(rtx, rtx, rtx);
  rtx (*gen_shld)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count >= half_width)
        {
          emit_move_insn (high[0], low[1]);
          emit_move_insn (low[0], const0_rtx);

          if (count > half_width)
            ix86_expand_ashl_const (high[0], count - half_width, mode);
        }
      else
        {
          gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;

          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);

          emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
          ix86_expand_ashl_const (low[0], count, mode);
        }
      return;
    }

  split_double_mode (mode, operands, 1, low, high);

  gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;

  if (operands[1] == const1_rtx)
    {
      /* Assuming we've chosen QImode-capable registers, then 1 << N
         can be done with two 32/64-bit shifts, no branches, no cmoves.  */
      if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
        {
          rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);

          ix86_expand_clear (low[0]);
          ix86_expand_clear (high[0]);
          emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));

          d = gen_lowpart (QImode, low[0]);
          d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
          s = gen_rtx_EQ (QImode, flags, const0_rtx);
          emit_insn (gen_rtx_SET (VOIDmode, d, s));

          d = gen_lowpart (QImode, high[0]);
          d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
          s = gen_rtx_NE (QImode, flags, const0_rtx);
          emit_insn (gen_rtx_SET (VOIDmode, d, s));
        }

      /* Otherwise, we can get the same results by manually performing
         a bit extract operation on bit 5/6, and then performing the two
         shifts.  The two methods of getting 0/1 into low/high are exactly
         the same size.  Avoiding the shift in the bit extract case helps
         pentium4 a bit; no one else seems to care much either way.  */
      else
        {
          enum machine_mode half_mode;
          rtx (*gen_lshr3)(rtx, rtx, rtx);
          rtx (*gen_and3)(rtx, rtx, rtx);
          rtx (*gen_xor3)(rtx, rtx, rtx);
          HOST_WIDE_INT bits;
          rtx x;

          if (mode == DImode)
            {
              half_mode = SImode;
              gen_lshr3 = gen_lshrsi3;
              gen_and3 = gen_andsi3;
              gen_xor3 = gen_xorsi3;
              bits = 5;
            }
          else
            {
              half_mode = DImode;
              gen_lshr3 = gen_lshrdi3;
              gen_and3 = gen_anddi3;
              gen_xor3 = gen_xordi3;
              bits = 6;
            }

          if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
            x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
          else
            x = gen_lowpart (half_mode, operands[2]);
          emit_insn (gen_rtx_SET (VOIDmode, high[0], x));

          emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
          emit_insn (gen_and3 (high[0], high[0], const1_rtx));
          emit_move_insn (low[0], high[0]);
          emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
        }

      emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
      emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
      return;
    }

  if (operands[1] == constm1_rtx)
    {
      /* For -1 << N, we can avoid the shld instruction, because we
         know that we're shifting 0...31/63 ones into a -1.  */
      emit_move_insn (low[0], constm1_rtx);
      if (optimize_insn_for_size_p ())
        emit_move_insn (high[0], low[0]);
      else
        emit_move_insn (high[0], constm1_rtx);
    }
  else
    {
      gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;

      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);
      emit_insn (gen_shld (high[0], low[0], operands[2]));
    }

  emit_insn (gen_ashl3 (low[0], low[0], operands[2]));

  if (TARGET_CMOVE && scratch)
    {
      rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
        = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

      ix86_expand_clear (scratch);
      emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
    }
  else
    {
      rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
        = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;

      emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
    }
}
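
/* For a variable count the sequence above is: shld to feed low bits
   into the high half, shl on the low half, then an adjustment (a
   cmove through the scratch register when available, a conditional
   jump otherwise) that fixes the halves up when the count is at least
   the half width.  */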

void
ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_ashr3)(rtx, rtx, rtx)
    = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
  rtx (*gen_shrd)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count == GET_MODE_BITSIZE (mode) - 1)
        {
          emit_move_insn (high[0], high[1]);
          emit_insn (gen_ashr3 (high[0], high[0],
                                GEN_INT (half_width - 1)));
          emit_move_insn (low[0], high[0]);
        }
      else if (count >= half_width)
        {
          emit_move_insn (low[0], high[1]);
          emit_move_insn (high[0], low[0]);
          emit_insn (gen_ashr3 (high[0], high[0],
                                GEN_INT (half_width - 1)));

          if (count > half_width)
            emit_insn (gen_ashr3 (low[0], low[0],
                                  GEN_INT (count - half_width)));
        }
      else
        {
          gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);

          emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
          emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
        }
    }
  else
    {
      gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);

      emit_insn (gen_shrd (low[0], high[0], operands[2]));
      emit_insn (gen_ashr3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
        {
          rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
            = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

          emit_move_insn (scratch, high[0]);
          emit_insn (gen_ashr3 (scratch, scratch,
                                GEN_INT (half_width - 1)));
          emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
                                          scratch));
        }
      else
        {
          rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
            = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;

          emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
        }
    }
}

void
ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_lshr3)(rtx, rtx, rtx)
    = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
  rtx (*gen_shrd)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count >= half_width)
        {
          emit_move_insn (low[0], high[1]);
          ix86_expand_clear (high[0]);

          if (count > half_width)
            emit_insn (gen_lshr3 (low[0], low[0],
                                  GEN_INT (count - half_width)));
        }
      else
        {
          gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);

          emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
          emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
        }
    }
  else
    {
      gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);

      emit_insn (gen_shrd (low[0], high[0], operands[2]));
      emit_insn (gen_lshr3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
        {
          rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
            = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

          ix86_expand_clear (scratch);
          emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
                                          scratch));
        }
      else
        {
          rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
            = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;

          emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
        }
    }
}

/* Predict just emitted jump instruction to be taken with probability PROB.  */
static void
predict_jump (int prob)
{
  rtx insn = get_last_insn ();
  gcc_assert (JUMP_P (insn));
  add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
}

/* Helper function for the string operations below.  Test VARIABLE whether
   it is aligned to VALUE bytes.  If it is, jump to the label.  */
static rtx
ix86_expand_aligntest (rtx variable, int value, bool epilogue)
{
  rtx label = gen_label_rtx ();
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
                           1, label);
  if (epilogue)
    predict_jump (REG_BR_PROB_BASE * 50 / 100);
  else
    predict_jump (REG_BR_PROB_BASE * 90 / 100);
  return label;
}

/* Adjust COUNTER by the VALUE.  */
static void
ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
{
  rtx (*gen_add)(rtx, rtx, rtx)
    = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;

  emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
}

/* Zero extend possibly SImode EXP to Pmode register.  */
rtx
ix86_zero_extend_to_Pmode (rtx exp)
{
  rtx r;
  if (GET_MODE (exp) == VOIDmode)
    return force_reg (Pmode, exp);
  if (GET_MODE (exp) == Pmode)
    return copy_to_mode_reg (Pmode, exp);
  r = gen_reg_rtx (Pmode);
  emit_insn (gen_zero_extendsidi2 (r, exp));
  return r;
}

/* Divide COUNTREG by SCALE.  */
static rtx
scale_counter (rtx countreg, int scale)
{
  rtx sc;

  if (scale == 1)
    return countreg;
  if (CONST_INT_P (countreg))
    return GEN_INT (INTVAL (countreg) / scale);
  gcc_assert (REG_P (countreg));

  sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
                            GEN_INT (exact_log2 (scale)),
                            NULL, 1, OPTAB_DIRECT);
  return sc;
}

/* Return mode for the memcpy/memset loop counter.  Prefer SImode over
   DImode for constant loop counts.  */
static enum machine_mode
counter_mode (rtx count_exp)
{
  if (GET_MODE (count_exp) != VOIDmode)
    return GET_MODE (count_exp);
  if (!CONST_INT_P (count_exp))
    return Pmode;
  if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
    return DImode;
  return SImode;
}
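
/* SImode is preferred for counts that fit in 32 bits, presumably
   because the 32-bit instruction forms are shorter to encode on
   64-bit targets.  */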

/* When SRCPTR is non-NULL, output a simple loop to move memory pointed
   to by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
   the overall size is COUNT, specified in bytes.  When SRCPTR is NULL,
   output the equivalent loop to set memory by VALUE (supposed to be
   in MODE).

   The size is rounded down to a whole number of chunks moved at once.
   SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info.  */

static void
expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
                               rtx destptr, rtx srcptr, rtx value,
                               rtx count, enum machine_mode mode, int unroll,
                               int expected_size)
{
  rtx out_label, top_label, iter, tmp;
  enum machine_mode iter_mode = counter_mode (count);
  rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
  rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
  rtx size;
  rtx x_addr;
  rtx y_addr;
  int i;

  top_label = gen_label_rtx ();
  out_label = gen_label_rtx ();
  iter = gen_reg_rtx (iter_mode);

  size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
                              NULL, 1, OPTAB_DIRECT);
  /* Those two should combine.  */
  if (piece_size == const1_rtx)
    {
      emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
                               true, out_label);
      predict_jump (REG_BR_PROB_BASE * 10 / 100);
    }
  emit_move_insn (iter, const0_rtx);

  emit_label (top_label);

  tmp = convert_modes (Pmode, iter_mode, iter, true);
  x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
  destmem = change_address (destmem, mode, x_addr);

  if (srcmem)
    {
      y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
      srcmem = change_address (srcmem, mode, y_addr);

      /* When unrolling for chips that reorder memory reads and writes,
         we can save registers by using single temporary.
         Also using 4 temporaries is overkill in 32bit mode.  */
      if (!TARGET_64BIT && 0)
        {
          for (i = 0; i < unroll; i++)
            {
              if (i)
                {
                  destmem =
                    adjust_address (copy_rtx (destmem), mode,
                                    GET_MODE_SIZE (mode));
                  srcmem =
                    adjust_address (copy_rtx (srcmem), mode,
                                    GET_MODE_SIZE (mode));
                }
              emit_move_insn (destmem, srcmem);
            }
        }
      else
        {
          rtx tmpreg[4];
          gcc_assert (unroll <= 4);
          for (i = 0; i < unroll; i++)
            {
              tmpreg[i] = gen_reg_rtx (mode);
              if (i)
                srcmem =
                  adjust_address (copy_rtx (srcmem), mode,
                                  GET_MODE_SIZE (mode));
              emit_move_insn (tmpreg[i], srcmem);
            }
          for (i = 0; i < unroll; i++)
            {
              if (i)
                destmem =
                  adjust_address (copy_rtx (destmem), mode,
                                  GET_MODE_SIZE (mode));
              emit_move_insn (destmem, tmpreg[i]);
            }
        }
    }
  else
    for (i = 0; i < unroll; i++)
      {
        if (i)
          destmem =
            adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
        emit_move_insn (destmem, value);
      }

  tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
                             true, OPTAB_LIB_WIDEN);
  if (tmp != iter)
    emit_move_insn (iter, tmp);

  emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
                           true, top_label);
  if (expected_size != -1)
    {
      expected_size /= GET_MODE_SIZE (mode) * unroll;
      if (expected_size == 0)
        predict_jump (0);
      else if (expected_size > REG_BR_PROB_BASE)
        predict_jump (REG_BR_PROB_BASE - 1);
      else
        predict_jump (REG_BR_PROB_BASE
                      - (REG_BR_PROB_BASE + expected_size / 2)
                        / expected_size);
    }
  else
    predict_jump (REG_BR_PROB_BASE * 80 / 100);
  iter = ix86_zero_extend_to_Pmode (iter);
  tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
                             true, OPTAB_LIB_WIDEN);
  if (tmp != destptr)
    emit_move_insn (destptr, tmp);
  if (srcptr)
    {
      tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
                                 true, OPTAB_LIB_WIDEN);
      if (tmp != srcptr)
        emit_move_insn (srcptr, tmp);
    }
  emit_label (out_label);
}
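
/* Shape of the emitted loop, for reference:
       size = count & -piece_size;
       iter = 0;
     top:
       <unroll x (move or store one MODE chunk at offset iter)>
       iter += piece_size;
       if (iter < size) goto top;
     out:
       destptr += iter;  (and srcptr += iter)  */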

/* Output "rep; mov" instruction.
   Arguments have same meaning as for previous function.  */
static void
expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
                           rtx destptr, rtx srcptr,
                           rtx count,
                           enum machine_mode mode)
{
  rtx destexp;
  rtx srcexp;
  rtx countreg;
  HOST_WIDE_INT rounded_count;

  /* If the size is known, it is shorter to use rep movs.  */
  if (mode == QImode && CONST_INT_P (count)
      && !(INTVAL (count) & 3))
    mode = SImode;

  if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
    destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
  if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
    srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
  countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
                                                       GET_MODE_SIZE (mode)));
  if (mode != QImode)
    {
      destexp = gen_rtx_ASHIFT (Pmode, countreg,
                                GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
      srcexp = gen_rtx_ASHIFT (Pmode, countreg,
                               GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
    }
  else
    {
      destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
      srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
    }
  if (CONST_INT_P (count))
    {
      rounded_count = (INTVAL (count)
                       & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
      destmem = shallow_copy_rtx (destmem);
      srcmem = shallow_copy_rtx (srcmem);
      set_mem_size (destmem, rounded_count);
      set_mem_size (srcmem, rounded_count);
    }
  else
    {
      if (MEM_SIZE_KNOWN_P (destmem))
        clear_mem_size (destmem);
      if (MEM_SIZE_KNOWN_P (srcmem))
        clear_mem_size (srcmem);
    }
  emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
                          destexp, srcexp));
}

/* Output "rep; stos" instruction.
   Arguments have same meaning as for previous function.  */
static void
expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
                            rtx count, enum machine_mode mode,
                            rtx orig_value)
{
  rtx destexp;
  rtx countreg;
  HOST_WIDE_INT rounded_count;

  if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
    destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
  value = force_reg (mode, gen_lowpart (mode, value));
  countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
                                                       GET_MODE_SIZE (mode)));
  if (mode != QImode)
    {
      destexp = gen_rtx_ASHIFT (Pmode, countreg,
                                GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
    }
  else
    destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
  if (orig_value == const0_rtx && CONST_INT_P (count))
    {
      rounded_count = (INTVAL (count)
                       & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
      destmem = shallow_copy_rtx (destmem);
      set_mem_size (destmem, rounded_count);
    }
  else if (MEM_SIZE_KNOWN_P (destmem))
    clear_mem_size (destmem);
  emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
}

static void
emit_strmov (rtx destmem, rtx srcmem,
             rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
{
  rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
  rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
  emit_insn (gen_strmov (destptr, dest, srcptr, src));
}

/* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST.  */
static void
expand_movmem_epilogue (rtx destmem, rtx srcmem,
                        rtx destptr, rtx srcptr, rtx count, int max_size)
{
  rtx src, dest;
  if (CONST_INT_P (count))
    {
      HOST_WIDE_INT countval = INTVAL (count);
      int offset = 0;

      if ((countval & 0x10) && max_size > 16)
        {
          if (TARGET_64BIT)
            {
              emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
              emit_strmov (destmem, srcmem, destptr, srcptr, DImode,
                           offset + 8);
            }
          else
            gcc_unreachable ();
          offset += 16;
        }
      if ((countval & 0x08) && max_size > 8)
        {
          if (TARGET_64BIT)
            emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
          else
            {
              emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
              emit_strmov (destmem, srcmem, destptr, srcptr, SImode,
                           offset + 4);
            }
          offset += 8;
        }
      if ((countval & 0x04) && max_size > 4)
        {
          emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
          offset += 4;
        }
      if ((countval & 0x02) && max_size > 2)
        {
          emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
          offset += 2;
        }
      if ((countval & 0x01) && max_size > 1)
        {
          emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
          offset += 1;
        }
      return;
    }
  if (max_size > 8)
    {
      count = expand_simple_binop (GET_MODE (count), AND, count,
                                   GEN_INT (max_size - 1),
                                   count, 1, OPTAB_DIRECT);
      expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
                                     count, QImode, 1, 4);
      return;
    }

  /* When there are stringops, we can cheaply increase dest and src pointers.
     Otherwise we save code size by maintaining offset (zero is readily
     available from preceding rep operation) and using x86 addressing modes.
   */
  if (TARGET_SINGLE_STRINGOP)
    {
      if (max_size > 4)
        {
          rtx label = ix86_expand_aligntest (count, 4, true);
          src = change_address (srcmem, SImode, srcptr);
          dest = change_address (destmem, SImode, destptr);
          emit_insn (gen_strmov (destptr, dest, srcptr, src));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 2)
        {
          rtx label = ix86_expand_aligntest (count, 2, true);
          src = change_address (srcmem, HImode, srcptr);
          dest = change_address (destmem, HImode, destptr);
          emit_insn (gen_strmov (destptr, dest, srcptr, src));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 1)
        {
          rtx label = ix86_expand_aligntest (count, 1, true);
          src = change_address (srcmem, QImode, srcptr);
          dest = change_address (destmem, QImode, destptr);
          emit_insn (gen_strmov (destptr, dest, srcptr, src));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
    }
  else
    {
      rtx offset = force_reg (Pmode, const0_rtx);
      rtx tmp;

      if (max_size > 4)
        {
          rtx label = ix86_expand_aligntest (count, 4, true);
          src = change_address (srcmem, SImode, srcptr);
          dest = change_address (destmem, SImode, destptr);
          emit_move_insn (dest, src);
          tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
                                     true, OPTAB_LIB_WIDEN);
          if (tmp != offset)
            emit_move_insn (offset, tmp);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 2)
        {
          rtx label = ix86_expand_aligntest (count, 2, true);
          tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
          src = change_address (srcmem, HImode, tmp);
          tmp = gen_rtx_PLUS (Pmode, destptr, offset);
          dest = change_address (destmem, HImode, tmp);
          emit_move_insn (dest, src);
          tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
                                     true, OPTAB_LIB_WIDEN);
          if (tmp != offset)
            emit_move_insn (offset, tmp);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 1)
        {
          rtx label = ix86_expand_aligntest (count, 1, true);
          tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
          src = change_address (srcmem, QImode, tmp);
          tmp = gen_rtx_PLUS (Pmode, destptr, offset);
          dest = change_address (destmem, QImode, tmp);
          emit_move_insn (dest, src);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
    }
}

/* Output code to set at most count & (max_size - 1) bytes starting by DEST.  */
static void
expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
                                 rtx count, int max_size)
{
  count =
    expand_simple_binop (counter_mode (count), AND, count,
                         GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
  expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
                                 gen_lowpart (QImode, value), count, QImode,
                                 1, max_size / 2);
}

/* Output code to set at most count & (max_size - 1) bytes starting by DEST.  */
static void
expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count,
                        int max_size)
{
  rtx dest;

  if (CONST_INT_P (count))
    {
      HOST_WIDE_INT countval = INTVAL (count);
      int offset = 0;

      if ((countval & 0x10) && max_size > 16)
        {
          if (TARGET_64BIT)
            {
              dest = adjust_automodify_address_nv (destmem, DImode, destptr,
                                                   offset);
              emit_insn (gen_strset (destptr, dest, value));
              dest = adjust_automodify_address_nv (destmem, DImode, destptr,
                                                   offset + 8);
              emit_insn (gen_strset (destptr, dest, value));
            }
          else
            gcc_unreachable ();
          offset += 16;
        }
      if ((countval & 0x08) && max_size > 8)
        {
          if (TARGET_64BIT)
            {
              dest = adjust_automodify_address_nv (destmem, DImode, destptr,
                                                   offset);
              emit_insn (gen_strset (destptr, dest, value));
            }
          else
            {
              dest = adjust_automodify_address_nv (destmem, SImode, destptr,
                                                   offset);
              emit_insn (gen_strset (destptr, dest, value));
              dest = adjust_automodify_address_nv (destmem, SImode, destptr,
                                                   offset + 4);
              emit_insn (gen_strset (destptr, dest, value));
            }
          offset += 8;
        }
      if ((countval & 0x04) && max_size > 4)
        {
          dest = adjust_automodify_address_nv (destmem, SImode, destptr,
                                               offset);
          emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
          offset += 4;
        }
      if ((countval & 0x02) && max_size > 2)
        {
          dest = adjust_automodify_address_nv (destmem, HImode, destptr,
                                               offset);
          emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
          offset += 2;
        }
      if ((countval & 0x01) && max_size > 1)
        {
          dest = adjust_automodify_address_nv (destmem, QImode, destptr,
                                               offset);
          emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
          offset += 1;
        }
      return;
    }
  if (max_size > 32)
    {
      expand_setmem_epilogue_via_loop (destmem, destptr, value, count,
                                       max_size);
      return;
    }
  if (max_size > 16)
    {
      rtx label = ix86_expand_aligntest (count, 16, true);
      if (TARGET_64BIT)
        {
          dest = change_address (destmem, DImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
        }
      else
        {
          dest = change_address (destmem, SImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
        }
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 8)
    {
      rtx label = ix86_expand_aligntest (count, 8, true);
      if (TARGET_64BIT)
        {
          dest = change_address (destmem, DImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
        }
      else
        {
          dest = change_address (destmem, SImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
        }
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 4)
    {
      rtx label = ix86_expand_aligntest (count, 4, true);
      dest = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 2)
    {
      rtx label = ix86_expand_aligntest (count, 2, true);
      dest = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 1)
    {
      rtx label = ix86_expand_aligntest (count, 1, true);
      dest = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
}
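
/* With a variable count, the epilogue above tests the low count bits
   from 16 down to 1, so at most five conditional stores cover any
   residue below MAX_SIZE.  */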

/* Copy enough from SRC to DEST to align DEST, known to be aligned by
   ALIGN, to DESIRED_ALIGNMENT.  */
static void
expand_movmem_prologue (rtx destmem, rtx srcmem,
                        rtx destptr, rtx srcptr, rtx count,
                        int align, int desired_alignment)
{
  if (align <= 1 && desired_alignment > 1)
    {
      rtx label = ix86_expand_aligntest (destptr, 1, false);
      srcmem = change_address (srcmem, QImode, srcptr);
      destmem = change_address (destmem, QImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 1);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 2 && desired_alignment > 2)
    {
      rtx label = ix86_expand_aligntest (destptr, 2, false);
      srcmem = change_address (srcmem, HImode, srcptr);
      destmem = change_address (destmem, HImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 2);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 4 && desired_alignment > 4)
    {
      rtx label = ix86_expand_aligntest (destptr, 4, false);
      srcmem = change_address (srcmem, SImode, srcptr);
      destmem = change_address (destmem, SImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 4);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  gcc_assert (desired_alignment <= 8);
}

/* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be copied.  */
static rtx
expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
                                 int desired_align, int align_bytes)
{
  rtx src = *srcp;
  rtx orig_dst = dst;
  rtx orig_src = src;
  int off = 0;
  int src_align_bytes = get_mem_align_offset (src,
                                              desired_align * BITS_PER_UNIT);
  if (src_align_bytes >= 0)
    src_align_bytes = desired_align - src_align_bytes;
  if (align_bytes & 1)
    {
      dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
      src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
      off = 1;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  if (align_bytes & 2)
    {
      dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
      src = adjust_automodify_address_nv (src, HImode, srcreg, off);
      if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
        set_mem_align (dst, 2 * BITS_PER_UNIT);
      if (src_align_bytes >= 0
          && (src_align_bytes & 1) == (align_bytes & 1)
          && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
        set_mem_align (src, 2 * BITS_PER_UNIT);
      off = 2;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  if (align_bytes & 4)
    {
      dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
      src = adjust_automodify_address_nv (src, SImode, srcreg, off);
      if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
        set_mem_align (dst, 4 * BITS_PER_UNIT);
      if (src_align_bytes >= 0)
        {
          unsigned int src_align = 0;
          if ((src_align_bytes & 3) == (align_bytes & 3))
            src_align = 4;
          else if ((src_align_bytes & 1) == (align_bytes & 1))
            src_align = 2;
          if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
            set_mem_align (src, src_align * BITS_PER_UNIT);
        }
      off = 4;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
  src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
    set_mem_align (dst, desired_align * BITS_PER_UNIT);
  if (src_align_bytes >= 0)
    {
      unsigned int src_align = 0;
      if ((src_align_bytes & 7) == (align_bytes & 7))
        src_align = 8;
      else if ((src_align_bytes & 3) == (align_bytes & 3))
        src_align = 4;
      else if ((src_align_bytes & 1) == (align_bytes & 1))
        src_align = 2;
      if (src_align > (unsigned int) desired_align)
        src_align = desired_align;
      if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
        set_mem_align (src, src_align * BITS_PER_UNIT);
    }
  if (MEM_SIZE_KNOWN_P (orig_dst))
    set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
  if (MEM_SIZE_KNOWN_P (orig_src))
    set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
  *srcp = src;
  return dst;
}
/* Set enough bytes of DEST to align DEST, known to be aligned to ALIGN,
   to DESIRED_ALIGNMENT.  */
static void
expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
			int align, int desired_alignment)
{
  if (align <= 1 && desired_alignment > 1)
    {
      rtx label = ix86_expand_aligntest (destptr, 1, false);
      destmem = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
      ix86_adjust_counter (count, 1);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 2 && desired_alignment > 2)
    {
      rtx label = ix86_expand_aligntest (destptr, 2, false);
      destmem = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
      ix86_adjust_counter (count, 2);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 4 && desired_alignment > 4)
    {
      rtx label = ix86_expand_aligntest (destptr, 4, false);
      destmem = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
      ix86_adjust_counter (count, 4);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  gcc_assert (desired_alignment <= 8);
}
/* Set enough bytes of DST to align DST, known to be aligned to ALIGN,
   to DESIRED_ALIGN.  ALIGN_BYTES is how many bytes need to be stored.  */
static rtx
expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
				 int desired_align, int align_bytes)
{
  int off = 0;
  rtx orig_dst = dst;
  if (align_bytes & 1)
    {
      dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
      off = 1;
      emit_insn (gen_strset (destreg, dst,
			     gen_lowpart (QImode, value)));
    }
  if (align_bytes & 2)
    {
      dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
      if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
	set_mem_align (dst, 2 * BITS_PER_UNIT);
      off = 2;
      emit_insn (gen_strset (destreg, dst,
			     gen_lowpart (HImode, value)));
    }
  if (align_bytes & 4)
    {
      dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
      if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
	set_mem_align (dst, 4 * BITS_PER_UNIT);
      off = 4;
      emit_insn (gen_strset (destreg, dst,
			     gen_lowpart (SImode, value)));
    }
  dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
    set_mem_align (dst, desired_align * BITS_PER_UNIT);
  if (MEM_SIZE_KNOWN_P (orig_dst))
    set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
  return dst;
}
/* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation.  */
static enum stringop_alg
decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
	    int *dynamic_check)
{
  const struct stringop_algs * algs;
  bool optimize_for_speed;
  /* Algorithms using the rep prefix want at least edi and ecx;
     additionally, memset wants eax and memcpy wants esi.  Don't
     consider such algorithms if the user has appropriated those
     registers for their own purposes.  */
  bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
			     || (memset
				 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));

#define ALG_USABLE_P(alg) (rep_prefix_usable			\
			   || (alg != rep_prefix_1_byte		\
			       && alg != rep_prefix_4_byte	\
			       && alg != rep_prefix_8_byte))
  const struct processor_costs *cost;

  /* Even if the string operation call is cold, we still might spend a lot
     of time processing large blocks.  */
  if (optimize_function_for_size_p (cfun)
      || (optimize_insn_for_size_p ()
	  && expected_size != -1 && expected_size < 256))
    optimize_for_speed = false;
  else
    optimize_for_speed = true;

  cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;

  *dynamic_check = -1;
  if (memset)
    algs = &cost->memset[TARGET_64BIT != 0];
  else
    algs = &cost->memcpy[TARGET_64BIT != 0];
  if (ix86_stringop_alg != no_stringop && ALG_USABLE_P (ix86_stringop_alg))
    return ix86_stringop_alg;
  /* rep; movq or rep; movl is the smallest variant.  */
  else if (!optimize_for_speed)
    {
      if (!count || (count & 3))
	return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
      else
	return rep_prefix_usable ? rep_prefix_4_byte : loop;
    }
  /* Very tiny blocks are best handled via the loop; REP is expensive to
     set up.  */
  else if (expected_size != -1 && expected_size < 4)
    return loop_1_byte;
  else if (expected_size != -1)
    {
      unsigned int i;
      enum stringop_alg alg = libcall;
      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
	{
	  /* We get here if the algorithms that were not libcall-based
	     were rep-prefix based and we are unable to use rep prefixes
	     based on global register usage.  Break out of the loop and
	     use the heuristic below.  */
	  if (algs->size[i].max == 0)
	    break;
	  if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
	    {
	      enum stringop_alg candidate = algs->size[i].alg;

	      if (candidate != libcall && ALG_USABLE_P (candidate))
		alg = candidate;
	      /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
		 last non-libcall inline algorithm.  */
	      if (TARGET_INLINE_ALL_STRINGOPS)
		{
		  /* When the current size is best to be copied by a libcall,
		     but we are still forced to inline, run the heuristic below
		     that will pick code for medium sized blocks.  */
		  if (alg != libcall)
		    return alg;
		  break;
		}
	      else if (ALG_USABLE_P (candidate))
		return candidate;
	    }
	}
      gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
    }
  /* When asked to inline the call anyway, try to pick meaningful choice.
     We look for maximal size of block that is faster to copy by hand and
     take blocks of at most of that size guessing that average size will
     be roughly half of the block.

     If this turns out to be bad, we might simply specify the preferred
     choice in ix86_costs.  */
  if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
      && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
    {
      int max = -1;
      enum stringop_alg alg;
      int i;
      bool any_alg_usable_p = true;

      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
	{
	  enum stringop_alg candidate = algs->size[i].alg;
	  any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);

	  if (candidate != libcall && candidate
	      && ALG_USABLE_P (candidate))
	    max = algs->size[i].max;
	}
      /* If there aren't any usable algorithms, then recursing on
	 smaller sizes isn't going to find anything.  Just return the
	 simple byte-at-a-time copy loop.  */
      if (!any_alg_usable_p)
	{
	  /* Pick something reasonable.  */
	  if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
	    *dynamic_check = 128;
	  return loop_1_byte;
	}
      if (max == -1)
	max = 4096;
      alg = decide_alg (count, max / 2, memset, dynamic_check);
      gcc_assert (*dynamic_check == -1);
      gcc_assert (alg != libcall);
      if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
	*dynamic_check = max;
      return alg;
    }
  return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
#undef ALG_USABLE_P
}
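
/* Illustrative condensation (not in the original sources; the function
   name is hypothetical): the core of the size-table scan in decide_alg.
   Each stringop_algs::size entry is a {max, alg} pair checked in order,
   with max == -1 acting as a catch-all and max == 0 ending the table.
   For example, a table {{16, loop_1_byte}, {64, loop}, {-1, rep_prefix_4_byte}}
   sends sizes up to 16 to the byte loop, sizes up to 64 to the word loop,
   and everything larger to rep movsl.  */

static enum stringop_alg
pick_from_table (const struct stringop_algs *algs, HOST_WIDE_INT size)
{
  unsigned int i;

  for (i = 0; i < MAX_STRINGOP_ALGS; i++)
    {
      if (algs->size[i].max == 0)	/* End of the table.  */
	break;
      if (algs->size[i].max == -1 || algs->size[i].max >= size)
	return algs->size[i].alg;
    }
  return libcall;
}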
/* Decide on alignment.  We know that the operand is already aligned to ALIGN
   (ALIGN can be based on profile feedback and thus it is not 100% guaranteed).  */
static int
decide_alignment (int align,
		  enum stringop_alg alg,
		  int expected_size)
{
  int desired_align = 0;
  switch (alg)
    {
    case no_stringop:
      gcc_unreachable ();
    case loop:
    case unrolled_loop:
      desired_align = GET_MODE_SIZE (Pmode);
      break;
    case rep_prefix_8_byte:
      desired_align = 8;
      break;
    case rep_prefix_4_byte:
      /* PentiumPro has special logic triggering for 8-byte aligned blocks,
	 copying whole cachelines at once.  */
      if (TARGET_PENTIUMPRO)
	desired_align = 8;
      else
	desired_align = 4;
      break;
    case rep_prefix_1_byte:
      /* PentiumPro has special logic triggering for 8-byte aligned blocks,
	 copying whole cachelines at once.  */
      if (TARGET_PENTIUMPRO)
	desired_align = 8;
      else
	desired_align = 1;
      break;
    case loop_1_byte:
      desired_align = 1;
      break;
    case libcall:
      return 0;
    }

  if (optimize_size)
    desired_align = 1;
  if (desired_align < align)
    desired_align = align;
  if (expected_size != -1 && expected_size < 4)
    desired_align = align;
  return desired_align;
}
/* Return the smallest power of 2 greater than VAL.  */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}
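
/* Worked example (illustrative note, not from the original sources):
   smallest_pow2_greater_than (4) == 8 and
   smallest_pow2_greater_than (15) == 16 -- the result is always
   strictly greater than VAL, which lets the expanders below round an
   epilogue bound up to a power of two.  */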
/* Expand string move (memcpy) operation.  Use i386 string operations
   when profitable.  expand_setmem contains similar code.  The code
   depends upon architecture, block size and alignment, but always has
   the same overall structure:

   1) Prologue guard: a conditional that jumps up to the epilogues for
      small blocks that can be handled by the epilogue alone.  This is
      faster but also needed for correctness, since the prologue assumes
      the block is larger than the desired alignment.

      An optional dynamic check for size and a libcall for large
      blocks is emitted here too, with -minline-stringops-dynamically.

   2) Prologue: copy the first few bytes in order to get the destination
      aligned to DESIRED_ALIGN.  It is emitted only when ALIGN is less
      than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
      copied.  We emit either a jump tree on power-of-two sized
      blocks, or a byte loop.

   3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
      with the specified algorithm.

   4) Epilogue: code copying the tail of the block that is too small to
      be handled by the main body (or up to size guarded by the prologue
      guard).  */
bool
ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
		    rtx expected_align_exp, rtx expected_size_exp)
{
  rtx destreg;
  rtx srcreg;
  rtx label = NULL;
  rtx tmp;
  rtx jump_around_label = NULL;
  HOST_WIDE_INT align = 1;
  unsigned HOST_WIDE_INT count = 0;
  HOST_WIDE_INT expected_size = -1;
  int size_needed = 0, epilogue_size_needed;
  int desired_align = 0, align_bytes = 0;
  enum stringop_alg alg;
  int dynamic_check;
  bool need_zero_guard = false;

  if (CONST_INT_P (align_exp))
    align = INTVAL (align_exp);
  /* i386 can do misaligned access at reasonably increased cost.  */
  if (CONST_INT_P (expected_align_exp)
      && INTVAL (expected_align_exp) > align)
    align = INTVAL (expected_align_exp);
  /* ALIGN is the minimum of destination and source alignment, but we care here
     just about destination alignment.  */
  else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
    align = MEM_ALIGN (dst) / BITS_PER_UNIT;

  if (CONST_INT_P (count_exp))
    count = expected_size = INTVAL (count_exp);
  if (CONST_INT_P (expected_size_exp) && count == 0)
    expected_size = INTVAL (expected_size_exp);

  /* Make sure we don't need to care about overflow later on.  */
  if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
    return false;

  /* Step 0: Decide on preferred algorithm, desired alignment and
     size of chunks to be copied by main loop.  */

  alg = decide_alg (count, expected_size, false, &dynamic_check);
  desired_align = decide_alignment (align, alg, expected_size);

  if (!TARGET_ALIGN_STRINGOPS)
    align = desired_align;

  if (alg == libcall)
    return false;
  gcc_assert (alg != no_stringop);
  if (!count)
    count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (Pmode);
      break;
    case unrolled_loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
      break;
    case rep_prefix_8_byte:
      size_needed = 8;
      break;
    case rep_prefix_4_byte:
      size_needed = 4;
      break;
    case rep_prefix_1_byte:
      size_needed = 1;
      break;
    case loop_1_byte:
      need_zero_guard = true;
      size_needed = 1;
      break;
    }

  epilogue_size_needed = size_needed;

  /* Step 1: Prologue guard.  */

  /* Alignment code needs count to be in register.  */
  if (CONST_INT_P (count_exp) && desired_align > align)
    {
      if (INTVAL (count_exp) > desired_align
	  && INTVAL (count_exp) > size_needed)
	{
	  align_bytes
	    = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
	  if (align_bytes <= 0)
	    align_bytes = 0;
	  else
	    align_bytes = desired_align - align_bytes;
	}
      if (align_bytes == 0)
	count_exp = force_reg (counter_mode (count_exp), count_exp);
    }
  gcc_assert (desired_align >= 1 && align >= 1);

  /* Ensure that alignment prologue won't copy past end of block.  */
  if (size_needed > 1 || (desired_align > 1 && desired_align > align))
    {
      epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
      /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
	 Make sure it is power of 2.  */
      epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);

      if (count)
	{
	  if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
	    {
	      /* If main algorithm works on QImode, no epilogue is needed.
		 For small sizes just don't align anything.  */
	      if (size_needed == 1)
		desired_align = align;
	      else
		goto epilogue;
	    }
	}
      else
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp,
				   GEN_INT (epilogue_size_needed),
				   LTU, 0, counter_mode (count_exp), 1, label);
	  if (expected_size == -1 || expected_size < epilogue_size_needed)
	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
	  else
	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
	}
    }

  /* Emit code to decide on runtime whether library call or inline should be
     used.  */
  if (dynamic_check != -1)
    {
      if (CONST_INT_P (count_exp))
	{
	  if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
	    {
	      emit_block_move_via_libcall (dst, src, count_exp, false);
	      count_exp = const0_rtx;
	      goto epilogue;
	    }
	}
      else
	{
	  rtx hot_label = gen_label_rtx ();
	  jump_around_label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
				   LEU, 0, GET_MODE (count_exp), 1, hot_label);
	  predict_jump (REG_BR_PROB_BASE * 90 / 100);
	  emit_block_move_via_libcall (dst, src, count_exp, false);
	  emit_jump (jump_around_label);
	  emit_label (hot_label);
	}
    }

  /* Step 2: Alignment prologue.  */

  if (desired_align > align)
    {
      if (align_bytes == 0)
	{
	  /* Except for the first move in epilogue, we no longer know
	     constant offset in aliasing info.  It doesn't seem worth
	     the pain to maintain it for the first move, so throw away
	     the info early.  */
	  src = change_address (src, BLKmode, srcreg);
	  dst = change_address (dst, BLKmode, destreg);
	  expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
				  desired_align);
	}
      else
	{
	  /* If we know how many bytes need to be stored before dst is
	     sufficiently aligned, maintain aliasing info accurately.  */
	  dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
						 desired_align, align_bytes);
	  count_exp = plus_constant (count_exp, -align_bytes);
	  count -= align_bytes;
	}
      if (need_zero_guard
	  && (count < (unsigned HOST_WIDE_INT) size_needed
	      || (align_bytes == 0
		  && count < ((unsigned HOST_WIDE_INT) size_needed
			      + desired_align - align))))
	{
	  /* It is possible that we copied enough so the main loop will not
	     execute.  */
	  gcc_assert (size_needed > 1);
	  if (label == NULL_RTX)
	    label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp,
				   GEN_INT (size_needed),
				   LTU, 0, counter_mode (count_exp), 1, label);
	  if (expected_size == -1
	      || expected_size < (desired_align - align) / 2 + size_needed)
	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
	  else
	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
	}
    }
  if (label && size_needed == 1)
    {
      emit_label (label);
      LABEL_NUSES (label) = 1;
      label = NULL;
      epilogue_size_needed = 1;
    }
  else if (label == NULL_RTX)
    epilogue_size_needed = size_needed;

  /* Step 3: Main loop.  */

  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop_1_byte:
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
				     count_exp, QImode, 1, expected_size);
      break;
    case loop:
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
				     count_exp, Pmode, 1, expected_size);
      break;
    case unrolled_loop:
      /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
	 registers for 4 temporaries anyway.  */
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
				     count_exp, Pmode, TARGET_64BIT ? 4 : 2,
				     expected_size);
      break;
    case rep_prefix_8_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
				 DImode);
      break;
    case rep_prefix_4_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
				 SImode);
      break;
    case rep_prefix_1_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
				 QImode);
      break;
    }
  /* Properly adjust the offset of src and dest memory for aliasing.  */
  if (CONST_INT_P (count_exp))
    {
      src = adjust_automodify_address_nv (src, BLKmode, srcreg,
					  (count / size_needed) * size_needed);
      dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
					  (count / size_needed) * size_needed);
    }
  else
    {
      src = change_address (src, BLKmode, srcreg);
      dst = change_address (dst, BLKmode, destreg);
    }

  /* Step 4: Epilogue to copy the remaining bytes.  */
 epilogue:
  if (label)
    {
      /* When the main loop is done, COUNT_EXP might hold the original count,
	 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
	 The epilogue code will actually copy
	 COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.  Compensate if
	 needed.  */
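
      /* Worked example (illustrative, not from the original sources):
	 with SIZE_NEEDED == 4 and EPILOGUE_SIZE_NEEDED == 8, a residual
	 count of 13 must be masked down to 13 & 3 == 1 by the code below;
	 otherwise the epilogue would see 13 & 7 == 5 bytes, four of which
	 the main loop already copied.  */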
      if (size_needed < epilogue_size_needed)
	{
	  tmp =
	    expand_simple_binop (counter_mode (count_exp), AND, count_exp,
				 GEN_INT (size_needed - 1), count_exp, 1,
				 OPTAB_DIRECT);
	  if (tmp != count_exp)
	    emit_move_insn (count_exp, tmp);
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }

  if (count_exp != const0_rtx && epilogue_size_needed > 1)
    expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
			    epilogue_size_needed);
  if (jump_around_label)
    emit_label (jump_around_label);
  return true;
}
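
/* Illustrative sketch (not part of the original sources; the helper name
   and the 16-byte chunk size are hypothetical): the shape of the code
   the expander above emits, written as plain C.  */

static void
emitted_memcpy_shape (char *dst, const char *src, unsigned long count)
{
  /* 1) Prologue guard: blocks smaller than one chunk go straight to
     the epilogue.  */
  if (count >= 16)
    {
      /* 2) Prologue: align DST to an 8-byte boundary, byte by byte.  */
      while ((unsigned long) dst & 7)
	{
	  *dst++ = *src++;
	  count--;
	}
      /* 3) Main body: copy one 16-byte chunk per iteration.  */
      for (; count >= 16; count -= 16)
	{
	  int i;
	  for (i = 0; i < 16; i++)
	    dst[i] = src[i];
	  dst += 16;
	  src += 16;
	}
    }
  /* 4) Epilogue: whatever tail is shorter than one chunk.  */
  while (count--)
    *dst++ = *src++;
}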
/* Helper function for memcpy.  For QImode value 0xXY produce
   0xXYXYXYXY of the width specified by MODE.  This is essentially
   a * 0x01010101, but we can do slightly better than
   synth_mult by unwinding the sequence by hand on CPUs with
   slow multiply.  */
static rtx
promote_duplicated_reg (enum machine_mode mode, rtx val)
{
  enum machine_mode valmode = GET_MODE (val);
  rtx tmp;
  int nops = mode == DImode ? 3 : 2;

  gcc_assert (mode == SImode || mode == DImode);
  if (val == const0_rtx)
    return copy_to_mode_reg (mode, const0_rtx);
  if (CONST_INT_P (val))
    {
      HOST_WIDE_INT v = INTVAL (val) & 255;

      v |= v << 8;
      v |= v << 16;
      if (mode == DImode)
	v |= (v << 16) << 16;
      return copy_to_mode_reg (mode, gen_int_mode (v, mode));
    }

  if (valmode == VOIDmode)
    valmode = QImode;
  if (valmode != QImode)
    val = gen_lowpart (QImode, val);
  if (mode == QImode)
    return val;
  if (!TARGET_PARTIAL_REG_STALL)
    nops--;
  if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
      + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
      <= (ix86_cost->shift_const + ix86_cost->add) * nops
	 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
    {
      rtx reg = convert_modes (mode, QImode, val, true);
      tmp = promote_duplicated_reg (mode, const1_rtx);
      return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
				  OPTAB_DIRECT);
    }
  else
    {
      rtx reg = convert_modes (mode, QImode, val, true);

      if (!TARGET_PARTIAL_REG_STALL)
	if (mode == SImode)
	  emit_insn (gen_movsi_insv_1 (reg, reg));
	else
	  emit_insn (gen_movdi_insv_1 (reg, reg));
      else
	{
	  tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
				     NULL, 1, OPTAB_DIRECT);
	  reg =
	    expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
	}
      tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
				 NULL, 1, OPTAB_DIRECT);
      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
      if (mode == SImode)
	return reg;
      tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
				 NULL, 1, OPTAB_DIRECT);
      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
      return reg;
    }
}
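
/* Illustrative sketch (not from the original sources; the function name
   is hypothetical): the same replication in plain C for the SImode case.
   With b == 0x5A the two shift/or steps produce 0x5A5A and then
   0x5A5A5A5A, i.e. exactly b * 0x01010101.  */

static unsigned int
replicate_byte (unsigned char b)
{
  unsigned int v = b;

  v |= v << 8;		/* 0x000000XY -> 0x0000XYXY */
  v |= v << 16;		/* 0x0000XYXY -> 0xXYXYXYXY */
  return v;
}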
/* Duplicate value VAL using promote_duplicated_reg into the maximal size that
   will be needed by the main loop copying SIZE_NEEDED chunks and the prologue
   getting the alignment from ALIGN to DESIRED_ALIGN.  */
static rtx
promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
				int align)
{
  rtx promoted_val;

  if (TARGET_64BIT
      && (size_needed > 4 || (desired_align > align && desired_align > 4)))
    promoted_val = promote_duplicated_reg (DImode, val);
  else if (size_needed > 2 || (desired_align > align && desired_align > 2))
    promoted_val = promote_duplicated_reg (SImode, val);
  else if (size_needed > 1 || (desired_align > align && desired_align > 1))
    promoted_val = promote_duplicated_reg (HImode, val);
  else
    promoted_val = val;

  return promoted_val;
}
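
/* Worked example (illustrative, not from the original sources): on a
   64-bit target an unrolled main loop with SIZE_NEEDED == 32 promotes
   VAL to DImode, storing eight replicated bytes per gen_strset, while
   SIZE_NEEDED == 1 with no extra alignment work leaves VAL unpromoted.  */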
/* Expand string clear operation (bzero).  Use i386 string operations when
   profitable.  See the expand_movmem comment for an explanation of the
   individual steps performed.  */
bool
ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
		    rtx expected_align_exp, rtx expected_size_exp)
{
  rtx destreg;
  rtx label = NULL;
  rtx tmp;
  rtx jump_around_label = NULL;
  HOST_WIDE_INT align = 1;
  unsigned HOST_WIDE_INT count = 0;
  HOST_WIDE_INT expected_size = -1;
  int size_needed = 0, epilogue_size_needed;
  int desired_align = 0, align_bytes = 0;
  enum stringop_alg alg;
  rtx promoted_val = NULL;
  bool force_loopy_epilogue = false;
  int dynamic_check;
  bool need_zero_guard = false;

  if (CONST_INT_P (align_exp))
    align = INTVAL (align_exp);
  /* i386 can do misaligned access at reasonably increased cost.  */
  if (CONST_INT_P (expected_align_exp)
      && INTVAL (expected_align_exp) > align)
    align = INTVAL (expected_align_exp);
  if (CONST_INT_P (count_exp))
    count = expected_size = INTVAL (count_exp);
  if (CONST_INT_P (expected_size_exp) && count == 0)
    expected_size = INTVAL (expected_size_exp);

  /* Make sure we don't need to care about overflow later on.  */
  if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
    return false;

  /* Step 0: Decide on preferred algorithm, desired alignment and
     size of chunks to be copied by main loop.  */

  alg = decide_alg (count, expected_size, true, &dynamic_check);
  desired_align = decide_alignment (align, alg, expected_size);

  if (!TARGET_ALIGN_STRINGOPS)
    align = desired_align;

  if (alg == libcall)
    return false;
  gcc_assert (alg != no_stringop);
  if (!count)
    count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (Pmode);
      break;
    case unrolled_loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (Pmode) * 4;
      break;
    case rep_prefix_8_byte:
      size_needed = 8;
      break;
    case rep_prefix_4_byte:
      size_needed = 4;
      break;
    case rep_prefix_1_byte:
      size_needed = 1;
      break;
    case loop_1_byte:
      need_zero_guard = true;
      size_needed = 1;
      break;
    }
  epilogue_size_needed = size_needed;

  /* Step 1: Prologue guard.  */

  /* Alignment code needs count to be in register.  */
  if (CONST_INT_P (count_exp) && desired_align > align)
    {
      if (INTVAL (count_exp) > desired_align
	  && INTVAL (count_exp) > size_needed)
	{
	  align_bytes
	    = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
	  if (align_bytes <= 0)
	    align_bytes = 0;
	  else
	    align_bytes = desired_align - align_bytes;
	}
      if (align_bytes == 0)
	{
	  enum machine_mode mode = SImode;
	  if (TARGET_64BIT && (count & ~0xffffffff))
	    mode = DImode;
	  count_exp = force_reg (mode, count_exp);
	}
    }
  /* Do the cheap promotion to allow better CSE across the
     main loop and epilogue (i.e. one load of the big constant in
     front of all the code).  */
  if (CONST_INT_P (val_exp))
    promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
						   desired_align, align);
  /* Ensure that alignment prologue won't copy past end of block.  */
  if (size_needed > 1 || (desired_align > 1 && desired_align > align))
    {
      epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
      /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
	 Make sure it is power of 2.  */
      epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);

      /* To improve performance of small blocks, we jump around the VAL
	 promoting mode.  This means that if the promoted VAL is not constant,
	 we might not use it in the epilogue and have to use the byte
	 loop variant.  */
      if (epilogue_size_needed > 2 && !promoted_val)
	force_loopy_epilogue = true;
      if (count)
	{
	  if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
	    {
	      /* If main algorithm works on QImode, no epilogue is needed.
		 For small sizes just don't align anything.  */
	      if (size_needed == 1)
		desired_align = align;
	      else
		goto epilogue;
	    }
	}
      else
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp,
				   GEN_INT (epilogue_size_needed),
				   LTU, 0, counter_mode (count_exp), 1, label);
	  if (expected_size == -1 || expected_size <= epilogue_size_needed)
	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
	  else
	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
	}
    }
  if (dynamic_check != -1)
    {
      rtx hot_label = gen_label_rtx ();
      jump_around_label = gen_label_rtx ();
      emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
			       LEU, 0, counter_mode (count_exp), 1, hot_label);
      predict_jump (REG_BR_PROB_BASE * 90 / 100);
      set_storage_via_libcall (dst, count_exp, val_exp, false);
      emit_jump (jump_around_label);
      emit_label (hot_label);
    }

  /* Step 2: Alignment prologue.  */

  /* Do the expensive promotion once we branched off the small blocks.  */
  if (!promoted_val)
    promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
						   desired_align, align);
  gcc_assert (desired_align >= 1 && align >= 1);

  if (desired_align > align)
    {
      if (align_bytes == 0)
	{
	  /* Except for the first move in epilogue, we no longer know
	     constant offset in aliasing info.  It doesn't seem worth
	     the pain to maintain it for the first move, so throw away
	     the info early.  */
	  dst = change_address (dst, BLKmode, destreg);
	  expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
				  desired_align);
	}
      else
	{
	  /* If we know how many bytes need to be stored before dst is
	     sufficiently aligned, maintain aliasing info accurately.  */
	  dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
						 desired_align, align_bytes);
	  count_exp = plus_constant (count_exp, -align_bytes);
	  count -= align_bytes;
	}
      if (need_zero_guard
	  && (count < (unsigned HOST_WIDE_INT) size_needed
	      || (align_bytes == 0
		  && count < ((unsigned HOST_WIDE_INT) size_needed
			      + desired_align - align))))
	{
	  /* It is possible that we copied enough so the main loop will not
	     execute.  */
	  gcc_assert (size_needed > 1);
	  if (label == NULL_RTX)
	    label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp,
				   GEN_INT (size_needed),
				   LTU, 0, counter_mode (count_exp), 1, label);
	  if (expected_size == -1
	      || expected_size < (desired_align - align) / 2 + size_needed)
	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
	  else
	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
	}
    }
  if (label && size_needed == 1)
    {
      emit_label (label);
      LABEL_NUSES (label) = 1;
      label = NULL;
      promoted_val = val_exp;
      epilogue_size_needed = 1;
    }
  else if (label == NULL_RTX)
    epilogue_size_needed = size_needed;

  /* Step 3: Main loop.  */

  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop_1_byte:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
				     count_exp, QImode, 1, expected_size);
      break;
    case loop:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
				     count_exp, Pmode, 1, expected_size);
      break;
    case unrolled_loop:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
				     count_exp, Pmode, 4, expected_size);
      break;
    case rep_prefix_8_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
				  DImode);
      break;
    case rep_prefix_4_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
				  SImode);
      break;
    case rep_prefix_1_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
				  QImode);
      break;
    }
  /* Properly adjust the offset of the destination memory for aliasing.  */
  if (CONST_INT_P (count_exp))
    dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
					(count / size_needed) * size_needed);
  else
    dst = change_address (dst, BLKmode, destreg);

  /* Step 4: Epilogue to store the remaining bytes.  */
 epilogue:
  if (label)
    {
      /* When the main loop is done, COUNT_EXP might hold the original count,
	 while we want to store only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
	 The epilogue code will actually store
	 COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.  Compensate if
	 needed.  */
      if (size_needed < epilogue_size_needed)
	{
	  tmp =
	    expand_simple_binop (counter_mode (count_exp), AND, count_exp,
				 GEN_INT (size_needed - 1), count_exp, 1,
				 OPTAB_DIRECT);
	  if (tmp != count_exp)
	    emit_move_insn (count_exp, tmp);
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }

  if (count_exp != const0_rtx && epilogue_size_needed > 1)
    {
      if (force_loopy_epilogue)
	expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
					 epilogue_size_needed);
      else
	expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
				epilogue_size_needed);
    }
  if (jump_around_label)
    emit_label (jump_around_label);
  return true;
}
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
	not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */
static void
ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);
  rtx cmp;

  align = 0;
  if (CONST_INT_P (align_rtx))
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2? */
      if (align != 2)
	{
	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

	  /* Leave just the 3 lower bits.  */
	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
				   Pmode, 1, align_2_label);
	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
				   Pmode, 1, align_3_label);
	}
      else
	{
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check if it is aligned to 4 bytes.  */

	  align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	}

      mem = change_address (src, QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned bytes on a byte per byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
			       QImode, 1, end_0_label);

      /* Increment the address.  */
      emit_insn (ix86_gen_add3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2 */
      if (align != 2)
	{
	  emit_label (align_2_label);

	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
				   end_0_label);

	  emit_insn (ix86_gen_add3 (out, out, const1_rtx));

	  emit_label (align_3_label);
	}

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
			       end_0_label);

      emit_insn (ix86_gen_add3 (out, out, const1_rtx));
    }

  /* Generate the loop to check 4 bytes at a time.  It is not a good idea
     to align this loop: it only enlarges the program and does not speed
     it up.  */
  emit_label (align_4_label);

  mem = change_address (src, SImode, out);
  emit_move_insn (scratch, mem);
  emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside the loop and many cycles.  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
			 gen_int_mode (0x80808080, SImode)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
			   align_4_label);
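
  /* Illustrative note (not in the original sources): in C terms the
     insn sequence above computes
	 tmpreg = (scratch - 0x01010101) & ~scratch & 0x80808080;
     e.g. scratch == 0x41410041 (a zero in byte 1) gives
     0x403fff40 & 0xbebeffbe & 0x80808080 == 0x00008000, while a word
     with no zero byte gives 0.  */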
  if (TARGET_CMOVE)
    {
      rtx reg = gen_reg_rtx (SImode);
      rtx reg2 = gen_reg_rtx (Pmode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
			      gen_rtx_IF_THEN_ELSE (SImode, tmp,
						    reg,
						    tmpreg)));
      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg2,
			      gen_rtx_PLUS (Pmode, out, const2_rtx)));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
			      gen_rtx_IF_THEN_ELSE (Pmode, tmp,
						    reg2,
						    out)));
    }
  else
    {
      rtx end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes? */

      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
				  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      emit_insn (ix86_gen_add3 (out, out, const2_rtx));

      emit_label (end_2_label);
    }

  /* Avoid branch in fixing the byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  tmp = gen_rtx_REG (CCmode, FLAGS_REG);
  cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
  emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));

  emit_label (end_0_label);
}
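
/* Standalone C model (illustrative, not from the original sources; the
   function name is hypothetical) of the zero-byte test emitted by
   ix86_expand_strlensi_unroll_1 above.  */

static int
has_zero_byte (unsigned int x)
{
  /* Each zero byte turns into 0xff (bit 7 set) after the subtraction
     while its bit 7 was clear in X; the ~x factor rejects bytes that
     already had bit 7 set.  */
  return ((x - 0x01010101U) & ~x & 0x80808080U) != 0;
}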
/* Expand strlen.  */
bool
ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
{
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of the strlen expander is long.  Avoid expanding it
     unless TARGET_INLINE_ALL_STRINGOPS.  */

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && !optimize_insn_for_size_p ()
      && (!CONST_INT_P (align) || INTVAL (align) < 4))
    return false;

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !optimize_insn_for_size_p ())
    {
      /* Well it seems that some optimizer does not combine a call like
	 foo(strlen(bar), strlen(bar));
	 when the move and the subtraction is done here.  It does calculate
	 the length just once when these instructions are done inside of
	 output_strlen_unroll().  But I think since &bar[strlen(bar)] is
	 often used and I use one fewer register for the lifetime of
	 output_strlen_unroll() this is better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, src, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
	 the string, like memchr(), so compute the length by subtracting
	 the start address.  */
      emit_insn (ix86_gen_sub3 (out, out, addr));
    }
  else
    {
      rtx unspec;

      /* Can't use this if the user has appropriated eax, ecx, or edi.  */
      if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
	return false;

      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      src = replace_equiv_address_nv (src, scratch3);

      /* If .md starts supporting :P, this can be done in .md.  */
      unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
						 scratch4), UNSPEC_SCAS);
      emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
      emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
      emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
    }
  return true;
}
/* For a given symbol (function), construct code to compute the address of
   its PLT entry in the large x86-64 PIC model.  */
static rtx
construct_plt_address (rtx symbol)
{
  rtx tmp = gen_reg_rtx (Pmode);
  rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);

  gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
  gcc_assert (ix86_cmodel == CM_LARGE_PIC);

  emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
  emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
  return tmp;
}
rtx
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
		  rtx callarg2,
		  rtx pop, bool sibcall)
{
  /* We need to represent that SI and DI registers are clobbered
     by SYSV calls.  */
  static int clobbered_registers[] = {
	XMM6_REG, XMM7_REG, XMM8_REG,
	XMM9_REG, XMM10_REG, XMM11_REG,
	XMM12_REG, XMM13_REG, XMM14_REG,
	XMM15_REG, SI_REG, DI_REG
  };
  rtx vec[ARRAY_SIZE (clobbered_registers) + 3];
  rtx use = NULL, call;
  unsigned int vec_len;

  if (pop == const0_rtx)
    pop = NULL;
  gcc_assert (!TARGET_64BIT || !pop);

  if (TARGET_MACHO && !TARGET_64BIT)
    {
#if TARGET_MACHO
      if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
	fnaddr = machopic_indirect_call_target (fnaddr);
#endif
    }
  else
    {
      /* Static functions and indirect calls don't need the pic register.  */
      if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
	  && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
	  && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
	use_reg (&use, pic_offset_table_rtx);
    }

  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
    {
      rtx al = gen_rtx_REG (QImode, AX_REG);
      emit_move_insn (al, callarg2);
      use_reg (&use, al);
    }

  if (ix86_cmodel == CM_LARGE_PIC
      && MEM_P (fnaddr)
      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
      && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
    fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
  else if (sibcall
	   ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
	   : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
    {
      fnaddr = XEXP (fnaddr, 0);
      if (GET_MODE (fnaddr) != word_mode)
	fnaddr = convert_to_mode (word_mode, fnaddr, 1);
      fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
    }

  vec_len = 0;
  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
  if (retval)
    call = gen_rtx_SET (VOIDmode, retval, call);
  vec[vec_len++] = call;

  if (pop)
    {
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
      vec[vec_len++] = pop;
    }

  if (TARGET_64BIT_MS_ABI
      && (!callarg2 || INTVAL (callarg2) != -2))
    {
      unsigned i;

      vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
				       UNSPEC_MS_TO_SYSV_CALL);

      for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
	vec[vec_len++]
	  = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
			     ? TImode : DImode,
			     gen_rtx_REG (SSE_REGNO_P (clobbered_registers[i])
					  ? TImode : DImode,
					  clobbered_registers[i]));
    }

  /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration.  */
  if (TARGET_VZEROUPPER)
    {
      int avx256;
      if (cfun->machine->callee_pass_avx256_p)
	{
	  if (cfun->machine->callee_return_avx256_p)
	    avx256 = callee_return_pass_avx256;
	  else
	    avx256 = callee_pass_avx256;
	}
      else if (cfun->machine->callee_return_avx256_p)
	avx256 = callee_return_avx256;
      else
	avx256 = call_no_avx256;

      if (reload_completed)
	emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
      else
	vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode,
					 gen_rtvec (1, GEN_INT (avx256)),
					 UNSPEC_CALL_NEEDS_VZEROUPPER);
    }

  if (vec_len > 1)
    call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
  call = emit_call_insn (call);
  if (use)
    CALL_INSN_FUNCTION_USAGE (call) = use;

  return call;
}
/* Split a call pattern decorated with UNSPEC_CALL_NEEDS_VZEROUPPER into
   a vzeroupper insn followed by the plain call.  */
void
ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
{
  rtx pat = PATTERN (insn);
  rtvec vec = XVEC (pat, 0);
  int len = GET_NUM_ELEM (vec) - 1;

  /* Strip off the last entry of the parallel.  */
  gcc_assert (GET_CODE (RTVEC_ELT (vec, len)) == UNSPEC);
  gcc_assert (XINT (RTVEC_ELT (vec, len), 1) == UNSPEC_CALL_NEEDS_VZEROUPPER);
  if (len == 1)
    pat = RTVEC_ELT (vec, 0);
  else
    pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (len, &RTVEC_ELT (vec, 0)));

  emit_insn (gen_avx_vzeroupper (vzeroupper));
  emit_call_insn (pat);
}
/* Output the assembly for a call instruction.  */
const char *
ix86_output_call_insn (rtx insn, rtx call_op)
{
  bool direct_p = constant_call_address_operand (call_op, Pmode);
  bool seh_nop_p = false;
  const char *xasm;

  if (SIBLING_CALL_P (insn))
    {
      if (direct_p)
	xasm = "jmp\t%P0";
      /* SEH epilogue detection requires the indirect branch case
	 to include REX.W.  */
      else if (TARGET_SEH)
	xasm = "rex.W jmp %A0";
      else
	xasm = "jmp\t%A0";

      output_asm_insn (xasm, &call_op);
      return "";
    }

  /* SEH unwinding can require an extra nop to be emitted in several
     circumstances.  Determine if we have one of those.  */
  if (TARGET_SEH)
    {
      rtx i;

      for (i = NEXT_INSN (insn); i; i = NEXT_INSN (i))
	{
	  /* If we get to another real insn, we don't need the nop.  */
	  if (INSN_P (i))
	    break;

	  /* If we get to the epilogue note, prevent a catch region from
	     being adjacent to the standard epilogue sequence.  If non-
	     call-exceptions, we'll have done this during epilogue emission.  */
	  if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
	      && !flag_non_call_exceptions
	      && !can_throw_internal (insn))
	    {
	      seh_nop_p = true;
	      break;
	    }
	}

      /* If we didn't find a real insn following the call, prevent the
	 unwinder from looking into the next function.  */
      if (i == NULL)
	seh_nop_p = true;
    }

  if (direct_p)
    xasm = "call\t%P0";
  else
    xasm = "call\t%A0";

  output_asm_insn (xasm, &call_op);

  if (seh_nop_p)
    return "nop";

  return "";
}
/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */
static struct machine_function *
ix86_init_machine_status (void)
{
  struct machine_function *f;

  f = ggc_alloc_cleared_machine_function ();
  f->use_fast_prologue_epilogue_nregs = -1;
  f->tls_descriptor_call_expanded_p = 0;
  f->call_abi = ix86_abi;

  return f;
}
/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

rtx
assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
{
  struct stack_local_entry *s;

  gcc_assert (n < MAX_386_STACK_LOCALS);

  /* Virtual slot is valid only before vregs are instantiated.  */
  gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);

  for (s = ix86_stack_locals; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return validize_mem (copy_rtx (s->rtl));

  s = ggc_alloc_stack_local_entry ();
  s->n = n;
  s->mode = mode;
  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  s->next = ix86_stack_locals;
  ix86_stack_locals = s;
  return validize_mem (s->rtl);
}
/* Calculate the length of the memory address in the instruction encoding.
   Includes the addr32 prefix; does not include the one-byte modrm, opcode,
   or other prefixes.  */
int
memory_address_length (rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;
  int ok;

  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  ok = ix86_decompose_address (addr, &parts);
  gcc_assert (ok);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  base = parts.base;
  index = parts.index;
  disp = parts.disp;

  /* Add length of addr32 prefix.  */
  len = (GET_CODE (addr) == ZERO_EXTEND
	 || GET_CODE (addr) == AND);

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement,
       - r12 as the base always wants an index,
       - r13 as the base always wants a displacement.  */

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
	 the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
	 code.  */
      if (REG_P (addr)
	  && (addr == arg_pointer_rtx
	      || addr == frame_pointer_rtx
	      || REGNO (addr) == SP_REG
	      || REGNO (addr) == BP_REG
	      || REGNO (addr) == R12_REG
	      || REGNO (addr) == R13_REG))
	len += 1;
    }

  /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
     is not disp32, but disp32(%rip), so for disp32
     a SIB byte is needed, unless print_operand_address
     optimizes it into disp32(%rip) or (%rip) is implied
     by UNSPEC.  */
  else if (disp && !base && !index)
    {
      len += 4;
      if (TARGET_64BIT)
	{
	  rtx symbol = disp;

	  if (GET_CODE (disp) == CONST)
	    symbol = XEXP (disp, 0);
	  if (GET_CODE (symbol) == PLUS
	      && CONST_INT_P (XEXP (symbol, 1)))
	    symbol = XEXP (symbol, 0);

	  if (GET_CODE (symbol) != LABEL_REF
	      && (GET_CODE (symbol) != SYMBOL_REF
		  || SYMBOL_REF_TLS_MODEL (symbol) != 0)
	      && (GET_CODE (symbol) != UNSPEC
		  || (XINT (symbol, 1) != UNSPEC_GOTPCREL
		      && XINT (symbol, 1) != UNSPEC_PCREL
		      && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
	    len += 1;
	}
    }
  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
	{
	  if (base && satisfies_constraint_K (disp))
	    len += 1;
	  else
	    len += 4;
	}
      /* ebp always wants a displacement.  Similarly r13.  */
      else if (base && REG_P (base)
	       && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
	len += 1;

      /* An index requires the two-byte modrm form....  */
      if (index
	  /* ...like esp (or r12), which always wants an index.  */
	  || base == arg_pointer_rtx
	  || base == frame_pointer_rtx
	  || (base && REG_P (base)
	      && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
	len += 1;
    }

  return len;
}
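
/* Illustrative encodings (not from the original sources): for
   "mov (%eax), %ecx" (8B 08) the function returns 0; "mov (%esp), %ecx"
   (8B 0C 24) needs a SIB byte, so it returns 1; "mov (%ebp), %ecx"
   (8B 4D 00) needs a disp8, so it also returns 1; and
   "mov 4(%eax,%ebx,2), %ecx" (8B 4C 58 04) needs both SIB and disp8,
   returning 2.  */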
/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that the insn has an 8-bit immediate alternative.  */
int
ix86_attr_length_immediate_default (rtx insn, bool shortform)
{
  int len = 0;
  int i;
  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
	enum attr_mode mode = get_attr_mode (insn);

	gcc_assert (!len);
	if (shortform && CONST_INT_P (recog_data.operand[i]))
	  {
	    HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
	    switch (mode)
	      {
	      case MODE_QI:
		len = 1;
		continue;
	      case MODE_HI:
		ival = trunc_int_for_mode (ival, HImode);
		break;
	      case MODE_SI:
		ival = trunc_int_for_mode (ival, SImode);
		break;
	      default:
		break;
	      }
	    if (IN_RANGE (ival, -128, 127))
	      {
		len = 1;
		continue;
	      }
	  }
	switch (mode)
	  {
	  case MODE_QI:
	    len = 1;
	    break;
	  case MODE_HI:
	    len = 2;
	    break;
	  case MODE_SI:
	    len = 4;
	    break;
	  /* Immediates for DImode instructions are encoded as 32bit sign
	     extended values.  */
	  case MODE_DI:
	    len = 4;
	    break;
	  default:
	    fatal_insn ("unknown insn mode", insn);
	  }
      }
  return len;
}
/* Compute default value for "length_address" attribute.  */
int
ix86_attr_length_address_default (rtx insn)
{
  int i;

  if (get_attr_type (insn) == TYPE_LEA)
    {
      rtx set = PATTERN (insn), addr;

      if (GET_CODE (set) == PARALLEL)
	set = XVECEXP (set, 0, 0);

      gcc_assert (GET_CODE (set) == SET);

      addr = SET_SRC (set);
      if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
	{
	  if (GET_CODE (addr) == ZERO_EXTEND)
	    addr = XEXP (addr, 0);
	  if (GET_CODE (addr) == SUBREG)
	    addr = SUBREG_REG (addr);
	}

      return memory_address_length (addr);
    }

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (MEM_P (recog_data.operand[i]))
      {
	constrain_operands_cached (reload_completed);
	if (which_alternative != -1)
	  {
	    const char *constraints = recog_data.constraints[i];
	    int alt = which_alternative;

	    while (*constraints == '=' || *constraints == '+')
	      constraints++;
	    while (alt-- > 0)
	      while (*constraints++ != ',')
		;
	    /* Skip ignored operands.  */
	    if (*constraints == 'X')
	      continue;
	  }
	return memory_address_length (XEXP (recog_data.operand[i], 0));
      }
  return 0;
}
/* Compute default value for "length_vex" attribute.  It includes the
   2- or 3-byte VEX prefix and 1 opcode byte.  */
int
ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
{
  int i;

  /* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX W bit
     uses the 3-byte VEX prefix.  */
  if (!has_0f_opcode || has_vex_w)
    return 3 + 1;

  /* We can always use the 2-byte VEX prefix in 32bit.  */
  if (!TARGET_64BIT)
    return 2 + 1;

  extract_insn_cached (insn);

  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (REG_P (recog_data.operand[i]))
      {
	/* REX.W bit uses 3 byte VEX prefix.  */
	if (GET_MODE (recog_data.operand[i]) == DImode
	    && GENERAL_REG_P (recog_data.operand[i]))
	  return 3 + 1;
      }
    else
      {
	/* REX.X or REX.B bits use 3 byte VEX prefix.  */
	if (MEM_P (recog_data.operand[i])
	    && x86_extended_reg_mentioned_p (recog_data.operand[i]))
	  return 3 + 1;
      }

  return 2 + 1;
}
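
/* Illustrative example (not from the original sources): in 64-bit code
   "vaddps %ymm2, %ymm1, %ymm0" fits the 2-byte C5 prefix, so the default
   length is 2 + 1 == 3; a memory operand whose base or index is an
   extended register, e.g. "vaddps (%rax,%r10), %ymm1, %ymm0", needs
   REX.X and therefore the 3-byte C4 form, giving 3 + 1 == 4.  */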
/* Return the maximum number of instructions a cpu can issue.  */
static int
ix86_issue_rate (void)
{
  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
    case PROCESSOR_ATOM:
    case PROCESSOR_K6:
      return 2;

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_PENTIUM4:
    case PROCESSOR_CORE2_32:
    case PROCESSOR_CORE2_64:
    case PROCESSOR_COREI7_32:
    case PROCESSOR_COREI7_64:
    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
    case PROCESSOR_AMDFAM10:
    case PROCESSOR_NOCONA:
    case PROCESSOR_GENERIC32:
    case PROCESSOR_GENERIC64:
    case PROCESSOR_BDVER1:
    case PROCESSOR_BDVER2:
    case PROCESSOR_BTVER1:
      return 3;

    default:
      return 1;
    }
}
/* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
   by DEP_INSN and nothing set by DEP_INSN.  */
static bool
ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
{
  rtx set, set2;

  /* Simplify the test for uninteresting insns.  */
  if (insn_type != TYPE_SETCC
      && insn_type != TYPE_ICMOV
      && insn_type != TYPE_FCMOV
      && insn_type != TYPE_IBR)
    return false;

  if ((set = single_set (dep_insn)) != 0)
    {
      set = SET_DEST (set);
      set2 = NULL_RTX;
    }
  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
	   && XVECLEN (PATTERN (dep_insn), 0) == 2
	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
    {
      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
    }
  else
    return false;

  if (!REG_P (set) || REGNO (set) != FLAGS_REG)
    return false;

  /* This test is true if the dependent insn reads the flags but
     not any other potentially set register.  */
  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
    return false;

  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
    return false;

  return true;
}
/* Return true iff USE_INSN has a memory address with operands set by
   SET_INSN.  */
bool
ix86_agi_dependent (rtx set_insn, rtx use_insn)
{
  int i;
  extract_insn_cached (use_insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (MEM_P (recog_data.operand[i]))
      {
	rtx addr = XEXP (recog_data.operand[i], 0);
	return modified_in_p (addr, set_insn) != 0;
      }
  return false;
}
static int
ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
  enum attr_type insn_type, dep_insn_type;
  enum attr_memory memory;
  rtx set, set2;
  int dep_insn_code_number;

  /* Anti and output dependencies have zero cost on all CPUs.  */
  if (REG_NOTE_KIND (link) != 0)
    return 0;

  dep_insn_code_number = recog_memoized (dep_insn);

  /* If we can't recognize the insns, we can't really do anything.  */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
      /* Address Generation Interlock adds a cycle of latency.  */
      if (insn_type == TYPE_LEA)
	{
	  rtx addr = PATTERN (insn);

	  if (GET_CODE (addr) == PARALLEL)
	    addr = XVECEXP (addr, 0, 0);

	  gcc_assert (GET_CODE (addr) == SET);

	  addr = SET_SRC (addr);
	  if (modified_in_p (addr, dep_insn))
	    cost += 1;
	}
      else if (ix86_agi_dependent (dep_insn, insn))
	cost += 1;

      /* ??? Compares pair with jump/setcc.  */
      if (ix86_flags_dependent (insn, dep_insn, insn_type))
	cost = 0;

      /* Floating point stores require value to be ready one cycle earlier.  */
      if (insn_type == TYPE_FMOV
	  && get_attr_memory (insn) == MEMORY_STORE
	  && !ix86_agi_dependent (dep_insn, insn))
	cost += 1;
      break;

    case PROCESSOR_PENTIUMPRO:
      memory = get_attr_memory (insn);

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* There is one cycle extra latency between an FP op and a store.  */
      if (insn_type == TYPE_FMOV
	  && (set = single_set (dep_insn)) != NULL_RTX
	  && (set2 = single_set (insn)) != NULL_RTX
	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
	  && MEM_P (SET_DEST (set2)))
	cost += 1;

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependent (dep_insn, insn))
	{
	  /* Claim moves to take one cycle, as core can issue one load
	     at time and the next load can start cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 1;
	  else if (cost > 1)
	    cost--;
	}
      break;

    case PROCESSOR_K6:
      memory = get_attr_memory (insn);

      /* The esp dependency is resolved before the instruction is really
	 finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
	return 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependent (dep_insn, insn))
	{
	  /* Claim moves to take one cycle, as core can issue one load
	     at time and the next load can start cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 1;
	  else if (cost > 2)
	    cost -= 2;
	  else
	    cost = 1;
	}
      break;

    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
    case PROCESSOR_AMDFAM10:
    case PROCESSOR_BDVER1:
    case PROCESSOR_BDVER2:
    case PROCESSOR_BTVER1:
    case PROCESSOR_ATOM:
    case PROCESSOR_GENERIC32:
    case PROCESSOR_GENERIC64:
      memory = get_attr_memory (insn);

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependent (dep_insn, insn))
	{
	  enum attr_unit unit = get_attr_unit (insn);
	  int loadcost;

	  /* Because of the difference between the length of integer and
	     floating unit pipeline preparation stages, the memory operands
	     for floating point are cheaper.

	     ??? For Athlon the difference is most probably 2.  */
	  if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
	    loadcost = 3;
	  else
	    loadcost = TARGET_ATHLON ? 2 : 0;

	  if (cost >= loadcost)
	    cost -= loadcost;
	  else
	    cost = 0;
	}
      break;

    default:
      break;
    }

  return cost;
}
/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */

static int
ia32_multipass_dfa_lookahead (void)
{
  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
      return 2;

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_K6:
      return 1;

    case PROCESSOR_CORE2_32:
    case PROCESSOR_CORE2_64:
    case PROCESSOR_COREI7_32:
    case PROCESSOR_COREI7_64:
      /* Generally, we want haifa-sched:max_issue() to look ahead as far
	 as the number of instructions that can be executed on a cycle,
	 i.e., issue_rate.  I wonder why tuning for many CPUs does not
	 do this.  */
      return ix86_issue_rate ();

    default:
      return 0;
    }
}
/* Model decoder of Core 2/i7.
   Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
   track the instruction fetch block boundaries and make sure that long
   (9+ bytes) instructions are assigned to D0.  */

/* Maximum length of an insn that can be handled by
   a secondary decoder unit.  '8' for Core 2/i7.  */
static int core2i7_secondary_decoder_max_insn_size;

/* Ifetch block size, i.e., number of bytes decoder reads per cycle.
   '16' for Core 2/i7.  */
static int core2i7_ifetch_block_size;

/* Maximum number of instructions decoder can handle per cycle.
   '6' for Core 2/i7.  */
static int core2i7_ifetch_block_max_insns;
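/* A worked example of the model, assuming the Core 2/i7 parameters set in
   ix86_sched_init_global below (16-byte block, at most 6 insns, 8-byte
   secondary decoder limit); the insn sizes here are made up purely for
   illustration:

     sizes 3 + 3 + 2 + 7 = 15 bytes, 4 insns  -> all decode this cycle;
     a further 2-byte insn would push the block past 16 bytes and is
     masked out of the ready list until the next cycle;
     a 9+ byte insn is only accepted as the first insn of a cycle,
     i.e. on decoder D0.  */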
typedef struct ix86_first_cycle_multipass_data_ *
  ix86_first_cycle_multipass_data_t;
typedef const struct ix86_first_cycle_multipass_data_ *
  const_ix86_first_cycle_multipass_data_t;

/* A variable to store target state across calls to max_issue within
   one cycle.  */
static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
  *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;

/* Initialize DATA.  */
static void
core2i7_first_cycle_multipass_init (void *_data)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;

  data->ifetch_block_len = 0;
  data->ifetch_block_n_insns = 0;
  data->ready_try_change = NULL;
  data->ready_try_change_size = 0;
}
/* Advancing the cycle; reset ifetch block counts.  */
static void
core2i7_dfa_post_advance_cycle (void)
{
  ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;

  gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);

  data->ifetch_block_len = 0;
  data->ifetch_block_n_insns = 0;
}
static int min_insn_size (rtx);

/* Filter out insns from ready_try that the core will not be able to issue
   on current cycle due to decoder.  */
static void
core2i7_first_cycle_multipass_filter_ready_try
  (const_ix86_first_cycle_multipass_data_t data,
   char *ready_try, int n_ready, bool first_cycle_insn_p)
{
  while (n_ready--)
    {
      rtx insn;
      int insn_size;

      if (ready_try[n_ready])
	continue;

      insn = get_ready_element (n_ready);
      insn_size = min_insn_size (insn);

      if (/* If this insn is too long for a secondary decoder ...  */
	  (!first_cycle_insn_p
	   && insn_size > core2i7_secondary_decoder_max_insn_size)
	  /* ... or it would not fit into the ifetch block ...  */
	  || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
	  /* ... or the decoder is full already ...  */
	  || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
	/* ... mask the insn out.  */
	{
	  ready_try[n_ready] = 1;

	  if (data->ready_try_change)
	    SET_BIT (data->ready_try_change, n_ready);
	}
    }
}
/* Prepare for a new round of multipass lookahead scheduling.  */
static void
core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
				     bool first_cycle_insn_p)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;
  const_ix86_first_cycle_multipass_data_t prev_data
    = ix86_first_cycle_multipass_data;

  /* Restore the state from the end of the previous round.  */
  data->ifetch_block_len = prev_data->ifetch_block_len;
  data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;

  /* Filter instructions that cannot be issued on current cycle due to
     decoder restrictions.  */
  core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
						  first_cycle_insn_p);
}
/* INSN is being issued in current solution.  Account for its impact on
   the decoder model.  */
static void
core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
				     rtx insn, const void *_prev_data)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;
  const_ix86_first_cycle_multipass_data_t prev_data
    = (const_ix86_first_cycle_multipass_data_t) _prev_data;

  int insn_size = min_insn_size (insn);

  data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
  data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
  gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
	      && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);

  /* Allocate or resize the bitmap for storing INSN's effect on ready_try.  */
  if (!data->ready_try_change)
    {
      data->ready_try_change = sbitmap_alloc (n_ready);
      data->ready_try_change_size = n_ready;
    }
  else if (data->ready_try_change_size < n_ready)
    {
      data->ready_try_change = sbitmap_resize (data->ready_try_change,
					       n_ready, 0);
      data->ready_try_change_size = n_ready;
    }
  sbitmap_zero (data->ready_try_change);

  /* Filter out insns from ready_try that the core will not be able to issue
     on current cycle due to decoder.  */
  core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
						  false);
}
/* Revert the effect on ready_try.  */
static void
core2i7_first_cycle_multipass_backtrack (const void *_data,
					 char *ready_try,
					 int n_ready ATTRIBUTE_UNUSED)
{
  const_ix86_first_cycle_multipass_data_t data
    = (const_ix86_first_cycle_multipass_data_t) _data;
  unsigned int i = 0;
  sbitmap_iterator sbi;

  gcc_assert (sbitmap_last_set_bit (data->ready_try_change) < n_ready);
  EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
    {
      ready_try[i] = 0;
    }
}
/* Save the result of multipass lookahead scheduling for the next round.  */
static void
core2i7_first_cycle_multipass_end (const void *_data)
{
  const_ix86_first_cycle_multipass_data_t data
    = (const_ix86_first_cycle_multipass_data_t) _data;
  ix86_first_cycle_multipass_data_t next_data
    = ix86_first_cycle_multipass_data;

  if (data != NULL)
    {
      next_data->ifetch_block_len = data->ifetch_block_len;
      next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
    }
}
/* Deallocate target data.  */
static void
core2i7_first_cycle_multipass_fini (void *_data)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;

  if (data->ready_try_change)
    {
      sbitmap_free (data->ready_try_change);
      data->ready_try_change = NULL;
      data->ready_try_change_size = 0;
    }
}
/* Prepare for scheduling pass.  */
static void
ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
			int verbose ATTRIBUTE_UNUSED,
			int max_uid ATTRIBUTE_UNUSED)
{
  /* Install scheduling hooks for current CPU.  Some of these hooks are used
     in time-critical parts of the scheduler, so we only set them up when
     they are actually used.  */
  switch (ix86_tune)
    {
    case PROCESSOR_CORE2_32:
    case PROCESSOR_CORE2_64:
    case PROCESSOR_COREI7_32:
    case PROCESSOR_COREI7_64:
      targetm.sched.dfa_post_advance_cycle
	= core2i7_dfa_post_advance_cycle;
      targetm.sched.first_cycle_multipass_init
	= core2i7_first_cycle_multipass_init;
      targetm.sched.first_cycle_multipass_begin
	= core2i7_first_cycle_multipass_begin;
      targetm.sched.first_cycle_multipass_issue
	= core2i7_first_cycle_multipass_issue;
      targetm.sched.first_cycle_multipass_backtrack
	= core2i7_first_cycle_multipass_backtrack;
      targetm.sched.first_cycle_multipass_end
	= core2i7_first_cycle_multipass_end;
      targetm.sched.first_cycle_multipass_fini
	= core2i7_first_cycle_multipass_fini;

      /* Set decoder parameters.  */
      core2i7_secondary_decoder_max_insn_size = 8;
      core2i7_ifetch_block_size = 16;
      core2i7_ifetch_block_max_insns = 6;
      break;

    default:
      targetm.sched.dfa_post_advance_cycle = NULL;
      targetm.sched.first_cycle_multipass_init = NULL;
      targetm.sched.first_cycle_multipass_begin = NULL;
      targetm.sched.first_cycle_multipass_issue = NULL;
      targetm.sched.first_cycle_multipass_backtrack = NULL;
      targetm.sched.first_cycle_multipass_end = NULL;
      targetm.sched.first_cycle_multipass_fini = NULL;
      break;
    }
}
/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */

int
ix86_constant_alignment (tree exp, int align)
{
  if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
      || TREE_CODE (exp) == INTEGER_CST)
    {
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
	return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
	return 128;
    }
  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
    return BITS_PER_WORD;

  return align;
}
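/* For illustration (hypothetical inputs): a REAL_CST of DFmode requested
   with align == 32 is raised to 64 above, and a 40-character STRING_CST
   compiled without -Os is raised to BITS_PER_WORD so that word-at-a-time
   copies of long literals stay aligned.  */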
/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (tree type, int align)
{
  int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);

  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
	  || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
      && align < max_align)
    align = max_align;

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
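/* For illustration (hypothetical declaration, not from the original
   source): under the x86-64 branch above, a static aggregate of at least
   16 bytes, e.g.

       static char buf[32];

   has its alignment raised to 128 bits so aligned SSE accesses can be
   used on it.  */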
/* Compute the alignment for a local variable or a stack slot.  EXP is
   the data type or decl itself, MODE is the widest mode available and
   ALIGN is the alignment that the object would ordinarily have.  The
   value of this macro is used instead of that alignment to align the
   object.  */

unsigned int
ix86_local_alignment (tree exp, enum machine_mode mode,
		      unsigned int align)
{
  tree type, decl;

  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if (!TARGET_64BIT
      && align == 64
      && ix86_preferred_stack_boundary < 64
      && (mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || !TYPE_USER_ALIGN (type))
      && (!decl || !DECL_USER_ALIGN (decl)))
    align = 32;

  /* If TYPE is NULL, we are allocating a stack slot for caller-save
     register in MODE.  We will return the largest alignment of XF
     and DF.  */
  if (!type)
    {
      if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
	align = GET_MODE_ALIGNMENT (DFmode);
      return align;
    }

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  Exact wording is:

     An array uses the same alignment as its elements, except that a local or
     global array variable of length at least 16 bytes or
     a C99 variable-length array variable always has alignment of at least 16 bytes.

     This was added to allow use of aligned SSE instructions on arrays.  This
     rule is meant for static storage (where the compiler cannot do the
     analysis by itself).  We follow it for automatic variables only when
     convenient.  We fully control everything in the function being compiled,
     and functions from other units cannot rely on the alignment.

     Exclude va_list type.  It is the common case of a local array where
     we cannot benefit from the alignment.  */
  if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
      && TARGET_SSE)
    {
      if (AGGREGATE_TYPE_P (type)
	  && (va_list_type_node == NULL_TREE
	      || (TYPE_MAIN_VARIANT (type)
		  != TYPE_MAIN_VARIANT (va_list_type_node)))
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
/* Compute the minimum required alignment for dynamic stack realignment
   purposes for a local variable, parameter or a stack slot.  EXP is
   the data type or decl itself, MODE is its mode and ALIGN is the
   alignment that the object would ordinarily have.  */

unsigned int
ix86_minimum_alignment (tree exp, enum machine_mode mode,
			unsigned int align)
{
  tree type, decl;

  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
    return align;

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || !TYPE_USER_ALIGN (type))
      && (!decl || !DECL_USER_ALIGN (decl)))
    return 32;

  return align;
}
/* Find a location for the static chain incoming to a nested function.
   This is a register, unless all free registers are used by arguments.  */

static rtx
ix86_static_chain (const_tree fndecl, bool incoming_p)
{
  unsigned regno;

  if (!DECL_STATIC_CHAIN (fndecl))
    return NULL;

  if (TARGET_64BIT)
    {
      /* We always use R10 in 64-bit mode.  */
      regno = R10_REG;
    }
  else
    {
      tree fntype;
      unsigned int ccvt;

      /* By default in 32-bit mode we use ECX to pass the static chain.  */
      regno = CX_REG;

      fntype = TREE_TYPE (fndecl);
      ccvt = ix86_get_callcvt (fntype);
      if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
	{
	  /* Fastcall functions use ecx/edx for arguments, which leaves
	     us with EAX for the static chain.
	     Thiscall functions use ecx for arguments, which also
	     leaves us with EAX for the static chain.  */
	  regno = AX_REG;
	}
      else if (ix86_function_regparm (fntype, fndecl) == 3)
	{
	  /* For regparm 3, we have no free call-clobbered registers in
	     which to store the static chain.  In order to implement this,
	     we have the trampoline push the static chain to the stack.
	     However, we can't push a value below the return address when
	     we call the nested function directly, so we have to use an
	     alternate entry point.  For this we use ESI, and have the
	     alternate entry point push ESI, so that things appear the
	     same once we're executing the nested function.  */
	  if (incoming_p)
	    {
	      if (fndecl == current_function_decl)
		ix86_static_chain_on_stack = true;
	      return gen_frame_mem (SImode,
				    plus_constant (arg_pointer_rtx, -8));
	    }
	  regno = SI_REG;
	}
    }

  return gen_rtx_REG (Pmode, regno);
}
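/* To summarize the cases above (32-bit mode): the default convention
   passes the chain in %ecx; fastcall and thiscall use %ecx/%edx for
   arguments, leaving %eax; regparm(3) leaves no free call-clobbered
   register, so the chain goes on the stack via the alternate entry
   point that pushes %esi.  In 64-bit mode the chain is always %r10.  */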
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNDECL is the decl of the target address; M_TRAMP is a MEM for
   the trampoline, and CHAIN_VALUE is an RTX for the static chain
   to be passed to the target function.  */

static void
ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx mem, fnaddr;
  int opcode;
  int offset = 0;

  fnaddr = XEXP (DECL_RTL (fndecl), 0);

  if (TARGET_64BIT)
    {
      int size;

      /* Load the function address to r11.  Try to load address using
	 the shorter movl instead of movabs.  We may want to support
	 movq for kernel mode, but kernel does not use trampolines at
	 the moment.  */
      if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
	{
	  fnaddr = copy_to_mode_reg (DImode, fnaddr);

	  mem = adjust_address (m_tramp, HImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xbb41, HImode));

	  mem = adjust_address (m_tramp, SImode, offset + 2);
	  emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  mem = adjust_address (m_tramp, HImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xbb49, HImode));

	  mem = adjust_address (m_tramp, DImode, offset + 2);
	  emit_move_insn (mem, fnaddr);
	  offset += 10;
	}

      /* Load static chain using movabs to r10.  Use the
	 shorter movl instead of movabs for x32.  */
      if (TARGET_X32)
	{
	  opcode = 0xba41;
	  size = 6;
	}
      else
	{
	  opcode = 0xba49;
	  size = 10;
	}

      mem = adjust_address (m_tramp, HImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, HImode));

      mem = adjust_address (m_tramp, ptr_mode, offset + 2);
      emit_move_insn (mem, chain_value);
      offset += size;

      /* Jump to r11; the last (unused) byte is a nop, only there to
	 pad the write out to a single 32-bit store.  */
      mem = adjust_address (m_tramp, SImode, offset);
      emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
      offset += 4;
    }
  else
    {
      rtx disp, chain;

      /* Depending on the static chain location, either load a register
	 with a constant, or push the constant to the stack.  All of the
	 instructions are the same size.  */
      chain = ix86_static_chain (fndecl, true);
      if (REG_P (chain))
	{
	  switch (REGNO (chain))
	    {
	    case AX_REG:
	      opcode = 0xb8; break;
	    case CX_REG:
	      opcode = 0xb9; break;
	    default:
	      gcc_unreachable ();
	    }
	}
      else
	opcode = 0x68;

      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);
      emit_move_insn (mem, chain_value);
      offset += 5;

      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (0xe9, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);

      /* Compute offset from the end of the jmp to the target function.
	 In the case in which the trampoline stores the static chain on
	 the stack, we need to skip the first insn which pushes the
	 (call-saved) register static chain; this push is 1 byte.  */
      offset += 5;
      disp = expand_binop (SImode, sub_optab, fnaddr,
			   plus_constant (XEXP (m_tramp, 0),
					  offset - (MEM_P (chain) ? 1 : 0)),
			   NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (mem, disp);
    }

  gcc_assert (offset <= TRAMPOLINE_SIZE);

#ifdef HAVE_ENABLE_EXECUTE_STACK
#ifdef CHECK_EXECUTE_STACK_ENABLED
  if (CHECK_EXECUTE_STACK_ENABLED)
#endif
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
#endif
}
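/* For illustration, the 64-bit trampoline bytes emitted above when the
   target address fits in 32 bits (offsets into m_tramp; the immediates
   are filled in at run time):

     0:  41 bb imm32    movl   $fnaddr, %r11d
     6:  49 ba imm64    movabs $chain_value, %r10
    16:  49 ff e3       rex.WB jmp *%r11
    19:  90             nop (pads the final 32-bit store)

   The x32 variant loads the chain with the shorter 41 ba imm32 form.  */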
/* The following file contains several enumerations and data structures
   built from the definitions in i386-builtin-types.def.  */

#include "i386-builtin-types.inc"

/* Table for the ix86 builtin non-function types.  */
static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];

/* Retrieve an element from the above table, building some of
   the types lazily.  */

static tree
ix86_get_builtin_type (enum ix86_builtin_type tcode)
{
  unsigned int index;
  tree type, itype;

  gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_type_tab));

  type = ix86_builtin_type_tab[(int) tcode];
  if (type != NULL)
    return type;

  gcc_assert (tcode > IX86_BT_LAST_PRIM);
  if (tcode <= IX86_BT_LAST_VECT)
    {
      enum machine_mode mode;

      index = tcode - IX86_BT_LAST_PRIM - 1;
      itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
      mode = ix86_builtin_type_vect_mode[index];

      type = build_vector_type_for_mode (itype, mode);
    }
  else
    {
      int quals;

      index = tcode - IX86_BT_LAST_VECT - 1;
      if (tcode <= IX86_BT_LAST_PTR)
	quals = TYPE_UNQUALIFIED;
      else
	quals = TYPE_QUAL_CONST;

      itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
      if (quals != TYPE_UNQUALIFIED)
	itype = build_qualified_type (itype, quals);

      type = build_pointer_type (itype);
    }

  ix86_builtin_type_tab[(int) tcode] = type;
  return type;
}
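/* For illustration (hypothetical type code): the first lookup of a vector
   code such as IX86_BT_V4SF misses the cache above, resolves its base
   element type (float) and mode (V4SFmode), and caches the result of

       build_vector_type_for_mode (float_type_node, V4SFmode);

   subsequent lookups of the same code return the cached tree directly.  */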
/* Table for the ix86 builtin function types.  */
static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];

/* Retrieve an element from the above table, building some of
   the types lazily.  */

static tree
ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
{
  tree type;

  gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));

  type = ix86_builtin_func_type_tab[(int) tcode];
  if (type != NULL)
    return type;

  if (tcode <= IX86_BT_LAST_FUNC)
    {
      unsigned start = ix86_builtin_func_start[(int) tcode];
      unsigned after = ix86_builtin_func_start[(int) tcode + 1];
      tree rtype, atype, args = void_list_node;
      unsigned i;

      rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
      for (i = after - 1; i > start; --i)
	{
	  atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
	  args = tree_cons (NULL, atype, args);
	}

      type = build_function_type (rtype, args);
    }
  else
    {
      unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
      enum ix86_builtin_func_type icode;

      icode = ix86_builtin_func_alias_base[index];
      type = ix86_get_builtin_func_type (icode);
    }

  ix86_builtin_func_type_tab[(int) tcode] = type;
  return type;
}
/* Codes for all the SSE/MMX builtins.  */
enum ix86_builtins
{
  IX86_BUILTIN_ADDPS,
  IX86_BUILTIN_ADDSS,
  IX86_BUILTIN_DIVPS,
  IX86_BUILTIN_DIVSS,
  IX86_BUILTIN_MULPS,
  IX86_BUILTIN_MULSS,
  IX86_BUILTIN_SUBPS,
  IX86_BUILTIN_SUBSS,

  IX86_BUILTIN_CMPEQPS,
  IX86_BUILTIN_CMPLTPS,
  IX86_BUILTIN_CMPLEPS,
  IX86_BUILTIN_CMPGTPS,
  IX86_BUILTIN_CMPGEPS,
  IX86_BUILTIN_CMPNEQPS,
  IX86_BUILTIN_CMPNLTPS,
  IX86_BUILTIN_CMPNLEPS,
  IX86_BUILTIN_CMPNGTPS,
  IX86_BUILTIN_CMPNGEPS,
  IX86_BUILTIN_CMPORDPS,
  IX86_BUILTIN_CMPUNORDPS,
  IX86_BUILTIN_CMPEQSS,
  IX86_BUILTIN_CMPLTSS,
  IX86_BUILTIN_CMPLESS,
  IX86_BUILTIN_CMPNEQSS,
  IX86_BUILTIN_CMPNLTSS,
  IX86_BUILTIN_CMPNLESS,
  IX86_BUILTIN_CMPNGTSS,
  IX86_BUILTIN_CMPNGESS,
  IX86_BUILTIN_CMPORDSS,
  IX86_BUILTIN_CMPUNORDSS,

  IX86_BUILTIN_COMIEQSS,
  IX86_BUILTIN_COMILTSS,
  IX86_BUILTIN_COMILESS,
  IX86_BUILTIN_COMIGTSS,
  IX86_BUILTIN_COMIGESS,
  IX86_BUILTIN_COMINEQSS,
  IX86_BUILTIN_UCOMIEQSS,
  IX86_BUILTIN_UCOMILTSS,
  IX86_BUILTIN_UCOMILESS,
  IX86_BUILTIN_UCOMIGTSS,
  IX86_BUILTIN_UCOMIGESS,
  IX86_BUILTIN_UCOMINEQSS,

  IX86_BUILTIN_CVTPI2PS,
  IX86_BUILTIN_CVTPS2PI,
  IX86_BUILTIN_CVTSI2SS,
  IX86_BUILTIN_CVTSI642SS,
  IX86_BUILTIN_CVTSS2SI,
  IX86_BUILTIN_CVTSS2SI64,
  IX86_BUILTIN_CVTTPS2PI,
  IX86_BUILTIN_CVTTSS2SI,
  IX86_BUILTIN_CVTTSS2SI64,

  IX86_BUILTIN_MAXPS,
  IX86_BUILTIN_MAXSS,
  IX86_BUILTIN_MINPS,
  IX86_BUILTIN_MINSS,

  IX86_BUILTIN_LOADUPS,
  IX86_BUILTIN_STOREUPS,
  IX86_BUILTIN_MOVSS,

  IX86_BUILTIN_MOVHLPS,
  IX86_BUILTIN_MOVLHPS,
  IX86_BUILTIN_LOADHPS,
  IX86_BUILTIN_LOADLPS,
  IX86_BUILTIN_STOREHPS,
  IX86_BUILTIN_STORELPS,

  IX86_BUILTIN_MASKMOVQ,
  IX86_BUILTIN_MOVMSKPS,
  IX86_BUILTIN_PMOVMSKB,

  IX86_BUILTIN_MOVNTPS,
  IX86_BUILTIN_MOVNTQ,

  IX86_BUILTIN_LOADDQU,
  IX86_BUILTIN_STOREDQU,

  IX86_BUILTIN_PACKSSWB,
  IX86_BUILTIN_PACKSSDW,
  IX86_BUILTIN_PACKUSWB,

  IX86_BUILTIN_PADDB,
  IX86_BUILTIN_PADDW,
  IX86_BUILTIN_PADDD,
  IX86_BUILTIN_PADDQ,
  IX86_BUILTIN_PADDSB,
  IX86_BUILTIN_PADDSW,
  IX86_BUILTIN_PADDUSB,
  IX86_BUILTIN_PADDUSW,
  IX86_BUILTIN_PSUBB,
  IX86_BUILTIN_PSUBW,
  IX86_BUILTIN_PSUBD,
  IX86_BUILTIN_PSUBQ,
  IX86_BUILTIN_PSUBSB,
  IX86_BUILTIN_PSUBSW,
  IX86_BUILTIN_PSUBUSB,
  IX86_BUILTIN_PSUBUSW,

  IX86_BUILTIN_PAND,
  IX86_BUILTIN_PANDN,
  IX86_BUILTIN_POR,
  IX86_BUILTIN_PXOR,

  IX86_BUILTIN_PAVGB,
  IX86_BUILTIN_PAVGW,

  IX86_BUILTIN_PCMPEQB,
  IX86_BUILTIN_PCMPEQW,
  IX86_BUILTIN_PCMPEQD,
  IX86_BUILTIN_PCMPGTB,
  IX86_BUILTIN_PCMPGTW,
  IX86_BUILTIN_PCMPGTD,

  IX86_BUILTIN_PMADDWD,

  IX86_BUILTIN_PMAXSW,
  IX86_BUILTIN_PMAXUB,
  IX86_BUILTIN_PMINSW,
  IX86_BUILTIN_PMINUB,

  IX86_BUILTIN_PMULHUW,
  IX86_BUILTIN_PMULHW,
  IX86_BUILTIN_PMULLW,

  IX86_BUILTIN_PSADBW,
  IX86_BUILTIN_PSHUFW,

  IX86_BUILTIN_PSLLW,
  IX86_BUILTIN_PSLLD,
  IX86_BUILTIN_PSLLQ,
  IX86_BUILTIN_PSRAW,
  IX86_BUILTIN_PSRAD,
  IX86_BUILTIN_PSRLW,
  IX86_BUILTIN_PSRLD,
  IX86_BUILTIN_PSRLQ,
  IX86_BUILTIN_PSLLWI,
  IX86_BUILTIN_PSLLDI,
  IX86_BUILTIN_PSLLQI,
  IX86_BUILTIN_PSRAWI,
  IX86_BUILTIN_PSRADI,
  IX86_BUILTIN_PSRLWI,
  IX86_BUILTIN_PSRLDI,
  IX86_BUILTIN_PSRLQI,

  IX86_BUILTIN_PUNPCKHBW,
  IX86_BUILTIN_PUNPCKHWD,
  IX86_BUILTIN_PUNPCKHDQ,
  IX86_BUILTIN_PUNPCKLBW,
  IX86_BUILTIN_PUNPCKLWD,
  IX86_BUILTIN_PUNPCKLDQ,

  IX86_BUILTIN_SHUFPS,

  IX86_BUILTIN_RCPPS,
  IX86_BUILTIN_RCPSS,
  IX86_BUILTIN_RSQRTPS,
  IX86_BUILTIN_RSQRTPS_NR,
  IX86_BUILTIN_RSQRTSS,
  IX86_BUILTIN_RSQRTF,
  IX86_BUILTIN_SQRTPS,
  IX86_BUILTIN_SQRTPS_NR,
  IX86_BUILTIN_SQRTSS,

  IX86_BUILTIN_UNPCKHPS,
  IX86_BUILTIN_UNPCKLPS,

  IX86_BUILTIN_ANDPS,
  IX86_BUILTIN_ANDNPS,
  IX86_BUILTIN_ORPS,
  IX86_BUILTIN_XORPS,

  IX86_BUILTIN_LDMXCSR,
  IX86_BUILTIN_STMXCSR,
  IX86_BUILTIN_SFENCE,

  /* 3DNow! Original */
  IX86_BUILTIN_FEMMS,
  IX86_BUILTIN_PAVGUSB,
  IX86_BUILTIN_PF2ID,
  IX86_BUILTIN_PFACC,
  IX86_BUILTIN_PFADD,
  IX86_BUILTIN_PFCMPEQ,
  IX86_BUILTIN_PFCMPGE,
  IX86_BUILTIN_PFCMPGT,
  IX86_BUILTIN_PFMAX,
  IX86_BUILTIN_PFMIN,
  IX86_BUILTIN_PFMUL,
  IX86_BUILTIN_PFRCP,
  IX86_BUILTIN_PFRCPIT1,
  IX86_BUILTIN_PFRCPIT2,
  IX86_BUILTIN_PFRSQIT1,
  IX86_BUILTIN_PFRSQRT,
  IX86_BUILTIN_PFSUB,
  IX86_BUILTIN_PFSUBR,
  IX86_BUILTIN_PI2FD,
  IX86_BUILTIN_PMULHRW,

  /* 3DNow! Athlon Extensions */
  IX86_BUILTIN_PF2IW,
  IX86_BUILTIN_PFNACC,
  IX86_BUILTIN_PFPNACC,
  IX86_BUILTIN_PI2FW,
  IX86_BUILTIN_PSWAPDSI,
  IX86_BUILTIN_PSWAPDSF,

  /* SSE2 */
  IX86_BUILTIN_ADDPD,
  IX86_BUILTIN_ADDSD,
  IX86_BUILTIN_DIVPD,
  IX86_BUILTIN_DIVSD,
  IX86_BUILTIN_MULPD,
  IX86_BUILTIN_MULSD,
  IX86_BUILTIN_SUBPD,
  IX86_BUILTIN_SUBSD,

  IX86_BUILTIN_CMPEQPD,
  IX86_BUILTIN_CMPLTPD,
  IX86_BUILTIN_CMPLEPD,
  IX86_BUILTIN_CMPGTPD,
  IX86_BUILTIN_CMPGEPD,
  IX86_BUILTIN_CMPNEQPD,
  IX86_BUILTIN_CMPNLTPD,
  IX86_BUILTIN_CMPNLEPD,
  IX86_BUILTIN_CMPNGTPD,
  IX86_BUILTIN_CMPNGEPD,
  IX86_BUILTIN_CMPORDPD,
  IX86_BUILTIN_CMPUNORDPD,
  IX86_BUILTIN_CMPEQSD,
  IX86_BUILTIN_CMPLTSD,
  IX86_BUILTIN_CMPLESD,
  IX86_BUILTIN_CMPNEQSD,
  IX86_BUILTIN_CMPNLTSD,
  IX86_BUILTIN_CMPNLESD,
  IX86_BUILTIN_CMPORDSD,
  IX86_BUILTIN_CMPUNORDSD,

  IX86_BUILTIN_COMIEQSD,
  IX86_BUILTIN_COMILTSD,
  IX86_BUILTIN_COMILESD,
  IX86_BUILTIN_COMIGTSD,
  IX86_BUILTIN_COMIGESD,
  IX86_BUILTIN_COMINEQSD,
  IX86_BUILTIN_UCOMIEQSD,
  IX86_BUILTIN_UCOMILTSD,
  IX86_BUILTIN_UCOMILESD,
  IX86_BUILTIN_UCOMIGTSD,
  IX86_BUILTIN_UCOMIGESD,
  IX86_BUILTIN_UCOMINEQSD,

  IX86_BUILTIN_MAXPD,
  IX86_BUILTIN_MAXSD,
  IX86_BUILTIN_MINPD,
  IX86_BUILTIN_MINSD,

  IX86_BUILTIN_ANDPD,
  IX86_BUILTIN_ANDNPD,
  IX86_BUILTIN_ORPD,
  IX86_BUILTIN_XORPD,

  IX86_BUILTIN_SQRTPD,
  IX86_BUILTIN_SQRTSD,

  IX86_BUILTIN_UNPCKHPD,
  IX86_BUILTIN_UNPCKLPD,

  IX86_BUILTIN_SHUFPD,

  IX86_BUILTIN_LOADUPD,
  IX86_BUILTIN_STOREUPD,
  IX86_BUILTIN_MOVSD,

  IX86_BUILTIN_LOADHPD,
  IX86_BUILTIN_LOADLPD,

  IX86_BUILTIN_CVTDQ2PD,
  IX86_BUILTIN_CVTDQ2PS,

  IX86_BUILTIN_CVTPD2DQ,
  IX86_BUILTIN_CVTPD2PI,
  IX86_BUILTIN_CVTPD2PS,
  IX86_BUILTIN_CVTTPD2DQ,
  IX86_BUILTIN_CVTTPD2PI,

  IX86_BUILTIN_CVTPI2PD,
  IX86_BUILTIN_CVTSI2SD,
  IX86_BUILTIN_CVTSI642SD,

  IX86_BUILTIN_CVTSD2SI,
  IX86_BUILTIN_CVTSD2SI64,
  IX86_BUILTIN_CVTSD2SS,
  IX86_BUILTIN_CVTSS2SD,
  IX86_BUILTIN_CVTTSD2SI,
  IX86_BUILTIN_CVTTSD2SI64,

  IX86_BUILTIN_CVTPS2DQ,
  IX86_BUILTIN_CVTPS2PD,
  IX86_BUILTIN_CVTTPS2DQ,

  IX86_BUILTIN_MOVNTI,
  IX86_BUILTIN_MOVNTI64,
  IX86_BUILTIN_MOVNTPD,
  IX86_BUILTIN_MOVNTDQ,

  IX86_BUILTIN_MOVQ128,

  IX86_BUILTIN_MASKMOVDQU,
  IX86_BUILTIN_MOVMSKPD,
  IX86_BUILTIN_PMOVMSKB128,

  IX86_BUILTIN_PACKSSWB128,
  IX86_BUILTIN_PACKSSDW128,
  IX86_BUILTIN_PACKUSWB128,

  IX86_BUILTIN_PADDB128,
  IX86_BUILTIN_PADDW128,
  IX86_BUILTIN_PADDD128,
  IX86_BUILTIN_PADDQ128,
  IX86_BUILTIN_PADDSB128,
  IX86_BUILTIN_PADDSW128,
  IX86_BUILTIN_PADDUSB128,
  IX86_BUILTIN_PADDUSW128,
  IX86_BUILTIN_PSUBB128,
  IX86_BUILTIN_PSUBW128,
  IX86_BUILTIN_PSUBD128,
  IX86_BUILTIN_PSUBQ128,
  IX86_BUILTIN_PSUBSB128,
  IX86_BUILTIN_PSUBSW128,
  IX86_BUILTIN_PSUBUSB128,
  IX86_BUILTIN_PSUBUSW128,

  IX86_BUILTIN_PAND128,
  IX86_BUILTIN_PANDN128,
  IX86_BUILTIN_POR128,
  IX86_BUILTIN_PXOR128,

  IX86_BUILTIN_PAVGB128,
  IX86_BUILTIN_PAVGW128,

  IX86_BUILTIN_PCMPEQB128,
  IX86_BUILTIN_PCMPEQW128,
  IX86_BUILTIN_PCMPEQD128,
  IX86_BUILTIN_PCMPGTB128,
  IX86_BUILTIN_PCMPGTW128,
  IX86_BUILTIN_PCMPGTD128,

  IX86_BUILTIN_PMADDWD128,

  IX86_BUILTIN_PMAXSW128,
  IX86_BUILTIN_PMAXUB128,
  IX86_BUILTIN_PMINSW128,
  IX86_BUILTIN_PMINUB128,

  IX86_BUILTIN_PMULUDQ,
  IX86_BUILTIN_PMULUDQ128,
  IX86_BUILTIN_PMULHUW128,
  IX86_BUILTIN_PMULHW128,
  IX86_BUILTIN_PMULLW128,

  IX86_BUILTIN_PSADBW128,
  IX86_BUILTIN_PSHUFHW,
  IX86_BUILTIN_PSHUFLW,
  IX86_BUILTIN_PSHUFD,

  IX86_BUILTIN_PSLLDQI128,
  IX86_BUILTIN_PSLLWI128,
  IX86_BUILTIN_PSLLDI128,
  IX86_BUILTIN_PSLLQI128,
  IX86_BUILTIN_PSRAWI128,
  IX86_BUILTIN_PSRADI128,
  IX86_BUILTIN_PSRLDQI128,
  IX86_BUILTIN_PSRLWI128,
  IX86_BUILTIN_PSRLDI128,
  IX86_BUILTIN_PSRLQI128,

  IX86_BUILTIN_PSLLDQ128,
  IX86_BUILTIN_PSLLW128,
  IX86_BUILTIN_PSLLD128,
  IX86_BUILTIN_PSLLQ128,
  IX86_BUILTIN_PSRAW128,
  IX86_BUILTIN_PSRAD128,
  IX86_BUILTIN_PSRLW128,
  IX86_BUILTIN_PSRLD128,
  IX86_BUILTIN_PSRLQ128,

  IX86_BUILTIN_PUNPCKHBW128,
  IX86_BUILTIN_PUNPCKHWD128,
  IX86_BUILTIN_PUNPCKHDQ128,
  IX86_BUILTIN_PUNPCKHQDQ128,
  IX86_BUILTIN_PUNPCKLBW128,
  IX86_BUILTIN_PUNPCKLWD128,
  IX86_BUILTIN_PUNPCKLDQ128,
  IX86_BUILTIN_PUNPCKLQDQ128,

  IX86_BUILTIN_CLFLUSH,
  IX86_BUILTIN_MFENCE,
  IX86_BUILTIN_LFENCE,
  IX86_BUILTIN_PAUSE,

  IX86_BUILTIN_BSRSI,
  IX86_BUILTIN_BSRDI,
  IX86_BUILTIN_RDPMC,
  IX86_BUILTIN_RDTSC,
  IX86_BUILTIN_RDTSCP,
  IX86_BUILTIN_ROLQI,
  IX86_BUILTIN_ROLHI,
  IX86_BUILTIN_RORQI,
  IX86_BUILTIN_RORHI,

  /* SSE3.  */
  IX86_BUILTIN_ADDSUBPS,
  IX86_BUILTIN_HADDPS,
  IX86_BUILTIN_HSUBPS,
  IX86_BUILTIN_MOVSHDUP,
  IX86_BUILTIN_MOVSLDUP,
  IX86_BUILTIN_ADDSUBPD,
  IX86_BUILTIN_HADDPD,
  IX86_BUILTIN_HSUBPD,
  IX86_BUILTIN_LDDQU,

  IX86_BUILTIN_MONITOR,
  IX86_BUILTIN_MWAIT,

  /* SSSE3.  */
  IX86_BUILTIN_PHADDW,
  IX86_BUILTIN_PHADDD,
  IX86_BUILTIN_PHADDSW,
  IX86_BUILTIN_PHSUBW,
  IX86_BUILTIN_PHSUBD,
  IX86_BUILTIN_PHSUBSW,
  IX86_BUILTIN_PMADDUBSW,
  IX86_BUILTIN_PMULHRSW,
  IX86_BUILTIN_PSHUFB,
  IX86_BUILTIN_PSIGNB,
  IX86_BUILTIN_PSIGNW,
  IX86_BUILTIN_PSIGND,
  IX86_BUILTIN_PALIGNR,
  IX86_BUILTIN_PABSB,
  IX86_BUILTIN_PABSW,
  IX86_BUILTIN_PABSD,

  IX86_BUILTIN_PHADDW128,
  IX86_BUILTIN_PHADDD128,
  IX86_BUILTIN_PHADDSW128,
  IX86_BUILTIN_PHSUBW128,
  IX86_BUILTIN_PHSUBD128,
  IX86_BUILTIN_PHSUBSW128,
  IX86_BUILTIN_PMADDUBSW128,
  IX86_BUILTIN_PMULHRSW128,
  IX86_BUILTIN_PSHUFB128,
  IX86_BUILTIN_PSIGNB128,
  IX86_BUILTIN_PSIGNW128,
  IX86_BUILTIN_PSIGND128,
  IX86_BUILTIN_PALIGNR128,
  IX86_BUILTIN_PABSB128,
  IX86_BUILTIN_PABSW128,
  IX86_BUILTIN_PABSD128,

  /* AMDFAM10 - SSE4A New Instructions.  */
  IX86_BUILTIN_MOVNTSD,
  IX86_BUILTIN_MOVNTSS,
  IX86_BUILTIN_EXTRQI,
  IX86_BUILTIN_EXTRQ,
  IX86_BUILTIN_INSERTQI,
  IX86_BUILTIN_INSERTQ,

  /* SSE4.1.  */
  IX86_BUILTIN_BLENDPD,
  IX86_BUILTIN_BLENDPS,
  IX86_BUILTIN_BLENDVPD,
  IX86_BUILTIN_BLENDVPS,
  IX86_BUILTIN_PBLENDVB128,
  IX86_BUILTIN_PBLENDW128,

  IX86_BUILTIN_DPPD,
  IX86_BUILTIN_DPPS,

  IX86_BUILTIN_INSERTPS128,

  IX86_BUILTIN_MOVNTDQA,
  IX86_BUILTIN_MPSADBW128,
  IX86_BUILTIN_PACKUSDW128,
  IX86_BUILTIN_PCMPEQQ,
  IX86_BUILTIN_PHMINPOSUW128,

  IX86_BUILTIN_PMAXSB128,
  IX86_BUILTIN_PMAXSD128,
  IX86_BUILTIN_PMAXUD128,
  IX86_BUILTIN_PMAXUW128,

  IX86_BUILTIN_PMINSB128,
  IX86_BUILTIN_PMINSD128,
  IX86_BUILTIN_PMINUD128,
  IX86_BUILTIN_PMINUW128,

  IX86_BUILTIN_PMOVSXBW128,
  IX86_BUILTIN_PMOVSXBD128,
  IX86_BUILTIN_PMOVSXBQ128,
  IX86_BUILTIN_PMOVSXWD128,
  IX86_BUILTIN_PMOVSXWQ128,
  IX86_BUILTIN_PMOVSXDQ128,

  IX86_BUILTIN_PMOVZXBW128,
  IX86_BUILTIN_PMOVZXBD128,
  IX86_BUILTIN_PMOVZXBQ128,
  IX86_BUILTIN_PMOVZXWD128,
  IX86_BUILTIN_PMOVZXWQ128,
  IX86_BUILTIN_PMOVZXDQ128,

  IX86_BUILTIN_PMULDQ128,
  IX86_BUILTIN_PMULLD128,

  IX86_BUILTIN_ROUNDSD,
  IX86_BUILTIN_ROUNDSS,

  IX86_BUILTIN_ROUNDPD,
  IX86_BUILTIN_ROUNDPS,

  IX86_BUILTIN_FLOORPD,
  IX86_BUILTIN_CEILPD,
  IX86_BUILTIN_TRUNCPD,
  IX86_BUILTIN_RINTPD,
  IX86_BUILTIN_ROUNDPD_AZ,

  IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
  IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
  IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,

  IX86_BUILTIN_FLOORPS,
  IX86_BUILTIN_CEILPS,
  IX86_BUILTIN_TRUNCPS,
  IX86_BUILTIN_RINTPS,
  IX86_BUILTIN_ROUNDPS_AZ,

  IX86_BUILTIN_FLOORPS_SFIX,
  IX86_BUILTIN_CEILPS_SFIX,
  IX86_BUILTIN_ROUNDPS_AZ_SFIX,

  IX86_BUILTIN_PTESTZ,
  IX86_BUILTIN_PTESTC,
  IX86_BUILTIN_PTESTNZC,

  IX86_BUILTIN_VEC_INIT_V2SI,
  IX86_BUILTIN_VEC_INIT_V4HI,
  IX86_BUILTIN_VEC_INIT_V8QI,
  IX86_BUILTIN_VEC_EXT_V2DF,
  IX86_BUILTIN_VEC_EXT_V2DI,
  IX86_BUILTIN_VEC_EXT_V4SF,
  IX86_BUILTIN_VEC_EXT_V4SI,
  IX86_BUILTIN_VEC_EXT_V8HI,
  IX86_BUILTIN_VEC_EXT_V2SI,
  IX86_BUILTIN_VEC_EXT_V4HI,
  IX86_BUILTIN_VEC_EXT_V16QI,
  IX86_BUILTIN_VEC_SET_V2DI,
  IX86_BUILTIN_VEC_SET_V4SF,
  IX86_BUILTIN_VEC_SET_V4SI,
  IX86_BUILTIN_VEC_SET_V8HI,
  IX86_BUILTIN_VEC_SET_V4HI,
  IX86_BUILTIN_VEC_SET_V16QI,

  IX86_BUILTIN_VEC_PACK_SFIX,
  IX86_BUILTIN_VEC_PACK_SFIX256,

  /* SSE4.2.  */
  IX86_BUILTIN_CRC32QI,
  IX86_BUILTIN_CRC32HI,
  IX86_BUILTIN_CRC32SI,
  IX86_BUILTIN_CRC32DI,

  IX86_BUILTIN_PCMPESTRI128,
  IX86_BUILTIN_PCMPESTRM128,
  IX86_BUILTIN_PCMPESTRA128,
  IX86_BUILTIN_PCMPESTRC128,
  IX86_BUILTIN_PCMPESTRO128,
  IX86_BUILTIN_PCMPESTRS128,
  IX86_BUILTIN_PCMPESTRZ128,
  IX86_BUILTIN_PCMPISTRI128,
  IX86_BUILTIN_PCMPISTRM128,
  IX86_BUILTIN_PCMPISTRA128,
  IX86_BUILTIN_PCMPISTRC128,
  IX86_BUILTIN_PCMPISTRO128,
  IX86_BUILTIN_PCMPISTRS128,
  IX86_BUILTIN_PCMPISTRZ128,

  IX86_BUILTIN_PCMPGTQ,

  /* AES instructions */
  IX86_BUILTIN_AESENC128,
  IX86_BUILTIN_AESENCLAST128,
  IX86_BUILTIN_AESDEC128,
  IX86_BUILTIN_AESDECLAST128,
  IX86_BUILTIN_AESIMC128,
  IX86_BUILTIN_AESKEYGENASSIST128,

  /* PCLMUL instruction */
  IX86_BUILTIN_PCLMULQDQ128,

  /* AVX */
  IX86_BUILTIN_ADDPD256,
  IX86_BUILTIN_ADDPS256,
  IX86_BUILTIN_ADDSUBPD256,
  IX86_BUILTIN_ADDSUBPS256,
  IX86_BUILTIN_ANDPD256,
  IX86_BUILTIN_ANDPS256,
  IX86_BUILTIN_ANDNPD256,
  IX86_BUILTIN_ANDNPS256,
  IX86_BUILTIN_BLENDPD256,
  IX86_BUILTIN_BLENDPS256,
  IX86_BUILTIN_BLENDVPD256,
  IX86_BUILTIN_BLENDVPS256,
  IX86_BUILTIN_DIVPD256,
  IX86_BUILTIN_DIVPS256,
  IX86_BUILTIN_DPPS256,
  IX86_BUILTIN_HADDPD256,
  IX86_BUILTIN_HADDPS256,
  IX86_BUILTIN_HSUBPD256,
  IX86_BUILTIN_HSUBPS256,
  IX86_BUILTIN_MAXPD256,
  IX86_BUILTIN_MAXPS256,
  IX86_BUILTIN_MINPD256,
  IX86_BUILTIN_MINPS256,
  IX86_BUILTIN_MULPD256,
  IX86_BUILTIN_MULPS256,
  IX86_BUILTIN_ORPD256,
  IX86_BUILTIN_ORPS256,
  IX86_BUILTIN_SHUFPD256,
  IX86_BUILTIN_SHUFPS256,
  IX86_BUILTIN_SUBPD256,
  IX86_BUILTIN_SUBPS256,
  IX86_BUILTIN_XORPD256,
  IX86_BUILTIN_XORPS256,
  IX86_BUILTIN_CMPSD,
  IX86_BUILTIN_CMPSS,
  IX86_BUILTIN_CMPPD,
  IX86_BUILTIN_CMPPS,
  IX86_BUILTIN_CMPPD256,
  IX86_BUILTIN_CMPPS256,
  IX86_BUILTIN_CVTDQ2PD256,
  IX86_BUILTIN_CVTDQ2PS256,
  IX86_BUILTIN_CVTPD2PS256,
  IX86_BUILTIN_CVTPS2DQ256,
  IX86_BUILTIN_CVTPS2PD256,
  IX86_BUILTIN_CVTTPD2DQ256,
  IX86_BUILTIN_CVTPD2DQ256,
  IX86_BUILTIN_CVTTPS2DQ256,
  IX86_BUILTIN_EXTRACTF128PD256,
  IX86_BUILTIN_EXTRACTF128PS256,
  IX86_BUILTIN_EXTRACTF128SI256,
  IX86_BUILTIN_VZEROALL,
  IX86_BUILTIN_VZEROUPPER,
  IX86_BUILTIN_VPERMILVARPD,
  IX86_BUILTIN_VPERMILVARPS,
  IX86_BUILTIN_VPERMILVARPD256,
  IX86_BUILTIN_VPERMILVARPS256,
  IX86_BUILTIN_VPERMILPD,
  IX86_BUILTIN_VPERMILPS,
  IX86_BUILTIN_VPERMILPD256,
  IX86_BUILTIN_VPERMILPS256,
  IX86_BUILTIN_VPERMIL2PD,
  IX86_BUILTIN_VPERMIL2PS,
  IX86_BUILTIN_VPERMIL2PD256,
  IX86_BUILTIN_VPERMIL2PS256,
  IX86_BUILTIN_VPERM2F128PD256,
  IX86_BUILTIN_VPERM2F128PS256,
  IX86_BUILTIN_VPERM2F128SI256,
  IX86_BUILTIN_VBROADCASTSS,
  IX86_BUILTIN_VBROADCASTSD256,
  IX86_BUILTIN_VBROADCASTSS256,
  IX86_BUILTIN_VBROADCASTPD256,
  IX86_BUILTIN_VBROADCASTPS256,
  IX86_BUILTIN_VINSERTF128PD256,
  IX86_BUILTIN_VINSERTF128PS256,
  IX86_BUILTIN_VINSERTF128SI256,
  IX86_BUILTIN_LOADUPD256,
  IX86_BUILTIN_LOADUPS256,
  IX86_BUILTIN_STOREUPD256,
  IX86_BUILTIN_STOREUPS256,
  IX86_BUILTIN_LDDQU256,
  IX86_BUILTIN_MOVNTDQ256,
  IX86_BUILTIN_MOVNTPD256,
  IX86_BUILTIN_MOVNTPS256,
  IX86_BUILTIN_LOADDQU256,
  IX86_BUILTIN_STOREDQU256,
  IX86_BUILTIN_MASKLOADPD,
  IX86_BUILTIN_MASKLOADPS,
  IX86_BUILTIN_MASKSTOREPD,
  IX86_BUILTIN_MASKSTOREPS,
  IX86_BUILTIN_MASKLOADPD256,
  IX86_BUILTIN_MASKLOADPS256,
  IX86_BUILTIN_MASKSTOREPD256,
  IX86_BUILTIN_MASKSTOREPS256,
  IX86_BUILTIN_MOVSHDUP256,
  IX86_BUILTIN_MOVSLDUP256,
  IX86_BUILTIN_MOVDDUP256,

  IX86_BUILTIN_SQRTPD256,
  IX86_BUILTIN_SQRTPS256,
  IX86_BUILTIN_SQRTPS_NR256,
  IX86_BUILTIN_RSQRTPS256,
  IX86_BUILTIN_RSQRTPS_NR256,

  IX86_BUILTIN_RCPPS256,

  IX86_BUILTIN_ROUNDPD256,
  IX86_BUILTIN_ROUNDPS256,

  IX86_BUILTIN_FLOORPD256,
  IX86_BUILTIN_CEILPD256,
  IX86_BUILTIN_TRUNCPD256,
  IX86_BUILTIN_RINTPD256,
  IX86_BUILTIN_ROUNDPD_AZ256,

  IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
  IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
  IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,

  IX86_BUILTIN_FLOORPS256,
  IX86_BUILTIN_CEILPS256,
  IX86_BUILTIN_TRUNCPS256,
  IX86_BUILTIN_RINTPS256,
  IX86_BUILTIN_ROUNDPS_AZ256,

  IX86_BUILTIN_FLOORPS_SFIX256,
  IX86_BUILTIN_CEILPS_SFIX256,
  IX86_BUILTIN_ROUNDPS_AZ_SFIX256,

  IX86_BUILTIN_UNPCKHPD256,
  IX86_BUILTIN_UNPCKLPD256,
  IX86_BUILTIN_UNPCKHPS256,
  IX86_BUILTIN_UNPCKLPS256,

  IX86_BUILTIN_SI256_SI,
  IX86_BUILTIN_PS256_PS,
  IX86_BUILTIN_PD256_PD,
  IX86_BUILTIN_SI_SI256,
  IX86_BUILTIN_PS_PS256,
  IX86_BUILTIN_PD_PD256,

  IX86_BUILTIN_VTESTZPD,
  IX86_BUILTIN_VTESTCPD,
  IX86_BUILTIN_VTESTNZCPD,
  IX86_BUILTIN_VTESTZPS,
  IX86_BUILTIN_VTESTCPS,
  IX86_BUILTIN_VTESTNZCPS,
  IX86_BUILTIN_VTESTZPD256,
  IX86_BUILTIN_VTESTCPD256,
  IX86_BUILTIN_VTESTNZCPD256,
  IX86_BUILTIN_VTESTZPS256,
  IX86_BUILTIN_VTESTCPS256,
  IX86_BUILTIN_VTESTNZCPS256,
  IX86_BUILTIN_PTESTZ256,
  IX86_BUILTIN_PTESTC256,
  IX86_BUILTIN_PTESTNZC256,

  IX86_BUILTIN_MOVMSKPD256,
  IX86_BUILTIN_MOVMSKPS256,

  /* AVX2 */
  IX86_BUILTIN_MPSADBW256,
  IX86_BUILTIN_PABSB256,
  IX86_BUILTIN_PABSW256,
  IX86_BUILTIN_PABSD256,
  IX86_BUILTIN_PACKSSDW256,
  IX86_BUILTIN_PACKSSWB256,
  IX86_BUILTIN_PACKUSDW256,
  IX86_BUILTIN_PACKUSWB256,
  IX86_BUILTIN_PADDB256,
  IX86_BUILTIN_PADDW256,
  IX86_BUILTIN_PADDD256,
  IX86_BUILTIN_PADDQ256,
  IX86_BUILTIN_PADDSB256,
  IX86_BUILTIN_PADDSW256,
  IX86_BUILTIN_PADDUSB256,
  IX86_BUILTIN_PADDUSW256,
  IX86_BUILTIN_PALIGNR256,
  IX86_BUILTIN_AND256I,
  IX86_BUILTIN_ANDNOT256I,
  IX86_BUILTIN_PAVGB256,
  IX86_BUILTIN_PAVGW256,
  IX86_BUILTIN_PBLENDVB256,
  IX86_BUILTIN_PBLENDVW256,
  IX86_BUILTIN_PCMPEQB256,
  IX86_BUILTIN_PCMPEQW256,
  IX86_BUILTIN_PCMPEQD256,
  IX86_BUILTIN_PCMPEQQ256,
  IX86_BUILTIN_PCMPGTB256,
  IX86_BUILTIN_PCMPGTW256,
  IX86_BUILTIN_PCMPGTD256,
  IX86_BUILTIN_PCMPGTQ256,
  IX86_BUILTIN_PHADDW256,
  IX86_BUILTIN_PHADDD256,
  IX86_BUILTIN_PHADDSW256,
  IX86_BUILTIN_PHSUBW256,
  IX86_BUILTIN_PHSUBD256,
  IX86_BUILTIN_PHSUBSW256,
  IX86_BUILTIN_PMADDUBSW256,
  IX86_BUILTIN_PMADDWD256,
  IX86_BUILTIN_PMAXSB256,
  IX86_BUILTIN_PMAXSW256,
  IX86_BUILTIN_PMAXSD256,
  IX86_BUILTIN_PMAXUB256,
  IX86_BUILTIN_PMAXUW256,
  IX86_BUILTIN_PMAXUD256,
  IX86_BUILTIN_PMINSB256,
  IX86_BUILTIN_PMINSW256,
  IX86_BUILTIN_PMINSD256,
  IX86_BUILTIN_PMINUB256,
  IX86_BUILTIN_PMINUW256,
  IX86_BUILTIN_PMINUD256,
  IX86_BUILTIN_PMOVMSKB256,
  IX86_BUILTIN_PMOVSXBW256,
  IX86_BUILTIN_PMOVSXBD256,
  IX86_BUILTIN_PMOVSXBQ256,
  IX86_BUILTIN_PMOVSXWD256,
  IX86_BUILTIN_PMOVSXWQ256,
  IX86_BUILTIN_PMOVSXDQ256,
  IX86_BUILTIN_PMOVZXBW256,
  IX86_BUILTIN_PMOVZXBD256,
  IX86_BUILTIN_PMOVZXBQ256,
  IX86_BUILTIN_PMOVZXWD256,
  IX86_BUILTIN_PMOVZXWQ256,
  IX86_BUILTIN_PMOVZXDQ256,
  IX86_BUILTIN_PMULDQ256,
  IX86_BUILTIN_PMULHRSW256,
  IX86_BUILTIN_PMULHUW256,
  IX86_BUILTIN_PMULHW256,
  IX86_BUILTIN_PMULLW256,
  IX86_BUILTIN_PMULLD256,
  IX86_BUILTIN_PMULUDQ256,
  IX86_BUILTIN_POR256,
  IX86_BUILTIN_PSADBW256,
  IX86_BUILTIN_PSHUFB256,
  IX86_BUILTIN_PSHUFD256,
  IX86_BUILTIN_PSHUFHW256,
  IX86_BUILTIN_PSHUFLW256,
  IX86_BUILTIN_PSIGNB256,
  IX86_BUILTIN_PSIGNW256,
  IX86_BUILTIN_PSIGND256,
  IX86_BUILTIN_PSLLDQI256,
  IX86_BUILTIN_PSLLWI256,
  IX86_BUILTIN_PSLLW256,
  IX86_BUILTIN_PSLLDI256,
  IX86_BUILTIN_PSLLD256,
  IX86_BUILTIN_PSLLQI256,
  IX86_BUILTIN_PSLLQ256,
  IX86_BUILTIN_PSRAWI256,
  IX86_BUILTIN_PSRAW256,
  IX86_BUILTIN_PSRADI256,
  IX86_BUILTIN_PSRAD256,
  IX86_BUILTIN_PSRLDQI256,
  IX86_BUILTIN_PSRLWI256,
  IX86_BUILTIN_PSRLW256,
  IX86_BUILTIN_PSRLDI256,
  IX86_BUILTIN_PSRLD256,
  IX86_BUILTIN_PSRLQI256,
  IX86_BUILTIN_PSRLQ256,
  IX86_BUILTIN_PSUBB256,
  IX86_BUILTIN_PSUBW256,
  IX86_BUILTIN_PSUBD256,
  IX86_BUILTIN_PSUBQ256,
  IX86_BUILTIN_PSUBSB256,
  IX86_BUILTIN_PSUBSW256,
  IX86_BUILTIN_PSUBUSB256,
  IX86_BUILTIN_PSUBUSW256,
  IX86_BUILTIN_PUNPCKHBW256,
  IX86_BUILTIN_PUNPCKHWD256,
  IX86_BUILTIN_PUNPCKHDQ256,
  IX86_BUILTIN_PUNPCKHQDQ256,
  IX86_BUILTIN_PUNPCKLBW256,
  IX86_BUILTIN_PUNPCKLWD256,
  IX86_BUILTIN_PUNPCKLDQ256,
  IX86_BUILTIN_PUNPCKLQDQ256,
  IX86_BUILTIN_PXOR256,
  IX86_BUILTIN_MOVNTDQA256,
  IX86_BUILTIN_VBROADCASTSS_PS,
  IX86_BUILTIN_VBROADCASTSS_PS256,
  IX86_BUILTIN_VBROADCASTSD_PD256,
  IX86_BUILTIN_VBROADCASTSI256,
  IX86_BUILTIN_PBLENDD256,
  IX86_BUILTIN_PBLENDD128,
  IX86_BUILTIN_PBROADCASTB256,
  IX86_BUILTIN_PBROADCASTW256,
  IX86_BUILTIN_PBROADCASTD256,
  IX86_BUILTIN_PBROADCASTQ256,
  IX86_BUILTIN_PBROADCASTB128,
  IX86_BUILTIN_PBROADCASTW128,
  IX86_BUILTIN_PBROADCASTD128,
  IX86_BUILTIN_PBROADCASTQ128,
  IX86_BUILTIN_VPERMVARSI256,
  IX86_BUILTIN_VPERMDF256,
  IX86_BUILTIN_VPERMVARSF256,
  IX86_BUILTIN_VPERMDI256,
  IX86_BUILTIN_VPERMTI256,
  IX86_BUILTIN_VEXTRACT128I256,
  IX86_BUILTIN_VINSERT128I256,
  IX86_BUILTIN_MASKLOADD,
  IX86_BUILTIN_MASKLOADQ,
  IX86_BUILTIN_MASKLOADD256,
  IX86_BUILTIN_MASKLOADQ256,
  IX86_BUILTIN_MASKSTORED,
  IX86_BUILTIN_MASKSTOREQ,
  IX86_BUILTIN_MASKSTORED256,
  IX86_BUILTIN_MASKSTOREQ256,
  IX86_BUILTIN_PSLLVV4DI,
  IX86_BUILTIN_PSLLVV2DI,
  IX86_BUILTIN_PSLLVV8SI,
  IX86_BUILTIN_PSLLVV4SI,
  IX86_BUILTIN_PSRAVV8SI,
  IX86_BUILTIN_PSRAVV4SI,
  IX86_BUILTIN_PSRLVV4DI,
  IX86_BUILTIN_PSRLVV2DI,
  IX86_BUILTIN_PSRLVV8SI,
  IX86_BUILTIN_PSRLVV4SI,

  IX86_BUILTIN_GATHERSIV2DF,
  IX86_BUILTIN_GATHERSIV4DF,
  IX86_BUILTIN_GATHERDIV2DF,
  IX86_BUILTIN_GATHERDIV4DF,
  IX86_BUILTIN_GATHERSIV4SF,
  IX86_BUILTIN_GATHERSIV8SF,
  IX86_BUILTIN_GATHERDIV4SF,
  IX86_BUILTIN_GATHERDIV8SF,
  IX86_BUILTIN_GATHERSIV2DI,
  IX86_BUILTIN_GATHERSIV4DI,
  IX86_BUILTIN_GATHERDIV2DI,
  IX86_BUILTIN_GATHERDIV4DI,
  IX86_BUILTIN_GATHERSIV4SI,
  IX86_BUILTIN_GATHERSIV8SI,
  IX86_BUILTIN_GATHERDIV4SI,
  IX86_BUILTIN_GATHERDIV8SI,

  /* Alternate 4 element gather for the vectorizer where
     all operands are 32-byte wide.  */
  IX86_BUILTIN_GATHERALTSIV4DF,
  IX86_BUILTIN_GATHERALTDIV8SF,
  IX86_BUILTIN_GATHERALTSIV4DI,
  IX86_BUILTIN_GATHERALTDIV8SI,

  /* TFmode support builtins.  */
  IX86_BUILTIN_INFQ,
  IX86_BUILTIN_HUGE_VALQ,
  IX86_BUILTIN_FABSQ,
  IX86_BUILTIN_COPYSIGNQ,

  /* Vectorizer support builtins.  */
  IX86_BUILTIN_CPYSGNPS,
  IX86_BUILTIN_CPYSGNPD,
  IX86_BUILTIN_CPYSGNPS256,
  IX86_BUILTIN_CPYSGNPD256,

  /* FMA4 instructions.  */
  IX86_BUILTIN_VFMADDSS,
  IX86_BUILTIN_VFMADDSD,
  IX86_BUILTIN_VFMADDPS,
  IX86_BUILTIN_VFMADDPD,
  IX86_BUILTIN_VFMADDPS256,
  IX86_BUILTIN_VFMADDPD256,
  IX86_BUILTIN_VFMADDSUBPS,
  IX86_BUILTIN_VFMADDSUBPD,
  IX86_BUILTIN_VFMADDSUBPS256,
  IX86_BUILTIN_VFMADDSUBPD256,

  /* FMA3 instructions.  */
  IX86_BUILTIN_VFMADDSS3,
  IX86_BUILTIN_VFMADDSD3,

  /* XOP instructions.  */
  IX86_BUILTIN_VPCMOV,
  IX86_BUILTIN_VPCMOV_V2DI,
  IX86_BUILTIN_VPCMOV_V4SI,
  IX86_BUILTIN_VPCMOV_V8HI,
  IX86_BUILTIN_VPCMOV_V16QI,
  IX86_BUILTIN_VPCMOV_V4SF,
  IX86_BUILTIN_VPCMOV_V2DF,
  IX86_BUILTIN_VPCMOV256,
  IX86_BUILTIN_VPCMOV_V4DI256,
  IX86_BUILTIN_VPCMOV_V8SI256,
  IX86_BUILTIN_VPCMOV_V16HI256,
  IX86_BUILTIN_VPCMOV_V32QI256,
  IX86_BUILTIN_VPCMOV_V8SF256,
  IX86_BUILTIN_VPCMOV_V4DF256,

  IX86_BUILTIN_VPPERM,

  IX86_BUILTIN_VPMACSSWW,
  IX86_BUILTIN_VPMACSWW,
  IX86_BUILTIN_VPMACSSWD,
  IX86_BUILTIN_VPMACSWD,
  IX86_BUILTIN_VPMACSSDD,
  IX86_BUILTIN_VPMACSDD,
  IX86_BUILTIN_VPMACSSDQL,
  IX86_BUILTIN_VPMACSSDQH,
  IX86_BUILTIN_VPMACSDQL,
  IX86_BUILTIN_VPMACSDQH,
  IX86_BUILTIN_VPMADCSSWD,
  IX86_BUILTIN_VPMADCSWD,

  IX86_BUILTIN_VPHADDBW,
  IX86_BUILTIN_VPHADDBD,
  IX86_BUILTIN_VPHADDBQ,
  IX86_BUILTIN_VPHADDWD,
  IX86_BUILTIN_VPHADDWQ,
  IX86_BUILTIN_VPHADDDQ,
  IX86_BUILTIN_VPHADDUBW,
  IX86_BUILTIN_VPHADDUBD,
  IX86_BUILTIN_VPHADDUBQ,
  IX86_BUILTIN_VPHADDUWD,
  IX86_BUILTIN_VPHADDUWQ,
  IX86_BUILTIN_VPHADDUDQ,
  IX86_BUILTIN_VPHSUBBW,
  IX86_BUILTIN_VPHSUBWD,
  IX86_BUILTIN_VPHSUBDQ,

  IX86_BUILTIN_VPROTB,
  IX86_BUILTIN_VPROTW,
  IX86_BUILTIN_VPROTD,
  IX86_BUILTIN_VPROTQ,
  IX86_BUILTIN_VPROTB_IMM,
  IX86_BUILTIN_VPROTW_IMM,
  IX86_BUILTIN_VPROTD_IMM,
  IX86_BUILTIN_VPROTQ_IMM,

  IX86_BUILTIN_VPSHLB,
  IX86_BUILTIN_VPSHLW,
  IX86_BUILTIN_VPSHLD,
  IX86_BUILTIN_VPSHLQ,
  IX86_BUILTIN_VPSHAB,
  IX86_BUILTIN_VPSHAW,
  IX86_BUILTIN_VPSHAD,
  IX86_BUILTIN_VPSHAQ,

  IX86_BUILTIN_VFRCZSS,
  IX86_BUILTIN_VFRCZSD,
  IX86_BUILTIN_VFRCZPS,
  IX86_BUILTIN_VFRCZPD,
  IX86_BUILTIN_VFRCZPS256,
  IX86_BUILTIN_VFRCZPD256,

  IX86_BUILTIN_VPCOMEQUB,
  IX86_BUILTIN_VPCOMNEUB,
  IX86_BUILTIN_VPCOMLTUB,
  IX86_BUILTIN_VPCOMLEUB,
  IX86_BUILTIN_VPCOMGTUB,
  IX86_BUILTIN_VPCOMGEUB,
  IX86_BUILTIN_VPCOMFALSEUB,
  IX86_BUILTIN_VPCOMTRUEUB,

  IX86_BUILTIN_VPCOMEQUW,
  IX86_BUILTIN_VPCOMNEUW,
  IX86_BUILTIN_VPCOMLTUW,
  IX86_BUILTIN_VPCOMLEUW,
  IX86_BUILTIN_VPCOMGTUW,
  IX86_BUILTIN_VPCOMGEUW,
  IX86_BUILTIN_VPCOMFALSEUW,
  IX86_BUILTIN_VPCOMTRUEUW,

  IX86_BUILTIN_VPCOMEQUD,
  IX86_BUILTIN_VPCOMNEUD,
  IX86_BUILTIN_VPCOMLTUD,
  IX86_BUILTIN_VPCOMLEUD,
  IX86_BUILTIN_VPCOMGTUD,
  IX86_BUILTIN_VPCOMGEUD,
  IX86_BUILTIN_VPCOMFALSEUD,
  IX86_BUILTIN_VPCOMTRUEUD,

  IX86_BUILTIN_VPCOMEQUQ,
  IX86_BUILTIN_VPCOMNEUQ,
  IX86_BUILTIN_VPCOMLTUQ,
  IX86_BUILTIN_VPCOMLEUQ,
  IX86_BUILTIN_VPCOMGTUQ,
  IX86_BUILTIN_VPCOMGEUQ,
  IX86_BUILTIN_VPCOMFALSEUQ,
  IX86_BUILTIN_VPCOMTRUEUQ,

  IX86_BUILTIN_VPCOMEQB,
  IX86_BUILTIN_VPCOMNEB,
  IX86_BUILTIN_VPCOMLTB,
  IX86_BUILTIN_VPCOMLEB,
  IX86_BUILTIN_VPCOMGTB,
  IX86_BUILTIN_VPCOMGEB,
  IX86_BUILTIN_VPCOMFALSEB,
  IX86_BUILTIN_VPCOMTRUEB,

  IX86_BUILTIN_VPCOMEQW,
  IX86_BUILTIN_VPCOMNEW,
  IX86_BUILTIN_VPCOMLTW,
  IX86_BUILTIN_VPCOMLEW,
  IX86_BUILTIN_VPCOMGTW,
  IX86_BUILTIN_VPCOMGEW,
  IX86_BUILTIN_VPCOMFALSEW,
  IX86_BUILTIN_VPCOMTRUEW,

  IX86_BUILTIN_VPCOMEQD,
  IX86_BUILTIN_VPCOMNED,
  IX86_BUILTIN_VPCOMLTD,
  IX86_BUILTIN_VPCOMLED,
  IX86_BUILTIN_VPCOMGTD,
  IX86_BUILTIN_VPCOMGED,
  IX86_BUILTIN_VPCOMFALSED,
  IX86_BUILTIN_VPCOMTRUED,

  IX86_BUILTIN_VPCOMEQQ,
  IX86_BUILTIN_VPCOMNEQ,
  IX86_BUILTIN_VPCOMLTQ,
  IX86_BUILTIN_VPCOMLEQ,
  IX86_BUILTIN_VPCOMGTQ,
  IX86_BUILTIN_VPCOMGEQ,
  IX86_BUILTIN_VPCOMFALSEQ,
  IX86_BUILTIN_VPCOMTRUEQ,

  /* LWP instructions.  */
  IX86_BUILTIN_LLWPCB,
  IX86_BUILTIN_SLWPCB,
  IX86_BUILTIN_LWPVAL32,
  IX86_BUILTIN_LWPVAL64,
  IX86_BUILTIN_LWPINS32,
  IX86_BUILTIN_LWPINS64,

  IX86_BUILTIN_CLZS,

  /* BMI instructions.  */
  IX86_BUILTIN_BEXTR32,
  IX86_BUILTIN_BEXTR64,
  IX86_BUILTIN_CTZS,

  /* TBM instructions.  */
  IX86_BUILTIN_BEXTRI32,
  IX86_BUILTIN_BEXTRI64,

  /* BMI2 instructions.  */
  IX86_BUILTIN_BZHI32,
  IX86_BUILTIN_BZHI64,
  IX86_BUILTIN_PDEP32,
  IX86_BUILTIN_PDEP64,
  IX86_BUILTIN_PEXT32,
  IX86_BUILTIN_PEXT64,

  /* FSGSBASE instructions.  */
  IX86_BUILTIN_RDFSBASE32,
  IX86_BUILTIN_RDFSBASE64,
  IX86_BUILTIN_RDGSBASE32,
  IX86_BUILTIN_RDGSBASE64,
  IX86_BUILTIN_WRFSBASE32,
  IX86_BUILTIN_WRFSBASE64,
  IX86_BUILTIN_WRGSBASE32,
  IX86_BUILTIN_WRGSBASE64,

  /* RDRND instructions.  */
  IX86_BUILTIN_RDRAND16_STEP,
  IX86_BUILTIN_RDRAND32_STEP,
  IX86_BUILTIN_RDRAND64_STEP,

  /* F16C instructions.  */
  IX86_BUILTIN_CVTPH2PS,
  IX86_BUILTIN_CVTPH2PS256,
  IX86_BUILTIN_CVTPS2PH,
  IX86_BUILTIN_CVTPS2PH256,

  /* CFString built-in for darwin */
  IX86_BUILTIN_CFSTRING,

  IX86_BUILTIN_MAX
};
/* Table for the ix86 builtin decls.  */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];

/* Table of all of the builtin functions that are possible with different
   ISAs but are waiting to be built until a function is declared to use
   that ISA.  */
struct builtin_isa {
  const char *name;		/* function name */
  enum ix86_builtin_func_type tcode; /* type to use in the declaration */
  HOST_WIDE_INT isa;		/* isa_flags this builtin is defined for */
  bool const_p;			/* true if the declaration is constant */
  bool set_and_not_built_p;
};

static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the
   MASK of which isa_flags to use in the ix86_builtins_isa array.  Stores the
   function decl in the ix86_builtins array.  Returns the function decl or
   NULL_TREE, if the builtin was not added.

   If the front end has a special hook for builtin functions, delay adding
   builtin functions that aren't in the current ISA until the ISA is changed
   with function specific optimization.  Doing so, can save about 300K for the
   default compiler.  When the builtin is expanded, check at that time whether
   it is valid.

   If the front end doesn't have a special hook, record all builtins, even if
   it isn't an instruction set in the current ISA in case the user uses
   function specific options for a different ISA, so that we don't get scope
   errors if a builtin is added in the middle of a function scope.  */

static inline tree
def_builtin (HOST_WIDE_INT mask, const char *name,
	     enum ix86_builtin_func_type tcode,
	     enum ix86_builtins code)
{
  tree decl = NULL_TREE;

  if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
    {
      ix86_builtins_isa[(int) code].isa = mask;

      mask &= ~OPTION_MASK_ISA_64BIT;
      if (mask == 0
	  || (mask & ix86_isa_flags) != 0
	  || (lang_hooks.builtin_function
	      == lang_hooks.builtin_function_ext_scope))
	{
	  tree type = ix86_get_builtin_func_type (tcode);
	  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
				       NULL, NULL_TREE);
	  ix86_builtins[(int) code] = decl;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = false;
	}
      else
	{
	  ix86_builtins[(int) code] = NULL_TREE;
	  ix86_builtins_isa[(int) code].tcode = tcode;
	  ix86_builtins_isa[(int) code].name = name;
	  ix86_builtins_isa[(int) code].const_p = false;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = true;
	}
    }

  return decl;
}
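/* Usage sketch (illustrative, not part of the original source): a direct
   call registering one builtin under an ISA mask might look like

     def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadups",
		  V4SF_FTYPE_PCFLOAT, IX86_BUILTIN_LOADUPS);

   When the mask is absent from ix86_isa_flags and the front end provides
   a distinct ext-scope hook, only the ix86_builtins_isa[] slot is filled
   in and creation of the decl is deferred.  */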
/* Like def_builtin, but also marks the function decl "const".  */

static inline tree
def_builtin_const (HOST_WIDE_INT mask, const char *name,
		   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
{
  tree decl = def_builtin (mask, name, tcode, code);
  if (decl)
    TREE_READONLY (decl) = 1;
  else
    ix86_builtins_isa[(int) code].const_p = true;

  return decl;
}
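/* Example (illustrative): side-effect-free builtins such as vector
   arithmetic are registered through this wrapper, e.g.

     def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_addps",
			V4SF_FTYPE_V4SF_V4SF, IX86_BUILTIN_ADDPS);

   Marking the decl TREE_READONLY lets the optimizers treat calls to it
   as pure expressions (eligible for CSE and dead-code elimination).  */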
/* Add any new builtin functions for a given ISA that may not have been
   declared.  This saves a bit of space compared to adding all of the
   declarations to the tree, even if we didn't use them.  */

static void
ix86_add_new_builtins (HOST_WIDE_INT isa)
{
  int i;

  for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
    {
      if ((ix86_builtins_isa[i].isa & isa) != 0
	  && ix86_builtins_isa[i].set_and_not_built_p)
	{
	  tree decl, type;

	  /* Don't define the builtin again.  */
	  ix86_builtins_isa[i].set_and_not_built_p = false;

	  type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
	  decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
						 type, i, BUILT_IN_MD, NULL,
						 NULL_TREE);

	  ix86_builtins[i] = decl;
	  if (ix86_builtins_isa[i].const_p)
	    TREE_READONLY (decl) = 1;
	}
    }
}
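/* Usage sketch (illustrative): when function-specific options enable an
   additional instruction set, e.g. via __attribute__((target("avx"))),
   the option-handling code re-runs

     ix86_add_new_builtins (ix86_isa_flags);

   which materializes any builtins that def_builtin had only recorded in
   ix86_builtins_isa[].  */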
/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1

struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const int flag;
};
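/* Annotated example (illustrative) of one table entry, taken from
   bdesc_comi below:

     { OPTION_MASK_ISA_SSE,       -- enabled only when SSE is active
       CODE_FOR_sse_comi,         -- insn pattern used at expansion time
       "__builtin_ia32_comieq",   -- user-visible builtin name
       IX86_BUILTIN_COMIEQSS,     -- index into ix86_builtins[]
       UNEQ,                      -- rtx comparison code (or UNKNOWN)
       0 }                        -- flag; some tables reuse it as a
                                     function-type or CC-mode value.  */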
static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
static const struct builtin_description bdesc_pcmpestr[] =
{
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};
static const struct builtin_description bdesc_pcmpistr[] =
{
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};
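/* Note (illustrative): in the two string-compare tables above, the flag
   field is reused as a flags-register mode rather than a function type:
   (int) CCZmode on "...riz128" tells the expander to return the ZF
   result of the compare, while 0 means the index/mask value computed by
   the instruction is returned directly.  */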
/* Special builtins with variable number of arguments.  */
static const struct builtin_description bdesc_special_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },

  /* SSE or 3DNow!A  */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },

  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },

  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },

  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
};
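/* Usage sketch (illustrative): the builtin-init code walks this table
   roughly as

     for (i = 0, d = bdesc_special_args;
	  i < ARRAY_SIZE (bdesc_special_args);
	  i++, d++)
       def_builtin (d->mask, d->name,
		    (enum ix86_builtin_func_type) d->flag,
		    (enum ix86_builtins) d->code);

   so for this table the "flag" field doubles as the function-type
   index.  */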
/* Builtins with variable number of arguments.  */
static const struct builtin_description bdesc_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
  { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
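  /* Note (illustrative): the *_COUNT function types mark shift builtins
     whose last operand is a shift count rather than an ordinary vector
     operand -- "__builtin_ia32_psllwi" takes an int count (SI_COUNT)
     while "__builtin_ia32_psllw" takes the count in an MMX register
     (V4HI_COUNT) -- so the expander can apply its special count-operand
     handling.  */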
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
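  /* Note on the _SWAP variants above (illustrative): SSE compare
     predicates only cover the lt/le style orderings, so "cmpgtps" is
     expanded as "cmpltps" with the two operands exchanged; the *_SWAP
     function types tell the expander to perform that operand swap,
     matching the BUILTIN_DESC_SWAP_OPERANDS convention.  */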
26141 { OPTION_MASK_ISA_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26142 { OPTION_MASK_ISA_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26143 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26144 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26146 { OPTION_MASK_ISA_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26147 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_andnotv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26148 { OPTION_MASK_ISA_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26149 { OPTION_MASK_ISA_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26151 { OPTION_MASK_ISA_SSE
, CODE_FOR_copysignv4sf3
, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26153 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26154 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movhlps_exp
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26155 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movlhps_exp
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26156 { OPTION_MASK_ISA_SSE
, CODE_FOR_vec_interleave_highv4sf
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26157 { OPTION_MASK_ISA_SSE
, CODE_FOR_vec_interleave_lowv4sf
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26159 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtpi2ps
, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2SI
},
26160 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtsi2ss
, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_SI
},
26161 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtsi2ssq
, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS
, UNKNOWN
, V4SF_FTYPE_V4SF_DI
},
26163 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtsf2
, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF
, UNKNOWN
, (int) FLOAT_FTYPE_FLOAT
},
26165 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsqrtv4sf2
, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
26166 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrsqrtv4sf2
, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
26167 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrcpv4sf2
, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
26169 /* SSE MMX or 3Dnow!A */
26170 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26171 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26172 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26174 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26175 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26176 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26177 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26179 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_psadbw
, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW
, UNKNOWN
, (int) V1DI_FTYPE_V8QI_V8QI
},
26180 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB
, UNKNOWN
, (int) INT_FTYPE_V8QI
},
26182 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pshufw
, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_INT
},
26185 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_shufpd
, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
26187 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movmskpd
, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
26188 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmovmskb
, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128
, UNKNOWN
, (int) INT_FTYPE_V16QI
},
26189 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sqrtv2df2
, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF
},
26190 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtdq2pd
, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SI
},
26191 { OPTION_MASK_ISA_SSE2
, CODE_FOR_floatv4siv4sf2
, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SI
},
26193 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2dq
, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
26194 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2pi
, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
26195 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2ps
, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS
, UNKNOWN
, (int) V4SF_FTYPE_V2DF
},
26196 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2dq
, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
26197 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2pi
, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
26199 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpi2pd
, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD
, UNKNOWN
, (int) V2DF_FTYPE_V2SI
},
26201 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2si
, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
26202 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttsd2si
, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
26203 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsd2siq
, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF
},
26204 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvttsd2siq
, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF
},
26206 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2dq
, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
26207 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2pd
, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SF
},
26208 { OPTION_MASK_ISA_SSE2
, CODE_FOR_fix_truncv4sfv4si2
, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
26210 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26211 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26212 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26213 { OPTION_MASK_ISA_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26214 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26215 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26216 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26217 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26219 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, (int) V2DF_FTYPE_V2DF_V2DF
},
26220 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF
},
26221 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF
},
26222 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
26223 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
26224 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
26225 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, (int) V2DF_FTYPE_V2DF_V2DF
},
26226 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF
},
26227 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF
},
26228 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
26229 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
26230 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
26231 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, (int) V2DF_FTYPE_V2DF_V2DF
},
26232 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF
},
26233 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF
},
26234 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
26235 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, (int) V2DF_FTYPE_V2DF_V2DF
},
26236 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF
},
26237 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF
},
26238 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
26240 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26241 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26242 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26243 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26245 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26246 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_andnotv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26247 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26248 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26250 { OPTION_MASK_ISA_SSE2
, CODE_FOR_copysignv2df3
, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26252 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26253 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv2df
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26254 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv2df
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26256 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_pack_sfix_v2df
, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V2DF_V2DF
},
26258 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26259 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26260 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26261 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26262 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26263 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26264 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26265 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26267 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26268 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
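  /* Note: in the shift entries above, a _COUNT suffix marks the count
     operand: the SI_COUNT forms take a scalar/immediate count, while the
     V8HI_COUNT, V4SI_COUNT and V2DI_COUNT forms take the count in a vector
     register, both expanding through the same ashl pattern.  An
     _INT_CONVERT suffix marks entries whose operands must be converted to
     the insn's mode (e.g. V2DI operands on the V1TI whole-register shift).  */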
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
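  /* SSE2 MMX */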
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
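  /* SSE3 */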
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
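  /* SSSE3 */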
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
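  /* SSE4.1 */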
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
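  /* SSE4.1 */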
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
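  /* Note: for the floor/ceil/trunc/rint entries the rtx comparison-code
     slot is reused to carry the rounding-mode immediate (ROUND_FLOOR,
     ROUND_CEIL, ROUND_TRUNC, ROUND_MXCSR), so a single pattern such as
     CODE_FOR_sse4_1_roundpd can back several builtins that differ only in
     that operand.  */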
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
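  /* Note: the ptest builtins share CODE_FOR_sse4_1_ptest; the comparison
     code selects which flag the expander reads back: EQ tests ZF (ptestz),
     LTU tests CF (ptestc), and GTU tests that neither ZF nor CF is set
     (ptestnzc).  */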
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
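  /* SSE4.2 */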
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
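  /* SSE4A */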
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
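  /* AES */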
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
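  /* PCLMUL */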
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
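  /* AVX */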
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
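  /* Note: the vextractf128 builtins above are what <immintrin.h> reaches
     through; e.g. _mm256_extractf128_pd (x, 1) expands via
     __builtin_ia32_vextractf128_pd256 to CODE_FOR_avx_vextractf128v4df.  */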
  { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
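  /* AVX2 */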
26647 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_mpsadbw
, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI_INT
},
26648 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv32qi2
, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI
},
26649 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv16hi2
, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI
},
26650 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv8si2
, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI
},
26651 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packssdw
, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256
, UNKNOWN
, (int) V16HI_FTYPE_V8SI_V8SI
},
26652 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packsswb
, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256
, UNKNOWN
, (int) V32QI_FTYPE_V16HI_V16HI
},
26653 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packusdw
, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256
, UNKNOWN
, (int) V16HI_FTYPE_V8SI_V8SI
},
26654 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packuswb
, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256
, UNKNOWN
, (int) V32QI_FTYPE_V16HI_V16HI
},
26655 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv32qi3
, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26656 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv16hi3
, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26657 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv8si3
, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26658 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv4di3
, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26659 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ssaddv32qi3
, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26660 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ssaddv16hi3
, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26661 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_usaddv32qi3
, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26662 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_usaddv16hi3
, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26663 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_palignrv2ti
, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT
},
26664 { OPTION_MASK_ISA_AVX2
, CODE_FOR_andv4di3
, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26665 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_andnotv4di3
, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26666 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_uavgv32qi3
, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26667 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_uavgv16hi3
, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26668 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblendvb
, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI_V32QI
},
26669 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblendw
, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI_INT
},
26670 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv32qi3
, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26671 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv16hi3
, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26672 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv8si3
, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26673 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv4di3
, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26674 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv32qi3
, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26675 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv16hi3
, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26676 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv8si3
, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26677 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv4di3
, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
26678 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phaddwv16hi3
, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26679 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phadddv8si3
, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26680 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phaddswv16hi3
, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26681 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubwv16hi3
, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26682 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubdv8si3
, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26683 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubswv16hi3
, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26684 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmaddubsw256
, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V32QI_V32QI
},
26685 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmaddwd
, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256
, UNKNOWN
, (int) V8SI_FTYPE_V16HI_V16HI
},
26686 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv32qi3
, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26687 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv16hi3
, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26688 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv8si3
, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26689 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv32qi3
, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26690 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv16hi3
, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26691 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv8si3
, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26692 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv32qi3
, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26693 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv16hi3
, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26694 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv8si3
, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26695 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv32qi3
, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
26696 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv16hi3
, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
26697 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv8si3
, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
26698 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmovmskb
, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256
, UNKNOWN
, (int) INT_FTYPE_V32QI
},
26699 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv16qiv16hi2
, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16QI
},
26700 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv8qiv8si2
, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256
, UNKNOWN
, (int) V8SI_FTYPE_V16QI
},
26701 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4qiv4di2
, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V16QI
},
26702 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv8hiv8si2
, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256
, UNKNOWN
, (int) V8SI_FTYPE_V8HI
},
26703 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4hiv4di2
, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8HI
},
26704 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4siv4di2
, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4SI
},
26705 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv16qiv16hi2
, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16QI
},
26706 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv8qiv8si2
, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256
, UNKNOWN
, (int) V8SI_FTYPE_V16QI
},
26707 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4qiv4di2
, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V16QI
},
26708 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv8hiv8si2
, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256
, UNKNOWN
, (int) V8SI_FTYPE_V8HI
},
26709 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4hiv4di2
, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8HI
},
26710 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4siv4di2
, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4SI
},
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mulv4siv4di3, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulhrswv16hi3, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256", IX86_BUILTIN_PMULHUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256", IX86_BUILTIN_PMULHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256", IX86_BUILTIN_PMULLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256", IX86_BUILTIN_PMULLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulv4siv4di3, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
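  /* Illustrative usage sketch (not part of the table, assumes -mavx2):
     the per-element variable shift builtins above back immintrin.h
     intrinsics such as _mm256_sllv_epi32, e.g.

	 __m256i shift_each (__m256i v, __m256i counts)
	 {
	   return _mm256_sllv_epi32 (v, counts);
	 }  */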
  { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
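  /* Illustrative usage sketch: __builtin_clzs and __builtin_ctzs above are
     16-bit leading/trailing zero counts, directly callable from C, e.g.

	 unsigned short first_set_bit (unsigned short x)
	 {
	   return __builtin_ctzs (x);
	 }

     (assumes -mlzcnt resp. -mbmi so the corresponding table entries are
     enabled).  */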
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
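  /* Illustrative usage sketch (assumes -mf16c): the conversions above back
     the f16cintrin.h intrinsics, e.g. rounding a float to half precision,
     with immediate 0 selecting round-to-nearest:

	 unsigned short to_half (float f)
	 {
	   return _cvtss_sh (f, 0);
	 }  */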
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
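  /* Illustrative usage sketch (assumes -mbmi2): the pdep/pext builtins
     above back _pdep_u32/_pext_u32 from x86intrin.h; _pdep_u32 scatters
     the low bits of SRC into the set bit positions of MASK:

	 unsigned scatter_bits (unsigned src, unsigned mask)
	 {
	   return _pdep_u32 (src, mask);
	 }  */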
};

/* FMA4 and XOP.  */
#define MULTI_ARG_4_DF2_DI_I	V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1	V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I	V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1	V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF		V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF		V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2		V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2		V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI		V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI		V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI	V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI		V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI	V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI		V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2		V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2		V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2		V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2		V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF		V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF		V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI		V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI		V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI		V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI		V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM	V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM	V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM	V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM	V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP	V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP	V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP	V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP	V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF	V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF	V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF	V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF	V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF	V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF	V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF		V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF		V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2		V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2		V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI		V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI		V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI		V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI		V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI	V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI	V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI	V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI	V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI	V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI	V8HI_FTYPE_V16QI

static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf, "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df, "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf, "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df, "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf, "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df, "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf, "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df, "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf, "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df, "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf, "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df, "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
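  /* Illustrative usage sketch (assumes -mfma4): the fused multiply-add
     builtins above back x86intrin.h intrinsics such as _mm_macc_ps,
     computing a * b + c with a single rounding:

	 __m128 mul_add (__m128 a, __m128 b, __m128 c)
	 {
	   return _mm_macc_ps (a, b, c);
	 }  */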
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
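  /* Illustrative usage sketch (assumes -mxop): the unsigned compare
     builtins above back x86intrin.h intrinsics such as _mm_comlt_epu32,
     which yield an all-ones element where the predicate holds:

	 __m128i lt_mask (__m128i a, __m128i b)
	 {
	   return _mm_comlt_epu32 (a, b);
	 }  */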
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
};
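/* Illustrative usage sketch (assumes -mxop): the two-source permute
   builtins above back x86intrin.h intrinsics such as _mm_permute2_ps;
   the selector vector picks each result element from either source:

       __m128 pick (__m128 a, __m128 b, __m128i sel)
       {
	 return _mm_permute2_ps (a, b, sel, 0);
       }  */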
/* TM vector builtins.  */

/* Reuse the existing x86-specific `struct builtin_description' because
   we're lazy.  Add casts to make them fit.  */
static const struct builtin_description bdesc_tm[] =
{
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },

  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
};
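/* Illustrative usage sketch (assumes -fgnu-tm and SSE): a vector store
   inside a transaction is instrumented through the _ITM_* entries above,
   e.g.

       __m128 g;
       void store_in_txn (__m128 v)
       {
	 __transaction_atomic { g = v; }
       }  */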
/* TM callbacks.  */

/* Return the builtin decl needed to load a vector of TYPE.  */

static tree
ix86_builtin_tm_load (tree type)
{
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      switch (tree_low_cst (TYPE_SIZE (type), 1))
	{
	case 64:
	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
	case 128:
	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
	case 256:
	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
	}
    }
  return NULL_TREE;
}

/* Return the builtin decl needed to store a vector of TYPE.  */

static tree
ix86_builtin_tm_store (tree type)
{
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      switch (tree_low_cst (TYPE_SIZE (type), 1))
	{
	case 64:
	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
	case 128:
	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
	case 256:
	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
	}
    }
  return NULL_TREE;
}
/* Initialize the transactional memory vector load/store builtins.  */

static void
ix86_init_tm_builtins (void)
{
  enum ix86_builtin_func_type ftype;
  const struct builtin_description *d;
  size_t i;
  tree decl;
  tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
  tree attrs_log, attrs_type_log;

  if (!flag_tm)
    return;

  /* If there are no builtins defined, we must be compiling in a
     language without trans-mem support.  */
  if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
    return;

  /* Use whatever attributes a normal TM load has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
  attrs_load = DECL_ATTRIBUTES (decl);
  attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  /* Use whatever attributes a normal TM store has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
  attrs_store = DECL_ATTRIBUTES (decl);
  attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  /* Use whatever attributes a normal TM log has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
  attrs_log = DECL_ATTRIBUTES (decl);
  attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));

  for (i = 0, d = bdesc_tm;
       i < ARRAY_SIZE (bdesc_tm);
       i++, d++)
    {
      if ((d->mask & ix86_isa_flags) != 0
	  || (lang_hooks.builtin_function
	      == lang_hooks.builtin_function_ext_scope))
	{
	  tree type, attrs, attrs_type;
	  enum built_in_function code = (enum built_in_function) d->code;

	  ftype = (enum ix86_builtin_func_type) d->flag;
	  type = ix86_get_builtin_func_type (ftype);

	  if (BUILTIN_TM_LOAD_P (code))
	    {
	      attrs = attrs_load;
	      attrs_type = attrs_type_load;
	    }
	  else if (BUILTIN_TM_STORE_P (code))
	    {
	      attrs = attrs_store;
	      attrs_type = attrs_type_store;
	    }
	  else
	    {
	      attrs = attrs_log;
	      attrs_type = attrs_type_log;
	    }
	  decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
				       /* The builtin without the prefix for
					  calling it directly.  */
				       d->name + strlen ("__builtin_"),
				       attrs);
	  /* add_builtin_function() will set the DECL_ATTRIBUTES, now
	     set the TYPE_ATTRIBUTES.  */
	  decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);

	  set_builtin_decl (code, decl, false);
	}
    }
}
/* Set up all the MMX/SSE builtins, even builtins for instructions that are not
   in the current target ISA to allow the user to compile particular modules
   with different target specific options that differ from the command line
   options.  */
static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  enum ix86_builtin_func_type ftype;
  size_t i;

  /* Add all special builtins with variable number of operands.  */
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin (d->mask, d->name, ftype, d->code);
    }

  /* Add all builtins with variable number of operands.  */
  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
  /* pcmpestr[im] insns.  */
  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPESTRM128)
	ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
      else
	ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpistr[im] insns.  */
  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPISTRM128)
	ftype = V16QI_FTYPE_V16QI_V16QI_INT;
      else
	ftype = INT_FTYPE_V16QI_V16QI_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    {
      if (d->mask == OPTION_MASK_ISA_SSE2)
	ftype = INT_FTYPE_V2DF_V2DF;
      else
	ftype = INT_FTYPE_V4SF_V4SF;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
	       VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
	       UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);

  /* SSE or 3DNow!A */
  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
	       "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
	       IX86_BUILTIN_MASKMOVQ);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
	       VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
	       VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
  x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
			    VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);

  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
	       VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
	       VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);

  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
		     V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
		     V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
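  /* Illustrative usage sketch (assumes -maes): the builtins above back
     the wmmintrin.h AES intrinsics, e.g. one encryption round:

	 __m128i aes_round (__m128i state, __m128i round_key)
	 {
	   return _mm_aesenc_si128 (state, round_key);
	 }  */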
  def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
		     V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);

  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
	       INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
	       INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
	       "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
	       IX86_BUILTIN_RDRAND64_STEP);
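  /* Illustrative usage sketch (assumes -mrdrnd): the *_step builtins above
     return nonzero on success and store the random value through the
     pointer, so callers typically retry:

	 unsigned hw_rand32 (void)
	 {
	   unsigned v;
	   while (!__builtin_ia32_rdrand32_step (&v))
	     ;
	   return v;
	 }  */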
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
	       V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
	       IX86_BUILTIN_GATHERSIV2DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
	       V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
	       IX86_BUILTIN_GATHERSIV4DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
	       V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
	       IX86_BUILTIN_GATHERDIV2DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
	       V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
	       IX86_BUILTIN_GATHERDIV4DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
	       V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
	       IX86_BUILTIN_GATHERSIV4SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
	       V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
	       IX86_BUILTIN_GATHERSIV8SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
	       V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
	       IX86_BUILTIN_GATHERDIV4SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
	       V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
	       IX86_BUILTIN_GATHERDIV8SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
	       V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
	       IX86_BUILTIN_GATHERSIV2DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
	       V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
	       IX86_BUILTIN_GATHERSIV4DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
	       V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
	       IX86_BUILTIN_GATHERDIV2DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
	       V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
	       IX86_BUILTIN_GATHERDIV4DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
	       V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
	       IX86_BUILTIN_GATHERSIV4SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
	       V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
	       IX86_BUILTIN_GATHERSIV8SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
	       V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
	       IX86_BUILTIN_GATHERDIV4SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
	       V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
	       IX86_BUILTIN_GATHERDIV8SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
	       V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
	       IX86_BUILTIN_GATHERALTSIV4DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
	       V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
	       IX86_BUILTIN_GATHERALTDIV8SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
	       V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
	       IX86_BUILTIN_GATHERALTSIV4DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
	       V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
	       IX86_BUILTIN_GATHERALTDIV8SI);
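  /* Illustrative usage sketch (assumes -mavx2): the gather builtins above
     back immintrin.h intrinsics such as _mm256_i32gather_ps, loading eight
     floats from base + 4 * index:

	 __m256 gather8 (const float *base, __m256i idx)
	 {
	   return _mm256_i32gather_ps (base, idx, 4);
	 }  */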
  /* MMX access to the vec_init patterns.  */
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
		     V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
		     V4HI_FTYPE_HI_HI_HI_HI,
		     IX86_BUILTIN_VEC_INIT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
		     V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
		     IX86_BUILTIN_VEC_INIT_V8QI);

  /* Access to the vec_extract patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
		     DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
		     DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
		     FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
		     SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
		     HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
		     "__builtin_ia32_vec_ext_v4hi",
		     HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
		     SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
		     QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);

  /* Access to the vec_set patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
		     "__builtin_ia32_vec_set_v2di",
		     V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
		     V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
		     V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
		     V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
		     "__builtin_ia32_vec_set_v4hi",
		     V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
		     V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
  /* Add FMA4 multi-arg argument instructions.  */
  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
}
/* Internal method for ix86_init_builtins.  */

static void
ix86_init_builtins_va_builtins_abi (void)
{
  tree ms_va_ref, sysv_va_ref;
  tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
  tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
  tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
  tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;

  if (!TARGET_64BIT)
    return;
  fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
  fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
  ms_va_ref = build_reference_type (ms_va_list_type_node);
  sysv_va_ref =
    build_pointer_type (TREE_TYPE (sysv_va_list_type_node));

  fnvoid_va_end_ms =
    build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_start_ms =
    build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_end_sysv =
    build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
  fnvoid_va_start_sysv =
    build_varargs_function_type_list (void_type_node, sysv_va_ref,
				      NULL_TREE);
  fnvoid_va_copy_ms =
    build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
			      NULL_TREE);
  fnvoid_va_copy_sysv =
    build_function_type_list (void_type_node, sysv_va_ref,
			      sysv_va_ref, NULL_TREE);

  add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
}
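/* Illustrative usage sketch: on x86-64 the builtins registered above make
   both calling conventions' va_list handling available from C, e.g.

       int __attribute__ ((ms_abi)) sum_ms (int n, ...)
       {
	 __builtin_ms_va_list ap;
	 int i, s = 0;
	 __builtin_ms_va_start (ap, n);
	 for (i = 0; i < n; i++)
	   s += __builtin_va_arg (ap, int);
	 __builtin_ms_va_end (ap);
	 return s;
       }  */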
static void
ix86_init_builtin_types (void)
{
  tree float128_type_node, float80_type_node;

  /* The __float80 type.  */
  float80_type_node = long_double_type_node;
  if (TYPE_MODE (float80_type_node) != XFmode)
    {
      /* The __float80 type.  */
      float80_type_node = make_node (REAL_TYPE);

      TYPE_PRECISION (float80_type_node) = 80;
      layout_type (float80_type_node);
    }
  lang_hooks.types.register_builtin_type (float80_type_node, "__float80");

  /* The __float128 type.  */
  float128_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (float128_type_node) = 128;
  layout_type (float128_type_node);
  lang_hooks.types.register_builtin_type (float128_type_node, "__float128");

  /* This macro is built by i386-builtin-types.awk.  */
  DEFINE_BUILTIN_PRIMITIVE_TYPES;
}
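
/* Once registered, the front ends accept these type names directly,
   together with the GCC floating constant suffixes, e.g. (illustrative):

     __float80  x = 1.0w;     XFmode, 80-bit extended precision
     __float128 y = 1.0q;     TFmode, 128-bit quad precision
*/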
static void
ix86_init_builtins (void)
{
  tree t;

  ix86_init_builtin_types ();

  /* TFmode support builtins.  */
  def_builtin_const (0, "__builtin_infq",
		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
  def_builtin_const (0, "__builtin_huge_valq",
		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);

  /* We will expand them to normal call if SSE2 isn't available since
     they are used by libgcc.  */
  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
  t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
			    BUILT_IN_MD, "__fabstf2", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;

  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
  t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
			    BUILT_IN_MD, "__copysigntf3", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;

  ix86_init_tm_builtins ();
  ix86_init_mmx_sse_builtins ();

  ix86_init_builtins_va_builtins_abi ();

#ifdef SUBTARGET_INIT_BUILTINS
  SUBTARGET_INIT_BUILTINS;
#endif
}
/* Return the ix86 builtin for CODE.  */

static tree
ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= IX86_BUILTIN_MAX)
    return error_mark_node;

  return ix86_builtins[code];
}
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}
/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  if (!insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!insn_data[icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
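
/* All the expander subroutines that follow share this recipe: expand the
   CALL_EXPR arguments to RTL, coerce each operand until the insn
   pattern's predicate accepts it (copying into a fresh pseudo if not),
   generate the insn with GEN_FCN, emit it, and hand back the target
   register.  */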
/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns.  */

static rtx
ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
			       enum ix86_builtin_func_type m_type,
			       enum rtx_code sub_code)
{
  rtx pat;
  unsigned int i;
  int nargs;
  bool comparison_p = false;
  bool tf_p = false;
  bool last_arg_constant = false;
  int num_memory = 0;
  struct {
    rtx op;
    enum machine_mode mode;
  } args[4];
  enum machine_mode tmode = insn_data[icode].operand[0].mode;

  switch (m_type)
    {
    case MULTI_ARG_4_DF2_DI_I:
    case MULTI_ARG_4_DF2_DI_I1:
    case MULTI_ARG_4_SF2_SI_I:
    case MULTI_ARG_4_SF2_SI_I1:
      nargs = 4;
      last_arg_constant = true;
      break;

    case MULTI_ARG_3_SF:
    case MULTI_ARG_3_DF:
    case MULTI_ARG_3_SF2:
    case MULTI_ARG_3_DF2:
    case MULTI_ARG_3_DI:
    case MULTI_ARG_3_SI:
    case MULTI_ARG_3_SI_DI:
    case MULTI_ARG_3_HI:
    case MULTI_ARG_3_HI_SI:
    case MULTI_ARG_3_QI:
    case MULTI_ARG_3_DI2:
    case MULTI_ARG_3_SI2:
    case MULTI_ARG_3_HI2:
    case MULTI_ARG_3_QI2:
      nargs = 3;
      break;

    case MULTI_ARG_2_SF:
    case MULTI_ARG_2_DF:
    case MULTI_ARG_2_DI:
    case MULTI_ARG_2_SI:
    case MULTI_ARG_2_HI:
    case MULTI_ARG_2_QI:
      nargs = 2;
      break;

    case MULTI_ARG_2_DI_IMM:
    case MULTI_ARG_2_SI_IMM:
    case MULTI_ARG_2_HI_IMM:
    case MULTI_ARG_2_QI_IMM:
      nargs = 2;
      last_arg_constant = true;
      break;

    case MULTI_ARG_1_SF:
    case MULTI_ARG_1_DF:
    case MULTI_ARG_1_SF2:
    case MULTI_ARG_1_DF2:
    case MULTI_ARG_1_DI:
    case MULTI_ARG_1_SI:
    case MULTI_ARG_1_HI:
    case MULTI_ARG_1_QI:
    case MULTI_ARG_1_SI_DI:
    case MULTI_ARG_1_HI_DI:
    case MULTI_ARG_1_HI_SI:
    case MULTI_ARG_1_QI_DI:
    case MULTI_ARG_1_QI_SI:
    case MULTI_ARG_1_QI_HI:
      nargs = 1;
      break;

    case MULTI_ARG_2_DI_CMP:
    case MULTI_ARG_2_SI_CMP:
    case MULTI_ARG_2_HI_CMP:
    case MULTI_ARG_2_QI_CMP:
      nargs = 2;
      comparison_p = true;
      break;

    case MULTI_ARG_2_SF_TF:
    case MULTI_ARG_2_DF_TF:
    case MULTI_ARG_2_DI_TF:
    case MULTI_ARG_2_SI_TF:
    case MULTI_ARG_2_HI_TF:
    case MULTI_ARG_2_QI_TF:
      nargs = 2;
      tf_p = true;
      break;

    default:
      gcc_unreachable ();
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert (nargs <= 4);

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      int adjust = (comparison_p) ? 1 : 0;
      enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;

      if (last_arg_constant && i == nargs - 1)
	{
	  if (!insn_data[icode].operand[i + 1].predicate (op, mode))
	    {
	      enum insn_code new_icode = icode;
	      switch (icode)
		{
		case CODE_FOR_xop_vpermil2v2df3:
		case CODE_FOR_xop_vpermil2v4sf3:
		case CODE_FOR_xop_vpermil2v4df3:
		case CODE_FOR_xop_vpermil2v8sf3:
		  error ("the last argument must be a 2-bit immediate");
		  return gen_reg_rtx (tmode);

		case CODE_FOR_xop_rotlv2di3:
		  new_icode = CODE_FOR_rotlv2di3;
		  goto xop_rotl;
		case CODE_FOR_xop_rotlv4si3:
		  new_icode = CODE_FOR_rotlv4si3;
		  goto xop_rotl;
		case CODE_FOR_xop_rotlv8hi3:
		  new_icode = CODE_FOR_rotlv8hi3;
		  goto xop_rotl;
		case CODE_FOR_xop_rotlv16qi3:
		  new_icode = CODE_FOR_rotlv16qi3;
		xop_rotl:
		  if (CONST_INT_P (op))
		    {
		      int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
		      op = GEN_INT (INTVAL (op) & mask);
		      gcc_checking_assert
			(insn_data[icode].operand[i + 1].predicate (op, mode));
		    }
		  else
		    {
		      gcc_checking_assert
			(nargs == 2
			 && insn_data[new_icode].operand[0].mode == tmode
			 && insn_data[new_icode].operand[1].mode == tmode
			 && insn_data[new_icode].operand[2].mode == mode
			 && insn_data[new_icode].operand[0].predicate
			    == insn_data[icode].operand[0].predicate
			 && insn_data[new_icode].operand[1].predicate
			    == insn_data[icode].operand[1].predicate);
		      icode = new_icode;
		    }
		  break;

		default:
		  gcc_unreachable ();
		}
	    }
	}
      else
	{
	  if (VECTOR_MODE_P (mode))
	    op = safe_vector_operand (op, mode);

	  /* If we aren't optimizing, only allow one memory operand to be
	     generated.  */
	  if (memory_operand (op, mode))
	    num_memory++;

	  gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);

	  if (optimize
	      || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
	      || num_memory > 1)
	    op = force_reg (mode, op);
	}

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;

    case 2:
      if (tf_p)
	pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
			       GEN_INT ((int)sub_code));
      else if (! comparison_p)
	pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      else
	{
	  rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
				       args[0].op,
				       args[1].op);

	  pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
	}
      break;

    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;

    case 4:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op,
			     args[3].op);
      break;

    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
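
/* The rotate special case above exists because the XOP vprot* patterns
   also accept a variable rotate count, while the generic rotl<mode>3
   patterns want a canonical immediate; masking a constant count to the
   element width keeps it valid for either form.  */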
/* Subroutine of ix86_expand_args_builtin to take care of scalar unop
   insns with vec_merge.  */

static rtx
ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
				    rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (!insn_data[icode].operand[2].predicate (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d,
			 tree exp, rtx target, bool swap)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (swap)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
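
/* The swap path above (and BUILTIN_DESC_SWAP_OPERANDS below) handles
   comparisons such as GT and GE that SSE does not encode directly: they
   are emitted as LT/LE with the two operands exchanged.  */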
/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
		      rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}
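
/* The SImode-register/QImode-subreg dance above (repeated in the ptest
   and pcmp[ei]str expanders below) first zeroes a full 32-bit register
   and then sets only its low byte from the flags via STRICT_LOW_PART,
   which avoids a partial register stall on the setcc.  */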
/* Subroutines of ix86_expand_args_builtin to take care of round insns.  */

static rtx
ix86_expand_sse_round (const struct builtin_description *d, tree exp,
		       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;

  if (optimize || target == 0
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = GEN_INT (d->comparison);

  pat = GEN_FCN (d->icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
static rtx
ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
				     tree exp, rtx target)
{
  rtx pat;
  rtx op2;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;

  if (optimize || target == 0
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  op0 = safe_vector_operand (op0, mode0);
  op1 = safe_vector_operand (op1, mode1);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = GEN_INT (d->comparison);

  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
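
/* For the two round expanders above, the builtin_description's
   `comparison' field is overloaded: it carries the ROUND_* rounding-mode
   immediate rather than an RTL comparison code.  */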
/* Subroutine of ix86_expand_builtin to take care of ptest insns.  */

static rtx
ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
		       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  gen_rtx_REG (CCmode, FLAGS_REG),
					  const0_rtx)));

  return SUBREG_REG (target);
}
/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns.  */

static rtx
ix86_expand_sse_pcmpestr (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  tree arg3 = CALL_EXPR_ARG (exp, 3);
  tree arg4 = CALL_EXPR_ARG (exp, 4);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  rtx op3 = expand_normal (arg3);
  rtx op4 = expand_normal (arg4);
  enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modei3 = insn_data[d->icode].operand[3].mode;
  modev4 = insn_data[d->icode].operand[4].mode;
  modei5 = insn_data[d->icode].operand[5].mode;
  modeimm = insn_data[d->icode].operand[6].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev4))
    op2 = safe_vector_operand (op2, modev4);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
    op1 = copy_to_mode_reg (modei3, op1);
  if ((optimize && !register_operand (op2, modev4))
      || !insn_data[d->icode].operand[4].predicate (op2, modev4))
    op2 = copy_to_mode_reg (modev4, op2);
  if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
    op3 = copy_to_mode_reg (modei5, op3);

  if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
    {
      error ("the fifth argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPESTRI128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode0
	  || !insn_data[d->icode].operand[0].predicate (target, tmode0))
	target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
    }
  else if (d->code == IX86_BUILTIN_PCMPESTRM128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode1
	  || !insn_data[d->icode].operand[1].predicate (target, tmode1))
	target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
	(gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
		      gen_rtx_fmt_ee (EQ, QImode,
				      gen_rtx_REG ((enum machine_mode) d->flag,
						   FLAGS_REG),
				      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}
/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns.  */

static rtx
ix86_expand_sse_pcmpistr (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modev3 = insn_data[d->icode].operand[3].mode;
  modeimm = insn_data[d->icode].operand[4].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev3))
    op1 = safe_vector_operand (op1, modev3);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if ((optimize && !register_operand (op1, modev3))
      || !insn_data[d->icode].operand[3].predicate (op1, modev3))
    op1 = copy_to_mode_reg (modev3, op1);

  if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
    {
      error ("the third argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPISTRI128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode0
	  || !insn_data[d->icode].operand[0].predicate (target, tmode0))
	target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
    }
  else if (d->code == IX86_BUILTIN_PCMPISTRM128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode1
	  || !insn_data[d->icode].operand[1].predicate (target, tmode1))
	target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
	(gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
		      gen_rtx_fmt_ee (EQ, QImode,
				      gen_rtx_REG ((enum machine_mode) d->flag,
						   FLAGS_REG),
				      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}
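
/* For the pcmp[ei]str expanders above, d->flag selects what the builtin
   returns: zero means the index or mask produced by the instruction
   itself, while a nonzero value names the flags-register mode whose
   condition the predicate variants of the string-compare intrinsics
   test.  */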
/* Subroutine of ix86_expand_builtin to take care of insns with
   variable number of operands.  */

static rtx
ix86_expand_args_builtin (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat, real_target;
  unsigned int i, nargs;
  unsigned int nargs_constant = 0;
  int num_memory = 0;
  struct {
    rtx op;
    enum machine_mode mode;
  } args[4];
  bool last_arg_count = false;
  enum insn_code icode = d->icode;
  const struct insn_data_d *insn_p = &insn_data[icode];
  enum machine_mode tmode = insn_p->operand[0].mode;
  enum machine_mode rmode = VOIDmode;
  bool swap = false;
  enum rtx_code comparison = d->comparison;

  switch ((enum ix86_builtin_func_type) d->flag)
    {
    case V2DF_FTYPE_V2DF_ROUND:
    case V4DF_FTYPE_V4DF_ROUND:
    case V4SF_FTYPE_V4SF_ROUND:
    case V8SF_FTYPE_V8SF_ROUND:
    case V4SI_FTYPE_V4SF_ROUND:
    case V8SI_FTYPE_V8SF_ROUND:
      return ix86_expand_sse_round (d, exp, target);
    case V4SI_FTYPE_V2DF_V2DF_ROUND:
    case V8SI_FTYPE_V4DF_V4DF_ROUND:
      return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
    case INT_FTYPE_V8SF_V8SF_PTEST:
    case INT_FTYPE_V4DI_V4DI_PTEST:
    case INT_FTYPE_V4DF_V4DF_PTEST:
    case INT_FTYPE_V4SF_V4SF_PTEST:
    case INT_FTYPE_V2DI_V2DI_PTEST:
    case INT_FTYPE_V2DF_V2DF_PTEST:
      return ix86_expand_sse_ptest (d, exp, target);
    case FLOAT128_FTYPE_FLOAT128:
    case FLOAT_FTYPE_FLOAT:
    case INT_FTYPE_INT:
    case UINT64_FTYPE_INT:
    case UINT16_FTYPE_UINT16:
    case INT64_FTYPE_INT64:
    case INT64_FTYPE_V4SF:
    case INT64_FTYPE_V2DF:
    case INT_FTYPE_V16QI:
    case INT_FTYPE_V8QI:
    case INT_FTYPE_V8SF:
    case INT_FTYPE_V4DF:
    case INT_FTYPE_V4SF:
    case INT_FTYPE_V2DF:
    case INT_FTYPE_V32QI:
    case V16QI_FTYPE_V16QI:
    case V8SI_FTYPE_V8SF:
    case V8SI_FTYPE_V4SI:
    case V8HI_FTYPE_V8HI:
    case V8HI_FTYPE_V16QI:
    case V8QI_FTYPE_V8QI:
    case V8SF_FTYPE_V8SF:
    case V8SF_FTYPE_V8SI:
    case V8SF_FTYPE_V4SF:
    case V8SF_FTYPE_V8HI:
    case V4SI_FTYPE_V4SI:
    case V4SI_FTYPE_V16QI:
    case V4SI_FTYPE_V4SF:
    case V4SI_FTYPE_V8SI:
    case V4SI_FTYPE_V8HI:
    case V4SI_FTYPE_V4DF:
    case V4SI_FTYPE_V2DF:
    case V4HI_FTYPE_V4HI:
    case V4DF_FTYPE_V4DF:
    case V4DF_FTYPE_V4SI:
    case V4DF_FTYPE_V4SF:
    case V4DF_FTYPE_V2DF:
    case V4SF_FTYPE_V4SF:
    case V4SF_FTYPE_V4SI:
    case V4SF_FTYPE_V8SF:
    case V4SF_FTYPE_V4DF:
    case V4SF_FTYPE_V8HI:
    case V4SF_FTYPE_V2DF:
    case V2DI_FTYPE_V2DI:
    case V2DI_FTYPE_V16QI:
    case V2DI_FTYPE_V8HI:
    case V2DI_FTYPE_V4SI:
    case V2DF_FTYPE_V2DF:
    case V2DF_FTYPE_V4SI:
    case V2DF_FTYPE_V4DF:
    case V2DF_FTYPE_V4SF:
    case V2DF_FTYPE_V2SI:
    case V2SI_FTYPE_V2SI:
    case V2SI_FTYPE_V4SF:
    case V2SI_FTYPE_V2SF:
    case V2SI_FTYPE_V2DF:
    case V2SF_FTYPE_V2SF:
    case V2SF_FTYPE_V2SI:
    case V32QI_FTYPE_V32QI:
    case V32QI_FTYPE_V16QI:
    case V16HI_FTYPE_V16HI:
    case V16HI_FTYPE_V8HI:
    case V8SI_FTYPE_V8SI:
    case V16HI_FTYPE_V16QI:
    case V8SI_FTYPE_V16QI:
    case V4DI_FTYPE_V16QI:
    case V8SI_FTYPE_V8HI:
    case V4DI_FTYPE_V8HI:
    case V4DI_FTYPE_V4SI:
    case V4DI_FTYPE_V2DI:
      nargs = 1;
      break;
    case V4SF_FTYPE_V4SF_VEC_MERGE:
    case V2DF_FTYPE_V2DF_VEC_MERGE:
      return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
    case FLOAT128_FTYPE_FLOAT128_FLOAT128:
    case V16QI_FTYPE_V16QI_V16QI:
    case V16QI_FTYPE_V8HI_V8HI:
    case V8QI_FTYPE_V8QI_V8QI:
    case V8QI_FTYPE_V4HI_V4HI:
    case V8HI_FTYPE_V8HI_V8HI:
    case V8HI_FTYPE_V16QI_V16QI:
    case V8HI_FTYPE_V4SI_V4SI:
    case V8SF_FTYPE_V8SF_V8SF:
    case V8SF_FTYPE_V8SF_V8SI:
    case V4SI_FTYPE_V4SI_V4SI:
    case V4SI_FTYPE_V8HI_V8HI:
    case V4SI_FTYPE_V4SF_V4SF:
    case V4SI_FTYPE_V2DF_V2DF:
    case V4HI_FTYPE_V4HI_V4HI:
    case V4HI_FTYPE_V8QI_V8QI:
    case V4HI_FTYPE_V2SI_V2SI:
    case V4DF_FTYPE_V4DF_V4DF:
    case V4DF_FTYPE_V4DF_V4DI:
    case V4SF_FTYPE_V4SF_V4SF:
    case V4SF_FTYPE_V4SF_V4SI:
    case V4SF_FTYPE_V4SF_V2SI:
    case V4SF_FTYPE_V4SF_V2DF:
    case V4SF_FTYPE_V4SF_DI:
    case V4SF_FTYPE_V4SF_SI:
    case V2DI_FTYPE_V2DI_V2DI:
    case V2DI_FTYPE_V16QI_V16QI:
    case V2DI_FTYPE_V4SI_V4SI:
    case V2DI_FTYPE_V2DI_V16QI:
    case V2DI_FTYPE_V2DF_V2DF:
    case V2SI_FTYPE_V2SI_V2SI:
    case V2SI_FTYPE_V4HI_V4HI:
    case V2SI_FTYPE_V2SF_V2SF:
    case V2DF_FTYPE_V2DF_V2DF:
    case V2DF_FTYPE_V2DF_V4SF:
    case V2DF_FTYPE_V2DF_V2DI:
    case V2DF_FTYPE_V2DF_DI:
    case V2DF_FTYPE_V2DF_SI:
    case V2SF_FTYPE_V2SF_V2SF:
    case V1DI_FTYPE_V1DI_V1DI:
    case V1DI_FTYPE_V8QI_V8QI:
    case V1DI_FTYPE_V2SI_V2SI:
    case V32QI_FTYPE_V16HI_V16HI:
    case V16HI_FTYPE_V8SI_V8SI:
    case V32QI_FTYPE_V32QI_V32QI:
    case V16HI_FTYPE_V32QI_V32QI:
    case V16HI_FTYPE_V16HI_V16HI:
    case V8SI_FTYPE_V4DF_V4DF:
    case V8SI_FTYPE_V8SI_V8SI:
    case V8SI_FTYPE_V16HI_V16HI:
    case V4DI_FTYPE_V4DI_V4DI:
    case V4DI_FTYPE_V8SI_V8SI:
      if (comparison == UNKNOWN)
	return ix86_expand_binop_builtin (icode, exp, target);
      nargs = 2;
      break;
    case V4SF_FTYPE_V4SF_V4SF_SWAP:
    case V2DF_FTYPE_V2DF_V2DF_SWAP:
      gcc_assert (comparison != UNKNOWN);
      nargs = 2;
      swap = true;
      break;
    case V16HI_FTYPE_V16HI_V8HI_COUNT:
    case V16HI_FTYPE_V16HI_SI_COUNT:
    case V8SI_FTYPE_V8SI_V4SI_COUNT:
    case V8SI_FTYPE_V8SI_SI_COUNT:
    case V4DI_FTYPE_V4DI_V2DI_COUNT:
    case V4DI_FTYPE_V4DI_INT_COUNT:
    case V8HI_FTYPE_V8HI_V8HI_COUNT:
    case V8HI_FTYPE_V8HI_SI_COUNT:
    case V4SI_FTYPE_V4SI_V4SI_COUNT:
    case V4SI_FTYPE_V4SI_SI_COUNT:
    case V4HI_FTYPE_V4HI_V4HI_COUNT:
    case V4HI_FTYPE_V4HI_SI_COUNT:
    case V2DI_FTYPE_V2DI_V2DI_COUNT:
    case V2DI_FTYPE_V2DI_SI_COUNT:
    case V2SI_FTYPE_V2SI_V2SI_COUNT:
    case V2SI_FTYPE_V2SI_SI_COUNT:
    case V1DI_FTYPE_V1DI_V1DI_COUNT:
    case V1DI_FTYPE_V1DI_SI_COUNT:
      nargs = 2;
      last_arg_count = true;
      break;
    case UINT64_FTYPE_UINT64_UINT64:
    case UINT_FTYPE_UINT_UINT:
    case UINT_FTYPE_UINT_USHORT:
    case UINT_FTYPE_UINT_UCHAR:
    case UINT16_FTYPE_UINT16_INT:
    case UINT8_FTYPE_UINT8_INT:
      nargs = 2;
      break;
    case V2DI_FTYPE_V2DI_INT_CONVERT:
      nargs = 2;
      rmode = V1TImode;
      nargs_constant = 1;
      break;
    case V4DI_FTYPE_V4DI_INT_CONVERT:
      nargs = 2;
      rmode = V2TImode;
      nargs_constant = 1;
      break;
    case V8HI_FTYPE_V8HI_INT:
    case V8HI_FTYPE_V8SF_INT:
    case V8HI_FTYPE_V4SF_INT:
    case V8SF_FTYPE_V8SF_INT:
    case V4SI_FTYPE_V4SI_INT:
    case V4SI_FTYPE_V8SI_INT:
    case V4HI_FTYPE_V4HI_INT:
    case V4DF_FTYPE_V4DF_INT:
    case V4SF_FTYPE_V4SF_INT:
    case V4SF_FTYPE_V8SF_INT:
    case V2DI_FTYPE_V2DI_INT:
    case V2DF_FTYPE_V2DF_INT:
    case V2DF_FTYPE_V4DF_INT:
    case V16HI_FTYPE_V16HI_INT:
    case V8SI_FTYPE_V8SI_INT:
    case V4DI_FTYPE_V4DI_INT:
    case V2DI_FTYPE_V4DI_INT:
      nargs = 2;
      nargs_constant = 1;
      break;
    case V16QI_FTYPE_V16QI_V16QI_V16QI:
    case V8SF_FTYPE_V8SF_V8SF_V8SF:
    case V4DF_FTYPE_V4DF_V4DF_V4DF:
    case V4SF_FTYPE_V4SF_V4SF_V4SF:
    case V2DF_FTYPE_V2DF_V2DF_V2DF:
    case V32QI_FTYPE_V32QI_V32QI_V32QI:
      nargs = 3;
      break;
    case V32QI_FTYPE_V32QI_V32QI_INT:
    case V16HI_FTYPE_V16HI_V16HI_INT:
    case V16QI_FTYPE_V16QI_V16QI_INT:
    case V4DI_FTYPE_V4DI_V4DI_INT:
    case V8HI_FTYPE_V8HI_V8HI_INT:
    case V8SI_FTYPE_V8SI_V8SI_INT:
    case V8SI_FTYPE_V8SI_V4SI_INT:
    case V8SF_FTYPE_V8SF_V8SF_INT:
    case V8SF_FTYPE_V8SF_V4SF_INT:
    case V4SI_FTYPE_V4SI_V4SI_INT:
    case V4DF_FTYPE_V4DF_V4DF_INT:
    case V4DF_FTYPE_V4DF_V2DF_INT:
    case V4SF_FTYPE_V4SF_V4SF_INT:
    case V2DI_FTYPE_V2DI_V2DI_INT:
    case V4DI_FTYPE_V4DI_V2DI_INT:
    case V2DF_FTYPE_V2DF_V2DF_INT:
      nargs = 3;
      nargs_constant = 1;
      break;
    case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
      nargs = 3;
      rmode = V2TImode;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
      nargs = 3;
      rmode = V1TImode;
      nargs_constant = 1;
      break;
    case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
      nargs = 3;
      rmode = DImode;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_UINT_UINT:
      nargs = 3;
      nargs_constant = 2;
      break;
    case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
    case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
    case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
    case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
      nargs = 4;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
      nargs = 4;
      nargs_constant = 2;
      break;
    default:
      gcc_unreachable ();
    }

  gcc_assert (nargs <= ARRAY_SIZE (args));

  if (comparison != UNKNOWN)
    {
      gcc_assert (nargs == 2);
      return ix86_expand_sse_compare (d, exp, target, swap);
    }

  if (rmode == VOIDmode || rmode == tmode)
    {
      if (optimize
	  || !target
	  || GET_MODE (target) != tmode
	  || !insn_p->operand[0].predicate (target, tmode))
	target = gen_reg_rtx (tmode);
      real_target = target;
    }
  else
    {
      target = gen_reg_rtx (rmode);
      real_target = simplify_gen_subreg (tmode, target, rmode, 0);
    }

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      enum machine_mode mode = insn_p->operand[i + 1].mode;
      bool match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_count && (i + 1) == nargs)
	{
	  /* SIMD shift insns take either an 8-bit immediate or
	     register as count.  But builtin functions take int as
	     count.  If count doesn't match, we put it in register.  */
	  if (!match)
	    {
	      op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
	      if (!insn_p->operand[i + 1].predicate (op, mode))
		op = copy_to_reg (op);
	    }
	}
      else if ((nargs - i) <= nargs_constant)
	{
	  if (!match)
	    switch (icode)
	      {
	      case CODE_FOR_avx2_inserti128:
	      case CODE_FOR_avx2_extracti128:
		error ("the last argument must be a 1-bit immediate");
		return const0_rtx;

	      case CODE_FOR_sse4_1_roundsd:
	      case CODE_FOR_sse4_1_roundss:

	      case CODE_FOR_sse4_1_roundpd:
	      case CODE_FOR_sse4_1_roundps:
	      case CODE_FOR_avx_roundpd256:
	      case CODE_FOR_avx_roundps256:

	      case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
	      case CODE_FOR_sse4_1_roundps_sfix:
	      case CODE_FOR_avx_roundpd_vec_pack_sfix256:
	      case CODE_FOR_avx_roundps_sfix256:

	      case CODE_FOR_sse4_1_blendps:
	      case CODE_FOR_avx_blendpd256:
	      case CODE_FOR_avx_vpermilv4df:
		error ("the last argument must be a 4-bit immediate");
		return const0_rtx;

	      case CODE_FOR_sse4_1_blendpd:
	      case CODE_FOR_avx_vpermilv2df:
	      case CODE_FOR_xop_vpermil2v2df3:
	      case CODE_FOR_xop_vpermil2v4sf3:
	      case CODE_FOR_xop_vpermil2v4df3:
	      case CODE_FOR_xop_vpermil2v8sf3:
		error ("the last argument must be a 2-bit immediate");
		return const0_rtx;

	      case CODE_FOR_avx_vextractf128v4df:
	      case CODE_FOR_avx_vextractf128v8sf:
	      case CODE_FOR_avx_vextractf128v8si:
	      case CODE_FOR_avx_vinsertf128v4df:
	      case CODE_FOR_avx_vinsertf128v8sf:
	      case CODE_FOR_avx_vinsertf128v8si:
		error ("the last argument must be a 1-bit immediate");
		return const0_rtx;

	      case CODE_FOR_avx_vmcmpv2df3:
	      case CODE_FOR_avx_vmcmpv4sf3:
	      case CODE_FOR_avx_cmpv2df3:
	      case CODE_FOR_avx_cmpv4sf3:
	      case CODE_FOR_avx_cmpv4df3:
	      case CODE_FOR_avx_cmpv8sf3:
		error ("the last argument must be a 5-bit immediate");
		return const0_rtx;

	      default:
		switch (nargs_constant)
		  {
		  case 2:
		    if ((nargs - i) == nargs_constant)
		      {
			error ("the next to last argument must be an 8-bit immediate");
			break;
		      }
		    /* FALLTHRU */
		  case 1:
		    error ("the last argument must be an 8-bit immediate");
		    break;
		  default:
		    gcc_unreachable ();
		  }
		return const0_rtx;
	      }
	}
      else
	{
	  if (VECTOR_MODE_P (mode))
	    op = safe_vector_operand (op, mode);

	  /* If we aren't optimizing, only allow one memory operand to
	     be generated.  */
	  if (memory_operand (op, mode))
	    num_memory++;

	  if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
	    {
	      if (optimize || !match || num_memory > 1)
		op = copy_to_mode_reg (mode, op);
	    }
	  else
	    {
	      op = copy_to_reg (op);
	      op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
	    }
	}

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (real_target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
			     args[2].op);
      break;
    case 4:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
			     args[2].op, args[3].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
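
/* The big classification switch above reduces every function type to a
   few facts: the operand count, how many trailing operands must be
   immediates (nargs_constant), whether the last operand is a shift
   count (last_arg_count), and whether the result must be produced in a
   wider mode and then subreg'd back (rmode).  */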
/* Subroutine of ix86_expand_builtin to take care of special insns
   with variable number of operands.  */

static rtx
ix86_expand_special_args_builtin (const struct builtin_description *d,
				  tree exp, rtx target)
{
  tree arg;
  rtx pat, op;
  unsigned int i, nargs, arg_adjust, memory;
  struct {
    rtx op;
    enum machine_mode mode;
  } args[3];
  enum insn_code icode = d->icode;
  bool last_arg_constant = false;
  const struct insn_data_d *insn_p = &insn_data[icode];
  enum machine_mode tmode = insn_p->operand[0].mode;
  enum { load, store } klass;

  switch ((enum ix86_builtin_func_type) d->flag)
    {
    case VOID_FTYPE_VOID:
      if (icode == CODE_FOR_avx_vzeroupper)
	target = GEN_INT (vzeroupper_intrinsic);
      emit_insn (GEN_FCN (icode) (target));
      return 0;
    case VOID_FTYPE_UINT64:
    case VOID_FTYPE_UNSIGNED:
      nargs = 0;
      klass = store;
      memory = 0;
      break;
    case UINT64_FTYPE_VOID:
    case UNSIGNED_FTYPE_VOID:
      nargs = 0;
      klass = load;
      memory = 0;
      break;
    case UINT64_FTYPE_PUNSIGNED:
    case V2DI_FTYPE_PV2DI:
    case V4DI_FTYPE_PV4DI:
    case V32QI_FTYPE_PCCHAR:
    case V16QI_FTYPE_PCCHAR:
    case V8SF_FTYPE_PCV4SF:
    case V8SF_FTYPE_PCFLOAT:
    case V4SF_FTYPE_PCFLOAT:
    case V4DF_FTYPE_PCV2DF:
    case V4DF_FTYPE_PCDOUBLE:
    case V2DF_FTYPE_PCDOUBLE:
    case VOID_FTYPE_PVOID:
      nargs = 1;
      klass = load;
      memory = 0;
      break;
    case VOID_FTYPE_PV2SF_V4SF:
    case VOID_FTYPE_PV4DI_V4DI:
    case VOID_FTYPE_PV2DI_V2DI:
    case VOID_FTYPE_PCHAR_V32QI:
    case VOID_FTYPE_PCHAR_V16QI:
    case VOID_FTYPE_PFLOAT_V8SF:
    case VOID_FTYPE_PFLOAT_V4SF:
    case VOID_FTYPE_PDOUBLE_V4DF:
    case VOID_FTYPE_PDOUBLE_V2DF:
    case VOID_FTYPE_PLONGLONG_LONGLONG:
    case VOID_FTYPE_PULONGLONG_ULONGLONG:
    case VOID_FTYPE_PINT_INT:
      nargs = 1;
      klass = store;
      /* Reserve memory operand for target.  */
      memory = ARRAY_SIZE (args);
      break;
    case V4SF_FTYPE_V4SF_PCV2SF:
    case V2DF_FTYPE_V2DF_PCDOUBLE:
      nargs = 2;
      klass = load;
      memory = 1;
      break;
    case V8SF_FTYPE_PCV8SF_V8SI:
    case V4DF_FTYPE_PCV4DF_V4DI:
    case V4SF_FTYPE_PCV4SF_V4SI:
    case V2DF_FTYPE_PCV2DF_V2DI:
    case V8SI_FTYPE_PCV8SI_V8SI:
    case V4DI_FTYPE_PCV4DI_V4DI:
    case V4SI_FTYPE_PCV4SI_V4SI:
    case V2DI_FTYPE_PCV2DI_V2DI:
      nargs = 2;
      klass = load;
      memory = 0;
      break;
    case VOID_FTYPE_PV8SF_V8SI_V8SF:
    case VOID_FTYPE_PV4DF_V4DI_V4DF:
    case VOID_FTYPE_PV4SF_V4SI_V4SF:
    case VOID_FTYPE_PV2DF_V2DI_V2DF:
    case VOID_FTYPE_PV8SI_V8SI_V8SI:
    case VOID_FTYPE_PV4DI_V4DI_V4DI:
    case VOID_FTYPE_PV4SI_V4SI_V4SI:
    case VOID_FTYPE_PV2DI_V2DI_V2DI:
      nargs = 2;
      klass = store;
      /* Reserve memory operand for target.  */
      memory = ARRAY_SIZE (args);
      break;
    case VOID_FTYPE_UINT_UINT_UINT:
    case VOID_FTYPE_UINT64_UINT_UINT:
    case UCHAR_FTYPE_UINT_UINT_UINT:
    case UCHAR_FTYPE_UINT64_UINT_UINT:
      nargs = 3;
      klass = load;
      memory = ARRAY_SIZE (args);
      last_arg_constant = true;
      break;
    default:
      gcc_unreachable ();
    }

  gcc_assert (nargs <= ARRAY_SIZE (args));

  if (klass == store)
    {
      arg = CALL_EXPR_ARG (exp, 0);
      op = expand_normal (arg);
      gcc_assert (target == 0);
      if (memory)
	{
	  if (GET_MODE (op) != Pmode)
	    op = convert_to_mode (Pmode, op, 1);
	  target = gen_rtx_MEM (tmode, force_reg (Pmode, op));
	}
      else
	target = force_reg (tmode, op);
      arg_adjust = 1;
    }
  else
    {
      arg_adjust = 0;
      if (optimize
	  || target == 0
	  || GET_MODE (target) != tmode
	  || !insn_p->operand[0].predicate (target, tmode))
	target = gen_reg_rtx (tmode);
    }

  for (i = 0; i < nargs; i++)
    {
      enum machine_mode mode = insn_p->operand[i + 1].mode;
      bool match;

      arg = CALL_EXPR_ARG (exp, i + arg_adjust);
      op = expand_normal (arg);
      match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_constant && (i + 1) == nargs)
	{
	  if (!match)
	    {
	      if (icode == CODE_FOR_lwp_lwpvalsi3
		  || icode == CODE_FOR_lwp_lwpinssi3
		  || icode == CODE_FOR_lwp_lwpvaldi3
		  || icode == CODE_FOR_lwp_lwpinsdi3)
		error ("the last argument must be a 32-bit immediate");
	      else
		error ("the last argument must be an 8-bit immediate");
	      return const0_rtx;
	    }
	}
      else
	{
	  if (i == memory)
	    {
	      /* This must be the memory operand.  */
	      if (GET_MODE (op) != Pmode)
		op = convert_to_mode (Pmode, op, 1);
	      op = gen_rtx_MEM (mode, force_reg (Pmode, op));
	      gcc_assert (GET_MODE (op) == mode
			  || GET_MODE (op) == VOIDmode);
	    }
	  else
	    {
	      /* This must be register.  */
	      if (VECTOR_MODE_P (mode))
		op = safe_vector_operand (op, mode);

	      gcc_assert (GET_MODE (op) == mode
			  || GET_MODE (op) == VOIDmode);
	      op = copy_to_mode_reg (mode, op);
	    }
	}

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 0:
      pat = GEN_FCN (icode) (target);
      break;
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;
  emit_insn (pat);
  return klass == store ? 0 : target;
}
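
/* `klass' decides the data direction: for a store the first call
   argument is the destination address, the remaining arguments are
   expanded as inputs and the expander returns 0, while for a load the
   target register carries the result.  */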
/* Return the integer constant in ARG.  Constrain it to be in the range
   of the subparts of VEC_TYPE; issue an error if not.  */

static unsigned HOST_WIDE_INT
get_element_number (tree vec_type, tree arg)
{
  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;

  if (!host_integerp (arg, 1)
      || (elt = tree_low_cst (arg, 1), elt > max))
    {
      error ("selector must be an integer constant in the range 0..%wi", max);
      return 0;
    }

  return elt;
}
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_init.  We DO have language-level syntax for this, in
   the form of (type){ init-list }.  Except that since we can't place emms
   instructions from inside the compiler, we can't allow the use of MMX
   registers unless the user explicitly asks for it.  So we do *not* define
   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
   we have builtins invoked by mmintrin.h that gives us license to emit
   these sorts of instructions.  */

static rtx
ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
{
  enum machine_mode tmode = TYPE_MODE (type);
  enum machine_mode inner_mode = GET_MODE_INNER (tmode);
  int i, n_elt = GET_MODE_NUNITS (tmode);
  rtvec v = rtvec_alloc (n_elt);

  gcc_assert (VECTOR_MODE_P (tmode));
  gcc_assert (call_expr_nargs (exp) == n_elt);

  for (i = 0; i < n_elt; ++i)
    {
      rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
    }

  if (!target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
  return target;
}
29107 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
29108 had a language-level syntax for referencing vector elements. */
29111 ix86_expand_vec_ext_builtin (tree exp
, rtx target
)
29113 enum machine_mode tmode
, mode0
;
29118 arg0
= CALL_EXPR_ARG (exp
, 0);
29119 arg1
= CALL_EXPR_ARG (exp
, 1);
29121 op0
= expand_normal (arg0
);
29122 elt
= get_element_number (TREE_TYPE (arg0
), arg1
);
29124 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
29125 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
29126 gcc_assert (VECTOR_MODE_P (mode0
));
29128 op0
= force_reg (mode0
, op0
);
29130 if (optimize
|| !target
|| !register_operand (target
, tmode
))
29131 target
= gen_reg_rtx (tmode
);
29133 ix86_expand_vector_extract (true, target
, op0
, elt
);
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
   a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_set_builtin (tree exp)
{
  enum machine_mode tmode, mode1;
  tree arg0, arg1, arg2;
  int elt;
  rtx op0, op1, target;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);
  arg2 = CALL_EXPR_ARG (exp, 2);

  tmode = TYPE_MODE (TREE_TYPE (arg0));
  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  gcc_assert (VECTOR_MODE_P (tmode));

  op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
  op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
  elt = get_element_number (TREE_TYPE (arg0), arg2);

  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);

  op0 = force_reg (tmode, op0);
  op1 = force_reg (mode1, op1);

  /* OP0 is the source of these builtin functions and shouldn't be
     modified.  Create a copy, use it and return it as target.  */
  target = gen_reg_rtx (tmode);
  emit_move_insn (target, op0);
  ix86_expand_vector_set (true, target, op1, elt);

  return target;
}
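
/* At the source level these wrappers are reached through the vector
   intrinsic headers, e.g. (illustrative, exact vector types elided):

     v = __builtin_ia32_vec_init_v2si (1, 2);
     x = __builtin_ia32_vec_ext_v2si (v, 0);
     w = __builtin_ia32_vec_set_v4hi (u, 5, 3);
*/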
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		     enum machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  tree arg0, arg1, arg2, arg3, arg4;
  rtx op0, op1, op2, op3, op4, pat;
  enum machine_mode mode0, mode1, mode2, mode3, mode4;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  /* Determine whether the builtin function is available under the current ISA.
     Originally the builtin was not created if it wasn't applicable to the
     current ISA based on the command line switches.  With function specific
     options, we need to check in the context of the function making the call
     whether it is supported.  */
  if (ix86_builtins_isa[fcode].isa
      && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
    {
      char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
				       NULL, (enum fpmath_unit) 0, false);

      if (!opts)
	error ("%qE needs unknown isa option", fndecl);
      else
	{
	  gcc_assert (opts != NULL);
	  error ("%qE needs isa option %s", fndecl, opts);
	  free (opts);
	}
      return const0_rtx;
    }
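
/* For example (illustrative), calling an AVX intrinsic from a function
   carrying __attribute__ ((target ("avx"))) is accepted even when the
   translation unit default is -msse2, while the same call from a plain
   -msse2 function reaches the error above at expansion time.  */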
  switch (fcode)
    {
    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
	       ? CODE_FOR_mmx_maskmovq
	       : CODE_FOR_sse2_maskmovdqu);
      /* Note the arg order is different from the operand order.  */
      arg1 = CALL_EXPR_ARG (exp, 0);
      arg2 = CALL_EXPR_ARG (exp, 1);
      arg0 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (GET_MODE (op0) != Pmode)
	op0 = convert_to_mode (Pmode, op0, 1);
      op0 = gen_rtx_MEM (mode1, force_reg (Pmode, op0));

      if (!insn_data[icode].operand[0].predicate (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (!insn_data[icode].operand[1].predicate (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (!insn_data[icode].operand[2].predicate (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;
    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
      emit_move_insn (target, op0);
      emit_insn (gen_sse_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
      emit_insn (gen_sse_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_CLFLUSH:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_sse2_clflush;
      if (!insn_data[icode].operand[0].predicate (op0, Pmode))
	{
	  if (GET_MODE (op0) != Pmode)
	    op0 = convert_to_mode (Pmode, op0, 1);
	  op0 = force_reg (Pmode, op0);
	}

      emit_insn (gen_sse2_clflush (op0));
      return 0;
    case IX86_BUILTIN_MONITOR:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      if (!REG_P (op0))
	{
	  if (GET_MODE (op0) != Pmode)
	    op0 = convert_to_mode (Pmode, op0, 1);
	  op0 = force_reg (Pmode, op0);
	}
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      if (!REG_P (op2))
	op2 = copy_to_mode_reg (SImode, op2);
      emit_insn (ix86_gen_monitor (op0, op1, op2));
      return 0;

    case IX86_BUILTIN_MWAIT:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      emit_insn (gen_sse3_mwait (op0, op1));
      return 0;
    case IX86_BUILTIN_VEC_INIT_V2SI:
    case IX86_BUILTIN_VEC_INIT_V4HI:
    case IX86_BUILTIN_VEC_INIT_V8QI:
      return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);

    case IX86_BUILTIN_VEC_EXT_V2DF:
    case IX86_BUILTIN_VEC_EXT_V2DI:
    case IX86_BUILTIN_VEC_EXT_V4SF:
    case IX86_BUILTIN_VEC_EXT_V4SI:
    case IX86_BUILTIN_VEC_EXT_V8HI:
    case IX86_BUILTIN_VEC_EXT_V2SI:
    case IX86_BUILTIN_VEC_EXT_V4HI:
    case IX86_BUILTIN_VEC_EXT_V16QI:
      return ix86_expand_vec_ext_builtin (exp, target);

    case IX86_BUILTIN_VEC_SET_V2DI:
    case IX86_BUILTIN_VEC_SET_V4SF:
    case IX86_BUILTIN_VEC_SET_V4SI:
    case IX86_BUILTIN_VEC_SET_V8HI:
    case IX86_BUILTIN_VEC_SET_V4HI:
    case IX86_BUILTIN_VEC_SET_V16QI:
      return ix86_expand_vec_set_builtin (exp);
    case IX86_BUILTIN_INFQ:
    case IX86_BUILTIN_HUGE_VALQ:
      {
	REAL_VALUE_TYPE inf;
	rtx tmp;

	real_inf (&inf);
	tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);

	tmp = validize_mem (force_const_mem (mode, tmp));

	if (target == 0)
	  target = gen_reg_rtx (mode);

	emit_move_insn (target, tmp);
	return target;
      }
    case IX86_BUILTIN_LLWPCB:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_lwp_llwpcb;
      if (!insn_data[icode].operand[0].predicate (op0, Pmode))
	{
	  if (GET_MODE (op0) != Pmode)
	    op0 = convert_to_mode (Pmode, op0, 1);
	  op0 = force_reg (Pmode, op0);
	}
      emit_insn (gen_lwp_llwpcb (op0));
      return 0;

    case IX86_BUILTIN_SLWPCB:
      icode = CODE_FOR_lwp_slwpcb;
      if (!target
	  || !insn_data[icode].operand[0].predicate (target, Pmode))
	target = gen_reg_rtx (Pmode);
      emit_insn (gen_lwp_slwpcb (target));
      return target;
    case IX86_BUILTIN_BEXTRI32:
    case IX86_BUILTIN_BEXTRI64:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      icode = (fcode == IX86_BUILTIN_BEXTRI32
	       ? CODE_FOR_tbm_bextri_si
	       : CODE_FOR_tbm_bextri_di);
      if (!CONST_INT_P (op1))
	{
	  error ("last argument must be an immediate");
	  return const0_rtx;
	}
      else
	{
	  unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
	  unsigned char lsb_index = INTVAL (op1) & 0xFF;
	  op1 = GEN_INT (length);
	  op2 = GEN_INT (lsb_index);
	  pat = GEN_FCN (icode) (target, op0, op1, op2);
	  if (pat)
	    emit_insn (pat);
	  return target;
	}
    case IX86_BUILTIN_RDRAND16_STEP:
      icode = CODE_FOR_rdrandhi_1;
      mode0 = HImode;
      goto rdrand_step;

    case IX86_BUILTIN_RDRAND32_STEP:
      icode = CODE_FOR_rdrandsi_1;
      mode0 = SImode;
      goto rdrand_step;

    case IX86_BUILTIN_RDRAND64_STEP:
      icode = CODE_FOR_rdranddi_1;
      mode0 = DImode;

    rdrand_step:
      op0 = gen_reg_rtx (mode0);
      emit_insn (GEN_FCN (icode) (op0));

      arg0 = CALL_EXPR_ARG (exp, 0);
      op1 = expand_normal (arg0);
      if (!address_operand (op1, VOIDmode))
	{
	  op1 = convert_memory_address (Pmode, op1);
	  op1 = copy_addr_to_reg (op1);
	}
      emit_move_insn (gen_rtx_MEM (mode0, op1), op0);

      op1 = gen_reg_rtx (SImode);
      emit_move_insn (op1, CONST1_RTX (SImode));

      /* Emit SImode conditional move.  */
      if (mode0 == HImode)
	{
	  op2 = gen_reg_rtx (SImode);
	  emit_insn (gen_zero_extendhisi2 (op2, op0));
	}
      else if (mode0 == SImode)
	op2 = op0;
      else
	op2 = gen_rtx_SUBREG (SImode, op0, 0);

      if (target == 0)
	target = gen_reg_rtx (SImode);

      pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
			 const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
      return target;
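
/* The conditional move above implements the _rdrand*_step contract: the
   random value is stored through the pointer argument and the builtin
   returns 1 on success, 0 when the carry flag reported failure, so a
   caller can retry (illustrative):

     unsigned int r;
     while (!__builtin_ia32_rdrand32_step (&r))
       ;
*/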
    case IX86_BUILTIN_GATHERSIV2DF:
      icode = CODE_FOR_avx2_gathersiv2df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4DF:
      icode = CODE_FOR_avx2_gathersiv4df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV2DF:
      icode = CODE_FOR_avx2_gatherdiv2df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4DF:
      icode = CODE_FOR_avx2_gatherdiv4df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4SF:
      icode = CODE_FOR_avx2_gathersiv4sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV8SF:
      icode = CODE_FOR_avx2_gathersiv8sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4SF:
      icode = CODE_FOR_avx2_gatherdiv4sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV8SF:
      icode = CODE_FOR_avx2_gatherdiv8sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV2DI:
      icode = CODE_FOR_avx2_gathersiv2di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4DI:
      icode = CODE_FOR_avx2_gathersiv4di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV2DI:
      icode = CODE_FOR_avx2_gatherdiv2di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4DI:
      icode = CODE_FOR_avx2_gatherdiv4di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4SI:
      icode = CODE_FOR_avx2_gathersiv4si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV8SI:
      icode = CODE_FOR_avx2_gathersiv8si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4SI:
      icode = CODE_FOR_avx2_gatherdiv4si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV8SI:
      icode = CODE_FOR_avx2_gatherdiv8si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTSIV4DF:
      icode = CODE_FOR_avx2_gathersiv4df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTDIV8SF:
      icode = CODE_FOR_avx2_gatherdiv8sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTSIV4DI:
      icode = CODE_FOR_avx2_gathersiv4di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTDIV8SI:
      icode = CODE_FOR_avx2_gatherdiv8si;
      goto gather_gen;

    gather_gen:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      arg3 = CALL_EXPR_ARG (exp, 3);
      arg4 = CALL_EXPR_ARG (exp, 4);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      op3 = expand_normal (arg3);
      op4 = expand_normal (arg4);
      /* Note the arg order is different from the operand order.  */
      mode0 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[3].mode;
      mode3 = insn_data[icode].operand[4].mode;
      mode4 = insn_data[icode].operand[5].mode;

      if (target == NULL_RTX
	  || GET_MODE (target) != insn_data[icode].operand[0].mode)
	subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
      else
	subtarget = target;

      if (fcode == IX86_BUILTIN_GATHERALTSIV4DF
	  || fcode == IX86_BUILTIN_GATHERALTSIV4DI)
	{
	  rtx half = gen_reg_rtx (V4SImode);
	  if (!nonimmediate_operand (op2, V8SImode))
	    op2 = copy_to_mode_reg (V8SImode, op2);
	  emit_insn (gen_vec_extract_lo_v8si (half, op2));
	  op2 = half;
	}
      else if (fcode == IX86_BUILTIN_GATHERALTDIV8SF
	       || fcode == IX86_BUILTIN_GATHERALTDIV8SI)
	{
	  rtx (*gen) (rtx, rtx);
	  rtx half = gen_reg_rtx (mode0);
	  if (mode0 == V4SFmode)
	    gen = gen_vec_extract_lo_v8sf;
	  else
	    gen = gen_vec_extract_lo_v8si;
	  if (!nonimmediate_operand (op0, GET_MODE (op0)))
	    op0 = copy_to_mode_reg (GET_MODE (op0), op0);
	  emit_insn (gen (half, op0));
	  op0 = half;
	  if (!nonimmediate_operand (op3, GET_MODE (op3)))
	    op3 = copy_to_mode_reg (GET_MODE (op3), op3);
	  emit_insn (gen (half, op3));
	  op3 = half;
	}

      /* Force memory operand only with base register here.  But we
	 don't want to do it on memory operand for other builtin
	 functions.  */
      if (GET_MODE (op1) != Pmode)
	op1 = convert_to_mode (Pmode, op1, 1);
      op1 = force_reg (Pmode, op1);

      if (!insn_data[icode].operand[1].predicate (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (!insn_data[icode].operand[2].predicate (op1, Pmode))
	op1 = copy_to_mode_reg (Pmode, op1);
      if (!insn_data[icode].operand[3].predicate (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      if (!insn_data[icode].operand[4].predicate (op3, mode3))
	op3 = copy_to_mode_reg (mode3, op3);
      if (!insn_data[icode].operand[5].predicate (op4, mode4))
	{
	  error ("last argument must be scale 1, 2, 4, 8");
	  return const0_rtx;
	}

      /* Optimize.  If mask is known to have all high bits set,
	 replace op0 with pc_rtx to signal that the instruction
	 overwrites the whole destination and doesn't use its
	 previous contents.  */
      if (optimize)
	{
	  if (TREE_CODE (arg3) == VECTOR_CST)
	    {
	      tree elt;
	      unsigned int negative = 0;
	      for (elt = TREE_VECTOR_CST_ELTS (arg3);
		   elt; elt = TREE_CHAIN (elt))
		{
		  tree cst = TREE_VALUE (elt);
		  if (TREE_CODE (cst) == INTEGER_CST
		      && tree_int_cst_sign_bit (cst))
		    negative++;
		  else if (TREE_CODE (cst) == REAL_CST
			   && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
		    negative++;
		}
	      if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
		op0 = pc_rtx;
	    }
	  else if (TREE_CODE (arg3) == SSA_NAME)
	    {
	      /* Recognize also when mask is like:
		 __v2df src = _mm_setzero_pd ();
		 __v2df mask = _mm_cmpeq_pd (src, src);
		 or
		 __v8sf src = _mm256_setzero_ps ();
		 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
		 as that is a cheaper way to load all ones into
		 a register than having to load a constant from
		 memory.  */
	      gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
	      if (is_gimple_call (def_stmt))
		{
		  tree fndecl = gimple_call_fndecl (def_stmt);
		  if (fndecl
		      && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
		    switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
		      {
		      case IX86_BUILTIN_CMPPD:
		      case IX86_BUILTIN_CMPPS:
		      case IX86_BUILTIN_CMPPD256:
		      case IX86_BUILTIN_CMPPS256:
			if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
			  break;
			/* FALLTHRU */
		      case IX86_BUILTIN_CMPEQPD:
		      case IX86_BUILTIN_CMPEQPS:
			if (initializer_zerop (gimple_call_arg (def_stmt, 0))
			    && initializer_zerop (gimple_call_arg (def_stmt,
								   1)))
			  op0 = pc_rtx;
			break;
		      default:
			break;
		      }
		}
	    }
	}

      pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
      if (! pat)
	return const0_rtx;
      emit_insn (pat);

      if (fcode == IX86_BUILTIN_GATHERDIV8SF
	  || fcode == IX86_BUILTIN_GATHERDIV8SI)
	{
	  enum machine_mode tmode = GET_MODE (subtarget) == V8SFmode
				    ? V4SFmode : V4SImode;
	  if (target == NULL_RTX)
	    target = gen_reg_rtx (tmode);
	  if (tmode == V4SFmode)
	    emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
	  else
	    emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
	}
      else
	target = subtarget;

      return target;

    default:
      break;
    }
29672 for (i
= 0, d
= bdesc_special_args
;
29673 i
< ARRAY_SIZE (bdesc_special_args
);
29675 if (d
->code
== fcode
)
29676 return ix86_expand_special_args_builtin (d
, exp
, target
);
29678 for (i
= 0, d
= bdesc_args
;
29679 i
< ARRAY_SIZE (bdesc_args
);
29681 if (d
->code
== fcode
)
29684 case IX86_BUILTIN_FABSQ
:
29685 case IX86_BUILTIN_COPYSIGNQ
:
29687 /* Emit a normal call if SSE2 isn't available. */
29688 return expand_call (exp
, target
, ignore
);
29690 return ix86_expand_args_builtin (d
, exp
, target
);
29693 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
29694 if (d
->code
== fcode
)
29695 return ix86_expand_sse_comi (d
, exp
, target
);
29697 for (i
= 0, d
= bdesc_pcmpestr
;
29698 i
< ARRAY_SIZE (bdesc_pcmpestr
);
29700 if (d
->code
== fcode
)
29701 return ix86_expand_sse_pcmpestr (d
, exp
, target
);
29703 for (i
= 0, d
= bdesc_pcmpistr
;
29704 i
< ARRAY_SIZE (bdesc_pcmpistr
);
29706 if (d
->code
== fcode
)
29707 return ix86_expand_sse_pcmpistr (d
, exp
, target
);
29709 for (i
= 0, d
= bdesc_multi_arg
; i
< ARRAY_SIZE (bdesc_multi_arg
); i
++, d
++)
29710 if (d
->code
== fcode
)
29711 return ix86_expand_multi_arg_builtin (d
->icode
, exp
, target
,
29712 (enum ix86_builtin_func_type
)
29713 d
->flag
, d
->comparison
);
29715 gcc_unreachable ();
/* Returns a function decl for a vectorized version of the builtin function
   with builtin function code FN and the result vector type TYPE, or NULL_TREE
   if it is not available.  */

static tree
ix86_builtin_vectorized_function (tree fndecl, tree type_out,
				  tree type_in)
{
  enum machine_mode in_mode, out_mode;
  int in_n, out_n;
  enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);

  if (TREE_CODE (type_out) != VECTOR_TYPE
      || TREE_CODE (type_in) != VECTOR_TYPE
      || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
    return NULL_TREE;

  out_mode = TYPE_MODE (TREE_TYPE (type_out));
  out_n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);

  switch (fn)
    {
    case BUILT_IN_SQRT:
      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_SQRTPD];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_SQRTPD256];
	}
      break;

    case BUILT_IN_SQRTF:
      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
	}
      break;

    case BUILT_IN_IFLOOR:
    case BUILT_IN_LFLOOR:
    case BUILT_IN_LLFLOOR:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SImode && in_mode == DFmode)
	{
	  if (out_n == 4 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX];
	  else if (out_n == 8 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256];
	}
      break;

    case BUILT_IN_IFLOORF:
    case BUILT_IN_LFLOORF:
    case BUILT_IN_LLFLOORF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SImode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX256];
	}
      break;

    case BUILT_IN_ICEIL:
    case BUILT_IN_LCEIL:
    case BUILT_IN_LLCEIL:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SImode && in_mode == DFmode)
	{
	  if (out_n == 4 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX];
	  else if (out_n == 8 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256];
	}
      break;

    case BUILT_IN_ICEILF:
    case BUILT_IN_LCEILF:
    case BUILT_IN_LLCEILF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SImode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX256];
	}
      break;

    case BUILT_IN_IRINT:
    case BUILT_IN_LRINT:
    case BUILT_IN_LLRINT:
      if (out_mode == SImode && in_mode == DFmode)
	{
	  if (out_n == 4 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
	  else if (out_n == 8 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX256];
	}
      break;

    case BUILT_IN_IRINTF:
    case BUILT_IN_LRINTF:
    case BUILT_IN_LLRINTF:
      if (out_mode == SImode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
	}
      break;

    case BUILT_IN_IROUND:
    case BUILT_IN_LROUND:
    case BUILT_IN_LLROUND:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SImode && in_mode == DFmode)
	{
	  if (out_n == 4 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX];
	  else if (out_n == 8 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256];
	}
      break;

    case BUILT_IN_IROUNDF:
    case BUILT_IN_LROUNDF:
    case BUILT_IN_LLROUNDF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SImode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX256];
	}
      break;

    case BUILT_IN_COPYSIGN:
      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
	}
      break;

    case BUILT_IN_COPYSIGNF:
      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
	}
      break;

    case BUILT_IN_FLOOR:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_FLOORPD];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_FLOORPD256];
	}
      break;

    case BUILT_IN_FLOORF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_FLOORPS];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_FLOORPS256];
	}
      break;

    case BUILT_IN_CEIL:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_CEILPD];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CEILPD256];
	}
      break;

    case BUILT_IN_CEILF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CEILPS];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_CEILPS256];
	}
      break;

    case BUILT_IN_TRUNC:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_TRUNCPD];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
	}
      break;

    case BUILT_IN_TRUNCF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_TRUNCPS];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
	}
      break;

    case BUILT_IN_RINT:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_RINTPD];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_RINTPD256];
	}
      break;

    case BUILT_IN_RINTF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_RINTPS];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_RINTPS256];
	}
      break;

    case BUILT_IN_ROUND:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ256];
	}
      break;

    case BUILT_IN_ROUNDF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ256];
	}
      break;

    case BUILT_IN_FMA:
      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_VFMADDPD];
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
	}
      break;

    case BUILT_IN_FMAF:
      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_VFMADDPS];
	  if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
	}
      break;

    default:
      break;
    }

  /* Dispatch to a handler for a vectorization library.  */
  if (ix86_veclib_handler)
    return ix86_veclib_handler ((enum built_in_function) fn, type_out,
				type_in);

  return NULL_TREE;
}
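/* Worked example (added note, not in the original sources): when the
   vectorizer asks about `double sqrt (double)' with V2DF in and out,
   out_mode == in_mode == DFmode and out_n == in_n == 2, so the switch
   above yields ix86_builtins[IX86_BUILTIN_SQRTPD], the decl behind
   __builtin_ia32_sqrtpd.  */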
/* Handler for an SVML-style interface to
   a library with vectorized intrinsics.  */

static tree
ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
{
  char name[20];
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  enum machine_mode el_mode, in_mode;
  int n, in_n;

  /* The SVML is suitable for unsafe math only.  */
  if (!flag_unsafe_math_optimizations)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    case BUILT_IN_EXP:
    case BUILT_IN_LOG:
    case BUILT_IN_LOG10:
    case BUILT_IN_POW:
    case BUILT_IN_TANH:
    case BUILT_IN_TAN:
    case BUILT_IN_ATAN:
    case BUILT_IN_ATAN2:
    case BUILT_IN_ATANH:
    case BUILT_IN_CBRT:
    case BUILT_IN_SINH:
    case BUILT_IN_SIN:
    case BUILT_IN_ASINH:
    case BUILT_IN_ASIN:
    case BUILT_IN_COSH:
    case BUILT_IN_COS:
    case BUILT_IN_ACOSH:
    case BUILT_IN_ACOS:
      if (el_mode != DFmode || n != 2)
	return NULL_TREE;
      break;

    case BUILT_IN_EXPF:
    case BUILT_IN_LOGF:
    case BUILT_IN_LOG10F:
    case BUILT_IN_POWF:
    case BUILT_IN_TANHF:
    case BUILT_IN_TANF:
    case BUILT_IN_ATANF:
    case BUILT_IN_ATAN2F:
    case BUILT_IN_ATANHF:
    case BUILT_IN_CBRTF:
    case BUILT_IN_SINHF:
    case BUILT_IN_SINF:
    case BUILT_IN_ASINHF:
    case BUILT_IN_ASINF:
    case BUILT_IN_COSHF:
    case BUILT_IN_COSF:
    case BUILT_IN_ACOSHF:
    case BUILT_IN_ACOSF:
      if (el_mode != SFmode || n != 4)
	return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));

  if (fn == BUILT_IN_LOGF)
    strcpy (name, "vmlsLn4");
  else if (fn == BUILT_IN_LOG)
    strcpy (name, "vmldLn2");
  else if (n == 4)
    {
      sprintf (name, "vmls%s", bname+10);
      name[strlen (name)-1] = '4';
    }
  else
    sprintf (name, "vmld%s2", bname+10);

  /* Convert to uppercase.  */
  name[4] &= ~0x20;

  arity = 0;
  for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
       args;
       args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
			   FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
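/* Name-mangling example (added note, not in the original sources): for
   BUILT_IN_SINF with n == 4, bname is "__builtin_sinf" and bname+10 is
   "sinf"; sprintf produces "vmlssinf", the final character is replaced
   by '4' giving "vmlssin4", and clearing bit 0x20 of name[4]
   capitalizes the function letter, so the SVML entry point requested
   is "vmlsSin4".  */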
/* Handler for an ACML-style interface to
   a library with vectorized intrinsics.  */

static tree
ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
{
  char name[20] = "__vr.._";
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  enum machine_mode el_mode, in_mode;
  int n, in_n;

  /* The ACML is 64bits only and suitable for unsafe math only as
     it does not correctly support parts of IEEE with the required
     precision such as denormals.  */
  if (!TARGET_64BIT
      || !flag_unsafe_math_optimizations)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    case BUILT_IN_SIN:
    case BUILT_IN_COS:
    case BUILT_IN_EXP:
    case BUILT_IN_LOG:
    case BUILT_IN_LOG2:
    case BUILT_IN_LOG10:
      name[4] = 'd';
      name[5] = '2';
      if (el_mode != DFmode
	  || n != 2)
	return NULL_TREE;
      break;

    case BUILT_IN_SINF:
    case BUILT_IN_COSF:
    case BUILT_IN_EXPF:
    case BUILT_IN_POWF:
    case BUILT_IN_LOGF:
    case BUILT_IN_LOG2F:
    case BUILT_IN_LOG10F:
      name[4] = 's';
      name[5] = '4';
      if (el_mode != SFmode
	  || n != 4)
	return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
  sprintf (name + 7, "%s", bname+10);

  arity = 0;
  for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
       args;
       args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
			   FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
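/* Name-mangling example (added note, not in the original sources): for
   BUILT_IN_SIN with a two-element double vector, NAME starts out as
   "__vr.._", the DFmode case patches it to "__vrd2_", and the sprintf
   into name + 7 appends bname+10 ("sin"), yielding the ACML entry
   point "__vrd2_sin".  */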
/* Returns a decl of a function that implements gather load with
   memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
   Return NULL_TREE if it is not available.  */

static tree
ix86_vectorize_builtin_gather (const_tree mem_vectype,
			       const_tree index_type, int scale)
{
  bool si;
  enum ix86_builtins code;

  if (! TARGET_AVX2)
    return NULL_TREE;

  if ((TREE_CODE (index_type) != INTEGER_TYPE
       && !POINTER_TYPE_P (index_type))
      || (TYPE_MODE (index_type) != SImode
	  && TYPE_MODE (index_type) != DImode))
    return NULL_TREE;

  if (TYPE_PRECISION (index_type) > POINTER_SIZE)
    return NULL_TREE;

  /* v*gather* insn sign extends index to pointer mode.  */
  if (TYPE_PRECISION (index_type) < POINTER_SIZE
      && TYPE_UNSIGNED (index_type))
    return NULL_TREE;

  if (scale <= 0
      || scale > 8
      || (scale & (scale - 1)) != 0)
    return NULL_TREE;

  si = TYPE_MODE (index_type) == SImode;
  switch (TYPE_MODE (mem_vectype))
    {
    case V2DFmode:
      code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
      break;
    case V4DFmode:
      code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
      break;
    case V2DImode:
      code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
      break;
    case V4DImode:
      code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
      break;
    case V4SFmode:
      code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
      break;
    case V8SFmode:
      code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
      break;
    case V4SImode:
      code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
      break;
    case V8SImode:
      code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
      break;
    default:
      return NULL_TREE;
    }

  return ix86_builtins[code];
}
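/* Mapping example (added note, not in the original sources): gathering
   V2DFmode data with a SImode index type selects
   IX86_BUILTIN_GATHERSIV2DF (AVX2 vgatherdpd with dword indices),
   whereas a DImode index type selects IX86_BUILTIN_GATHERDIV2DF
   (vgatherqpd with qword indices).  */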
/* Returns a code for a target-specific builtin that implements
   reciprocal of the function, or NULL_TREE if not available.  */

static tree
ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
			 bool sqrt ATTRIBUTE_UNUSED)
{
  if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
	 && flag_finite_math_only && !flag_trapping_math
	 && flag_unsafe_math_optimizations))
    return NULL_TREE;

  if (md_fn)
    /* Machine dependent builtins.  */
    switch (fn)
      {
	/* Vectorized version of sqrt to rsqrt conversion.  */
      case IX86_BUILTIN_SQRTPS_NR:
	return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];

      case IX86_BUILTIN_SQRTPS_NR256:
	return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];

      default:
	return NULL_TREE;
      }
  else
    /* Normal builtins.  */
    switch (fn)
      {
	/* Sqrt to rsqrt conversion.  */
      case BUILT_IN_SQRTF:
	return ix86_builtins[IX86_BUILTIN_RSQRTF];

      default:
	return NULL_TREE;
      }
}
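/* Usage note (added note, not in the original sources): under flags such
   as -ffast-math, which imply finite-math-only and unsafe math
   optimizations while disabling trapping math, this hook lets callers
   rewrite a sqrtf call in terms of the IX86_BUILTIN_RSQRTF decl; the
   rsqrtss estimate is then refined with a Newton-Raphson step by the
   expanders elsewhere in this file.  */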
/* Helper for avx_vpermilps256_operand et al.  This is also used by
   the expansion functions to turn the parallel back into a mask.
   The return value is 0 for no match and the imm8+1 for a match.  */

int
avx_vpermilp_parallel (rtx par, enum machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode);
  unsigned mask = 0;
  unsigned char ipar[8];

  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
	return 0;
      ei = INTVAL (er);
      if (ei >= nelt)
	return 0;
      ipar[i] = ei;
    }

  switch (mode)
    {
    case V4DFmode:
      /* In the 256-bit DFmode case, we can only move elements within
	 a 128-bit lane.  */
      for (i = 0; i < 2; ++i)
	{
	  if (ipar[i] >= 2)
	    return 0;
	  mask |= ipar[i] << i;
	}
      for (i = 2; i < 4; ++i)
	{
	  if (ipar[i] < 2)
	    return 0;
	  mask |= (ipar[i] - 2) << i;
	}
      break;

    case V8SFmode:
      /* In the 256-bit SFmode case, we have full freedom of movement
	 within the low 128-bit lane, but the high 128-bit lane must
	 mirror the exact same pattern.  */
      for (i = 0; i < 4; ++i)
	if (ipar[i] + 4 != ipar[i + 4])
	  return 0;
      nelt = 4;
      /* FALLTHRU */

    case V2DFmode:
    case V4SFmode:
      /* In the 128-bit case, we've full freedom in the placement of
	 the elements from the source operand.  */
      for (i = 0; i < nelt; ++i)
	mask |= ipar[i] << (i * (nelt / 2));
      break;

    default:
      gcc_unreachable ();
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
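/* Worked example (added note, not in the original sources): for V8SFmode
   the identity parallel (0 1 2 3 4 5 6 7) satisfies the mirror check
   (ipar[i] + 4 == ipar[i + 4]), NELT drops to 4, and the packing loop
   computes mask = 0 << 0 | 1 << 2 | 2 << 4 | 3 << 6 = 0xe4; the
   function returns 0xe4 + 1 and callers subtract one to recover the
   vpermilps imm8.  */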
/* Helper for avx_vperm2f128_v4df_operand et al.  This is also used by
   the expansion functions to turn the parallel back into a mask.
   The return value is 0 for no match and the imm8+1 for a match.  */

int
avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
  unsigned mask = 0;
  unsigned char ipar[8];

  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
	return 0;
      ei = INTVAL (er);
      if (ei >= 2 * nelt)
	return 0;
      ipar[i] = ei;
    }

  /* Validate that the halves of the permute are halves.  */
  for (i = 0; i < nelt2 - 1; ++i)
    if (ipar[i] + 1 != ipar[i + 1])
      return 0;
  for (i = nelt2; i < nelt - 1; ++i)
    if (ipar[i] + 1 != ipar[i + 1])
      return 0;

  /* Reconstruct the mask.  */
  for (i = 0; i < 2; ++i)
    {
      unsigned e = ipar[i * nelt2];
      if (e % nelt2)
	return 0;
      e /= nelt2;
      mask |= e << (i * 4);
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
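/* Worked example (added note, not in the original sources): for V8SFmode
   (nelt == 8, nelt2 == 4) the parallel (8 9 10 11 0 1 2 3) takes the
   low half of operand 2 followed by the low half of operand 1.  Both
   runs are consecutive, e is 2 for the first half and 0 for the
   second, so mask == 0x02 and the function returns 0x03, i.e. the
   vperm2f128 imm8 plus one.  */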
/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
rtx
ix86_force_to_memory (enum machine_mode mode, rtx operand)
{
  rtx result;

  gcc_assert (reload_completed);
  if (ix86_using_red_zone ())
    {
      result = gen_rtx_MEM (mode,
			    gen_rtx_PLUS (Pmode,
					  stack_pointer_rtx,
					  GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (TARGET_64BIT)
    {
      switch (mode)
	{
	case HImode:
	case SImode:
	  operand = gen_lowpart (DImode, operand);
	  /* FALLTHRU */
	case DImode:
	  emit_insn (
		      gen_rtx_SET (VOIDmode,
				   gen_rtx_MEM (DImode,
						gen_rtx_PRE_DEC (DImode,
							stack_pointer_rtx)),
				   operand));
	  break;
	default:
	  gcc_unreachable ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
	{
	case DImode:
	  {
	    rtx operands[2];
	    split_double_mode (mode, &operand, 1, operands, operands + 1);
	    emit_insn (
			gen_rtx_SET (VOIDmode,
				     gen_rtx_MEM (SImode,
						  gen_rtx_PRE_DEC (Pmode,
							stack_pointer_rtx)),
				     operands[1]));
	    emit_insn (
			gen_rtx_SET (VOIDmode,
				     gen_rtx_MEM (SImode,
						  gen_rtx_PRE_DEC (Pmode,
							stack_pointer_rtx)),
				     operands[0]));
	  }
	  break;
	case HImode:
	  /* Store HImodes as SImodes.  */
	  operand = gen_lowpart (SImode, operand);
	  /* FALLTHRU */
	case SImode:
	  emit_insn (
		      gen_rtx_SET (VOIDmode,
				   gen_rtx_MEM (GET_MODE (operand),
						gen_rtx_PRE_DEC (SImode,
							stack_pointer_rtx)),
				   operand));
	  break;
	default:
	  gcc_unreachable ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}
/* Free operand from the memory.  */
void
ix86_free_from_memory (enum machine_mode mode)
{
  if (!ix86_using_red_zone ())
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
	size = 8;
      else
	size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
	 to pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					    GEN_INT (size))));
    }
}
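/* Usage sketch (added note, not in the original sources):
   ix86_force_to_memory and ix86_free_from_memory bracket a post-reload
   temporary spill:

     rtx mem = ix86_force_to_memory (DImode, operand);
     ... use MEM where a memory operand is required ...
     ix86_free_from_memory (DImode);

   Outside the red zone the first call pushes the value via PRE_DEC
   stack accesses and the second releases the slot with an LEA that
   peephole2 may turn back into pop/add.  */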
/* Implement TARGET_PREFERRED_RELOAD_CLASS.

   Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */

static reg_class_t
ix86_preferred_reload_class (rtx x, reg_class_t regclass)
{
  enum machine_mode mode = GET_MODE (x);

  /* We're only allowed to return a subclass of CLASS.  Many of the
     following checks fail for NO_REGS, so eliminate that early.  */
  if (regclass == NO_REGS)
    return NO_REGS;

  /* All classes can load zeros.  */
  if (x == CONST0_RTX (mode))
    return regclass;

  /* Force constants into memory if we are loading a (nonzero) constant into
     an MMX or SSE register.  This is because there are no MMX/SSE instructions
     to load from a constant.  */
  if (CONSTANT_P (x)
      && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
    return NO_REGS;

  /* Prefer SSE regs only, if we can use them for math.  */
  if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
    return SSE_CLASS_P (regclass) ? regclass : NO_REGS;

  /* Floating-point constants need more complex checks.  */
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* General regs can load everything.  */
      if (reg_class_subset_p (regclass, GENERAL_REGS))
	return regclass;

      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
	 zero above.  We only want to wind up preferring 80387 registers if
	 we plan on doing computation with them.  */
      if (TARGET_80387
	  && standard_80387_constant_p (x) > 0)
	{
	  /* Limit class to non-sse.  */
	  if (regclass == FLOAT_SSE_REGS)
	    return FLOAT_REGS;
	  if (regclass == FP_TOP_SSE_REGS)
	    return FP_TOP_REG;
	  if (regclass == FP_SECOND_SSE_REGS)
	    return FP_SECOND_REG;
	  if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
	    return regclass;
	}

      return NO_REGS;
    }

  /* Generally when we see PLUS here, it's the function invariant
     (plus soft-fp const_int).  Which can only be computed into general
     regs.  */
  if (GET_CODE (x) == PLUS)
    return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;

  /* QImode constants are easy to load, but non-constant QImode data
     must go into Q_REGS.  */
  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
    {
      if (reg_class_subset_p (regclass, Q_REGS))
	return regclass;
      if (reg_class_subset_p (Q_REGS, regclass))
	return Q_REGS;
      return NO_REGS;
    }

  return regclass;
}
/* Discourage putting floating-point values in SSE registers unless
   SSE math is being used, and likewise for the 387 registers.  */
static reg_class_t
ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
{
  enum machine_mode mode = GET_MODE (x);

  /* Restrict the output reload class to the register bank that we are doing
     math on.  If we would like not to return a subset of CLASS, reject this
     alternative: if reload cannot do this, it will still use its choice.  */
  mode = GET_MODE (x);
  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;

  if (X87_FLOAT_MODE_P (mode))
    {
      if (regclass == FP_TOP_SSE_REGS)
	return FP_TOP_REG;
      else if (regclass == FP_SECOND_SSE_REGS)
	return FP_SECOND_REG;
      else
	return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
    }

  return regclass;
}
static reg_class_t
ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
		       enum machine_mode mode, secondary_reload_info *sri)
{
  /* Double-word spills from general registers to non-offsettable memory
     references (zero-extended addresses) require special handling.  */
  if (TARGET_64BIT
      && MEM_P (x)
      && GET_MODE_SIZE (mode) > UNITS_PER_WORD
      && rclass == GENERAL_REGS
      && !offsettable_memref_p (x))
    {
      sri->icode = (in_p
		    ? CODE_FOR_reload_noff_load
		    : CODE_FOR_reload_noff_store);
      /* Add the cost of moving address to a temporary.  */
      sri->extra_cost = 1;

      return NO_REGS;
    }

  /* QImode spills from non-QI registers require
     intermediate register on 32bit targets.  */
  if (!TARGET_64BIT
      && !in_p && mode == QImode
      && (rclass == GENERAL_REGS
	  || rclass == LEGACY_REGS
	  || rclass == INDEX_REGS))
    {
      int regno;

      if (REG_P (x))
	regno = REGNO (x);
      else
	regno = -1;

      if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
	regno = true_regnum (x);

      /* Return Q_REGS if the operand is in memory.  */
      if (regno == -1)
	return Q_REGS;
    }

  /* This condition handles corner case where an expression involving
     pointers gets vectorized.  We're trying to use the address of a
     stack slot as a vector initializer.

     (set (reg:V2DI 74 [ vect_cst_.2 ])
	  (vec_duplicate:V2DI (reg/f:DI 20 frame)))

     Eventually frame gets turned into sp+offset like this:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
	  (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
				       (const_int 392 [0x188]))))

     That later gets turned into:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
	  (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
	    (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))

     We'll have the following reload recorded:

     Reload 0: reload_in (DI) =
	   (plus:DI (reg/f:DI 7 sp)
	    (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
     reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
     SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
     reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
     reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
     reload_reg_rtx: (reg:V2DI 22 xmm1)

     Which isn't going to work since SSE instructions can't handle scalar
     additions.  Returning GENERAL_REGS forces the addition into integer
     register and reload can handle subsequent reloads without problems.  */

  if (in_p && GET_CODE (x) == PLUS
      && SSE_CLASS_P (rclass)
      && SCALAR_INT_MODE_P (mode))
    return GENERAL_REGS;

  return NO_REGS;
}
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.  */

static bool
ix86_class_likely_spilled_p (reg_class_t rclass)
{
  switch (rclass)
    {
      case AREG:
      case DREG:
      case CREG:
      case BREG:
      case AD_REGS:
      case CLOBBERED_REGS:
      case Q_REGS:
      case SIREG:
      case DIREG:
      case SSE_FIRST_REG:
      case FP_TOP_REG:
      case FP_SECOND_REG:
	return true;

      default:
	break;
    }

  return false;
}
/* If we are copying between general and FP registers, we need a memory
   location. The same is true for SSE and MMX registers.

   To optimize register_move_cost performance, allow inline variant.

   The macro can't work reliably when one of the CLASSES is class containing
   registers from multiple units (SSE, MMX, integer).  We avoid this by never
   combining those units in single alternative in the machine description.
   Ensure that this constraint holds to avoid unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
   enforce these sanity checks.  */

static inline bool
inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
				enum machine_mode mode, int strict)
{
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      gcc_assert (!strict);
      return true;
    }

  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
    return true;

  /* ??? This is a lie.  We do have moves between mmx/general, and for
     mmx/sse2.  But by saying we need secondary memory we discourage the
     register allocator from using the mmx registers unless needed.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
    return true;

  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    {
      /* SSE1 doesn't have any direct moves from other classes.  */
      if (!TARGET_SSE2)
	return true;

      /* If the target says that inter-unit moves are more expensive
	 than moving through memory, then don't generate them.  */
      if (!TARGET_INTER_UNIT_MOVES)
	return true;

      /* Between SSE and general, we have moves no larger than word size.  */
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	return true;
    }

  return false;
}

bool
ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
			      enum machine_mode mode, int strict)
{
  return inline_secondary_memory_needed (class1, class2, mode, strict);
}
/* Implement the TARGET_CLASS_MAX_NREGS hook.

   On the 80386, this is the size of MODE in words,
   except in the FP regs, where a single reg is always enough.  */

static unsigned char
ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
{
  if (MAYBE_INTEGER_CLASS_P (rclass))
    {
      if (mode == XFmode)
	return (TARGET_64BIT ? 2 : 3);
      else if (mode == XCmode)
	return (TARGET_64BIT ? 4 : 6);
      else
	return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
    }
  else
    {
      if (COMPLEX_MODE_P (mode))
	return 2;
      else
	return 1;
    }
}
/* Return true if the registers in CLASS cannot represent the change from
   modes FROM to TO.  */

bool
ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
			       enum reg_class regclass)
{
  if (from == to)
    return false;

  /* x87 registers can't do subreg at all, as all values are reformatted
     to extended precision.  */
  if (MAYBE_FLOAT_CLASS_P (regclass))
    return true;

  if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
    {
      /* Vector registers do not support QI or HImode loads.  If we don't
	 disallow a change to these modes, reload will assume it's ok to
	 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
	 the vec_dupv4hi pattern.  */
      if (GET_MODE_SIZE (from) < 4)
	return true;

      /* Vector registers do not support subreg with nonzero offsets, which
	 are otherwise valid for integer registers.  Since we can't see
	 whether we have a nonzero offset from here, prohibit all
	 nonparadoxical subregs changing size.  */
      if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
	return true;
    }

  return false;
}
/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   This function is used extensively by register_move_cost that is used to
   build tables at startup.  Make it inline in this case.
   When IN is 2, return maximum of in and out move cost.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Model also increased moving costs of QImode registers in non
   Q_REGS classes.  */

static inline int
inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
			 int in)
{
  int cost;
  if (FLOAT_CLASS_P (regclass))
    {
      int index;
      switch (mode)
	{
	  case SFmode:
	    index = 0;
	    break;
	  case DFmode:
	    index = 1;
	    break;
	  case XFmode:
	    index = 2;
	    break;
	  default:
	    return 100;
	}
      if (in == 2)
	return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (regclass))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	  case 4:
	    index = 0;
	    break;
	  case 8:
	    index = 1;
	    break;
	  case 16:
	    index = 2;
	    break;
	  default:
	    return 100;
	}
      if (in == 2)
	return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (regclass))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	  case 4:
	    index = 0;
	    break;
	  case 8:
	    index = 1;
	    break;
	  default:
	    return 100;
	}
      if (in == 2)
	return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
      case 1:
	if (Q_CLASS_P (regclass) || TARGET_64BIT)
	  {
	    if (!in)
	      return ix86_cost->int_store[0];
	    if (TARGET_PARTIAL_REG_DEPENDENCY
		&& optimize_function_for_speed_p (cfun))
	      cost = ix86_cost->movzbl_load;
	    else
	      cost = ix86_cost->int_load[0];
	    if (in == 2)
	      return MAX (cost, ix86_cost->int_store[0]);
	    return cost;
	  }
	else
	  {
	    if (in == 2)
	      return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
	    if (in)
	      return ix86_cost->movzbl_load;
	    else
	      return ix86_cost->int_store[0] + 4;
	  }
	break;
      case 2:
	if (in == 2)
	  return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
	return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
      default:
	/* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
	if (mode == TFmode)
	  mode = XFmode;
	if (in == 2)
	  cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
	else if (in)
	  cost = ix86_cost->int_load[2];
	else
	  cost = ix86_cost->int_store[2];
	return (cost * (((int) GET_MODE_SIZE (mode)
			 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
    }
}

static int
ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
		       bool in)
{
  return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
}
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */

static int
ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
			 reg_class_t class2_i)
{
  enum reg_class class1 = (enum reg_class) class1_i;
  enum reg_class class2 = (enum reg_class) class2_i;

  /* In case we require secondary memory, compute cost of the store followed
     by load.  In order to avoid bad register allocation choices, we need
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
  if (inline_secondary_memory_needed (class1, class2, mode, 0))
    {
      int cost = 1;

      cost += inline_memory_move_cost (mode, class1, 2);
      cost += inline_memory_move_cost (mode, class2, 2);

      /* In case of copying from general_purpose_register we may emit multiple
	 stores followed by single load causing memory size mismatch stall.
	 Count this as arbitrarily high cost of 20.  */
      if (targetm.class_max_nregs (class1, mode)
	  > targetm.class_max_nregs (class2, mode))
	cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
	 have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
	  || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
	cost += 20;

      return cost;
    }

  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))

    /* ??? By keeping returned value relatively high, we limit the number
       of moves between integer and MMX/SSE registers for all targets.
       Additionally, high value prevents problem with x86_modes_tieable_p(),
       where integer modes in MMX/SSE registers are not tieable
       because of missing QImode and HImode moves to, from or between
       MMX/SSE registers.  */
    return MAX (8, ix86_cost->mmxsse_to_integer);

  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}
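/* Cost example (added note, not in the original sources; the exact
   numbers depend on the processor cost table in use): a DFmode move
   between FLOAT_REGS and SSE_REGS needs secondary memory, so its cost
   is 1 plus the maximum fp load/store cost plus the maximum sse
   load/store cost, while a GENERAL_REGS to GENERAL_REGS move simply
   returns 2.  */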
/* Return TRUE if hard register REGNO can hold a value of machine-mode
   MODE.  */

bool
ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return false;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    {
      /* We implement the move patterns for all vector modes into and
	 out of SSE registers, even when no operation instructions
	 are available.  OImode move is available only when AVX is
	 enabled.  */
      return ((TARGET_AVX && mode == OImode)
	      || VALID_AVX256_REG_MODE (mode)
	      || VALID_SSE_REG_MODE (mode)
	      || VALID_SSE2_REG_MODE (mode)
	      || VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }
  if (MMX_REGNO_P (regno))
    {
      /* We implement the move patterns for 3DNOW modes even in MMX mode,
	 so if the register is available at all, then we can move data of
	 the given mode into or out of it.  */
      return (VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }

  if (mode == QImode)
    {
      /* Take care for QImode values - they can be in non-QI regs,
	 but then they do cause partial register stalls.  */
      if (regno <= BX_REG || TARGET_64BIT)
	return true;
      if (!TARGET_PARTIAL_REG_STALL)
	return true;
      return !can_create_pseudo_p ();
    }
  /* We handle both integer and floats in the general purpose registers.  */
  else if (VALID_INT_MODE_P (mode))
    return true;
  else if (VALID_FP_MODE_P (mode))
    return true;
  else if (VALID_DFP_MODE_P (mode))
    return true;
  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
     on to use that value in smaller contexts, this can easily force a
     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
     supporting DImode, allow it.  */
  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
    return true;

  return false;
}
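/* Examples (added note, not in the original sources): any SSE register
   can hold V4SFmode, accepted above via VALID_SSE_REG_MODE; QImode in
   %esi on 32-bit code is rejected while pseudos can still be created
   whenever partial-register stalls matter, since only registers with
   regno <= BX_REG take QImode cheaply there.  */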
/* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
   tieable integer mode.  */

static bool
ix86_tieable_integer_mode_p (enum machine_mode mode)
{
  switch (mode)
    {
    case HImode:
    case SImode:
      return true;

    case QImode:
      return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;

    case DImode:
      return TARGET_64BIT;

    default:
      return false;
    }
}
/* Return true if MODE1 is accessible in a register that can hold MODE2
   without copying.  That is, all register classes that can hold MODE2
   can also hold MODE1.  */

bool
ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
{
  if (mode1 == mode2)
    return true;

  if (ix86_tieable_integer_mode_p (mode1)
      && ix86_tieable_integer_mode_p (mode2))
    return true;

  /* MODE2 being XFmode implies fp stack or general regs, which means we
     can tie any smaller floating point modes to it.  Note that we do not
     tie this with TFmode.  */
  if (mode2 == XFmode)
    return mode1 == SFmode || mode1 == DFmode;

  /* MODE2 being DFmode implies fp stack, general or sse regs, which means
     that we can tie it with SFmode.  */
  if (mode2 == DFmode)
    return mode1 == SFmode;

  /* If MODE2 is only appropriate for an SSE register, then tie with
     any other mode acceptable to SSE registers.  */
  if (GET_MODE_SIZE (mode2) == 16
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 16
	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));

  /* If MODE2 is appropriate for an MMX register, then tie
     with any other mode acceptable to MMX registers.  */
  if (GET_MODE_SIZE (mode2) == 8
      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 8
	    && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));

  return false;
}
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total,
		bool speed)
{
  enum rtx_code outer_code = (enum rtx_code) outer_code_i;
  enum machine_mode mode = GET_MODE (x);
  const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;

  switch (code)
    {
    case CONST_INT:
    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
	*total = 3;
      else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
	*total = 2;
      else if (flag_pic && SYMBOLIC_CONST (x)
	       && (!TARGET_64BIT
		   || (GET_CODE (x) != LABEL_REF
		       && (GET_CODE (x) != SYMBOL_REF
			   || !SYMBOL_REF_LOCAL_P (x)))))
	*total = 1;
      else
	*total = 0;
      return true;

    case CONST_DOUBLE:
      if (mode == VOIDmode)
	*total = 0;
      else
	switch (standard_80387_constant_p (x))
	  {
	  case 1: /* 0.0 */
	    *total = 1;
	    break;
	  default: /* Other constants */
	    *total = 2;
	    break;
	  case 0:
	  case -1:
	    /* Start with (MEM (SYMBOL_REF)), since that's where
	       it'll probably end up.  Add a penalty for size.  */
	    *total = (COSTS_N_INSNS (1)
		      + (flag_pic != 0 && !TARGET_64BIT)
		      + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
	    break;
	  }
      return true;

    case ZERO_EXTEND:
      /* The zero extensions is often completely free on x86_64, so make
	 it as cheap as possible.  */
      if (TARGET_64BIT && mode == DImode
	  && GET_MODE (XEXP (x, 0)) == SImode)
	*total = 1;
      else if (TARGET_ZERO_EXTEND_WITH_AND)
	*total = cost->add;
      else
	*total = cost->movzx;
      break;

    case SIGN_EXTEND:
      *total = cost->movsx;
      break;

    case ASHIFT:
      if (CONST_INT_P (XEXP (x, 1))
	  && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
	{
	  HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
	  if (value == 1)
	    {
	      *total = cost->add;
	      return false;
	    }
	  if ((value == 2 || value == 3)
	      && cost->lea <= cost->shift_const)
	    {
	      *total = cost->lea;
	      return false;
	    }
	}
      /* FALLTHRU */

    case ROTATE:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
	{
	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      if (INTVAL (XEXP (x, 1)) > 32)
		*total = cost->shift_const + COSTS_N_INSNS (2);
	      else
		*total = cost->shift_const * 2;
	    }
	  else
	    {
	      if (GET_CODE (XEXP (x, 1)) == AND)
		*total = cost->shift_var * 2;
	      else
		*total = cost->shift_var * 6 + COSTS_N_INSNS (2);
	    }
	}
      else
	{
	  if (CONST_INT_P (XEXP (x, 1)))
	    *total = cost->shift_const;
	  else
	    *total = cost->shift_var;
	}
      return false;

    case FMA:
      {
	rtx sub;

	gcc_assert (FLOAT_MODE_P (mode));
	gcc_assert (TARGET_FMA || TARGET_FMA4);

	/* ??? SSE scalar/vector cost should be used here.  */
	/* ??? Bald assumption that fma has the same cost as fmul.  */
	*total = cost->fmul;
	*total += rtx_cost (XEXP (x, 1), FMA, 1, speed);

	/* Negate in op0 or op2 is free: FMS, FNMA, FNMS.  */
	sub = XEXP (x, 0);
	if (GET_CODE (sub) == NEG)
	  sub = XEXP (sub, 0);
	*total += rtx_cost (sub, FMA, 0, speed);

	sub = XEXP (x, 2);
	if (GET_CODE (sub) == NEG)
	  sub = XEXP (sub, 0);
	*total += rtx_cost (sub, FMA, 2, speed);
	return true;
      }

    case MULT:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	{
	  /* ??? SSE scalar cost should be used here.  */
	  *total = cost->fmul;
	  return false;
	}
      else if (X87_FLOAT_MODE_P (mode))
	{
	  *total = cost->fmul;
	  return false;
	}
      else if (FLOAT_MODE_P (mode))
	{
	  /* ??? SSE vector cost should be used here.  */
	  *total = cost->fmul;
	  return false;
	}
      else
	{
	  rtx op0 = XEXP (x, 0);
	  rtx op1 = XEXP (x, 1);
	  int nbits;
	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
	      for (nbits = 0; value != 0; value &= value - 1)
		nbits++;
	    }
	  else
	    /* This is arbitrary.  */
	    nbits = 7;

	  /* Compute costs correctly for widening multiplication.  */
	  if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
	      && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
		 == GET_MODE_SIZE (mode))
	    {
	      int is_mulwiden = 0;
	      enum machine_mode inner_mode = GET_MODE (op0);

	      if (GET_CODE (op0) == GET_CODE (op1))
		is_mulwiden = 1, op1 = XEXP (op1, 0);
	      else if (CONST_INT_P (op1))
		{
		  if (GET_CODE (op0) == SIGN_EXTEND)
		    is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
				  == INTVAL (op1);
		  else
		    is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
		}

	      if (is_mulwiden)
		op0 = XEXP (op0, 0), mode = GET_MODE (op0);
	    }

	  *total = (cost->mult_init[MODE_INDEX (mode)]
		    + nbits * cost->mult_bit
		    + rtx_cost (op0, outer_code, opno, speed)
		    + rtx_cost (op1, outer_code, opno, speed));

	  return true;
	}

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	/* ??? SSE cost should be used here.  */
	*total = cost->fdiv;
      else if (X87_FLOAT_MODE_P (mode))
	*total = cost->fdiv;
      else if (FLOAT_MODE_P (mode))
	/* ??? SSE vector cost should be used here.  */
	*total = cost->fdiv;
      else
	*total = cost->divide[MODE_INDEX (mode)];
      return false;

    case PLUS:
      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
	{
	  if (GET_CODE (XEXP (x, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	      && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
	      && CONSTANT_P (XEXP (x, 1)))
	    {
	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
	      if (val == 2 || val == 4 || val == 8)
		{
		  *total = cost->lea;
		  *total += rtx_cost (XEXP (XEXP (x, 0), 1),
				      outer_code, opno, speed);
		  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
				      outer_code, opno, speed);
		  *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
		  return true;
		}
	    }
	  else if (GET_CODE (XEXP (x, 0)) == MULT
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
	    {
	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
	      if (val == 2 || val == 4 || val == 8)
		{
		  *total = cost->lea;
		  *total += rtx_cost (XEXP (XEXP (x, 0), 0),
				      outer_code, opno, speed);
		  *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
		  return true;
		}
	    }
	  else if (GET_CODE (XEXP (x, 0)) == PLUS)
	    {
	      *total = cost->lea;
	      *total += rtx_cost (XEXP (XEXP (x, 0), 0),
				  outer_code, opno, speed);
	      *total += rtx_cost (XEXP (XEXP (x, 0), 1),
				  outer_code, opno, speed);
	      *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
	      return true;
	    }
	}
      /* FALLTHRU */

    case MINUS:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	{
	  /* ??? SSE cost should be used here.  */
	  *total = cost->fadd;
	  return false;
	}
      else if (X87_FLOAT_MODE_P (mode))
	{
	  *total = cost->fadd;
	  return false;
	}
      else if (FLOAT_MODE_P (mode))
	{
	  /* ??? SSE vector cost should be used here.  */
	  *total = cost->fadd;
	  return false;
	}
      /* FALLTHRU */

    case AND:
    case IOR:
    case XOR:
      if (!TARGET_64BIT && mode == DImode)
	{
	  *total = (cost->add * 2
		    + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
		       << (GET_MODE (XEXP (x, 0)) != DImode))
		    + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
		       << (GET_MODE (XEXP (x, 1)) != DImode)));
	  return true;
	}
      /* FALLTHRU */

    case NEG:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	{
	  /* ??? SSE cost should be used here.  */
	  *total = cost->fchs;
	  return false;
	}
      else if (X87_FLOAT_MODE_P (mode))
	{
	  *total = cost->fchs;
	  return false;
	}
      else if (FLOAT_MODE_P (mode))
	{
	  /* ??? SSE vector cost should be used here.  */
	  *total = cost->fchs;
	  return false;
	}
      /* FALLTHRU */

    case NOT:
      if (!TARGET_64BIT && mode == DImode)
	*total = cost->add * 2;
      else
	*total = cost->add;
      return false;

    case COMPARE:
      if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
	  && XEXP (XEXP (x, 0), 1) == const1_rtx
	  && CONST_INT_P (XEXP (XEXP (x, 0), 2))
	  && XEXP (x, 1) == const0_rtx)
	{
	  /* This kind of construct is implemented using test[bwl].
	     Treat it as if we had an AND.  */
	  *total = (cost->add
		    + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
		    + rtx_cost (const1_rtx, outer_code, opno, speed));
	  return true;
	}
      break;

    case FLOAT_EXTEND:
      if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
	*total = 0;
      return false;

    case ABS:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	/* ??? SSE cost should be used here.  */
	*total = cost->fabs;
      else if (X87_FLOAT_MODE_P (mode))
	*total = cost->fabs;
      else if (FLOAT_MODE_P (mode))
	/* ??? SSE vector cost should be used here.  */
	*total = cost->fabs;
      return false;

    case SQRT:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	/* ??? SSE cost should be used here.  */
	*total = cost->fsqrt;
      else if (X87_FLOAT_MODE_P (mode))
	*total = cost->fsqrt;
      else if (FLOAT_MODE_P (mode))
	/* ??? SSE vector cost should be used here.  */
	*total = cost->fsqrt;
      return false;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TP)
	*total = 0;
      return false;

    case VEC_SELECT:
    case VEC_CONCAT:
    case VEC_MERGE:
    case VEC_DUPLICATE:
      /* ??? Assume all of these vector manipulation patterns are
	 recognizable.  In which case they all pretty much have the
	 same cost.  */
      *total = COSTS_N_INSNS (1);
      return true;

    default:
      break;
    }

  return false;
}
#if TARGET_MACHO

static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* For 64-bit we shouldn't get here.  */
  gcc_assert (!TARGET_64BIT);

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = targetm.strip_name_encoding (symb);

  length = strlen (stub);
  binder_name = XALLOCAVEC (char, length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = XALLOCAVEC (char, length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_ATT_STUB)
    switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
  else if (MACHOPIC_PURE)
    switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
  else
    switch_to_section (darwin_sections[machopic_symbol_stub_section]);

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_ATT_STUB)
    {
      fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
    }
  else if (MACHOPIC_PURE)
    {
      /* PIC stub.  */
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
      output_set_got (tmp, NULL_RTX);	/* "CALL ___<cpu>.get_pc_thunk.cx".  */
      fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
	       label, lazy_ptr_name, label);
      fprintf (file, "\tjmp\t*%%ecx\n");
    }
  else
    fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);

  /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
     it needs no stub-binding-helper.  */
  if (MACHOPIC_ATT_STUB)
    return;

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
      fprintf (file, "\tpushl\t%%ecx\n");
    }
  else
    fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);

  fputs ("\tjmp\tdyld_stub_binding_helper\n", file);

  /* N.B. Keep the correspondence of these
     'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
     old-pic/new-pic/non-pic stubs; altering this will break
     compatibility with existing dylibs.  */
  if (MACHOPIC_PURE)
    {
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
    }
  else
    /* 16-byte -mdynamic-no-pic stub.  */
    switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);

  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, ASM_LONG "%s\n", binder_name);
}
#endif /* TARGET_MACHO */
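/* Shape of the output (added note, not in the original sources; the
   exact thunk and binder labels come from output_set_got and
   GEN_BINDER_NAME_FOR_STUB): for a symbol "foo" the MACHOPIC_PURE path
   above emits roughly

       <stub>: .indirect_symbol _foo
	       call <pc-thunk>		loads PC into %ecx
       LPC$N:  movl LN$lz-LPC$N(%ecx),%ecx
	       jmp *%ecx
       <binder>:
	       lea LN$lz-<binder>(%ecx),%ecx
	       pushl %ecx
	       jmp dyld_stub_binding_helper
       LN$lz:  .indirect_symbol _foo
	       .long <binder>
   */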
/* Order the registers for register allocator.  */

void
x86_order_regs_for_local_alloc (void)
{
   int pos = 0;
   int i;

   /* First allocate the local general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && call_used_regs[i])
	reg_alloc_order [pos++] = i;

   /* Global general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && !call_used_regs[i])
	reg_alloc_order [pos++] = i;

   /* x87 registers come first in case we are doing FP math
      using them.  */
   if (!TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
	reg_alloc_order [pos++] = i;

   /* SSE registers.  */
   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
     reg_alloc_order [pos++] = i;
   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
     reg_alloc_order [pos++] = i;

   /* x87 registers.  */
   if (TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
	reg_alloc_order [pos++] = i;

   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
     reg_alloc_order [pos++] = i;

   /* Initialize the rest of array as we do not allocate some registers
      at all.  */
   while (pos < FIRST_PSEUDO_REGISTER)
     reg_alloc_order [pos++] = 0;
}
/* Handle a "callee_pop_aggregate_return" attribute; arguments as
   in struct attribute_spec handler.  */
static tree
ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
					 tree args,
					 int flags ATTRIBUTE_UNUSED,
					 bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }
  if (TARGET_64BIT)
    {
      warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }
  if (is_attribute_p ("callee_pop_aggregate_return", name))
    {
      tree cst;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	{
	  warning (OPT_Wattributes,
		   "%qE attribute requires an integer constant argument",
		   name);
	  *no_add_attrs = true;
	}
      else if (compare_tree_int (cst, 0) != 0
	       && compare_tree_int (cst, 1) != 0)
	{
	  warning (OPT_Wattributes,
		   "argument to %qE attribute is neither zero, nor one",
		   name);
	  *no_add_attrs = true;
	}

      return NULL_TREE;
    }

  return NULL_TREE;
}
/* Handle a "ms_abi" or "sysv" attribute; arguments as in
   struct attribute_spec.handler.  */
static tree
ix86_handle_abi_attribute (tree *node, tree name,
			   tree args ATTRIBUTE_UNUSED,
			   int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine regparm with all attributes but fastcall.  */
  if (is_attribute_p ("ms_abi", name))
    {
      if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
	{
	  error ("ms_abi and sysv_abi attributes are not compatible");
	}

      return NULL_TREE;
    }
  else if (is_attribute_p ("sysv_abi", name))
    {
      if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
	{
	  error ("ms_abi and sysv_abi attributes are not compatible");
	}

      return NULL_TREE;
    }

  return NULL_TREE;
}
31898 struct attribute_spec.handler. */
31900 ix86_handle_struct_attribute (tree
*node
, tree name
,
31901 tree args ATTRIBUTE_UNUSED
,
31902 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
31905 if (DECL_P (*node
))
31907 if (TREE_CODE (*node
) == TYPE_DECL
)
31908 type
= &TREE_TYPE (*node
);
31913 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
31914 || TREE_CODE (*type
) == UNION_TYPE
)))
31916 warning (OPT_Wattributes
, "%qE attribute ignored",
31918 *no_add_attrs
= true;
31921 else if ((is_attribute_p ("ms_struct", name
)
31922 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
31923 || ((is_attribute_p ("gcc_struct", name
)
31924 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
31926 warning (OPT_Wattributes
, "%qE incompatible attribute ignored",
31928 *no_add_attrs
= true;
static tree
ix86_handle_fndecl_attribute (tree *node, tree name,
			      tree args ATTRIBUTE_UNUSED,
			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }
  return NULL_TREE;
}

static bool
ix86_ms_bitfield_layout_p (const_tree record_type)
{
  return ((TARGET_MS_BITFIELD_LAYOUT
	   && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
	  || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
}
/* Returns an expression indicating where the this parameter is
   located on entry to the FUNCTION.  */

static rtx
x86_this_parameter (tree function)
{
  tree type = TREE_TYPE (function);
  bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
  int nregs;

  if (TARGET_64BIT)
    {
      const int *parm_regs;

      if (ix86_function_type_abi (type) == MS_ABI)
	parm_regs = x86_64_ms_abi_int_parameter_registers;
      else
	parm_regs = x86_64_int_parameter_registers;
      return gen_rtx_REG (Pmode, parm_regs[aggr]);
    }

  nregs = ix86_function_regparm (type, function);

  if (nregs > 0 && !stdarg_p (type))
    {
      int regno;
      unsigned int ccvt = ix86_get_callcvt (type);

      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	regno = aggr ? DX_REG : CX_REG;
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	{
	  regno = CX_REG;
	  if (aggr)
	    return gen_rtx_MEM (SImode,
				plus_constant (stack_pointer_rtx, 4));
	}
      else
	{
	  regno = AX_REG;
	  if (aggr)
	    {
	      regno = DX_REG;
	      if (nregs == 1)
		return gen_rtx_MEM (SImode,
				    plus_constant (stack_pointer_rtx, 4));
	    }
	}
      return gen_rtx_REG (SImode, regno);
    }

  return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx,
					     aggr ? 8 : 4));
}
/* Determine whether x86_output_mi_thunk can succeed.  */

static bool
x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
			 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
			 HOST_WIDE_INT vcall_offset, const_tree function)
{
  /* 64-bit can handle anything.  */
  if (TARGET_64BIT)
    return true;

  /* For 32-bit, everything's fine if we have one free register.  */
  if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
    return true;

  /* Need a free register for vcall_offset.  */
  if (vcall_offset)
    return false;

  /* Need a free register for GOT references.  */
  if (flag_pic && !targetm.binds_local_p (function))
    return false;

  /* Otherwise ok.  */
  return true;
}
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
x86_output_mi_thunk (FILE *file,
		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  rtx this_param = x86_this_parameter (function);
  rtx this_reg, tmp, fnaddr;

  emit_note (NOTE_INSN_PROLOGUE_END);

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this_param))
    this_reg = this_param;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      this_reg = gen_rtx_REG (Pmode, AX_REG);
      emit_move_insn (this_reg, this_param);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);
      rtx delta_dst = this_reg ? this_reg : this_param;

      if (TARGET_64BIT)
	{
	  if (!x86_64_general_operand (delta_rtx, Pmode))
	    {
	      tmp = gen_rtx_REG (Pmode, R10_REG);
	      emit_move_insn (tmp, delta_rtx);
	      delta_rtx = tmp;
	    }
	}

      ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      rtx vcall_addr, vcall_mem, this_mem;
      unsigned int tmp_regno;

      if (TARGET_64BIT)
	tmp_regno = R10_REG;
      else
	{
	  unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
	  if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
	    tmp_regno = AX_REG;
	  else
	    tmp_regno = CX_REG;
	}
      tmp = gen_rtx_REG (Pmode, tmp_regno);

      this_mem = gen_rtx_MEM (ptr_mode, this_reg);
      if (Pmode != ptr_mode)
	this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
      emit_move_insn (tmp, this_mem);

      /* Adjust the this parameter.  */
      vcall_addr = plus_constant (tmp, vcall_offset);
      if (TARGET_64BIT
	  && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
	{
	  rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
	  emit_move_insn (tmp2, GEN_INT (vcall_offset));
	  vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
	}

      vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
      if (Pmode != ptr_mode)
	emit_insn (gen_addsi_1_zext (this_reg,
				     gen_rtx_REG (ptr_mode,
						  REGNO (this_reg)),
				     vcall_mem));
      else
	ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this_param)
    emit_move_insn (this_param, this_reg);

  fnaddr = XEXP (DECL_RTL (function), 0);
  if (TARGET_64BIT)
    {
      if (!flag_pic || targetm.binds_local_p (function)
	  || cfun->machine->call_abi == MS_ABI)
	;
      else
	{
	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
	  tmp = gen_rtx_CONST (Pmode, tmp);
	  fnaddr = gen_rtx_MEM (Pmode, tmp);
	}
    }
  else
    {
      if (!flag_pic || targetm.binds_local_p (function))
	;
#if TARGET_MACHO
      else if (TARGET_MACHO)
	{
	  fnaddr = machopic_indirect_call_target (DECL_RTL (function));
	  fnaddr = XEXP (fnaddr, 0);
	}
#endif /* TARGET_MACHO */
      else
	{
	  tmp = gen_rtx_REG (Pmode, CX_REG);
	  output_set_got (tmp, NULL_RTX);

	  fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
	  fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
	  fnaddr = gen_rtx_MEM (Pmode, fnaddr);
	}
    }

  /* Our sibling call patterns do not allow memories, because we have no
     predicate that can distinguish between frame and non-frame memory.
     For our purposes here, we can get away with (ab)using a jump pattern,
     because we're going to do no optimization.  */
  if (MEM_P (fnaddr))
    emit_jump_insn (gen_indirect_jump (fnaddr));
  else
    {
      tmp = gen_rtx_MEM (QImode, fnaddr);
      tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
      tmp = emit_call_insn (tmp);
      SIBLING_CALL_P (tmp) = 1;
    }
  emit_barrier ();

  /* Emit just enough of rest_of_compilation to get the insns emitted.
     Note that use_thunk calls assemble_start_function et al.  */
  tmp = get_insns ();
  insn_locators_alloc ();
  shorten_branches (tmp);
  final_start_function (tmp, file, 1);
  final (tmp, file, 1);
  final_end_function ();
}
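
/* Illustrative sketch (not part of GCC): the C-level semantics of the
   thunk emitted above.  `delta' and `vcall_offset' mirror the parameters
   of x86_output_mi_thunk; `object' stands for the incoming `this'.  The
   names are hypothetical and for exposition only.  */
#if 0
static void *
thunk_adjust_this (void *object, long delta, long vcall_offset)
{
  char *p = (char *) object + delta;	/* THIS += DELTA */
  if (vcall_offset)
    {
      /* THIS += *(*THIS + VCALL_OFFSET): an offset fetched from the
	 vtable that the (already delta-adjusted) object points to.  */
      char *vtbl = *(char **) p;
      p += *(long *) (vtbl + vcall_offset);
    }
  return p;	/* the thunk then tail-calls the real function */
}
#endif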
static void
x86_file_start (void)
{
  default_file_start ();
#if TARGET_MACHO
  darwin_file_start ();
#endif
  if (X86_FILE_START_VERSION_DIRECTIVE)
    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
  if (X86_FILE_START_FLTUSED)
    fputs ("\t.global\t__fltused\n", asm_out_file);
  if (ix86_asm_dialect == ASM_INTEL)
    fputs ("\t.intel_syntax noprefix\n", asm_out_file);
}
int
x86_field_alignment (tree field, int computed)
{
  enum machine_mode mode;
  tree type = TREE_TYPE (field);

  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  mode = TYPE_MODE (strip_array_types (type));
  if (mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return MIN (32, computed);
  return computed;
}
/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */
void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
					 : MCOUNT_NAME);

  if (TARGET_64BIT)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
#endif

      if (DEFAULT_ABI == SYSV_ABI && flag_pic)
	fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
      else
	fprintf (file, "\tcall\t%s\n", mcount_name);
    }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
	       LPREFIX, labelno);
#endif
      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
    }
  else
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
	       LPREFIX, labelno);
#endif
      fprintf (file, "\tcall\t%s\n", mcount_name);
    }
}
/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1 byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   the vast majority of cases.  */

static int
min_insn_size (rtx insn)
{
  int l = 0, len;

  if (!INSN_P (insn) || !active_insn_p (insn))
    return 0;

  /* Discard alignments we've emitted and jump instructions.  */
  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
    return 0;
  if (JUMP_TABLE_DATA_P (insn))
    return 0;

  /* Important case - calls are always 5 bytes.
     It is common to have many calls in a row.  */
  if (CALL_P (insn)
      && symbolic_reference_mentioned_p (PATTERN (insn))
      && !SIBLING_CALL_P (insn))
    return 5;
  len = get_attr_length (insn);
  if (len <= 1)
    return 1;

  /* For normal instructions we rely on get_attr_length being exact,
     with a few exceptions.  */
  if (!JUMP_P (insn))
    {
      if (GET_CODE (PATTERN (insn)) == ASM_INPUT
	  || asm_noperands (PATTERN (insn)) >= 0)
	return 0;

      /* Otherwise trust get_attr_length.  */
      return len;
    }

  l = get_attr_length_address (insn);
  if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
    l = 4;

  if (l)
    return 1 + l;
  else
    return 2;
}
#ifdef ASM_OUTPUT_MAX_SKIP_PAD
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
   window.  */

static void
ix86_avoid_jump_mispredicts (void)
{
  rtx insn, start = get_insns ();
  int nbytes = 0, njumps = 0;
  int isjump = 0;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When the NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16byte page.

     The smallest offset in the page INSN can start is the case where START
     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
     */
  for (insn = start; insn; insn = NEXT_INSN (insn))
    {
      int min_size;

      if (LABEL_P (insn))
	{
	  int align = label_to_alignment (insn);
	  int max_skip = label_to_max_skip (insn);

	  if (max_skip > 15)
	    max_skip = 15;
	  /* If align > 3, only up to 16 - max_skip - 1 bytes can be
	     already in the current 16 byte page, because otherwise
	     ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
	     bytes to reach 16 byte boundary.  */
	  if (align <= 0
	      || (align <= 3 && max_skip != (1 << align) - 1))
	    max_skip = 0;
	  if (dump_file)
	    fprintf (dump_file, "Label %i with max_skip %i\n",
		     INSN_UID (insn), max_skip);
	  if (max_skip)
	    {
	      while (nbytes + max_skip >= 16)
		{
		  start = NEXT_INSN (start);
		  if ((JUMP_P (start)
		       && GET_CODE (PATTERN (start)) != ADDR_VEC
		       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
		      || CALL_P (start))
		    njumps--, isjump = 1;
		  else
		    isjump = 0;
		  nbytes -= min_insn_size (start);
		}
	    }
	  continue;
	}

      min_size = min_insn_size (insn);
      nbytes += min_size;
      if (dump_file)
	fprintf (dump_file, "Insn %i estimated to %i bytes\n",
		 INSN_UID (insn), min_size);
      if ((JUMP_P (insn)
	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
	  || CALL_P (insn))
	njumps++;
      else
	continue;

      while (njumps > 3)
	{
	  start = NEXT_INSN (start);
	  if ((JUMP_P (start)
	       && GET_CODE (PATTERN (start)) != ADDR_VEC
	       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
	      || CALL_P (start))
	    njumps--, isjump = 1;
	  else
	    isjump = 0;
	  nbytes -= min_insn_size (start);
	}
      gcc_assert (njumps >= 0);
      if (dump_file)
	fprintf (dump_file, "Interval %i to %i has %i bytes\n",
		 INSN_UID (start), INSN_UID (insn), nbytes);

      if (njumps == 3 && isjump && nbytes < 16)
	{
	  int padsize = 15 - nbytes + min_insn_size (insn);

	  if (dump_file)
	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
		     INSN_UID (insn), padsize);
	  emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
	}
    }
}
#endif
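
/* Illustrative sketch (not part of GCC): how the padding amount above is
   derived.  If the interval START..INSN holds 4 jumps in fewer than 16
   bytes, all of them could share one 16-byte fetch window, so we pad just
   enough that INSN can no longer start in the same window as the three
   preceding jumps.  Hypothetical helper, for exposition only.  */
#if 0
static int
pad_for_16byte_window (int nbytes, int size_of_insn)
{
  /* Worst case: START ends at offset 0 of a window and INSN then begins
     at offset nbytes - size_of_insn.  Padding up to offset 15 guarantees
     a window boundary between START and INSN.  */
  return 15 - nbytes + size_of_insn;
}
#endif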
/* AMD Athlon works faster
   when RET is not the destination of a conditional jump or directly
   preceded by another jump instruction.  We avoid the penalty by
   inserting NOP just before the RET instructions in such cases.  */
static void
ix86_pad_returns (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      basic_block bb = e->src;
      rtx ret = BB_END (bb);
      rtx prev;
      bool replace = false;

      if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
	  || optimize_bb_for_size_p (bb))
	continue;
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
	if (active_insn_p (prev) || LABEL_P (prev))
	  break;
      if (prev && LABEL_P (prev))
	{
	  edge e;
	  edge_iterator ei;

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
		&& !(e->flags & EDGE_FALLTHRU))
	      replace = true;
	}
      if (!replace)
	{
	  prev = prev_active_insn (ret);
	  if (prev
	      && ((JUMP_P (prev) && any_condjump_p (prev))
		  || CALL_P (prev)))
	    replace = true;
	  /* Empty functions get branch mispredict even when
	     the jump destination is not visible to us.  */
	  if (!prev && !optimize_function_for_size_p (cfun))
	    replace = true;
	}
      if (replace)
	{
	  emit_jump_insn_before (gen_simple_return_internal_long (), ret);
	  delete_insn (ret);
	}
    }
}
/* Count the minimum number of instructions in BB.  Return 4 if the
   number of instructions >= 4.  */

static int
ix86_count_insn_bb (basic_block bb)
{
  rtx insn;
  int insn_count = 0;

  /* Count number of instructions in this block.  Return 4 if the number
     of instructions >= 4.  */
  FOR_BB_INSNS (bb, insn)
    {
      /* Only happen in exit blocks.  */
      if (JUMP_P (insn)
	  && ANY_RETURN_P (PATTERN (insn)))
	break;

      if (NONDEBUG_INSN_P (insn)
	  && GET_CODE (PATTERN (insn)) != USE
	  && GET_CODE (PATTERN (insn)) != CLOBBER)
	{
	  insn_count++;
	  if (insn_count >= 4)
	    return insn_count;
	}
    }

  return insn_count;
}


/* Count the minimum number of instructions in code path in BB.
   Return 4 if the number of instructions >= 4.  */

static int
ix86_count_insn (basic_block bb)
{
  edge e;
  edge_iterator ei;
  int min_prev_count;

  /* Only bother counting instructions along paths with no
     more than 2 basic blocks between entry and exit.  Given
     that BB has an edge to exit, determine if a predecessor
     of BB has an edge from entry.  If so, compute the number
     of instructions in the predecessor block.  If there
     happen to be multiple such blocks, compute the minimum.  */
  min_prev_count = 4;
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      edge prev_e;
      edge_iterator prev_ei;

      if (e->src == ENTRY_BLOCK_PTR)
	{
	  min_prev_count = 0;
	  break;
	}
      FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
	{
	  if (prev_e->src == ENTRY_BLOCK_PTR)
	    {
	      int count = ix86_count_insn_bb (e->src);
	      if (count < min_prev_count)
		min_prev_count = count;
	      break;
	    }
	}
    }

  if (min_prev_count < 4)
    min_prev_count += ix86_count_insn_bb (bb);

  return min_prev_count;
}
/* Pad short function to 4 instructions.  */

static void
ix86_pad_short_function (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      rtx ret = BB_END (e->src);
      if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
	{
	  int insn_count = ix86_count_insn (e->src);

	  /* Pad short function.  */
	  if (insn_count < 4)
	    {
	      rtx insn = ret;

	      /* Find epilogue.  */
	      while (insn
		     && (!NOTE_P (insn)
			 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
		insn = PREV_INSN (insn);

	      if (!insn)
		insn = ret;

	      /* Two NOPs count as one instruction.  */
	      insn_count = 2 * (4 - insn_count);
	      emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
	    }
	}
    }
}
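
/* Illustrative sketch (not part of GCC): the NOP budget used above.
   Two NOPs are costed as one instruction by the target this tuning
   serves, so reaching the 4-insn minimum takes twice the shortfall.
   Hypothetical helper, for exposition only.  */
#if 0
static int
nops_needed (int insn_count)
{
  /* E.g. a 2-insn function receives 2 * (4 - 2) = 4 NOPs.  */
  return 2 * (4 - insn_count);
}
#endif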
/* Implement machine specific optimizations.  We implement padding of returns
   for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.  */
static void
ix86_reorg (void)
{
  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();

  /* Run the vzeroupper optimization if needed.  */
  if (TARGET_VZEROUPPER)
    move_or_delete_vzeroupper ();

  if (optimize && optimize_function_for_speed_p (cfun))
    {
      if (TARGET_PAD_SHORT_FUNCTION)
	ix86_pad_short_function ();
      else if (TARGET_PAD_RETURNS)
	ix86_pad_returns ();
#ifdef ASM_OUTPUT_MAX_SKIP_PAD
      if (TARGET_FOUR_JUMP_LIMIT)
	ix86_avoid_jump_mispredicts ();
#endif
    }
}
/* Return nonzero when QImode register that must be represented via REX prefix
   is used.  */
bool
x86_extended_QIreg_mentioned_p (rtx insn)
{
  int i;
  extract_insn_cached (insn);
  for (i = 0; i < recog_data.n_operands; i++)
    if (REG_P (recog_data.operand[i])
	&& REGNO (recog_data.operand[i]) > BX_REG)
      return true;
  return false;
}

/* Return nonzero when P points to register encoded via REX prefix.
   Called via for_each_rtx.  */
static int
extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
{
  unsigned int regno;
  if (!REG_P (*p))
    return 0;
  regno = REGNO (*p);
  return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
}

/* Return true when INSN mentions register that must be encoded using REX
   prefix.  */
bool
x86_extended_reg_mentioned_p (rtx insn)
{
  return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
		       extended_reg_mentioned_1, NULL);
}
/* If profitable, negate (without causing overflow) integer constant
   of mode MODE at location LOC.  Return true in this case.  */
bool
x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (*loc))
    return false;

  switch (mode)
    {
    case DImode:
      /* DImode x86_64 constants must fit in 32 bits.  */
      gcc_assert (x86_64_immediate_operand (*loc, mode));

      mode = SImode;
      break;

    case SImode:
    case HImode:
    case QImode:
      break;

    default:
      gcc_unreachable ();
    }

  /* Avoid overflows.  */
  if (mode_signbit_p (mode, *loc))
    return false;

  val = INTVAL (*loc);

  /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
     Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
  if ((val < 0 && val != -128)
      || val == 128)
    {
      *loc = GEN_INT (-val);
      return true;
    }

  return false;
}
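
/* Illustrative sketch (not part of GCC): the encoding argument behind the
   swap above.  x86 sign-extends 8-bit immediates, so constants in
   [-128, 127] encode short; negating turns `addl $-4' into the prettier
   `subl $4', and 128 is negated to -128 (which still fits in 8 bits),
   while -128 itself is kept since +128 would need a longer encoding.  */
#if 0
static int
negate_profitable_p (long val)
{
  return (val < 0 && val != -128) || val == 128;
}
#endif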
/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */

void
x86_emit_floatuns (rtx operands[2])
{
  rtx neglab, donelab, i0, i1, f0, in, out;
  enum machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  gcc_assert (inmode == SImode || inmode == DImode);

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  f0 = gen_reg_rtx (mode);

  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);

  expand_float (out, in, 0);

  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);

  expand_float (f0, i0, 0);

  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
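
/* Illustrative sketch (not part of GCC): the branch-and-halve scheme the
   expander above emits, written as plain C for a 64-bit input.  When the
   sign bit is set, the value is halved (rounding the low bit into the
   result via OR) so the signed conversion is exact, then doubled back.  */
#if 0
static double
uint64_to_double (unsigned long long u)
{
  if ((long long) u >= 0)
    return (double) (long long) u;	/* fits in the signed range */
  else
    {
      unsigned long long half = (u >> 1) | (u & 1);	/* keep round bit */
      double f = (double) (long long) half;
      return f + f;			/* scale back up */
    }
}
#endif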
/* AVX2 does support 32-byte integer vector operations,
   thus the longest vector we are faced with is V32QImode.  */
#define MAX_VECT_LEN	32

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  enum machine_mode vmode;
  unsigned char nelt;
  bool testing_p;
};

static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);

/* Get a vector mode of the same size as the original but with elements
   twice as wide.  This is only guaranteed to apply to integral vectors.  */

static inline enum machine_mode
get_mode_wider_vector (enum machine_mode o)
{
  /* ??? Rely on the ordering that genmodes.c gives to vectors.  */
  enum machine_mode n = GET_MODE_WIDER_MODE (o);
  gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
  gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
  return n;
}
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   with all elements equal to VAR.  Return true if successful.  */

static bool
ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
				   rtx target, rtx val)
{
  bool ok;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
	return false;
      /* FALLTHRU */

    case V4DFmode:
    case V4DImode:
    case V8SFmode:
    case V8SImode:
    case V2DFmode:
    case V2DImode:
    case V4SFmode:
    case V4SImode:
      {
	rtx insn, dup;

	/* First attempt to recognize VAL as-is.  */
	dup = gen_rtx_VEC_DUPLICATE (mode, val);
	insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
	if (recog_memoized (insn) < 0)
	  {
	    rtx seq;
	    /* If that fails, force VAL into a register.  */

	    start_sequence ();
	    XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
	    seq = get_insns ();
	    end_sequence ();
	    if (seq)
	      emit_insn_before (seq, insn);

	    ok = recog_memoized (insn) >= 0;
	    gcc_assert (ok);
	  }
      }
      return true;

    case V4HImode:
      if (!mmx_ok)
	return false;
      if (TARGET_SSE || TARGET_3DNOW_A)
	{
	  rtx x;

	  val = gen_lowpart (SImode, val);
	  x = gen_rtx_TRUNCATE (HImode, val);
	  x = gen_rtx_VEC_DUPLICATE (mode, x);
	  emit_insn (gen_rtx_SET (VOIDmode, target, x));
	  return true;
	}
      goto widen;

    case V8QImode:
      if (!mmx_ok)
	return false;
      goto widen;

    case V8HImode:
      if (TARGET_SSE2)
	{
	  struct expand_vec_perm_d dperm;
	  rtx tmp1, tmp2;

	permute:
	  memset (&dperm, 0, sizeof (dperm));
	  dperm.target = target;
	  dperm.vmode = mode;
	  dperm.nelt = GET_MODE_NUNITS (mode);
	  dperm.op0 = dperm.op1 = gen_reg_rtx (mode);

	  /* Extend to SImode using a paradoxical SUBREG.  */
	  tmp1 = gen_reg_rtx (SImode);
	  emit_move_insn (tmp1, gen_lowpart (SImode, val));

	  /* Insert the SImode value as low element of a V4SImode vector. */
	  tmp2 = gen_lowpart (V4SImode, dperm.op0);
	  emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));

	  ok = (expand_vec_perm_1 (&dperm)
		|| expand_vec_perm_broadcast_1 (&dperm));
	  gcc_assert (ok);
	  return ok;
	}
      goto widen;

    case V16QImode:
      if (TARGET_SSE2)
	goto permute;
      goto widen;

    widen:
      /* Replicate the value once into the next wider mode and recurse.  */
      {
	enum machine_mode smode, wsmode, wvmode;
	rtx x;

	smode = GET_MODE_INNER (mode);
	wvmode = get_mode_wider_vector (mode);
	wsmode = GET_MODE_INNER (wvmode);

	val = convert_modes (wsmode, smode, val, true);
	x = expand_simple_binop (wsmode, ASHIFT, val,
				 GEN_INT (GET_MODE_BITSIZE (smode)),
				 NULL_RTX, 1, OPTAB_LIB_WIDEN);
	val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);

	x = gen_lowpart (wvmode, target);
	ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
	gcc_assert (ok);
	return ok;
      }

    case V16HImode:
    case V32QImode:
      {
	enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
	rtx x = gen_reg_rtx (hvmode);

	ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
	gcc_assert (ok);

	x = gen_rtx_VEC_CONCAT (mode, x, x);
	emit_insn (gen_rtx_SET (VOIDmode, target, x));
      }
      return true;

    default:
      return false;
    }
}
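
/* Illustrative sketch (not part of GCC): the `widen' strategy above.  To
   broadcast an 8-bit value without a byte-broadcast insn, pair it into a
   16-bit value and recurse in the twice-as-wide vector mode.  */
#if 0
static unsigned short
widen_byte_for_broadcast (unsigned char b)
{
  /* val | (val << 8): both halves of the wider element hold B, so a
     16-bit broadcast of the result is an 8-bit broadcast of B.  */
  return (unsigned short) ((unsigned short) b | ((unsigned short) b << 8));
}
#endif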
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   whose ONE_VAR element is VAR, and other elements are zero.  Return true
   if successful.  */

static bool
ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
				     rtx target, rtx var, int one_var)
{
  enum machine_mode vsimode;
  rtx new_target;
  rtx x, tmp;
  bool use_vector_set = false;

  switch (mode)
    {
    case V2DImode:
      /* For SSE4.1, we normally use vector set.  But if the second
	 element is zero and inter-unit moves are OK, we use movq
	 instead.  */
      use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
			&& !(TARGET_INTER_UNIT_MOVES
			     && one_var == 0));
      break;
    case V16QImode:
    case V4SImode:
    case V4SFmode:
      use_vector_set = TARGET_SSE4_1;
      break;
    case V8HImode:
      use_vector_set = TARGET_SSE2;
      break;
    case V4HImode:
      use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
      break;
    case V32QImode:
    case V16HImode:
    case V8SImode:
    case V8SFmode:
    case V4DFmode:
      use_vector_set = TARGET_AVX;
      break;
    case V4DImode:
      /* Use ix86_expand_vector_set in 64bit mode only.  */
      use_vector_set = TARGET_AVX && TARGET_64BIT;
      break;
    default:
      break;
    }

  if (use_vector_set)
    {
      emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
      var = force_reg (GET_MODE_INNER (mode), var);
      ix86_expand_vector_set (mmx_ok, target, var, one_var);
      return true;
    }

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok)
	return false;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      if (one_var != 0)
	return false;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V4SFmode:
    case V4SImode:
      if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
	new_target = gen_reg_rtx (mode);
      else
	new_target = target;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_DUPLICATE (mode, var);
      x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
      if (one_var != 0)
	{
	  /* We need to shuffle the value to the correct position, so
	     create a new pseudo to store the intermediate result.  */

	  /* With SSE2, we can use the integer shuffle insns.  */
	  if (mode != V4SFmode && TARGET_SSE2)
	    {
	      emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
					    const1_rtx,
					    GEN_INT (one_var == 1 ? 0 : 1),
					    GEN_INT (one_var == 2 ? 0 : 1),
					    GEN_INT (one_var == 3 ? 0 : 1)));
	      if (target != new_target)
		emit_move_insn (target, new_target);
	      return true;
	    }

	  /* Otherwise convert the intermediate result to V4SFmode and
	     use the SSE1 shuffle instructions.  */
	  if (mode != V4SFmode)
	    {
	      tmp = gen_reg_rtx (V4SFmode);
	      emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
	    }
	  else
	    tmp = new_target;

	  emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
					  const1_rtx,
					  GEN_INT (one_var == 1 ? 0 : 1),
					  GEN_INT (one_var == 2 ? 0+4 : 1+4),
					  GEN_INT (one_var == 3 ? 0+4 : 1+4)));

	  if (mode != V4SFmode)
	    emit_move_insn (target, gen_lowpart (V4SImode, tmp));
	  else if (tmp != target)
	    emit_move_insn (target, tmp);
	}
      else if (target != new_target)
	emit_move_insn (target, new_target);
      return true;

    case V8HImode:
    case V16QImode:
      vsimode = V4SImode;
      goto widen;
    case V4HImode:
    case V8QImode:
      if (!mmx_ok)
	return false;
      vsimode = V2SImode;
      goto widen;
    widen:
      if (one_var != 0)
	return false;

      /* Zero extend the variable element to SImode and recurse.  */
      var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);

      x = gen_reg_rtx (vsimode);
      if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
						var, one_var))
	gcc_unreachable ();

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   consisting of the values in VALS.  It is known that all elements
   except ONE_VAR are constants.  Return true if successful.  */

static bool
ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
				 rtx target, rtx vals, int one_var)
{
  rtx var = XVECEXP (vals, 0, one_var);
  enum machine_mode wmode;
  rtx const_vec, x;

  const_vec = copy_rtx (vals);
  XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
  const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));

  switch (mode)
    {
    case V2DFmode:
    case V2DImode:
    case V2SFmode:
    case V2SImode:
      /* For the two element vectors, it's just as easy to use
	 the general case.  */
      return false;

    case V4DImode:
      /* Use ix86_expand_vector_set in 64bit mode only.  */
      if (!TARGET_64BIT)
	return false;
    case V4DFmode:
    case V8SFmode:
    case V8SImode:
    case V16HImode:
    case V32QImode:
    case V4SFmode:
    case V4SImode:
    case V8HImode:
    case V4HImode:
      break;

    case V16QImode:
      if (TARGET_SSE4_1)
	break;
      wmode = V8HImode;
      goto widen;
    case V8QImode:
      wmode = V4HImode;
      goto widen;
    widen:
      /* There's no way to set one QImode entry easily.  Combine
	 the variable value with its adjacent constant value, and
	 promote to an HImode set.  */
      x = XVECEXP (vals, 0, one_var ^ 1);
      if (one_var & 1)
	{
	  var = convert_modes (HImode, QImode, var, true);
	  var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
				     NULL_RTX, 1, OPTAB_LIB_WIDEN);
	  x = GEN_INT (INTVAL (x) & 0xff);
	}
      else
	{
	  var = convert_modes (HImode, QImode, var, true);
	  x = gen_int_mode (INTVAL (x) << 8, HImode);
	}
      if (x != const0_rtx)
	var = expand_simple_binop (HImode, IOR, var, x, var,
				   1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wmode);
      emit_move_insn (x, gen_lowpart (wmode, const_vec));
      ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }

  emit_move_insn (target, const_vec);
  ix86_expand_vector_set (mmx_ok, target, var, one_var);
  return true;
}
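
/* Illustrative sketch (not part of GCC): the QImode promotion above.  The
   variable byte is merged with its constant neighbour into one HImode
   element, so the insert can use the 16-bit vec_set patterns.
   Hypothetical helper, for exposition only.  */
#if 0
static unsigned short
combine_byte_pair (unsigned char var, unsigned char neighbour, int one_var)
{
  /* Odd position: variable byte becomes the high half; even: the low.  */
  return (one_var & 1)
	 ? (unsigned short) ((var << 8) | neighbour)
	 : (unsigned short) ((neighbour << 8) | var);
}
#endif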
/* A subroutine of ix86_expand_vector_init_general.  Use vector
   concatenate to handle the most general case: all values variable,
   and none identical.  */

static void
ix86_expand_vector_init_concat (enum machine_mode mode,
				rtx target, rtx *ops, int n)
{
  enum machine_mode cmode, hmode = VOIDmode;
  rtx first[8], second[4];
  rtvec v;
  int i, j;

  switch (n)
    {
    case 2:
      switch (mode)
	{
	case V8SImode:
	  cmode = V4SImode;
	  break;
	case V8SFmode:
	  cmode = V4SFmode;
	  break;
	case V4DImode:
	  cmode = V2DImode;
	  break;
	case V4DFmode:
	  cmode = V2DFmode;
	  break;
	case V4SImode:
	  cmode = V2SImode;
	  break;
	case V4SFmode:
	  cmode = V2SFmode;
	  break;
	case V2DImode:
	  cmode = DImode;
	  break;
	case V2SImode:
	  cmode = SImode;
	  break;
	case V2DFmode:
	  cmode = DFmode;
	  break;
	case V2SFmode:
	  cmode = SFmode;
	  break;
	default:
	  gcc_unreachable ();
	}

      if (!register_operand (ops[1], cmode))
	ops[1] = force_reg (cmode, ops[1]);
      if (!register_operand (ops[0], cmode))
	ops[0] = force_reg (cmode, ops[0]);
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_VEC_CONCAT (mode, ops[0],
						  ops[1])));
      break;

    case 4:
      switch (mode)
	{
	case V4DImode:
	  cmode = V2DImode;
	  break;
	case V4DFmode:
	  cmode = V2DFmode;
	  break;
	case V4SImode:
	  cmode = V2SImode;
	  break;
	case V4SFmode:
	  cmode = V2SFmode;
	  break;
	default:
	  gcc_unreachable ();
	}
      goto half;

    case 8:
      switch (mode)
	{
	case V8SImode:
	  cmode = V2SImode;
	  hmode = V4SImode;
	  break;
	case V8SFmode:
	  cmode = V2SFmode;
	  hmode = V4SFmode;
	  break;
	default:
	  gcc_unreachable ();
	}
      goto half;

half:
      /* FIXME: We process inputs backward to help RA.  PR 36222.  */
      i = n - 1;
      j = (n >> 1) - 1;
      for (; i > 0; i -= 2, j--)
	{
	  first[j] = gen_reg_rtx (cmode);
	  v = gen_rtvec (2, ops[i - 1], ops[i]);
	  ix86_expand_vector_init (false, first[j],
				   gen_rtx_PARALLEL (cmode, v));
	}

      n >>= 1;
      if (n > 2)
	{
	  gcc_assert (hmode != VOIDmode);
	  for (i = j = 0; i < n; i += 2, j++)
	    {
	      second[j] = gen_reg_rtx (hmode);
	      ix86_expand_vector_init_concat (hmode, second[j],
					      &first[i], 2);
	    }
	  n >>= 1;
	  ix86_expand_vector_init_concat (mode, target, second, n);
	}
      else
	ix86_expand_vector_init_concat (mode, target, first, n);
      break;

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_vector_init_general.  Use vector
   interleave to handle the most general case: all values variable,
   and none identical.  */

static void
ix86_expand_vector_init_interleave (enum machine_mode mode,
				    rtx target, rtx *ops, int n)
{
  enum machine_mode first_imode, second_imode, third_imode, inner_mode;
  int i, j;
  rtx op0, op1;
  rtx (*gen_load_even) (rtx, rtx, rtx);
  rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
  rtx (*gen_interleave_second_low) (rtx, rtx, rtx);

  switch (mode)
    {
    case V8HImode:
      gen_load_even = gen_vec_setv8hi;
      gen_interleave_first_low = gen_vec_interleave_lowv4si;
      gen_interleave_second_low = gen_vec_interleave_lowv2di;
      inner_mode = HImode;
      first_imode = V4SImode;
      second_imode = V2DImode;
      third_imode = VOIDmode;
      break;
    case V16QImode:
      gen_load_even = gen_vec_setv16qi;
      gen_interleave_first_low = gen_vec_interleave_lowv8hi;
      gen_interleave_second_low = gen_vec_interleave_lowv4si;
      inner_mode = QImode;
      first_imode = V8HImode;
      second_imode = V4SImode;
      third_imode = V2DImode;
      break;
    default:
      gcc_unreachable ();
    }

  for (i = 0; i < n; i++)
    {
      /* Extend the odd element to SImode using a paradoxical SUBREG.  */
      op0 = gen_reg_rtx (SImode);
      emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));

      /* Insert the SImode value as low element of V4SImode vector.  */
      op1 = gen_reg_rtx (V4SImode);
      op0 = gen_rtx_VEC_MERGE (V4SImode,
			       gen_rtx_VEC_DUPLICATE (V4SImode,
						      op0),
			       CONST0_RTX (V4SImode),
			       const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, op1, op0));

      /* Cast the V4SImode vector back to a vector in original mode.  */
      op0 = gen_reg_rtx (mode);
      emit_move_insn (op0, gen_lowpart (mode, op1));

      /* Load even elements into the second position.  */
      emit_insn (gen_load_even (op0,
				force_reg (inner_mode,
					   ops [i + i + 1]),
				const1_rtx));

      /* Cast vector to FIRST_IMODE vector.  */
      ops[i] = gen_reg_rtx (first_imode);
      emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
    }

  /* Interleave low FIRST_IMODE vectors.  */
  for (i = j = 0; i < n; i += 2, j++)
    {
      op0 = gen_reg_rtx (first_imode);
      emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));

      /* Cast FIRST_IMODE vector to SECOND_IMODE vector.  */
      ops[j] = gen_reg_rtx (second_imode);
      emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
    }

  /* Interleave low SECOND_IMODE vectors.  */
  switch (second_imode)
    {
    case V4SImode:
      for (i = j = 0; i < n / 2; i += 2, j++)
	{
	  op0 = gen_reg_rtx (second_imode);
	  emit_insn (gen_interleave_second_low (op0, ops[i],
						ops[i + 1]));

	  /* Cast the SECOND_IMODE vector to the THIRD_IMODE
	     vector.  */
	  ops[j] = gen_reg_rtx (third_imode);
	  emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
	}
      second_imode = V2DImode;
      gen_interleave_second_low = gen_vec_interleave_lowv2di;
      /* FALLTHRU */

    case V2DImode:
      op0 = gen_reg_rtx (second_imode);
      emit_insn (gen_interleave_second_low (op0, ops[0],
					    ops[1]));

      /* Cast the SECOND_IMODE vector back to a vector on original
	 mode.  */
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_lowpart (mode, op0)));
      break;

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
   all values variable, and none identical.  */

static void
ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
				 rtx target, rtx vals)
{
  rtx ops[32], op0, op1;
  enum machine_mode half_mode = VOIDmode;
  int n, i;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok && !TARGET_SSE)
	break;
      /* FALLTHRU */

    case V8SFmode:
    case V8SImode:
    case V4DFmode:
    case V4DImode:
    case V4SFmode:
    case V4SImode:
    case V2DFmode:
    case V2DImode:
      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
	ops[i] = XVECEXP (vals, 0, i);
      ix86_expand_vector_init_concat (mode, target, ops, n);
      return;

    case V32QImode:
      half_mode = V16QImode;
      goto half;

    case V16HImode:
      half_mode = V8HImode;
      goto half;

half:
      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
	ops[i] = XVECEXP (vals, 0, i);
      op0 = gen_reg_rtx (half_mode);
      op1 = gen_reg_rtx (half_mode);
      ix86_expand_vector_init_interleave (half_mode, op0, ops,
					  n >> 2);
      ix86_expand_vector_init_interleave (half_mode, op1,
					  &ops [n >> 1], n >> 2);
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_VEC_CONCAT (mode, op0, op1)));
      return;

    case V16QImode:
      if (!TARGET_SSE4_1)
	break;
      /* FALLTHRU */

    case V8HImode:
      if (!TARGET_SSE2)
	break;

      /* Don't use ix86_expand_vector_init_interleave if we can't
	 move from GPR to SSE register directly.  */
      if (!TARGET_INTER_UNIT_MOVES)
	break;

      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
	ops[i] = XVECEXP (vals, 0, i);
      ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
      return;

    case V4HImode:
    case V8QImode:
      break;

    default:
      gcc_unreachable ();
    }

    {
      int i, j, n_elts, n_words, n_elt_per_word;
      enum machine_mode inner_mode;
      rtx words[4], shift;

      inner_mode = GET_MODE_INNER (mode);
      n_elts = GET_MODE_NUNITS (mode);
      n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      n_elt_per_word = n_elts / n_words;
      shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));

      for (i = 0; i < n_words; ++i)
	{
	  rtx word = NULL_RTX;

	  for (j = 0; j < n_elt_per_word; ++j)
	    {
	      rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
	      elt = convert_modes (word_mode, inner_mode, elt, true);

	      if (j == 0)
		word = elt;
	      else
		{
		  word = expand_simple_binop (word_mode, ASHIFT, word, shift,
					      word, 1, OPTAB_LIB_WIDEN);
		  word = expand_simple_binop (word_mode, IOR, word, elt,
					      word, 1, OPTAB_LIB_WIDEN);
		}
	    }

	  words[i] = word;
	}

      if (n_words == 1)
	emit_move_insn (target, gen_lowpart (mode, words[0]));
      else if (n_words == 2)
	{
	  rtx tmp = gen_reg_rtx (mode);
	  emit_clobber (tmp);
	  emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
	  emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
	  emit_move_insn (target, tmp);
	}
      else if (n_words == 4)
	{
	  rtx tmp = gen_reg_rtx (V4SImode);
	  gcc_assert (word_mode == SImode);
	  vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
	  ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
	  emit_move_insn (target, gen_lowpart (mode, tmp));
	}
      else
	gcc_unreachable ();
    }
}
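
/* Illustrative sketch (not part of GCC): the word-building fallback above,
   shown for 4 16-bit elements packed into one 64-bit word (little-endian
   element order; the highest-index element is shifted in first, leaving
   element 0 in the least significant position).  */
#if 0
static unsigned long long
pack_elts_into_word (const unsigned short elt[4])
{
  unsigned long long word = elt[3];
  int j;
  for (j = 2; j >= 0; j--)
    word = (word << 16) | elt[j];	/* shift, then IOR the next element */
  return word;
}
#endif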
/* Initialize vector TARGET via VALS.  Suppress the use of MMX
   instructions unless MMX_OK is true.  */

void
ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true, all_const_zero = true;
  int i;
  rtx x;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!(CONST_INT_P (x)
	    || GET_CODE (x) == CONST_DOUBLE
	    || GET_CODE (x) == CONST_FIXED))
	n_var++, one_var = i;
      else if (x != CONST0_RTX (inner_mode))
	all_const_zero = false;
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  /* Constants are best loaded from the constant pool.  */
  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  /* If all values are identical, broadcast the value.  */
  if (all_same
      && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
					    XVECEXP (vals, 0, 0)))
    return;

  /* Values where only one field is non-constant are best loaded from
     the pool and overwritten via move later.  */
  if (n_var == 1)
    {
      if (all_const_zero
	  && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
						  XVECEXP (vals, 0, one_var),
						  one_var))
	return;

      if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
	return;
    }

  ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
}
void
ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  enum machine_mode half_mode;
  bool use_vec_merge = false;
  rtx tmp;
  static rtx (*gen_extract[6][2]) (rtx, rtx)
    = {
	{ gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
	{ gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
	{ gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
	{ gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
	{ gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
	{ gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
      };
  static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
    = {
	{ gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
	{ gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
	{ gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
	{ gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
	{ gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
	{ gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
      };
  int i, j, n;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (mmx_ok)
	{
	  tmp = gen_reg_rtx (GET_MODE_INNER (mode));
	  ix86_expand_vector_extract (true, tmp, target, 1 - elt);
	  if (elt == 0)
	    tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
	  else
	    tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
	  emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
	  return;
	}
      break;

    case V2DImode:
      use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
      if (use_vec_merge)
	break;

      tmp = gen_reg_rtx (GET_MODE_INNER (mode));
      ix86_expand_vector_extract (false, tmp, target, 1 - elt);
      if (elt == 0)
	tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
      else
	tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      return;

    case V2DFmode:
      {
	rtx op0, op1;

	/* For the two element vectors, we implement a VEC_CONCAT with
	   the extraction of the other element.  */

	tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
	tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);

	if (elt == 0)
	  op0 = val, op1 = tmp;
	else
	  op0 = tmp, op1 = val;

	tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
	emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      }
      return;

    case V4SFmode:
      use_vec_merge = TARGET_SSE4_1;
      if (use_vec_merge)
	break;

      switch (elt)
	{
	case 0:
	  use_vec_merge = true;
	  break;

	case 1:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* target = A A B B */
	  emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
	  /* target = X A B B */
	  ix86_expand_vector_set (false, target, val, 0);
	  /* target = A X C D  */
	  emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
					  const1_rtx, const0_rtx,
					  GEN_INT (2+4), GEN_INT (3+4)));
	  return;

	case 2:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* tmp = X B C D */
	  ix86_expand_vector_set (false, tmp, val, 0);
	  /* target = A B X D */
	  emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
					  const0_rtx, const1_rtx,
					  GEN_INT (0+4), GEN_INT (3+4)));
	  return;

	case 3:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* tmp = X B C D */
	  ix86_expand_vector_set (false, tmp, val, 0);
	  /* target = A B X D */
	  emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
					  const0_rtx, const1_rtx,
					  GEN_INT (2+4), GEN_INT (0+4)));
	  return;

	default:
	  gcc_unreachable ();
	}
      break;

    case V4SImode:
      use_vec_merge = TARGET_SSE4_1;
      if (use_vec_merge)
	break;

      /* Element 0 handled by vec_merge below.  */
      if (elt == 0)
	{
	  use_vec_merge = true;
	  break;
	}

      if (TARGET_SSE2)
	{
	  /* With SSE2, use integer shuffles to swap element 0 and ELT,
	     store into element 0, then shuffle them back.  */

	  rtx order[4];

	  order[0] = GEN_INT (elt);
	  order[1] = const1_rtx;
	  order[2] = const2_rtx;
	  order[3] = GEN_INT (3);
	  order[elt] = const0_rtx;

	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
					order[1], order[2], order[3]));

	  ix86_expand_vector_set (false, target, val, 0);

	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
					order[1], order[2], order[3]));
	}
      else
	{
	  /* For SSE1, we have to reuse the V4SF code.  */
	  ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
				  gen_lowpart (SFmode, val), elt);
	}
      return;

    case V8HImode:
      use_vec_merge = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
      use_vec_merge = TARGET_SSE4_1;
      break;

    case V8QImode:
      break;

    case V32QImode:
      half_mode = V16QImode;
      j = 0;
      n = 16;
      goto half;

    case V16HImode:
      half_mode = V8HImode;
      j = 1;
      n = 8;
      goto half;

    case V8SImode:
      half_mode = V4SImode;
      j = 2;
      n = 4;
      goto half;

    case V4DImode:
      half_mode = V2DImode;
      j = 3;
      n = 2;
      goto half;

    case V8SFmode:
      half_mode = V4SFmode;
      j = 4;
      n = 4;
      goto half;

    case V4DFmode:
      half_mode = V2DFmode;
      j = 5;
      n = 2;
      goto half;

half:
      /* Compute offset.  */
      i = elt / n;
      elt %= n;

      gcc_assert (i <= 1);

      /* Extract the half.  */
      tmp = gen_reg_rtx (half_mode);
      emit_insn (gen_extract[j][i] (tmp, target));

      /* Put val in tmp at elt.  */
      ix86_expand_vector_set (false, tmp, val, elt);

      /* Put it back.  */
      emit_insn (gen_insert[j][i] (target, target, tmp));
      return;

    default:
      break;
    }

  if (use_vec_merge)
    {
      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, target);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (tmp, val);

      emit_move_insn (target, mem);
    }
}
void
ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
{
  enum machine_mode mode = GET_MODE (vec);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_extr = false;
  rtx tmp;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
	break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      use_vec_extr = true;
      break;

    case V4SFmode:
      use_vec_extr = TARGET_SSE4_1;
      if (use_vec_extr)
	break;

      switch (elt)
	{
	case 0:
	  tmp = vec;
	  break;

	case 1:
	case 3:
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
					  GEN_INT (elt), GEN_INT (elt),
					  GEN_INT (elt+4), GEN_INT (elt+4)));
	  break;

	case 2:
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
	  break;

	default:
	  gcc_unreachable ();
	}
      vec = tmp;
      use_vec_extr = true;
      elt = 0;
      break;

    case V4SImode:
      use_vec_extr = TARGET_SSE4_1;
      if (use_vec_extr)
	break;

      if (TARGET_SSE2)
	{
	  switch (elt)
	    {
	    case 0:
	      tmp = vec;
	      break;

	    case 1:
	    case 3:
	      tmp = gen_reg_rtx (mode);
	      emit_insn (gen_sse2_pshufd_1 (tmp, vec,
					    GEN_INT (elt), GEN_INT (elt),
					    GEN_INT (elt), GEN_INT (elt)));
	      break;

	    case 2:
	      tmp = gen_reg_rtx (mode);
	      emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
	      break;

	    default:
	      gcc_unreachable ();
	    }
	  vec = tmp;
	  use_vec_extr = true;
	  elt = 0;
	}
      else
	{
	  /* For SSE1, we have to reuse the V4SF code.  */
	  ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
				      gen_lowpart (V4SFmode, vec), elt);
	  return;
	}
      break;

    case V8HImode:
      use_vec_extr = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
      use_vec_extr = TARGET_SSE4_1;
      break;

    case V8SFmode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V4SFmode);
	  if (elt < 4)
	    emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 3);
	  return;
	}
      break;

    case V4DFmode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V2DFmode);
	  if (elt < 2)
	    emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 1);
	  return;
	}
      break;

    case V32QImode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V16QImode);
	  if (elt < 16)
	    emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 15);
	  return;
	}
      break;

    case V16HImode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V8HImode);
	  if (elt < 8)
	    emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 7);
	  return;
	}
      break;

    case V8SImode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V4SImode);
	  if (elt < 4)
	    emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 3);
	  return;
	}
      break;

    case V4DImode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V2DImode);
	  if (elt < 2)
	    emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 1);
	  return;
	}
      break;

    case V8QImode:
      /* ??? Could extract the appropriate HImode element and shift.  */
    default:
      break;
    }

  if (use_vec_extr)
    {
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);

      /* Let the rtl optimizers know about the zero extension performed.  */
      if (inner_mode == QImode || inner_mode == HImode)
	{
	  tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
	  target = gen_lowpart (SImode, target);
	}

      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, vec);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (target, tmp);
    }
}
/* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
   to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
   The upper bits of DEST are undefined, though they shouldn't cause
   exceptions (some bits from src or all zeros are ok).  */

static void
emit_reduc_half (rtx dest, rtx src, int i)
{
  rtx tem;
  switch (GET_MODE (src))
    {
    case V4SFmode:
      if (i == 128)
	tem = gen_sse_movhlps (dest, src, src);
      else
	tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
				   GEN_INT (1 + 4), GEN_INT (1 + 4));
      break;
    case V2DFmode:
      tem = gen_vec_interleave_highv2df (dest, src, src);
      break;
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
      tem = gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, dest),
				gen_lowpart (V1TImode, src),
				GEN_INT (i / 2));
      break;
    case V8SFmode:
      if (i == 256)
	tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
      else
	tem = gen_avx_shufps256 (dest, src, src,
				 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
      break;
    case V4DFmode:
      if (i == 256)
	tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
      else
	tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
      break;
    case V32QImode:
    case V16HImode:
    case V8SImode:
    case V4DImode:
      if (i == 256)
	tem = gen_avx2_permv2ti (gen_lowpart (V4DImode, dest),
				 gen_lowpart (V4DImode, src),
				 gen_lowpart (V4DImode, src),
				 const1_rtx);
      else
	tem = gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, dest),
				  gen_lowpart (V2TImode, src),
				  GEN_INT (i / 2));
      break;
    default:
      gcc_unreachable ();
    }
  emit_insn (tem);
}
/* Expand a vector reduction.  FN is the binary pattern to reduce;
   DEST is the destination; IN is the input vector.  */

void
ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
{
  rtx half, dst, vec = in;
  enum machine_mode mode = GET_MODE (in);
  int i;

  /* SSE4 has a special instruction for V8HImode UMIN reduction.  */
  if (TARGET_SSE4_1
      && mode == V8HImode
      && fn == gen_uminv8hi3)
    {
      emit_insn (gen_sse4_1_phminposuw (dest, in));
      return;
    }

  for (i = GET_MODE_BITSIZE (mode);
       i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
       i >>= 1)
    {
      half = gen_reg_rtx (mode);
      emit_reduc_half (half, vec, i);
      if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
	dst = dest;
      else
	dst = gen_reg_rtx (mode);
      emit_insn (fn (dst, half, vec));
      vec = dst;
    }
}
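
/* Illustrative sketch (not part of GCC): the halving reduction above,
   written for a 4-element integer vector and a generic binary op.  Each
   step folds the high half onto the low half until one lane remains.  */
#if 0
static int
reduce4 (const int v[4], int (*op) (int, int))
{
  int a = op (v[0], v[2]);	/* fold the upper half onto the lower */
  int b = op (v[1], v[3]);
  return op (a, b);		/* final fold of the two survivors */
}
#endif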
/* Target hook for scalar_mode_supported_p.  */
static bool
ix86_scalar_mode_supported_p (enum machine_mode mode)
{
  if (DECIMAL_FLOAT_MODE_P (mode))
    return default_decimal_float_supported_p ();
  else if (mode == TFmode)
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}
/* Implements target hook vector_mode_supported_p.  */
static bool
ix86_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
    return true;
  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
    return true;
  if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
    return true;
  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
    return true;
  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
    return true;
  return false;
}
/* Target hook for c_mode_for_suffix.  */
static enum machine_mode
ix86_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;
  if (suffix == 'w')
    return XFmode;

  return VOIDmode;
}
/* Worker function for TARGET_MD_ASM_CLOBBERS.

   We do this in the new i386 backend to maintain source compatibility
   with the old cc0-based compiler.  */

static tree
ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
		      tree inputs ATTRIBUTE_UNUSED,
		      tree clobbers)
{
  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
			clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
			clobbers);
  return clobbers;
}
/* Implements target vector targetm.asm.encode_section_info.  */

static void ATTRIBUTE_UNUSED
ix86_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == VAR_DECL
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
      && ix86_in_large_data_p (decl))
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
}
/* Worker function for REVERSE_CONDITION.  */

enum rtx_code
ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
{
  return (mode != CCFPmode && mode != CCFPUmode
	  ? reverse_condition (code)
	  : reverse_condition_maybe_unordered (code));
}
/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  */

const char *
output_387_reg_move (rtx insn, rtx *operands)
{
  if (REG_P (operands[0]))
    {
      if (REG_P (operands[1])
	  && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
	  if (REGNO (operands[0]) == FIRST_STACK_REG)
	    return output_387_ffreep (operands, 0);
	  return "fstp\t%y0";
	}
      if (STACK_TOP_P (operands[0]))
	return "fld%Z1\t%y1";
      return "fst\t%y0";
    }
  else if (MEM_P (operands[0]))
    {
      gcc_assert (REG_P (operands[1]));
      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	return "fstp%Z0\t%y0";
      else
	{
	  /* There is no non-popping store to memory for XFmode.
	     So if we need one, follow the store with a load.  */
	  if (GET_MODE (operands[0]) == XFmode)
	    return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
	  else
	    return "fst%Z0\t%y0";
	}
    }
  else
    gcc_unreachable ();
}
/* Output code to perform a conditional jump to LABEL, if C2 flag in
   FP status register is set.  */

void
ix86_emit_fp_unordered_jump (rtx label)
{
  rtx reg = gen_reg_rtx (HImode);
  rtx temp;

  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
    {
      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
    }
  else
    {
      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));

      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
    }

  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
			       gen_rtx_LABEL_REF (VOIDmode, label),
			       pc_rtx);
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);

  emit_jump_insn (temp);
  predict_jump (REG_BR_PROB_BASE * 10 / 100);
}
/* Output code to perform a log1p XFmode calculation.  */

void ix86_emit_i387_log1p (rtx op0, rtx op1)
{
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);
  rtx test;

  emit_insn (gen_absxf2 (tmp, op1));
  test = gen_rtx_GE (VOIDmode, tmp,
    CONST_DOUBLE_FROM_REAL_VALUE (
       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
       XFmode));
  emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));

  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
  emit_jump (label2);

  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));

  emit_label (label2);
}
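
/* Illustrative sketch (not part of GCC): the split above.  fyl2xp1 is
   only usable for |x| below 1 - sqrt(2)/2 ~= 0.2928932188..., where it
   computes y * log2(x + 1) without forming 1 + x and losing precision;
   larger arguments fall back to plain fyl2x on 1 + x.  The stand-in
   helpers below are hypothetical, built on libm for exposition.  */
#if 0
#include <math.h>
/* Stand-in for fyl2xp1: y * log2(1 + x), accurate for small x.  */
static double fyl2xp1 (double x, double y) { return y * (log1p (x) / M_LN2); }
/* Stand-in for fyl2x: y * log2(x).  */
static double fyl2x (double x, double y) { return y * log2 (x); }

static double
i387_log1p (double x)
{
  if (fabs (x) < 0.29289321881345247562)	/* 1 - sqrt(2)/2 */
    return fyl2xp1 (x, M_LN2);
  return fyl2x (1.0 + x, M_LN2);
}
#endif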
/* Emit code for round calculation.  */
void ix86_emit_i387_round (rtx op0, rtx op1)
{
  enum machine_mode inmode = GET_MODE (op1);
  enum machine_mode outmode = GET_MODE (op0);
  rtx e1, e2, res, tmp, tmp1, half;
  rtx scratch = gen_reg_rtx (HImode);
  rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
  rtx jump_label = gen_label_rtx ();
  rtx insn;
  rtx (*gen_abs) (rtx, rtx);
  rtx (*gen_neg) (rtx, rtx);

  switch (inmode)
    {
    case SFmode:
      gen_abs = gen_abssf2;
      break;
    case DFmode:
      gen_abs = gen_absdf2;
      break;
    case XFmode:
      gen_abs = gen_absxf2;
      break;
    default:
      gcc_unreachable ();
    }

  switch (outmode)
    {
    case SFmode:
      gen_neg = gen_negsf2;
      break;
    case DFmode:
      gen_neg = gen_negdf2;
      break;
    case XFmode:
      gen_neg = gen_negxf2;
      break;
    case HImode:
      gen_neg = gen_neghi2;
      break;
    case SImode:
      gen_neg = gen_negsi2;
      break;
    case DImode:
      gen_neg = gen_negdi2;
      break;
    default:
      gcc_unreachable ();
    }

  e1 = gen_reg_rtx (inmode);
  e2 = gen_reg_rtx (inmode);
  res = gen_reg_rtx (outmode);

  half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);

  /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */

  /* scratch = fxam(op1) */
  emit_insn (gen_rtx_SET (VOIDmode, scratch,
			  gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
					  UNSPEC_FXAM)));
  /* e1 = fabs(op1) */
  emit_insn (gen_abs (e1, op1));

  /* e2 = e1 + 0.5 */
  half = force_reg (inmode, half);
  emit_insn (gen_rtx_SET (VOIDmode, e2,
			  gen_rtx_PLUS (inmode, e1, half)));

  /* res = floor(e2) */
  if (inmode != XFmode)
    {
      tmp1 = gen_reg_rtx (XFmode);

      emit_insn (gen_rtx_SET (VOIDmode, tmp1,
			      gen_rtx_FLOAT_EXTEND (XFmode, e2)));
    }
  else
    tmp1 = e2;

  switch (outmode)
    {
    case SFmode:
    case DFmode:
      {
	rtx tmp0 = gen_reg_rtx (XFmode);

	emit_insn (gen_frndintxf2_floor (tmp0, tmp1));

	emit_insn (gen_rtx_SET (VOIDmode, res,
				gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
						UNSPEC_TRUNC_NOOP)));
      }
      break;
    case XFmode:
      emit_insn (gen_frndintxf2_floor (res, tmp1));
      break;
    case HImode:
      emit_insn (gen_lfloorxfhi2 (res, tmp1));
      break;
    case SImode:
      emit_insn (gen_lfloorxfsi2 (res, tmp1));
      break;
    case DImode:
      emit_insn (gen_lfloorxfdi2 (res, tmp1));
      break;
    default:
      gcc_unreachable ();
    }

  /* flags = signbit(a) */
  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));

  /* if (flags) then res = -res */
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
			      gen_rtx_EQ (VOIDmode, flags, const0_rtx),
			      gen_rtx_LABEL_REF (VOIDmode, jump_label),
			      pc_rtx);
  insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
  predict_jump (REG_BR_PROB_BASE * 50 / 100);
  JUMP_LABEL (insn) = jump_label;

  emit_insn (gen_neg (res, res));

  emit_label (jump_label);
  LABEL_NUSES (jump_label) = 1;

  emit_move_insn (op0, res);
}
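
/* Illustrative sketch (not part of GCC): the identity implemented above,
   round(a) = sgn(a) * floor(fabs(a) + 0.5), with the sign read out first
   (fxam above) so the floor operates on the absolute value.  */
#if 0
#include <math.h>
static double
round_like (double a)
{
  double r = floor (fabs (a) + 0.5);
  return signbit (a) ? -r : r;
}
#endif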
/* Output code to perform a Newton-Raphson approximation of a single precision
   floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm].  */

void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
{
  rtx x0, x1, e0, e1;

  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  e1 = gen_reg_rtx (mode);
  x1 = gen_reg_rtx (mode);

  /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */

  b = force_reg (mode, b);

  /* x0 = rcp(b) estimate */
  emit_insn (gen_rtx_SET (VOIDmode, x0,
			  gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
					  UNSPEC_RCP)));
  /* e0 = x0 * b */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
			  gen_rtx_MULT (mode, x0, b)));

  /* e0 = x0 * e0 */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
			  gen_rtx_MULT (mode, x0, e0)));

  /* e1 = x0 + x0 */
  emit_insn (gen_rtx_SET (VOIDmode, e1,
			  gen_rtx_PLUS (mode, x0, x0)));

  /* x1 = e1 - e0 */
  emit_insn (gen_rtx_SET (VOIDmode, x1,
			  gen_rtx_MINUS (mode, e1, e0)));

  /* res = a * x1 */
  emit_insn (gen_rtx_SET (VOIDmode, res,
			  gen_rtx_MULT (mode, a, x1)));
}
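
/* Illustrative sketch (not part of GCC): one Newton-Raphson refinement of
   the hardware reciprocal estimate, matching the RTL sequence above.
   approx_rcp is a hypothetical stand-in for the SSE RCPSS estimate
   (roughly 12 bits of precision; one step about doubles that).  */
#if 0
static float
nr_divide (float a, float b, float (*approx_rcp) (float))
{
  float x0 = approx_rcp (b);	/* x0 = rcp(b) estimate */
  float e0 = x0 * b;		/* e0 = x0 * b */
  e0 = x0 * e0;			/* e0 = x0 * x0 * b */
  float e1 = x0 + x0;		/* e1 = 2 * x0 */
  float x1 = e1 - e0;		/* x1 = x0 * (2 - x0 * b) */
  return a * x1;		/* a / b ~= a * x1 */
}
#endif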
/* Output code to perform a Newton-Raphson approximation of a
   single precision floating point [reciprocal] square root.  */

void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
			 bool recip)
{
  rtx x0, e0, e1, e2, e3, mthree, mhalf;
  REAL_VALUE_TYPE r;

  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  e1 = gen_reg_rtx (mode);
  e2 = gen_reg_rtx (mode);
  e3 = gen_reg_rtx (mode);

  real_from_integer (&r, VOIDmode, -3, -1, 0);
  mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);

  real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
  mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);

  if (VECTOR_MODE_P (mode))
    {
      mthree = ix86_build_const_vector (mode, true, mthree);
      mhalf = ix86_build_const_vector (mode, true, mhalf);
    }

  /* sqrt(a)  = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
     rsqrt(a) = -0.5     * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */

  a = force_reg (mode, a);

  /* x0 = rsqrt(a) estimate */
  emit_insn (gen_rtx_SET (VOIDmode, x0,
			  gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
					  UNSPEC_RSQRT)));

  /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0).  */
  if (!recip)
    {
      rtx zero, mask;

      zero = gen_reg_rtx (mode);
      mask = gen_reg_rtx (mode);

      zero = force_reg (mode, CONST0_RTX(mode));
      emit_insn (gen_rtx_SET (VOIDmode, mask,
			      gen_rtx_NE (mode, zero, a)));

      emit_insn (gen_rtx_SET (VOIDmode, x0,
			      gen_rtx_AND (mode, x0, mask)));
    }

  /* e0 = x0 * a */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
			  gen_rtx_MULT (mode, x0, a)));
  /* e1 = e0 * x0 */
  emit_insn (gen_rtx_SET (VOIDmode, e1,
			  gen_rtx_MULT (mode, e0, x0)));

  /* e2 = e1 - 3. */
  mthree = force_reg (mode, mthree);
  emit_insn (gen_rtx_SET (VOIDmode, e2,
			  gen_rtx_PLUS (mode, e1, mthree)));

  mhalf = force_reg (mode, mhalf);
  if (recip)
    /* e3 = -.5 * x0 */
    emit_insn (gen_rtx_SET (VOIDmode, e3,
			    gen_rtx_MULT (mode, x0, mhalf)));
  else
    /* e3 = -.5 * e0 */
    emit_insn (gen_rtx_SET (VOIDmode, e3,
			    gen_rtx_MULT (mode, e0, mhalf)));
  /* ret = e2 * e3 */
  emit_insn (gen_rtx_SET (VOIDmode, res,
			  gen_rtx_MULT (mode, e2, e3)));
}
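
/* Illustrative sketch (not part of GCC): the shared refinement above for
   sqrt and rsqrt.  approx_rsqrt is a hypothetical stand-in for the SSE
   RSQRTSS estimate.  */
#if 0
static float
nr_rsqrt (float a, int recip, float (*approx_rsqrt) (float))
{
  float x0 = approx_rsqrt (a);		/* x0 = rsqrt(a) estimate */
  float e0 = x0 * a;			/* e0 = x0 * a */
  float e1 = e0 * x0;			/* e1 = a * x0 * x0 */
  float e2 = e1 + -3.0f;		/* e2 = a*x0*x0 - 3 */
  float e3 = (recip ? x0 : e0) * -0.5f;	/* -.5*x0 (rsqrt) or -.5*e0 (sqrt) */
  return e2 * e3;			/* refined rsqrt(a), resp. sqrt(a) */
}
#endif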
#ifdef TARGET_SOLARIS
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
				tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
	       flags & SECTION_WRITE ? "aw" : "a");
      return;
    }

  if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
    {
      solaris_elf_asm_comdat_section (name, flags, decl);
      return;
    }

  default_elf_asm_named_section (name, flags, decl);
}
#endif /* TARGET_SOLARIS */
/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ix86_mangle_type (const_tree type)
{
  type = TYPE_MAIN_VARIANT (type);

  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
    return NULL;

  switch (TYPE_MODE (type))
    {
    case TFmode:
      /* __float128 is "g".  */
      return "g";
    case XFmode:
      /* "long double" or __float80 is "e".  */
      return "e";
    default:
      return NULL;
    }
}
/* For 32-bit code we can save PIC register setup by using
   __stack_chk_fail_local hidden function instead of calling
   __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
   register, so it is better to call __stack_chk_fail directly.  */

static tree ATTRIBUTE_UNUSED
ix86_stack_protect_fail (void)
{
  return TARGET_64BIT
	 ? default_external_stack_protect_fail ()
	 : default_hidden_stack_protect_fail ();
}
/* Select a format to encode pointers in exception handling data.  CODE
   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   true if the symbol may be affected by dynamic relocations.

   ??? All x86 object file formats are capable of representing this.
   After all, the relocation needed is the same as for the call insn.
   Whether or not a particular assembler allows us to enter such, I
   guess we'll have to see.  */

int
asm_preferred_eh_data_format (int code, int global)
{
  if (flag_pic)
    {
      int type = DW_EH_PE_sdata8;
      if (!TARGET_64BIT
          || ix86_cmodel == CM_SMALL_PIC
          || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
        type = DW_EH_PE_sdata4;
      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
    }
  if (ix86_cmodel == CM_SMALL
      || (ix86_cmodel == CM_MEDIUM && code))
    return DW_EH_PE_udata4;
  return DW_EH_PE_absptr;
}
/* Expand copysign from SIGN to the positive value ABS_VALUE
   storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
   the sign-bit.  */

static void
ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
{
  enum machine_mode mode = GET_MODE (sign);
  rtx sgn = gen_reg_rtx (mode);
  if (mask == NULL_RTX)
    {
      enum machine_mode vmode;

      if (mode == SFmode)
        vmode = V4SFmode;
      else if (mode == DFmode)
        vmode = V2DFmode;
      else
        vmode = mode;

      mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
      if (!VECTOR_MODE_P (mode))
        {
          /* We need to generate a scalar mode mask in this case.  */
          rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
          tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
          mask = gen_reg_rtx (mode);
          emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
        }
    }
  else
    mask = gen_rtx_NOT (mode, mask);

  emit_insn (gen_rtx_SET (VOIDmode, sgn,
                          gen_rtx_AND (mode, mask, sign)));
  emit_insn (gen_rtx_SET (VOIDmode, result,
                          gen_rtx_IOR (mode, abs_value, sgn)));
}
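/* Illustrative example (not part of the sequence): with DFmode
   ABS_VALUE = 3.0 and SIGN = -2.0, the AND extracts just the sign bit
   of -2.0 and the IOR merges it into 3.0, producing -3.0 in RESULT.
   ABS_VALUE is assumed to already be nonnegative, so the IOR cannot
   disturb any of its other bits.  */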
/* Expand fabs (OP0) and return a new rtx that holds the result.  The
   mask for masking out the sign-bit is stored in *SMASK, if that is
   non-null.  */

static rtx
ix86_expand_sse_fabs (rtx op0, rtx *smask)
{
  enum machine_mode vmode, mode = GET_MODE (op0);
  rtx xa, mask;

  xa = gen_reg_rtx (mode);
  if (mode == SFmode)
    vmode = V4SFmode;
  else if (mode == DFmode)
    vmode = V2DFmode;
  else
    vmode = mode;
  mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
  if (!VECTOR_MODE_P (mode))
    {
      /* We need to generate a scalar mode mask in this case.  */
      rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
      tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
      mask = gen_reg_rtx (mode);
      emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
    }
  emit_insn (gen_rtx_SET (VOIDmode, xa,
                          gen_rtx_AND (mode, op0, mask)));

  if (smask)
    *smask = mask;

  return xa;
}
/* Expands a comparison of OP0 with OP1 using comparison code CODE,
   swapping the operands if SWAP_OPERANDS is true.  The expanded
   code is a forward jump to a newly created label in case the
   comparison is true.  The generated label rtx is returned.  */

static rtx
ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
                                  bool swap_operands)
{
  rtx label, tmp;

  if (swap_operands)
    {
      tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  label = gen_label_rtx ();
  tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_COMPARE (CCFPUmode, op0, op1)));
  tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                              gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
  JUMP_LABEL (tmp) = label;

  return label;
}
/* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
   using comparison code CODE.  Operands are swapped for the comparison if
   SWAP_OPERANDS is true.  Returns a rtx for the generated mask.  */

static rtx
ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
                              bool swap_operands)
{
  rtx (*insn)(rtx, rtx, rtx, rtx);
  enum machine_mode mode = GET_MODE (op0);
  rtx mask = gen_reg_rtx (mode);

  if (swap_operands)
    {
      rtx tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;

  emit_insn (insn (mask, op0, op1,
                   gen_rtx_fmt_ee (code, mode, op0, op1)));
  return mask;
}
/* Generate and return a rtx of mode MODE for 2**n where n is the number
   of bits of the mantissa of MODE, which must be one of DFmode or SFmode.  */

static rtx
ix86_gen_TWO52 (enum machine_mode mode)
{
  REAL_VALUE_TYPE TWO52r;
  rtx TWO52;

  real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
  TWO52 = const_double_from_real_value (TWO52r, mode);
  TWO52 = force_reg (mode, TWO52);

  return TWO52;
}
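/* The constant feeds the add/subtract rounding trick used by the
   expanders below.  Illustrative scalar sketch for DFmode:

     2**52 = 4503599627370496.0
     3.3 + 2**52 = 4503599627370499.0   (double spacing is 1.0 here)
     4503599627370499.0 - 2**52 = 3.0

   i.e. adding and then subtracting 2**52 rounds a nonnegative double
   of magnitude below 2**52 to an integer in the current rounding
   mode, without any int conversion.  */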
/* Expand SSE sequence for computing lround from OP1 storing
   into OP0.  */
void
ix86_expand_lround (rtx op0, rtx op1)
{
  /* C code for the stuff we're doing below:
       tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
       op0 = (long)tmp;
   */
  enum machine_mode mode = GET_MODE (op1);
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
  rtx adj;

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* adj = copysign (0.5, op1) */
  adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
  ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);

  /* adj = op1 + adj */
  adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);

  /* op0 = (imode)adj */
  expand_fix (op0, adj, 0);
}
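/* Why nextafter (0.5, 0.0) rather than a plain 0.5 (illustrative
   note): for the largest double below 0.5, x = 0.5 - 2**-54, the sum
   x + 0.5 is exactly halfway between 1.0 - 2**-53 and 1.0 and rounds
   up to 1.0, so lround would wrongly return 1.  With the predecessor
   of 0.5 the sum is 1.0 - 2**-53, which truncates to 0 as required.
   The same constant is used by ix86_expand_round below.  */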
/* Expand SSE2 sequence for computing lfloor or lceil from OP1 storing
   into OP0.  */
void
ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
{
  /* C code for the stuff we're doing below (for do_floor):
        xi = (long)op1;
        xi -= (double)xi > op1 ? 1 : 0;
        op0 = xi;
   */
  enum machine_mode fmode = GET_MODE (op1);
  enum machine_mode imode = GET_MODE (op0);
  rtx ireg, freg, label, tmp;

  /* reg = (long)op1 */
  ireg = gen_reg_rtx (imode);
  expand_fix (ireg, op1, 0);

  /* freg = (double)reg */
  freg = gen_reg_rtx (fmode);
  expand_float (freg, ireg, 0);

  /* ireg = (freg > op1) ? ireg - 1 : ireg */
  label = ix86_expand_sse_compare_and_jump (UNLE,
                                            freg, op1, !do_floor);
  tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
                             ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (ireg, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (op0, ireg);
}
/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
   result in OPERAND0.  */
void
ix86_expand_rint (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
        xa = fabs (operand1);
        if (!isless (xa, 2**52))
          return operand1;
        xa = xa + 2**52 - 2**52;
        return copysign (xa, operand1);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, xa, label, TWO52, mask;

  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  TWO52 = ix86_gen_TWO52 (mode);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  ix86_sse_copysign_to_positive (res, xa, res, mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
        double xa = fabs (x), x2;
        if (!isless (xa, TWO52))
          return x;
     Using the absolute value and copying back sign makes
     -0.0 -> -0.0 correct.
        xa = xa + TWO52 - TWO52;
        x2 = copysign (xa, x);
     Compensate.  Floor:
        if (x2 > x)
          x2 -= 1;
     Compensate.  Ceil:
        if (x2 < x)
          x2 += 1;
        return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = xa + TWO52 - TWO52; */
  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  /* xa = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (xa, xa, res, mask);

  /* generate 1.0 or -1.0 */
  one = force_reg (mode,
                   const_double_from_real_value (do_floor
                                                 ? dconst1 : dconstm1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  /* We always need to subtract here to preserve signed zero.  */
  tmp = expand_simple_binop (mode, MINUS,
                             xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
        double xa = fabs (x), x2;
        if (!isless (xa, TWO52))
          return x;
        x2 = (double)(long)x;
     Compensate.  Floor:
        if (x2 > x)
          x2 -= 1;
     Compensate.  Ceil:
        if (x2 < x)
          x2 += 1;
        if (HONOR_SIGNED_ZEROS (mode))
          return copysign (x2, x);
        return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (xa, xi, 0);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
                             xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */
void
ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
{
  /* C code for the stuff we expand below.
        double xa = fabs (x), xa2, x2;
        if (!isless (xa, TWO52))
          return x;
     Using the absolute value and copying back sign makes
     -0.0 -> -0.0 correct.
        xa2 = xa + TWO52 - TWO52;
     Compensate.
        dxa = xa2 - xa;
        if (dxa <= -0.5)
          xa2 += 1;
        else if (dxa > 0.5)
          xa2 -= 1;
        x2 = copysign (xa2, x);
        return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa2 = xa + TWO52 - TWO52; */
  xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);

  /* dxa = xa2 - xa; */
  dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);

  /* generate 0.5, 1.0 and -0.5 */
  half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
  one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
  mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
                               0, OPTAB_DIRECT);

  /* Compensate.  */
  tmp = gen_reg_rtx (mode);
  /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);

  /* res = copysign (xa2, operand1) */
  ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
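/* Illustrative scalar trace of the compensation above, for x = 2.5:
     xa = 2.5
     xa2 = 2.5 + 2**52 - 2**52 = 2.0   (ties round to even)
     dxa = 2.0 - 2.5 = -0.5
   so the dxa <= -0.5 test fires and xa2 becomes 3.0.  This restores
   the round-half-away-from-zero semantics of round (), which the
   bare TWO52 trick (round half to even) would otherwise miss.  */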
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_trunc (rtx operand0, rtx operand1)
{
  /* C code for SSE variant we expand below.
        double xa = fabs (x), x2;
        if (!isless (xa, TWO52))
          return x;
        x2 = (double)(long)x;
        if (HONOR_SIGNED_ZEROS (mode))
          return copysign (x2, x);
        return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, label, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* x = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (res, xi, 0);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */
void
ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
{
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, mask, TWO52, label, one, res, smask, tmp;

  /* C code for SSE variant we expand below.
        double xa = fabs (x), x2;
        if (!isless (xa, TWO52))
          return x;
        xa2 = xa + TWO52 - TWO52;
     Compensate:
        if (xa2 > xa)
          xa2 -= 1.0;
        x2 = copysign (xa2, x);
        return x2;
   */

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &smask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* res = xa + TWO52 - TWO52; */
  tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
  mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
  emit_insn (gen_rtx_SET (VOIDmode, mask,
                          gen_rtx_AND (mode, mask, one)));
  tmp = expand_simple_binop (mode, MINUS,
                             res, mask, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* res = copysign (res, operand1) */
  ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_round (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
        double xa = fabs (x);
        if (!isless (xa, TWO52))
          return x;
        xa = (double)(long)(xa + nextafter (0.5, 0.0));
        return copysign (xa, x);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, TWO52, xa, label, xi, half, mask;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  TWO52 = ix86_gen_TWO52 (mode);
  xa = ix86_expand_sse_fabs (res, &mask);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* xa = xa + 0.5 */
  half = force_reg (mode, const_double_from_real_value (pred_half, mode));
  xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);

  /* xa = (double)(int64_t)xa */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, xa, 0);
  expand_float (xa, xi, 0);

  /* res = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing round
   from OP1 storing into OP0 using sse4 round insn.  */
void
ix86_expand_round_sse4 (rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx e1, e2, res, half;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
  rtx (*gen_copysign) (rtx, rtx, rtx);
  rtx (*gen_round) (rtx, rtx, rtx);

  switch (mode)
    {
    case SFmode:
      gen_copysign = gen_copysignsf3;
      gen_round = gen_sse4_1_roundsf2;
      break;
    case DFmode:
      gen_copysign = gen_copysigndf3;
      gen_round = gen_sse4_1_rounddf2;
      break;
    default:
      gcc_unreachable ();
    }

  /* round (a) = trunc (a + copysign (0.5, a)) */

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
  half = const_double_from_real_value (pred_half, mode);

  /* e1 = copysign (0.5, op1) */
  e1 = gen_reg_rtx (mode);
  emit_insn (gen_copysign (e1, half, op1));

  /* e2 = op1 + e1 */
  e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);

  /* res = trunc (e2) */
  res = gen_reg_rtx (mode);
  emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));

  emit_move_insn (op0, res);
}
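/* Illustrative scalar equivalent of the sequence above (a sketch,
   not the emitted code):

     double round_via_trunc (double op1)
     {
       double e1 = copysign (nextafter (0.5, 0.0), op1);
       return trunc (op1 + e1);
     }

   e.g. op1 = 2.5 gives op1 + e1 = 3.0 (the exact sum 3.0 - 2**-54
   rounds up to 3.0), and trunc yields 3.0, matching round ()'s
   half-away-from-zero behavior.  */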
/* Table of valid machine attributes.  */
static const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Thiscall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "thiscall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Cdecl attribute says the callee is a normal C declaration */
  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Sseregparm attribute says we are using x86_64 calling conventions
     for FP arguments.  */
  { "sseregparm", 0, 0, false, true, true,  ix86_handle_cconv_attribute,
    true },
  /* The transactional memory builtins are implicitly regparm or fastcall
     depending on the ABI.  Override the generic do-nothing attribute that
     these builtins were declared with.  */
  { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
    true },
  /* force_align_arg_pointer says this function realigns the stack at entry.  */
  { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
    false, true,  true, ix86_handle_cconv_attribute, false },
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute,
    false },
#endif
  { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
    false },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
    false },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  /* ms_abi and sysv_abi calling convention function attributes.  */
  { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
  { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
  { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
    false },
  { "callee_pop_aggregate_return", 1, 1, false, true, true,
    ix86_handle_callee_pop_aggregate_return, true },
  /* End element.  */
  { NULL, 0, 0, false, false, false, NULL, false }
};
/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
                                 tree vectype ATTRIBUTE_UNUSED,
                                 int misalign ATTRIBUTE_UNUSED)
{
  switch (type_of_cost)
    {
      case scalar_stmt:
        return ix86_cost->scalar_stmt_cost;

      case scalar_load:
        return ix86_cost->scalar_load_cost;

      case scalar_store:
        return ix86_cost->scalar_store_cost;

      case vector_stmt:
        return ix86_cost->vec_stmt_cost;

      case vector_load:
        return ix86_cost->vec_align_load_cost;

      case vector_store:
        return ix86_cost->vec_store_cost;

      case vec_to_scalar:
        return ix86_cost->vec_to_scalar_cost;

      case scalar_to_vec:
        return ix86_cost->scalar_to_vec_cost;

      case unaligned_load:
      case unaligned_store:
        return ix86_cost->vec_unalign_load_cost;

      case cond_branch_taken:
        return ix86_cost->cond_taken_branch_cost;

      case cond_branch_not_taken:
        return ix86_cost->cond_not_taken_branch_cost;

      case vec_perm:
      case vec_promote_demote:
        return ix86_cost->vec_stmt_cost;

      default:
        gcc_unreachable ();
    }
}
/* Construct (set target (vec_select op0 (parallel perm))) and
   return true if that's a valid instruction in the active ISA.  */

static bool
expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
{
  rtx rperm[MAX_VECT_LEN], x;
  unsigned i;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (perm[i]);

  x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
  x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
  x = gen_rtx_SET (VOIDmode, target, x);

  x = emit_insn (x);
  if (recog_memoized (x) < 0)
    {
      remove_insn (x);
      return false;
    }
  return true;
}
/* Similar, but generate a vec_concat from op0 and op1 as well.  */

static bool
expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
                        const unsigned char *perm, unsigned nelt)
{
  enum machine_mode v2mode;
  rtx x;

  v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
  x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
  return expand_vselect (target, x, perm, nelt);
}
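/* For example (illustrative): an interleave-low of two V4SI vectors is
   expressed here as a V8SI vec_concat of op0 and op1 selected with
   PERM = { 0, 4, 1, 5 }, which recog then matches as punpckldq.  */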
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of blendp[sd] / pblendw / pblendvb / vpblendd.  */

static bool
expand_vec_perm_blend (struct expand_vec_perm_d *d)
{
  enum machine_mode vmode = d->vmode;
  unsigned i, mask, nelt = d->nelt;
  rtx target, op0, op1, x;
  rtx rperm[32], vperm;

  if (d->op0 == d->op1)
    return false;
  if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
    ;
  else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
    ;
  else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
    ;
  else
    return false;

  /* This is a blend, not a permute.  Elements must stay in their
     respective lanes.  */
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i];
      if (!(e == i || e == i + nelt))
        return false;
    }

  if (d->testing_p)
    return true;

  /* ??? Without SSE4.1, we could implement this with and/andn/or.  This
     decision should be extracted elsewhere, so that we only try that
     sequence once all budget==3 options have been tried.  */
  target = d->target;
  op0 = d->op0;
  op1 = d->op1;
  mask = 0;

  switch (vmode)
    {
    case V4DFmode:
    case V8SFmode:
    case V2DFmode:
    case V4SFmode:
    case V8HImode:
    case V8SImode:
      for (i = 0; i < nelt; ++i)
        mask |= (d->perm[i] >= nelt) << i;
      break;

    case V2DImode:
      for (i = 0; i < 2; ++i)
        mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
      vmode = V8HImode;
      goto do_subreg;

    case V4SImode:
      for (i = 0; i < 4; ++i)
        mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
      vmode = V8HImode;
      goto do_subreg;

    case V16QImode:
      /* See if bytes move in pairs so we can use pblendw with
         an immediate argument, rather than pblendvb with a vector
         argument.  */
      for (i = 0; i < 16; i += 2)
        if (d->perm[i] + 1 != d->perm[i + 1])
          {
          use_pblendvb:
            for (i = 0; i < nelt; ++i)
              rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);

          finish_pblendvb:
            vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
            vperm = force_reg (vmode, vperm);

            if (GET_MODE_SIZE (vmode) == 16)
              emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
            else
              emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
            return true;
          }

      for (i = 0; i < 8; ++i)
        mask |= (d->perm[i * 2] >= 16) << i;
      vmode = V8HImode;
      /* FALLTHRU */

    do_subreg:
      target = gen_lowpart (vmode, target);
      op0 = gen_lowpart (vmode, op0);
      op1 = gen_lowpart (vmode, op1);
      break;

    case V32QImode:
      /* See if bytes move in pairs.  If not, vpblendvb must be used.  */
      for (i = 0; i < 32; i += 2)
        if (d->perm[i] + 1 != d->perm[i + 1])
          goto use_pblendvb;
      /* See if bytes move in quadruplets.  If yes, vpblendd
         with immediate can be used.  */
      for (i = 0; i < 32; i += 4)
        if (d->perm[i] + 2 != d->perm[i + 2])
          break;
      if (i < 32)
        {
          /* See if bytes move the same in both lanes.  If yes,
             vpblendw with immediate can be used.  */
          for (i = 0; i < 16; i += 2)
            if (d->perm[i] + 16 != d->perm[i + 16])
              goto use_pblendvb;

          /* Use vpblendw.  */
          for (i = 0; i < 16; ++i)
            mask |= (d->perm[i * 2] >= 32) << i;
          vmode = V16HImode;
          goto do_subreg;
        }

      /* Use vpblendd.  */
      for (i = 0; i < 8; ++i)
        mask |= (d->perm[i * 4] >= 32) << i;
      vmode = V8SImode;
      goto do_subreg;

    case V16HImode:
      /* See if words move in pairs.  If yes, vpblendd can be used.  */
      for (i = 0; i < 16; i += 2)
        if (d->perm[i] + 1 != d->perm[i + 1])
          break;
      if (i < 16)
        {
          /* See if words move the same in both lanes.  If not,
             vpblendvb must be used.  */
          for (i = 0; i < 8; i++)
            if (d->perm[i] + 8 != d->perm[i + 8])
              {
                /* Use vpblendvb.  */
                for (i = 0; i < 32; ++i)
                  rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);

                vmode = V32QImode;
                nelt = 32;
                target = gen_lowpart (vmode, target);
                op0 = gen_lowpart (vmode, op0);
                op1 = gen_lowpart (vmode, op1);
                goto finish_pblendvb;
              }

          /* Use vpblendw.  */
          for (i = 0; i < 16; ++i)
            mask |= (d->perm[i] >= 16) << i;
          break;
        }

      /* Use vpblendd.  */
      for (i = 0; i < 8; ++i)
        mask |= (d->perm[i * 2] >= 16) << i;
      vmode = V8SImode;
      goto do_subreg;

    case V4DImode:
      /* Use vpblendd.  */
      for (i = 0; i < 4; ++i)
        mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
      vmode = V8SImode;
      goto do_subreg;

    default:
      gcc_unreachable ();
    }

  /* This matches five different patterns with the different modes.  */
  x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
  x = gen_rtx_SET (VOIDmode, target, x);
  emit_insn (x);

  return true;
}
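/* Example of the immediate-mask computation above (illustrative): for
   V8SFmode with perm = { 0, 9, 2, 11, 4, 13, 6, 15 }, the elements
   taken from op1 occupy the odd positions, so mask = 0xaa and a single
   vblendps with that immediate implements the whole permutation.  */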
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of the variable form of vpermilps.

   Note that we will have already failed the immediate input vpermilps,
   which requires that the high and low part shuffle be identical; the
   variable form doesn't require that.  */

static bool
expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
{
  rtx rperm[8], vperm;
  unsigned i;

  if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
    return false;

  /* We can only permute within the 128-bit lane.  */
  for (i = 0; i < 8; ++i)
    {
      unsigned e = d->perm[i];
      if (i < 4 ? e >= 4 : e < 4)
        return false;
    }

  if (d->testing_p)
    return true;

  for (i = 0; i < 8; ++i)
    {
      unsigned e = d->perm[i];

      /* Within each 128-bit lane, the elements of op0 are numbered
         from 0 and the elements of op1 are numbered from 4.  */
      if (e >= 8 + 4)
        e -= (8 + 4) - 4;
      else if (e >= 4)
        e -= 4;

      rperm[i] = GEN_INT (e);
    }

  vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
  vperm = force_reg (V8SImode, vperm);
  emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));

  return true;
}
/* Return true if permutation D can be performed as VMODE permutation
   instead.  */

static bool
valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
{
  unsigned int i, j, chunk;

  if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
      || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
      || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
    return false;

  if (GET_MODE_NUNITS (vmode) >= d->nelt)
    return true;

  chunk = d->nelt / GET_MODE_NUNITS (vmode);
  for (i = 0; i < d->nelt; i += chunk)
    if (d->perm[i] & (chunk - 1))
      return false;
    else
      for (j = 1; j < chunk; ++j)
        if (d->perm[i] + j != d->perm[i + j])
          return false;

  return true;
}
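/* E.g. (illustrative): the V16QImode permutation { 4 5 6 7  0 1 2 3
   12 13 14 15  8 9 10 11 } moves whole 4-byte chunks, so with chunk
   = 4 each group passes both checks above and the permutation is
   also expressible as the V4SImode permutation { 1, 0, 3, 2 }.  */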
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of pshufb, vpperm, vpermq, vpermd or vperm2i128.  */

static bool
expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
{
  unsigned i, nelt, eltsz, mask;
  unsigned char perm[32];
  enum machine_mode vmode = V16QImode;
  rtx rperm[32], vperm, target, op0, op1;

  nelt = d->nelt;

  if (d->op0 != d->op1)
    {
      if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
        {
          if (TARGET_AVX2
              && valid_perm_using_mode_p (V2TImode, d))
            {
              if (d->testing_p)
                return true;

              /* Use vperm2i128 insn.  The pattern uses
                 V4DImode instead of V2TImode.  */
              target = gen_lowpart (V4DImode, d->target);
              op0 = gen_lowpart (V4DImode, d->op0);
              op1 = gen_lowpart (V4DImode, d->op1);
              rperm[0]
                = GEN_INT (((d->perm[0] & (nelt / 2)) ? 1 : 0)
                           || ((d->perm[nelt / 2] & (nelt / 2)) ? 2 : 0));
              emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
              return true;
            }
          return false;
        }
    }
  else
    {
      if (GET_MODE_SIZE (d->vmode) == 16)
        {
          if (!TARGET_SSSE3)
            return false;
        }
      else if (GET_MODE_SIZE (d->vmode) == 32)
        {
          if (!TARGET_AVX2)
            return false;

          /* V4DImode should be already handled through
             expand_vselect by vpermq instruction.  */
          gcc_assert (d->vmode != V4DImode);

          vmode = V32QImode;
          if (d->vmode == V8SImode
              || d->vmode == V16HImode
              || d->vmode == V32QImode)
            {
              /* First see if vpermq can be used for
                 V8SImode/V16HImode/V32QImode.  */
              if (valid_perm_using_mode_p (V4DImode, d))
                {
                  for (i = 0; i < 4; i++)
                    perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
                  if (d->testing_p)
                    return true;
                  return expand_vselect (gen_lowpart (V4DImode, d->target),
                                         gen_lowpart (V4DImode, d->op0),
                                         perm, 4);
                }

              /* Next see if vpermd can be used.  */
              if (valid_perm_using_mode_p (V8SImode, d))
                vmode = V8SImode;
            }

          if (vmode == V32QImode)
            {
              /* vpshufb only works intra lanes, it is not
                 possible to shuffle bytes in between the lanes.  */
              for (i = 0; i < nelt; ++i)
                if ((d->perm[i] ^ i) & (nelt / 2))
                  return false;
            }
        }
      else
        return false;
    }

  if (d->testing_p)
    return true;

  if (vmode == V8SImode)
    for (i = 0; i < 8; ++i)
      rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
  else
    {
      eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
      if (d->op0 != d->op1)
        mask = 2 * nelt - 1;
      else if (vmode == V16QImode)
        mask = nelt - 1;
      else
        mask = nelt / 2 - 1;

      for (i = 0; i < nelt; ++i)
        {
          unsigned j, e = d->perm[i] & mask;
          for (j = 0; j < eltsz; ++j)
            rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
        }
    }

  vperm = gen_rtx_CONST_VECTOR (vmode,
                                gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
  vperm = force_reg (vmode, vperm);

  target = gen_lowpart (vmode, d->target);
  op0 = gen_lowpart (vmode, d->op0);
  if (d->op0 == d->op1)
    {
      if (vmode == V16QImode)
        emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
      else if (vmode == V32QImode)
        emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
      else
        emit_insn (gen_avx2_permvarv8si (target, vperm, op0));
    }
  else
    {
      op1 = gen_lowpart (vmode, d->op1);
      emit_insn (gen_xop_pperm (target, op0, op1, vperm));
    }

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to instantiate D
   in a single instruction.  */

static bool
expand_vec_perm_1 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned char perm2[MAX_VECT_LEN];

  /* Check plain VEC_SELECT first, because AVX has instructions that could
     match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
     input where SEL+CONCAT may not.  */
  if (d->op0 == d->op1)
    {
      int mask = nelt - 1;
      bool identity_perm = true;
      bool broadcast_perm = true;

      for (i = 0; i < nelt; i++)
        {
          perm2[i] = d->perm[i] & mask;
          if (perm2[i] != i)
            identity_perm = false;
          if (perm2[i])
            broadcast_perm = false;
        }

      if (identity_perm)
        {
          if (!d->testing_p)
            emit_move_insn (d->target, d->op0);
          return true;
        }
      else if (broadcast_perm && TARGET_AVX2)
        {
          /* Use vpbroadcast{b,w,d}.  */
          rtx op = d->op0, (*gen) (rtx, rtx) = NULL;
          switch (d->vmode)
            {
            case V32QImode:
              op = gen_lowpart (V16QImode, op);
              gen = gen_avx2_pbroadcastv32qi;
              break;
            case V16HImode:
              op = gen_lowpart (V8HImode, op);
              gen = gen_avx2_pbroadcastv16hi;
              break;
            case V8SImode:
              op = gen_lowpart (V4SImode, op);
              gen = gen_avx2_pbroadcastv8si;
              break;
            case V16QImode:
              gen = gen_avx2_pbroadcastv16qi;
              break;
            case V8HImode:
              gen = gen_avx2_pbroadcastv8hi;
              break;
            /* For other modes prefer other shuffles this function creates.  */
            default: break;
            }
          if (gen != NULL)
            {
              if (!d->testing_p)
                emit_insn (gen (d->target, op));
              return true;
            }
        }

      if (expand_vselect (d->target, d->op0, perm2, nelt))
        return true;

      /* There are plenty of patterns in sse.md that are written for
         SEL+CONCAT and are not replicated for a single op.  Perhaps
         that should be changed, to avoid the nastiness here.  */

      /* Recognize interleave style patterns, which means incrementing
         every other permutation operand.  */
      for (i = 0; i < nelt; i += 2)
        {
          perm2[i] = d->perm[i] & mask;
          perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
        }
      if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
        return true;

      /* Recognize shufps, which means adding {0, 0, nelt, nelt}.  */
      if (nelt >= 4)
        {
          for (i = 0; i < nelt; i += 4)
            {
              perm2[i + 0] = d->perm[i + 0] & mask;
              perm2[i + 1] = d->perm[i + 1] & mask;
              perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
              perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
            }

          if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
            return true;
        }
    }

  /* Finally, try the fully general two operand permute.  */
  if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
    return true;

  /* Recognize interleave style patterns with reversed operands.  */
  if (d->op0 != d->op1)
    {
      for (i = 0; i < nelt; ++i)
        {
          unsigned e = d->perm[i];
          if (e >= nelt)
            e -= nelt;
          else
            e += nelt;
          perm2[i] = e;
        }

      if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
        return true;
    }

  /* Try the SSE4.1 blend variable merge instructions.  */
  if (expand_vec_perm_blend (d))
    return true;

  /* Try one of the AVX vpermil variable permutations.  */
  if (expand_vec_perm_vpermil (d))
    return true;

  /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
     vpshufb, vpermd or vpermq variable permutation.  */
  if (expand_vec_perm_pshufb (d))
    return true;

  return false;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of a pair of pshuflw + pshufhw instructions.  */

static bool
expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
{
  unsigned char perm2[MAX_VECT_LEN];
  unsigned i;
  bool ok;

  if (d->vmode != V8HImode || d->op0 != d->op1)
    return false;

  /* The two permutations only operate in 64-bit lanes.  */
  for (i = 0; i < 4; ++i)
    if (d->perm[i] >= 4)
      return false;
  for (i = 4; i < 8; ++i)
    if (d->perm[i] < 4)
      return false;

  if (d->testing_p)
    return true;

  /* Emit the pshuflw.  */
  memcpy (perm2, d->perm, 4);
  for (i = 4; i < 8; ++i)
    perm2[i] = i;
  ok = expand_vselect (d->target, d->op0, perm2, 8);
  gcc_assert (ok);

  /* Emit the pshufhw.  */
  memcpy (perm2 + 4, d->perm + 4, 4);
  for (i = 0; i < 4; ++i)
    perm2[i] = i;
  ok = expand_vselect (d->target, d->target, perm2, 8);
  gcc_assert (ok);

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   the permutation using the SSSE3 palignr instruction.  This succeeds
   when all of the elements in PERM fit within one vector and we merely
   need to shift them down so that a single vector permutation has a
   chance to succeed.  */

static bool
expand_vec_perm_palignr (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned min, max;
  bool in_order, ok;
  rtx shift;

  /* Even with AVX, palignr only operates on 128-bit vectors.  */
  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
    return false;

  min = nelt, max = 0;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i];
      if (e < min)
        min = e;
      if (e > max)
        max = e;
    }
  if (min == 0 || max - min >= nelt)
    return false;

  /* Given that we have SSSE3, we know we'll be able to implement the
     single operand permutation after the palignr with pshufb.  */
  if (d->testing_p)
    return true;

  shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
  emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
                                  gen_lowpart (TImode, d->op1),
                                  gen_lowpart (TImode, d->op0), shift));

  d->op0 = d->op1 = d->target;

  in_order = true;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i] - min;
      if (e != i)
        in_order = false;
      d->perm[i] = e;
    }

  /* Test for the degenerate case where the alignment by itself
     produces the desired permutation.  */
  if (in_order)
    return true;

  ok = expand_vec_perm_1 (d);
  gcc_assert (ok);

  return ok;
}
static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);

/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a two vector permutation into a single vector permutation by using
   an interleave operation to merge the vectors.  */

static bool
expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
  unsigned HOST_WIDE_INT contents;
  unsigned char remap[2 * MAX_VECT_LEN];
  rtx seq;
  bool ok, same_halves = false;

  if (GET_MODE_SIZE (d->vmode) == 16)
    {
      if (d->op0 == d->op1)
        return false;
    }
  else if (GET_MODE_SIZE (d->vmode) == 32)
    {
      if (!TARGET_AVX)
        return false;
      /* For 32-byte modes allow even d->op0 == d->op1.
         The lack of cross-lane shuffling in some instructions
         might prevent a single insn shuffle.  */
      dfinal = *d;
      dfinal.testing_p = true;
      /* If expand_vec_perm_interleave3 can expand this into
         a 3 insn sequence, give up and let it be expanded as
         3 insn sequence.  While that is one insn longer,
         it doesn't need a memory operand and in the common
         case that both interleave low and high permutations
         with the same operands are adjacent needs 4 insns
         for both after CSE.  */
      if (expand_vec_perm_interleave3 (&dfinal))
        return false;
    }
  else
    return false;

  /* Examine from whence the elements come.  */
  contents = 0;
  for (i = 0; i < nelt; ++i)
    contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];

  memset (remap, 0xff, sizeof (remap));
  dremap = *d;

  if (GET_MODE_SIZE (d->vmode) == 16)
    {
      unsigned HOST_WIDE_INT h1, h2, h3, h4;

      /* Split the two input vectors into 4 halves.  */
      h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
      h2 = h1 << nelt2;
      h3 = h2 << nelt2;
      h4 = h3 << nelt2;

      /* If the elements from the low halves use interleave low, and similarly
         for interleave high.  If the elements are from mis-matched halves, we
         can use shufps for V4SF/V4SI or do a DImode shuffle.  */
      if ((contents & (h1 | h3)) == contents)
        {
          /* punpckl* */
          for (i = 0; i < nelt2; ++i)
            {
              remap[i] = i * 2;
              remap[i + nelt] = i * 2 + 1;
              dremap.perm[i * 2] = i;
              dremap.perm[i * 2 + 1] = i + nelt;
            }
          if (!TARGET_SSE2 && d->vmode == V4SImode)
            dremap.vmode = V4SFmode;
        }
      else if ((contents & (h2 | h4)) == contents)
        {
          /* punpckh* */
          for (i = 0; i < nelt2; ++i)
            {
              remap[i + nelt2] = i * 2;
              remap[i + nelt + nelt2] = i * 2 + 1;
              dremap.perm[i * 2] = i + nelt2;
              dremap.perm[i * 2 + 1] = i + nelt + nelt2;
            }
          if (!TARGET_SSE2 && d->vmode == V4SImode)
            dremap.vmode = V4SFmode;
        }
      else if ((contents & (h1 | h4)) == contents)
        {
          /* shufps */
          for (i = 0; i < nelt2; ++i)
            {
              remap[i] = i;
              remap[i + nelt + nelt2] = i + nelt2;
              dremap.perm[i] = i;
              dremap.perm[i + nelt2] = i + nelt + nelt2;
            }
          if (nelt != 4)
            {
              /* shufpd */
              dremap.vmode = V2DImode;
              dremap.nelt = 2;
              dremap.perm[0] = 0;
              dremap.perm[1] = 3;
            }
        }
      else if ((contents & (h2 | h3)) == contents)
        {
          /* shufps */
          for (i = 0; i < nelt2; ++i)
            {
              remap[i + nelt2] = i;
              remap[i + nelt] = i + nelt2;
              dremap.perm[i] = i + nelt2;
              dremap.perm[i + nelt2] = i + nelt;
            }
          if (nelt != 4)
            {
              /* shufpd */
              dremap.vmode = V2DImode;
              dremap.nelt = 2;
              dremap.perm[0] = 1;
              dremap.perm[1] = 2;
            }
        }
      else
        return false;
    }
  else
    {
      unsigned int nelt4 = nelt / 4, nzcnt = 0;
      unsigned HOST_WIDE_INT q[8];
      unsigned int nonzero_halves[4];

      /* Split the two input vectors into 8 quarters.  */
      q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
      for (i = 1; i < 8; ++i)
        q[i] = q[0] << (nelt4 * i);
      for (i = 0; i < 4; ++i)
        if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
          {
            nonzero_halves[nzcnt] = i;
            ++nzcnt;
          }

      if (nzcnt == 1)
        {
          gcc_assert (d->op0 == d->op1);
          nonzero_halves[1] = nonzero_halves[0];
          same_halves = true;
        }
      else if (d->op0 == d->op1)
        {
          gcc_assert (nonzero_halves[0] == 0);
          gcc_assert (nonzero_halves[1] == 1);
        }

      if (nzcnt <= 2)
        {
          if (d->perm[0] / nelt2 == nonzero_halves[1])
            {
              /* Attempt to increase the likelihood that dfinal
                 shuffle will be intra-lane.  */
              char tmph = nonzero_halves[0];
              nonzero_halves[0] = nonzero_halves[1];
              nonzero_halves[1] = tmph;
            }

          /* vperm2f128 or vperm2i128.  */
          for (i = 0; i < nelt2; ++i)
            {
              remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
              remap[i + nonzero_halves[0] * nelt2] = i;
              dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
              dremap.perm[i] = i + nonzero_halves[0] * nelt2;
            }

          if (d->vmode != V8SFmode
              && d->vmode != V4DFmode
              && d->vmode != V8SImode)
            {
              dremap.vmode = V8SImode;
              dremap.nelt = 8;
              for (i = 0; i < 4; ++i)
                {
                  dremap.perm[i] = i + nonzero_halves[0] * 4;
                  dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
                }
            }
        }
      else if (d->op0 == d->op1)
        return false;
      else if (TARGET_AVX2
               && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
        {
          /* vpunpckl* */
          for (i = 0; i < nelt4; ++i)
            {
              remap[i] = i * 2;
              remap[i + nelt] = i * 2 + 1;
              remap[i + nelt2] = i * 2 + nelt2;
              remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
              dremap.perm[i * 2] = i;
              dremap.perm[i * 2 + 1] = i + nelt;
              dremap.perm[i * 2 + nelt2] = i + nelt2;
              dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
            }
        }
      else if (TARGET_AVX2
               && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
        {
          /* vpunpckh* */
          for (i = 0; i < nelt4; ++i)
            {
              remap[i + nelt4] = i * 2;
              remap[i + nelt + nelt4] = i * 2 + 1;
              remap[i + nelt2 + nelt4] = i * 2 + nelt2;
              remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
              dremap.perm[i * 2] = i + nelt4;
              dremap.perm[i * 2 + 1] = i + nelt + nelt4;
              dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
              dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
            }
        }
      else
        return false;
    }

  /* Use the remapping array set up above to move the elements from their
     swizzled locations into their final destinations.  */
  dfinal = *d;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = remap[d->perm[i]];
      gcc_assert (e < nelt);
      /* If same_halves is true, both halves of the remapped vector are the
         same.  Avoid cross-lane accesses if possible.  */
      if (same_halves && i >= nelt2)
        {
          gcc_assert (e < nelt2);
          dfinal.perm[i] = e + nelt2;
        }
      else
        dfinal.perm[i] = e;
    }
  dfinal.op0 = gen_reg_rtx (dfinal.vmode);
  dfinal.op1 = dfinal.op0;
  dremap.target = dfinal.op0;

  /* Test if the final remap can be done with a single insn.  For V4SFmode or
     V4SImode this *will* succeed.  For V8HImode or V16QImode it may not.  */
  start_sequence ();
  ok = expand_vec_perm_1 (&dfinal);
  seq = get_insns ();
  end_sequence ();

  if (!ok)
    return false;

  if (d->testing_p)
    return true;

  if (dremap.vmode != dfinal.vmode)
    {
      dremap.target = gen_lowpart (dremap.vmode, dremap.target);
      dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
      dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
    }

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  emit_insn (seq);
  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a single vector cross-lane permutation into vpermq followed
   by any of the single insn permutations.  */

static bool
expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
  unsigned contents[2];
  bool ok;

  if (!(TARGET_AVX2
        && (d->vmode == V32QImode || d->vmode == V16HImode)
        && d->op0 == d->op1))
    return false;

  contents[0] = 0;
  contents[1] = 0;
  for (i = 0; i < nelt2; ++i)
    {
      contents[0] |= 1u << (d->perm[i] / nelt4);
      contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
    }

  for (i = 0; i < 2; ++i)
    {
      unsigned int cnt = 0;
      for (j = 0; j < 4; ++j)
        if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
          return false;
    }

  if (d->testing_p)
    return true;

  dremap = *d;
  dremap.vmode = V4DImode;
  dremap.nelt = 4;
  dremap.target = gen_reg_rtx (V4DImode);
  dremap.op0 = gen_lowpart (V4DImode, d->op0);
  dremap.op1 = dremap.op0;
  for (i = 0; i < 2; ++i)
    {
      unsigned int cnt = 0;
      for (j = 0; j < 4; ++j)
        if ((contents[i] & (1u << j)) != 0)
          dremap.perm[2 * i + cnt++] = j;
      for (; cnt < 2; ++cnt)
        dremap.perm[2 * i + cnt] = 0;
    }

  dfinal = *d;
  dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
  dfinal.op1 = dfinal.op0;
  for (i = 0, j = 0; i < nelt; ++i)
    {
      if (i == nelt2)
        j = 2;
      dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
      if ((d->perm[i] / nelt4) == dremap.perm[j])
        ;
      else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
        dfinal.perm[i] |= nelt4;
      else
        gcc_unreachable ();
    }

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  ok = expand_vec_perm_1 (&dfinal);
  gcc_assert (ok);

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a two vector permutation using 2 intra-lane interleave insns
   and cross-lane shuffle for 32-byte vectors.  */

static bool
expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt;
  rtx (*gen) (rtx, rtx, rtx);

  if (d->op0 == d->op1)
    return false;
  if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
    ;
  else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
    ;
  else
    return false;

  nelt = d->nelt;
  if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
    return false;
  for (i = 0; i < nelt; i += 2)
    if (d->perm[i] != d->perm[0] + i / 2
        || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
      return false;

  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V32QImode:
      if (d->perm[0])
        gen = gen_vec_interleave_highv32qi;
      else
        gen = gen_vec_interleave_lowv32qi;
      break;
    case V16HImode:
      if (d->perm[0])
        gen = gen_vec_interleave_highv16hi;
      else
        gen = gen_vec_interleave_lowv16hi;
      break;
    case V8SImode:
      if (d->perm[0])
        gen = gen_vec_interleave_highv8si;
      else
        gen = gen_vec_interleave_lowv8si;
      break;
    case V4DImode:
      if (d->perm[0])
        gen = gen_vec_interleave_highv4di;
      else
        gen = gen_vec_interleave_lowv4di;
      break;
    case V8SFmode:
      if (d->perm[0])
        gen = gen_vec_interleave_highv8sf;
      else
        gen = gen_vec_interleave_lowv8sf;
      break;
    case V4DFmode:
      if (d->perm[0])
        gen = gen_vec_interleave_highv4df;
      else
        gen = gen_vec_interleave_lowv4df;
      break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (d->target, d->op0, d->op1));
  return true;
}
/* A subroutine of expand_vec_perm_even_odd_1.  Implement the double-word
   permutation with two pshufb insns and an ior.  We should have already
   failed all two instruction sequences.  */

static bool
expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
{
  rtx rperm[2][16], vperm, l, h, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
    return false;
  gcc_assert (d->op0 != d->op1);

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate two permutation masks.  If the required element is within
     the given vector it is shuffled into the proper lane.  If the required
     element is in the other vector, force a zero into the lane by setting
     bit 7 in the permutation mask.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i];
      unsigned which = (e >= nelt);
      if (e >= nelt)
        e -= nelt;

      for (j = 0; j < eltsz; ++j)
        {
          rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
          rperm[1-which][i*eltsz + j] = m128;
        }
    }

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
  vperm = force_reg (V16QImode, vperm);

  l = gen_reg_rtx (V16QImode);
  op = gen_lowpart (V16QImode, d->op0);
  emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
  vperm = force_reg (V16QImode, vperm);

  h = gen_reg_rtx (V16QImode);
  op = gen_lowpart (V16QImode, d->op1);
  emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));

  op = gen_lowpart (V16QImode, d->target);
  emit_insn (gen_iorv16qi3 (op, l, h));

  return true;
}
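/* Illustrative mask pair for a V8HImode extract-even permutation
   ({ 0 2 4 6 8 10 12 14 }): the op0 mask selects bytes { 0 1 4 5 8 9
   12 13 } into the low eight byte positions with -128 everywhere
   else, the op1 mask mirrors that in the high eight positions, and
   the final por combines the two zero-filled partial results.  */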
/* Implement arbitrary permutation of one V32QImode and V16QImode operand
   with two vpshufb insns, vpermq and vpor.  We should have already failed
   all two or three instruction sequences.  */

static bool
expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
{
  rtx rperm[2][32], vperm, l, h, hp, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_AVX2
      || d->op0 != d->op1
      || (d->vmode != V32QImode && d->vmode != V16HImode))
    return false;

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate two permutation masks.  If the required element is within
     the same lane, it is shuffled in.  If the required element is from the
     other lane, force a zero by setting bit 7 in the permutation mask.
     In the other mask the mask has non-negative elements if the element
     is requested from the other lane, but also moved to the other lane,
     so that the result of vpshufb can have the two V2TImode halves
     swapped.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
      unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;

      for (j = 0; j < eltsz; ++j)
        {
          rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
          rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
        }
    }

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
  vperm = force_reg (V32QImode, vperm);

  h = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));

  /* Swap the 128-bit lanes of h into hp.  */
  hp = gen_reg_rtx (V4DImode);
  op = gen_lowpart (V4DImode, h);
  emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
                                  const1_rtx));

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
  vperm = force_reg (V32QImode, vperm);

  l = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));

  op = gen_lowpart (V32QImode, d->target);
  emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));

  return true;
}
/* A subroutine of expand_vec_perm_even_odd_1.  Implement extract-even
   and extract-odd permutations of two V32QImode and V16QImode operand
   with two vpshufb insns, vpor and vpermq.  We should have already
   failed all two or three instruction sequences.  */

static bool
expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
{
  rtx rperm[2][32], vperm, l, h, ior, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_AVX2
      || d->op0 == d->op1
      || (d->vmode != V32QImode && d->vmode != V16HImode))
    return false;

  for (i = 0; i < d->nelt; ++i)
    if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
      return false;

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate two permutation masks.  In the first permutation mask
     the first quarter will contain indexes for the first half
     of the op0, the second quarter will contain bit 7 set, third quarter
     will contain indexes for the second half of the op0 and the
     last quarter bit 7 set.  In the second permutation mask
     the first quarter will contain bit 7 set, the second quarter
     indexes for the first half of the op1, the third quarter bit 7 set
     and last quarter indexes for the second half of the op1.
     I.e. the first mask e.g. for V32QImode extract even will be:
     0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
     (all values masked with 0xf except for -128) and second mask
     for extract even will be
     -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
      unsigned which = d->perm[i] >= nelt;
      unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;

      for (j = 0; j < eltsz; ++j)
        {
          rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
          rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
        }
    }

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
  vperm = force_reg (V32QImode, vperm);

  l = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
  vperm = force_reg (V32QImode, vperm);

  h = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op1);
  emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));

  ior = gen_reg_rtx (V32QImode);
  emit_insn (gen_iorv32qi3 (ior, l, h));

  /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation.  */
  op = gen_lowpart (V4DImode, d->target);
  ior = gen_lowpart (V4DImode, ior);
  emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
                                  const1_rtx, GEN_INT (3)));

  return true;
}
36764 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
36765 and extract-odd permutations. */
36768 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d
*d
, unsigned odd
)
36775 t1
= gen_reg_rtx (V4DFmode
);
36776 t2
= gen_reg_rtx (V4DFmode
);
36778 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
36779 emit_insn (gen_avx_vperm2f128v4df3 (t1
, d
->op0
, d
->op1
, GEN_INT (0x20)));
36780 emit_insn (gen_avx_vperm2f128v4df3 (t2
, d
->op0
, d
->op1
, GEN_INT (0x31)));
36782 /* Now an unpck[lh]pd will produce the result required. */
36784 t3
= gen_avx_unpckhpd256 (d
->target
, t1
, t2
);
36786 t3
= gen_avx_unpcklpd256 (d
->target
, t1
, t2
);
    case V8SFmode:
      {
	int mask = odd ? 0xdd : 0x88;

	t1 = gen_reg_rtx (V8SFmode);
	t2 = gen_reg_rtx (V8SFmode);
	t3 = gen_reg_rtx (V8SFmode);

	/* Shuffle within the 128-bit lanes to produce:
	   { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }.  */
	emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
				      GEN_INT (mask)));

	/* Shuffle the lanes around to produce:
	   { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }.  */
	emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
					    GEN_INT (0x1)));

	/* Shuffle within the 128-bit lanes to produce:
	   { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }.  */
	emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));

	/* Shuffle within the 128-bit lanes to produce:
	   { 8 a c e c e 8 a } | { 9 b d f d f 9 b }.  */
	emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));

	/* Shuffle the lanes around to produce:
	   { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }.  */
	emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
					    GEN_INT (0x20)));
      }
      break;
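      /* Note (illustrative): each 2-bit field of the shufps immediate
	 selects one element per 128-bit lane, so 0x88 (0b10001000) picks
	 elements { 0, 2 } from both sources and 0xdd (0b11011101) picks
	 { 1, 3 }, which is what makes the first shuffle above gather the
	 even resp. odd elements.  */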
    case V2DFmode:
    case V4SFmode:
    case V2DImode:
    case V4SImode:
      /* These are always directly implementable by expand_vec_perm_1.  */
      gcc_unreachable ();

    case V8HImode:
      if (TARGET_SSSE3)
	return expand_vec_perm_pshufb2 (d);
      else
	{
	  /* We need 2*log2(N)-1 operations to achieve odd/even
	     with interleave.  */
	  t1 = gen_reg_rtx (V8HImode);
	  t2 = gen_reg_rtx (V8HImode);
	  emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
	  emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
	  emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
	  emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
	  if (odd)
	    t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
	  else
	    t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
	  emit_insn (t3);
	}
      break;

    case V16QImode:
      if (TARGET_SSSE3)
	return expand_vec_perm_pshufb2 (d);
      else
	{
	  t1 = gen_reg_rtx (V16QImode);
	  t2 = gen_reg_rtx (V16QImode);
	  t3 = gen_reg_rtx (V16QImode);
	  emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
	  emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
	  emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
	  emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
	  emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
	  emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
	  if (odd)
	    t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
	  else
	    t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
	  emit_insn (t3);
	}
      break;
    case V16HImode:
    case V32QImode:
      return expand_vec_perm_vpshufb2_vpermq_even_odd (d);
    case V4DImode:
      if (!TARGET_AVX2)
	{
	  struct expand_vec_perm_d d_copy = *d;
	  d_copy.vmode = V4DFmode;
	  d_copy.target = gen_lowpart (V4DFmode, d->target);
	  d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
	  d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
	  return expand_vec_perm_even_odd_1 (&d_copy, odd);
	}
      t1 = gen_reg_rtx (V4DImode);
      t2 = gen_reg_rtx (V4DImode);

      /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
      emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
      emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));

      /* Now a vpunpck[lh]qdq will produce the result required.  */
      if (odd)
	t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
      else
	t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
      emit_insn (t3);
      break;
    case V8SImode:
      if (!TARGET_AVX2)
	{
	  struct expand_vec_perm_d d_copy = *d;
	  d_copy.vmode = V8SFmode;
	  d_copy.target = gen_lowpart (V8SFmode, d->target);
	  d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
	  d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
	  return expand_vec_perm_even_odd_1 (&d_copy, odd);
	}
      t1 = gen_reg_rtx (V8SImode);
      t2 = gen_reg_rtx (V8SImode);

      /* Shuffle the lanes around into
	 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }.  */
      emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t1),
				    gen_lowpart (V4DImode, d->op0),
				    gen_lowpart (V4DImode, d->op1),
				    GEN_INT (0x20)));
      emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t2),
				    gen_lowpart (V4DImode, d->op0),
				    gen_lowpart (V4DImode, d->op1),
				    GEN_INT (0x31)));

      /* Swap the 2nd and 3rd position in each lane into
	 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }.  */
      emit_insn (gen_avx2_pshufdv3 (t1, t1,
				    GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
      emit_insn (gen_avx2_pshufdv3 (t2, t2,
				    GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));

      /* Now a vpunpck[lh]qdq will produce
	 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }.  */
      if (odd)
	t3 = gen_avx2_interleave_highv4di (gen_lowpart (V4DImode, d->target),
					   gen_lowpart (V4DImode, t1),
					   gen_lowpart (V4DImode, t2));
      else
	t3 = gen_avx2_interleave_lowv4di (gen_lowpart (V4DImode, d->target),
					  gen_lowpart (V4DImode, t1),
					  gen_lowpart (V4DImode, t2));
      emit_insn (t3);
      break;
    default:
      gcc_unreachable ();
    }

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
   extract-even and extract-odd permutations.  */

static bool
expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
{
  unsigned i, odd, nelt = d->nelt;

  odd = d->perm[0];
  if (odd != 0 && odd != 1)
    return false;

  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != 2 * i + odd)
      return false;

  return expand_vec_perm_even_odd_1 (d, odd);
}
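/* For example (not in the original sources), a V8HImode extract-odd is
   recognized here when d->perm is { 1, 3, 5, 7, 9, 11, 13, 15 }:
   d->perm[0] == 1 selects odd, and every later element equals
   2 * i + 1.  */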
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement broadcast
   permutations.  We assume that expand_vec_perm_1 has already failed.  */

static bool
expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
{
  unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
  enum machine_mode vmode = d->vmode;
  unsigned char perm2[4];
  rtx op0 = d->op0;
  bool ok;

  switch (vmode)
    {
    case V4DFmode:
    case V8SFmode:
      /* These are special-cased in sse.md so that we can optionally
	 use the vbroadcast instruction.  They expand to two insns
	 if the input happens to be in a register.  */
      gcc_unreachable ();

    case V2DFmode:
    case V2DImode:
    case V4SFmode:
    case V4SImode:
      /* These are always implementable using standard shuffle patterns.  */
      gcc_unreachable ();
    case V8HImode:
    case V16QImode:
      /* These can be implemented via interleave.  We save one insn by
	 stopping once we have promoted to V4SImode and then use pshufd.  */
      do
	{
	  rtx dest;
	  rtx (*gen) (rtx, rtx, rtx)
	    = vmode == V16QImode ? gen_vec_interleave_lowv16qi
				 : gen_vec_interleave_lowv8hi;

	  if (elt >= nelt2)
	    {
	      gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
				       : gen_vec_interleave_highv8hi;
	      elt -= nelt2;
	    }
	  nelt2 /= 2;

	  dest = gen_reg_rtx (vmode);
	  emit_insn (gen (dest, op0, op0));
	  vmode = get_mode_wider_vector (vmode);
	  op0 = gen_lowpart (vmode, dest);
	}
      while (vmode != V4SImode);

      memset (perm2, elt, 4);
      ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
      gcc_assert (ok);
      return true;
    case V32QImode:
    case V16HImode:
    case V8SImode:
    case V4DImode:
      /* For AVX2 broadcasts of the first element vpbroadcast* or
	 vpermq should be used by expand_vec_perm_1.  */
      gcc_assert (!TARGET_AVX2 || d->perm[0]);
      return false;

    default:
      gcc_unreachable ();
    }
}
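/* Worked example of the promotion loop above (illustrative): broadcasting
   element 5 of a V16QImode vector starts with elt == 5, nelt2 == 8.  The
   first pass uses the low interleave, which duplicates each byte and
   leaves the value in V8HImode element 5; on the second pass nelt2 is 4,
   so 5 >= 4 selects the high interleave and elt becomes 1, leaving the
   value in V4SImode element 1 for the final pshufd to replicate.  */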
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
   broadcast permutations.  */

static bool
expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
{
  unsigned i, elt, nelt = d->nelt;

  if (d->op0 != d->op1)
    return false;

  elt = d->perm[0];
  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != elt)
      return false;

  return expand_vec_perm_broadcast_1 (d);
}
/* Implement arbitrary permutation of two V32QImode and V16QImode operands
   with 4 vpshufb insns, 2 vpermq and 3 vpor.  We should have already failed
   all the shorter instruction sequences.  */

static bool
expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
{
  rtx rperm[4][32], vperm, l[2], h[2], op, m128;
  unsigned int i, nelt, eltsz;
  bool used[4];

  if (!TARGET_AVX2
      || d->op0 == d->op1
      || (d->vmode != V32QImode && d->vmode != V16HImode))
    return false;
  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate 4 permutation masks.  If the required element is within
     the same lane, it is shuffled in.  If the required element is from
     the other lane, force a zero by setting bit 7 in the permutation mask.
     The other mask has non-negative elements if the element is requested
     from the other lane, but also moved to the other lane, so that the
     result of vpshufb can have the two V2TImode halves swapped.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < 32; ++i)
    {
      rperm[0][i] = m128;
      rperm[1][i] = m128;
      rperm[2][i] = m128;
      rperm[3][i] = m128;
    }
  used[0] = false;
  used[1] = false;
  used[2] = false;
  used[3] = false;
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
      unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
      unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);

      for (j = 0; j < eltsz; ++j)
	rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
      used[which] = true;
    }
  for (i = 0; i < 2; ++i)
    {
      if (!used[2 * i + 1])
	{
	  h[i] = NULL_RTX;
	  continue;
	}
      vperm = gen_rtx_CONST_VECTOR (V32QImode,
				    gen_rtvec_v (32, rperm[2 * i + 1]));
      vperm = force_reg (V32QImode, vperm);
      h[i] = gen_reg_rtx (V32QImode);
      op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
      emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
    }
  /* Swap the 128-bit lanes of h[X].  */
  for (i = 0; i < 2; ++i)
    {
      if (h[i] == NULL_RTX)
	continue;
      op = gen_reg_rtx (V4DImode);
      emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
				      const2_rtx, GEN_INT (3), const0_rtx,
				      const1_rtx));
      h[i] = gen_lowpart (V32QImode, op);
    }
  for (i = 0; i < 2; ++i)
    {
      if (!used[2 * i])
	{
	  l[i] = NULL_RTX;
	  continue;
	}
      vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
      vperm = force_reg (V32QImode, vperm);
      l[i] = gen_reg_rtx (V32QImode);
      op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
      emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
    }
  for (i = 0; i < 2; ++i)
    {
      if (h[i] && l[i])
	{
	  op = gen_reg_rtx (V32QImode);
	  emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
	  l[i] = op;
	}
      else if (h[i])
	l[i] = h[i];
    }

  gcc_assert (l[0] && l[1]);
  op = gen_lowpart (V32QImode, d->target);
  emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
  return true;
}
/* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
   With all of the interface bits taken care of, perform the expansion
   in D and return true on success.  */

static bool
ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* Try a single instruction expansion.  */
  if (expand_vec_perm_1 (d))
    return true;

  /* Try sequences of two instructions.  */

  if (expand_vec_perm_pshuflw_pshufhw (d))
    return true;

  if (expand_vec_perm_palignr (d))
    return true;

  if (expand_vec_perm_interleave2 (d))
    return true;

  if (expand_vec_perm_broadcast (d))
    return true;

  if (expand_vec_perm_vpermq_perm_1 (d))
    return true;

  /* Try sequences of three instructions.  */

  if (expand_vec_perm_pshufb2 (d))
    return true;

  if (expand_vec_perm_interleave3 (d))
    return true;

  /* Try sequences of four instructions.  */

  if (expand_vec_perm_vpshufb2_vpermq (d))
    return true;

  if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
    return true;

  /* ??? Look for narrow permutations whose element orderings would
     allow the promotion to a wider mode.  */

  /* ??? Look for sequences of interleave or a wider permute that place
     the data into the correct lanes for a half-vector shuffle like
     pshuf[lh]w or vpermilps.  */

  /* ??? Look for sequences of interleave that produce the desired results.
     The combinatorics of punpck[lh] get pretty ugly... */

  if (expand_vec_perm_even_odd (d))
    return true;

  /* Even longer sequences.  */
  if (expand_vec_perm_vpshufb4_vpermq2 (d))
    return true;

  return false;
}
bool
ix86_expand_vec_perm_const (rtx operands[4])
{
  struct expand_vec_perm_d d;
  unsigned char perm[MAX_VECT_LEN];
  int i, nelt, which;
  rtx sel;

  d.target = operands[0];
  d.op0 = operands[1];
  d.op1 = operands[2];
  sel = operands[3];

  d.vmode = GET_MODE (d.target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  gcc_assert (GET_CODE (sel) == CONST_VECTOR);
  gcc_assert (XVECLEN (sel, 0) == nelt);
  gcc_checking_assert (sizeof (d.perm) == sizeof (perm));

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);

      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
      perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      if (!rtx_equal_p (d.op0, d.op1))
	break;

      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      for (i = 0; i < nelt; ++i)
	if (d.perm[i] >= nelt)
	  d.perm[i] -= nelt;
      /* FALLTHRU */

    case 1:
      d.op1 = d.op0;
      break;

    case 2:
      for (i = 0; i < nelt; ++i)
	d.perm[i] -= nelt;
      d.op0 = d.op1;
      break;
    }

  if (ix86_expand_vec_perm_const_1 (&d))
    return true;

  /* If the mask says both arguments are needed, but they are the same,
     the above tried to expand with d.op0 == d.op1.  If that didn't work,
     retry with d.op0 != d.op1 as that is what testing has been done with.  */
  if (which == 3 && d.op0 == d.op1)
    {
      rtx seq;
      bool ok;

      memcpy (d.perm, perm, sizeof (perm));
      d.op1 = gen_reg_rtx (d.vmode);
      start_sequence ();
      ok = ix86_expand_vec_perm_const_1 (&d);
      seq = get_insns ();
      end_sequence ();
      if (ok)
	{
	  emit_move_insn (d.op1, d.op0);
	  emit_insn (seq);
	  return true;
	}
    }

  return false;
}
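/* Example of the WHICH encoding above (illustrative): for a V4SImode
   selector (CONST_VECTOR [0 4 1 5]) the masked indices 0 and 1 refer to
   op0 while 4 and 5 refer to op1, so which becomes 3 and both operands
   are kept; a selector [1 0 3 2] leaves which == 1 and the permutation
   is folded onto op0 alone.  */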
/* Implement targetm.vectorize.vec_perm_const_ok.  */

static bool
ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
				  const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret, one_vec;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;

  /* Given sufficient ISA support we can just return true here
     for selected vector modes.  */
  if (GET_MODE_SIZE (d.vmode) == 16)
    {
      /* All implementable with a single vpperm insn.  */
      if (TARGET_XOP)
	return true;
      /* All implementable with 2 pshufb + 1 ior.  */
      if (TARGET_SSSE3)
	return true;
      /* All implementable with shufpd or unpck[lh]pd.  */
      if (nelt == 2)
	return true;
    }

  /* Extract the values from the vector CST into the permutation
     array in D.  */
  memcpy (d.perm, sel, nelt);
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* For all elements from second vector, fold the elements to first.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to the vector type.  */
  one_vec = (which != 3);

  /* Implementable with shufps or pshufd.  */
  if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
    return true;

  /* Otherwise we have to go through the motions and see if we can
     figure out how to generate the requested permutation.  */
  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!one_vec)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = ix86_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
void
ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
{
  struct expand_vec_perm_d d;
  unsigned i, nelt;

  d.target = targ;
  d.op0 = op0;
  d.op1 = op1;
  d.vmode = GET_MODE (targ);
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = 0; i < nelt; ++i)
    d.perm[i] = i * 2 + odd;

  /* We'll either be able to implement the permutation directly...  */
  if (expand_vec_perm_1 (&d))
    return;

  /* ... or we use the special-case patterns.  */
  expand_vec_perm_even_odd_1 (&d, odd);
}
/* Expand an insert into a vector register through pinsr insn.
   Return true if successful.  */

bool
ix86_expand_pinsr (rtx *operands)
{
  rtx dst = operands[0];
  rtx src = operands[3];

  unsigned int size = INTVAL (operands[1]);
  unsigned int pos = INTVAL (operands[2]);

  if (GET_CODE (dst) == SUBREG)
    {
      pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
      dst = SUBREG_REG (dst);
    }

  if (GET_CODE (src) == SUBREG)
    src = SUBREG_REG (src);

  switch (GET_MODE (dst))
    {
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
      {
	enum machine_mode srcmode, dstmode;
	rtx (*pinsr)(rtx, rtx, rtx, rtx);

	srcmode = mode_for_size (size, MODE_INT, 0);

	switch (srcmode)
	  {
	  case QImode:
	    if (!TARGET_SSE4_1)
	      return false;
	    dstmode = V16QImode;
	    pinsr = gen_sse4_1_pinsrb;
	    break;

	  case HImode:
	    if (!TARGET_SSE2)
	      return false;
	    dstmode = V8HImode;
	    pinsr = gen_sse2_pinsrw;
	    break;

	  case SImode:
	    if (!TARGET_SSE4_1)
	      return false;
	    dstmode = V4SImode;
	    pinsr = gen_sse4_1_pinsrd;
	    break;

	  case DImode:
	    gcc_assert (TARGET_64BIT);
	    if (!TARGET_SSE4_1)
	      return false;
	    dstmode = V2DImode;
	    pinsr = gen_sse4_1_pinsrq;
	    break;

	  default:
	    return false;
	  }

	dst = gen_lowpart (dstmode, dst);
	src = gen_lowpart (srcmode, src);

	pos /= size;

	emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
	return true;
      }

    default:
      return false;
    }
}
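/* Usage sketch (illustrative): inserting a 16-bit value at bit position
   32 of a V8HImode destination arrives here with size == 16 and
   pos == 32; pos /= size leaves 2, so the emitted pinsrw uses the
   element selector GEN_INT (1 << 2).  */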
/* This function returns the calling ABI specific va_list type node.
   It returns the FNDECL specific va_list type.  */

static tree
ix86_fn_abi_va_list (tree fndecl)
{
  if (!TARGET_64BIT)
    return va_list_type_node;
  gcc_assert (fndecl != NULL_TREE);

  if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
    return ms_va_list_type_node;
  else
    return sysv_va_list_type_node;
}
/* Returns the canonical va_list type specified by TYPE.  If there
   is no valid TYPE provided, it returns NULL_TREE.  */

static tree
ix86_canonical_va_list_type (tree type)
{
  tree wtype, htype;

  /* Resolve references and pointers to va_list type.  */
  if (TREE_CODE (type) == MEM_REF)
    type = TREE_TYPE (type);
  else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
    type = TREE_TYPE (type);
  else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  if (TARGET_64BIT && va_list_type_node != NULL_TREE)
    {
      wtype = va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
	{
	  /* If va_list is an array type, the argument may have decayed
	     to a pointer type, e.g. by being passed to another function.
	     In that case, unwrap both types so that we can compare the
	     underlying records.  */
	  if (TREE_CODE (htype) == ARRAY_TYPE
	      || POINTER_TYPE_P (htype))
	    {
	      wtype = TREE_TYPE (wtype);
	      htype = TREE_TYPE (htype);
	    }
	}
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
	return va_list_type_node;
      wtype = sysv_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
	{
	  /* If va_list is an array type, the argument may have decayed
	     to a pointer type, e.g. by being passed to another function.
	     In that case, unwrap both types so that we can compare the
	     underlying records.  */
	  if (TREE_CODE (htype) == ARRAY_TYPE
	      || POINTER_TYPE_P (htype))
	    {
	      wtype = TREE_TYPE (wtype);
	      htype = TREE_TYPE (htype);
	    }
	}
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
	return sysv_va_list_type_node;
      wtype = ms_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
	{
	  /* If va_list is an array type, the argument may have decayed
	     to a pointer type, e.g. by being passed to another function.
	     In that case, unwrap both types so that we can compare the
	     underlying records.  */
	  if (TREE_CODE (htype) == ARRAY_TYPE
	      || POINTER_TYPE_P (htype))
	    {
	      wtype = TREE_TYPE (wtype);
	      htype = TREE_TYPE (htype);
	    }
	}
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
	return ms_va_list_type_node;
      return NULL_TREE;
    }

  return std_canonical_va_list_type (type);
}
/* Iterate through the target-specific builtin types for va_list.
   IDX denotes the iterator, *PTREE is set to the result type of
   the va_list builtin, and *PNAME to its internal type.
   Returns zero if there is no element for this index, otherwise
   IDX should be increased upon the next call.
   Note, do not iterate a base builtin's name like __builtin_va_list.
   Used from c_common_nodes_and_builtins.  */

static int
ix86_enum_va_list (int idx, const char **pname, tree *ptree)
{
  if (!TARGET_64BIT)
    return 0;

  switch (idx)
    {
    default:
      break;

    case 0:
      *ptree = ms_va_list_type_node;
      *pname = "__builtin_ms_va_list";
      return 1;

    case 1:
      *ptree = sysv_va_list_type_node;
      *pname = "__builtin_sysv_va_list";
      return 1;
    }

  return 0;
}
#undef TARGET_SCHED_DISPATCH
#define TARGET_SCHED_DISPATCH has_dispatch
#undef TARGET_SCHED_DISPATCH_DO
#define TARGET_SCHED_DISPATCH_DO do_dispatch
#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width

/* The size of the dispatch window is the total number of bytes of
   object code allowed in a window.  */
#define DISPATCH_WINDOW_SIZE 16

/* Number of dispatch windows considered for scheduling.  */
#define MAX_DISPATCH_WINDOWS 3

/* Maximum number of instructions in a window.  */
#define MAX_INSN 4

/* Maximum number of immediate operands in a window.  */
#define MAX_IMM 4

/* Maximum number of immediate bits allowed in a window.  */
#define MAX_IMM_SIZE 128

/* Maximum number of 32 bit immediates allowed in a window.  */
#define MAX_IMM_32 4

/* Maximum number of 64 bit immediates allowed in a window.  */
#define MAX_IMM_64 2

/* Maximum total of loads or prefetches allowed in a window.  */
#define MAX_LOAD 2

/* Maximum total of stores allowed in a window.  */
#define MAX_STORE 1

#define BIG 100
/* Dispatch groups.  Instructions that affect the mix in a dispatch window.  */
enum dispatch_group {
  disp_no_group = 0,
  disp_load,
  disp_store,
  disp_load_store,
  disp_prefetch,
  disp_imm,
  disp_imm_32,
  disp_imm_64,
  disp_branch,
  disp_cmp,
  disp_jcc,
  disp_last
};

/* Number of allowable groups in a dispatch window.  It is an array
   indexed by dispatch_group enum.  100 is used as a big number,
   because the number of this kind of operations does not have any
   effect in dispatch window, but we need them for other reasons in
   the table.  */
static unsigned int num_allowable_groups[disp_last] = {
  0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
};

char group_name[disp_last + 1][16] = {
  "disp_no_group", "disp_load", "disp_store", "disp_load_store",
  "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
  "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
};
/* Instruction path.  */
enum insn_path
{
  no_path = 0,
  path_single,	/* Single micro op.  */
  path_double,	/* Double micro op.  */
  path_multi,	/* Instructions with more than 2 micro ops.  */
  last_path
};
/* sched_insn_info defines a window to the instructions scheduled in
   the basic block.  It contains a pointer to the insn_info table and
   the instruction scheduled.

   Windows are allocated for each basic block and are linked
   together.  */
typedef struct sched_insn_info_s
{
  rtx insn;
  enum dispatch_group group;
  enum insn_path path;
  int byte_len;
  int imm_bytes;
} sched_insn_info;
/* Linked list of dispatch windows.  This is a two-way list of
   dispatch windows of a basic block.  It contains information about
   the number of uops in the window and the total number of
   instructions and of bytes in the object code for this dispatch
   window.  */
typedef struct dispatch_windows_s
{
  int num_insn;		   /* Number of insn in the window.  */
  int num_uops;		   /* Number of uops in the window.  */
  int window_size;	   /* Number of bytes in the window.  */
  int window_num;	   /* Window number between 0 or 1.  */
  int num_imm;		   /* Number of immediates in an insn.  */
  int num_imm_32;	   /* Number of 32 bit immediates in an insn.  */
  int num_imm_64;	   /* Number of 64 bit immediates in an insn.  */
  int imm_size;		   /* Total immediates in the window.  */
  int num_loads;	   /* Total memory loads in the window.  */
  int num_stores;	   /* Total memory stores in the window.  */
  int violation;	   /* Violation exists in window.  */
  sched_insn_info *window; /* Pointer to the window.  */
  struct dispatch_windows_s *next;
  struct dispatch_windows_s *prev;
} dispatch_windows;
/* Immediate values used in an insn.  */
typedef struct imm_info_s
{
  int imm;
  int imm32;
  int imm64;
} imm_info;

static dispatch_windows *dispatch_window_list;
static dispatch_windows *dispatch_window_list1;
/* Get dispatch group of insn.  */

static enum dispatch_group
get_mem_group (rtx insn)
{
  enum attr_memory memory;

  if (INSN_CODE (insn) < 0)
    return disp_no_group;
  memory = get_attr_memory (insn);
  if (memory == MEMORY_STORE)
    return disp_store;

  if (memory == MEMORY_LOAD)
    return disp_load;

  if (memory == MEMORY_BOTH)
    return disp_load_store;

  return disp_no_group;
}
/* Return true if insn is a compare instruction.  */

static bool
is_cmp (rtx insn)
{
  enum attr_type type;

  type = get_attr_type (insn);
  return (type == TYPE_TEST
	  || type == TYPE_ICMP
	  || type == TYPE_FCMP
	  || GET_CODE (PATTERN (insn)) == COMPARE);
}
/* Return true if a dispatch violation is encountered.  */

static bool
dispatch_violation (void)
{
  if (dispatch_window_list->next)
    return dispatch_window_list->next->violation;
  return dispatch_window_list->violation;
}
/* Return true if insn is a branch instruction.  */

static bool
is_branch (rtx insn)
{
  return (CALL_P (insn) || JUMP_P (insn));
}

/* Return true if insn is a prefetch instruction.  */

static bool
is_prefetch (rtx insn)
{
  return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
}
/* This function initializes a dispatch window and the list container holding a
   pointer to the window.  */

static void
init_window (int window_num)
{
  int i;
  dispatch_windows *new_list;

  if (window_num == 0)
    new_list = dispatch_window_list;
  else
    new_list = dispatch_window_list1;

  new_list->num_insn = 0;
  new_list->num_uops = 0;
  new_list->window_size = 0;
  new_list->next = NULL;
  new_list->prev = NULL;
  new_list->window_num = window_num;
  new_list->num_imm = 0;
  new_list->num_imm_32 = 0;
  new_list->num_imm_64 = 0;
  new_list->imm_size = 0;
  new_list->num_loads = 0;
  new_list->num_stores = 0;
  new_list->violation = false;

  for (i = 0; i < MAX_INSN; i++)
    {
      new_list->window[i].insn = NULL;
      new_list->window[i].group = disp_no_group;
      new_list->window[i].path = no_path;
      new_list->window[i].byte_len = 0;
      new_list->window[i].imm_bytes = 0;
    }
}
/* This function allocates and initializes a dispatch window and the
   list container holding a pointer to the window.  */

static dispatch_windows *
allocate_window (void)
{
  dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
  new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);

  return new_list;
}
/* This routine initializes the dispatch scheduling information.  It
   initiates building dispatch scheduler tables and constructs the
   first dispatch window.  */

static void
init_dispatch_sched (void)
{
  /* Allocate a dispatch list and a window.  */
  dispatch_window_list = allocate_window ();
  dispatch_window_list1 = allocate_window ();

  init_window (0);
  init_window (1);
}
/* This function returns true if a branch is detected.  End of a basic block
   does not have to be a branch, but here we assume only branches end a
   window.  */

static bool
is_end_basic_block (enum dispatch_group group)
{
  return group == disp_branch;
}
/* This function is called when the end of a window processing is reached.  */

static void
process_end_window (void)
{
  gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
  if (dispatch_window_list->next)
    {
      gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
      gcc_assert (dispatch_window_list->window_size
		  + dispatch_window_list1->window_size <= 48);
      init_window (1);
    }
  init_window (0);
}
/* Allocates a new dispatch window and adds it to WINDOW_LIST.
   WINDOW_NUM is either 0 or 1.  A maximum of two windows are generated
   for 48 bytes of instructions.  Note that these windows are not dispatch
   windows whose sizes are DISPATCH_WINDOW_SIZE.  */

static dispatch_windows *
allocate_next_window (int window_num)
{
  if (window_num == 0)
    {
      if (dispatch_window_list->next)
	init_window (1);
      init_window (0);
      return dispatch_window_list;
    }

  dispatch_window_list->next = dispatch_window_list1;
  dispatch_window_list1->prev = dispatch_window_list;

  return dispatch_window_list1;
}
/* Increment the number of immediate operands of an instruction.  */

static int
find_constant_1 (rtx *in_rtx, imm_info *imm_values)
{
  if (*in_rtx == 0)
    return 0;

  switch ( GET_CODE (*in_rtx))
    {
    case CONST:
    case SYMBOL_REF:
    case CONST_INT:
      (imm_values->imm)++;
      if (x86_64_immediate_operand (*in_rtx, SImode))
	(imm_values->imm32)++;
      else
	(imm_values->imm64)++;
      break;

    case CONST_DOUBLE:
      (imm_values->imm)++;
      (imm_values->imm64)++;
      break;

    case CODE_LABEL:
      if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
	{
	  (imm_values->imm)++;
	  (imm_values->imm32)++;
	}
      break;

    default:
      break;
    }

  return 0;
}
/* Compute number of immediate operands of an instruction.  */

static void
find_constant (rtx in_rtx, imm_info *imm_values)
{
  for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
		(rtx_function) find_constant_1, (void *) imm_values);
}
/* Return total size of immediate operands of an instruction along with number
   of corresponding immediate-operands.  It initializes its parameters to zero
   before calling FIND_CONSTANT.
   INSN is the input instruction.  IMM is the total of immediates.
   IMM32 is the number of 32 bit immediates.  IMM64 is the number of 64
   bit immediates.  */

static int
get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
{
  imm_info imm_values = {0, 0, 0};

  find_constant (insn, &imm_values);
  *imm = imm_values.imm;
  *imm32 = imm_values.imm32;
  *imm64 = imm_values.imm64;
  return imm_values.imm32 * 4 + imm_values.imm64 * 8;
}
/* This function indicates if an operand of an instruction is an
   immediate.  */

static bool
has_immediate (rtx insn)
{
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (insn)
    return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
			       &num_imm64_operand);
  return false;
}
/* Return single or double path for instructions.  */

static enum insn_path
get_insn_path (rtx insn)
{
  enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);

  if ((int)path == 0)
    return path_single;

  if ((int)path == 1)
    return path_double;

  return path_multi;
}
/* Return insn dispatch group.  */

static enum dispatch_group
get_insn_group (rtx insn)
{
  enum dispatch_group group = get_mem_group (insn);
  if (group)
    return group;

  if (is_branch (insn))
    return disp_branch;

  if (is_cmp (insn))
    return disp_cmp;

  if (has_immediate (insn))
    return disp_imm;

  if (is_prefetch (insn))
    return disp_prefetch;

  return disp_no_group;
}
/* Count number of GROUP restricted instructions in a dispatch
   window WINDOW_LIST.  */

static int
count_num_restricted (rtx insn, dispatch_windows *window_list)
{
  enum dispatch_group group = get_insn_group (insn);
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (group == disp_no_group)
    return 0;

  if (group == disp_imm)
    {
      imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
				     &num_imm64_operand);
      if (window_list->imm_size + imm_size > MAX_IMM_SIZE
	  || num_imm_operand + window_list->num_imm > MAX_IMM
	  || (num_imm32_operand > 0
	      && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
		  || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
	  || (num_imm64_operand > 0
	      && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
		  || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
	  || (window_list->imm_size + imm_size == MAX_IMM_SIZE
	      && num_imm64_operand > 0
	      && ((window_list->num_imm_64 > 0
		   && window_list->num_insn >= 2)
		  || window_list->num_insn >= 3)))
	return BIG;

      return 1;
    }

  if ((group == disp_load_store
       && (window_list->num_loads >= MAX_LOAD
	   || window_list->num_stores >= MAX_STORE))
      || ((group == disp_load
	   || group == disp_prefetch)
	  && window_list->num_loads >= MAX_LOAD)
      || (group == disp_store
	  && window_list->num_stores >= MAX_STORE))
    return BIG;

  return 1;
}
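/* Example of the immediate constraints above (illustrative): a window
   already holding one 64-bit immediate effectively consumes two of the
   MAX_IMM_32 slots, so an insn bringing three more 32-bit immediates
   trips the num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32 test and
   count_num_restricted returns BIG.  */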
/* This function returns true if insn satisfies dispatch rules on the
   last window scheduled.  */

static bool
fits_dispatch_window (rtx insn)
{
  dispatch_windows *window_list = dispatch_window_list;
  dispatch_windows *window_list_next = dispatch_window_list->next;
  unsigned int num_restrict;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int sum;

  /* Make disp_cmp and disp_jcc get scheduled at the latest.  These
     instructions should be given the lowest priority in the
     scheduling process in Haifa scheduler to make sure they will be
     scheduled in the same dispatch window as the reference to them.  */
  if (group == disp_jcc || group == disp_cmp)
    return false;

  /* Check nonrestricted.  */
  if (group == disp_no_group || group == disp_branch)
    return true;

  /* Get last dispatch window.  */
  if (window_list_next)
    window_list = window_list_next;

  if (window_list->window_num == 1)
    {
      sum = window_list->prev->window_size + window_list->window_size;

      if (sum == 32
	  || (min_insn_size (insn) + sum) >= 48)
	/* Window 1 is full.  Go for next window.  */
	return true;
    }

  num_restrict = count_num_restricted (insn, window_list);

  if (num_restrict > num_allowable_groups[group])
    return false;

  /* See if it fits in the first window.  */
  if (window_list->window_num == 0)
    {
      /* The first window should have only single and double path
	 uop instructions.  */
      if (path == path_double
	  && (window_list->num_uops + 2) > MAX_INSN)
	return false;
      else if (path != path_single)
	return false;
    }
  return true;
}
/* Add an instruction INSN with NUM_UOPS micro-operations to the
   dispatch window WINDOW_LIST.  */

static void
add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
{
  int byte_len = min_insn_size (insn);
  int num_insn = window_list->num_insn;
  int imm_size;
  sched_insn_info *window = window_list->window;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (!window_list->violation && group != disp_cmp
      && !fits_dispatch_window (insn))
    window_list->violation = true;

  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
				 &num_imm64_operand);

  /* Initialize window with new instruction.  */
  window[num_insn].insn = insn;
  window[num_insn].byte_len = byte_len;
  window[num_insn].group = group;
  window[num_insn].path = path;
  window[num_insn].imm_bytes = imm_size;

  window_list->window_size += byte_len;
  window_list->num_insn = num_insn + 1;
  window_list->num_uops = window_list->num_uops + num_uops;
  window_list->imm_size += imm_size;
  window_list->num_imm += num_imm_operand;
  window_list->num_imm_32 += num_imm32_operand;
  window_list->num_imm_64 += num_imm64_operand;

  if (group == disp_store)
    window_list->num_stores += 1;
  else if (group == disp_load
	   || group == disp_prefetch)
    window_list->num_loads += 1;
  else if (group == disp_load_store)
    {
      window_list->num_stores += 1;
      window_list->num_loads += 1;
    }
}
/* Adds a scheduled instruction, INSN, to the current dispatch window.
   If the total bytes of instructions or the number of instructions in
   the window exceed allowable, it allocates a new window.  */

static void
add_to_dispatch_window (rtx insn)
{
  int byte_len;
  dispatch_windows *window_list;
  dispatch_windows *next_list;
  dispatch_windows *window0_list;
  enum insn_path path;
  enum dispatch_group insn_group;
  bool insn_fits;
  int num_insn;
  int num_uops;
  int window_num;
  int insn_num_uops;
  int sum;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  window_list = dispatch_window_list;
  next_list = window_list->next;
  path = get_insn_path (insn);
  insn_group = get_insn_group (insn);

  /* Get the last dispatch window.  */
  if (next_list)
    window_list = dispatch_window_list->next;

  if (path == path_single)
    insn_num_uops = 1;
  else if (path == path_double)
    insn_num_uops = 2;
  else
    insn_num_uops = (int) path;

  /* If current window is full, get a new window.
     Window number zero is full, if MAX_INSN uops are scheduled in it.
     Window number one is full, if window zero's bytes plus window
     one's bytes is 32, or if the bytes of the new instruction added
     to the total makes it greater than 48, or it has already MAX_INSN
     instructions in it.  */
  num_insn = window_list->num_insn;
  num_uops = window_list->num_uops;
  window_num = window_list->window_num;
  insn_fits = fits_dispatch_window (insn);

  if (num_insn >= MAX_INSN
      || num_uops + insn_num_uops > MAX_INSN
      || !(insn_fits))
    {
      window_num = ~window_num & 1;
      window_list = allocate_next_window (window_num);
    }

  if (window_num == 0)
    {
      add_insn_window (insn, window_list, insn_num_uops);
      if (window_list->num_insn >= MAX_INSN
	  && insn_group == disp_branch)
	{
	  process_end_window ();
	  return;
	}
    }
  else if (window_num == 1)
    {
      window0_list = window_list->prev;
      sum = window0_list->window_size + window_list->window_size;
      if (sum == 32
	  || (byte_len + sum) >= 48)
	{
	  process_end_window ();
	  window_list = dispatch_window_list;
	}

      add_insn_window (insn, window_list, insn_num_uops);
    }
  else
    gcc_unreachable ();

  if (is_end_basic_block (insn_group))
    {
      /* End of basic block is reached; do end-basic-block process.  */
      process_end_window ();
      return;
    }
}
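/* Illustration (not from the original sources): with MAX_INSN == 4, a
   window 0 that already holds three single-uop insns cannot accept a
   path_double insn (3 + 2 > 4 uops), so the code above falls through to
   allocate_next_window and the insn starts window 1.  */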
/* Print the dispatch window, WINDOW_NUM, to FILE.  */

DEBUG_FUNCTION static void
debug_dispatch_window_file (FILE *file, int window_num)
{
  dispatch_windows *list;
  int i;

  if (window_num == 0)
    list = dispatch_window_list;
  else
    list = dispatch_window_list1;

  fprintf (file, "Window #%d:\n", list->window_num);
  fprintf (file, "  num_insn = %d, num_uops = %d, window_size = %d\n",
	   list->num_insn, list->num_uops, list->window_size);
  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
	   list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);

  fprintf (file, "  num_loads = %d, num_stores = %d\n", list->num_loads,
	   list->num_stores);
  fprintf (file, " insn info:\n");

  for (i = 0; i < MAX_INSN; i++)
    {
      if (!list->window[i].insn)
	break;
      fprintf (file, "    group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
	       i, group_name[list->window[i].group],
	       i, (void *)list->window[i].insn,
	       i, list->window[i].path,
	       i, list->window[i].byte_len,
	       i, list->window[i].imm_bytes);
    }
}
/* Print to stdout a dispatch window.  */

DEBUG_FUNCTION void
debug_dispatch_window (int window_num)
{
  debug_dispatch_window_file (stdout, window_num);
}
/* Print INSN dispatch information to FILE.  */

DEBUG_FUNCTION static void
debug_insn_dispatch_info_file (FILE *file, rtx insn)
{
  int byte_len;
  enum insn_path path;
  enum dispatch_group group;
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  path = get_insn_path (insn);
  group = get_insn_group (insn);
  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
				 &num_imm64_operand);

  fprintf (file, " insn info:\n");
  fprintf (file, "  group = %s, path = %d, byte_len = %d\n",
	   group_name[group], path, byte_len);
  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
	   num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
}
/* Print to stdout the status of the ready list with respect to
   dispatch windows.  */

DEBUG_FUNCTION void
debug_ready_dispatch (void)
{
  int i;
  int no_ready = number_in_ready ();

  fprintf (stdout, "Number of ready: %d\n", no_ready);

  for (i = 0; i < no_ready; i++)
    debug_insn_dispatch_info_file (stdout, get_ready_element (i));
}
/* This routine is the driver of the dispatch scheduler.  */

static void
do_dispatch (rtx insn, int mode)
{
  if (mode == DISPATCH_INIT)
    init_dispatch_sched ();
  else if (mode == ADD_TO_DISPATCH_WINDOW)
    add_to_dispatch_window (insn);
}
/* Return TRUE if Dispatch Scheduling is supported.  */

static bool
has_dispatch (rtx insn, int action)
{
  if ((ix86_tune == PROCESSOR_BDVER1 || ix86_tune == PROCESSOR_BDVER2)
      && flag_dispatch_scheduler)
    switch (action)
      {
      default:
	return false;

      case IS_DISPATCH_ON:
	return true;

      case IS_CMP:
	return is_cmp (insn);

      case DISPATCH_VIOLATION:
	return dispatch_violation ();

      case FITS_DISPATCH_WINDOW:
	return fits_dispatch_window (insn);
      }

  return false;
}
/* Implementation of reassociation_width target hook used by
   reassoc phase to identify parallelism level in reassociated
   tree.  Statements tree_code is passed in OPC.  Arguments type
   is passed in MODE.

   Currently parallel reassociation is enabled for Atom
   processors only and we set reassociation width to be 2
   because Atom may issue up to 2 instructions per cycle.

   Return value should be fixed if parallel reassociation is
   enabled for other processors.  */

static int
ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
			  enum machine_mode mode)
{
  int res = 1;

  if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
    res = 2;
  else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
    res = 2;

  return res;
}
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
   place emms and femms instructions.  */

static enum machine_mode
ix86_preferred_simd_mode (enum machine_mode mode)
{
  if (!TARGET_SSE)
    return word_mode;

  switch (mode)
    {
    case QImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
    case HImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
    case SImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
    case DImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;

    case SFmode:
      if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V8SFmode;
      else
	return V4SFmode;

    case DFmode:
      if (!TARGET_VECTORIZE_DOUBLE)
	return word_mode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V4DFmode;
      else if (TARGET_SSE2)
	return V2DFmode;
      /* FALLTHRU */

    default:
      return word_mode;
    }
}
/* If AVX is enabled then try vectorizing with both 256bit and 128bit
   vectors.  */

static unsigned int
ix86_autovectorize_vector_sizes (void)
{
  return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
}
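/* E.g. (illustrative): with -mavx and without -mprefer-avx128 this
   returns 32 | 16 == 48, so the vectorizer considers both 32-byte and
   16-byte vector sizes; a return of 0 leaves it with the single
   preferred SIMD mode.  */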
/* Initialize the GCC target structure.  */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_MERGE_DECL_ATTRIBUTES
# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ix86_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
#define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load

#undef TARGET_VECTORIZE_BUILTIN_TM_STORE
#define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store

#undef TARGET_VECTORIZE_BUILTIN_GATHER
#define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP ASM_BYTE

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_PROFILE_BEFORE_PROLOGUE
#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ix86_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"