1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
3 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
34 #include "insn-codes.h"
35 #include "insn-attr.h"
42 #include "diagnostic-core.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "common/common-target.h"
49 #include "langhooks.h"
54 #include "tm-constrs.h"
58 #include "sched-int.h"
62 #include "diagnostic.h"
64 enum upper_128bits_state
71 typedef struct block_info_def
73 /* State of the upper 128bits of AVX registers at exit. */
74 enum upper_128bits_state state
;
75 /* TRUE if state of the upper 128bits of AVX registers is unchanged
78 /* TRUE if block has been processed. */
80 /* TRUE if block has been scanned. */
82 /* Previous state of the upper 128bits of AVX registers at entry. */
83 enum upper_128bits_state prev
;
86 #define BLOCK_INFO(B) ((block_info) (B)->aux)
88 enum call_avx256_state
90 /* Callee returns 256bit AVX register. */
91 callee_return_avx256
= -1,
92 /* Callee returns and passes 256bit AVX register. */
93 callee_return_pass_avx256
,
94 /* Callee passes 256bit AVX register. */
96 /* Callee doesn't return nor passe 256bit AVX register, or no
97 256bit AVX register in function return. */
99 /* vzeroupper intrinsic. */
103 /* Check if a 256bit AVX register is referenced in stores. */
106 check_avx256_stores (rtx dest
, const_rtx set
, void *data
)
109 && VALID_AVX256_REG_MODE (GET_MODE (dest
)))
110 || (GET_CODE (set
) == SET
111 && REG_P (SET_SRC (set
))
112 && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set
)))))
114 enum upper_128bits_state
*state
115 = (enum upper_128bits_state
*) data
;
120 /* Helper function for move_or_delete_vzeroupper_1. Look for vzeroupper
121 in basic block BB. Delete it if upper 128bit AVX registers are
122 unused. If it isn't deleted, move it to just before a jump insn.
124 STATE is state of the upper 128bits of AVX registers at entry. */
127 move_or_delete_vzeroupper_2 (basic_block bb
,
128 enum upper_128bits_state state
)
131 rtx vzeroupper_insn
= NULL_RTX
;
136 if (BLOCK_INFO (bb
)->unchanged
)
139 fprintf (dump_file
, " [bb %i] unchanged: upper 128bits: %d\n",
142 BLOCK_INFO (bb
)->state
= state
;
146 if (BLOCK_INFO (bb
)->scanned
&& BLOCK_INFO (bb
)->prev
== state
)
149 fprintf (dump_file
, " [bb %i] scanned: upper 128bits: %d\n",
150 bb
->index
, BLOCK_INFO (bb
)->state
);
154 BLOCK_INFO (bb
)->prev
= state
;
157 fprintf (dump_file
, " [bb %i] entry: upper 128bits: %d\n",
162 /* BB_END changes when it is deleted. */
163 bb_end
= BB_END (bb
);
165 while (insn
!= bb_end
)
167 insn
= NEXT_INSN (insn
);
169 if (!NONDEBUG_INSN_P (insn
))
172 /* Move vzeroupper before jump/call. */
173 if (JUMP_P (insn
) || CALL_P (insn
))
175 if (!vzeroupper_insn
)
178 if (PREV_INSN (insn
) != vzeroupper_insn
)
182 fprintf (dump_file
, "Move vzeroupper after:\n");
183 print_rtl_single (dump_file
, PREV_INSN (insn
));
184 fprintf (dump_file
, "before:\n");
185 print_rtl_single (dump_file
, insn
);
187 reorder_insns_nobb (vzeroupper_insn
, vzeroupper_insn
,
190 vzeroupper_insn
= NULL_RTX
;
194 pat
= PATTERN (insn
);
196 /* Check insn for vzeroupper intrinsic. */
197 if (GET_CODE (pat
) == UNSPEC_VOLATILE
198 && XINT (pat
, 1) == UNSPECV_VZEROUPPER
)
202 /* Found vzeroupper intrinsic. */
203 fprintf (dump_file
, "Found vzeroupper:\n");
204 print_rtl_single (dump_file
, insn
);
209 /* Check insn for vzeroall intrinsic. */
210 if (GET_CODE (pat
) == PARALLEL
211 && GET_CODE (XVECEXP (pat
, 0, 0)) == UNSPEC_VOLATILE
212 && XINT (XVECEXP (pat
, 0, 0), 1) == UNSPECV_VZEROALL
)
217 /* Delete pending vzeroupper insertion. */
220 delete_insn (vzeroupper_insn
);
221 vzeroupper_insn
= NULL_RTX
;
224 else if (state
!= used
)
226 note_stores (pat
, check_avx256_stores
, &state
);
233 /* Process vzeroupper intrinsic. */
234 avx256
= INTVAL (XVECEXP (pat
, 0, 0));
238 /* Since the upper 128bits are cleared, callee must not pass
239 256bit AVX register. We only need to check if callee
240 returns 256bit AVX register. */
241 if (avx256
== callee_return_avx256
)
247 /* Remove unnecessary vzeroupper since upper 128bits are
251 fprintf (dump_file
, "Delete redundant vzeroupper:\n");
252 print_rtl_single (dump_file
, insn
);
258 /* Set state to UNUSED if callee doesn't return 256bit AVX
260 if (avx256
!= callee_return_pass_avx256
)
263 if (avx256
== callee_return_pass_avx256
264 || avx256
== callee_pass_avx256
)
266 /* Must remove vzeroupper since callee passes in 256bit
270 fprintf (dump_file
, "Delete callee pass vzeroupper:\n");
271 print_rtl_single (dump_file
, insn
);
277 vzeroupper_insn
= insn
;
283 BLOCK_INFO (bb
)->state
= state
;
284 BLOCK_INFO (bb
)->unchanged
= unchanged
;
285 BLOCK_INFO (bb
)->scanned
= true;
288 fprintf (dump_file
, " [bb %i] exit: %s: upper 128bits: %d\n",
289 bb
->index
, unchanged
? "unchanged" : "changed",
293 /* Helper function for move_or_delete_vzeroupper. Process vzeroupper
294 in BLOCK and check its predecessor blocks. Treat UNKNOWN state
295 as USED if UNKNOWN_IS_UNUSED is true. Return TRUE if the exit
299 move_or_delete_vzeroupper_1 (basic_block block
, bool unknown_is_unused
)
303 enum upper_128bits_state state
, old_state
, new_state
;
307 fprintf (dump_file
, " Process [bb %i]: status: %d\n",
308 block
->index
, BLOCK_INFO (block
)->processed
);
310 if (BLOCK_INFO (block
)->processed
)
315 /* Check all predecessor edges of this block. */
316 seen_unknown
= false;
317 FOR_EACH_EDGE (e
, ei
, block
->preds
)
321 switch (BLOCK_INFO (e
->src
)->state
)
324 if (!unknown_is_unused
)
338 old_state
= BLOCK_INFO (block
)->state
;
339 move_or_delete_vzeroupper_2 (block
, state
);
340 new_state
= BLOCK_INFO (block
)->state
;
342 if (state
!= unknown
|| new_state
== used
)
343 BLOCK_INFO (block
)->processed
= true;
345 /* Need to rescan if the upper 128bits of AVX registers are changed
347 if (new_state
!= old_state
)
349 if (new_state
== used
)
350 cfun
->machine
->rescan_vzeroupper_p
= 1;
357 /* Go through the instruction stream looking for vzeroupper. Delete
358 it if upper 128bit AVX registers are unused. If it isn't deleted,
359 move it to just before a jump insn. */
362 move_or_delete_vzeroupper (void)
367 fibheap_t worklist
, pending
, fibheap_swap
;
368 sbitmap visited
, in_worklist
, in_pending
, sbitmap_swap
;
373 /* Set up block info for each basic block. */
374 alloc_aux_for_blocks (sizeof (struct block_info_def
));
376 /* Process outgoing edges of entry point. */
378 fprintf (dump_file
, "Process outgoing edges of entry point\n");
380 FOR_EACH_EDGE (e
, ei
, ENTRY_BLOCK_PTR
->succs
)
382 move_or_delete_vzeroupper_2 (e
->dest
,
383 cfun
->machine
->caller_pass_avx256_p
385 BLOCK_INFO (e
->dest
)->processed
= true;
388 /* Compute reverse completion order of depth first search of the CFG
389 so that the data-flow runs faster. */
390 rc_order
= XNEWVEC (int, n_basic_blocks
- NUM_FIXED_BLOCKS
);
391 bb_order
= XNEWVEC (int, last_basic_block
);
392 pre_and_rev_post_order_compute (NULL
, rc_order
, false);
393 for (i
= 0; i
< n_basic_blocks
- NUM_FIXED_BLOCKS
; i
++)
394 bb_order
[rc_order
[i
]] = i
;
397 worklist
= fibheap_new ();
398 pending
= fibheap_new ();
399 visited
= sbitmap_alloc (last_basic_block
);
400 in_worklist
= sbitmap_alloc (last_basic_block
);
401 in_pending
= sbitmap_alloc (last_basic_block
);
402 sbitmap_zero (in_worklist
);
404 /* Don't check outgoing edges of entry point. */
405 sbitmap_ones (in_pending
);
407 if (BLOCK_INFO (bb
)->processed
)
408 RESET_BIT (in_pending
, bb
->index
);
411 move_or_delete_vzeroupper_1 (bb
, false);
412 fibheap_insert (pending
, bb_order
[bb
->index
], bb
);
416 fprintf (dump_file
, "Check remaining basic blocks\n");
418 while (!fibheap_empty (pending
))
420 fibheap_swap
= pending
;
422 worklist
= fibheap_swap
;
423 sbitmap_swap
= in_pending
;
424 in_pending
= in_worklist
;
425 in_worklist
= sbitmap_swap
;
427 sbitmap_zero (visited
);
429 cfun
->machine
->rescan_vzeroupper_p
= 0;
431 while (!fibheap_empty (worklist
))
433 bb
= (basic_block
) fibheap_extract_min (worklist
);
434 RESET_BIT (in_worklist
, bb
->index
);
435 gcc_assert (!TEST_BIT (visited
, bb
->index
));
436 if (!TEST_BIT (visited
, bb
->index
))
440 SET_BIT (visited
, bb
->index
);
442 if (move_or_delete_vzeroupper_1 (bb
, false))
443 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
445 if (e
->dest
== EXIT_BLOCK_PTR
446 || BLOCK_INFO (e
->dest
)->processed
)
449 if (TEST_BIT (visited
, e
->dest
->index
))
451 if (!TEST_BIT (in_pending
, e
->dest
->index
))
453 /* Send E->DEST to next round. */
454 SET_BIT (in_pending
, e
->dest
->index
);
455 fibheap_insert (pending
,
456 bb_order
[e
->dest
->index
],
460 else if (!TEST_BIT (in_worklist
, e
->dest
->index
))
462 /* Add E->DEST to current round. */
463 SET_BIT (in_worklist
, e
->dest
->index
);
464 fibheap_insert (worklist
, bb_order
[e
->dest
->index
],
471 if (!cfun
->machine
->rescan_vzeroupper_p
)
476 fibheap_delete (worklist
);
477 fibheap_delete (pending
);
478 sbitmap_free (visited
);
479 sbitmap_free (in_worklist
);
480 sbitmap_free (in_pending
);
483 fprintf (dump_file
, "Process remaining basic blocks\n");
486 move_or_delete_vzeroupper_1 (bb
, true);
488 free_aux_for_blocks ();
491 static rtx
legitimize_dllimport_symbol (rtx
, bool);
493 #ifndef CHECK_STACK_LIMIT
494 #define CHECK_STACK_LIMIT (-1)
497 /* Return index of given mode in mult and division cost tables. */
498 #define MODE_INDEX(mode) \
499 ((mode) == QImode ? 0 \
500 : (mode) == HImode ? 1 \
501 : (mode) == SImode ? 2 \
502 : (mode) == DImode ? 3 \
505 /* Processor costs (relative to an add) */
506 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
507 #define COSTS_N_BYTES(N) ((N) * 2)
509 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
512 struct processor_costs ix86_size_cost
= {/* costs for tuning for size */
513 COSTS_N_BYTES (2), /* cost of an add instruction */
514 COSTS_N_BYTES (3), /* cost of a lea instruction */
515 COSTS_N_BYTES (2), /* variable shift costs */
516 COSTS_N_BYTES (3), /* constant shift costs */
517 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
518 COSTS_N_BYTES (3), /* HI */
519 COSTS_N_BYTES (3), /* SI */
520 COSTS_N_BYTES (3), /* DI */
521 COSTS_N_BYTES (5)}, /* other */
522 0, /* cost of multiply per each bit set */
523 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
524 COSTS_N_BYTES (3), /* HI */
525 COSTS_N_BYTES (3), /* SI */
526 COSTS_N_BYTES (3), /* DI */
527 COSTS_N_BYTES (5)}, /* other */
528 COSTS_N_BYTES (3), /* cost of movsx */
529 COSTS_N_BYTES (3), /* cost of movzx */
530 0, /* "large" insn */
532 2, /* cost for loading QImode using movzbl */
533 {2, 2, 2}, /* cost of loading integer registers
534 in QImode, HImode and SImode.
535 Relative to reg-reg move (2). */
536 {2, 2, 2}, /* cost of storing integer registers */
537 2, /* cost of reg,reg fld/fst */
538 {2, 2, 2}, /* cost of loading fp registers
539 in SFmode, DFmode and XFmode */
540 {2, 2, 2}, /* cost of storing fp registers
541 in SFmode, DFmode and XFmode */
542 3, /* cost of moving MMX register */
543 {3, 3}, /* cost of loading MMX registers
544 in SImode and DImode */
545 {3, 3}, /* cost of storing MMX registers
546 in SImode and DImode */
547 3, /* cost of moving SSE register */
548 {3, 3, 3}, /* cost of loading SSE registers
549 in SImode, DImode and TImode */
550 {3, 3, 3}, /* cost of storing SSE registers
551 in SImode, DImode and TImode */
552 3, /* MMX or SSE register to integer */
553 0, /* size of l1 cache */
554 0, /* size of l2 cache */
555 0, /* size of prefetch block */
556 0, /* number of parallel prefetches */
558 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
559 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
560 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
561 COSTS_N_BYTES (2), /* cost of FABS instruction. */
562 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
563 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
564 {{{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
565 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}},
566 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
567 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}}},
568 {{{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
569 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}},
570 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
571 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}}},
572 1, /* scalar_stmt_cost. */
573 1, /* scalar load_cost. */
574 1, /* scalar_store_cost. */
575 1, /* vec_stmt_cost. */
576 1, /* vec_to_scalar_cost. */
577 1, /* scalar_to_vec_cost. */
578 1, /* vec_align_load_cost. */
579 1, /* vec_unalign_load_cost. */
580 1, /* vec_store_cost. */
581 1, /* cond_taken_branch_cost. */
582 1, /* cond_not_taken_branch_cost. */
585 /* Processor costs (relative to an add) */
587 struct processor_costs i386_cost
= { /* 386 specific costs */
588 COSTS_N_INSNS (1), /* cost of an add instruction */
589 COSTS_N_INSNS (1), /* cost of a lea instruction */
590 COSTS_N_INSNS (3), /* variable shift costs */
591 COSTS_N_INSNS (2), /* constant shift costs */
592 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
593 COSTS_N_INSNS (6), /* HI */
594 COSTS_N_INSNS (6), /* SI */
595 COSTS_N_INSNS (6), /* DI */
596 COSTS_N_INSNS (6)}, /* other */
597 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
598 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
599 COSTS_N_INSNS (23), /* HI */
600 COSTS_N_INSNS (23), /* SI */
601 COSTS_N_INSNS (23), /* DI */
602 COSTS_N_INSNS (23)}, /* other */
603 COSTS_N_INSNS (3), /* cost of movsx */
604 COSTS_N_INSNS (2), /* cost of movzx */
605 15, /* "large" insn */
607 4, /* cost for loading QImode using movzbl */
608 {2, 4, 2}, /* cost of loading integer registers
609 in QImode, HImode and SImode.
610 Relative to reg-reg move (2). */
611 {2, 4, 2}, /* cost of storing integer registers */
612 2, /* cost of reg,reg fld/fst */
613 {8, 8, 8}, /* cost of loading fp registers
614 in SFmode, DFmode and XFmode */
615 {8, 8, 8}, /* cost of storing fp registers
616 in SFmode, DFmode and XFmode */
617 2, /* cost of moving MMX register */
618 {4, 8}, /* cost of loading MMX registers
619 in SImode and DImode */
620 {4, 8}, /* cost of storing MMX registers
621 in SImode and DImode */
622 2, /* cost of moving SSE register */
623 {4, 8, 16}, /* cost of loading SSE registers
624 in SImode, DImode and TImode */
625 {4, 8, 16}, /* cost of storing SSE registers
626 in SImode, DImode and TImode */
627 3, /* MMX or SSE register to integer */
628 0, /* size of l1 cache */
629 0, /* size of l2 cache */
630 0, /* size of prefetch block */
631 0, /* number of parallel prefetches */
633 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
634 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
635 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
636 COSTS_N_INSNS (22), /* cost of FABS instruction. */
637 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
638 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
639 {{{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
640 DUMMY_STRINGOP_ALGS
},
641 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
642 DUMMY_STRINGOP_ALGS
}},
643 {{{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
644 DUMMY_STRINGOP_ALGS
},
645 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
646 DUMMY_STRINGOP_ALGS
}},
647 1, /* scalar_stmt_cost. */
648 1, /* scalar load_cost. */
649 1, /* scalar_store_cost. */
650 1, /* vec_stmt_cost. */
651 1, /* vec_to_scalar_cost. */
652 1, /* scalar_to_vec_cost. */
653 1, /* vec_align_load_cost. */
654 2, /* vec_unalign_load_cost. */
655 1, /* vec_store_cost. */
656 3, /* cond_taken_branch_cost. */
657 1, /* cond_not_taken_branch_cost. */
661 struct processor_costs i486_cost
= { /* 486 specific costs */
662 COSTS_N_INSNS (1), /* cost of an add instruction */
663 COSTS_N_INSNS (1), /* cost of a lea instruction */
664 COSTS_N_INSNS (3), /* variable shift costs */
665 COSTS_N_INSNS (2), /* constant shift costs */
666 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
667 COSTS_N_INSNS (12), /* HI */
668 COSTS_N_INSNS (12), /* SI */
669 COSTS_N_INSNS (12), /* DI */
670 COSTS_N_INSNS (12)}, /* other */
671 1, /* cost of multiply per each bit set */
672 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
673 COSTS_N_INSNS (40), /* HI */
674 COSTS_N_INSNS (40), /* SI */
675 COSTS_N_INSNS (40), /* DI */
676 COSTS_N_INSNS (40)}, /* other */
677 COSTS_N_INSNS (3), /* cost of movsx */
678 COSTS_N_INSNS (2), /* cost of movzx */
679 15, /* "large" insn */
681 4, /* cost for loading QImode using movzbl */
682 {2, 4, 2}, /* cost of loading integer registers
683 in QImode, HImode and SImode.
684 Relative to reg-reg move (2). */
685 {2, 4, 2}, /* cost of storing integer registers */
686 2, /* cost of reg,reg fld/fst */
687 {8, 8, 8}, /* cost of loading fp registers
688 in SFmode, DFmode and XFmode */
689 {8, 8, 8}, /* cost of storing fp registers
690 in SFmode, DFmode and XFmode */
691 2, /* cost of moving MMX register */
692 {4, 8}, /* cost of loading MMX registers
693 in SImode and DImode */
694 {4, 8}, /* cost of storing MMX registers
695 in SImode and DImode */
696 2, /* cost of moving SSE register */
697 {4, 8, 16}, /* cost of loading SSE registers
698 in SImode, DImode and TImode */
699 {4, 8, 16}, /* cost of storing SSE registers
700 in SImode, DImode and TImode */
701 3, /* MMX or SSE register to integer */
702 4, /* size of l1 cache. 486 has 8kB cache
703 shared for code and data, so 4kB is
704 not really precise. */
705 4, /* size of l2 cache */
706 0, /* size of prefetch block */
707 0, /* number of parallel prefetches */
709 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
710 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
711 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
712 COSTS_N_INSNS (3), /* cost of FABS instruction. */
713 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
714 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
715 {{{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
716 DUMMY_STRINGOP_ALGS
},
717 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
718 DUMMY_STRINGOP_ALGS
}},
719 {{{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
720 DUMMY_STRINGOP_ALGS
},
721 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
722 DUMMY_STRINGOP_ALGS
}},
723 1, /* scalar_stmt_cost. */
724 1, /* scalar load_cost. */
725 1, /* scalar_store_cost. */
726 1, /* vec_stmt_cost. */
727 1, /* vec_to_scalar_cost. */
728 1, /* scalar_to_vec_cost. */
729 1, /* vec_align_load_cost. */
730 2, /* vec_unalign_load_cost. */
731 1, /* vec_store_cost. */
732 3, /* cond_taken_branch_cost. */
733 1, /* cond_not_taken_branch_cost. */
737 struct processor_costs pentium_cost
= {
738 COSTS_N_INSNS (1), /* cost of an add instruction */
739 COSTS_N_INSNS (1), /* cost of a lea instruction */
740 COSTS_N_INSNS (4), /* variable shift costs */
741 COSTS_N_INSNS (1), /* constant shift costs */
742 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
743 COSTS_N_INSNS (11), /* HI */
744 COSTS_N_INSNS (11), /* SI */
745 COSTS_N_INSNS (11), /* DI */
746 COSTS_N_INSNS (11)}, /* other */
747 0, /* cost of multiply per each bit set */
748 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
749 COSTS_N_INSNS (25), /* HI */
750 COSTS_N_INSNS (25), /* SI */
751 COSTS_N_INSNS (25), /* DI */
752 COSTS_N_INSNS (25)}, /* other */
753 COSTS_N_INSNS (3), /* cost of movsx */
754 COSTS_N_INSNS (2), /* cost of movzx */
755 8, /* "large" insn */
757 6, /* cost for loading QImode using movzbl */
758 {2, 4, 2}, /* cost of loading integer registers
759 in QImode, HImode and SImode.
760 Relative to reg-reg move (2). */
761 {2, 4, 2}, /* cost of storing integer registers */
762 2, /* cost of reg,reg fld/fst */
763 {2, 2, 6}, /* cost of loading fp registers
764 in SFmode, DFmode and XFmode */
765 {4, 4, 6}, /* cost of storing fp registers
766 in SFmode, DFmode and XFmode */
767 8, /* cost of moving MMX register */
768 {8, 8}, /* cost of loading MMX registers
769 in SImode and DImode */
770 {8, 8}, /* cost of storing MMX registers
771 in SImode and DImode */
772 2, /* cost of moving SSE register */
773 {4, 8, 16}, /* cost of loading SSE registers
774 in SImode, DImode and TImode */
775 {4, 8, 16}, /* cost of storing SSE registers
776 in SImode, DImode and TImode */
777 3, /* MMX or SSE register to integer */
778 8, /* size of l1 cache. */
779 8, /* size of l2 cache */
780 0, /* size of prefetch block */
781 0, /* number of parallel prefetches */
783 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
784 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
785 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
786 COSTS_N_INSNS (1), /* cost of FABS instruction. */
787 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
788 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
789 {{{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
790 DUMMY_STRINGOP_ALGS
},
791 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
792 DUMMY_STRINGOP_ALGS
}},
793 {{{libcall
, {{-1, rep_prefix_4_byte
}}},
794 DUMMY_STRINGOP_ALGS
},
795 {{libcall
, {{-1, rep_prefix_4_byte
}}},
796 DUMMY_STRINGOP_ALGS
}},
797 1, /* scalar_stmt_cost. */
798 1, /* scalar load_cost. */
799 1, /* scalar_store_cost. */
800 1, /* vec_stmt_cost. */
801 1, /* vec_to_scalar_cost. */
802 1, /* scalar_to_vec_cost. */
803 1, /* vec_align_load_cost. */
804 2, /* vec_unalign_load_cost. */
805 1, /* vec_store_cost. */
806 3, /* cond_taken_branch_cost. */
807 1, /* cond_not_taken_branch_cost. */
811 struct processor_costs pentiumpro_cost
= {
812 COSTS_N_INSNS (1), /* cost of an add instruction */
813 COSTS_N_INSNS (1), /* cost of a lea instruction */
814 COSTS_N_INSNS (1), /* variable shift costs */
815 COSTS_N_INSNS (1), /* constant shift costs */
816 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
817 COSTS_N_INSNS (4), /* HI */
818 COSTS_N_INSNS (4), /* SI */
819 COSTS_N_INSNS (4), /* DI */
820 COSTS_N_INSNS (4)}, /* other */
821 0, /* cost of multiply per each bit set */
822 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
823 COSTS_N_INSNS (17), /* HI */
824 COSTS_N_INSNS (17), /* SI */
825 COSTS_N_INSNS (17), /* DI */
826 COSTS_N_INSNS (17)}, /* other */
827 COSTS_N_INSNS (1), /* cost of movsx */
828 COSTS_N_INSNS (1), /* cost of movzx */
829 8, /* "large" insn */
831 2, /* cost for loading QImode using movzbl */
832 {4, 4, 4}, /* cost of loading integer registers
833 in QImode, HImode and SImode.
834 Relative to reg-reg move (2). */
835 {2, 2, 2}, /* cost of storing integer registers */
836 2, /* cost of reg,reg fld/fst */
837 {2, 2, 6}, /* cost of loading fp registers
838 in SFmode, DFmode and XFmode */
839 {4, 4, 6}, /* cost of storing fp registers
840 in SFmode, DFmode and XFmode */
841 2, /* cost of moving MMX register */
842 {2, 2}, /* cost of loading MMX registers
843 in SImode and DImode */
844 {2, 2}, /* cost of storing MMX registers
845 in SImode and DImode */
846 2, /* cost of moving SSE register */
847 {2, 2, 8}, /* cost of loading SSE registers
848 in SImode, DImode and TImode */
849 {2, 2, 8}, /* cost of storing SSE registers
850 in SImode, DImode and TImode */
851 3, /* MMX or SSE register to integer */
852 8, /* size of l1 cache. */
853 256, /* size of l2 cache */
854 32, /* size of prefetch block */
855 6, /* number of parallel prefetches */
857 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
858 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
859 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
860 COSTS_N_INSNS (2), /* cost of FABS instruction. */
861 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
862 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
863 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
864 (we ensure the alignment). For small blocks inline loop is still a
865 noticeable win, for bigger blocks either rep movsl or rep movsb is
866 way to go. Rep movsb has apparently more expensive startup time in CPU,
867 but after 4K the difference is down in the noise. */
868 {{{rep_prefix_4_byte
, {{128, loop
}, {1024, unrolled_loop
},
869 {8192, rep_prefix_4_byte
}, {-1, rep_prefix_1_byte
}}},
870 DUMMY_STRINGOP_ALGS
},
871 {{rep_prefix_4_byte
, {{128, loop
}, {1024, unrolled_loop
},
872 {8192, rep_prefix_4_byte
}, {-1, rep_prefix_1_byte
}}},
873 DUMMY_STRINGOP_ALGS
}},
874 {{{rep_prefix_4_byte
, {{1024, unrolled_loop
},
875 {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
876 DUMMY_STRINGOP_ALGS
},
877 {{rep_prefix_4_byte
, {{1024, unrolled_loop
},
878 {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
879 DUMMY_STRINGOP_ALGS
}},
880 1, /* scalar_stmt_cost. */
881 1, /* scalar load_cost. */
882 1, /* scalar_store_cost. */
883 1, /* vec_stmt_cost. */
884 1, /* vec_to_scalar_cost. */
885 1, /* scalar_to_vec_cost. */
886 1, /* vec_align_load_cost. */
887 2, /* vec_unalign_load_cost. */
888 1, /* vec_store_cost. */
889 3, /* cond_taken_branch_cost. */
890 1, /* cond_not_taken_branch_cost. */
894 struct processor_costs geode_cost
= {
895 COSTS_N_INSNS (1), /* cost of an add instruction */
896 COSTS_N_INSNS (1), /* cost of a lea instruction */
897 COSTS_N_INSNS (2), /* variable shift costs */
898 COSTS_N_INSNS (1), /* constant shift costs */
899 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
900 COSTS_N_INSNS (4), /* HI */
901 COSTS_N_INSNS (7), /* SI */
902 COSTS_N_INSNS (7), /* DI */
903 COSTS_N_INSNS (7)}, /* other */
904 0, /* cost of multiply per each bit set */
905 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
906 COSTS_N_INSNS (23), /* HI */
907 COSTS_N_INSNS (39), /* SI */
908 COSTS_N_INSNS (39), /* DI */
909 COSTS_N_INSNS (39)}, /* other */
910 COSTS_N_INSNS (1), /* cost of movsx */
911 COSTS_N_INSNS (1), /* cost of movzx */
912 8, /* "large" insn */
914 1, /* cost for loading QImode using movzbl */
915 {1, 1, 1}, /* cost of loading integer registers
916 in QImode, HImode and SImode.
917 Relative to reg-reg move (2). */
918 {1, 1, 1}, /* cost of storing integer registers */
919 1, /* cost of reg,reg fld/fst */
920 {1, 1, 1}, /* cost of loading fp registers
921 in SFmode, DFmode and XFmode */
922 {4, 6, 6}, /* cost of storing fp registers
923 in SFmode, DFmode and XFmode */
925 1, /* cost of moving MMX register */
926 {1, 1}, /* cost of loading MMX registers
927 in SImode and DImode */
928 {1, 1}, /* cost of storing MMX registers
929 in SImode and DImode */
930 1, /* cost of moving SSE register */
931 {1, 1, 1}, /* cost of loading SSE registers
932 in SImode, DImode and TImode */
933 {1, 1, 1}, /* cost of storing SSE registers
934 in SImode, DImode and TImode */
935 1, /* MMX or SSE register to integer */
936 64, /* size of l1 cache. */
937 128, /* size of l2 cache. */
938 32, /* size of prefetch block */
939 1, /* number of parallel prefetches */
941 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
942 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
943 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
944 COSTS_N_INSNS (1), /* cost of FABS instruction. */
945 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
946 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
947 {{{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
948 DUMMY_STRINGOP_ALGS
},
949 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
950 DUMMY_STRINGOP_ALGS
}},
951 {{{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
952 DUMMY_STRINGOP_ALGS
},
953 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
954 DUMMY_STRINGOP_ALGS
}},
955 1, /* scalar_stmt_cost. */
956 1, /* scalar load_cost. */
957 1, /* scalar_store_cost. */
958 1, /* vec_stmt_cost. */
959 1, /* vec_to_scalar_cost. */
960 1, /* scalar_to_vec_cost. */
961 1, /* vec_align_load_cost. */
962 2, /* vec_unalign_load_cost. */
963 1, /* vec_store_cost. */
964 3, /* cond_taken_branch_cost. */
965 1, /* cond_not_taken_branch_cost. */
969 struct processor_costs k6_cost
= {
970 COSTS_N_INSNS (1), /* cost of an add instruction */
971 COSTS_N_INSNS (2), /* cost of a lea instruction */
972 COSTS_N_INSNS (1), /* variable shift costs */
973 COSTS_N_INSNS (1), /* constant shift costs */
974 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
975 COSTS_N_INSNS (3), /* HI */
976 COSTS_N_INSNS (3), /* SI */
977 COSTS_N_INSNS (3), /* DI */
978 COSTS_N_INSNS (3)}, /* other */
979 0, /* cost of multiply per each bit set */
980 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
981 COSTS_N_INSNS (18), /* HI */
982 COSTS_N_INSNS (18), /* SI */
983 COSTS_N_INSNS (18), /* DI */
984 COSTS_N_INSNS (18)}, /* other */
985 COSTS_N_INSNS (2), /* cost of movsx */
986 COSTS_N_INSNS (2), /* cost of movzx */
987 8, /* "large" insn */
989 3, /* cost for loading QImode using movzbl */
990 {4, 5, 4}, /* cost of loading integer registers
991 in QImode, HImode and SImode.
992 Relative to reg-reg move (2). */
993 {2, 3, 2}, /* cost of storing integer registers */
994 4, /* cost of reg,reg fld/fst */
995 {6, 6, 6}, /* cost of loading fp registers
996 in SFmode, DFmode and XFmode */
997 {4, 4, 4}, /* cost of storing fp registers
998 in SFmode, DFmode and XFmode */
999 2, /* cost of moving MMX register */
1000 {2, 2}, /* cost of loading MMX registers
1001 in SImode and DImode */
1002 {2, 2}, /* cost of storing MMX registers
1003 in SImode and DImode */
1004 2, /* cost of moving SSE register */
1005 {2, 2, 8}, /* cost of loading SSE registers
1006 in SImode, DImode and TImode */
1007 {2, 2, 8}, /* cost of storing SSE registers
1008 in SImode, DImode and TImode */
1009 6, /* MMX or SSE register to integer */
1010 32, /* size of l1 cache. */
1011 32, /* size of l2 cache. Some models
1012 have integrated l2 cache, but
1013 optimizing for k6 is not important
1014 enough to worry about that. */
1015 32, /* size of prefetch block */
1016 1, /* number of parallel prefetches */
1017 1, /* Branch cost */
1018 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
1019 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
1020 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
1021 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1022 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1023 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
1024 {{{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
1025 DUMMY_STRINGOP_ALGS
},
1026 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
1027 DUMMY_STRINGOP_ALGS
}},
1028 {{{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
1029 DUMMY_STRINGOP_ALGS
},
1030 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
1031 DUMMY_STRINGOP_ALGS
}},
1032 1, /* scalar_stmt_cost. */
1033 1, /* scalar load_cost. */
1034 1, /* scalar_store_cost. */
1035 1, /* vec_stmt_cost. */
1036 1, /* vec_to_scalar_cost. */
1037 1, /* scalar_to_vec_cost. */
1038 1, /* vec_align_load_cost. */
1039 2, /* vec_unalign_load_cost. */
1040 1, /* vec_store_cost. */
1041 3, /* cond_taken_branch_cost. */
1042 1, /* cond_not_taken_branch_cost. */
1046 struct processor_costs athlon_cost
= {
1047 COSTS_N_INSNS (1), /* cost of an add instruction */
1048 COSTS_N_INSNS (2), /* cost of a lea instruction */
1049 COSTS_N_INSNS (1), /* variable shift costs */
1050 COSTS_N_INSNS (1), /* constant shift costs */
1051 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
1052 COSTS_N_INSNS (5), /* HI */
1053 COSTS_N_INSNS (5), /* SI */
1054 COSTS_N_INSNS (5), /* DI */
1055 COSTS_N_INSNS (5)}, /* other */
1056 0, /* cost of multiply per each bit set */
1057 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1058 COSTS_N_INSNS (26), /* HI */
1059 COSTS_N_INSNS (42), /* SI */
1060 COSTS_N_INSNS (74), /* DI */
1061 COSTS_N_INSNS (74)}, /* other */
1062 COSTS_N_INSNS (1), /* cost of movsx */
1063 COSTS_N_INSNS (1), /* cost of movzx */
1064 8, /* "large" insn */
1066 4, /* cost for loading QImode using movzbl */
1067 {3, 4, 3}, /* cost of loading integer registers
1068 in QImode, HImode and SImode.
1069 Relative to reg-reg move (2). */
1070 {3, 4, 3}, /* cost of storing integer registers */
1071 4, /* cost of reg,reg fld/fst */
1072 {4, 4, 12}, /* cost of loading fp registers
1073 in SFmode, DFmode and XFmode */
1074 {6, 6, 8}, /* cost of storing fp registers
1075 in SFmode, DFmode and XFmode */
1076 2, /* cost of moving MMX register */
1077 {4, 4}, /* cost of loading MMX registers
1078 in SImode and DImode */
1079 {4, 4}, /* cost of storing MMX registers
1080 in SImode and DImode */
1081 2, /* cost of moving SSE register */
1082 {4, 4, 6}, /* cost of loading SSE registers
1083 in SImode, DImode and TImode */
1084 {4, 4, 5}, /* cost of storing SSE registers
1085 in SImode, DImode and TImode */
1086 5, /* MMX or SSE register to integer */
1087 64, /* size of l1 cache. */
1088 256, /* size of l2 cache. */
1089 64, /* size of prefetch block */
1090 6, /* number of parallel prefetches */
1091 5, /* Branch cost */
1092 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1093 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1094 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
1095 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1096 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1097 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1098 /* For some reason, Athlon deals better with REP prefix (relative to loops)
1099 compared to K8. Alignment becomes important after 8 bytes for memcpy and
1100 128 bytes for memset. */
1101 {{{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1102 DUMMY_STRINGOP_ALGS
},
1103 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1104 DUMMY_STRINGOP_ALGS
}},
1105 {{{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1106 DUMMY_STRINGOP_ALGS
},
1107 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1108 DUMMY_STRINGOP_ALGS
}},
1109 1, /* scalar_stmt_cost. */
1110 1, /* scalar load_cost. */
1111 1, /* scalar_store_cost. */
1112 1, /* vec_stmt_cost. */
1113 1, /* vec_to_scalar_cost. */
1114 1, /* scalar_to_vec_cost. */
1115 1, /* vec_align_load_cost. */
1116 2, /* vec_unalign_load_cost. */
1117 1, /* vec_store_cost. */
1118 3, /* cond_taken_branch_cost. */
1119 1, /* cond_not_taken_branch_cost. */
1123 struct processor_costs k8_cost
= {
1124 COSTS_N_INSNS (1), /* cost of an add instruction */
1125 COSTS_N_INSNS (2), /* cost of a lea instruction */
1126 COSTS_N_INSNS (1), /* variable shift costs */
1127 COSTS_N_INSNS (1), /* constant shift costs */
1128 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1129 COSTS_N_INSNS (4), /* HI */
1130 COSTS_N_INSNS (3), /* SI */
1131 COSTS_N_INSNS (4), /* DI */
1132 COSTS_N_INSNS (5)}, /* other */
1133 0, /* cost of multiply per each bit set */
1134 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1135 COSTS_N_INSNS (26), /* HI */
1136 COSTS_N_INSNS (42), /* SI */
1137 COSTS_N_INSNS (74), /* DI */
1138 COSTS_N_INSNS (74)}, /* other */
1139 COSTS_N_INSNS (1), /* cost of movsx */
1140 COSTS_N_INSNS (1), /* cost of movzx */
1141 8, /* "large" insn */
1143 4, /* cost for loading QImode using movzbl */
1144 {3, 4, 3}, /* cost of loading integer registers
1145 in QImode, HImode and SImode.
1146 Relative to reg-reg move (2). */
1147 {3, 4, 3}, /* cost of storing integer registers */
1148 4, /* cost of reg,reg fld/fst */
1149 {4, 4, 12}, /* cost of loading fp registers
1150 in SFmode, DFmode and XFmode */
1151 {6, 6, 8}, /* cost of storing fp registers
1152 in SFmode, DFmode and XFmode */
1153 2, /* cost of moving MMX register */
1154 {3, 3}, /* cost of loading MMX registers
1155 in SImode and DImode */
1156 {4, 4}, /* cost of storing MMX registers
1157 in SImode and DImode */
1158 2, /* cost of moving SSE register */
1159 {4, 3, 6}, /* cost of loading SSE registers
1160 in SImode, DImode and TImode */
1161 {4, 4, 5}, /* cost of storing SSE registers
1162 in SImode, DImode and TImode */
1163 5, /* MMX or SSE register to integer */
1164 64, /* size of l1 cache. */
1165 512, /* size of l2 cache. */
1166 64, /* size of prefetch block */
1167 /* New AMD processors never drop prefetches; if they cannot be performed
1168 immediately, they are queued. We set number of simultaneous prefetches
1169 to a large constant to reflect this (it probably is not a good idea not
1170 to limit number of prefetches at all, as their execution also takes some
1172 100, /* number of parallel prefetches */
1173 3, /* Branch cost */
1174 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1175 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1176 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1177 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1178 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1179 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1180 /* K8 has optimized REP instruction for medium sized blocks, but for very
1181 small blocks it is better to use loop. For large blocks, libcall can
1182 do nontemporary accesses and beat inline considerably. */
1183 {{{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
1184 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1185 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
1186 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}},
1187 {{{libcall
, {{8, loop
}, {24, unrolled_loop
},
1188 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1189 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1190 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
1191 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1192 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}},
1193 4, /* scalar_stmt_cost. */
1194 2, /* scalar load_cost. */
1195 2, /* scalar_store_cost. */
1196 5, /* vec_stmt_cost. */
1197 0, /* vec_to_scalar_cost. */
1198 2, /* scalar_to_vec_cost. */
1199 2, /* vec_align_load_cost. */
1200 3, /* vec_unalign_load_cost. */
1201 3, /* vec_store_cost. */
1202 3, /* cond_taken_branch_cost. */
1203 2, /* cond_not_taken_branch_cost. */
1206 struct processor_costs amdfam10_cost
= {
1207 COSTS_N_INSNS (1), /* cost of an add instruction */
1208 COSTS_N_INSNS (2), /* cost of a lea instruction */
1209 COSTS_N_INSNS (1), /* variable shift costs */
1210 COSTS_N_INSNS (1), /* constant shift costs */
1211 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1212 COSTS_N_INSNS (4), /* HI */
1213 COSTS_N_INSNS (3), /* SI */
1214 COSTS_N_INSNS (4), /* DI */
1215 COSTS_N_INSNS (5)}, /* other */
1216 0, /* cost of multiply per each bit set */
1217 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1218 COSTS_N_INSNS (35), /* HI */
1219 COSTS_N_INSNS (51), /* SI */
1220 COSTS_N_INSNS (83), /* DI */
1221 COSTS_N_INSNS (83)}, /* other */
1222 COSTS_N_INSNS (1), /* cost of movsx */
1223 COSTS_N_INSNS (1), /* cost of movzx */
1224 8, /* "large" insn */
1226 4, /* cost for loading QImode using movzbl */
1227 {3, 4, 3}, /* cost of loading integer registers
1228 in QImode, HImode and SImode.
1229 Relative to reg-reg move (2). */
1230 {3, 4, 3}, /* cost of storing integer registers */
1231 4, /* cost of reg,reg fld/fst */
1232 {4, 4, 12}, /* cost of loading fp registers
1233 in SFmode, DFmode and XFmode */
1234 {6, 6, 8}, /* cost of storing fp registers
1235 in SFmode, DFmode and XFmode */
1236 2, /* cost of moving MMX register */
1237 {3, 3}, /* cost of loading MMX registers
1238 in SImode and DImode */
1239 {4, 4}, /* cost of storing MMX registers
1240 in SImode and DImode */
1241 2, /* cost of moving SSE register */
1242 {4, 4, 3}, /* cost of loading SSE registers
1243 in SImode, DImode and TImode */
1244 {4, 4, 5}, /* cost of storing SSE registers
1245 in SImode, DImode and TImode */
1246 3, /* MMX or SSE register to integer */
1248 MOVD reg64, xmmreg Double FSTORE 4
1249 MOVD reg32, xmmreg Double FSTORE 4
1251 MOVD reg64, xmmreg Double FADD 3
1253 MOVD reg32, xmmreg Double FADD 3
1255 64, /* size of l1 cache. */
1256 512, /* size of l2 cache. */
1257 64, /* size of prefetch block */
1258 /* New AMD processors never drop prefetches; if they cannot be performed
1259 immediately, they are queued. We set number of simultaneous prefetches
1260 to a large constant to reflect this (it probably is not a good idea not
1261 to limit number of prefetches at all, as their execution also takes some
1263 100, /* number of parallel prefetches */
1264 2, /* Branch cost */
1265 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1266 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1267 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1268 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1269 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1270 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1272 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
1273 very small blocks it is better to use loop. For large blocks, libcall can
1274 do nontemporary accesses and beat inline considerably. */
1275 {{{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
1276 {libcall
, {{16, loop
}, {512, rep_prefix_8_byte
}, {-1, libcall
}}}},
1277 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
1278 {libcall
, {{16, loop
}, {512, rep_prefix_8_byte
}, {-1, libcall
}}}}},
1279 {{{libcall
, {{8, loop
}, {24, unrolled_loop
},
1280 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1281 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1282 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
1283 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1284 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}},
1285 4, /* scalar_stmt_cost. */
1286 2, /* scalar load_cost. */
1287 2, /* scalar_store_cost. */
1288 6, /* vec_stmt_cost. */
1289 0, /* vec_to_scalar_cost. */
1290 2, /* scalar_to_vec_cost. */
1291 2, /* vec_align_load_cost. */
1292 2, /* vec_unalign_load_cost. */
1293 2, /* vec_store_cost. */
1294 2, /* cond_taken_branch_cost. */
1295 1, /* cond_not_taken_branch_cost. */
1298 struct processor_costs bdver1_cost
= {
1299 COSTS_N_INSNS (1), /* cost of an add instruction */
1300 COSTS_N_INSNS (1), /* cost of a lea instruction */
1301 COSTS_N_INSNS (1), /* variable shift costs */
1302 COSTS_N_INSNS (1), /* constant shift costs */
1303 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1304 COSTS_N_INSNS (4), /* HI */
1305 COSTS_N_INSNS (4), /* SI */
1306 COSTS_N_INSNS (6), /* DI */
1307 COSTS_N_INSNS (6)}, /* other */
1308 0, /* cost of multiply per each bit set */
1309 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1310 COSTS_N_INSNS (35), /* HI */
1311 COSTS_N_INSNS (51), /* SI */
1312 COSTS_N_INSNS (83), /* DI */
1313 COSTS_N_INSNS (83)}, /* other */
1314 COSTS_N_INSNS (1), /* cost of movsx */
1315 COSTS_N_INSNS (1), /* cost of movzx */
1316 8, /* "large" insn */
1318 4, /* cost for loading QImode using movzbl */
1319 {5, 5, 4}, /* cost of loading integer registers
1320 in QImode, HImode and SImode.
1321 Relative to reg-reg move (2). */
1322 {4, 4, 4}, /* cost of storing integer registers */
1323 2, /* cost of reg,reg fld/fst */
1324 {5, 5, 12}, /* cost of loading fp registers
1325 in SFmode, DFmode and XFmode */
1326 {4, 4, 8}, /* cost of storing fp registers
1327 in SFmode, DFmode and XFmode */
1328 2, /* cost of moving MMX register */
1329 {4, 4}, /* cost of loading MMX registers
1330 in SImode and DImode */
1331 {4, 4}, /* cost of storing MMX registers
1332 in SImode and DImode */
1333 2, /* cost of moving SSE register */
1334 {4, 4, 4}, /* cost of loading SSE registers
1335 in SImode, DImode and TImode */
1336 {4, 4, 4}, /* cost of storing SSE registers
1337 in SImode, DImode and TImode */
1338 2, /* MMX or SSE register to integer */
1340 MOVD reg64, xmmreg Double FSTORE 4
1341 MOVD reg32, xmmreg Double FSTORE 4
1343 MOVD reg64, xmmreg Double FADD 3
1345 MOVD reg32, xmmreg Double FADD 3
1347 16, /* size of l1 cache. */
1348 2048, /* size of l2 cache. */
1349 64, /* size of prefetch block */
1350 /* New AMD processors never drop prefetches; if they cannot be performed
1351 immediately, they are queued. We set number of simultaneous prefetches
1352 to a large constant to reflect this (it probably is not a good idea not
1353 to limit number of prefetches at all, as their execution also takes some
1355 100, /* number of parallel prefetches */
1356 2, /* Branch cost */
1357 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1358 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1359 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1360 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1361 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1362 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1364 /* BDVER1 has optimized REP instruction for medium sized blocks, but for
1365 very small blocks it is better to use loop. For large blocks, libcall
1366 can do nontemporary accesses and beat inline considerably. */
1367 {{{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
1368 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1369 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
1370 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}},
1371 {{{libcall
, {{8, loop
}, {24, unrolled_loop
},
1372 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1373 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1374 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
1375 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1376 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}},
1377 6, /* scalar_stmt_cost. */
1378 4, /* scalar load_cost. */
1379 4, /* scalar_store_cost. */
1380 6, /* vec_stmt_cost. */
1381 0, /* vec_to_scalar_cost. */
1382 2, /* scalar_to_vec_cost. */
1383 4, /* vec_align_load_cost. */
1384 4, /* vec_unalign_load_cost. */
1385 4, /* vec_store_cost. */
1386 2, /* cond_taken_branch_cost. */
1387 1, /* cond_not_taken_branch_cost. */
1390 struct processor_costs bdver2_cost
= {
1391 COSTS_N_INSNS (1), /* cost of an add instruction */
1392 COSTS_N_INSNS (1), /* cost of a lea instruction */
1393 COSTS_N_INSNS (1), /* variable shift costs */
1394 COSTS_N_INSNS (1), /* constant shift costs */
1395 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1396 COSTS_N_INSNS (4), /* HI */
1397 COSTS_N_INSNS (4), /* SI */
1398 COSTS_N_INSNS (6), /* DI */
1399 COSTS_N_INSNS (6)}, /* other */
1400 0, /* cost of multiply per each bit set */
1401 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1402 COSTS_N_INSNS (35), /* HI */
1403 COSTS_N_INSNS (51), /* SI */
1404 COSTS_N_INSNS (83), /* DI */
1405 COSTS_N_INSNS (83)}, /* other */
1406 COSTS_N_INSNS (1), /* cost of movsx */
1407 COSTS_N_INSNS (1), /* cost of movzx */
1408 8, /* "large" insn */
1410 4, /* cost for loading QImode using movzbl */
1411 {5, 5, 4}, /* cost of loading integer registers
1412 in QImode, HImode and SImode.
1413 Relative to reg-reg move (2). */
1414 {4, 4, 4}, /* cost of storing integer registers */
1415 2, /* cost of reg,reg fld/fst */
1416 {5, 5, 12}, /* cost of loading fp registers
1417 in SFmode, DFmode and XFmode */
1418 {4, 4, 8}, /* cost of storing fp registers
1419 in SFmode, DFmode and XFmode */
1420 2, /* cost of moving MMX register */
1421 {4, 4}, /* cost of loading MMX registers
1422 in SImode and DImode */
1423 {4, 4}, /* cost of storing MMX registers
1424 in SImode and DImode */
1425 2, /* cost of moving SSE register */
1426 {4, 4, 4}, /* cost of loading SSE registers
1427 in SImode, DImode and TImode */
1428 {4, 4, 4}, /* cost of storing SSE registers
1429 in SImode, DImode and TImode */
1430 2, /* MMX or SSE register to integer */
1432 MOVD reg64, xmmreg Double FSTORE 4
1433 MOVD reg32, xmmreg Double FSTORE 4
1435 MOVD reg64, xmmreg Double FADD 3
1437 MOVD reg32, xmmreg Double FADD 3
1439 16, /* size of l1 cache. */
1440 2048, /* size of l2 cache. */
1441 64, /* size of prefetch block */
1442 /* New AMD processors never drop prefetches; if they cannot be performed
1443 immediately, they are queued. We set number of simultaneous prefetches
1444 to a large constant to reflect this (it probably is not a good idea not
1445 to limit number of prefetches at all, as their execution also takes some
1447 100, /* number of parallel prefetches */
1448 2, /* Branch cost */
1449 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1450 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1451 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1452 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1453 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1454 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1456 /* BDVER2 has optimized REP instruction for medium sized blocks, but for
1457 very small blocks it is better to use loop. For large blocks, libcall
1458 can do nontemporary accesses and beat inline considerably. */
1459 {{{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
1460 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1461 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
1462 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}},
1463 {{{libcall
, {{8, loop
}, {24, unrolled_loop
},
1464 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1465 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1466 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
1467 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1468 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}},
1469 6, /* scalar_stmt_cost. */
1470 4, /* scalar load_cost. */
1471 4, /* scalar_store_cost. */
1472 6, /* vec_stmt_cost. */
1473 0, /* vec_to_scalar_cost. */
1474 2, /* scalar_to_vec_cost. */
1475 4, /* vec_align_load_cost. */
1476 4, /* vec_unalign_load_cost. */
1477 4, /* vec_store_cost. */
1478 2, /* cond_taken_branch_cost. */
1479 1, /* cond_not_taken_branch_cost. */
1482 struct processor_costs btver1_cost
= {
1483 COSTS_N_INSNS (1), /* cost of an add instruction */
1484 COSTS_N_INSNS (2), /* cost of a lea instruction */
1485 COSTS_N_INSNS (1), /* variable shift costs */
1486 COSTS_N_INSNS (1), /* constant shift costs */
1487 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1488 COSTS_N_INSNS (4), /* HI */
1489 COSTS_N_INSNS (3), /* SI */
1490 COSTS_N_INSNS (4), /* DI */
1491 COSTS_N_INSNS (5)}, /* other */
1492 0, /* cost of multiply per each bit set */
1493 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1494 COSTS_N_INSNS (35), /* HI */
1495 COSTS_N_INSNS (51), /* SI */
1496 COSTS_N_INSNS (83), /* DI */
1497 COSTS_N_INSNS (83)}, /* other */
1498 COSTS_N_INSNS (1), /* cost of movsx */
1499 COSTS_N_INSNS (1), /* cost of movzx */
1500 8, /* "large" insn */
1502 4, /* cost for loading QImode using movzbl */
1503 {3, 4, 3}, /* cost of loading integer registers
1504 in QImode, HImode and SImode.
1505 Relative to reg-reg move (2). */
1506 {3, 4, 3}, /* cost of storing integer registers */
1507 4, /* cost of reg,reg fld/fst */
1508 {4, 4, 12}, /* cost of loading fp registers
1509 in SFmode, DFmode and XFmode */
1510 {6, 6, 8}, /* cost of storing fp registers
1511 in SFmode, DFmode and XFmode */
1512 2, /* cost of moving MMX register */
1513 {3, 3}, /* cost of loading MMX registers
1514 in SImode and DImode */
1515 {4, 4}, /* cost of storing MMX registers
1516 in SImode and DImode */
1517 2, /* cost of moving SSE register */
1518 {4, 4, 3}, /* cost of loading SSE registers
1519 in SImode, DImode and TImode */
1520 {4, 4, 5}, /* cost of storing SSE registers
1521 in SImode, DImode and TImode */
1522 3, /* MMX or SSE register to integer */
1524 MOVD reg64, xmmreg Double FSTORE 4
1525 MOVD reg32, xmmreg Double FSTORE 4
1527 MOVD reg64, xmmreg Double FADD 3
1529 MOVD reg32, xmmreg Double FADD 3
1531 32, /* size of l1 cache. */
1532 512, /* size of l2 cache. */
1533 64, /* size of prefetch block */
1534 100, /* number of parallel prefetches */
1535 2, /* Branch cost */
1536 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1537 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1538 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1539 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1540 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1541 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1543 /* BTVER1 has optimized REP instruction for medium sized blocks, but for
1544 very small blocks it is better to use loop. For large blocks, libcall can
1545 do nontemporary accesses and beat inline considerably. */
1546 {{{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
1547 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1548 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
1549 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}},
1550 {{{libcall
, {{8, loop
}, {24, unrolled_loop
},
1551 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1552 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1553 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
1554 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1555 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}},
1556 4, /* scalar_stmt_cost. */
1557 2, /* scalar load_cost. */
1558 2, /* scalar_store_cost. */
1559 6, /* vec_stmt_cost. */
1560 0, /* vec_to_scalar_cost. */
1561 2, /* scalar_to_vec_cost. */
1562 2, /* vec_align_load_cost. */
1563 2, /* vec_unalign_load_cost. */
1564 2, /* vec_store_cost. */
1565 2, /* cond_taken_branch_cost. */
1566 1, /* cond_not_taken_branch_cost. */
1570 struct processor_costs pentium4_cost
= {
1571 COSTS_N_INSNS (1), /* cost of an add instruction */
1572 COSTS_N_INSNS (3), /* cost of a lea instruction */
1573 COSTS_N_INSNS (4), /* variable shift costs */
1574 COSTS_N_INSNS (4), /* constant shift costs */
1575 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1576 COSTS_N_INSNS (15), /* HI */
1577 COSTS_N_INSNS (15), /* SI */
1578 COSTS_N_INSNS (15), /* DI */
1579 COSTS_N_INSNS (15)}, /* other */
1580 0, /* cost of multiply per each bit set */
1581 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1582 COSTS_N_INSNS (56), /* HI */
1583 COSTS_N_INSNS (56), /* SI */
1584 COSTS_N_INSNS (56), /* DI */
1585 COSTS_N_INSNS (56)}, /* other */
1586 COSTS_N_INSNS (1), /* cost of movsx */
1587 COSTS_N_INSNS (1), /* cost of movzx */
1588 16, /* "large" insn */
1590 2, /* cost for loading QImode using movzbl */
1591 {4, 5, 4}, /* cost of loading integer registers
1592 in QImode, HImode and SImode.
1593 Relative to reg-reg move (2). */
1594 {2, 3, 2}, /* cost of storing integer registers */
1595 2, /* cost of reg,reg fld/fst */
1596 {2, 2, 6}, /* cost of loading fp registers
1597 in SFmode, DFmode and XFmode */
1598 {4, 4, 6}, /* cost of storing fp registers
1599 in SFmode, DFmode and XFmode */
1600 2, /* cost of moving MMX register */
1601 {2, 2}, /* cost of loading MMX registers
1602 in SImode and DImode */
1603 {2, 2}, /* cost of storing MMX registers
1604 in SImode and DImode */
1605 12, /* cost of moving SSE register */
1606 {12, 12, 12}, /* cost of loading SSE registers
1607 in SImode, DImode and TImode */
1608 {2, 2, 8}, /* cost of storing SSE registers
1609 in SImode, DImode and TImode */
1610 10, /* MMX or SSE register to integer */
1611 8, /* size of l1 cache. */
1612 256, /* size of l2 cache. */
1613 64, /* size of prefetch block */
1614 6, /* number of parallel prefetches */
1615 2, /* Branch cost */
1616 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1617 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1618 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1619 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1620 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1621 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1623 {{{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
1624 DUMMY_STRINGOP_ALGS
},
1625 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
1626 DUMMY_STRINGOP_ALGS
}},
1628 {{{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
1630 DUMMY_STRINGOP_ALGS
},
1631 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
1633 DUMMY_STRINGOP_ALGS
}},
1634 1, /* scalar_stmt_cost. */
1635 1, /* scalar load_cost. */
1636 1, /* scalar_store_cost. */
1637 1, /* vec_stmt_cost. */
1638 1, /* vec_to_scalar_cost. */
1639 1, /* scalar_to_vec_cost. */
1640 1, /* vec_align_load_cost. */
1641 2, /* vec_unalign_load_cost. */
1642 1, /* vec_store_cost. */
1643 3, /* cond_taken_branch_cost. */
1644 1, /* cond_not_taken_branch_cost. */
1648 struct processor_costs nocona_cost
= {
1649 COSTS_N_INSNS (1), /* cost of an add instruction */
1650 COSTS_N_INSNS (1), /* cost of a lea instruction */
1651 COSTS_N_INSNS (1), /* variable shift costs */
1652 COSTS_N_INSNS (1), /* constant shift costs */
1653 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1654 COSTS_N_INSNS (10), /* HI */
1655 COSTS_N_INSNS (10), /* SI */
1656 COSTS_N_INSNS (10), /* DI */
1657 COSTS_N_INSNS (10)}, /* other */
1658 0, /* cost of multiply per each bit set */
1659 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1660 COSTS_N_INSNS (66), /* HI */
1661 COSTS_N_INSNS (66), /* SI */
1662 COSTS_N_INSNS (66), /* DI */
1663 COSTS_N_INSNS (66)}, /* other */
1664 COSTS_N_INSNS (1), /* cost of movsx */
1665 COSTS_N_INSNS (1), /* cost of movzx */
1666 16, /* "large" insn */
1667 17, /* MOVE_RATIO */
1668 4, /* cost for loading QImode using movzbl */
1669 {4, 4, 4}, /* cost of loading integer registers
1670 in QImode, HImode and SImode.
1671 Relative to reg-reg move (2). */
1672 {4, 4, 4}, /* cost of storing integer registers */
1673 3, /* cost of reg,reg fld/fst */
1674 {12, 12, 12}, /* cost of loading fp registers
1675 in SFmode, DFmode and XFmode */
1676 {4, 4, 4}, /* cost of storing fp registers
1677 in SFmode, DFmode and XFmode */
1678 6, /* cost of moving MMX register */
1679 {12, 12}, /* cost of loading MMX registers
1680 in SImode and DImode */
1681 {12, 12}, /* cost of storing MMX registers
1682 in SImode and DImode */
1683 6, /* cost of moving SSE register */
1684 {12, 12, 12}, /* cost of loading SSE registers
1685 in SImode, DImode and TImode */
1686 {12, 12, 12}, /* cost of storing SSE registers
1687 in SImode, DImode and TImode */
1688 8, /* MMX or SSE register to integer */
1689 8, /* size of l1 cache. */
1690 1024, /* size of l2 cache. */
1691 128, /* size of prefetch block */
1692 8, /* number of parallel prefetches */
1693 1, /* Branch cost */
1694 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1695 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1696 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1697 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1698 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1699 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1701 {{{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
1702 {libcall
, {{32, loop
}, {20000, rep_prefix_8_byte
},
1703 {100000, unrolled_loop
}, {-1, libcall
}}}},
1704 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
1705 {libcall
, {{32, loop
}, {20000, rep_prefix_8_byte
},
1706 {100000, unrolled_loop
}, {-1, libcall
}}}}},
1708 {{{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
1710 {libcall
, {{24, loop
}, {64, unrolled_loop
},
1711 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1712 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
1714 {libcall
, {{24, loop
}, {64, unrolled_loop
},
1715 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}},
1716 1, /* scalar_stmt_cost. */
1717 1, /* scalar load_cost. */
1718 1, /* scalar_store_cost. */
1719 1, /* vec_stmt_cost. */
1720 1, /* vec_to_scalar_cost. */
1721 1, /* scalar_to_vec_cost. */
1722 1, /* vec_align_load_cost. */
1723 2, /* vec_unalign_load_cost. */
1724 1, /* vec_store_cost. */
1725 3, /* cond_taken_branch_cost. */
1726 1, /* cond_not_taken_branch_cost. */
1730 struct processor_costs atom_cost
= {
1731 COSTS_N_INSNS (1), /* cost of an add instruction */
1732 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1733 COSTS_N_INSNS (1), /* variable shift costs */
1734 COSTS_N_INSNS (1), /* constant shift costs */
1735 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1736 COSTS_N_INSNS (4), /* HI */
1737 COSTS_N_INSNS (3), /* SI */
1738 COSTS_N_INSNS (4), /* DI */
1739 COSTS_N_INSNS (2)}, /* other */
1740 0, /* cost of multiply per each bit set */
1741 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1742 COSTS_N_INSNS (26), /* HI */
1743 COSTS_N_INSNS (42), /* SI */
1744 COSTS_N_INSNS (74), /* DI */
1745 COSTS_N_INSNS (74)}, /* other */
1746 COSTS_N_INSNS (1), /* cost of movsx */
1747 COSTS_N_INSNS (1), /* cost of movzx */
1748 8, /* "large" insn */
1749 17, /* MOVE_RATIO */
1750 4, /* cost for loading QImode using movzbl */
1751 {4, 4, 4}, /* cost of loading integer registers
1752 in QImode, HImode and SImode.
1753 Relative to reg-reg move (2). */
1754 {4, 4, 4}, /* cost of storing integer registers */
1755 4, /* cost of reg,reg fld/fst */
1756 {12, 12, 12}, /* cost of loading fp registers
1757 in SFmode, DFmode and XFmode */
1758 {6, 6, 8}, /* cost of storing fp registers
1759 in SFmode, DFmode and XFmode */
1760 2, /* cost of moving MMX register */
1761 {8, 8}, /* cost of loading MMX registers
1762 in SImode and DImode */
1763 {8, 8}, /* cost of storing MMX registers
1764 in SImode and DImode */
1765 2, /* cost of moving SSE register */
1766 {8, 8, 8}, /* cost of loading SSE registers
1767 in SImode, DImode and TImode */
1768 {8, 8, 8}, /* cost of storing SSE registers
1769 in SImode, DImode and TImode */
1770 5, /* MMX or SSE register to integer */
1771 32, /* size of l1 cache. */
1772 256, /* size of l2 cache. */
1773 64, /* size of prefetch block */
1774 6, /* number of parallel prefetches */
1775 3, /* Branch cost */
1776 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1777 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1778 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1779 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1780 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1781 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1783 /* stringop_algs for memcpy.
1784 SSE loops works best on Atom, but fall back into non-SSE unrolled loop variant
1786 {{{libcall
, {{4096, sse_loop
}, {4096, unrolled_loop
}, {-1, libcall
}}}, /* Known alignment. */
1787 {libcall
, {{4096, sse_loop
}, {4096, unrolled_loop
}, {-1, libcall
}}}},
1788 {{libcall
, {{2048, sse_loop
}, {2048, unrolled_loop
}, {-1, libcall
}}}, /* Unknown alignment. */
1789 {libcall
, {{2048, sse_loop
}, {2048, unrolled_loop
},
1792 /* stringop_algs for memset. */
1793 {{{libcall
, {{4096, sse_loop
}, {4096, unrolled_loop
}, {-1, libcall
}}}, /* Known alignment. */
1794 {libcall
, {{4096, sse_loop
}, {4096, unrolled_loop
}, {-1, libcall
}}}},
1795 {{libcall
, {{1024, sse_loop
}, {1024, unrolled_loop
}, /* Unknown alignment. */
1797 {libcall
, {{2048, sse_loop
}, {2048, unrolled_loop
},
1799 1, /* scalar_stmt_cost. */
1800 1, /* scalar load_cost. */
1801 1, /* scalar_store_cost. */
1802 1, /* vec_stmt_cost. */
1803 1, /* vec_to_scalar_cost. */
1804 1, /* scalar_to_vec_cost. */
1805 1, /* vec_align_load_cost. */
1806 2, /* vec_unalign_load_cost. */
1807 1, /* vec_store_cost. */
1808 3, /* cond_taken_branch_cost. */
1809 1, /* cond_not_taken_branch_cost. */
1812 /* Core should produce code tuned for core variants. */
1814 struct processor_costs core_cost
= {
1815 COSTS_N_INSNS (1), /* cost of an add instruction */
1816 /* On all chips taken into consideration lea is 2 cycles and more. With
1817 this cost however our current implementation of synth_mult results in
1818 use of unnecessary temporary registers causing regression on several
1819 SPECfp benchmarks. */
1820 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1821 COSTS_N_INSNS (1), /* variable shift costs */
1822 COSTS_N_INSNS (1), /* constant shift costs */
1823 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1824 COSTS_N_INSNS (4), /* HI */
1825 COSTS_N_INSNS (3), /* SI */
1826 COSTS_N_INSNS (4), /* DI */
1827 COSTS_N_INSNS (2)}, /* other */
1828 0, /* cost of multiply per each bit set */
1829 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1830 COSTS_N_INSNS (26), /* HI */
1831 COSTS_N_INSNS (42), /* SI */
1832 COSTS_N_INSNS (74), /* DI */
1833 COSTS_N_INSNS (74)}, /* other */
1834 COSTS_N_INSNS (1), /* cost of movsx */
1835 COSTS_N_INSNS (1), /* cost of movzx */
1836 8, /* "large" insn */
1837 17, /* MOVE_RATIO */
1838 4, /* cost for loading QImode using movzbl */
1839 {4, 4, 4}, /* cost of loading integer registers
1840 in QImode, HImode and SImode.
1841 Relative to reg-reg move (2). */
1842 {4, 4, 4}, /* cost of storing integer registers */
1843 4, /* cost of reg,reg fld/fst */
1844 {12, 12, 12}, /* cost of loading fp registers
1845 in SFmode, DFmode and XFmode */
1846 {6, 6, 8}, /* cost of storing fp registers
1847 in SFmode, DFmode and XFmode */
1848 2, /* cost of moving MMX register */
1849 {8, 8}, /* cost of loading MMX registers
1850 in SImode and DImode */
1851 {8, 8}, /* cost of storing MMX registers
1852 in SImode and DImode */
1853 2, /* cost of moving SSE register */
1854 {8, 8, 8}, /* cost of loading SSE registers
1855 in SImode, DImode and TImode */
1856 {8, 8, 8}, /* cost of storing SSE registers
1857 in SImode, DImode and TImode */
1858 5, /* MMX or SSE register to integer */
1859 32, /* size of l1 cache. */
1860 512, /* size of l2 cache. */
1861 64, /* size of prefetch block */
1862 6, /* number of parallel prefetches */
1863 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this
1864 value is increased to perhaps more appropriate value of 5. */
1865 3, /* Branch cost */
1866 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1867 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1868 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1869 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1870 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1871 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1873 /* stringop_algs for memcpy. */
1874 {{{libcall
, {{16, loop
}, {24, unrolled_loop
}, {1024, rep_prefix_4_byte
}, {-1, libcall
}}}, /* Known alignment. */
1875 {libcall
, {{16, loop
}, {24, unrolled_loop
}, {1024, rep_prefix_8_byte
}, {-1, libcall
}}}},
1876 {{libcall
, {{16, loop
}, {24, unrolled_loop
}, {1024, rep_prefix_4_byte
}, {-1, libcall
}}}, /* Unknown alignment. */
1877 {libcall
, {{16, loop
}, {24, unrolled_loop
}, {1024, rep_prefix_8_byte
}, {-1, libcall
}}}}},
1879 /* stringop_algs for memset. */
1880 {{{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}}, /* Known alignment. */
1881 {libcall
, {{256, rep_prefix_8_byte
}, {-1, libcall
}}}},
1882 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}}, /* Unknown alignment. */
1883 {libcall
, {{256, rep_prefix_8_byte
}, {-1, libcall
}}}}},
1884 1, /* scalar_stmt_cost. */
1885 1, /* scalar load_cost. */
1886 1, /* scalar_store_cost. */
1887 1, /* vec_stmt_cost. */
1888 1, /* vec_to_scalar_cost. */
1889 1, /* scalar_to_vec_cost. */
1890 1, /* vec_align_load_cost. */
1891 2, /* vec_unalign_load_cost. */
1892 1, /* vec_store_cost. */
1893 3, /* cond_taken_branch_cost. */
1894 1, /* cond_not_taken_branch_cost. */
1897 /* Generic64 should produce code tuned for Nocona, Core, K8, Amdfam10 and buldozer. */
1899 struct processor_costs generic64_cost
= {
1900 COSTS_N_INSNS (1), /* cost of an add instruction */
1901 /* On all chips taken into consideration lea is 2 cycles and more. With
1902 this cost however our current implementation of synth_mult results in
1903 use of unnecessary temporary registers causing regression on several
1904 SPECfp benchmarks. */
1905 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1906 COSTS_N_INSNS (1), /* variable shift costs */
1907 COSTS_N_INSNS (1), /* constant shift costs */
1908 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1909 COSTS_N_INSNS (4), /* HI */
1910 COSTS_N_INSNS (3), /* SI */
1911 COSTS_N_INSNS (4), /* DI */
1912 COSTS_N_INSNS (2)}, /* other */
1913 0, /* cost of multiply per each bit set */
1914 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1915 COSTS_N_INSNS (26), /* HI */
1916 COSTS_N_INSNS (42), /* SI */
1917 COSTS_N_INSNS (74), /* DI */
1918 COSTS_N_INSNS (74)}, /* other */
1919 COSTS_N_INSNS (1), /* cost of movsx */
1920 COSTS_N_INSNS (1), /* cost of movzx */
1921 8, /* "large" insn */
1922 17, /* MOVE_RATIO */
1923 4, /* cost for loading QImode using movzbl */
1924 {4, 4, 4}, /* cost of loading integer registers
1925 in QImode, HImode and SImode.
1926 Relative to reg-reg move (2). */
1927 {4, 4, 4}, /* cost of storing integer registers */
1928 4, /* cost of reg,reg fld/fst */
1929 {12, 12, 12}, /* cost of loading fp registers
1930 in SFmode, DFmode and XFmode */
1931 {6, 6, 8}, /* cost of storing fp registers
1932 in SFmode, DFmode and XFmode */
1933 2, /* cost of moving MMX register */
1934 {8, 8}, /* cost of loading MMX registers
1935 in SImode and DImode */
1936 {8, 8}, /* cost of storing MMX registers
1937 in SImode and DImode */
1938 2, /* cost of moving SSE register */
1939 {8, 8, 8}, /* cost of loading SSE registers
1940 in SImode, DImode and TImode */
1941 {8, 8, 8}, /* cost of storing SSE registers
1942 in SImode, DImode and TImode */
1943 5, /* MMX or SSE register to integer */
1944 32, /* size of l1 cache. */
1945 512, /* size of l2 cache. */
1946 64, /* size of prefetch block */
1947 6, /* number of parallel prefetches */
1948 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this
1949 value is increased to perhaps more appropriate value of 5. */
1950 3, /* Branch cost */
1951 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1952 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1953 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1954 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1955 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1956 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1958 {{DUMMY_STRINGOP_ALGS
,
1959 {libcall
, {{16, rep_prefix_4_byte
}, {128, rep_prefix_8_byte
}, {4096, rep_prefix_1_byte
}, {-1, libcall
}}}},
1960 {DUMMY_STRINGOP_ALGS
,
1961 {libcall
, {{128, rep_prefix_4_byte
}, {4096, rep_prefix_1_byte
}, {-1, libcall
}}}}},
1963 {{DUMMY_STRINGOP_ALGS
,
1964 {libcall
, {{16, rep_prefix_4_byte
}, {512, unrolled_loop
}, {4096, rep_prefix_1_byte
}, {-1, libcall
}}}},
1965 {DUMMY_STRINGOP_ALGS
,
1966 {libcall
, {{16, rep_prefix_4_byte
}, {512, unrolled_loop
}, {4096, rep_prefix_1_byte
}, {-1, libcall
}}}}},
1967 1, /* scalar_stmt_cost. */
1968 1, /* scalar load_cost. */
1969 1, /* scalar_store_cost. */
1970 1, /* vec_stmt_cost. */
1971 1, /* vec_to_scalar_cost. */
1972 1, /* scalar_to_vec_cost. */
1973 1, /* vec_align_load_cost. */
1974 2, /* vec_unalign_load_cost. */
1975 1, /* vec_store_cost. */
1976 3, /* cond_taken_branch_cost. */
1977 1, /* cond_not_taken_branch_cost. */
1980 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona, Core
1981 Athlon, K8, amdfam10, buldozer. */
1983 struct processor_costs generic32_cost
= {
1984 COSTS_N_INSNS (1), /* cost of an add instruction */
1985 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1986 COSTS_N_INSNS (1), /* variable shift costs */
1987 COSTS_N_INSNS (1), /* constant shift costs */
1988 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1989 COSTS_N_INSNS (4), /* HI */
1990 COSTS_N_INSNS (3), /* SI */
1991 COSTS_N_INSNS (4), /* DI */
1992 COSTS_N_INSNS (2)}, /* other */
1993 0, /* cost of multiply per each bit set */
1994 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1995 COSTS_N_INSNS (26), /* HI */
1996 COSTS_N_INSNS (42), /* SI */
1997 COSTS_N_INSNS (74), /* DI */
1998 COSTS_N_INSNS (74)}, /* other */
1999 COSTS_N_INSNS (1), /* cost of movsx */
2000 COSTS_N_INSNS (1), /* cost of movzx */
2001 8, /* "large" insn */
2002 17, /* MOVE_RATIO */
2003 4, /* cost for loading QImode using movzbl */
2004 {4, 4, 4}, /* cost of loading integer registers
2005 in QImode, HImode and SImode.
2006 Relative to reg-reg move (2). */
2007 {4, 4, 4}, /* cost of storing integer registers */
2008 4, /* cost of reg,reg fld/fst */
2009 {12, 12, 12}, /* cost of loading fp registers
2010 in SFmode, DFmode and XFmode */
2011 {6, 6, 8}, /* cost of storing fp registers
2012 in SFmode, DFmode and XFmode */
2013 2, /* cost of moving MMX register */
2014 {8, 8}, /* cost of loading MMX registers
2015 in SImode and DImode */
2016 {8, 8}, /* cost of storing MMX registers
2017 in SImode and DImode */
2018 2, /* cost of moving SSE register */
2019 {8, 8, 8}, /* cost of loading SSE registers
2020 in SImode, DImode and TImode */
2021 {8, 8, 8}, /* cost of storing SSE registers
2022 in SImode, DImode and TImode */
2023 5, /* MMX or SSE register to integer */
2024 32, /* size of l1 cache. */
2025 256, /* size of l2 cache. */
2026 64, /* size of prefetch block */
2027 6, /* number of parallel prefetches */
2028 3, /* Branch cost */
2029 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2030 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2031 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2032 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2033 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2034 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
2035 /* stringop_algs for memcpy. */
2036 {{{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
2037 DUMMY_STRINGOP_ALGS
},
2038 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
2039 DUMMY_STRINGOP_ALGS
}},
2040 /* stringop_algs for memset. */
2041 {{{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
2042 DUMMY_STRINGOP_ALGS
},
2043 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
2044 DUMMY_STRINGOP_ALGS
}},
2045 1, /* scalar_stmt_cost. */
2046 1, /* scalar load_cost. */
2047 1, /* scalar_store_cost. */
2048 1, /* vec_stmt_cost. */
2049 1, /* vec_to_scalar_cost. */
2050 1, /* scalar_to_vec_cost. */
2051 1, /* vec_align_load_cost. */
2052 2, /* vec_unalign_load_cost. */
2053 1, /* vec_store_cost. */
2054 3, /* cond_taken_branch_cost. */
2055 1, /* cond_not_taken_branch_cost. */
2058 const struct processor_costs
*ix86_cost
= &pentium_cost
;
2060 /* Processor feature/optimization bitmasks. */
2061 #define m_386 (1<<PROCESSOR_I386)
2062 #define m_486 (1<<PROCESSOR_I486)
2063 #define m_PENT (1<<PROCESSOR_PENTIUM)
2064 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2065 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2066 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2067 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2068 #define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
2069 #define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
2070 #define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
2071 #define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
2072 #define m_COREI7 (m_COREI7_32 | m_COREI7_64)
2073 #define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
2074 #define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
2075 #define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
2076 #define m_ATOM (1<<PROCESSOR_ATOM)
2078 #define m_GEODE (1<<PROCESSOR_GEODE)
2079 #define m_K6 (1<<PROCESSOR_K6)
2080 #define m_K6_GEODE (m_K6 | m_GEODE)
2081 #define m_K8 (1<<PROCESSOR_K8)
2082 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2083 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2084 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2085 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2086 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2087 #define m_BDVER (m_BDVER1 | m_BDVER2)
2088 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2089 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1)
2091 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
2092 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
2094 /* Generic instruction choice should be common subset of supported CPUs
2095 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
2096 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
2098 /* Feature tests against the various tunings. */
2099 unsigned char ix86_tune_features
[X86_TUNE_LAST
];
2101 /* Feature tests against the various tunings used to create ix86_tune_features
2102 based on the processor mask. */
2103 static unsigned int initial_ix86_tune_features
[X86_TUNE_LAST
] = {
2104 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
2105 negatively, so enabling for Generic64 seems like good code size
2106 tradeoff. We can't enable it for 32bit generic because it does not
2107 work well with PPro base chips. */
2108 m_386
| m_CORE2I7_64
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC64
,
2110 /* X86_TUNE_PUSH_MEMORY */
2111 m_386
| m_P4_NOCONA
| m_CORE2I7
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
2113 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
2116 /* X86_TUNE_UNROLL_STRLEN */
2117 m_486
| m_PENT
| m_PPRO
| m_ATOM
| m_CORE2I7
| m_K6
| m_AMD_MULTIPLE
| m_GENERIC
,
2119 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
2120 on simulation result. But after P4 was made, no performance benefit
2121 was observed with branch hints. It also increases the code size.
2122 As a result, icc never generates branch hints. */
2125 /* X86_TUNE_DOUBLE_WITH_ADD */
2128 /* X86_TUNE_USE_SAHF */
2129 m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_K6_GEODE
| m_K8
| m_AMDFAM10
| m_BDVER
| m_BTVER1
| m_GENERIC
,
2131 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
2132 partial dependencies. */
2133 m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
2135 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
2136 register stalls on Generic32 compilation setting as well. However
2137 in current implementation the partial register stalls are not eliminated
2138 very well - they can be introduced via subregs synthesized by combine
2139 and can happen in caller/callee saving sequences. Because this option
2140 pays back little on PPro based chips and is in conflict with partial reg
2141 dependencies used by Athlon/P4 based chips, it is better to leave it off
2142 for generic32 for now. */
2145 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
2146 m_CORE2I7
| m_GENERIC
,
2148 /* X86_TUNE_USE_HIMODE_FIOP */
2149 m_386
| m_486
| m_K6_GEODE
,
2151 /* X86_TUNE_USE_SIMODE_FIOP */
2152 ~(m_PENT
| m_PPRO
| m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
),
2154 /* X86_TUNE_USE_MOV0 */
2157 /* X86_TUNE_USE_CLTD */
2158 ~(m_PENT
| m_CORE2I7
| m_ATOM
| m_K6
| m_GENERIC
),
2160 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
2163 /* X86_TUNE_SPLIT_LONG_MOVES */
2166 /* X86_TUNE_READ_MODIFY_WRITE */
2169 /* X86_TUNE_READ_MODIFY */
2172 /* X86_TUNE_PROMOTE_QIMODE */
2173 m_386
| m_486
| m_PENT
| m_CORE2I7
| m_ATOM
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
2175 /* X86_TUNE_FAST_PREFIX */
2176 ~(m_386
| m_486
| m_PENT
),
2178 /* X86_TUNE_SINGLE_STRINGOP */
2179 m_386
| m_P4_NOCONA
,
2181 /* X86_TUNE_QIMODE_MATH */
2184 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
2185 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
2186 might be considered for Generic32 if our scheme for avoiding partial
2187 stalls was more effective. */
2190 /* X86_TUNE_PROMOTE_QI_REGS */
2193 /* X86_TUNE_PROMOTE_HI_REGS */
2196 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
2197 over esp addition. */
2198 m_386
| m_486
| m_PENT
| m_PPRO
,
2200 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
2201 over esp addition. */
2204 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
2205 over esp subtraction. */
2206 m_386
| m_486
| m_PENT
| m_K6_GEODE
,
2208 /* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred
2209 over esp subtraction. */
2210 m_PENT
| m_K6_GEODE
,
2212 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
2213 for DFmode copies */
2214 ~(m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_GEODE
| m_AMD_MULTIPLE
| m_ATOM
| m_GENERIC
),
2216 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
2217 m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
,
2219 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
2220 conflict here in between PPro/Pentium4 based chips that thread 128bit
2221 SSE registers as single units versus K8 based chips that divide SSE
2222 registers to two 64bit halves. This knob promotes all store destinations
2223 to be 128bit to allow register renaming on 128bit SSE units, but usually
2224 results in one extra microop on 64bit SSE units. Experimental results
2225 shows that disabling this option on P4 brings over 20% SPECfp regression,
2226 while enabling it on K8 brings roughly 2.4% regression that can be partly
2227 masked by careful scheduling of moves. */
2228 m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_AMDFAM10
| m_BDVER
| m_GENERIC
,
2230 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
2231 m_COREI7
| m_AMDFAM10
| m_BDVER
| m_BTVER1
,
2233 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
2236 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
2239 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
2240 are resolved on SSE register parts instead of whole registers, so we may
2241 maintain just lower part of scalar values in proper format leaving the
2242 upper part undefined. */
2245 /* X86_TUNE_SSE_TYPELESS_STORES */
2248 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
2249 m_PPRO
| m_P4_NOCONA
,
2251 /* X86_TUNE_MEMORY_MISMATCH_STALL */
2252 m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
,
2254 /* X86_TUNE_PROLOGUE_USING_MOVE */
2255 m_PPRO
| m_CORE2I7
| m_ATOM
| m_ATHLON_K8
| m_GENERIC
,
2257 /* X86_TUNE_EPILOGUE_USING_MOVE */
2258 m_PPRO
| m_CORE2I7
| m_ATOM
| m_ATHLON_K8
| m_GENERIC
,
2260 /* X86_TUNE_SHIFT1 */
2263 /* X86_TUNE_USE_FFREEP */
2266 /* X86_TUNE_INTER_UNIT_MOVES */
2267 ~(m_AMD_MULTIPLE
| m_GENERIC
),
2269 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
2270 ~(m_AMDFAM10
| m_BDVER
),
2272 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
2273 than 4 branch instructions in the 16 byte window. */
2274 m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
,
2276 /* X86_TUNE_SCHEDULE */
2277 m_PENT
| m_PPRO
| m_CORE2I7
| m_ATOM
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
2279 /* X86_TUNE_USE_BT */
2280 m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
,
2282 /* X86_TUNE_USE_INCDEC */
2283 ~(m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_GENERIC
),
2285 /* X86_TUNE_PAD_RETURNS */
2286 m_CORE2I7
| m_AMD_MULTIPLE
| m_GENERIC
,
2288 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short funtion. */
2291 /* X86_TUNE_EXT_80387_CONSTANTS */
2292 m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_K6_GEODE
| m_ATHLON_K8
| m_GENERIC
,
2294 /* X86_TUNE_SHORTEN_X87_SSE */
2297 /* X86_TUNE_AVOID_VECTOR_DECODE */
2298 m_CORE2I7_64
| m_K8
| m_GENERIC64
,
2300 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
2301 and SImode multiply, but 386 and 486 do HImode multiply faster. */
2304 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
2305 vector path on AMD machines. */
2306 m_CORE2I7_64
| m_K8
| m_AMDFAM10
| m_BDVER
| m_BTVER1
| m_GENERIC64
,
2308 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
2310 m_CORE2I7_64
| m_K8
| m_AMDFAM10
| m_BDVER
| m_BTVER1
| m_GENERIC64
,
2312 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
2316 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
2317 but one byte longer. */
2320 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
2321 operand that cannot be represented using a modRM byte. The XOR
2322 replacement is long decoded, so this split helps here as well. */
2325 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
2327 m_CORE2I7
| m_AMDFAM10
| m_GENERIC
,
2329 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
2330 from integer to FP. */
2333 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
2334 with a subsequent conditional jump instruction into a single
2335 compare-and-branch uop. */
2338 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
2339 will impact LEA instruction selection. */
2342 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
2346 /* X86_SOFTARE_PREFETCHING_BENEFICIAL: Enable software prefetching
2347 at -O3. For the moment, the prefetching seems badly tuned for Intel
2349 m_K6_GEODE
| m_AMD_MULTIPLE
,
2351 /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
2352 the auto-vectorizer. */
2355 /* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations
2356 during reassociation of integer computation. */
2359 /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
2360 during reassociation of fp computation. */
2364 /* Feature tests against the various architecture variations. */
2365 unsigned char ix86_arch_features
[X86_ARCH_LAST
];
2367 /* Feature tests against the various architecture variations, used to create
2368 ix86_arch_features based on the processor mask. */
2369 static unsigned int initial_ix86_arch_features
[X86_ARCH_LAST
] = {
2370 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
2371 ~(m_386
| m_486
| m_PENT
| m_K6
),
2373 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2376 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2379 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2382 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2386 static const unsigned int x86_accumulate_outgoing_args
2387 = m_PPRO
| m_P4_NOCONA
| m_ATOM
| m_CORE2I7
| m_AMD_MULTIPLE
| m_GENERIC
;
2389 static const unsigned int x86_arch_always_fancy_math_387
2390 = m_PENT
| m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
;
2392 static const unsigned int x86_avx256_split_unaligned_load
2393 = m_COREI7
| m_GENERIC
;
2395 static const unsigned int x86_avx256_split_unaligned_store
2396 = m_COREI7
| m_BDVER
| m_GENERIC
;
2398 /* In case the average insn count for single function invocation is
2399 lower than this constant, emit fast (but longer) prologue and
2401 #define FAST_PROLOGUE_INSN_COUNT 20
2403 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2404 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
2405 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
2406 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
2408 /* Array of the smallest class containing reg number REGNO, indexed by
2409 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2411 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
2413 /* ax, dx, cx, bx */
2414 AREG
, DREG
, CREG
, BREG
,
2415 /* si, di, bp, sp */
2416 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
2418 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
2419 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
2422 /* flags, fpsr, fpcr, frame */
2423 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
2425 SSE_FIRST_REG
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
2428 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
2431 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
2432 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
2433 /* SSE REX registers */
2434 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
2438 /* The "default" register map used in 32bit mode. */
2440 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
2442 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2443 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2444 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2445 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2446 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2447 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2448 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2451 /* The "default" register map used in 64bit mode. */
2453 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
2455 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2456 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2457 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2458 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2459 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2460 8,9,10,11,12,13,14,15, /* extended integer registers */
2461 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2464 /* Define the register numbers to be used in Dwarf debugging information.
2465 The SVR4 reference port C compiler uses the following register numbers
2466 in its Dwarf output code:
2467 0 for %eax (gcc regno = 0)
2468 1 for %ecx (gcc regno = 2)
2469 2 for %edx (gcc regno = 1)
2470 3 for %ebx (gcc regno = 3)
2471 4 for %esp (gcc regno = 7)
2472 5 for %ebp (gcc regno = 6)
2473 6 for %esi (gcc regno = 4)
2474 7 for %edi (gcc regno = 5)
2475 The following three DWARF register numbers are never generated by
2476 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2477 believes these numbers have these meanings.
2478 8 for %eip (no gcc equivalent)
2479 9 for %eflags (gcc regno = 17)
2480 10 for %trapno (no gcc equivalent)
2481 It is not at all clear how we should number the FP stack registers
2482 for the x86 architecture. If the version of SDB on x86/svr4 were
2483 a bit less brain dead with respect to floating-point then we would
2484 have a precedent to follow with respect to DWARF register numbers
2485 for x86 FP registers, but the SDB on x86/svr4 is so completely
2486 broken with respect to FP registers that it is hardly worth thinking
2487 of it as something to strive for compatibility with.
2488 The version of x86/svr4 SDB I have at the moment does (partially)
2489 seem to believe that DWARF register number 11 is associated with
2490 the x86 register %st(0), but that's about all. Higher DWARF
2491 register numbers don't seem to be associated with anything in
2492 particular, and even for DWARF regno 11, SDB only seems to under-
2493 stand that it should say that a variable lives in %st(0) (when
2494 asked via an `=' command) if we said it was in DWARF regno 11,
2495 but SDB still prints garbage when asked for the value of the
2496 variable in question (via a `/' command).
2497 (Also note that the labels SDB prints for various FP stack regs
2498 when doing an `x' command are all wrong.)
2499 Note that these problems generally don't affect the native SVR4
2500 C compiler because it doesn't allow the use of -O with -g and
2501 because when it is *not* optimizing, it allocates a memory
2502 location for each floating-point variable, and the memory
2503 location is what gets described in the DWARF AT_location
2504 attribute for the variable in question.
2505 Regardless of the severe mental illness of the x86/svr4 SDB, we
2506 do something sensible here and we use the following DWARF
2507 register numbers. Note that these are all stack-top-relative
2509 11 for %st(0) (gcc regno = 8)
2510 12 for %st(1) (gcc regno = 9)
2511 13 for %st(2) (gcc regno = 10)
2512 14 for %st(3) (gcc regno = 11)
2513 15 for %st(4) (gcc regno = 12)
2514 16 for %st(5) (gcc regno = 13)
2515 17 for %st(6) (gcc regno = 14)
   18 for %st(7) (gcc regno = 15)  */
2518 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
2520 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2521 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2522 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2523 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2524 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2525 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2526 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2529 /* Define parameter passing and return registers. */
2531 static int const x86_64_int_parameter_registers
[6] =
2533 DI_REG
, SI_REG
, DX_REG
, CX_REG
, R8_REG
, R9_REG
2536 static int const x86_64_ms_abi_int_parameter_registers
[4] =
2538 CX_REG
, DX_REG
, R8_REG
, R9_REG
2541 static int const x86_64_int_return_registers
[4] =
2543 AX_REG
, DX_REG
, DI_REG
, SI_REG
2546 /* Define the structure for the machine field in struct function. */
2548 struct GTY(()) stack_local_entry
{
2549 unsigned short mode
;
2552 struct stack_local_entry
*next
;
2555 /* Structure describing stack frame layout.
2556 Stack grows downward:
2562 saved static chain if ix86_static_chain_on_stack
2564 saved frame pointer if frame_pointer_needed
2565 <- HARD_FRAME_POINTER
2571 <- sse_regs_save_offset
2574 [va_arg registers] |
   [padding2] | = to_allocate  */
2587 int outgoing_arguments_size
;
2588 HOST_WIDE_INT frame
;
2590 /* The offsets relative to ARG_POINTER. */
2591 HOST_WIDE_INT frame_pointer_offset
;
2592 HOST_WIDE_INT hard_frame_pointer_offset
;
2593 HOST_WIDE_INT stack_pointer_offset
;
2594 HOST_WIDE_INT hfp_save_offset
;
2595 HOST_WIDE_INT reg_save_offset
;
2596 HOST_WIDE_INT sse_reg_save_offset
;
2598 /* When save_regs_using_mov is set, emit prologue using
2599 move instead of push instructions. */
2600 bool save_regs_using_mov
;
2603 /* Which cpu are we scheduling for. */
2604 enum attr_cpu ix86_schedule
;
2606 /* Which cpu are we optimizing for. */
2607 enum processor_type ix86_tune
;
2609 /* Which instruction set architecture to use. */
2610 enum processor_type ix86_arch
;
2612 /* true if sse prefetch instruction is not NOOP. */
2613 int x86_prefetch_sse
;
/* Attribute name recognized for the -mstackrealign machinery.  */
static const char ix86_force_align_arg_pointer_string[]
  = "force_align_arg_pointer";
2619 static rtx (*ix86_gen_leave
) (void);
2620 static rtx (*ix86_gen_add3
) (rtx
, rtx
, rtx
);
2621 static rtx (*ix86_gen_sub3
) (rtx
, rtx
, rtx
);
2622 static rtx (*ix86_gen_sub3_carry
) (rtx
, rtx
, rtx
, rtx
, rtx
);
2623 static rtx (*ix86_gen_one_cmpl2
) (rtx
, rtx
);
2624 static rtx (*ix86_gen_monitor
) (rtx
, rtx
, rtx
);
2625 static rtx (*ix86_gen_andsp
) (rtx
, rtx
, rtx
);
2626 static rtx (*ix86_gen_allocate_stack_worker
) (rtx
, rtx
);
2627 static rtx (*ix86_gen_adjust_stack_and_probe
) (rtx
, rtx
, rtx
);
2628 static rtx (*ix86_gen_probe_stack_range
) (rtx
, rtx
, rtx
);
/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Alignment for incoming stack boundary in bits specified at
   command line.  */
static unsigned int ix86_user_incoming_stack_boundary;

/* Default alignment for incoming stack boundary in bits.  */
static unsigned int ix86_default_incoming_stack_boundary;

/* Alignment for incoming stack boundary in bits.  */
unsigned int ix86_incoming_stack_boundary;
2643 /* Calling abi specific va_list type nodes. */
2644 static GTY(()) tree sysv_va_list_type_node
;
2645 static GTY(()) tree ms_va_list_type_node
;
/* Prefix built by ASM_GENERATE_INTERNAL_LABEL, and its length.  */
char internal_label_prefix[16];
int internal_label_prefix_len;
2651 /* Fence to use after loop using movnt. */
2654 /* Register class used for passing given 64bit part of the argument.
2655 These represent classes as documented by the PS ABI, with the exception
2656 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
2657 use SF or DFmode move instead of DImode to avoid reformatting penalties.
2659 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2660 whenever possible (upper half does contain padding). */
2661 enum x86_64_reg_class
2664 X86_64_INTEGER_CLASS
,
2665 X86_64_INTEGERSI_CLASS
,
2672 X86_64_COMPLEX_X87_CLASS
,
2676 #define MAX_CLASSES 4
2678 /* Table of constants used by fldpi, fldln2, etc.... */
2679 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
2680 static bool ext_80387_constants_init
= 0;
2683 static struct machine_function
* ix86_init_machine_status (void);
2684 static rtx
ix86_function_value (const_tree
, const_tree
, bool);
2685 static bool ix86_function_value_regno_p (const unsigned int);
2686 static unsigned int ix86_function_arg_boundary (enum machine_mode
,
2688 static rtx
ix86_static_chain (const_tree
, bool);
2689 static int ix86_function_regparm (const_tree
, const_tree
);
2690 static void ix86_compute_frame_layout (struct ix86_frame
*);
2691 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode
,
2693 static void ix86_add_new_builtins (HOST_WIDE_INT
);
2694 static tree
ix86_canonical_va_list_type (tree
);
2695 static void predict_jump (int);
2696 static unsigned int split_stack_prologue_scratch_regno (void);
2697 static bool i386_asm_output_addr_const_extra (FILE *, rtx
);
2699 enum ix86_function_specific_strings
2701 IX86_FUNCTION_SPECIFIC_ARCH
,
2702 IX86_FUNCTION_SPECIFIC_TUNE
,
2703 IX86_FUNCTION_SPECIFIC_MAX
2706 static char *ix86_target_string (HOST_WIDE_INT
, int, const char *,
2707 const char *, enum fpmath_unit
, bool);
2708 static void ix86_debug_options (void) ATTRIBUTE_UNUSED
;
2709 static void ix86_function_specific_save (struct cl_target_option
*);
2710 static void ix86_function_specific_restore (struct cl_target_option
*);
2711 static void ix86_function_specific_print (FILE *, int,
2712 struct cl_target_option
*);
2713 static bool ix86_valid_target_attribute_p (tree
, tree
, tree
, int);
2714 static bool ix86_valid_target_attribute_inner_p (tree
, char *[],
2715 struct gcc_options
*);
2716 static bool ix86_can_inline_p (tree
, tree
);
2717 static void ix86_set_current_function (tree
);
2718 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2720 static enum calling_abi
ix86_function_abi (const_tree
);
2721 static rtx
promote_duplicated_reg (enum machine_mode
, rtx
);
2722 static rtx
promote_duplicated_reg_to_size (rtx
, int, int, int);
2725 #ifndef SUBTARGET32_DEFAULT_CPU
2726 #define SUBTARGET32_DEFAULT_CPU "i386"
2729 /* The svr4 ABI for the i386 says that records and unions are returned
2731 #ifndef DEFAULT_PCC_STRUCT_RETURN
2732 #define DEFAULT_PCC_STRUCT_RETURN 1
/* Whether -mtune= or -march= were specified.  */
static int ix86_tune_defaulted;
static int ix86_arch_specified;
2739 /* Vectorization library interface and handlers. */
2740 static tree (*ix86_veclib_handler
) (enum built_in_function
, tree
, tree
);
2742 static tree
ix86_veclibabi_svml (enum built_in_function
, tree
, tree
);
2743 static tree
ix86_veclibabi_acml (enum built_in_function
, tree
, tree
);
2745 /* Processor target table, indexed by processor number */
2748 const struct processor_costs
*cost
; /* Processor costs */
2749 const int align_loop
; /* Default alignments. */
2750 const int align_loop_max_skip
;
2751 const int align_jump
;
2752 const int align_jump_max_skip
;
2753 const int align_func
;
2756 static const struct ptt processor_target_table
[PROCESSOR_max
] =
2758 {&i386_cost
, 4, 3, 4, 3, 4},
2759 {&i486_cost
, 16, 15, 16, 15, 16},
2760 {&pentium_cost
, 16, 7, 16, 7, 16},
2761 {&pentiumpro_cost
, 16, 15, 16, 10, 16},
2762 {&geode_cost
, 0, 0, 0, 0, 0},
2763 {&k6_cost
, 32, 7, 32, 7, 32},
2764 {&athlon_cost
, 16, 7, 16, 7, 16},
2765 {&pentium4_cost
, 0, 0, 0, 0, 0},
2766 {&k8_cost
, 16, 7, 16, 7, 16},
2767 {&nocona_cost
, 0, 0, 0, 0, 0},
2768 /* Core 2 32-bit. */
2769 {&core_cost
, 16, 10, 16, 10, 16},
2770 /* Core 2 64-bit. */
2771 {&core_cost
, 16, 10, 16, 10, 16},
2772 /* Core i7 32-bit. */
2773 {&core_cost
, 16, 10, 16, 10, 16},
2774 /* Core i7 64-bit. */
2775 {&core_cost
, 16, 10, 16, 10, 16},
2776 {&generic32_cost
, 16, 7, 16, 7, 16},
2777 {&generic64_cost
, 16, 10, 16, 10, 16},
2778 {&amdfam10_cost
, 32, 24, 32, 7, 32},
2779 {&bdver1_cost
, 32, 24, 32, 7, 32},
2780 {&bdver2_cost
, 32, 24, 32, 7, 32},
2781 {&btver1_cost
, 32, 24, 32, 7, 32},
2782 {&atom_cost
, 16, 15, 16, 7, 16}
2785 static const char *const cpu_names
[TARGET_CPU_DEFAULT_max
] =
2815 /* Return true if a red-zone is in use. */
2818 ix86_using_red_zone (void)
2820 return TARGET_RED_ZONE
&& !TARGET_64BIT_MS_ABI
;
2823 /* Return a string that documents the current -m options. The caller is
2824 responsible for freeing the string. */
2827 ix86_target_string (HOST_WIDE_INT isa
, int flags
, const char *arch
,
2828 const char *tune
, enum fpmath_unit fpmath
,
2831 struct ix86_target_opts
2833 const char *option
; /* option string */
2834 HOST_WIDE_INT mask
; /* isa mask options */
2837 /* This table is ordered so that options like -msse4.2 that imply
2838 preceding options while match those first. */
2839 static struct ix86_target_opts isa_opts
[] =
2841 { "-m64", OPTION_MASK_ISA_64BIT
},
2842 { "-mfma4", OPTION_MASK_ISA_FMA4
},
2843 { "-mfma", OPTION_MASK_ISA_FMA
},
2844 { "-mxop", OPTION_MASK_ISA_XOP
},
2845 { "-mlwp", OPTION_MASK_ISA_LWP
},
2846 { "-msse4a", OPTION_MASK_ISA_SSE4A
},
2847 { "-msse4.2", OPTION_MASK_ISA_SSE4_2
},
2848 { "-msse4.1", OPTION_MASK_ISA_SSE4_1
},
2849 { "-mssse3", OPTION_MASK_ISA_SSSE3
},
2850 { "-msse3", OPTION_MASK_ISA_SSE3
},
2851 { "-msse2", OPTION_MASK_ISA_SSE2
},
2852 { "-msse", OPTION_MASK_ISA_SSE
},
2853 { "-m3dnow", OPTION_MASK_ISA_3DNOW
},
2854 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A
},
2855 { "-mmmx", OPTION_MASK_ISA_MMX
},
2856 { "-mabm", OPTION_MASK_ISA_ABM
},
2857 { "-mbmi", OPTION_MASK_ISA_BMI
},
2858 { "-mbmi2", OPTION_MASK_ISA_BMI2
},
2859 { "-mlzcnt", OPTION_MASK_ISA_LZCNT
},
2860 { "-mtbm", OPTION_MASK_ISA_TBM
},
2861 { "-mpopcnt", OPTION_MASK_ISA_POPCNT
},
2862 { "-mmovbe", OPTION_MASK_ISA_MOVBE
},
2863 { "-mcrc32", OPTION_MASK_ISA_CRC32
},
2864 { "-maes", OPTION_MASK_ISA_AES
},
2865 { "-mpclmul", OPTION_MASK_ISA_PCLMUL
},
2866 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE
},
2867 { "-mrdrnd", OPTION_MASK_ISA_RDRND
},
2868 { "-mf16c", OPTION_MASK_ISA_F16C
},
2872 static struct ix86_target_opts flag_opts
[] =
2874 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE
},
2875 { "-m80387", MASK_80387
},
2876 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS
},
2877 { "-malign-double", MASK_ALIGN_DOUBLE
},
2878 { "-mcld", MASK_CLD
},
2879 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS
},
2880 { "-mieee-fp", MASK_IEEE_FP
},
2881 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS
},
2882 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY
},
2883 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT
},
2884 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS
},
2885 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387
},
2886 { "-mno-push-args", MASK_NO_PUSH_ARGS
},
2887 { "-mno-red-zone", MASK_NO_RED_ZONE
},
2888 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER
},
2889 { "-mrecip", MASK_RECIP
},
2890 { "-mrtd", MASK_RTD
},
2891 { "-msseregparm", MASK_SSEREGPARM
},
2892 { "-mstack-arg-probe", MASK_STACK_PROBE
},
2893 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS
},
2894 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS
},
2895 { "-m8bit-idiv", MASK_USE_8BIT_IDIV
},
2896 { "-mvzeroupper", MASK_VZEROUPPER
},
2897 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD
},
2898 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE
},
2899 { "-mprefer-avx128", MASK_PREFER_AVX128
},
2902 const char *opts
[ARRAY_SIZE (isa_opts
) + ARRAY_SIZE (flag_opts
) + 6][2];
2905 char target_other
[40];
2914 memset (opts
, '\0', sizeof (opts
));
2916 /* Add -march= option. */
2919 opts
[num
][0] = "-march=";
2920 opts
[num
++][1] = arch
;
2923 /* Add -mtune= option. */
2926 opts
[num
][0] = "-mtune=";
2927 opts
[num
++][1] = tune
;
2930 /* Pick out the options in isa options. */
2931 for (i
= 0; i
< ARRAY_SIZE (isa_opts
); i
++)
2933 if ((isa
& isa_opts
[i
].mask
) != 0)
2935 opts
[num
++][0] = isa_opts
[i
].option
;
2936 isa
&= ~ isa_opts
[i
].mask
;
2940 if (isa
&& add_nl_p
)
2942 opts
[num
++][0] = isa_other
;
2943 sprintf (isa_other
, "(other isa: %#" HOST_WIDE_INT_PRINT
"x)",
2947 /* Add flag options. */
2948 for (i
= 0; i
< ARRAY_SIZE (flag_opts
); i
++)
2950 if ((flags
& flag_opts
[i
].mask
) != 0)
2952 opts
[num
++][0] = flag_opts
[i
].option
;
2953 flags
&= ~ flag_opts
[i
].mask
;
2957 if (flags
&& add_nl_p
)
2959 opts
[num
++][0] = target_other
;
2960 sprintf (target_other
, "(other flags: %#x)", flags
);
2963 /* Add -fpmath= option. */
2966 opts
[num
][0] = "-mfpmath=";
2967 switch ((int) fpmath
)
2970 opts
[num
++][1] = "387";
2974 opts
[num
++][1] = "sse";
2977 case FPMATH_387
| FPMATH_SSE
:
2978 opts
[num
++][1] = "sse+387";
2990 gcc_assert (num
< ARRAY_SIZE (opts
));
2992 /* Size the string. */
2994 sep_len
= (add_nl_p
) ? 3 : 1;
2995 for (i
= 0; i
< num
; i
++)
2998 for (j
= 0; j
< 2; j
++)
3000 len
+= strlen (opts
[i
][j
]);
3003 /* Build the string. */
3004 ret
= ptr
= (char *) xmalloc (len
);
3007 for (i
= 0; i
< num
; i
++)
3011 for (j
= 0; j
< 2; j
++)
3012 len2
[j
] = (opts
[i
][j
]) ? strlen (opts
[i
][j
]) : 0;
3019 if (add_nl_p
&& line_len
+ len2
[0] + len2
[1] > 70)
3027 for (j
= 0; j
< 2; j
++)
3030 memcpy (ptr
, opts
[i
][j
], len2
[j
]);
3032 line_len
+= len2
[j
];
3037 gcc_assert (ret
+ len
>= ptr
);
3042 /* Return true, if profiling code should be emitted before
3043 prologue. Otherwise it returns false.
3044 Note: For x86 with "hotfix" it is sorried. */
3046 ix86_profile_before_prologue (void)
3048 return flag_fentry
!= 0;
3051 /* Function that is callable from the debugger to print the current
3054 ix86_debug_options (void)
3056 char *opts
= ix86_target_string (ix86_isa_flags
, target_flags
,
3057 ix86_arch_string
, ix86_tune_string
,
3062 fprintf (stderr
, "%s\n\n", opts
);
3066 fputs ("<no options>\n\n", stderr
);
3071 /* Override various settings based on options. If MAIN_ARGS_P, the
3072 options are from the command line, otherwise they are from
3076 ix86_option_override_internal (bool main_args_p
)
3079 unsigned int ix86_arch_mask
, ix86_tune_mask
;
3080 const bool ix86_tune_specified
= (ix86_tune_string
!= NULL
);
3085 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3086 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3087 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3088 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3089 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3090 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3091 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3092 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3093 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3094 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3095 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3096 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3097 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3098 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3099 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3100 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3101 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3102 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3103 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3104 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3105 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3106 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3107 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3108 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3109 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3110 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3111 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3112 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3113 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3114 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3115 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3116 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3117 /* if this reaches 64, need to widen struct pta flags below */
3121 const char *const name
; /* processor name or nickname. */
3122 const enum processor_type processor
;
3123 const enum attr_cpu schedule
;
3124 const unsigned HOST_WIDE_INT flags
;
3126 const processor_alias_table
[] =
3128 {"i386", PROCESSOR_I386
, CPU_NONE
, 0},
3129 {"i486", PROCESSOR_I486
, CPU_NONE
, 0},
3130 {"i586", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
3131 {"pentium", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
3132 {"pentium-mmx", PROCESSOR_PENTIUM
, CPU_PENTIUM
, PTA_MMX
},
3133 {"winchip-c6", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
},
3134 {"winchip2", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
},
3135 {"c3", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
},
3136 {"c3-2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
| PTA_SSE
},
3137 {"i686", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
3138 {"pentiumpro", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
3139 {"pentium2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
},
3140 {"pentium3", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
3142 {"pentium3m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
3144 {"pentium-m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
3145 PTA_MMX
| PTA_SSE
| PTA_SSE2
},
3146 {"pentium4", PROCESSOR_PENTIUM4
, CPU_NONE
,
3147 PTA_MMX
|PTA_SSE
| PTA_SSE2
},
3148 {"pentium4m", PROCESSOR_PENTIUM4
, CPU_NONE
,
3149 PTA_MMX
| PTA_SSE
| PTA_SSE2
},
3150 {"prescott", PROCESSOR_NOCONA
, CPU_NONE
,
3151 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
},
3152 {"nocona", PROCESSOR_NOCONA
, CPU_NONE
,
3153 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3154 | PTA_CX16
| PTA_NO_SAHF
},
3155 {"core2", PROCESSOR_CORE2_64
, CPU_CORE2
,
3156 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3157 | PTA_SSSE3
| PTA_CX16
},
3158 {"corei7", PROCESSOR_COREI7_64
, CPU_COREI7
,
3159 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3160 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_CX16
},
3161 {"corei7-avx", PROCESSOR_COREI7_64
, CPU_COREI7
,
3162 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3163 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
3164 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
},
3165 {"core-avx-i", PROCESSOR_COREI7_64
, CPU_COREI7
,
3166 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3167 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
3168 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
3169 | PTA_RDRND
| PTA_F16C
},
3170 {"core-avx2", PROCESSOR_COREI7_64
, CPU_COREI7
,
3171 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3172 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
| PTA_AVX2
3173 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
3174 | PTA_RDRND
| PTA_F16C
| PTA_BMI
| PTA_BMI2
| PTA_LZCNT
3175 | PTA_FMA
| PTA_MOVBE
},
3176 {"atom", PROCESSOR_ATOM
, CPU_ATOM
,
3177 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3178 | PTA_SSSE3
| PTA_CX16
| PTA_MOVBE
},
3179 {"geode", PROCESSOR_GEODE
, CPU_GEODE
,
3180 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
|PTA_PREFETCH_SSE
},
3181 {"k6", PROCESSOR_K6
, CPU_K6
, PTA_MMX
},
3182 {"k6-2", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
},
3183 {"k6-3", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
},
3184 {"athlon", PROCESSOR_ATHLON
, CPU_ATHLON
,
3185 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
3186 {"athlon-tbird", PROCESSOR_ATHLON
, CPU_ATHLON
,
3187 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
3188 {"athlon-4", PROCESSOR_ATHLON
, CPU_ATHLON
,
3189 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
3190 {"athlon-xp", PROCESSOR_ATHLON
, CPU_ATHLON
,
3191 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
3192 {"athlon-mp", PROCESSOR_ATHLON
, CPU_ATHLON
,
3193 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
3194 {"x86-64", PROCESSOR_K8
, CPU_K8
,
3195 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_NO_SAHF
},
3196 {"k8", PROCESSOR_K8
, CPU_K8
,
3197 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3198 | PTA_SSE2
| PTA_NO_SAHF
},
3199 {"k8-sse3", PROCESSOR_K8
, CPU_K8
,
3200 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3201 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
3202 {"opteron", PROCESSOR_K8
, CPU_K8
,
3203 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3204 | PTA_SSE2
| PTA_NO_SAHF
},
3205 {"opteron-sse3", PROCESSOR_K8
, CPU_K8
,
3206 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3207 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
3208 {"athlon64", PROCESSOR_K8
, CPU_K8
,
3209 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3210 | PTA_SSE2
| PTA_NO_SAHF
},
3211 {"athlon64-sse3", PROCESSOR_K8
, CPU_K8
,
3212 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3213 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
3214 {"athlon-fx", PROCESSOR_K8
, CPU_K8
,
3215 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3216 | PTA_SSE2
| PTA_NO_SAHF
},
3217 {"amdfam10", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
3218 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3219 | PTA_SSE2
| PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
},
3220 {"barcelona", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
3221 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3222 | PTA_SSE2
| PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
},
3223 {"bdver1", PROCESSOR_BDVER1
, CPU_BDVER1
,
3224 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3225 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3226 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
3227 | PTA_XOP
| PTA_LWP
},
3228 {"bdver2", PROCESSOR_BDVER2
, CPU_BDVER2
,
3229 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3230 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3231 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
3232 | PTA_XOP
| PTA_LWP
| PTA_BMI
| PTA_TBM
| PTA_F16C
3234 {"btver1", PROCESSOR_BTVER1
, CPU_GENERIC64
,
3235 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3236 | PTA_SSSE3
| PTA_SSE4A
|PTA_ABM
| PTA_CX16
},
3237 {"generic32", PROCESSOR_GENERIC32
, CPU_PENTIUMPRO
,
3238 0 /* flags are only used for -march switch. */ },
3239 {"generic64", PROCESSOR_GENERIC64
, CPU_GENERIC64
,
3240 PTA_64BIT
/* flags are only used for -march switch. */ },
3243 /* -mrecip options. */
3246 const char *string
; /* option name */
3247 unsigned int mask
; /* mask bits to set */
3249 const recip_options
[] =
3251 { "all", RECIP_MASK_ALL
},
3252 { "none", RECIP_MASK_NONE
},
3253 { "div", RECIP_MASK_DIV
},
3254 { "sqrt", RECIP_MASK_SQRT
},
3255 { "vec-div", RECIP_MASK_VEC_DIV
},
3256 { "vec-sqrt", RECIP_MASK_VEC_SQRT
},
3259 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
3261 /* Set up prefix/suffix so the error messages refer to either the command
3262 line argument, or the attribute(target). */
3271 prefix
= "option(\"";
3276 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3277 SUBTARGET_OVERRIDE_OPTIONS
;
3280 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3281 SUBSUBTARGET_OVERRIDE_OPTIONS
;
3285 ix86_isa_flags
|= OPTION_MASK_ISA_64BIT
;
3287 /* -fPIC is the default for x86_64. */
3288 if (TARGET_MACHO
&& TARGET_64BIT
)
3291 /* Need to check -mtune=generic first. */
3292 if (ix86_tune_string
)
3294 if (!strcmp (ix86_tune_string
, "generic")
3295 || !strcmp (ix86_tune_string
, "i686")
3296 /* As special support for cross compilers we read -mtune=native
3297 as -mtune=generic. With native compilers we won't see the
3298 -mtune=native, as it was changed by the driver. */
3299 || !strcmp (ix86_tune_string
, "native"))
3302 ix86_tune_string
= "generic64";
3304 ix86_tune_string
= "generic32";
3306 /* If this call is for setting the option attribute, allow the
3307 generic32/generic64 that was previously set. */
3308 else if (!main_args_p
3309 && (!strcmp (ix86_tune_string
, "generic32")
3310 || !strcmp (ix86_tune_string
, "generic64")))
3312 else if (!strncmp (ix86_tune_string
, "generic", 7))
3313 error ("bad value (%s) for %stune=%s %s",
3314 ix86_tune_string
, prefix
, suffix
, sw
);
3315 else if (!strcmp (ix86_tune_string
, "x86-64"))
3316 warning (OPT_Wdeprecated
, "%stune=x86-64%s is deprecated; use "
3317 "%stune=k8%s or %stune=generic%s instead as appropriate",
3318 prefix
, suffix
, prefix
, suffix
, prefix
, suffix
);
3322 if (ix86_arch_string
)
3323 ix86_tune_string
= ix86_arch_string
;
3324 if (!ix86_tune_string
)
3326 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
3327 ix86_tune_defaulted
= 1;
3330 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3331 need to use a sensible tune option. */
3332 if (!strcmp (ix86_tune_string
, "generic")
3333 || !strcmp (ix86_tune_string
, "x86-64")
3334 || !strcmp (ix86_tune_string
, "i686"))
3337 ix86_tune_string
= "generic64";
3339 ix86_tune_string
= "generic32";
3343 if (ix86_stringop_alg
== rep_prefix_8_byte
&& !TARGET_64BIT
)
3345 /* rep; movq isn't available in 32-bit code. */
3346 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3347 ix86_stringop_alg
= no_stringop
;
3350 if (!ix86_arch_string
)
3351 ix86_arch_string
= TARGET_64BIT
? "x86-64" : SUBTARGET32_DEFAULT_CPU
;
3353 ix86_arch_specified
= 1;
3355 if (!global_options_set
.x_ix86_abi
)
3356 ix86_abi
= DEFAULT_ABI
;
3358 if (global_options_set
.x_ix86_cmodel
)
3360 switch (ix86_cmodel
)
3365 ix86_cmodel
= CM_SMALL_PIC
;
3367 error ("code model %qs not supported in the %s bit mode",
3374 ix86_cmodel
= CM_MEDIUM_PIC
;
3376 error ("code model %qs not supported in the %s bit mode",
3378 else if (TARGET_X32
)
3379 error ("code model %qs not supported in x32 mode",
3386 ix86_cmodel
= CM_LARGE_PIC
;
3388 error ("code model %qs not supported in the %s bit mode",
3390 else if (TARGET_X32
)
3391 error ("code model %qs not supported in x32 mode",
3397 error ("code model %s does not support PIC mode", "32");
3399 error ("code model %qs not supported in the %s bit mode",
3406 error ("code model %s does not support PIC mode", "kernel");
3407 ix86_cmodel
= CM_32
;
3410 error ("code model %qs not supported in the %s bit mode",
3420 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3421 use of rip-relative addressing. This eliminates fixups that
3422 would otherwise be needed if this object is to be placed in a
3423 DLL, and is essentially just as efficient as direct addressing. */
3424 if (TARGET_64BIT
&& DEFAULT_ABI
== MS_ABI
)
3425 ix86_cmodel
= CM_SMALL_PIC
, flag_pic
= 1;
3426 else if (TARGET_64BIT
)
3427 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
3429 ix86_cmodel
= CM_32
;
3431 if (TARGET_MACHO
&& ix86_asm_dialect
== ASM_INTEL
)
3433 error ("-masm=intel not supported in this configuration");
3434 ix86_asm_dialect
= ASM_ATT
;
3436 if ((TARGET_64BIT
!= 0) != ((ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) != 0))
3437 sorry ("%i-bit mode not compiled in",
3438 (ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) ? 64 : 32);
3440 for (i
= 0; i
< pta_size
; i
++)
3441 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
3443 ix86_schedule
= processor_alias_table
[i
].schedule
;
3444 ix86_arch
= processor_alias_table
[i
].processor
;
3445 /* Default cpu tuning to the architecture. */
3446 ix86_tune
= ix86_arch
;
3448 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
3449 error ("CPU you selected does not support x86-64 "
3452 if (processor_alias_table
[i
].flags
& PTA_MMX
3453 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_MMX
))
3454 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
;
3455 if (processor_alias_table
[i
].flags
& PTA_3DNOW
3456 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW
))
3457 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW
;
3458 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
3459 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW_A
))
3460 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW_A
;
3461 if (processor_alias_table
[i
].flags
& PTA_SSE
3462 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE
))
3463 ix86_isa_flags
|= OPTION_MASK_ISA_SSE
;
3464 if (processor_alias_table
[i
].flags
& PTA_SSE2
3465 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE2
))
3466 ix86_isa_flags
|= OPTION_MASK_ISA_SSE2
;
3467 if (processor_alias_table
[i
].flags
& PTA_SSE3
3468 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE3
))
3469 ix86_isa_flags
|= OPTION_MASK_ISA_SSE3
;
3470 if (processor_alias_table
[i
].flags
& PTA_SSSE3
3471 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSSE3
))
3472 ix86_isa_flags
|= OPTION_MASK_ISA_SSSE3
;
3473 if (processor_alias_table
[i
].flags
& PTA_SSE4_1
3474 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_1
))
3475 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_1
;
3476 if (processor_alias_table
[i
].flags
& PTA_SSE4_2
3477 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_2
))
3478 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_2
;
3479 if (processor_alias_table
[i
].flags
& PTA_AVX
3480 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX
))
3481 ix86_isa_flags
|= OPTION_MASK_ISA_AVX
;
3482 if (processor_alias_table
[i
].flags
& PTA_AVX2
3483 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX2
))
3484 ix86_isa_flags
|= OPTION_MASK_ISA_AVX2
;
3485 if (processor_alias_table
[i
].flags
& PTA_FMA
3486 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FMA
))
3487 ix86_isa_flags
|= OPTION_MASK_ISA_FMA
;
3488 if (processor_alias_table
[i
].flags
& PTA_SSE4A
3489 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4A
))
3490 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4A
;
3491 if (processor_alias_table
[i
].flags
& PTA_FMA4
3492 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FMA4
))
3493 ix86_isa_flags
|= OPTION_MASK_ISA_FMA4
;
3494 if (processor_alias_table
[i
].flags
& PTA_XOP
3495 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_XOP
))
3496 ix86_isa_flags
|= OPTION_MASK_ISA_XOP
;
3497 if (processor_alias_table
[i
].flags
& PTA_LWP
3498 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_LWP
))
3499 ix86_isa_flags
|= OPTION_MASK_ISA_LWP
;
3500 if (processor_alias_table
[i
].flags
& PTA_ABM
3501 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_ABM
))
3502 ix86_isa_flags
|= OPTION_MASK_ISA_ABM
;
3503 if (processor_alias_table
[i
].flags
& PTA_BMI
3504 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_BMI
))
3505 ix86_isa_flags
|= OPTION_MASK_ISA_BMI
;
3506 if (processor_alias_table
[i
].flags
& (PTA_LZCNT
| PTA_ABM
)
3507 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_LZCNT
))
3508 ix86_isa_flags
|= OPTION_MASK_ISA_LZCNT
;
3509 if (processor_alias_table
[i
].flags
& PTA_TBM
3510 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_TBM
))
3511 ix86_isa_flags
|= OPTION_MASK_ISA_TBM
;
3512 if (processor_alias_table
[i
].flags
& PTA_BMI2
3513 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_BMI2
))
3514 ix86_isa_flags
|= OPTION_MASK_ISA_BMI2
;
3515 if (processor_alias_table
[i
].flags
& PTA_CX16
3516 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_CX16
))
3517 ix86_isa_flags
|= OPTION_MASK_ISA_CX16
;
3518 if (processor_alias_table
[i
].flags
& (PTA_POPCNT
| PTA_ABM
)
3519 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_POPCNT
))
3520 ix86_isa_flags
|= OPTION_MASK_ISA_POPCNT
;
3521 if (!(TARGET_64BIT
&& (processor_alias_table
[i
].flags
& PTA_NO_SAHF
))
3522 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SAHF
))
3523 ix86_isa_flags
|= OPTION_MASK_ISA_SAHF
;
3524 if (processor_alias_table
[i
].flags
& PTA_MOVBE
3525 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_MOVBE
))
3526 ix86_isa_flags
|= OPTION_MASK_ISA_MOVBE
;
3527 if (processor_alias_table
[i
].flags
& PTA_AES
3528 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AES
))
3529 ix86_isa_flags
|= OPTION_MASK_ISA_AES
;
3530 if (processor_alias_table
[i
].flags
& PTA_PCLMUL
3531 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_PCLMUL
))
3532 ix86_isa_flags
|= OPTION_MASK_ISA_PCLMUL
;
3533 if (processor_alias_table
[i
].flags
& PTA_FSGSBASE
3534 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FSGSBASE
))
3535 ix86_isa_flags
|= OPTION_MASK_ISA_FSGSBASE
;
3536 if (processor_alias_table
[i
].flags
& PTA_RDRND
3537 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_RDRND
))
3538 ix86_isa_flags
|= OPTION_MASK_ISA_RDRND
;
3539 if (processor_alias_table
[i
].flags
& PTA_F16C
3540 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_F16C
))
3541 ix86_isa_flags
|= OPTION_MASK_ISA_F16C
;
3542 if (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
))
3543 x86_prefetch_sse
= true;
3548 if (!strcmp (ix86_arch_string
, "generic"))
3549 error ("generic CPU can be used only for %stune=%s %s",
3550 prefix
, suffix
, sw
);
3551 else if (!strncmp (ix86_arch_string
, "generic", 7) || i
== pta_size
)
3552 error ("bad value (%s) for %sarch=%s %s",
3553 ix86_arch_string
, prefix
, suffix
, sw
);
3555 ix86_arch_mask
= 1u << ix86_arch
;
3556 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
3557 ix86_arch_features
[i
] = !!(initial_ix86_arch_features
[i
] & ix86_arch_mask
);
3559 for (i
= 0; i
< pta_size
; i
++)
3560 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
3562 ix86_schedule
= processor_alias_table
[i
].schedule
;
3563 ix86_tune
= processor_alias_table
[i
].processor
;
3566 if (!(processor_alias_table
[i
].flags
& PTA_64BIT
))
3568 if (ix86_tune_defaulted
)
3570 ix86_tune_string
= "x86-64";
3571 for (i
= 0; i
< pta_size
; i
++)
3572 if (! strcmp (ix86_tune_string
,
3573 processor_alias_table
[i
].name
))
3575 ix86_schedule
= processor_alias_table
[i
].schedule
;
3576 ix86_tune
= processor_alias_table
[i
].processor
;
3579 error ("CPU you selected does not support x86-64 "
3585 /* Adjust tuning when compiling for 32-bit ABI. */
3588 case PROCESSOR_GENERIC64
:
3589 ix86_tune
= PROCESSOR_GENERIC32
;
3590 ix86_schedule
= CPU_PENTIUMPRO
;
3593 case PROCESSOR_CORE2_64
:
3594 ix86_tune
= PROCESSOR_CORE2_32
;
3597 case PROCESSOR_COREI7_64
:
3598 ix86_tune
= PROCESSOR_COREI7_32
;
3605 /* Intel CPUs have always interpreted SSE prefetch instructions as
3606 NOPs; so, we can enable SSE prefetch instructions even when
3607 -mtune (rather than -march) points us to a processor that has them.
3608 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3609 higher processors. */
3611 && (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
)))
3612 x86_prefetch_sse
= true;
3616 if (ix86_tune_specified
&& i
== pta_size
)
3617 error ("bad value (%s) for %stune=%s %s",
3618 ix86_tune_string
, prefix
, suffix
, sw
);
3620 ix86_tune_mask
= 1u << ix86_tune
;
3621 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
3622 ix86_tune_features
[i
] = !!(initial_ix86_tune_features
[i
] & ix86_tune_mask
);
3624 #ifndef USE_IX86_FRAME_POINTER
3625 #define USE_IX86_FRAME_POINTER 0
3628 #ifndef USE_X86_64_FRAME_POINTER
3629 #define USE_X86_64_FRAME_POINTER 0
3632 /* Set the default values for switches whose default depends on TARGET_64BIT
3633 in case they weren't overwritten by command line options. */
3636 if (optimize
> 1 && !global_options_set
.x_flag_zee
)
3638 if (optimize
>= 1 && !global_options_set
.x_flag_omit_frame_pointer
)
3639 flag_omit_frame_pointer
= !USE_X86_64_FRAME_POINTER
;
3640 if (flag_asynchronous_unwind_tables
== 2)
3641 flag_unwind_tables
= flag_asynchronous_unwind_tables
= 1;
3642 if (flag_pcc_struct_return
== 2)
3643 flag_pcc_struct_return
= 0;
3647 if (optimize
>= 1 && !global_options_set
.x_flag_omit_frame_pointer
)
3648 flag_omit_frame_pointer
= !(USE_IX86_FRAME_POINTER
|| optimize_size
);
3649 if (flag_asynchronous_unwind_tables
== 2)
3650 flag_asynchronous_unwind_tables
= !USE_IX86_FRAME_POINTER
;
3651 if (flag_pcc_struct_return
== 2)
3652 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
3656 ix86_cost
= &ix86_size_cost
;
3658 ix86_cost
= processor_target_table
[ix86_tune
].cost
;
3660 /* Arrange to set up i386_stack_locals for all functions. */
3661 init_machine_status
= ix86_init_machine_status
;
3663 /* Validate -mregparm= value. */
3664 if (global_options_set
.x_ix86_regparm
)
3667 warning (0, "-mregparm is ignored in 64-bit mode");
3668 if (ix86_regparm
> REGPARM_MAX
)
3670 error ("-mregparm=%d is not between 0 and %d",
3671 ix86_regparm
, REGPARM_MAX
);
3676 ix86_regparm
= REGPARM_MAX
;
3678 /* Default align_* from the processor table. */
3679 if (align_loops
== 0)
3681 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
3682 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
3684 if (align_jumps
== 0)
3686 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
3687 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
3689 if (align_functions
== 0)
3691 align_functions
= processor_target_table
[ix86_tune
].align_func
;
3694 /* Provide default for -mbranch-cost= value. */
3695 if (!global_options_set
.x_ix86_branch_cost
)
3696 ix86_branch_cost
= ix86_cost
->branch_cost
;
3700 target_flags
|= TARGET_SUBTARGET64_DEFAULT
& ~target_flags_explicit
;
3702 /* Enable by default the SSE and MMX builtins. Do allow the user to
3703 explicitly disable any of these. In particular, disabling SSE and
3704 MMX for kernel code is extremely useful. */
3705 if (!ix86_arch_specified
)
3707 |= ((OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_MMX
3708 | TARGET_SUBTARGET64_ISA_DEFAULT
) & ~ix86_isa_flags_explicit
);
3711 warning (0, "%srtd%s is ignored in 64bit mode", prefix
, suffix
);
3715 target_flags
|= TARGET_SUBTARGET32_DEFAULT
& ~target_flags_explicit
;
3717 if (!ix86_arch_specified
)
3719 |= TARGET_SUBTARGET32_ISA_DEFAULT
& ~ix86_isa_flags_explicit
;
3721 /* i386 ABI does not specify red zone. It still makes sense to use it
3722 when programmer takes care to stack from being destroyed. */
3723 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
3724 target_flags
|= MASK_NO_RED_ZONE
;
3727 /* Keep nonleaf frame pointers. */
3728 if (flag_omit_frame_pointer
)
3729 target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
3730 else if (TARGET_OMIT_LEAF_FRAME_POINTER
)
3731 flag_omit_frame_pointer
= 1;
3733 /* If we're doing fast math, we don't care about comparison order
3734 wrt NaNs. This lets us use a shorter comparison sequence. */
3735 if (flag_finite_math_only
)
3736 target_flags
&= ~MASK_IEEE_FP
;
3738 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3739 since the insns won't need emulation. */
3740 if (x86_arch_always_fancy_math_387
& ix86_arch_mask
)
3741 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
3743 /* Likewise, if the target doesn't have a 387, or we've specified
3744 software floating point, don't use 387 inline intrinsics. */
3746 target_flags
|= MASK_NO_FANCY_MATH_387
;
3748 /* Turn on MMX builtins for -msse. */
3751 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
& ~ix86_isa_flags_explicit
;
3752 x86_prefetch_sse
= true;
3755 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3756 if (TARGET_SSE4_2
|| TARGET_ABM
)
3757 ix86_isa_flags
|= OPTION_MASK_ISA_POPCNT
& ~ix86_isa_flags_explicit
;
3759 /* Turn on lzcnt instruction for -mabm. */
3761 ix86_isa_flags
|= OPTION_MASK_ISA_LZCNT
& ~ix86_isa_flags_explicit
;
3763 /* Validate -mpreferred-stack-boundary= value or default it to
3764 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3765 ix86_preferred_stack_boundary
= PREFERRED_STACK_BOUNDARY_DEFAULT
;
3766 if (global_options_set
.x_ix86_preferred_stack_boundary_arg
)
3768 int min
= (TARGET_64BIT
? 4 : 2);
3769 int max
= (TARGET_SEH
? 4 : 12);
3771 if (ix86_preferred_stack_boundary_arg
< min
3772 || ix86_preferred_stack_boundary_arg
> max
)
3775 error ("-mpreferred-stack-boundary is not supported "
3778 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3779 ix86_preferred_stack_boundary_arg
, min
, max
);
3782 ix86_preferred_stack_boundary
3783 = (1 << ix86_preferred_stack_boundary_arg
) * BITS_PER_UNIT
;
3786 /* Set the default value for -mstackrealign. */
3787 if (ix86_force_align_arg_pointer
== -1)
3788 ix86_force_align_arg_pointer
= STACK_REALIGN_DEFAULT
;
3790 ix86_default_incoming_stack_boundary
= PREFERRED_STACK_BOUNDARY
;
3792 /* Validate -mincoming-stack-boundary= value or default it to
3793 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3794 ix86_incoming_stack_boundary
= ix86_default_incoming_stack_boundary
;
3795 if (global_options_set
.x_ix86_incoming_stack_boundary_arg
)
3797 if (ix86_incoming_stack_boundary_arg
< (TARGET_64BIT
? 4 : 2)
3798 || ix86_incoming_stack_boundary_arg
> 12)
3799 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3800 ix86_incoming_stack_boundary_arg
, TARGET_64BIT
? 4 : 2);
3803 ix86_user_incoming_stack_boundary
3804 = (1 << ix86_incoming_stack_boundary_arg
) * BITS_PER_UNIT
;
3805 ix86_incoming_stack_boundary
3806 = ix86_user_incoming_stack_boundary
;
3810 /* Accept -msseregparm only if at least SSE support is enabled. */
3811 if (TARGET_SSEREGPARM
3813 error ("%ssseregparm%s used without SSE enabled", prefix
, suffix
);
3815 if (global_options_set
.x_ix86_fpmath
)
3817 if (ix86_fpmath
& FPMATH_SSE
)
3821 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3822 ix86_fpmath
= FPMATH_387
;
3824 else if ((ix86_fpmath
& FPMATH_387
) && !TARGET_80387
)
3826 warning (0, "387 instruction set disabled, using SSE arithmetics");
3827 ix86_fpmath
= FPMATH_SSE
;
3832 ix86_fpmath
= TARGET_FPMATH_DEFAULT
;
3834 /* If the i387 is disabled, then do not return values in it. */
3836 target_flags
&= ~MASK_FLOAT_RETURNS
;
3838 /* Use external vectorized library in vectorizing intrinsics. */
3839 if (global_options_set
.x_ix86_veclibabi_type
)
3840 switch (ix86_veclibabi_type
)
3842 case ix86_veclibabi_type_svml
:
3843 ix86_veclib_handler
= ix86_veclibabi_svml
;
3846 case ix86_veclibabi_type_acml
:
3847 ix86_veclib_handler
= ix86_veclibabi_acml
;
3854 if ((!USE_IX86_FRAME_POINTER
3855 || (x86_accumulate_outgoing_args
& ix86_tune_mask
))
3856 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3858 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3860 /* ??? Unwind info is not correct around the CFG unless either a frame
3861 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3862 unwind info generation to be aware of the CFG and propagating states
3864 if ((flag_unwind_tables
|| flag_asynchronous_unwind_tables
3865 || flag_exceptions
|| flag_non_call_exceptions
)
3866 && flag_omit_frame_pointer
3867 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
3869 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3870 warning (0, "unwind tables currently require either a frame pointer "
3871 "or %saccumulate-outgoing-args%s for correctness",
3873 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3876 /* If stack probes are required, the space used for large function
3877 arguments on the stack must also be probed, so enable
3878 -maccumulate-outgoing-args so this happens in the prologue. */
3879 if (TARGET_STACK_PROBE
3880 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
3882 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3883 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3884 "for correctness", prefix
, suffix
);
3885 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3888 /* For sane SSE instruction set generation we need fcomi instruction.
3889 It is safe to enable all CMOVE instructions. Also, RDRAND intrinsic
3890 expands to a sequence that includes conditional move. */
3891 if (TARGET_SSE
|| TARGET_RDRND
)
3894 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3897 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
3898 p
= strchr (internal_label_prefix
, 'X');
3899 internal_label_prefix_len
= p
- internal_label_prefix
;
3903 /* When scheduling description is not available, disable scheduler pass
3904 so it won't slow down the compilation and make x87 code slower. */
3905 if (!TARGET_SCHEDULE
)
3906 flag_schedule_insns_after_reload
= flag_schedule_insns
= 0;
3908 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
3909 ix86_cost
->simultaneous_prefetches
,
3910 global_options
.x_param_values
,
3911 global_options_set
.x_param_values
);
3912 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
, ix86_cost
->prefetch_block
,
3913 global_options
.x_param_values
,
3914 global_options_set
.x_param_values
);
3915 maybe_set_param_value (PARAM_L1_CACHE_SIZE
, ix86_cost
->l1_cache_size
,
3916 global_options
.x_param_values
,
3917 global_options_set
.x_param_values
);
3918 maybe_set_param_value (PARAM_L2_CACHE_SIZE
, ix86_cost
->l2_cache_size
,
3919 global_options
.x_param_values
,
3920 global_options_set
.x_param_values
);
3922 /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. */
3923 if (flag_prefetch_loop_arrays
< 0
3926 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL
)
3927 flag_prefetch_loop_arrays
= 1;
3929 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3930 can be optimized to ap = __builtin_next_arg (0). */
3931 if (!TARGET_64BIT
&& !flag_split_stack
)
3932 targetm
.expand_builtin_va_start
= NULL
;
3936 ix86_gen_leave
= gen_leave_rex64
;
3937 ix86_gen_add3
= gen_adddi3
;
3938 ix86_gen_sub3
= gen_subdi3
;
3939 ix86_gen_sub3_carry
= gen_subdi3_carry
;
3940 ix86_gen_one_cmpl2
= gen_one_cmpldi2
;
3941 ix86_gen_monitor
= gen_sse3_monitor64
;
3942 ix86_gen_andsp
= gen_anddi3
;
3943 ix86_gen_allocate_stack_worker
= gen_allocate_stack_worker_probe_di
;
3944 ix86_gen_adjust_stack_and_probe
= gen_adjust_stack_and_probedi
;
3945 ix86_gen_probe_stack_range
= gen_probe_stack_rangedi
;
3949 ix86_gen_leave
= gen_leave
;
3950 ix86_gen_add3
= gen_addsi3
;
3951 ix86_gen_sub3
= gen_subsi3
;
3952 ix86_gen_sub3_carry
= gen_subsi3_carry
;
3953 ix86_gen_one_cmpl2
= gen_one_cmplsi2
;
3954 ix86_gen_monitor
= gen_sse3_monitor
;
3955 ix86_gen_andsp
= gen_andsi3
;
3956 ix86_gen_allocate_stack_worker
= gen_allocate_stack_worker_probe_si
;
3957 ix86_gen_adjust_stack_and_probe
= gen_adjust_stack_and_probesi
;
3958 ix86_gen_probe_stack_range
= gen_probe_stack_rangesi
;
3962 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3964 target_flags
|= MASK_CLD
& ~target_flags_explicit
;
3967 if (!TARGET_64BIT
&& flag_pic
)
3969 if (flag_fentry
> 0)
3970 sorry ("-mfentry isn%'t supported for 32-bit in combination "
3974 else if (TARGET_SEH
)
3976 if (flag_fentry
== 0)
3977 sorry ("-mno-fentry isn%'t compatible with SEH");
3980 else if (flag_fentry
< 0)
3982 #if defined(PROFILE_BEFORE_PROLOGUE)
3991 /* When not optimize for size, enable vzeroupper optimization for
3992 TARGET_AVX with -fexpensive-optimizations and split 32-byte
3993 AVX unaligned load/store. */
3996 if (flag_expensive_optimizations
3997 && !(target_flags_explicit
& MASK_VZEROUPPER
))
3998 target_flags
|= MASK_VZEROUPPER
;
3999 if ((x86_avx256_split_unaligned_load
& ix86_tune_mask
)
4000 && !(target_flags_explicit
& MASK_AVX256_SPLIT_UNALIGNED_LOAD
))
4001 target_flags
|= MASK_AVX256_SPLIT_UNALIGNED_LOAD
;
4002 if ((x86_avx256_split_unaligned_store
& ix86_tune_mask
)
4003 && !(target_flags_explicit
& MASK_AVX256_SPLIT_UNALIGNED_STORE
))
4004 target_flags
|= MASK_AVX256_SPLIT_UNALIGNED_STORE
;
4005 /* Enable 128-bit AVX instruction generation for the auto-vectorizer. */
4006 if (TARGET_AVX128_OPTIMAL
&& !(target_flags_explicit
& MASK_PREFER_AVX128
))
4007 target_flags
|= MASK_PREFER_AVX128
;
4012 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
4013 target_flags
&= ~MASK_VZEROUPPER
;
4016 if (ix86_recip_name
)
4018 char *p
= ASTRDUP (ix86_recip_name
);
4020 unsigned int mask
, i
;
4023 while ((q
= strtok (p
, ",")) != NULL
)
4034 if (!strcmp (q
, "default"))
4035 mask
= RECIP_MASK_ALL
;
4038 for (i
= 0; i
< ARRAY_SIZE (recip_options
); i
++)
4039 if (!strcmp (q
, recip_options
[i
].string
))
4041 mask
= recip_options
[i
].mask
;
4045 if (i
== ARRAY_SIZE (recip_options
))
4047 error ("unknown option for -mrecip=%s", q
);
4049 mask
= RECIP_MASK_NONE
;
4053 recip_mask_explicit
|= mask
;
4055 recip_mask
&= ~mask
;
4062 recip_mask
|= RECIP_MASK_ALL
& ~recip_mask_explicit
;
4063 else if (target_flags_explicit
& MASK_RECIP
)
4064 recip_mask
&= ~(RECIP_MASK_ALL
& ~recip_mask_explicit
);
4066 /* Save the initial options in case the user does function specific
4069 target_option_default_node
= target_option_current_node
4070 = build_target_option_node ();
4073 /* Return TRUE if VAL is passed in register with 256bit AVX modes. */
/* VAL is the rtx describing where a function argument/value lives.  It is
   a 256-bit AVX value either when it is a plain REG in a 256-bit AVX mode,
   or when it is a PARALLEL whose EXPR_LIST entries contain a REG in OImode
   or a 256-bit AVX mode.
   NOTE(review): this extract elides some original lines (opening brace,
   the return statements, local declarations of i and r); documentation
   below is based only on the visible code.  */
4076 function_pass_avx256_p (const_rtx val
)
/* Fast path: VAL itself is a hard register in a 256-bit AVX mode.  */
4081 if (REG_P (val
) && VALID_AVX256_REG_MODE (GET_MODE (val
)))
/* A value split across several registers is represented as a PARALLEL;
   scan every element for a 256-bit piece.  */
4084 if (GET_CODE (val
) == PARALLEL
)
/* Walk the PARALLEL's vector backwards over all elements.  */
4089 for (i
= XVECLEN (val
, 0) - 1; i
>= 0; i
--)
4091 r
= XVECEXP (val
, 0, i
);
/* Each element is expected to be an (expr_list (reg ...) (const_int ...));
   test whether the register piece is OImode (256-bit integer) or a
   256-bit AVX vector mode.  */
4092 if (GET_CODE (r
) == EXPR_LIST
4094 && REG_P (XEXP (r
, 0))
4095 && (GET_MODE (XEXP (r
, 0)) == OImode
4096 || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r
, 0)))))
4104 /* Implement the TARGET_OPTION_OVERRIDE hook. */
/* Thin wrapper: delegates all option processing to the internal worker.
   The TRUE argument distinguishes the command-line path from the
   attribute((target(...))) path, which calls the same worker with FALSE
   (see ix86_valid_target_attribute_tree, original line 4577).  */
4107 ix86_option_override (void)
4109 ix86_option_override_internal (true);
4112 /* Update register usage after having seen the compiler flags. */
/* Adjusts the global fixed_regs[] / call_used_regs[] tables and the
   reg_class_contents[] sets according to the target flags now in effect
   (64-bit mode, MS ABI, MMX/SSE/x87 availability).
   NOTE(review): this extract elides some original lines (the opening
   brace, declarations of i and j, and a few condition lines guarding the
   "squash" loops); comments below describe only the visible code.  */
4115 ix86_conditional_register_usage (void)
/* Registers initialized with a conditional marker (value > 1) resolve
   to fixed/call-used only in the matching mode: marker 3 means
   "in 64-bit mode", marker 2 means "in 32-bit mode".  */
4120 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4122 if (fixed_regs
[i
] > 1)
4123 fixed_regs
[i
] = (fixed_regs
[i
] == (TARGET_64BIT
? 3 : 2));
4124 if (call_used_regs
[i
] > 1)
4125 call_used_regs
[i
] = (call_used_regs
[i
] == (TARGET_64BIT
? 3 : 2));
4128 /* The PIC register, if it exists, is fixed. */
4129 j
= PIC_OFFSET_TABLE_REGNUM
;
4130 if (j
!= INVALID_REGNUM
)
4131 fixed_regs
[j
] = call_used_regs
[j
] = 1;
4133 /* The 64-bit MS_ABI changes the set of call-used registers. */
/* Under the Windows 64-bit ABI, SI, DI, XMM6/XMM7 and the high SSE
   registers are callee-saved, so clear their call_used_regs entries.  */
4134 if (TARGET_64BIT_MS_ABI
)
4136 call_used_regs
[SI_REG
] = 0;
4137 call_used_regs
[DI_REG
] = 0;
4138 call_used_regs
[XMM6_REG
] = 0;
4139 call_used_regs
[XMM7_REG
] = 0;
4140 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
4141 call_used_regs
[i
] = 0;
4144 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
4145 other call-clobbered regs for 64-bit. */
/* Rebuild CLOBBERED_REGS from scratch: every general register that is
   call-used under the current ABI.  */
4148 CLEAR_HARD_REG_SET (reg_class_contents
[(int)CLOBBERED_REGS
]);
4150 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4151 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)GENERAL_REGS
], i
)
4152 && call_used_regs
[i
])
4153 SET_HARD_REG_BIT (reg_class_contents
[(int)CLOBBERED_REGS
], i
);
4156 /* If MMX is disabled, squash the registers. */
/* "Squash" = mark fixed and call-used and erase the register name so
   the register allocator and assembler output never use it.
   NOTE(review): the TARGET_MMX / TARGET_SSE guard lines for these loops
   are not visible in this extract — presumably elided.  */
4158 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4159 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)MMX_REGS
], i
))
4160 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4162 /* If SSE is disabled, squash the registers. */
4164 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4165 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)SSE_REGS
], i
))
4166 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4168 /* If the FPU is disabled, squash the registers. */
4169 if (! (TARGET_80387
|| TARGET_FLOAT_RETURNS_IN_80387
))
4170 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4171 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)FLOAT_REGS
], i
))
4172 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4174 /* If 32-bit, squash the 64-bit registers. */
/* R8-R15 and XMM8-XMM15 do not exist in 32-bit mode.
   NOTE(review): the loop bodies (original lines 4178 and 4180) are not
   visible in this extract.  */
4177 for (i
= FIRST_REX_INT_REG
; i
<= LAST_REX_INT_REG
; i
++)
4179 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
4185 /* Save the current options */
/* Copies the current global option state into PTR, the per-function
   cl_target_option save area used by attribute((target(...))) and
   #pragma GCC target.  Counterpart of ix86_function_specific_restore.  */
4188 ix86_function_specific_save (struct cl_target_option
*ptr
)
4190 ptr
->arch
= ix86_arch
;
4191 ptr
->schedule
= ix86_schedule
;
4192 ptr
->tune
= ix86_tune
;
4193 ptr
->branch_cost
= ix86_branch_cost
;
4194 ptr
->tune_defaulted
= ix86_tune_defaulted
;
4195 ptr
->arch_specified
= ix86_arch_specified
;
4196 ptr
->x_ix86_isa_flags_explicit
= ix86_isa_flags_explicit
;
4197 ptr
->ix86_target_flags_explicit
= target_flags_explicit
;
4198 ptr
->x_recip_mask_explicit
= recip_mask_explicit
;
4200 /* The fields are char but the variables are not; make sure the
4201 values fit in the fields. */
/* If any of these asserts fire, the narrow struct fields silently
   truncated the value during the assignments above.  */
4202 gcc_assert (ptr
->arch
== ix86_arch
);
4203 gcc_assert (ptr
->schedule
== ix86_schedule
);
4204 gcc_assert (ptr
->tune
== ix86_tune
);
4205 gcc_assert (ptr
->branch_cost
== ix86_branch_cost
);
4208 /* Restore the current options */
/* Inverse of ix86_function_specific_save: copies the saved option state
   in PTR back into the globals, then recomputes the cached
   ix86_arch_features[] / ix86_tune_features[] tables — but only when the
   arch or tune actually changed, since recomputation is the expensive
   part.
   NOTE(review): the opening brace and the declaration of loop variable i
   are elided from this extract.  */
4211 ix86_function_specific_restore (struct cl_target_option
*ptr
)
/* Remember the previous arch/tune so we can skip rebuilding the feature
   tables when they are unchanged.  */
4213 enum processor_type old_tune
= ix86_tune
;
4214 enum processor_type old_arch
= ix86_arch
;
4215 unsigned int ix86_arch_mask
, ix86_tune_mask
;
/* The struct fields are narrow (char); cast back to the enum types.  */
4218 ix86_arch
= (enum processor_type
) ptr
->arch
;
4219 ix86_schedule
= (enum attr_cpu
) ptr
->schedule
;
4220 ix86_tune
= (enum processor_type
) ptr
->tune
;
4221 ix86_branch_cost
= ptr
->branch_cost
;
4222 ix86_tune_defaulted
= ptr
->tune_defaulted
;
4223 ix86_arch_specified
= ptr
->arch_specified
;
4224 ix86_isa_flags_explicit
= ptr
->x_ix86_isa_flags_explicit
;
4225 target_flags_explicit
= ptr
->ix86_target_flags_explicit
;
4226 recip_mask_explicit
= ptr
->x_recip_mask_explicit
;
4228 /* Recreate the arch feature tests if the arch changed */
4229 if (old_arch
!= ix86_arch
)
4231 ix86_arch_mask
= 1u << ix86_arch
;
4232 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
/* Each feature is on iff its per-arch bitmask includes this arch.  */
4233 ix86_arch_features
[i
]
4234 = !!(initial_ix86_arch_features
[i
] & ix86_arch_mask
);
4237 /* Recreate the tune optimization tests */
4238 if (old_tune
!= ix86_tune
)
4240 ix86_tune_mask
= 1u << ix86_tune
;
4241 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
4242 ix86_tune_features
[i
]
4243 = !!(initial_ix86_tune_features
[i
] & ix86_tune_mask
);
4247 /* Print the current options */
/* Debug helper: dumps the target options saved in PTR to FILE, each line
   indented by INDENT spaces (the "%*s" idiom below).
   NOTE(review): several original lines are elided from this extract —
   the declaration of target_string, the lines passing indent/"" and the
   numeric arch/tune values to fprintf, and the fallback strings used when
   arch/tune is out of range of cpu_names[].  */
4250 ix86_function_specific_print (FILE *file
, int indent
,
4251 struct cl_target_option
*ptr
)
/* Build a human-readable "-m..." option string from the saved ISA and
   target flags; freed at the end of the function.  */
4254 = ix86_target_string (ptr
->x_ix86_isa_flags
, ptr
->x_target_flags
,
4255 NULL
, NULL
, ptr
->x_ix86_fpmath
, false);
/* Print the arch as both its numeric enum value and, when in range,
   its name from cpu_names[].  */
4257 fprintf (file
, "%*sarch = %d (%s)\n",
4260 ((ptr
->arch
< TARGET_CPU_DEFAULT_max
)
4261 ? cpu_names
[ptr
->arch
]
/* Same formatting for the tune setting.  */
4264 fprintf (file
, "%*stune = %d (%s)\n",
4267 ((ptr
->tune
< TARGET_CPU_DEFAULT_max
)
4268 ? cpu_names
[ptr
->tune
]
4271 fprintf (file
, "%*sbranch_cost = %d\n", indent
, "", ptr
->branch_cost
);
4275 fprintf (file
, "%*s%s\n", indent
, "", target_string
);
/* target_string was allocated by ix86_target_string above.  */
4276 free (target_string
);
4281 /* Inner function to process the attribute((target(...))), take an argument and
4282 set the current options from the argument. If we have a list, recursively go
/* ARGS is either a TREE_LIST (recursed over) or a STRING_CST holding a
   comma-separated list of option names such as "sse4.1,no-fancy-math-387".
   P_STRINGS collects the values of the string options (arch=/tune=) for
   the caller; ENUM_OPTS_SET records which enum options (fpmath=) were set.
   NOTE(review): this extract elides many original lines (return
   statements, the attrs[] table header, declarations of orig_p/ch/opt/
   mask/opt_set_p and several braces); comments describe visible code only.  */
4286 ix86_valid_target_attribute_inner_p (tree args
, char *p_strings
[],
4287 struct gcc_options
*enum_opts_set
)
/* Helpers to build attrs[] entries: option name, its length, how it is
   processed (isa / string / enum / boolean mask on / boolean mask off),
   the option enum, and the target_flags mask where applicable.  */
4292 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4293 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4294 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4295 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4296 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4312 enum ix86_opt_type type
;
/* ISA options: each maps an attribute name to the equivalent -m option.  */
4317 IX86_ATTR_ISA ("3dnow", OPT_m3dnow
),
4318 IX86_ATTR_ISA ("abm", OPT_mabm
),
4319 IX86_ATTR_ISA ("bmi", OPT_mbmi
),
4320 IX86_ATTR_ISA ("bmi2", OPT_mbmi2
),
4321 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt
),
4322 IX86_ATTR_ISA ("tbm", OPT_mtbm
),
4323 IX86_ATTR_ISA ("aes", OPT_maes
),
4324 IX86_ATTR_ISA ("avx", OPT_mavx
),
4325 IX86_ATTR_ISA ("avx2", OPT_mavx2
),
4326 IX86_ATTR_ISA ("mmx", OPT_mmmx
),
4327 IX86_ATTR_ISA ("pclmul", OPT_mpclmul
),
4328 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt
),
4329 IX86_ATTR_ISA ("sse", OPT_msse
),
4330 IX86_ATTR_ISA ("sse2", OPT_msse2
),
4331 IX86_ATTR_ISA ("sse3", OPT_msse3
),
4332 IX86_ATTR_ISA ("sse4", OPT_msse4
),
4333 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1
),
4334 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2
),
4335 IX86_ATTR_ISA ("sse4a", OPT_msse4a
),
4336 IX86_ATTR_ISA ("ssse3", OPT_mssse3
),
4337 IX86_ATTR_ISA ("fma4", OPT_mfma4
),
4338 IX86_ATTR_ISA ("fma", OPT_mfma
),
4339 IX86_ATTR_ISA ("xop", OPT_mxop
),
4340 IX86_ATTR_ISA ("lwp", OPT_mlwp
),
4341 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase
),
4342 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd
),
4343 IX86_ATTR_ISA ("f16c", OPT_mf16c
),
/* enum options */
4346 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_
),
4348 /* string options */
4349 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH
),
4350 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE
),
/* Boolean flag options setting/clearing a target_flags mask.  */
4353 IX86_ATTR_YES ("cld",
4357 IX86_ATTR_NO ("fancy-math-387",
4358 OPT_mfancy_math_387
,
4359 MASK_NO_FANCY_MATH_387
),
4361 IX86_ATTR_YES ("ieee-fp",
4365 IX86_ATTR_YES ("inline-all-stringops",
4366 OPT_minline_all_stringops
,
4367 MASK_INLINE_ALL_STRINGOPS
),
4369 IX86_ATTR_YES ("inline-stringops-dynamically",
4370 OPT_minline_stringops_dynamically
,
4371 MASK_INLINE_STRINGOPS_DYNAMICALLY
),
4373 IX86_ATTR_NO ("align-stringops",
4374 OPT_mno_align_stringops
,
4375 MASK_NO_ALIGN_STRINGOPS
),
4377 IX86_ATTR_YES ("recip",
4383 /* If this is a list, recurse to get the options. */
4384 if (TREE_CODE (args
) == TREE_LIST
)
4388 for (; args
; args
= TREE_CHAIN (args
))
4389 if (TREE_VALUE (args
)
4390 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args
),
4391 p_strings
, enum_opts_set
))
/* Anything other than a list or a string is rejected.  */
4397 else if (TREE_CODE (args
) != STRING_CST
)
4400 /* Handle multiple arguments separated by commas. */
/* ASTRDUP makes a scratch copy we can carve up with pointer arithmetic.  */
4401 next_optstr
= ASTRDUP (TREE_STRING_POINTER (args
));
4403 while (next_optstr
&& *next_optstr
!= '\0')
4405 char *p
= next_optstr
;
4407 char *comma
= strchr (next_optstr
, ',');
4408 const char *opt_string
;
4409 size_t len
, opt_len
;
4414 enum ix86_opt_type type
= ix86_opt_unknown
;
/* A comma was found: current token runs up to it; continue after it.  */
4420 len
= comma
- next_optstr
;
4421 next_optstr
= comma
+ 1;
4429 /* Recognize no-xxx. */
4430 if (len
> 3 && p
[0] == 'n' && p
[1] == 'o' && p
[2] == '-')
4439 /* Find the option. */
/* Linear scan of attrs[]; first-character check is a cheap pre-filter
   before the full memcmp.  */
4442 for (i
= 0; i
< ARRAY_SIZE (attrs
); i
++)
4444 type
= attrs
[i
].type
;
4445 opt_len
= attrs
[i
].len
;
4446 if (ch
== attrs
[i
].string
[0]
4447 && ((type
!= ix86_opt_str
&& type
!= ix86_opt_enum
)
4450 && memcmp (p
, attrs
[i
].string
, opt_len
) == 0)
4453 mask
= attrs
[i
].mask
;
4454 opt_string
= attrs
[i
].string
;
4459 /* Process the option. */
/* type stayed ix86_opt_unknown: the scan above found no match.  */
4462 error ("attribute(target(\"%s\")) is unknown", orig_p
);
/* ISA options are funneled through the regular option machinery so the
   usual flag/implied-flag handling applies.  */
4466 else if (type
== ix86_opt_isa
)
4468 struct cl_decoded_option decoded
;
4470 generate_option (opt
, NULL
, opt_set_p
, CL_TARGET
, &decoded
);
4471 ix86_handle_option (&global_options
, &global_options_set
,
4472 &decoded
, input_location
);
/* Boolean mask options: ix86_opt_no entries invert the sense first,
   then the mask is set or cleared in target_flags.  */
4475 else if (type
== ix86_opt_yes
|| type
== ix86_opt_no
)
4477 if (type
== ix86_opt_no
)
4478 opt_set_p
= !opt_set_p
;
4481 target_flags
|= mask
;
4483 target_flags
&= ~mask
;
/* String options (arch=/tune=): stored into p_strings[] for the caller;
   specifying one twice is an error.  */
4486 else if (type
== ix86_opt_str
)
4490 error ("option(\"%s\") was already specified", opt_string
);
4494 p_strings
[opt
] = xstrdup (p
+ opt_len
);
/* Enum options (fpmath=): validate the argument text and record the
   value via the generic set_option machinery.  */
4497 else if (type
== ix86_opt_enum
)
4502 arg_ok
= opt_enum_arg_to_value (opt
, p
+ opt_len
, &value
, CL_TARGET
);
4504 set_option (&global_options
, enum_opts_set
, opt
, value
,
4505 p
+ opt_len
, DK_UNSPECIFIED
, input_location
,
4509 error ("attribute(target(\"%s\")) is unknown", orig_p
);
4521 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
/* Parses the attribute((target(...))) arguments in ARGS, temporarily
   applies them on top of the current global options, snapshots the result
   into a target-option tree node, then restores the original globals.
   NOTE(review): this extract elides some original lines (declarations of
   t and i, the opening brace, the second argument line of the
   inner_p call, and an early-return path); comments describe visible
   code only.  */
4524 ix86_valid_target_attribute_tree (tree args
)
/* Save everything the option override pass may clobber so it can be
   restored after the snapshot is taken.  */
4526 const char *orig_arch_string
= ix86_arch_string
;
4527 const char *orig_tune_string
= ix86_tune_string
;
4528 enum fpmath_unit orig_fpmath_set
= global_options_set
.x_ix86_fpmath
;
4529 int orig_tune_defaulted
= ix86_tune_defaulted
;
4530 int orig_arch_specified
= ix86_arch_specified
;
/* Slots for the arch=/tune= strings produced by the inner parser;
   owned here and freed at the end.  */
4531 char *option_strings
[IX86_FUNCTION_SPECIFIC_MAX
] = { NULL
, NULL
};
/* The baseline to diff against: the saved default target options.  */
4534 struct cl_target_option
*def
4535 = TREE_TARGET_OPTION (target_option_default_node
)
4536 struct gcc_options enum_opts_set
;
4538 memset (&enum_opts_set
, 0, sizeof (enum_opts_set
));
4540 /* Process each of the options on the chain. */
4541 if (! ix86_valid_target_attribute_inner_p (args
, option_strings
,
4545 /* If the changed options are different from the default, rerun
4546 ix86_option_override_internal, and then save the options away.
4547 The string options are are attribute options, and will be undone
4548 when we copy the save structure. */
4549 if (ix86_isa_flags
!= def
->x_ix86_isa_flags
4550 || target_flags
!= def
->x_target_flags
4551 || option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
]
4552 || option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
]
4553 || enum_opts_set
.x_ix86_fpmath
)
4555 /* If we are using the default tune= or arch=, undo the string assigned,
4556 and use the default. */
4557 if (option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
])
4558 ix86_arch_string
= option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
];
4559 else if (!orig_arch_specified
)
4560 ix86_arch_string
= NULL
;
4562 if (option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
])
4563 ix86_tune_string
= option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
];
4564 else if (orig_tune_defaulted
)
4565 ix86_tune_string
= NULL
;
4567 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4568 if (enum_opts_set
.x_ix86_fpmath
)
4569 global_options_set
.x_ix86_fpmath
= (enum fpmath_unit
) 1;
4570 else if (!TARGET_64BIT
&& TARGET_SSE
)
4572 ix86_fpmath
= (enum fpmath_unit
) (FPMATH_SSE
| FPMATH_387
);
4573 global_options_set
.x_ix86_fpmath
= (enum fpmath_unit
) 1;
4576 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
/* false = attribute path (not the command-line path).  */
4577 ix86_option_override_internal (false);
4579 /* Add any builtin functions with the new isa if any. */
4580 ix86_add_new_builtins (ix86_isa_flags
);
4582 /* Save the current options unless we are validating options for
4584 t
= build_target_option_node ();
/* Restore the globals that were overwritten above.  */
4586 ix86_arch_string
= orig_arch_string
;
4587 ix86_tune_string
= orig_tune_string
;
4588 global_options_set
.x_ix86_fpmath
= orig_fpmath_set
;
4590 /* Free up memory allocated to hold the strings */
4591 for (i
= 0; i
< IX86_FUNCTION_SPECIFIC_MAX
; i
++)
4592 free (option_strings
[i
]);
4598 /* Hook to validate attribute((target("string"))). */
4601 ix86_valid_target_attribute_p (tree fndecl
,
4602 tree
ARG_UNUSED (name
),
4604 int ARG_UNUSED (flags
))
4606 struct cl_target_option cur_target
;
4608 tree old_optimize
= build_optimization_node ();
4609 tree new_target
, new_optimize
;
4610 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
4612 /* If the function changed the optimization levels as well as setting target
4613 options, start with the optimizations specified. */
4614 if (func_optimize
&& func_optimize
!= old_optimize
)
4615 cl_optimization_restore (&global_options
,
4616 TREE_OPTIMIZATION (func_optimize
));
4618 /* The target attributes may also change some optimization flags, so update
4619 the optimization options if necessary. */
4620 cl_target_option_save (&cur_target
, &global_options
);
4621 new_target
= ix86_valid_target_attribute_tree (args
);
4622 new_optimize
= build_optimization_node ();
4629 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = new_target
;
4631 if (old_optimize
!= new_optimize
)
4632 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
4635 cl_target_option_restore (&global_options
, &cur_target
);
4637 if (old_optimize
!= new_optimize
)
4638 cl_optimization_restore (&global_options
,
4639 TREE_OPTIMIZATION (old_optimize
));
4645 /* Hook to determine if one function can safely inline another. */
4648 ix86_can_inline_p (tree caller
, tree callee
)
4651 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
4652 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
4654 /* If callee has no option attributes, then it is ok to inline. */
4658 /* If caller has no option attributes, but callee does then it is not ok to
4660 else if (!caller_tree
)
4665 struct cl_target_option
*caller_opts
= TREE_TARGET_OPTION (caller_tree
);
4666 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
4668 /* Callee's isa options should a subset of the caller's, i.e. a SSE4 function
4669 can inline a SSE2 function but a SSE2 function can't inline a SSE4
4671 if ((caller_opts
->x_ix86_isa_flags
& callee_opts
->x_ix86_isa_flags
)
4672 != callee_opts
->x_ix86_isa_flags
)
4675 /* See if we have the same non-isa options. */
4676 else if (caller_opts
->x_target_flags
!= callee_opts
->x_target_flags
)
4679 /* See if arch, tune, etc. are the same. */
4680 else if (caller_opts
->arch
!= callee_opts
->arch
)
4683 else if (caller_opts
->tune
!= callee_opts
->tune
)
4686 else if (caller_opts
->x_ix86_fpmath
!= callee_opts
->x_ix86_fpmath
)
4689 else if (caller_opts
->branch_cost
!= callee_opts
->branch_cost
)
4700 /* Remember the last target of ix86_set_current_function. */
4701 static GTY(()) tree ix86_previous_fndecl
;
4703 /* Establish appropriate back-end context for processing the function
4704 FNDECL. The argument might be NULL to indicate processing at top
4705 level, outside of any function scope. */
4707 ix86_set_current_function (tree fndecl
)
4709 /* Only change the context if the function changes. This hook is called
4710 several times in the course of compiling a function, and we don't want to
4711 slow things down too much or call target_reinit when it isn't safe. */
4712 if (fndecl
&& fndecl
!= ix86_previous_fndecl
)
4714 tree old_tree
= (ix86_previous_fndecl
4715 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl
)
4718 tree new_tree
= (fndecl
4719 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl
)
4722 ix86_previous_fndecl
= fndecl
;
4723 if (old_tree
== new_tree
)
4728 cl_target_option_restore (&global_options
,
4729 TREE_TARGET_OPTION (new_tree
));
4735 struct cl_target_option
*def
4736 = TREE_TARGET_OPTION (target_option_current_node
);
4738 cl_target_option_restore (&global_options
, def
);
4745 /* Return true if this goes in large data/bss. */
4748 ix86_in_large_data_p (tree exp
)
4750 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
4753 /* Functions are never large data. */
4754 if (TREE_CODE (exp
) == FUNCTION_DECL
)
4757 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
4759 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
4760 if (strcmp (section
, ".ldata") == 0
4761 || strcmp (section
, ".lbss") == 0)
4767 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
4769 /* If this is an incomplete type with size 0, then we can't put it
4770 in data because it might be too big when completed. */
4771 if (!size
|| size
> ix86_section_threshold
)
4778 /* Switch to the appropriate section for output of DECL.
4779 DECL is either a `VAR_DECL' node or a constant of some sort.
4780 RELOC indicates whether forming the initial value of DECL requires
4781 link-time relocations. */
4783 static section
* x86_64_elf_select_section (tree
, int, unsigned HOST_WIDE_INT
)
4787 x86_64_elf_select_section (tree decl
, int reloc
,
4788 unsigned HOST_WIDE_INT align
)
4790 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4791 && ix86_in_large_data_p (decl
))
4793 const char *sname
= NULL
;
4794 unsigned int flags
= SECTION_WRITE
;
4795 switch (categorize_decl_for_section (decl
, reloc
))
4800 case SECCAT_DATA_REL
:
4801 sname
= ".ldata.rel";
4803 case SECCAT_DATA_REL_LOCAL
:
4804 sname
= ".ldata.rel.local";
4806 case SECCAT_DATA_REL_RO
:
4807 sname
= ".ldata.rel.ro";
4809 case SECCAT_DATA_REL_RO_LOCAL
:
4810 sname
= ".ldata.rel.ro.local";
4814 flags
|= SECTION_BSS
;
4817 case SECCAT_RODATA_MERGE_STR
:
4818 case SECCAT_RODATA_MERGE_STR_INIT
:
4819 case SECCAT_RODATA_MERGE_CONST
:
4823 case SECCAT_SRODATA
:
4830 /* We don't split these for medium model. Place them into
4831 default sections and hope for best. */
4836 /* We might get called with string constants, but get_named_section
4837 doesn't like them as they are not DECLs. Also, we need to set
4838 flags in that case. */
4840 return get_section (sname
, flags
, NULL
);
4841 return get_named_section (decl
, sname
, reloc
);
4844 return default_elf_select_section (decl
, reloc
, align
);
4847 /* Build up a unique section name, expressed as a
4848 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4849 RELOC indicates whether the initial value of EXP requires
4850 link-time relocations. */
4852 static void ATTRIBUTE_UNUSED
4853 x86_64_elf_unique_section (tree decl
, int reloc
)
4855 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4856 && ix86_in_large_data_p (decl
))
4858 const char *prefix
= NULL
;
4859 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4860 bool one_only
= DECL_ONE_ONLY (decl
) && !HAVE_COMDAT_GROUP
;
4862 switch (categorize_decl_for_section (decl
, reloc
))
4865 case SECCAT_DATA_REL
:
4866 case SECCAT_DATA_REL_LOCAL
:
4867 case SECCAT_DATA_REL_RO
:
4868 case SECCAT_DATA_REL_RO_LOCAL
:
4869 prefix
= one_only
? ".ld" : ".ldata";
4872 prefix
= one_only
? ".lb" : ".lbss";
4875 case SECCAT_RODATA_MERGE_STR
:
4876 case SECCAT_RODATA_MERGE_STR_INIT
:
4877 case SECCAT_RODATA_MERGE_CONST
:
4878 prefix
= one_only
? ".lr" : ".lrodata";
4880 case SECCAT_SRODATA
:
4887 /* We don't split these for medium model. Place them into
4888 default sections and hope for best. */
4893 const char *name
, *linkonce
;
4896 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
4897 name
= targetm
.strip_name_encoding (name
);
4899 /* If we're using one_only, then there needs to be a .gnu.linkonce
4900 prefix to the section name. */
4901 linkonce
= one_only
? ".gnu.linkonce" : "";
4903 string
= ACONCAT ((linkonce
, prefix
, ".", name
, NULL
));
4905 DECL_SECTION_NAME (decl
) = build_string (strlen (string
), string
);
4909 default_unique_section (decl
, reloc
);
4912 #ifdef COMMON_ASM_OP
4913 /* This says how to output assembler code to declare an
4914 uninitialized external linkage data object.
4916 For medium model x86-64 we need to use .largecomm opcode for
4919 x86_elf_aligned_common (FILE *file
,
4920 const char *name
, unsigned HOST_WIDE_INT size
,
4923 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4924 && size
> (unsigned int)ix86_section_threshold
)
4925 fputs (".largecomm\t", file
);
4927 fputs (COMMON_ASM_OP
, file
);
4928 assemble_name (file
, name
);
4929 fprintf (file
, "," HOST_WIDE_INT_PRINT_UNSIGNED
",%u\n",
4930 size
, align
/ BITS_PER_UNIT
);
4934 /* Utility function for targets to use in implementing
4935 ASM_OUTPUT_ALIGNED_BSS. */
4938 x86_output_aligned_bss (FILE *file
, tree decl ATTRIBUTE_UNUSED
,
4939 const char *name
, unsigned HOST_WIDE_INT size
,
4942 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4943 && size
> (unsigned int)ix86_section_threshold
)
4944 switch_to_section (get_named_section (decl
, ".lbss", 0));
4946 switch_to_section (bss_section
);
4947 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
4948 #ifdef ASM_DECLARE_OBJECT_NAME
4949 last_assemble_variable_decl
= decl
;
4950 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
4952 /* Standard thing is just output label for the object. */
4953 ASM_OUTPUT_LABEL (file
, name
);
4954 #endif /* ASM_DECLARE_OBJECT_NAME */
4955 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
4958 /* Decide whether we must probe the stack before any space allocation
4959 on this target. It's essentially TARGET_STACK_PROBE except when
4960 -fstack-check causes the stack to be already probed differently. */
4963 ix86_target_stack_probe (void)
4965 /* Do not probe the stack twice if static stack checking is enabled. */
4966 if (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
4969 return TARGET_STACK_PROBE
;
4972 /* Decide whether we can make a sibling call to a function. DECL is the
4973 declaration of the function being targeted by the call and EXP is the
4974 CALL_EXPR representing the call. */
4977 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
4979 tree type
, decl_or_type
;
4982 /* If we are generating position-independent code, we cannot sibcall
4983 optimize any indirect call, or a direct call to a global function,
4984 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
4988 && (!decl
|| !targetm
.binds_local_p (decl
)))
4991 /* If we need to align the outgoing stack, then sibcalling would
4992 unalign the stack, which may break the called function. */
4993 if (ix86_minimum_incoming_stack_boundary (true)
4994 < PREFERRED_STACK_BOUNDARY
)
4999 decl_or_type
= decl
;
5000 type
= TREE_TYPE (decl
);
5004 /* We're looking at the CALL_EXPR, we need the type of the function. */
5005 type
= CALL_EXPR_FN (exp
); /* pointer expression */
5006 type
= TREE_TYPE (type
); /* pointer type */
5007 type
= TREE_TYPE (type
); /* function type */
5008 decl_or_type
= type
;
5011 /* Check that the return value locations are the same. Like
5012 if we are returning floats on the 80387 register stack, we cannot
5013 make a sibcall from a function that doesn't return a float to a
5014 function that does or, conversely, from a function that does return
5015 a float to a function that doesn't; the necessary stack adjustment
5016 would not be executed. This is also the place we notice
5017 differences in the return value ABI. Note that it is ok for one
5018 of the functions to have void return type as long as the return
5019 value of the other is passed in a register. */
5020 a
= ix86_function_value (TREE_TYPE (exp
), decl_or_type
, false);
5021 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
5023 if (STACK_REG_P (a
) || STACK_REG_P (b
))
5025 if (!rtx_equal_p (a
, b
))
5028 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
5030 /* Disable sibcall if we need to generate vzeroupper after
5032 if (TARGET_VZEROUPPER
5033 && cfun
->machine
->callee_return_avx256_p
5034 && !cfun
->machine
->caller_return_avx256_p
)
5037 else if (!rtx_equal_p (a
, b
))
5042 /* The SYSV ABI has more call-clobbered registers;
5043 disallow sibcalls from MS to SYSV. */
5044 if (cfun
->machine
->call_abi
== MS_ABI
5045 && ix86_function_type_abi (type
) == SYSV_ABI
)
5050 /* If this call is indirect, we'll need to be able to use a
5051 call-clobbered register for the address of the target function.
5052 Make sure that all such registers are not used for passing
5053 parameters. Note that DLLIMPORT functions are indirect. */
5055 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
&& DECL_DLLIMPORT_P (decl
)))
5057 if (ix86_function_regparm (type
, NULL
) >= 3)
5059 /* ??? Need to count the actual number of registers to be used,
5060 not the possible number of registers. Fix later. */
5066 /* Otherwise okay. That also includes certain types of indirect calls. */
5070 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5071 and "sseregparm" calling convention attributes;
5072 arguments as in struct attribute_spec.handler. */
5075 ix86_handle_cconv_attribute (tree
*node
, tree name
,
5077 int flags ATTRIBUTE_UNUSED
,
5080 if (TREE_CODE (*node
) != FUNCTION_TYPE
5081 && TREE_CODE (*node
) != METHOD_TYPE
5082 && TREE_CODE (*node
) != FIELD_DECL
5083 && TREE_CODE (*node
) != TYPE_DECL
)
5085 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
5087 *no_add_attrs
= true;
5091 /* Can combine regparm with all attributes but fastcall, and thiscall. */
5092 if (is_attribute_p ("regparm", name
))
5096 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5098 error ("fastcall and regparm attributes are not compatible");
5101 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5103 error ("regparam and thiscall attributes are not compatible");
5106 cst
= TREE_VALUE (args
);
5107 if (TREE_CODE (cst
) != INTEGER_CST
)
5109 warning (OPT_Wattributes
,
5110 "%qE attribute requires an integer constant argument",
5112 *no_add_attrs
= true;
5114 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
5116 warning (OPT_Wattributes
, "argument to %qE attribute larger than %d",
5118 *no_add_attrs
= true;
5126 /* Do not warn when emulating the MS ABI. */
5127 if ((TREE_CODE (*node
) != FUNCTION_TYPE
5128 && TREE_CODE (*node
) != METHOD_TYPE
)
5129 || ix86_function_type_abi (*node
) != MS_ABI
)
5130 warning (OPT_Wattributes
, "%qE attribute ignored",
5132 *no_add_attrs
= true;
5136 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5137 if (is_attribute_p ("fastcall", name
))
5139 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5141 error ("fastcall and cdecl attributes are not compatible");
5143 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5145 error ("fastcall and stdcall attributes are not compatible");
5147 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
5149 error ("fastcall and regparm attributes are not compatible");
5151 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5153 error ("fastcall and thiscall attributes are not compatible");
5157 /* Can combine stdcall with fastcall (redundant), regparm and
5159 else if (is_attribute_p ("stdcall", name
))
5161 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5163 error ("stdcall and cdecl attributes are not compatible");
5165 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5167 error ("stdcall and fastcall attributes are not compatible");
5169 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5171 error ("stdcall and thiscall attributes are not compatible");
5175 /* Can combine cdecl with regparm and sseregparm. */
5176 else if (is_attribute_p ("cdecl", name
))
5178 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5180 error ("stdcall and cdecl attributes are not compatible");
5182 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5184 error ("fastcall and cdecl attributes are not compatible");
5186 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5188 error ("cdecl and thiscall attributes are not compatible");
5191 else if (is_attribute_p ("thiscall", name
))
5193 if (TREE_CODE (*node
) != METHOD_TYPE
&& pedantic
)
5194 warning (OPT_Wattributes
, "%qE attribute is used for none class-method",
5196 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5198 error ("stdcall and thiscall attributes are not compatible");
5200 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5202 error ("fastcall and thiscall attributes are not compatible");
5204 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5206 error ("cdecl and thiscall attributes are not compatible");
5210 /* Can combine sseregparm with all attributes. */
5215 /* The transactional memory builtins are implicitly regparm or fastcall
5216 depending on the ABI. Override the generic do-nothing attribute that
5217 these builtins were declared with, and replace it with one of the two
5218 attributes that we expect elsewhere. */
5221 ix86_handle_tm_regparm_attribute (tree
*node
, tree name ATTRIBUTE_UNUSED
,
5222 tree args ATTRIBUTE_UNUSED
,
5223 int flags ATTRIBUTE_UNUSED
,
5228 /* In no case do we want to add the placeholder attribute. */
5229 *no_add_attrs
= true;
5231 /* The 64-bit ABI is unchanged for transactional memory. */
5235 /* ??? Is there a better way to validate 32-bit windows? We have
5236 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5237 if (CHECK_STACK_LIMIT
> 0)
5238 alt
= tree_cons (get_identifier ("fastcall"), NULL
, NULL
);
5241 alt
= tree_cons (NULL
, build_int_cst (NULL
, 2), NULL
);
5242 alt
= tree_cons (get_identifier ("regparm"), alt
, NULL
);
5244 decl_attributes (node
, alt
, flags
);
5249 /* This function determines from TYPE the calling-convention. */
5252 ix86_get_callcvt (const_tree type
)
5254 unsigned int ret
= 0;
5259 return IX86_CALLCVT_CDECL
;
5261 attrs
= TYPE_ATTRIBUTES (type
);
5262 if (attrs
!= NULL_TREE
)
5264 if (lookup_attribute ("cdecl", attrs
))
5265 ret
|= IX86_CALLCVT_CDECL
;
5266 else if (lookup_attribute ("stdcall", attrs
))
5267 ret
|= IX86_CALLCVT_STDCALL
;
5268 else if (lookup_attribute ("fastcall", attrs
))
5269 ret
|= IX86_CALLCVT_FASTCALL
;
5270 else if (lookup_attribute ("thiscall", attrs
))
5271 ret
|= IX86_CALLCVT_THISCALL
;
5273 /* Regparam isn't allowed for thiscall and fastcall. */
5274 if ((ret
& (IX86_CALLCVT_THISCALL
| IX86_CALLCVT_FASTCALL
)) == 0)
5276 if (lookup_attribute ("regparm", attrs
))
5277 ret
|= IX86_CALLCVT_REGPARM
;
5278 if (lookup_attribute ("sseregparm", attrs
))
5279 ret
|= IX86_CALLCVT_SSEREGPARM
;
5282 if (IX86_BASE_CALLCVT(ret
) != 0)
5286 is_stdarg
= stdarg_p (type
);
5287 if (TARGET_RTD
&& !is_stdarg
)
5288 return IX86_CALLCVT_STDCALL
| ret
;
5292 || TREE_CODE (type
) != METHOD_TYPE
5293 || ix86_function_type_abi (type
) != MS_ABI
)
5294 return IX86_CALLCVT_CDECL
| ret
;
5296 return IX86_CALLCVT_THISCALL
;
5299 /* Return 0 if the attributes for two types are incompatible, 1 if they
5300 are compatible, and 2 if they are nearly compatible (which causes a
5301 warning to be generated). */
5304 ix86_comp_type_attributes (const_tree type1
, const_tree type2
)
5306 unsigned int ccvt1
, ccvt2
;
5308 if (TREE_CODE (type1
) != FUNCTION_TYPE
5309 && TREE_CODE (type1
) != METHOD_TYPE
)
5312 ccvt1
= ix86_get_callcvt (type1
);
5313 ccvt2
= ix86_get_callcvt (type2
);
5316 if (ix86_function_regparm (type1
, NULL
)
5317 != ix86_function_regparm (type2
, NULL
))
5323 /* Return the regparm value for a function with the indicated TYPE and DECL.
5324 DECL may be NULL when calling function indirectly
5325 or considering a libcall. */
5328 ix86_function_regparm (const_tree type
, const_tree decl
)
5335 return (ix86_function_type_abi (type
) == SYSV_ABI
5336 ? X86_64_REGPARM_MAX
: X86_64_MS_REGPARM_MAX
);
5337 ccvt
= ix86_get_callcvt (type
);
5338 regparm
= ix86_regparm
;
5340 if ((ccvt
& IX86_CALLCVT_REGPARM
) != 0)
5342 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
5345 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
5349 else if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
5351 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
5354 /* Use register calling convention for local functions when possible. */
5356 && TREE_CODE (decl
) == FUNCTION_DECL
5358 && !(profile_flag
&& !flag_fentry
))
5360 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5361 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE (decl
));
5362 if (i
&& i
->local
&& i
->can_change_signature
)
5364 int local_regparm
, globals
= 0, regno
;
5366 /* Make sure no regparm register is taken by a
5367 fixed register variable. */
5368 for (local_regparm
= 0; local_regparm
< REGPARM_MAX
; local_regparm
++)
5369 if (fixed_regs
[local_regparm
])
5372 /* We don't want to use regparm(3) for nested functions as
5373 these use a static chain pointer in the third argument. */
5374 if (local_regparm
== 3 && DECL_STATIC_CHAIN (decl
))
5377 /* In 32-bit mode save a register for the split stack. */
5378 if (!TARGET_64BIT
&& local_regparm
== 3 && flag_split_stack
)
5381 /* Each fixed register usage increases register pressure,
5382 so less registers should be used for argument passing.
5383 This functionality can be overriden by an explicit
5385 for (regno
= 0; regno
<= DI_REG
; regno
++)
5386 if (fixed_regs
[regno
])
5390 = globals
< local_regparm
? local_regparm
- globals
: 0;
5392 if (local_regparm
> regparm
)
5393 regparm
= local_regparm
;
5400 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5401 DFmode (2) arguments in SSE registers for a function with the
5402 indicated TYPE and DECL. DECL may be NULL when calling function
5403 indirectly or considering a libcall. Otherwise return 0. */
5406 ix86_function_sseregparm (const_tree type
, const_tree decl
, bool warn
)
5408 gcc_assert (!TARGET_64BIT
);
5410 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5411 by the sseregparm attribute. */
5412 if (TARGET_SSEREGPARM
5413 || (type
&& lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
5420 error ("calling %qD with attribute sseregparm without "
5421 "SSE/SSE2 enabled", decl
);
5423 error ("calling %qT with attribute sseregparm without "
5424 "SSE/SSE2 enabled", type
);
5432 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5433 (and DFmode for SSE2) arguments in SSE registers. */
5434 if (decl
&& TARGET_SSE_MATH
&& optimize
5435 && !(profile_flag
&& !flag_fentry
))
5437 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5438 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE(decl
));
5439 if (i
&& i
->local
&& i
->can_change_signature
)
5440 return TARGET_SSE2
? 2 : 1;
5446 /* Return true if EAX is live at the start of the function. Used by
5447 ix86_expand_prologue to determine if we need special help before
5448 calling allocate_stack_worker. */
5451 ix86_eax_live_at_start_p (void)
5453 /* Cheat. Don't bother working forward from ix86_function_regparm
5454 to the function type to whether an actual argument is located in
5455 eax. Instead just look at cfg info, which is still close enough
5456 to correct at this point. This gives false positives for broken
5457 functions that might use uninitialized data that happens to be
5458 allocated in eax, but who cares? */
5459 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR
), 0);
5463 ix86_keep_aggregate_return_pointer (tree fntype
)
5469 attr
= lookup_attribute ("callee_pop_aggregate_return",
5470 TYPE_ATTRIBUTES (fntype
));
5472 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
))) == 0);
5474 /* For 32-bit MS-ABI the default is to keep aggregate
5476 if (ix86_function_type_abi (fntype
) == MS_ABI
)
5479 return KEEP_AGGREGATE_RETURN_POINTER
!= 0;
5482 /* Value is the number of bytes of arguments automatically
5483 popped when returning from a subroutine call.
5484 FUNDECL is the declaration node of the function (as a tree),
5485 FUNTYPE is the data type of the function (as a tree),
5486 or for a library call it is an identifier node for the subroutine name.
5487 SIZE is the number of bytes of arguments passed on the stack.
5489 On the 80386, the RTD insn may be used to pop them if the number
5490 of args is fixed, but if the number is variable then the caller
5491 must pop them all. RTD can't be used for library calls now
5492 because the library is compiled with the Unix compiler.
5493 Use of RTD is a selectable option, since it is incompatible with
5494 standard Unix calling sequences. If the option is not selected,
5495 the caller must always pop the args.
5497 The attribute stdcall is equivalent to RTD on a per module basis. */
5500 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
5504 /* None of the 64-bit ABIs pop arguments. */
5508 ccvt
= ix86_get_callcvt (funtype
);
5510 if ((ccvt
& (IX86_CALLCVT_STDCALL
| IX86_CALLCVT_FASTCALL
5511 | IX86_CALLCVT_THISCALL
)) != 0
5512 && ! stdarg_p (funtype
))
5515 /* Lose any fake structure return argument if it is passed on the stack. */
5516 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
5517 && !ix86_keep_aggregate_return_pointer (funtype
))
5519 int nregs
= ix86_function_regparm (funtype
, fundecl
);
5521 return GET_MODE_SIZE (Pmode
);
5527 /* Argument support functions. */
5529 /* Return true when register may be used to pass function parameters. */
5531 ix86_function_arg_regno_p (int regno
)
5534 const int *parm_regs
;
5539 return (regno
< REGPARM_MAX
5540 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
5542 return (regno
< REGPARM_MAX
5543 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
5544 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
5545 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
5546 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
5551 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
5556 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
5557 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
5561 /* TODO: The function should depend on current function ABI but
5562 builtins.c would need updating then. Therefore we use the
5565 /* RAX is used as hidden argument to va_arg functions. */
5566 if (ix86_abi
== SYSV_ABI
&& regno
== AX_REG
)
5569 if (ix86_abi
== MS_ABI
)
5570 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
5572 parm_regs
= x86_64_int_parameter_registers
;
5573 for (i
= 0; i
< (ix86_abi
== MS_ABI
5574 ? X86_64_MS_REGPARM_MAX
: X86_64_REGPARM_MAX
); i
++)
5575 if (regno
== parm_regs
[i
])
5580 /* Return if we do not know how to pass TYPE solely in registers. */
5583 ix86_must_pass_in_stack (enum machine_mode mode
, const_tree type
)
5585 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
5588 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5589 The layout_type routine is crafty and tries to trick us into passing
5590 currently unsupported vector types on the stack by using TImode. */
5591 return (!TARGET_64BIT
&& mode
== TImode
5592 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
5595 /* It returns the size, in bytes, of the area reserved for arguments passed
5596 in registers for the function represented by fndecl dependent to the used
5599 ix86_reg_parm_stack_space (const_tree fndecl
)
5601 enum calling_abi call_abi
= SYSV_ABI
;
5602 if (fndecl
!= NULL_TREE
&& TREE_CODE (fndecl
) == FUNCTION_DECL
)
5603 call_abi
= ix86_function_abi (fndecl
);
5605 call_abi
= ix86_function_type_abi (fndecl
);
5606 if (TARGET_64BIT
&& call_abi
== MS_ABI
)
5611 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
5614 ix86_function_type_abi (const_tree fntype
)
5616 if (fntype
!= NULL_TREE
&& TYPE_ATTRIBUTES (fntype
) != NULL_TREE
)
5618 enum calling_abi abi
= ix86_abi
;
5619 if (abi
== SYSV_ABI
)
5621 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype
)))
5624 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype
)))
5632 ix86_function_ms_hook_prologue (const_tree fn
)
5634 if (fn
&& lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn
)))
5636 if (decl_function_context (fn
) != NULL_TREE
)
5637 error_at (DECL_SOURCE_LOCATION (fn
),
5638 "ms_hook_prologue is not compatible with nested function");
5645 static enum calling_abi
5646 ix86_function_abi (const_tree fndecl
)
5650 return ix86_function_type_abi (TREE_TYPE (fndecl
));
5653 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
5656 ix86_cfun_abi (void)
5660 return cfun
->machine
->call_abi
;
5663 /* Write the extra assembler code needed to declare a function properly. */
5666 ix86_asm_output_function_label (FILE *asm_out_file
, const char *fname
,
5669 bool is_ms_hook
= ix86_function_ms_hook_prologue (decl
);
5673 int i
, filler_count
= (TARGET_64BIT
? 32 : 16);
5674 unsigned int filler_cc
= 0xcccccccc;
5676 for (i
= 0; i
< filler_count
; i
+= 4)
5677 fprintf (asm_out_file
, ASM_LONG
" %#x\n", filler_cc
);
5680 #ifdef SUBTARGET_ASM_UNWIND_INIT
5681 SUBTARGET_ASM_UNWIND_INIT (asm_out_file
);
5684 ASM_OUTPUT_LABEL (asm_out_file
, fname
);
5686 /* Output magic byte marker, if hot-patch attribute is set. */
5691 /* leaq [%rsp + 0], %rsp */
5692 asm_fprintf (asm_out_file
, ASM_BYTE
5693 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5697 /* movl.s %edi, %edi
5699 movl.s %esp, %ebp */
5700 asm_fprintf (asm_out_file
, ASM_BYTE
5701 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5707 extern void init_regs (void);
5709 /* Implementation of call abi switching target hook. Specific to FNDECL
5710 the specific call register sets are set. See also
5711 ix86_conditional_register_usage for more details. */
5713 ix86_call_abi_override (const_tree fndecl
)
5715 if (fndecl
== NULL_TREE
)
5716 cfun
->machine
->call_abi
= ix86_abi
;
5718 cfun
->machine
->call_abi
= ix86_function_type_abi (TREE_TYPE (fndecl
));
5721 /* 64-bit MS and SYSV ABI have different set of call used registers. Avoid
5722 expensive re-initialization of init_regs each time we switch function context
5723 since this is needed only during RTL expansion. */
5725 ix86_maybe_switch_abi (void)
5728 call_used_regs
[SI_REG
] == (cfun
->machine
->call_abi
== MS_ABI
))
5732 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5733 for a call to a function whose data type is FNTYPE.
5734 For a library call, FNTYPE is 0. */
5737 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
5738 tree fntype
, /* tree ptr for function decl */
5739 rtx libname
, /* SYMBOL_REF of library name or 0 */
5743 struct cgraph_local_info
*i
;
5746 memset (cum
, 0, sizeof (*cum
));
5748 /* Initialize for the current callee. */
5751 cfun
->machine
->callee_pass_avx256_p
= false;
5752 cfun
->machine
->callee_return_avx256_p
= false;
5757 i
= cgraph_local_info (fndecl
);
5758 cum
->call_abi
= ix86_function_abi (fndecl
);
5759 fnret_type
= TREE_TYPE (TREE_TYPE (fndecl
));
5764 cum
->call_abi
= ix86_function_type_abi (fntype
);
5766 fnret_type
= TREE_TYPE (fntype
);
5771 if (TARGET_VZEROUPPER
&& fnret_type
)
5773 rtx fnret_value
= ix86_function_value (fnret_type
, fntype
,
5775 if (function_pass_avx256_p (fnret_value
))
5777 /* The return value of this function uses 256bit AVX modes. */
5779 cfun
->machine
->callee_return_avx256_p
= true;
5781 cfun
->machine
->caller_return_avx256_p
= true;
5785 cum
->caller
= caller
;
5787 /* Set up the number of registers to use for passing arguments. */
5789 if (TARGET_64BIT
&& cum
->call_abi
== MS_ABI
&& !ACCUMULATE_OUTGOING_ARGS
)
5790 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5791 "or subtarget optimization implying it");
5792 cum
->nregs
= ix86_regparm
;
5795 cum
->nregs
= (cum
->call_abi
== SYSV_ABI
5796 ? X86_64_REGPARM_MAX
5797 : X86_64_MS_REGPARM_MAX
);
5801 cum
->sse_nregs
= SSE_REGPARM_MAX
;
5804 cum
->sse_nregs
= (cum
->call_abi
== SYSV_ABI
5805 ? X86_64_SSE_REGPARM_MAX
5806 : X86_64_MS_SSE_REGPARM_MAX
);
5810 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
5811 cum
->warn_avx
= true;
5812 cum
->warn_sse
= true;
5813 cum
->warn_mmx
= true;
5815 /* Because type might mismatch in between caller and callee, we need to
5816 use actual type of function for local calls.
5817 FIXME: cgraph_analyze can be told to actually record if function uses
5818 va_start so for local functions maybe_vaarg can be made aggressive
5820 FIXME: once typesytem is fixed, we won't need this code anymore. */
5821 if (i
&& i
->local
&& i
->can_change_signature
)
5822 fntype
= TREE_TYPE (fndecl
);
5823 cum
->maybe_vaarg
= (fntype
5824 ? (!prototype_p (fntype
) || stdarg_p (fntype
))
5829 /* If there are variable arguments, then we won't pass anything
5830 in registers in 32-bit mode. */
5831 if (stdarg_p (fntype
))
5842 /* Use ecx and edx registers if function has fastcall attribute,
5843 else look for regparm information. */
5846 unsigned int ccvt
= ix86_get_callcvt (fntype
);
5847 if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
5850 cum
->fastcall
= 1; /* Same first register as in fastcall. */
5852 else if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
5858 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
5861 /* Set up the number of SSE registers used for passing SFmode
5862 and DFmode arguments. Warn for mismatching ABI. */
5863 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
, true);
5867 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5868 But in the case of vector types, it is some vector mode.
5870 When we have only some of our vector isa extensions enabled, then there
5871 are some modes for which vector_mode_supported_p is false. For these
5872 modes, the generic vector support in gcc will choose some non-vector mode
5873 in order to implement the type. By computing the natural mode, we'll
5874 select the proper ABI location for the operand and not depend on whatever
5875 the middle-end decides to do with these vector types.
5877 The midde-end can't deal with the vector types > 16 bytes. In this
5878 case, we return the original mode and warn ABI change if CUM isn't
5881 static enum machine_mode
5882 type_natural_mode (const_tree type
, const CUMULATIVE_ARGS
*cum
)
5884 enum machine_mode mode
= TYPE_MODE (type
);
5886 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
5888 HOST_WIDE_INT size
= int_size_in_bytes (type
);
5889 if ((size
== 8 || size
== 16 || size
== 32)
5890 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5891 && TYPE_VECTOR_SUBPARTS (type
) > 1)
5893 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
5895 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
5896 mode
= MIN_MODE_VECTOR_FLOAT
;
5898 mode
= MIN_MODE_VECTOR_INT
;
5900 /* Get the mode which has this inner mode and number of units. */
5901 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
5902 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
5903 && GET_MODE_INNER (mode
) == innermode
)
5905 if (size
== 32 && !TARGET_AVX
)
5907 static bool warnedavx
;
5914 warning (0, "AVX vector argument without AVX "
5915 "enabled changes the ABI");
5917 return TYPE_MODE (type
);
5930 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5931 this may not agree with the mode that the type system has chosen for the
5932 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5933 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5936 gen_reg_or_parallel (enum machine_mode mode
, enum machine_mode orig_mode
,
5941 if (orig_mode
!= BLKmode
)
5942 tmp
= gen_rtx_REG (orig_mode
, regno
);
5945 tmp
= gen_rtx_REG (mode
, regno
);
5946 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
5947 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
5953 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
5954 of this code is to classify each 8bytes of incoming argument by the register
5955 class and assign registers accordingly. */
5957 /* Return the union class of CLASS1 and CLASS2.
5958 See the x86-64 PS ABI for details. */
5960 static enum x86_64_reg_class
5961 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
5963 /* Rule #1: If both classes are equal, this is the resulting class. */
5964 if (class1
== class2
)
5967 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5969 if (class1
== X86_64_NO_CLASS
)
5971 if (class2
== X86_64_NO_CLASS
)
5974 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5975 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
5976 return X86_64_MEMORY_CLASS
;
5978 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5979 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
5980 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
5981 return X86_64_INTEGERSI_CLASS
;
5982 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
5983 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
5984 return X86_64_INTEGER_CLASS
;
5986 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5988 if (class1
== X86_64_X87_CLASS
5989 || class1
== X86_64_X87UP_CLASS
5990 || class1
== X86_64_COMPLEX_X87_CLASS
5991 || class2
== X86_64_X87_CLASS
5992 || class2
== X86_64_X87UP_CLASS
5993 || class2
== X86_64_COMPLEX_X87_CLASS
)
5994 return X86_64_MEMORY_CLASS
;
5996 /* Rule #6: Otherwise class SSE is used. */
5997 return X86_64_SSE_CLASS
;
6000 /* Classify the argument of type TYPE and mode MODE.
6001 CLASSES will be filled by the register class used to pass each word
6002 of the operand. The number of words is returned. In case the parameter
6003 should be passed in memory, 0 is returned. As a special case for zero
6004 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6006 BIT_OFFSET is used internally for handling records and specifies offset
6007 of the offset in bits modulo 256 to avoid overflow cases.
6009 See the x86-64 PS ABI for details.
6013 classify_argument (enum machine_mode mode
, const_tree type
,
6014 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
6016 HOST_WIDE_INT bytes
=
6017 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
6018 int words
= (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
6020 /* Variable sized entities are always passed/returned in memory. */
6024 if (mode
!= VOIDmode
6025 && targetm
.calls
.must_pass_in_stack (mode
, type
))
6028 if (type
&& AGGREGATE_TYPE_P (type
))
6032 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
6034 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
6038 for (i
= 0; i
< words
; i
++)
6039 classes
[i
] = X86_64_NO_CLASS
;
6041 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6042 signalize memory class, so handle it as special case. */
6045 classes
[0] = X86_64_NO_CLASS
;
6049 /* Classify each field of record and merge classes. */
6050 switch (TREE_CODE (type
))
6053 /* And now merge the fields of structure. */
6054 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6056 if (TREE_CODE (field
) == FIELD_DECL
)
6060 if (TREE_TYPE (field
) == error_mark_node
)
6063 /* Bitfields are always classified as integer. Handle them
6064 early, since later code would consider them to be
6065 misaligned integers. */
6066 if (DECL_BIT_FIELD (field
))
6068 for (i
= (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
6069 i
< ((int_bit_position (field
) + (bit_offset
% 64))
6070 + tree_low_cst (DECL_SIZE (field
), 0)
6073 merge_classes (X86_64_INTEGER_CLASS
,
6080 type
= TREE_TYPE (field
);
6082 /* Flexible array member is ignored. */
6083 if (TYPE_MODE (type
) == BLKmode
6084 && TREE_CODE (type
) == ARRAY_TYPE
6085 && TYPE_SIZE (type
) == NULL_TREE
6086 && TYPE_DOMAIN (type
) != NULL_TREE
6087 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type
))
6092 if (!warned
&& warn_psabi
)
6095 inform (input_location
,
6096 "the ABI of passing struct with"
6097 " a flexible array member has"
6098 " changed in GCC 4.4");
6102 num
= classify_argument (TYPE_MODE (type
), type
,
6104 (int_bit_position (field
)
6105 + bit_offset
) % 256);
6108 pos
= (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
6109 for (i
= 0; i
< num
&& (i
+ pos
) < words
; i
++)
6111 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
6118 /* Arrays are handled as small records. */
6121 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
6122 TREE_TYPE (type
), subclasses
, bit_offset
);
6126 /* The partial classes are now full classes. */
6127 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
6128 subclasses
[0] = X86_64_SSE_CLASS
;
6129 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
6130 && !((bit_offset
% 64) == 0 && bytes
== 4))
6131 subclasses
[0] = X86_64_INTEGER_CLASS
;
6133 for (i
= 0; i
< words
; i
++)
6134 classes
[i
] = subclasses
[i
% num
];
6139 case QUAL_UNION_TYPE
:
6140 /* Unions are similar to RECORD_TYPE but offset is always 0.
6142 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6144 if (TREE_CODE (field
) == FIELD_DECL
)
6148 if (TREE_TYPE (field
) == error_mark_node
)
6151 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
6152 TREE_TYPE (field
), subclasses
,
6156 for (i
= 0; i
< num
; i
++)
6157 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
6168 /* When size > 16 bytes, if the first one isn't
6169 X86_64_SSE_CLASS or any other ones aren't
6170 X86_64_SSEUP_CLASS, everything should be passed in
6172 if (classes
[0] != X86_64_SSE_CLASS
)
6175 for (i
= 1; i
< words
; i
++)
6176 if (classes
[i
] != X86_64_SSEUP_CLASS
)
6180 /* Final merger cleanup. */
6181 for (i
= 0; i
< words
; i
++)
6183 /* If one class is MEMORY, everything should be passed in
6185 if (classes
[i
] == X86_64_MEMORY_CLASS
)
6188 /* The X86_64_SSEUP_CLASS should be always preceded by
6189 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6190 if (classes
[i
] == X86_64_SSEUP_CLASS
6191 && classes
[i
- 1] != X86_64_SSE_CLASS
6192 && classes
[i
- 1] != X86_64_SSEUP_CLASS
)
6194 /* The first one should never be X86_64_SSEUP_CLASS. */
6195 gcc_assert (i
!= 0);
6196 classes
[i
] = X86_64_SSE_CLASS
;
6199 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6200 everything should be passed in memory. */
6201 if (classes
[i
] == X86_64_X87UP_CLASS
6202 && (classes
[i
- 1] != X86_64_X87_CLASS
))
6206 /* The first one should never be X86_64_X87UP_CLASS. */
6207 gcc_assert (i
!= 0);
6208 if (!warned
&& warn_psabi
)
6211 inform (input_location
,
6212 "the ABI of passing union with long double"
6213 " has changed in GCC 4.4");
6221 /* Compute alignment needed. We align all types to natural boundaries with
6222 exception of XFmode that is aligned to 64bits. */
6223 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
6225 int mode_alignment
= GET_MODE_BITSIZE (mode
);
6228 mode_alignment
= 128;
6229 else if (mode
== XCmode
)
6230 mode_alignment
= 256;
6231 if (COMPLEX_MODE_P (mode
))
6232 mode_alignment
/= 2;
6233 /* Misaligned fields are always returned in memory. */
6234 if (bit_offset
% mode_alignment
)
6238 /* for V1xx modes, just use the base mode */
6239 if (VECTOR_MODE_P (mode
) && mode
!= V1DImode
&& mode
!= V1TImode
6240 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
6241 mode
= GET_MODE_INNER (mode
);
6243 /* Classification of atomic types. */
6248 classes
[0] = X86_64_SSE_CLASS
;
6251 classes
[0] = X86_64_SSE_CLASS
;
6252 classes
[1] = X86_64_SSEUP_CLASS
;
6262 int size
= (bit_offset
% 64)+ (int) GET_MODE_BITSIZE (mode
);
6266 classes
[0] = X86_64_INTEGERSI_CLASS
;
6269 else if (size
<= 64)
6271 classes
[0] = X86_64_INTEGER_CLASS
;
6274 else if (size
<= 64+32)
6276 classes
[0] = X86_64_INTEGER_CLASS
;
6277 classes
[1] = X86_64_INTEGERSI_CLASS
;
6280 else if (size
<= 64+64)
6282 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
6290 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
6294 /* OImode shouldn't be used directly. */
6299 if (!(bit_offset
% 64))
6300 classes
[0] = X86_64_SSESF_CLASS
;
6302 classes
[0] = X86_64_SSE_CLASS
;
6305 classes
[0] = X86_64_SSEDF_CLASS
;
6308 classes
[0] = X86_64_X87_CLASS
;
6309 classes
[1] = X86_64_X87UP_CLASS
;
6312 classes
[0] = X86_64_SSE_CLASS
;
6313 classes
[1] = X86_64_SSEUP_CLASS
;
6316 classes
[0] = X86_64_SSE_CLASS
;
6317 if (!(bit_offset
% 64))
6323 if (!warned
&& warn_psabi
)
6326 inform (input_location
,
6327 "the ABI of passing structure with complex float"
6328 " member has changed in GCC 4.4");
6330 classes
[1] = X86_64_SSESF_CLASS
;
6334 classes
[0] = X86_64_SSEDF_CLASS
;
6335 classes
[1] = X86_64_SSEDF_CLASS
;
6338 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
6341 /* This modes is larger than 16 bytes. */
6349 classes
[0] = X86_64_SSE_CLASS
;
6350 classes
[1] = X86_64_SSEUP_CLASS
;
6351 classes
[2] = X86_64_SSEUP_CLASS
;
6352 classes
[3] = X86_64_SSEUP_CLASS
;
6360 classes
[0] = X86_64_SSE_CLASS
;
6361 classes
[1] = X86_64_SSEUP_CLASS
;
6369 classes
[0] = X86_64_SSE_CLASS
;
6375 gcc_assert (VECTOR_MODE_P (mode
));
6380 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
6382 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
6383 classes
[0] = X86_64_INTEGERSI_CLASS
;
6385 classes
[0] = X86_64_INTEGER_CLASS
;
6386 classes
[1] = X86_64_INTEGER_CLASS
;
6387 return 1 + (bytes
> 8);
6391 /* Examine the argument and return set number of register required in each
6392 class. Return 0 iff parameter should be passed in memory. */
6394 examine_argument (enum machine_mode mode
, const_tree type
, int in_return
,
6395 int *int_nregs
, int *sse_nregs
)
6397 enum x86_64_reg_class regclass
[MAX_CLASSES
];
6398 int n
= classify_argument (mode
, type
, regclass
, 0);
6404 for (n
--; n
>= 0; n
--)
6405 switch (regclass
[n
])
6407 case X86_64_INTEGER_CLASS
:
6408 case X86_64_INTEGERSI_CLASS
:
6411 case X86_64_SSE_CLASS
:
6412 case X86_64_SSESF_CLASS
:
6413 case X86_64_SSEDF_CLASS
:
6416 case X86_64_NO_CLASS
:
6417 case X86_64_SSEUP_CLASS
:
6419 case X86_64_X87_CLASS
:
6420 case X86_64_X87UP_CLASS
:
6424 case X86_64_COMPLEX_X87_CLASS
:
6425 return in_return
? 2 : 0;
6426 case X86_64_MEMORY_CLASS
:
6432 /* Construct container for the argument used by GCC interface. See
6433 FUNCTION_ARG for the detailed description. */
6436 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
6437 const_tree type
, int in_return
, int nintregs
, int nsseregs
,
6438 const int *intreg
, int sse_regno
)
6440 /* The following variables hold the static issued_error state. */
6441 static bool issued_sse_arg_error
;
6442 static bool issued_sse_ret_error
;
6443 static bool issued_x87_ret_error
;
6445 enum machine_mode tmpmode
;
6447 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
6448 enum x86_64_reg_class regclass
[MAX_CLASSES
];
6452 int needed_sseregs
, needed_intregs
;
6453 rtx exp
[MAX_CLASSES
];
6456 n
= classify_argument (mode
, type
, regclass
, 0);
6459 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
6462 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
6465 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6466 some less clueful developer tries to use floating-point anyway. */
6467 if (needed_sseregs
&& !TARGET_SSE
)
6471 if (!issued_sse_ret_error
)
6473 error ("SSE register return with SSE disabled");
6474 issued_sse_ret_error
= true;
6477 else if (!issued_sse_arg_error
)
6479 error ("SSE register argument with SSE disabled");
6480 issued_sse_arg_error
= true;
6485 /* Likewise, error if the ABI requires us to return values in the
6486 x87 registers and the user specified -mno-80387. */
6487 if (!TARGET_80387
&& in_return
)
6488 for (i
= 0; i
< n
; i
++)
6489 if (regclass
[i
] == X86_64_X87_CLASS
6490 || regclass
[i
] == X86_64_X87UP_CLASS
6491 || regclass
[i
] == X86_64_COMPLEX_X87_CLASS
)
6493 if (!issued_x87_ret_error
)
6495 error ("x87 register return with x87 disabled");
6496 issued_x87_ret_error
= true;
6501 /* First construct simple cases. Avoid SCmode, since we want to use
6502 single register to pass this type. */
6503 if (n
== 1 && mode
!= SCmode
)
6504 switch (regclass
[0])
6506 case X86_64_INTEGER_CLASS
:
6507 case X86_64_INTEGERSI_CLASS
:
6508 return gen_rtx_REG (mode
, intreg
[0]);
6509 case X86_64_SSE_CLASS
:
6510 case X86_64_SSESF_CLASS
:
6511 case X86_64_SSEDF_CLASS
:
6512 if (mode
!= BLKmode
)
6513 return gen_reg_or_parallel (mode
, orig_mode
,
6514 SSE_REGNO (sse_regno
));
6516 case X86_64_X87_CLASS
:
6517 case X86_64_COMPLEX_X87_CLASS
:
6518 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
6519 case X86_64_NO_CLASS
:
6520 /* Zero sized array, struct or class. */
6525 if (n
== 2 && regclass
[0] == X86_64_SSE_CLASS
6526 && regclass
[1] == X86_64_SSEUP_CLASS
&& mode
!= BLKmode
)
6527 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
6529 && regclass
[0] == X86_64_SSE_CLASS
6530 && regclass
[1] == X86_64_SSEUP_CLASS
6531 && regclass
[2] == X86_64_SSEUP_CLASS
6532 && regclass
[3] == X86_64_SSEUP_CLASS
6534 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
6537 && regclass
[0] == X86_64_X87_CLASS
&& regclass
[1] == X86_64_X87UP_CLASS
)
6538 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
6539 if (n
== 2 && regclass
[0] == X86_64_INTEGER_CLASS
6540 && regclass
[1] == X86_64_INTEGER_CLASS
6541 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
6542 && intreg
[0] + 1 == intreg
[1])
6543 return gen_rtx_REG (mode
, intreg
[0]);
6545 /* Otherwise figure out the entries of the PARALLEL. */
6546 for (i
= 0; i
< n
; i
++)
6550 switch (regclass
[i
])
6552 case X86_64_NO_CLASS
:
6554 case X86_64_INTEGER_CLASS
:
6555 case X86_64_INTEGERSI_CLASS
:
6556 /* Merge TImodes on aligned occasions here too. */
6557 if (i
* 8 + 8 > bytes
)
6558 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
6559 else if (regclass
[i
] == X86_64_INTEGERSI_CLASS
)
6563 /* We've requested 24 bytes we don't have mode for. Use DImode. */
6564 if (tmpmode
== BLKmode
)
6566 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
6567 gen_rtx_REG (tmpmode
, *intreg
),
6571 case X86_64_SSESF_CLASS
:
6572 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
6573 gen_rtx_REG (SFmode
,
6574 SSE_REGNO (sse_regno
)),
6578 case X86_64_SSEDF_CLASS
:
6579 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
6580 gen_rtx_REG (DFmode
,
6581 SSE_REGNO (sse_regno
)),
6585 case X86_64_SSE_CLASS
:
6593 if (i
== 0 && regclass
[1] == X86_64_SSEUP_CLASS
)
6603 && regclass
[1] == X86_64_SSEUP_CLASS
6604 && regclass
[2] == X86_64_SSEUP_CLASS
6605 && regclass
[3] == X86_64_SSEUP_CLASS
);
6612 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
6613 gen_rtx_REG (tmpmode
,
6614 SSE_REGNO (sse_regno
)),
6623 /* Empty aligned struct, union or class. */
6627 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
6628 for (i
= 0; i
< nexps
; i
++)
6629 XVECEXP (ret
, 0, i
) = exp
[i
];
6633 /* Update the data in CUM to advance over an argument of mode MODE
6634 and data type TYPE. (TYPE is null for libcalls where that information
6635 may not be available.) */
6638 function_arg_advance_32 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6639 const_tree type
, HOST_WIDE_INT bytes
,
6640 HOST_WIDE_INT words
)
6656 cum
->words
+= words
;
6657 cum
->nregs
-= words
;
6658 cum
->regno
+= words
;
6660 if (cum
->nregs
<= 0)
6668 /* OImode shouldn't be used directly. */
6672 if (cum
->float_in_sse
< 2)
6675 if (cum
->float_in_sse
< 1)
6692 if (!type
|| !AGGREGATE_TYPE_P (type
))
6694 cum
->sse_words
+= words
;
6695 cum
->sse_nregs
-= 1;
6696 cum
->sse_regno
+= 1;
6697 if (cum
->sse_nregs
<= 0)
6711 if (!type
|| !AGGREGATE_TYPE_P (type
))
6713 cum
->mmx_words
+= words
;
6714 cum
->mmx_nregs
-= 1;
6715 cum
->mmx_regno
+= 1;
6716 if (cum
->mmx_nregs
<= 0)
6727 function_arg_advance_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6728 const_tree type
, HOST_WIDE_INT words
, bool named
)
6730 int int_nregs
, sse_nregs
;
6732 /* Unnamed 256bit vector mode parameters are passed on stack. */
6733 if (!named
&& VALID_AVX256_REG_MODE (mode
))
6736 if (examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
)
6737 && sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
6739 cum
->nregs
-= int_nregs
;
6740 cum
->sse_nregs
-= sse_nregs
;
6741 cum
->regno
+= int_nregs
;
6742 cum
->sse_regno
+= sse_nregs
;
6746 int align
= ix86_function_arg_boundary (mode
, type
) / BITS_PER_WORD
;
6747 cum
->words
= (cum
->words
+ align
- 1) & ~(align
- 1);
6748 cum
->words
+= words
;
6753 function_arg_advance_ms_64 (CUMULATIVE_ARGS
*cum
, HOST_WIDE_INT bytes
,
6754 HOST_WIDE_INT words
)
6756 /* Otherwise, this should be passed indirect. */
6757 gcc_assert (bytes
== 1 || bytes
== 2 || bytes
== 4 || bytes
== 8);
6759 cum
->words
+= words
;
6767 /* Update the data in CUM to advance over an argument of mode MODE and
6768 data type TYPE. (TYPE is null for libcalls where that information
6769 may not be available.) */
6772 ix86_function_arg_advance (cumulative_args_t cum_v
, enum machine_mode mode
,
6773 const_tree type
, bool named
)
6775 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
6776 HOST_WIDE_INT bytes
, words
;
6778 if (mode
== BLKmode
)
6779 bytes
= int_size_in_bytes (type
);
6781 bytes
= GET_MODE_SIZE (mode
);
6782 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
6785 mode
= type_natural_mode (type
, NULL
);
6787 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
6788 function_arg_advance_ms_64 (cum
, bytes
, words
);
6789 else if (TARGET_64BIT
)
6790 function_arg_advance_64 (cum
, mode
, type
, words
, named
);
6792 function_arg_advance_32 (cum
, mode
, type
, bytes
, words
);
6795 /* Define where to put the arguments to a function.
6796 Value is zero to push the argument on the stack,
6797 or a hard register in which to store the argument.
6799 MODE is the argument's machine mode.
6800 TYPE is the data type of the argument (as a tree).
6801 This is null for libcalls where that information may
6803 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6804 the preceding args and about the function being called.
6805 NAMED is nonzero if this argument is a named parameter
6806 (otherwise it is an extra parameter matching an ellipsis). */
6809 function_arg_32 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6810 enum machine_mode orig_mode
, const_tree type
,
6811 HOST_WIDE_INT bytes
, HOST_WIDE_INT words
)
6813 static bool warnedsse
, warnedmmx
;
6815 /* Avoid the AL settings for the Unix64 ABI. */
6816 if (mode
== VOIDmode
)
6832 if (words
<= cum
->nregs
)
6834 int regno
= cum
->regno
;
6836 /* Fastcall allocates the first two DWORD (SImode) or
6837 smaller arguments to ECX and EDX if it isn't an
6843 || (type
&& AGGREGATE_TYPE_P (type
)))
6846 /* ECX not EAX is the first allocated register. */
6847 if (regno
== AX_REG
)
6850 return gen_rtx_REG (mode
, regno
);
6855 if (cum
->float_in_sse
< 2)
6858 if (cum
->float_in_sse
< 1)
6862 /* In 32bit, we pass TImode in xmm registers. */
6869 if (!type
|| !AGGREGATE_TYPE_P (type
))
6871 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
6874 warning (0, "SSE vector argument without SSE enabled "
6878 return gen_reg_or_parallel (mode
, orig_mode
,
6879 cum
->sse_regno
+ FIRST_SSE_REG
);
6884 /* OImode shouldn't be used directly. */
6893 if (!type
|| !AGGREGATE_TYPE_P (type
))
6896 return gen_reg_or_parallel (mode
, orig_mode
,
6897 cum
->sse_regno
+ FIRST_SSE_REG
);
6907 if (!type
|| !AGGREGATE_TYPE_P (type
))
6909 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
6912 warning (0, "MMX vector argument without MMX enabled "
6916 return gen_reg_or_parallel (mode
, orig_mode
,
6917 cum
->mmx_regno
+ FIRST_MMX_REG
);
6926 function_arg_64 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6927 enum machine_mode orig_mode
, const_tree type
, bool named
)
6929 /* Handle a hidden AL argument containing number of registers
6930 for varargs x86-64 functions. */
6931 if (mode
== VOIDmode
)
6932 return GEN_INT (cum
->maybe_vaarg
6933 ? (cum
->sse_nregs
< 0
6934 ? X86_64_SSE_REGPARM_MAX
6949 /* Unnamed 256bit vector mode parameters are passed on stack. */
6955 return construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
6957 &x86_64_int_parameter_registers
[cum
->regno
],
6962 function_arg_ms_64 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6963 enum machine_mode orig_mode
, bool named
,
6964 HOST_WIDE_INT bytes
)
6968 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
6969 We use value of -2 to specify that current function call is MSABI. */
6970 if (mode
== VOIDmode
)
6971 return GEN_INT (-2);
6973 /* If we've run out of registers, it goes on the stack. */
6974 if (cum
->nregs
== 0)
6977 regno
= x86_64_ms_abi_int_parameter_registers
[cum
->regno
];
6979 /* Only floating point modes are passed in anything but integer regs. */
6980 if (TARGET_SSE
&& (mode
== SFmode
|| mode
== DFmode
))
6983 regno
= cum
->regno
+ FIRST_SSE_REG
;
6988 /* Unnamed floating parameters are passed in both the
6989 SSE and integer registers. */
6990 t1
= gen_rtx_REG (mode
, cum
->regno
+ FIRST_SSE_REG
);
6991 t2
= gen_rtx_REG (mode
, regno
);
6992 t1
= gen_rtx_EXPR_LIST (VOIDmode
, t1
, const0_rtx
);
6993 t2
= gen_rtx_EXPR_LIST (VOIDmode
, t2
, const0_rtx
);
6994 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, t1
, t2
));
6997 /* Handle aggregated types passed in register. */
6998 if (orig_mode
== BLKmode
)
7000 if (bytes
> 0 && bytes
<= 8)
7001 mode
= (bytes
> 4 ? DImode
: SImode
);
7002 if (mode
== BLKmode
)
7006 return gen_reg_or_parallel (mode
, orig_mode
, regno
);
7009 /* Return where to put the arguments to a function.
7010 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7012 MODE is the argument's machine mode. TYPE is the data type of the
7013 argument. It is null for libcalls where that information may not be
7014 available. CUM gives information about the preceding args and about
7015 the function being called. NAMED is nonzero if this argument is a
7016 named parameter (otherwise it is an extra parameter matching an
7020 ix86_function_arg (cumulative_args_t cum_v
, enum machine_mode omode
,
7021 const_tree type
, bool named
)
7023 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
7024 enum machine_mode mode
= omode
;
7025 HOST_WIDE_INT bytes
, words
;
7028 if (mode
== BLKmode
)
7029 bytes
= int_size_in_bytes (type
);
7031 bytes
= GET_MODE_SIZE (mode
);
7032 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
7034 /* To simplify the code below, represent vector types with a vector mode
7035 even if MMX/SSE are not active. */
7036 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
7037 mode
= type_natural_mode (type
, cum
);
7039 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
7040 arg
= function_arg_ms_64 (cum
, mode
, omode
, named
, bytes
);
7041 else if (TARGET_64BIT
)
7042 arg
= function_arg_64 (cum
, mode
, omode
, type
, named
);
7044 arg
= function_arg_32 (cum
, mode
, omode
, type
, bytes
, words
);
7046 if (TARGET_VZEROUPPER
&& function_pass_avx256_p (arg
))
7048 /* This argument uses 256bit AVX modes. */
7050 cfun
->machine
->callee_pass_avx256_p
= true;
7052 cfun
->machine
->caller_pass_avx256_p
= true;
7058 /* A C expression that indicates when an argument must be passed by
7059 reference. If nonzero for an argument, a copy of that argument is
7060 made in memory and a pointer to the argument is passed instead of
7061 the argument itself. The pointer is passed in whatever way is
7062 appropriate for passing a pointer to that type. */
7065 ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED
,
7066 enum machine_mode mode ATTRIBUTE_UNUSED
,
7067 const_tree type
, bool named ATTRIBUTE_UNUSED
)
7069 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
7071 /* See Windows x64 Software Convention. */
7072 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
7074 int msize
= (int) GET_MODE_SIZE (mode
);
7077 /* Arrays are passed by reference. */
7078 if (TREE_CODE (type
) == ARRAY_TYPE
)
7081 if (AGGREGATE_TYPE_P (type
))
7083 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7084 are passed by reference. */
7085 msize
= int_size_in_bytes (type
);
7089 /* __m128 is passed by reference. */
7091 case 1: case 2: case 4: case 8:
7097 else if (TARGET_64BIT
&& type
&& int_size_in_bytes (type
) == -1)
7103 /* Return true when TYPE should be 128bit aligned for 32bit argument
7104 passing ABI. XXX: This function is obsolete and is only used for
7105 checking psABI compatibility with previous versions of GCC. */
7108 ix86_compat_aligned_value_p (const_tree type
)
7110 enum machine_mode mode
= TYPE_MODE (type
);
7111 if (((TARGET_SSE
&& SSE_REG_MODE_P (mode
))
7115 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
7117 if (TYPE_ALIGN (type
) < 128)
7120 if (AGGREGATE_TYPE_P (type
))
7122 /* Walk the aggregates recursively. */
7123 switch (TREE_CODE (type
))
7127 case QUAL_UNION_TYPE
:
7131 /* Walk all the structure fields. */
7132 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
7134 if (TREE_CODE (field
) == FIELD_DECL
7135 && ix86_compat_aligned_value_p (TREE_TYPE (field
)))
7142 /* Just for use if some languages passes arrays by value. */
7143 if (ix86_compat_aligned_value_p (TREE_TYPE (type
)))
7154 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7155 XXX: This function is obsolete and is only used for checking psABI
7156 compatibility with previous versions of GCC. */
7159 ix86_compat_function_arg_boundary (enum machine_mode mode
,
7160 const_tree type
, unsigned int align
)
7162 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7163 natural boundaries. */
7164 if (!TARGET_64BIT
&& mode
!= TDmode
&& mode
!= TFmode
)
7166 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7167 make an exception for SSE modes since these require 128bit
7170 The handling here differs from field_alignment. ICC aligns MMX
7171 arguments to 4 byte boundaries, while structure fields are aligned
7172 to 8 byte boundaries. */
7175 if (!(TARGET_SSE
&& SSE_REG_MODE_P (mode
)))
7176 align
= PARM_BOUNDARY
;
7180 if (!ix86_compat_aligned_value_p (type
))
7181 align
= PARM_BOUNDARY
;
7184 if (align
> BIGGEST_ALIGNMENT
)
7185 align
= BIGGEST_ALIGNMENT
;
7189 /* Return true when TYPE should be 128bit aligned for 32bit argument
7193 ix86_contains_aligned_value_p (const_tree type
)
7195 enum machine_mode mode
= TYPE_MODE (type
);
7197 if (mode
== XFmode
|| mode
== XCmode
)
7200 if (TYPE_ALIGN (type
) < 128)
7203 if (AGGREGATE_TYPE_P (type
))
7205 /* Walk the aggregates recursively. */
7206 switch (TREE_CODE (type
))
7210 case QUAL_UNION_TYPE
:
7214 /* Walk all the structure fields. */
7215 for (field
= TYPE_FIELDS (type
);
7217 field
= DECL_CHAIN (field
))
7219 if (TREE_CODE (field
) == FIELD_DECL
7220 && ix86_contains_aligned_value_p (TREE_TYPE (field
)))
7227 /* Just for use if some languages passes arrays by value. */
7228 if (ix86_contains_aligned_value_p (TREE_TYPE (type
)))
7237 return TYPE_ALIGN (type
) >= 128;
7242 /* Gives the alignment boundary, in bits, of an argument with the
7243 specified mode and type. */
7246 ix86_function_arg_boundary (enum machine_mode mode
, const_tree type
)
7251 /* Since the main variant type is used for call, we convert it to
7252 the main variant type. */
7253 type
= TYPE_MAIN_VARIANT (type
);
7254 align
= TYPE_ALIGN (type
);
7257 align
= GET_MODE_ALIGNMENT (mode
);
7258 if (align
< PARM_BOUNDARY
)
7259 align
= PARM_BOUNDARY
;
7263 unsigned int saved_align
= align
;
7267 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7270 if (mode
== XFmode
|| mode
== XCmode
)
7271 align
= PARM_BOUNDARY
;
7273 else if (!ix86_contains_aligned_value_p (type
))
7274 align
= PARM_BOUNDARY
;
7277 align
= PARM_BOUNDARY
;
7282 && align
!= ix86_compat_function_arg_boundary (mode
, type
,
7286 inform (input_location
,
7287 "The ABI for passing parameters with %d-byte"
7288 " alignment has changed in GCC 4.6",
7289 align
/ BITS_PER_UNIT
);
7296 /* Return true if N is a possible register number of function value. */
7299 ix86_function_value_regno_p (const unsigned int regno
)
7306 case FIRST_FLOAT_REG
:
7307 /* TODO: The function should depend on current function ABI but
7308 builtins.c would need updating then. Therefore we use the
7310 if (TARGET_64BIT
&& ix86_abi
== MS_ABI
)
7312 return TARGET_FLOAT_RETURNS_IN_80387
;
7318 if (TARGET_MACHO
|| TARGET_64BIT
)
7326 /* Define how to find the value returned by a function.
7327 VALTYPE is the data type of the value (as a tree).
7328 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7329 otherwise, FUNC is 0. */
7332 function_value_32 (enum machine_mode orig_mode
, enum machine_mode mode
,
7333 const_tree fntype
, const_tree fn
)
7337 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7338 we normally prevent this case when mmx is not available. However
7339 some ABIs may require the result to be returned like DImode. */
7340 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
7341 regno
= FIRST_MMX_REG
;
7343 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7344 we prevent this case when sse is not available. However some ABIs
7345 may require the result to be returned like integer TImode. */
7346 else if (mode
== TImode
7347 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
7348 regno
= FIRST_SSE_REG
;
7350 /* 32-byte vector modes in %ymm0. */
7351 else if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 32)
7352 regno
= FIRST_SSE_REG
;
7354 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7355 else if (X87_FLOAT_MODE_P (mode
) && TARGET_FLOAT_RETURNS_IN_80387
)
7356 regno
= FIRST_FLOAT_REG
;
7358 /* Most things go in %eax. */
7361 /* Override FP return register with %xmm0 for local functions when
7362 SSE math is enabled or for functions with sseregparm attribute. */
7363 if ((fn
|| fntype
) && (mode
== SFmode
|| mode
== DFmode
))
7365 int sse_level
= ix86_function_sseregparm (fntype
, fn
, false);
7366 if ((sse_level
>= 1 && mode
== SFmode
)
7367 || (sse_level
== 2 && mode
== DFmode
))
7368 regno
= FIRST_SSE_REG
;
7371 /* OImode shouldn't be used directly. */
7372 gcc_assert (mode
!= OImode
);
7374 return gen_rtx_REG (orig_mode
, regno
);
7378 function_value_64 (enum machine_mode orig_mode
, enum machine_mode mode
,
7383 /* Handle libcalls, which don't provide a type node. */
7384 if (valtype
== NULL
)
7398 regno
= FIRST_SSE_REG
;
7402 regno
= FIRST_FLOAT_REG
;
7410 return gen_rtx_REG (mode
, regno
);
7412 else if (POINTER_TYPE_P (valtype
))
7414 /* Pointers are always returned in Pmode. */
7418 ret
= construct_container (mode
, orig_mode
, valtype
, 1,
7419 X86_64_REGPARM_MAX
, X86_64_SSE_REGPARM_MAX
,
7420 x86_64_int_return_registers
, 0);
7422 /* For zero sized structures, construct_container returns NULL, but we
7423 need to keep rest of compiler happy by returning meaningful value. */
7425 ret
= gen_rtx_REG (orig_mode
, AX_REG
);
7431 function_value_ms_64 (enum machine_mode orig_mode
, enum machine_mode mode
)
7433 unsigned int regno
= AX_REG
;
7437 switch (GET_MODE_SIZE (mode
))
7440 if((SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
7441 && !COMPLEX_MODE_P (mode
))
7442 regno
= FIRST_SSE_REG
;
7446 if (mode
== SFmode
|| mode
== DFmode
)
7447 regno
= FIRST_SSE_REG
;
7453 return gen_rtx_REG (orig_mode
, regno
);
7457 ix86_function_value_1 (const_tree valtype
, const_tree fntype_or_decl
,
7458 enum machine_mode orig_mode
, enum machine_mode mode
)
7460 const_tree fn
, fntype
;
7463 if (fntype_or_decl
&& DECL_P (fntype_or_decl
))
7464 fn
= fntype_or_decl
;
7465 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
7467 if (TARGET_64BIT
&& ix86_function_type_abi (fntype
) == MS_ABI
)
7468 return function_value_ms_64 (orig_mode
, mode
);
7469 else if (TARGET_64BIT
)
7470 return function_value_64 (orig_mode
, mode
, valtype
);
7472 return function_value_32 (orig_mode
, mode
, fntype
, fn
);
7476 ix86_function_value (const_tree valtype
, const_tree fntype_or_decl
,
7477 bool outgoing ATTRIBUTE_UNUSED
)
7479 enum machine_mode mode
, orig_mode
;
7481 orig_mode
= TYPE_MODE (valtype
);
7482 mode
= type_natural_mode (valtype
, NULL
);
7483 return ix86_function_value_1 (valtype
, fntype_or_decl
, orig_mode
, mode
);
7486 /* Pointer function arguments and return values are promoted to Pmode. */
7488 static enum machine_mode
7489 ix86_promote_function_mode (const_tree type
, enum machine_mode mode
,
7490 int *punsignedp
, const_tree fntype
,
7493 if (type
!= NULL_TREE
&& POINTER_TYPE_P (type
))
7495 *punsignedp
= POINTERS_EXTEND_UNSIGNED
;
7498 return default_promote_function_mode (type
, mode
, punsignedp
, fntype
,
7503 ix86_libcall_value (enum machine_mode mode
)
7505 return ix86_function_value_1 (NULL
, NULL
, mode
, mode
);
7508 /* Return true iff type is returned in memory. */
7510 static bool ATTRIBUTE_UNUSED
7511 return_in_memory_32 (const_tree type
, enum machine_mode mode
)
7515 if (mode
== BLKmode
)
7518 size
= int_size_in_bytes (type
);
7520 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
7523 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
7525 /* User-created vectors small enough to fit in EAX. */
7529 /* MMX/3dNow values are returned in MM0,
7530 except when it doesn't exits or the ABI prescribes otherwise. */
7532 return !TARGET_MMX
|| TARGET_VECT8_RETURNS
;
7534 /* SSE values are returned in XMM0, except when it doesn't exist. */
7538 /* AVX values are returned in YMM0, except when it doesn't exist. */
7549 /* OImode shouldn't be used directly. */
7550 gcc_assert (mode
!= OImode
);
7555 static bool ATTRIBUTE_UNUSED
7556 return_in_memory_64 (const_tree type
, enum machine_mode mode
)
7558 int needed_intregs
, needed_sseregs
;
7559 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
7562 static bool ATTRIBUTE_UNUSED
7563 return_in_memory_ms_64 (const_tree type
, enum machine_mode mode
)
7565 HOST_WIDE_INT size
= int_size_in_bytes (type
);
7567 /* __m128 is returned in xmm0. */
7568 if ((SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
7569 && !COMPLEX_MODE_P (mode
) && (GET_MODE_SIZE (mode
) == 16 || size
== 16))
7572 /* Otherwise, the size must be exactly in [1248]. */
7573 return size
!= 1 && size
!= 2 && size
!= 4 && size
!= 8;
7577 ix86_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
7579 #ifdef SUBTARGET_RETURN_IN_MEMORY
7580 return SUBTARGET_RETURN_IN_MEMORY (type
, fntype
);
7582 const enum machine_mode mode
= type_natural_mode (type
, NULL
);
7586 if (ix86_function_type_abi (fntype
) == MS_ABI
)
7587 return return_in_memory_ms_64 (type
, mode
);
7589 return return_in_memory_64 (type
, mode
);
7592 return return_in_memory_32 (type
, mode
);
7596 /* When returning SSE vector types, we have a choice of either
7597 (1) being abi incompatible with a -march switch, or
7598 (2) generating an error.
7599 Given no good solution, I think the safest thing is one warning.
7600 The user won't be able to use -Werror, but....
7602 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7603 called in response to actually generating a caller or callee that
7604 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7605 via aggregate_value_p for general type probing from tree-ssa. */
7608 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
7610 static bool warnedsse
, warnedmmx
;
7612 if (!TARGET_64BIT
&& type
)
7614 /* Look at the return type of the function, not the function type. */
7615 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
7617 if (!TARGET_SSE
&& !warnedsse
)
7620 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
7623 warning (0, "SSE vector return without SSE enabled "
7628 if (!TARGET_MMX
&& !warnedmmx
)
7630 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
7633 warning (0, "MMX vector return without MMX enabled "
7643 /* Create the va_list data type. */
7645 /* Returns the calling convention specific va_list date type.
7646 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7649 ix86_build_builtin_va_list_abi (enum calling_abi abi
)
7651 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
7653 /* For i386 we use plain pointer to argument area. */
7654 if (!TARGET_64BIT
|| abi
== MS_ABI
)
7655 return build_pointer_type (char_type_node
);
7657 record
= lang_hooks
.types
.make_type (RECORD_TYPE
);
7658 type_decl
= build_decl (BUILTINS_LOCATION
,
7659 TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
7661 f_gpr
= build_decl (BUILTINS_LOCATION
,
7662 FIELD_DECL
, get_identifier ("gp_offset"),
7663 unsigned_type_node
);
7664 f_fpr
= build_decl (BUILTINS_LOCATION
,
7665 FIELD_DECL
, get_identifier ("fp_offset"),
7666 unsigned_type_node
);
7667 f_ovf
= build_decl (BUILTINS_LOCATION
,
7668 FIELD_DECL
, get_identifier ("overflow_arg_area"),
7670 f_sav
= build_decl (BUILTINS_LOCATION
,
7671 FIELD_DECL
, get_identifier ("reg_save_area"),
7674 va_list_gpr_counter_field
= f_gpr
;
7675 va_list_fpr_counter_field
= f_fpr
;
7677 DECL_FIELD_CONTEXT (f_gpr
) = record
;
7678 DECL_FIELD_CONTEXT (f_fpr
) = record
;
7679 DECL_FIELD_CONTEXT (f_ovf
) = record
;
7680 DECL_FIELD_CONTEXT (f_sav
) = record
;
7682 TYPE_STUB_DECL (record
) = type_decl
;
7683 TYPE_NAME (record
) = type_decl
;
7684 TYPE_FIELDS (record
) = f_gpr
;
7685 DECL_CHAIN (f_gpr
) = f_fpr
;
7686 DECL_CHAIN (f_fpr
) = f_ovf
;
7687 DECL_CHAIN (f_ovf
) = f_sav
;
7689 layout_type (record
);
7691 /* The correct type is an array type of one element. */
7692 return build_array_type (record
, build_index_type (size_zero_node
));
7695 /* Setup the builtin va_list data type and for 64-bit the additional
7696 calling convention specific va_list data types. */
7699 ix86_build_builtin_va_list (void)
7701 tree ret
= ix86_build_builtin_va_list_abi (ix86_abi
);
7703 /* Initialize abi specific va_list builtin types. */
7707 if (ix86_abi
== MS_ABI
)
7709 t
= ix86_build_builtin_va_list_abi (SYSV_ABI
);
7710 if (TREE_CODE (t
) != RECORD_TYPE
)
7711 t
= build_variant_type_copy (t
);
7712 sysv_va_list_type_node
= t
;
7717 if (TREE_CODE (t
) != RECORD_TYPE
)
7718 t
= build_variant_type_copy (t
);
7719 sysv_va_list_type_node
= t
;
7721 if (ix86_abi
!= MS_ABI
)
7723 t
= ix86_build_builtin_va_list_abi (MS_ABI
);
7724 if (TREE_CODE (t
) != RECORD_TYPE
)
7725 t
= build_variant_type_copy (t
);
7726 ms_va_list_type_node
= t
;
7731 if (TREE_CODE (t
) != RECORD_TYPE
)
7732 t
= build_variant_type_copy (t
);
7733 ms_va_list_type_node
= t
;
7740 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7743 setup_incoming_varargs_64 (CUMULATIVE_ARGS
*cum
)
7749 /* GPR size of varargs save area. */
7750 if (cfun
->va_list_gpr_size
)
7751 ix86_varargs_gpr_size
= X86_64_REGPARM_MAX
* UNITS_PER_WORD
;
7753 ix86_varargs_gpr_size
= 0;
7755 /* FPR size of varargs save area. We don't need it if we don't pass
7756 anything in SSE registers. */
7757 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
7758 ix86_varargs_fpr_size
= X86_64_SSE_REGPARM_MAX
* 16;
7760 ix86_varargs_fpr_size
= 0;
7762 if (! ix86_varargs_gpr_size
&& ! ix86_varargs_fpr_size
)
7765 save_area
= frame_pointer_rtx
;
7766 set
= get_varargs_alias_set ();
7768 max
= cum
->regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
7769 if (max
> X86_64_REGPARM_MAX
)
7770 max
= X86_64_REGPARM_MAX
;
7772 for (i
= cum
->regno
; i
< max
; i
++)
7774 mem
= gen_rtx_MEM (Pmode
,
7775 plus_constant (save_area
, i
* UNITS_PER_WORD
));
7776 MEM_NOTRAP_P (mem
) = 1;
7777 set_mem_alias_set (mem
, set
);
7778 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
7779 x86_64_int_parameter_registers
[i
]));
7782 if (ix86_varargs_fpr_size
)
7784 enum machine_mode smode
;
7787 /* Now emit code to save SSE registers. The AX parameter contains number
7788 of SSE parameter registers used to call this function, though all we
7789 actually check here is the zero/non-zero status. */
7791 label
= gen_label_rtx ();
7792 test
= gen_rtx_EQ (VOIDmode
, gen_rtx_REG (QImode
, AX_REG
), const0_rtx
);
7793 emit_jump_insn (gen_cbranchqi4 (test
, XEXP (test
, 0), XEXP (test
, 1),
7796 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7797 we used movdqa (i.e. TImode) instead? Perhaps even better would
7798 be if we could determine the real mode of the data, via a hook
7799 into pass_stdarg. Ignore all that for now. */
7801 if (crtl
->stack_alignment_needed
< GET_MODE_ALIGNMENT (smode
))
7802 crtl
->stack_alignment_needed
= GET_MODE_ALIGNMENT (smode
);
7804 max
= cum
->sse_regno
+ cfun
->va_list_fpr_size
/ 16;
7805 if (max
> X86_64_SSE_REGPARM_MAX
)
7806 max
= X86_64_SSE_REGPARM_MAX
;
7808 for (i
= cum
->sse_regno
; i
< max
; ++i
)
7810 mem
= plus_constant (save_area
, i
* 16 + ix86_varargs_gpr_size
);
7811 mem
= gen_rtx_MEM (smode
, mem
);
7812 MEM_NOTRAP_P (mem
) = 1;
7813 set_mem_alias_set (mem
, set
);
7814 set_mem_align (mem
, GET_MODE_ALIGNMENT (smode
));
7816 emit_move_insn (mem
, gen_rtx_REG (smode
, SSE_REGNO (i
)));
7824 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS
*cum
)
7826 alias_set_type set
= get_varargs_alias_set ();
7829 /* Reset to zero, as there might be a sysv vaarg used
7831 ix86_varargs_gpr_size
= 0;
7832 ix86_varargs_fpr_size
= 0;
7834 for (i
= cum
->regno
; i
< X86_64_MS_REGPARM_MAX
; i
++)
7838 mem
= gen_rtx_MEM (Pmode
,
7839 plus_constant (virtual_incoming_args_rtx
,
7840 i
* UNITS_PER_WORD
));
7841 MEM_NOTRAP_P (mem
) = 1;
7842 set_mem_alias_set (mem
, set
);
7844 reg
= gen_rtx_REG (Pmode
, x86_64_ms_abi_int_parameter_registers
[i
]);
7845 emit_move_insn (mem
, reg
);
7850 ix86_setup_incoming_varargs (cumulative_args_t cum_v
, enum machine_mode mode
,
7851 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
7854 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
7855 CUMULATIVE_ARGS next_cum
;
7858 /* This argument doesn't appear to be used anymore. Which is good,
7859 because the old code here didn't suppress rtl generation. */
7860 gcc_assert (!no_rtl
);
7865 fntype
= TREE_TYPE (current_function_decl
);
7867 /* For varargs, we do not want to skip the dummy va_dcl argument.
7868 For stdargs, we do want to skip the last named argument. */
7870 if (stdarg_p (fntype
))
7871 ix86_function_arg_advance (pack_cumulative_args (&next_cum
), mode
, type
,
7874 if (cum
->call_abi
== MS_ABI
)
7875 setup_incoming_varargs_ms_64 (&next_cum
);
7877 setup_incoming_varargs_64 (&next_cum
);
7880 /* Checks if TYPE is of kind va_list char *. */
7883 is_va_list_char_pointer (tree type
)
7887 /* For 32-bit it is always true. */
7890 canonic
= ix86_canonical_va_list_type (type
);
7891 return (canonic
== ms_va_list_type_node
7892 || (ix86_abi
== MS_ABI
&& canonic
== va_list_type_node
));
7895 /* Implement va_start. */
7898 ix86_va_start (tree valist
, rtx nextarg
)
7900 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
7901 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
7902 tree gpr
, fpr
, ovf
, sav
, t
;
7906 if (flag_split_stack
7907 && cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
7909 unsigned int scratch_regno
;
7911 /* When we are splitting the stack, we can't refer to the stack
7912 arguments using internal_arg_pointer, because they may be on
7913 the old stack. The split stack prologue will arrange to
7914 leave a pointer to the old stack arguments in a scratch
7915 register, which we here copy to a pseudo-register. The split
7916 stack prologue can't set the pseudo-register directly because
7917 it (the prologue) runs before any registers have been saved. */
7919 scratch_regno
= split_stack_prologue_scratch_regno ();
7920 if (scratch_regno
!= INVALID_REGNUM
)
7924 reg
= gen_reg_rtx (Pmode
);
7925 cfun
->machine
->split_stack_varargs_pointer
= reg
;
7928 emit_move_insn (reg
, gen_rtx_REG (Pmode
, scratch_regno
));
7932 push_topmost_sequence ();
7933 emit_insn_after (seq
, entry_of_function ());
7934 pop_topmost_sequence ();
7938 /* Only 64bit target needs something special. */
7939 if (!TARGET_64BIT
|| is_va_list_char_pointer (TREE_TYPE (valist
)))
7941 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
7942 std_expand_builtin_va_start (valist
, nextarg
);
7947 va_r
= expand_expr (valist
, NULL_RTX
, VOIDmode
, EXPAND_WRITE
);
7948 next
= expand_binop (ptr_mode
, add_optab
,
7949 cfun
->machine
->split_stack_varargs_pointer
,
7950 crtl
->args
.arg_offset_rtx
,
7951 NULL_RTX
, 0, OPTAB_LIB_WIDEN
);
7952 convert_move (va_r
, next
, 0);
7957 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
7958 f_fpr
= DECL_CHAIN (f_gpr
);
7959 f_ovf
= DECL_CHAIN (f_fpr
);
7960 f_sav
= DECL_CHAIN (f_ovf
);
7962 valist
= build_simple_mem_ref (valist
);
7963 TREE_TYPE (valist
) = TREE_TYPE (sysv_va_list_type_node
);
7964 /* The following should be folded into the MEM_REF offset. */
7965 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), unshare_expr (valist
),
7967 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), unshare_expr (valist
),
7969 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), unshare_expr (valist
),
7971 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), unshare_expr (valist
),
7974 /* Count number of gp and fp argument registers used. */
7975 words
= crtl
->args
.info
.words
;
7976 n_gpr
= crtl
->args
.info
.regno
;
7977 n_fpr
= crtl
->args
.info
.sse_regno
;
7979 if (cfun
->va_list_gpr_size
)
7981 type
= TREE_TYPE (gpr
);
7982 t
= build2 (MODIFY_EXPR
, type
,
7983 gpr
, build_int_cst (type
, n_gpr
* 8));
7984 TREE_SIDE_EFFECTS (t
) = 1;
7985 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7988 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
7990 type
= TREE_TYPE (fpr
);
7991 t
= build2 (MODIFY_EXPR
, type
, fpr
,
7992 build_int_cst (type
, n_fpr
* 16 + 8*X86_64_REGPARM_MAX
));
7993 TREE_SIDE_EFFECTS (t
) = 1;
7994 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7997 /* Find the overflow area. */
7998 type
= TREE_TYPE (ovf
);
7999 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
8000 ovf_rtx
= crtl
->args
.internal_arg_pointer
;
8002 ovf_rtx
= cfun
->machine
->split_stack_varargs_pointer
;
8003 t
= make_tree (type
, ovf_rtx
);
8005 t
= fold_build_pointer_plus_hwi (t
, words
* UNITS_PER_WORD
);
8006 t
= build2 (MODIFY_EXPR
, type
, ovf
, t
);
8007 TREE_SIDE_EFFECTS (t
) = 1;
8008 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
8010 if (ix86_varargs_gpr_size
|| ix86_varargs_fpr_size
)
8012 /* Find the register save area.
8013 Prologue of the function save it right above stack frame. */
8014 type
= TREE_TYPE (sav
);
8015 t
= make_tree (type
, frame_pointer_rtx
);
8016 if (!ix86_varargs_gpr_size
)
8017 t
= fold_build_pointer_plus_hwi (t
, -8 * X86_64_REGPARM_MAX
);
8018 t
= build2 (MODIFY_EXPR
, type
, sav
, t
);
8019 TREE_SIDE_EFFECTS (t
) = 1;
8020 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
8024 /* Implement va_arg. */
8027 ix86_gimplify_va_arg (tree valist
, tree type
, gimple_seq
*pre_p
,
8030 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
8031 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
8032 tree gpr
, fpr
, ovf
, sav
, t
;
8034 tree lab_false
, lab_over
= NULL_TREE
;
8039 enum machine_mode nat_mode
;
8040 unsigned int arg_boundary
;
8042 /* Only 64bit target needs something special. */
8043 if (!TARGET_64BIT
|| is_va_list_char_pointer (TREE_TYPE (valist
)))
8044 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
8046 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
8047 f_fpr
= DECL_CHAIN (f_gpr
);
8048 f_ovf
= DECL_CHAIN (f_fpr
);
8049 f_sav
= DECL_CHAIN (f_ovf
);
8051 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
),
8052 build_va_arg_indirect_ref (valist
), f_gpr
, NULL_TREE
);
8053 valist
= build_va_arg_indirect_ref (valist
);
8054 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
8055 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
8056 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
8058 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
8060 type
= build_pointer_type (type
);
8061 size
= int_size_in_bytes (type
);
8062 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
8064 nat_mode
= type_natural_mode (type
, NULL
);
8073 /* Unnamed 256bit vector mode parameters are passed on stack. */
8074 if (!TARGET_64BIT_MS_ABI
)
8081 container
= construct_container (nat_mode
, TYPE_MODE (type
),
8082 type
, 0, X86_64_REGPARM_MAX
,
8083 X86_64_SSE_REGPARM_MAX
, intreg
,
8088 /* Pull the value out of the saved registers. */
8090 addr
= create_tmp_var (ptr_type_node
, "addr");
8094 int needed_intregs
, needed_sseregs
;
8096 tree int_addr
, sse_addr
;
8098 lab_false
= create_artificial_label (UNKNOWN_LOCATION
);
8099 lab_over
= create_artificial_label (UNKNOWN_LOCATION
);
8101 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
8103 need_temp
= (!REG_P (container
)
8104 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
8105 || TYPE_ALIGN (type
) > 128));
8107 /* In case we are passing structure, verify that it is consecutive block
8108 on the register save area. If not we need to do moves. */
8109 if (!need_temp
&& !REG_P (container
))
8111 /* Verify that all registers are strictly consecutive */
8112 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
8116 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
8118 rtx slot
= XVECEXP (container
, 0, i
);
8119 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
8120 || INTVAL (XEXP (slot
, 1)) != i
* 16)
8128 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
8130 rtx slot
= XVECEXP (container
, 0, i
);
8131 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
8132 || INTVAL (XEXP (slot
, 1)) != i
* 8)
8144 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
8145 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
8148 /* First ensure that we fit completely in registers. */
8151 t
= build_int_cst (TREE_TYPE (gpr
),
8152 (X86_64_REGPARM_MAX
- needed_intregs
+ 1) * 8);
8153 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
8154 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
8155 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
8156 gimplify_and_add (t
, pre_p
);
8160 t
= build_int_cst (TREE_TYPE (fpr
),
8161 (X86_64_SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
8162 + X86_64_REGPARM_MAX
* 8);
8163 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
8164 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
8165 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
8166 gimplify_and_add (t
, pre_p
);
8169 /* Compute index to start of area used for integer regs. */
8172 /* int_addr = gpr + sav; */
8173 t
= fold_build_pointer_plus (sav
, gpr
);
8174 gimplify_assign (int_addr
, t
, pre_p
);
8178 /* sse_addr = fpr + sav; */
8179 t
= fold_build_pointer_plus (sav
, fpr
);
8180 gimplify_assign (sse_addr
, t
, pre_p
);
8184 int i
, prev_size
= 0;
8185 tree temp
= create_tmp_var (type
, "va_arg_tmp");
8188 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
8189 gimplify_assign (addr
, t
, pre_p
);
8191 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
8193 rtx slot
= XVECEXP (container
, 0, i
);
8194 rtx reg
= XEXP (slot
, 0);
8195 enum machine_mode mode
= GET_MODE (reg
);
8201 tree dest_addr
, dest
;
8202 int cur_size
= GET_MODE_SIZE (mode
);
8204 gcc_assert (prev_size
<= INTVAL (XEXP (slot
, 1)));
8205 prev_size
= INTVAL (XEXP (slot
, 1));
8206 if (prev_size
+ cur_size
> size
)
8208 cur_size
= size
- prev_size
;
8209 mode
= mode_for_size (cur_size
* BITS_PER_UNIT
, MODE_INT
, 1);
8210 if (mode
== BLKmode
)
8213 piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
8214 if (mode
== GET_MODE (reg
))
8215 addr_type
= build_pointer_type (piece_type
);
8217 addr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
8219 daddr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
8222 if (SSE_REGNO_P (REGNO (reg
)))
8224 src_addr
= sse_addr
;
8225 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
8229 src_addr
= int_addr
;
8230 src_offset
= REGNO (reg
) * 8;
8232 src_addr
= fold_convert (addr_type
, src_addr
);
8233 src_addr
= fold_build_pointer_plus_hwi (src_addr
, src_offset
);
8235 dest_addr
= fold_convert (daddr_type
, addr
);
8236 dest_addr
= fold_build_pointer_plus_hwi (dest_addr
, prev_size
);
8237 if (cur_size
== GET_MODE_SIZE (mode
))
8239 src
= build_va_arg_indirect_ref (src_addr
);
8240 dest
= build_va_arg_indirect_ref (dest_addr
);
8242 gimplify_assign (dest
, src
, pre_p
);
8247 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY
),
8248 3, dest_addr
, src_addr
,
8249 size_int (cur_size
));
8250 gimplify_and_add (copy
, pre_p
);
8252 prev_size
+= cur_size
;
8258 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
8259 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
8260 gimplify_assign (gpr
, t
, pre_p
);
8265 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
8266 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
8267 gimplify_assign (fpr
, t
, pre_p
);
8270 gimple_seq_add_stmt (pre_p
, gimple_build_goto (lab_over
));
8272 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_false
));
8275 /* ... otherwise out of the overflow area. */
8277 /* When we align parameter on stack for caller, if the parameter
8278 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8279 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
8280 here with caller. */
8281 arg_boundary
= ix86_function_arg_boundary (VOIDmode
, type
);
8282 if ((unsigned int) arg_boundary
> MAX_SUPPORTED_STACK_ALIGNMENT
)
8283 arg_boundary
= MAX_SUPPORTED_STACK_ALIGNMENT
;
8285 /* Care for on-stack alignment if needed. */
8286 if (arg_boundary
<= 64 || size
== 0)
8290 HOST_WIDE_INT align
= arg_boundary
/ 8;
8291 t
= fold_build_pointer_plus_hwi (ovf
, align
- 1);
8292 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
8293 build_int_cst (TREE_TYPE (t
), -align
));
8296 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
8297 gimplify_assign (addr
, t
, pre_p
);
8299 t
= fold_build_pointer_plus_hwi (t
, rsize
* UNITS_PER_WORD
);
8300 gimplify_assign (unshare_expr (ovf
), t
, pre_p
);
8303 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_over
));
8305 ptrtype
= build_pointer_type_for_mode (type
, ptr_mode
, true);
8306 addr
= fold_convert (ptrtype
, addr
);
8309 addr
= build_va_arg_indirect_ref (addr
);
8310 return build_va_arg_indirect_ref (addr
);
8313 /* Return true if OPNUM's MEM should be matched
8314 in movabs* patterns. */
8317 ix86_check_movabs (rtx insn
, int opnum
)
8321 set
= PATTERN (insn
);
8322 if (GET_CODE (set
) == PARALLEL
)
8323 set
= XVECEXP (set
, 0, 0);
8324 gcc_assert (GET_CODE (set
) == SET
);
8325 mem
= XEXP (set
, opnum
);
8326 while (GET_CODE (mem
) == SUBREG
)
8327 mem
= SUBREG_REG (mem
);
8328 gcc_assert (MEM_P (mem
));
8329 return volatile_ok
|| !MEM_VOLATILE_P (mem
);
8332 /* Initialize the table of extra 80387 mathematical constants. */
8335 init_ext_80387_constants (void)
8337 static const char * cst
[5] =
8339 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8340 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8341 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8342 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8343 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8347 for (i
= 0; i
< 5; i
++)
8349 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
8350 /* Ensure each constant is rounded to XFmode precision. */
8351 real_convert (&ext_80387_constants_table
[i
],
8352 XFmode
, &ext_80387_constants_table
[i
]);
8355 ext_80387_constants_init
= 1;
8358 /* Return non-zero if the constant is something that
8359 can be loaded with a special instruction. */
8362 standard_80387_constant_p (rtx x
)
8364 enum machine_mode mode
= GET_MODE (x
);
8368 if (!(X87_FLOAT_MODE_P (mode
) && (GET_CODE (x
) == CONST_DOUBLE
)))
8371 if (x
== CONST0_RTX (mode
))
8373 if (x
== CONST1_RTX (mode
))
8376 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8378 /* For XFmode constants, try to find a special 80387 instruction when
8379 optimizing for size or on those CPUs that benefit from them. */
8381 && (optimize_function_for_size_p (cfun
) || TARGET_EXT_80387_CONSTANTS
))
8385 if (! ext_80387_constants_init
)
8386 init_ext_80387_constants ();
8388 for (i
= 0; i
< 5; i
++)
8389 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
8393 /* Load of the constant -0.0 or -1.0 will be split as
8394 fldz;fchs or fld1;fchs sequence. */
8395 if (real_isnegzero (&r
))
8397 if (real_identical (&r
, &dconstm1
))
8403 /* Return the opcode of the special instruction to be used to load
8407 standard_80387_constant_opcode (rtx x
)
8409 switch (standard_80387_constant_p (x
))
8433 /* Return the CONST_DOUBLE representing the 80387 constant that is
8434 loaded by the specified special instruction. The argument IDX
8435 matches the return value from standard_80387_constant_p. */
8438 standard_80387_constant_rtx (int idx
)
8442 if (! ext_80387_constants_init
)
8443 init_ext_80387_constants ();
8459 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
8463 /* Return 1 if X is all 0s and 2 if x is all 1s
8464 in supported SSE/AVX vector mode. */
8467 standard_sse_constant_p (rtx x
)
8469 enum machine_mode mode
= GET_MODE (x
);
8471 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
8473 if (vector_all_ones_operand (x
, mode
))
8495 /* Return the opcode of the special instruction to be used to load
8499 standard_sse_constant_opcode (rtx insn
, rtx x
)
8501 switch (standard_sse_constant_p (x
))
8504 switch (get_attr_mode (insn
))
8507 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
)
8508 return "%vpxor\t%0, %d0";
8510 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
)
8511 return "%vxorpd\t%0, %d0";
8513 return "%vxorps\t%0, %d0";
8516 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
)
8517 return "vpxor\t%x0, %x0, %x0";
8519 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
)
8520 return "vxorpd\t%x0, %x0, %x0";
8522 return "vxorps\t%x0, %x0, %x0";
8530 return "vpcmpeqd\t%0, %0, %0";
8532 return "pcmpeqd\t%0, %0";
8540 /* Returns true if OP contains a symbol reference */
8543 symbolic_reference_mentioned_p (rtx op
)
8548 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
8551 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
8552 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
8558 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
8559 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
8563 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
8570 /* Return true if it is appropriate to emit `ret' instructions in the
8571 body of a function. Do this only if the epilogue is simple, needing a
8572 couple of insns. Prior to reloading, we can't tell how many registers
8573 must be saved, so return false then. Return false if there is no frame
8574 marker to de-allocate. */
8577 ix86_can_use_return_insn_p (void)
8579 struct ix86_frame frame
;
8581 if (! reload_completed
|| frame_pointer_needed
)
8584 /* Don't allow more than 32k pop, since that's all we can do
8585 with one instruction. */
8586 if (crtl
->args
.pops_args
&& crtl
->args
.size
>= 32768)
8589 ix86_compute_frame_layout (&frame
);
8590 return (frame
.stack_pointer_offset
== UNITS_PER_WORD
8591 && (frame
.nregs
+ frame
.nsseregs
) == 0);
8594 /* Value should be nonzero if functions must have frame pointers.
8595 Zero means the frame pointer need not be set up (and parms may
8596 be accessed via the stack pointer) in functions that seem suitable. */
8599 ix86_frame_pointer_required (void)
8601 /* If we accessed previous frames, then the generated code expects
8602 to be able to access the saved ebp value in our frame. */
8603 if (cfun
->machine
->accesses_prev_frame
)
8606 /* Several x86 os'es need a frame pointer for other reasons,
8607 usually pertaining to setjmp. */
8608 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
8611 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
8612 if (TARGET_32BIT_MS_ABI
&& cfun
->calls_setjmp
)
8615 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8616 turns off the frame pointer by default. Turn it back on now if
8617 we've not got a leaf function. */
8618 if (TARGET_OMIT_LEAF_FRAME_POINTER
8619 && (!current_function_is_leaf
8620 || ix86_current_function_calls_tls_descriptor
))
8623 if (crtl
->profile
&& !flag_fentry
)
8629 /* Record that the current function accesses previous call frames. */
8632 ix86_setup_frame_addresses (void)
8634 cfun
->machine
->accesses_prev_frame
= 1;
8637 #ifndef USE_HIDDEN_LINKONCE
8638 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
8639 # define USE_HIDDEN_LINKONCE 1
8641 # define USE_HIDDEN_LINKONCE 0
8645 static int pic_labels_used
;
8647 /* Fills in the label name that should be used for a pc thunk for
8648 the given register. */
8651 get_pc_thunk_name (char name
[32], unsigned int regno
)
8653 gcc_assert (!TARGET_64BIT
);
8655 if (USE_HIDDEN_LINKONCE
)
8656 sprintf (name
, "__x86.get_pc_thunk.%s", reg_names
[regno
]);
8658 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
8662 /* This function generates code for -fpic that loads %ebx with
8663 the return address of the caller and then returns. */
8666 ix86_code_end (void)
8671 for (regno
= AX_REG
; regno
<= SP_REG
; regno
++)
8676 if (!(pic_labels_used
& (1 << regno
)))
8679 get_pc_thunk_name (name
, regno
);
8681 decl
= build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
8682 get_identifier (name
),
8683 build_function_type_list (void_type_node
, NULL_TREE
));
8684 DECL_RESULT (decl
) = build_decl (BUILTINS_LOCATION
, RESULT_DECL
,
8685 NULL_TREE
, void_type_node
);
8686 TREE_PUBLIC (decl
) = 1;
8687 TREE_STATIC (decl
) = 1;
8692 switch_to_section (darwin_sections
[text_coal_section
]);
8693 fputs ("\t.weak_definition\t", asm_out_file
);
8694 assemble_name (asm_out_file
, name
);
8695 fputs ("\n\t.private_extern\t", asm_out_file
);
8696 assemble_name (asm_out_file
, name
);
8697 putc ('\n', asm_out_file
);
8698 ASM_OUTPUT_LABEL (asm_out_file
, name
);
8699 DECL_WEAK (decl
) = 1;
8703 if (USE_HIDDEN_LINKONCE
)
8705 DECL_COMDAT_GROUP (decl
) = DECL_ASSEMBLER_NAME (decl
);
8707 targetm
.asm_out
.unique_section (decl
, 0);
8708 switch_to_section (get_named_section (decl
, NULL
, 0));
8710 targetm
.asm_out
.globalize_label (asm_out_file
, name
);
8711 fputs ("\t.hidden\t", asm_out_file
);
8712 assemble_name (asm_out_file
, name
);
8713 putc ('\n', asm_out_file
);
8714 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
8718 switch_to_section (text_section
);
8719 ASM_OUTPUT_LABEL (asm_out_file
, name
);
8722 DECL_INITIAL (decl
) = make_node (BLOCK
);
8723 current_function_decl
= decl
;
8724 init_function_start (decl
);
8725 first_function_block_is_cold
= false;
8726 /* Make sure unwind info is emitted for the thunk if needed. */
8727 final_start_function (emit_barrier (), asm_out_file
, 1);
8729 /* Pad stack IP move with 4 instructions (two NOPs count
8730 as one instruction). */
8731 if (TARGET_PAD_SHORT_FUNCTION
)
8736 fputs ("\tnop\n", asm_out_file
);
8739 xops
[0] = gen_rtx_REG (Pmode
, regno
);
8740 xops
[1] = gen_rtx_MEM (Pmode
, stack_pointer_rtx
);
8741 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops
);
8742 fputs ("\tret\n", asm_out_file
);
8743 final_end_function ();
8744 init_insn_lengths ();
8745 free_after_compilation (cfun
);
8747 current_function_decl
= NULL
;
8750 if (flag_split_stack
)
8751 file_end_indicate_split_stack ();
8754 /* Emit code for the SET_GOT patterns. */
8757 output_set_got (rtx dest
, rtx label ATTRIBUTE_UNUSED
)
8763 if (TARGET_VXWORKS_RTP
&& flag_pic
)
8765 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8766 xops
[2] = gen_rtx_MEM (Pmode
,
8767 gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
));
8768 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
8770 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8771 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8772 an unadorned address. */
8773 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
8774 SYMBOL_REF_FLAGS (xops
[2]) |= SYMBOL_FLAG_LOCAL
;
8775 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops
);
8779 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
8783 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
8785 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops
);
8788 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8789 is what will be referenced by the Mach-O PIC subsystem. */
8791 ASM_OUTPUT_LABEL (asm_out_file
, MACHOPIC_FUNCTION_BASE_NAME
);
8794 targetm
.asm_out
.internal_label (asm_out_file
, "L",
8795 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
8800 get_pc_thunk_name (name
, REGNO (dest
));
8801 pic_labels_used
|= 1 << REGNO (dest
);
8803 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
8804 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
8805 output_asm_insn ("call\t%X2", xops
);
8806 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8807 is what will be referenced by the Mach-O PIC subsystem. */
8810 ASM_OUTPUT_LABEL (asm_out_file
, MACHOPIC_FUNCTION_BASE_NAME
);
8812 targetm
.asm_out
.internal_label (asm_out_file
, "L",
8813 CODE_LABEL_NUMBER (label
));
8818 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops
);
8823 /* Generate an "push" pattern for input ARG. */
8828 struct machine_function
*m
= cfun
->machine
;
8830 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
8831 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
8832 m
->fs
.sp_offset
+= UNITS_PER_WORD
;
8834 return gen_rtx_SET (VOIDmode
,
8836 gen_rtx_PRE_DEC (Pmode
,
8837 stack_pointer_rtx
)),
8841 /* Generate an "pop" pattern for input ARG. */
8846 return gen_rtx_SET (VOIDmode
,
8849 gen_rtx_POST_INC (Pmode
,
8850 stack_pointer_rtx
)));
8853 /* Return >= 0 if there is an unused call-clobbered register available
8854 for the entire function. */
8857 ix86_select_alt_pic_regnum (void)
8859 if (current_function_is_leaf
8861 && !ix86_current_function_calls_tls_descriptor
)
8864 /* Can't use the same register for both PIC and DRAP. */
8866 drap
= REGNO (crtl
->drap_reg
);
8869 for (i
= 2; i
>= 0; --i
)
8870 if (i
!= drap
&& !df_regs_ever_live_p (i
))
8874 return INVALID_REGNUM
;
8877 /* Return TRUE if we need to save REGNO. */
8880 ix86_save_reg (unsigned int regno
, bool maybe_eh_return
)
8882 if (pic_offset_table_rtx
8883 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
8884 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
8886 || crtl
->calls_eh_return
8887 || crtl
->uses_const_pool
))
8888 return ix86_select_alt_pic_regnum () == INVALID_REGNUM
;
8890 if (crtl
->calls_eh_return
&& maybe_eh_return
)
8895 unsigned test
= EH_RETURN_DATA_REGNO (i
);
8896 if (test
== INVALID_REGNUM
)
8903 if (crtl
->drap_reg
&& regno
== REGNO (crtl
->drap_reg
))
8906 return (df_regs_ever_live_p (regno
)
8907 && !call_used_regs
[regno
]
8908 && !fixed_regs
[regno
]
8909 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
8912 /* Return number of saved general prupose registers. */
8915 ix86_nsaved_regs (void)
8920 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
8921 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
8926 /* Return number of saved SSE registrers. */
8929 ix86_nsaved_sseregs (void)
8934 if (!TARGET_64BIT_MS_ABI
)
8936 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
8937 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
8942 /* Given FROM and TO register numbers, say whether this elimination is
8943 allowed. If stack alignment is needed, we can only replace argument
8944 pointer with hard frame pointer, or replace frame pointer with stack
8945 pointer. Otherwise, frame pointer elimination is automatically
8946 handled and all other eliminations are valid. */
8949 ix86_can_eliminate (const int from
, const int to
)
8951 if (stack_realign_fp
)
8952 return ((from
== ARG_POINTER_REGNUM
8953 && to
== HARD_FRAME_POINTER_REGNUM
)
8954 || (from
== FRAME_POINTER_REGNUM
8955 && to
== STACK_POINTER_REGNUM
));
8957 return to
== STACK_POINTER_REGNUM
? !frame_pointer_needed
: true;
8960 /* Return the offset between two registers, one to be eliminated, and the other
8961 its replacement, at the start of a routine. */
8964 ix86_initial_elimination_offset (int from
, int to
)
8966 struct ix86_frame frame
;
8967 ix86_compute_frame_layout (&frame
);
8969 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
8970 return frame
.hard_frame_pointer_offset
;
8971 else if (from
== FRAME_POINTER_REGNUM
8972 && to
== HARD_FRAME_POINTER_REGNUM
)
8973 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
8976 gcc_assert (to
== STACK_POINTER_REGNUM
);
8978 if (from
== ARG_POINTER_REGNUM
)
8979 return frame
.stack_pointer_offset
;
8981 gcc_assert (from
== FRAME_POINTER_REGNUM
);
8982 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
8986 /* In a dynamically-aligned function, we can't know the offset from
8987 stack pointer to frame pointer, so we must ensure that setjmp
8988 eliminates fp against the hard fp (%ebp) rather than trying to
8989 index from %esp up to the top of the frame across a gap that is
8990 of unknown (at compile-time) size. */
8992 ix86_builtin_setjmp_frame_value (void)
8994 return stack_realign_fp
? hard_frame_pointer_rtx
: virtual_stack_vars_rtx
;
8997 /* When using -fsplit-stack, the allocation routines set a field in
8998 the TCB to the bottom of the stack plus this much space, measured
9001 #define SPLIT_STACK_AVAILABLE 256
9003 /* Fill structure ix86_frame about frame of currently computed function. */
9006 ix86_compute_frame_layout (struct ix86_frame
*frame
)
9008 unsigned int stack_alignment_needed
;
9009 HOST_WIDE_INT offset
;
9010 unsigned int preferred_alignment
;
9011 HOST_WIDE_INT size
= get_frame_size ();
9012 HOST_WIDE_INT to_allocate
;
9014 frame
->nregs
= ix86_nsaved_regs ();
9015 frame
->nsseregs
= ix86_nsaved_sseregs ();
9017 stack_alignment_needed
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
9018 preferred_alignment
= crtl
->preferred_stack_boundary
/ BITS_PER_UNIT
;
9020 /* 64-bit MS ABI seem to require stack alignment to be always 16 except for
9021 function prologues and leaf. */
9022 if ((TARGET_64BIT_MS_ABI
&& preferred_alignment
< 16)
9023 && (!current_function_is_leaf
|| cfun
->calls_alloca
!= 0
9024 || ix86_current_function_calls_tls_descriptor
))
9026 preferred_alignment
= 16;
9027 stack_alignment_needed
= 16;
9028 crtl
->preferred_stack_boundary
= 128;
9029 crtl
->stack_alignment_needed
= 128;
9032 gcc_assert (!size
|| stack_alignment_needed
);
9033 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
9034 gcc_assert (preferred_alignment
<= stack_alignment_needed
);
9036 /* For SEH we have to limit the amount of code movement into the prologue.
9037 At present we do this via a BLOCKAGE, at which point there's very little
9038 scheduling that can be done, which means that there's very little point
9039 in doing anything except PUSHs. */
9041 cfun
->machine
->use_fast_prologue_epilogue
= false;
9043 /* During reload iteration the amount of registers saved can change.
9044 Recompute the value as needed. Do not recompute when amount of registers
9045 didn't change as reload does multiple calls to the function and does not
9046 expect the decision to change within single iteration. */
9047 else if (!optimize_function_for_size_p (cfun
)
9048 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
9050 int count
= frame
->nregs
;
9051 struct cgraph_node
*node
= cgraph_get_node (current_function_decl
);
9053 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
9055 /* The fast prologue uses move instead of push to save registers. This
9056 is significantly longer, but also executes faster as modern hardware
9057 can execute the moves in parallel, but can't do that for push/pop.
9059 Be careful about choosing what prologue to emit: When function takes
9060 many instructions to execute we may use slow version as well as in
9061 case function is known to be outside hot spot (this is known with
9062 feedback only). Weight the size of function by number of registers
9063 to save as it is cheap to use one or two push instructions but very
9064 slow to use many of them. */
9066 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
9067 if (node
->frequency
< NODE_FREQUENCY_NORMAL
9068 || (flag_branch_probabilities
9069 && node
->frequency
< NODE_FREQUENCY_HOT
))
9070 cfun
->machine
->use_fast_prologue_epilogue
= false;
9072 cfun
->machine
->use_fast_prologue_epilogue
9073 = !expensive_function_p (count
);
9076 frame
->save_regs_using_mov
9077 = (TARGET_PROLOGUE_USING_MOVE
&& cfun
->machine
->use_fast_prologue_epilogue
9078 /* If static stack checking is enabled and done with probes,
9079 the registers need to be saved before allocating the frame. */
9080 && flag_stack_check
!= STATIC_BUILTIN_STACK_CHECK
);
9082 /* Skip return address. */
9083 offset
= UNITS_PER_WORD
;
9085 /* Skip pushed static chain. */
9086 if (ix86_static_chain_on_stack
)
9087 offset
+= UNITS_PER_WORD
;
9089 /* Skip saved base pointer. */
9090 if (frame_pointer_needed
)
9091 offset
+= UNITS_PER_WORD
;
9092 frame
->hfp_save_offset
= offset
;
9094 /* The traditional frame pointer location is at the top of the frame. */
9095 frame
->hard_frame_pointer_offset
= offset
;
9097 /* Register save area */
9098 offset
+= frame
->nregs
* UNITS_PER_WORD
;
9099 frame
->reg_save_offset
= offset
;
9101 /* Align and set SSE register save area. */
9102 if (frame
->nsseregs
)
9104 /* The only ABI that has saved SSE registers (Win64) also has a
9105 16-byte aligned default stack, and thus we don't need to be
9106 within the re-aligned local stack frame to save them. */
9107 gcc_assert (INCOMING_STACK_BOUNDARY
>= 128);
9108 offset
= (offset
+ 16 - 1) & -16;
9109 offset
+= frame
->nsseregs
* 16;
9111 frame
->sse_reg_save_offset
= offset
;
9113 /* The re-aligned stack starts here. Values before this point are not
9114 directly comparable with values below this point. In order to make
9115 sure that no value happens to be the same before and after, force
9116 the alignment computation below to add a non-zero value. */
9117 if (stack_realign_fp
)
9118 offset
= (offset
+ stack_alignment_needed
) & -stack_alignment_needed
;
9121 frame
->va_arg_size
= ix86_varargs_gpr_size
+ ix86_varargs_fpr_size
;
9122 offset
+= frame
->va_arg_size
;
9124 /* Align start of frame for local function. */
9125 if (stack_realign_fp
9126 || offset
!= frame
->sse_reg_save_offset
9128 || !current_function_is_leaf
9129 || cfun
->calls_alloca
9130 || ix86_current_function_calls_tls_descriptor
)
9131 offset
= (offset
+ stack_alignment_needed
- 1) & -stack_alignment_needed
;
9133 /* Frame pointer points here. */
9134 frame
->frame_pointer_offset
= offset
;
9138 /* Add outgoing arguments area. Can be skipped if we eliminated
9139 all the function calls as dead code.
9140 Skipping is however impossible when function calls alloca. Alloca
9141 expander assumes that last crtl->outgoing_args_size
9142 of stack frame are unused. */
9143 if (ACCUMULATE_OUTGOING_ARGS
9144 && (!current_function_is_leaf
|| cfun
->calls_alloca
9145 || ix86_current_function_calls_tls_descriptor
))
9147 offset
+= crtl
->outgoing_args_size
;
9148 frame
->outgoing_arguments_size
= crtl
->outgoing_args_size
;
9151 frame
->outgoing_arguments_size
= 0;
9153 /* Align stack boundary. Only needed if we're calling another function
9155 if (!current_function_is_leaf
|| cfun
->calls_alloca
9156 || ix86_current_function_calls_tls_descriptor
)
9157 offset
= (offset
+ preferred_alignment
- 1) & -preferred_alignment
;
9159 /* We've reached end of stack frame. */
9160 frame
->stack_pointer_offset
= offset
;
9162 /* Size prologue needs to allocate. */
9163 to_allocate
= offset
- frame
->sse_reg_save_offset
;
9165 if ((!to_allocate
&& frame
->nregs
<= 1)
9166 || (TARGET_64BIT
&& to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
9167 frame
->save_regs_using_mov
= false;
9169 if (ix86_using_red_zone ()
9170 && current_function_sp_is_unchanging
9171 && current_function_is_leaf
9172 && !ix86_current_function_calls_tls_descriptor
)
9174 frame
->red_zone_size
= to_allocate
;
9175 if (frame
->save_regs_using_mov
)
9176 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
9177 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
9178 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
9181 frame
->red_zone_size
= 0;
9182 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
9184 /* The SEH frame pointer location is near the bottom of the frame.
9185 This is enforced by the fact that the difference between the
9186 stack pointer and the frame pointer is limited to 240 bytes in
9187 the unwind data structure. */
9192 /* If we can leave the frame pointer where it is, do so. */
9193 diff
= frame
->stack_pointer_offset
- frame
->hard_frame_pointer_offset
;
9194 if (diff
> 240 || (diff
& 15) != 0)
9196 /* Ideally we'd determine what portion of the local stack frame
9197 (within the constraint of the lowest 240) is most heavily used.
9198 But without that complication, simply bias the frame pointer
9199 by 128 bytes so as to maximize the amount of the local stack
9200 frame that is addressable with 8-bit offsets. */
9201 frame
->hard_frame_pointer_offset
= frame
->stack_pointer_offset
- 128;
9206 /* This is semi-inlined memory_address_length, but simplified
9207 since we know that we're always dealing with reg+offset, and
9208 to avoid having to create and discard all that rtl. */
9211 choose_baseaddr_len (unsigned int regno
, HOST_WIDE_INT offset
)
9217 /* EBP and R13 cannot be encoded without an offset. */
9218 len
= (regno
== BP_REG
|| regno
== R13_REG
);
9220 else if (IN_RANGE (offset
, -128, 127))
9223 /* ESP and R12 must be encoded with a SIB byte. */
9224 if (regno
== SP_REG
|| regno
== R12_REG
)
9230 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9231 The valid base registers are taken from CFUN->MACHINE->FS. */
9234 choose_baseaddr (HOST_WIDE_INT cfa_offset
)
9236 const struct machine_function
*m
= cfun
->machine
;
9237 rtx base_reg
= NULL
;
9238 HOST_WIDE_INT base_offset
= 0;
9240 if (m
->use_fast_prologue_epilogue
)
9242 /* Choose the base register most likely to allow the most scheduling
9243 opportunities. Generally FP is valid througout the function,
9244 while DRAP must be reloaded within the epilogue. But choose either
9245 over the SP due to increased encoding size. */
9249 base_reg
= hard_frame_pointer_rtx
;
9250 base_offset
= m
->fs
.fp_offset
- cfa_offset
;
9252 else if (m
->fs
.drap_valid
)
9254 base_reg
= crtl
->drap_reg
;
9255 base_offset
= 0 - cfa_offset
;
9257 else if (m
->fs
.sp_valid
)
9259 base_reg
= stack_pointer_rtx
;
9260 base_offset
= m
->fs
.sp_offset
- cfa_offset
;
9265 HOST_WIDE_INT toffset
;
9268 /* Choose the base register with the smallest address encoding.
9269 With a tie, choose FP > DRAP > SP. */
9272 base_reg
= stack_pointer_rtx
;
9273 base_offset
= m
->fs
.sp_offset
- cfa_offset
;
9274 len
= choose_baseaddr_len (STACK_POINTER_REGNUM
, base_offset
);
9276 if (m
->fs
.drap_valid
)
9278 toffset
= 0 - cfa_offset
;
9279 tlen
= choose_baseaddr_len (REGNO (crtl
->drap_reg
), toffset
);
9282 base_reg
= crtl
->drap_reg
;
9283 base_offset
= toffset
;
9289 toffset
= m
->fs
.fp_offset
- cfa_offset
;
9290 tlen
= choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM
, toffset
);
9293 base_reg
= hard_frame_pointer_rtx
;
9294 base_offset
= toffset
;
9299 gcc_assert (base_reg
!= NULL
);
9301 return plus_constant (base_reg
, base_offset
);
9304 /* Emit code to save registers in the prologue. */
9307 ix86_emit_save_regs (void)
9312 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
-- > 0; )
9313 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9315 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
9316 RTX_FRAME_RELATED_P (insn
) = 1;
9320 /* Emit a single register save at CFA - CFA_OFFSET. */
9323 ix86_emit_save_reg_using_mov (enum machine_mode mode
, unsigned int regno
,
9324 HOST_WIDE_INT cfa_offset
)
9326 struct machine_function
*m
= cfun
->machine
;
9327 rtx reg
= gen_rtx_REG (mode
, regno
);
9328 rtx mem
, addr
, base
, insn
;
9330 addr
= choose_baseaddr (cfa_offset
);
9331 mem
= gen_frame_mem (mode
, addr
);
9333 /* For SSE saves, we need to indicate the 128-bit alignment. */
9334 set_mem_align (mem
, GET_MODE_ALIGNMENT (mode
));
9336 insn
= emit_move_insn (mem
, reg
);
9337 RTX_FRAME_RELATED_P (insn
) = 1;
9340 if (GET_CODE (base
) == PLUS
)
9341 base
= XEXP (base
, 0);
9342 gcc_checking_assert (REG_P (base
));
9344 /* When saving registers into a re-aligned local stack frame, avoid
9345 any tricky guessing by dwarf2out. */
9346 if (m
->fs
.realigned
)
9348 gcc_checking_assert (stack_realign_drap
);
9350 if (regno
== REGNO (crtl
->drap_reg
))
9352 /* A bit of a hack. We force the DRAP register to be saved in
9353 the re-aligned stack frame, which provides us with a copy
9354 of the CFA that will last past the prologue. Install it. */
9355 gcc_checking_assert (cfun
->machine
->fs
.fp_valid
);
9356 addr
= plus_constant (hard_frame_pointer_rtx
,
9357 cfun
->machine
->fs
.fp_offset
- cfa_offset
);
9358 mem
= gen_rtx_MEM (mode
, addr
);
9359 add_reg_note (insn
, REG_CFA_DEF_CFA
, mem
);
9363 /* The frame pointer is a stable reference within the
9364 aligned frame. Use it. */
9365 gcc_checking_assert (cfun
->machine
->fs
.fp_valid
);
9366 addr
= plus_constant (hard_frame_pointer_rtx
,
9367 cfun
->machine
->fs
.fp_offset
- cfa_offset
);
9368 mem
= gen_rtx_MEM (mode
, addr
);
9369 add_reg_note (insn
, REG_CFA_EXPRESSION
,
9370 gen_rtx_SET (VOIDmode
, mem
, reg
));
9374 /* The memory may not be relative to the current CFA register,
9375 which means that we may need to generate a new pattern for
9376 use by the unwind info. */
9377 else if (base
!= m
->fs
.cfa_reg
)
9379 addr
= plus_constant (m
->fs
.cfa_reg
, m
->fs
.cfa_offset
- cfa_offset
);
9380 mem
= gen_rtx_MEM (mode
, addr
);
9381 add_reg_note (insn
, REG_CFA_OFFSET
, gen_rtx_SET (VOIDmode
, mem
, reg
));
9385 /* Emit code to save registers using MOV insns.
9386 First register is stored at CFA - CFA_OFFSET. */
9388 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset
)
9392 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9393 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9395 ix86_emit_save_reg_using_mov (Pmode
, regno
, cfa_offset
);
9396 cfa_offset
-= UNITS_PER_WORD
;
9400 /* Emit code to save SSE registers using MOV insns.
9401 First register is stored at CFA - CFA_OFFSET. */
9403 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset
)
9407 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9408 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9410 ix86_emit_save_reg_using_mov (V4SFmode
, regno
, cfa_offset
);
9415 static GTY(()) rtx queued_cfa_restores
;
9417 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
9418 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9419 Don't add the note if the previously saved value will be left untouched
9420 within stack red-zone till return, as unwinders can find the same value
9421 in the register and on the stack. */
9424 ix86_add_cfa_restore_note (rtx insn
, rtx reg
, HOST_WIDE_INT cfa_offset
)
9426 if (!crtl
->shrink_wrapped
9427 && cfa_offset
<= cfun
->machine
->fs
.red_zone_offset
)
9432 add_reg_note (insn
, REG_CFA_RESTORE
, reg
);
9433 RTX_FRAME_RELATED_P (insn
) = 1;
9437 = alloc_reg_note (REG_CFA_RESTORE
, reg
, queued_cfa_restores
);
9440 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9443 ix86_add_queued_cfa_restore_notes (rtx insn
)
9446 if (!queued_cfa_restores
)
9448 for (last
= queued_cfa_restores
; XEXP (last
, 1); last
= XEXP (last
, 1))
9450 XEXP (last
, 1) = REG_NOTES (insn
);
9451 REG_NOTES (insn
) = queued_cfa_restores
;
9452 queued_cfa_restores
= NULL_RTX
;
9453 RTX_FRAME_RELATED_P (insn
) = 1;
9456 /* Expand prologue or epilogue stack adjustment.
9457 The pattern exist to put a dependency on all ebp-based memory accesses.
9458 STYLE should be negative if instructions should be marked as frame related,
9459 zero if %r11 register is live and cannot be freely used and positive
9463 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
,
9464 int style
, bool set_cfa
)
9466 struct machine_function
*m
= cfun
->machine
;
9468 bool add_frame_related_expr
= false;
9471 insn
= gen_pro_epilogue_adjust_stack_si_add (dest
, src
, offset
);
9472 else if (x86_64_immediate_operand (offset
, DImode
))
9473 insn
= gen_pro_epilogue_adjust_stack_di_add (dest
, src
, offset
);
9477 /* r11 is used by indirect sibcall return as well, set before the
9478 epilogue and used after the epilogue. */
9480 tmp
= gen_rtx_REG (DImode
, R11_REG
);
9483 gcc_assert (src
!= hard_frame_pointer_rtx
9484 && dest
!= hard_frame_pointer_rtx
);
9485 tmp
= hard_frame_pointer_rtx
;
9487 insn
= emit_insn (gen_rtx_SET (DImode
, tmp
, offset
));
9489 add_frame_related_expr
= true;
9491 insn
= gen_pro_epilogue_adjust_stack_di_add (dest
, src
, tmp
);
9494 insn
= emit_insn (insn
);
9496 ix86_add_queued_cfa_restore_notes (insn
);
9502 gcc_assert (m
->fs
.cfa_reg
== src
);
9503 m
->fs
.cfa_offset
+= INTVAL (offset
);
9504 m
->fs
.cfa_reg
= dest
;
9506 r
= gen_rtx_PLUS (Pmode
, src
, offset
);
9507 r
= gen_rtx_SET (VOIDmode
, dest
, r
);
9508 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, r
);
9509 RTX_FRAME_RELATED_P (insn
) = 1;
9513 RTX_FRAME_RELATED_P (insn
) = 1;
9514 if (add_frame_related_expr
)
9516 rtx r
= gen_rtx_PLUS (Pmode
, src
, offset
);
9517 r
= gen_rtx_SET (VOIDmode
, dest
, r
);
9518 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, r
);
9522 if (dest
== stack_pointer_rtx
)
9524 HOST_WIDE_INT ooffset
= m
->fs
.sp_offset
;
9525 bool valid
= m
->fs
.sp_valid
;
9527 if (src
== hard_frame_pointer_rtx
)
9529 valid
= m
->fs
.fp_valid
;
9530 ooffset
= m
->fs
.fp_offset
;
9532 else if (src
== crtl
->drap_reg
)
9534 valid
= m
->fs
.drap_valid
;
9539 /* Else there are two possibilities: SP itself, which we set
9540 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
9541 taken care of this by hand along the eh_return path. */
9542 gcc_checking_assert (src
== stack_pointer_rtx
9543 || offset
== const0_rtx
);
9546 m
->fs
.sp_offset
= ooffset
- INTVAL (offset
);
9547 m
->fs
.sp_valid
= valid
;
9551 /* Find an available register to be used as dynamic realign argument
9552 pointer regsiter. Such a register will be written in prologue and
9553 used in begin of body, so it must not be
9554 1. parameter passing register.
9556 We reuse static-chain register if it is available. Otherwise, we
9557 use DI for i386 and R13 for x86-64. We chose R13 since it has
9560 Return: the regno of chosen register. */
9563 find_drap_reg (void)
9565 tree decl
= cfun
->decl
;
9569 /* Use R13 for nested function or function need static chain.
9570 Since function with tail call may use any caller-saved
9571 registers in epilogue, DRAP must not use caller-saved
9572 register in such case. */
9573 if (DECL_STATIC_CHAIN (decl
) || crtl
->tail_call_emit
)
9580 /* Use DI for nested function or function need static chain.
9581 Since function with tail call may use any caller-saved
9582 registers in epilogue, DRAP must not use caller-saved
9583 register in such case. */
9584 if (DECL_STATIC_CHAIN (decl
) || crtl
->tail_call_emit
)
9587 /* Reuse static chain register if it isn't used for parameter
9589 if (ix86_function_regparm (TREE_TYPE (decl
), decl
) <= 2)
9591 unsigned int ccvt
= ix86_get_callcvt (TREE_TYPE (decl
));
9592 if ((ccvt
& (IX86_CALLCVT_FASTCALL
| IX86_CALLCVT_THISCALL
)) == 0)
9599 /* Return minimum incoming stack alignment. */
9602 ix86_minimum_incoming_stack_boundary (bool sibcall
)
9604 unsigned int incoming_stack_boundary
;
9606 /* Prefer the one specified at command line. */
9607 if (ix86_user_incoming_stack_boundary
)
9608 incoming_stack_boundary
= ix86_user_incoming_stack_boundary
;
9609 /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
9610 if -mstackrealign is used, it isn't used for sibcall check and
9611 estimated stack alignment is 128bit. */
9614 && ix86_force_align_arg_pointer
9615 && crtl
->stack_alignment_estimated
== 128)
9616 incoming_stack_boundary
= MIN_STACK_BOUNDARY
;
9618 incoming_stack_boundary
= ix86_default_incoming_stack_boundary
;
9620 /* Incoming stack alignment can be changed on individual functions
9621 via force_align_arg_pointer attribute. We use the smallest
9622 incoming stack boundary. */
9623 if (incoming_stack_boundary
> MIN_STACK_BOUNDARY
9624 && lookup_attribute (ix86_force_align_arg_pointer_string
,
9625 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
9626 incoming_stack_boundary
= MIN_STACK_BOUNDARY
;
9628 /* The incoming stack frame has to be aligned at least at
9629 parm_stack_boundary. */
9630 if (incoming_stack_boundary
< crtl
->parm_stack_boundary
)
9631 incoming_stack_boundary
= crtl
->parm_stack_boundary
;
9633 /* Stack at entrance of main is aligned by runtime. We use the
9634 smallest incoming stack boundary. */
9635 if (incoming_stack_boundary
> MAIN_STACK_BOUNDARY
9636 && DECL_NAME (current_function_decl
)
9637 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
9638 && DECL_FILE_SCOPE_P (current_function_decl
))
9639 incoming_stack_boundary
= MAIN_STACK_BOUNDARY
;
9641 return incoming_stack_boundary
;
9644 /* Update incoming stack boundary and estimated stack alignment. */
9647 ix86_update_stack_boundary (void)
9649 ix86_incoming_stack_boundary
9650 = ix86_minimum_incoming_stack_boundary (false);
9652 /* x86_64 vararg needs 16byte stack alignment for register save
9656 && crtl
->stack_alignment_estimated
< 128)
9657 crtl
->stack_alignment_estimated
= 128;
9660 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9661 needed or an rtx for DRAP otherwise. */
9664 ix86_get_drap_rtx (void)
9666 if (ix86_force_drap
|| !ACCUMULATE_OUTGOING_ARGS
)
9667 crtl
->need_drap
= true;
9669 if (stack_realign_drap
)
9671 /* Assign DRAP to vDRAP and returns vDRAP */
9672 unsigned int regno
= find_drap_reg ();
9677 arg_ptr
= gen_rtx_REG (Pmode
, regno
);
9678 crtl
->drap_reg
= arg_ptr
;
9681 drap_vreg
= copy_to_reg (arg_ptr
);
9685 insn
= emit_insn_before (seq
, NEXT_INSN (entry_of_function ()));
9688 add_reg_note (insn
, REG_CFA_SET_VDRAP
, drap_vreg
);
9689 RTX_FRAME_RELATED_P (insn
) = 1;
9697 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9700 ix86_internal_arg_pointer (void)
9702 return virtual_incoming_args_rtx
;
9705 struct scratch_reg
{
9710 /* Return a short-lived scratch register for use on function entry.
9711 In 32-bit mode, it is valid only after the registers are saved
9712 in the prologue. This register must be released by means of
9713 release_scratch_register_on_entry once it is dead. */
9716 get_scratch_register_on_entry (struct scratch_reg
*sr
)
9724 /* We always use R11 in 64-bit mode. */
9729 tree decl
= current_function_decl
, fntype
= TREE_TYPE (decl
);
9731 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)) != NULL_TREE
;
9732 bool static_chain_p
= DECL_STATIC_CHAIN (decl
);
9733 int regparm
= ix86_function_regparm (fntype
, decl
);
9735 = crtl
->drap_reg
? REGNO (crtl
->drap_reg
) : INVALID_REGNUM
;
9737 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9738 for the static chain register. */
9739 if ((regparm
< 1 || (fastcall_p
&& !static_chain_p
))
9740 && drap_regno
!= AX_REG
)
9742 else if (regparm
< 2 && drap_regno
!= DX_REG
)
9744 /* ecx is the static chain register. */
9745 else if (regparm
< 3 && !fastcall_p
&& !static_chain_p
9746 && drap_regno
!= CX_REG
)
9748 else if (ix86_save_reg (BX_REG
, true))
9750 /* esi is the static chain register. */
9751 else if (!(regparm
== 3 && static_chain_p
)
9752 && ix86_save_reg (SI_REG
, true))
9754 else if (ix86_save_reg (DI_REG
, true))
9758 regno
= (drap_regno
== AX_REG
? DX_REG
: AX_REG
);
9763 sr
->reg
= gen_rtx_REG (Pmode
, regno
);
9766 rtx insn
= emit_insn (gen_push (sr
->reg
));
9767 RTX_FRAME_RELATED_P (insn
) = 1;
9771 /* Release a scratch register obtained from the preceding function. */
9774 release_scratch_register_on_entry (struct scratch_reg
*sr
)
9778 rtx x
, insn
= emit_insn (gen_pop (sr
->reg
));
9780 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9781 RTX_FRAME_RELATED_P (insn
) = 1;
9782 x
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (UNITS_PER_WORD
));
9783 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
9784 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
9788 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
9790 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9793 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size
)
9795 /* We skip the probe for the first interval + a small dope of 4 words and
9796 probe that many bytes past the specified size to maintain a protection
9797 area at the botton of the stack. */
9798 const int dope
= 4 * UNITS_PER_WORD
;
9799 rtx size_rtx
= GEN_INT (size
), last
;
9801 /* See if we have a constant small number of probes to generate. If so,
9802 that's the easy case. The run-time loop is made up of 11 insns in the
9803 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9804 for n # of intervals. */
9805 if (size
<= 5 * PROBE_INTERVAL
)
9807 HOST_WIDE_INT i
, adjust
;
9808 bool first_probe
= true;
9810 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9811 values of N from 1 until it exceeds SIZE. If only one probe is
9812 needed, this will not generate any code. Then adjust and probe
9813 to PROBE_INTERVAL + SIZE. */
9814 for (i
= PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
9818 adjust
= 2 * PROBE_INTERVAL
+ dope
;
9819 first_probe
= false;
9822 adjust
= PROBE_INTERVAL
;
9824 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9825 plus_constant (stack_pointer_rtx
, -adjust
)));
9826 emit_stack_probe (stack_pointer_rtx
);
9830 adjust
= size
+ PROBE_INTERVAL
+ dope
;
9832 adjust
= size
+ PROBE_INTERVAL
- i
;
9834 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9835 plus_constant (stack_pointer_rtx
, -adjust
)));
9836 emit_stack_probe (stack_pointer_rtx
);
9838 /* Adjust back to account for the additional first interval. */
9839 last
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9840 plus_constant (stack_pointer_rtx
,
9841 PROBE_INTERVAL
+ dope
)));
9844 /* Otherwise, do the same as above, but in a loop. Note that we must be
9845 extra careful with variables wrapping around because we might be at
9846 the very top (or the very bottom) of the address space and we have
9847 to be able to handle this case properly; in particular, we use an
9848 equality test for the loop condition. */
9851 HOST_WIDE_INT rounded_size
;
9852 struct scratch_reg sr
;
9854 get_scratch_register_on_entry (&sr
);
9857 /* Step 1: round SIZE to the previous multiple of the interval. */
9859 rounded_size
= size
& -PROBE_INTERVAL
;
9862 /* Step 2: compute initial and final value of the loop counter. */
9864 /* SP = SP_0 + PROBE_INTERVAL. */
9865 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9866 plus_constant (stack_pointer_rtx
,
9867 - (PROBE_INTERVAL
+ dope
))));
9869 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9870 emit_move_insn (sr
.reg
, GEN_INT (-rounded_size
));
9871 emit_insn (gen_rtx_SET (VOIDmode
, sr
.reg
,
9872 gen_rtx_PLUS (Pmode
, sr
.reg
,
9873 stack_pointer_rtx
)));
9878 while (SP != LAST_ADDR)
9880 SP = SP + PROBE_INTERVAL
9884 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9885 values of N from 1 until it is equal to ROUNDED_SIZE. */
9887 emit_insn (ix86_gen_adjust_stack_and_probe (sr
.reg
, sr
.reg
, size_rtx
));
9890 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9891 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9893 if (size
!= rounded_size
)
9895 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9896 plus_constant (stack_pointer_rtx
,
9897 rounded_size
- size
)));
9898 emit_stack_probe (stack_pointer_rtx
);
9901 /* Adjust back to account for the additional first interval. */
9902 last
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9903 plus_constant (stack_pointer_rtx
,
9904 PROBE_INTERVAL
+ dope
)));
9906 release_scratch_register_on_entry (&sr
);
9909 gcc_assert (cfun
->machine
->fs
.cfa_reg
!= stack_pointer_rtx
);
9911 /* Even if the stack pointer isn't the CFA register, we need to correctly
9912 describe the adjustments made to it, in particular differentiate the
9913 frame-related ones from the frame-unrelated ones. */
9916 rtx expr
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (2));
9917 XVECEXP (expr
, 0, 0)
9918 = gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9919 plus_constant (stack_pointer_rtx
, -size
));
9920 XVECEXP (expr
, 0, 1)
9921 = gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9922 plus_constant (stack_pointer_rtx
,
9923 PROBE_INTERVAL
+ dope
+ size
));
9924 add_reg_note (last
, REG_FRAME_RELATED_EXPR
, expr
);
9925 RTX_FRAME_RELATED_P (last
) = 1;
9927 cfun
->machine
->fs
.sp_offset
+= size
;
9930 /* Make sure nothing is scheduled before we are done. */
9931 emit_insn (gen_blockage ());
9934 /* Adjust the stack pointer up to REG while probing it. */
9937 output_adjust_stack_and_probe (rtx reg
)
9939 static int labelno
= 0;
9940 char loop_lab
[32], end_lab
[32];
9943 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
);
9944 ASM_GENERATE_INTERNAL_LABEL (end_lab
, "LPSRE", labelno
++);
9946 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
9948 /* Jump to END_LAB if SP == LAST_ADDR. */
9949 xops
[0] = stack_pointer_rtx
;
9951 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops
);
9952 fputs ("\tje\t", asm_out_file
);
9953 assemble_name_raw (asm_out_file
, end_lab
);
9954 fputc ('\n', asm_out_file
);
9956 /* SP = SP + PROBE_INTERVAL. */
9957 xops
[1] = GEN_INT (PROBE_INTERVAL
);
9958 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops
);
9961 xops
[1] = const0_rtx
;
9962 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops
);
9964 fprintf (asm_out_file
, "\tjmp\t");
9965 assemble_name_raw (asm_out_file
, loop_lab
);
9966 fputc ('\n', asm_out_file
);
9968 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, end_lab
);
9973 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9974 inclusive. These are offsets from the current stack pointer. */
9977 ix86_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
)
9979 /* See if we have a constant small number of probes to generate. If so,
9980 that's the easy case. The run-time loop is made up of 7 insns in the
9981 generic case while the compile-time loop is made up of n insns for n #
9983 if (size
<= 7 * PROBE_INTERVAL
)
9987 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9988 it exceeds SIZE. If only one probe is needed, this will not
9989 generate any code. Then probe at FIRST + SIZE. */
9990 for (i
= PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
9991 emit_stack_probe (plus_constant (stack_pointer_rtx
, -(first
+ i
)));
9993 emit_stack_probe (plus_constant (stack_pointer_rtx
, -(first
+ size
)));
9996 /* Otherwise, do the same as above, but in a loop. Note that we must be
9997 extra careful with variables wrapping around because we might be at
9998 the very top (or the very bottom) of the address space and we have
9999 to be able to handle this case properly; in particular, we use an
10000 equality test for the loop condition. */
10003 HOST_WIDE_INT rounded_size
, last
;
10004 struct scratch_reg sr
;
10006 get_scratch_register_on_entry (&sr
);
10009 /* Step 1: round SIZE to the previous multiple of the interval. */
10011 rounded_size
= size
& -PROBE_INTERVAL
;
10014 /* Step 2: compute initial and final value of the loop counter. */
10016 /* TEST_OFFSET = FIRST. */
10017 emit_move_insn (sr
.reg
, GEN_INT (-first
));
10019 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10020 last
= first
+ rounded_size
;
10023 /* Step 3: the loop
10025 while (TEST_ADDR != LAST_ADDR)
10027 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10031 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10032 until it is equal to ROUNDED_SIZE. */
10034 emit_insn (ix86_gen_probe_stack_range (sr
.reg
, sr
.reg
, GEN_INT (-last
)));
10037 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10038 that SIZE is equal to ROUNDED_SIZE. */
10040 if (size
!= rounded_size
)
10041 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode
,
10044 rounded_size
- size
));
10046 release_scratch_register_on_entry (&sr
);
10049 /* Make sure nothing is scheduled before we are done. */
10050 emit_insn (gen_blockage ());
10053 /* Probe a range of stack addresses from REG to END, inclusive. These are
10054 offsets from the current stack pointer. */
10057 output_probe_stack_range (rtx reg
, rtx end
)
10059 static int labelno
= 0;
10060 char loop_lab
[32], end_lab
[32];
10063 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
);
10064 ASM_GENERATE_INTERNAL_LABEL (end_lab
, "LPSRE", labelno
++);
10066 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
10068 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10071 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops
);
10072 fputs ("\tje\t", asm_out_file
);
10073 assemble_name_raw (asm_out_file
, end_lab
);
10074 fputc ('\n', asm_out_file
);
10076 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10077 xops
[1] = GEN_INT (PROBE_INTERVAL
);
10078 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops
);
10080 /* Probe at TEST_ADDR. */
10081 xops
[0] = stack_pointer_rtx
;
10083 xops
[2] = const0_rtx
;
10084 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops
);
10086 fprintf (asm_out_file
, "\tjmp\t");
10087 assemble_name_raw (asm_out_file
, loop_lab
);
10088 fputc ('\n', asm_out_file
);
10090 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, end_lab
);
10095 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10096 to be generated in correct form. */
10098 ix86_finalize_stack_realign_flags (void)
10100 /* Check if stack realign is really needed after reload, and
10101 stores result in cfun */
10102 unsigned int incoming_stack_boundary
10103 = (crtl
->parm_stack_boundary
> ix86_incoming_stack_boundary
10104 ? crtl
->parm_stack_boundary
: ix86_incoming_stack_boundary
);
10105 unsigned int stack_realign
= (incoming_stack_boundary
10106 < (current_function_is_leaf
10107 ? crtl
->max_used_stack_slot_alignment
10108 : crtl
->stack_alignment_needed
));
10110 if (crtl
->stack_realign_finalized
)
10112 /* After stack_realign_needed is finalized, we can't no longer
10114 gcc_assert (crtl
->stack_realign_needed
== stack_realign
);
10118 /* If the only reason for frame_pointer_needed is that we conservatively
10119 assumed stack realignment might be needed, but in the end nothing that
10120 needed the stack alignment had been spilled, clear frame_pointer_needed
10121 and say we don't need stack realignment. */
10123 && !crtl
->need_drap
10124 && frame_pointer_needed
10125 && current_function_is_leaf
10126 && flag_omit_frame_pointer
10127 && current_function_sp_is_unchanging
10128 && !ix86_current_function_calls_tls_descriptor
10129 && !crtl
->accesses_prior_frames
10130 && !cfun
->calls_alloca
10131 && !crtl
->calls_eh_return
10132 && !(flag_stack_check
&& STACK_CHECK_MOVING_SP
)
10133 && !ix86_frame_pointer_required ()
10134 && get_frame_size () == 0
10135 && ix86_nsaved_sseregs () == 0
10136 && ix86_varargs_gpr_size
+ ix86_varargs_fpr_size
== 0)
10138 HARD_REG_SET set_up_by_prologue
, prologue_used
;
10141 CLEAR_HARD_REG_SET (prologue_used
);
10142 CLEAR_HARD_REG_SET (set_up_by_prologue
);
10143 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
, STACK_POINTER_REGNUM
);
10144 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
, ARG_POINTER_REGNUM
);
10145 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
,
10146 HARD_FRAME_POINTER_REGNUM
);
10150 FOR_BB_INSNS (bb
, insn
)
10151 if (NONDEBUG_INSN_P (insn
)
10152 && requires_stack_frame_p (insn
, prologue_used
,
10153 set_up_by_prologue
))
10155 crtl
->stack_realign_needed
= stack_realign
;
10156 crtl
->stack_realign_finalized
= true;
10161 frame_pointer_needed
= false;
10162 stack_realign
= false;
10163 crtl
->max_used_stack_slot_alignment
= incoming_stack_boundary
;
10164 crtl
->stack_alignment_needed
= incoming_stack_boundary
;
10165 crtl
->stack_alignment_estimated
= incoming_stack_boundary
;
10166 if (crtl
->preferred_stack_boundary
> incoming_stack_boundary
)
10167 crtl
->preferred_stack_boundary
= incoming_stack_boundary
;
10168 df_finish_pass (true);
10169 df_scan_alloc (NULL
);
10171 df_compute_regs_ever_live (true);
10175 crtl
->stack_realign_needed
= stack_realign
;
10176 crtl
->stack_realign_finalized
= true;
10179 /* Expand the prologue into a bunch of separate insns. */
10182 ix86_expand_prologue (void)
10184 struct machine_function
*m
= cfun
->machine
;
10187 struct ix86_frame frame
;
10188 HOST_WIDE_INT allocate
;
10189 bool int_registers_saved
;
10191 ix86_finalize_stack_realign_flags ();
10193 /* DRAP should not coexist with stack_realign_fp */
10194 gcc_assert (!(crtl
->drap_reg
&& stack_realign_fp
));
10196 memset (&m
->fs
, 0, sizeof (m
->fs
));
10198 /* Initialize CFA state for before the prologue. */
10199 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10200 m
->fs
.cfa_offset
= INCOMING_FRAME_SP_OFFSET
;
10202 /* Track SP offset to the CFA. We continue tracking this after we've
10203 swapped the CFA register away from SP. In the case of re-alignment
10204 this is fudged; we're interested to offsets within the local frame. */
10205 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
10206 m
->fs
.sp_valid
= true;
10208 ix86_compute_frame_layout (&frame
);
10210 if (!TARGET_64BIT
&& ix86_function_ms_hook_prologue (current_function_decl
))
10212 /* We should have already generated an error for any use of
10213 ms_hook on a nested function. */
10214 gcc_checking_assert (!ix86_static_chain_on_stack
);
10216 /* Check if profiling is active and we shall use profiling before
10217 prologue variant. If so sorry. */
10218 if (crtl
->profile
&& flag_fentry
!= 0)
10219 sorry ("ms_hook_prologue attribute isn%'t compatible "
10220 "with -mfentry for 32-bit");
10222 /* In ix86_asm_output_function_label we emitted:
10223 8b ff movl.s %edi,%edi
10225 8b ec movl.s %esp,%ebp
10227 This matches the hookable function prologue in Win32 API
10228 functions in Microsoft Windows XP Service Pack 2 and newer.
10229 Wine uses this to enable Windows apps to hook the Win32 API
10230 functions provided by Wine.
10232 What that means is that we've already set up the frame pointer. */
10234 if (frame_pointer_needed
10235 && !(crtl
->drap_reg
&& crtl
->stack_realign_needed
))
10239 /* We've decided to use the frame pointer already set up.
10240 Describe this to the unwinder by pretending that both
10241 push and mov insns happen right here.
10243 Putting the unwind info here at the end of the ms_hook
10244 is done so that we can make absolutely certain we get
10245 the required byte sequence at the start of the function,
10246 rather than relying on an assembler that can produce
10247 the exact encoding required.
10249 However it does mean (in the unpatched case) that we have
10250 a 1 insn window where the asynchronous unwind info is
10251 incorrect. However, if we placed the unwind info at
10252 its correct location we would have incorrect unwind info
10253 in the patched case. Which is probably all moot since
10254 I don't expect Wine generates dwarf2 unwind info for the
10255 system libraries that use this feature. */
10257 insn
= emit_insn (gen_blockage ());
10259 push
= gen_push (hard_frame_pointer_rtx
);
10260 mov
= gen_rtx_SET (VOIDmode
, hard_frame_pointer_rtx
,
10261 stack_pointer_rtx
);
10262 RTX_FRAME_RELATED_P (push
) = 1;
10263 RTX_FRAME_RELATED_P (mov
) = 1;
10265 RTX_FRAME_RELATED_P (insn
) = 1;
10266 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
10267 gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, push
, mov
)));
10269 /* Note that gen_push incremented m->fs.cfa_offset, even
10270 though we didn't emit the push insn here. */
10271 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10272 m
->fs
.fp_offset
= m
->fs
.cfa_offset
;
10273 m
->fs
.fp_valid
= true;
10277 /* The frame pointer is not needed so pop %ebp again.
10278 This leaves us with a pristine state. */
10279 emit_insn (gen_pop (hard_frame_pointer_rtx
));
10283 /* The first insn of a function that accepts its static chain on the
10284 stack is to push the register that would be filled in by a direct
10285 call. This insn will be skipped by the trampoline. */
10286 else if (ix86_static_chain_on_stack
)
10288 insn
= emit_insn (gen_push (ix86_static_chain (cfun
->decl
, false)));
10289 emit_insn (gen_blockage ());
10291 /* We don't want to interpret this push insn as a register save,
10292 only as a stack adjustment. The real copy of the register as
10293 a save will be done later, if needed. */
10294 t
= plus_constant (stack_pointer_rtx
, -UNITS_PER_WORD
);
10295 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
10296 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, t
);
10297 RTX_FRAME_RELATED_P (insn
) = 1;
10300 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
10301 of DRAP is needed and stack realignment is really needed after reload */
10302 if (stack_realign_drap
)
10304 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10306 /* Only need to push parameter pointer reg if it is caller saved. */
10307 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
10309 /* Push arg pointer reg */
10310 insn
= emit_insn (gen_push (crtl
->drap_reg
));
10311 RTX_FRAME_RELATED_P (insn
) = 1;
10314 /* Grab the argument pointer. */
10315 t
= plus_constant (stack_pointer_rtx
, m
->fs
.sp_offset
);
10316 insn
= emit_insn (gen_rtx_SET (VOIDmode
, crtl
->drap_reg
, t
));
10317 RTX_FRAME_RELATED_P (insn
) = 1;
10318 m
->fs
.cfa_reg
= crtl
->drap_reg
;
10319 m
->fs
.cfa_offset
= 0;
10321 /* Align the stack. */
10322 insn
= emit_insn (ix86_gen_andsp (stack_pointer_rtx
,
10324 GEN_INT (-align_bytes
)));
10325 RTX_FRAME_RELATED_P (insn
) = 1;
10327 /* Replicate the return address on the stack so that return
10328 address can be reached via (argp - 1) slot. This is needed
10329 to implement macro RETURN_ADDR_RTX and intrinsic function
10330 expand_builtin_return_addr etc. */
10331 t
= plus_constant (crtl
->drap_reg
, -UNITS_PER_WORD
);
10332 t
= gen_frame_mem (Pmode
, t
);
10333 insn
= emit_insn (gen_push (t
));
10334 RTX_FRAME_RELATED_P (insn
) = 1;
10336 /* For the purposes of frame and register save area addressing,
10337 we've started over with a new frame. */
10338 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
10339 m
->fs
.realigned
= true;
10342 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
10344 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10345 slower on all targets. Also sdb doesn't like it. */
10346 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
10347 RTX_FRAME_RELATED_P (insn
) = 1;
10349 if (m
->fs
.sp_offset
== frame
.hard_frame_pointer_offset
)
10351 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
10352 RTX_FRAME_RELATED_P (insn
) = 1;
10354 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10355 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10356 m
->fs
.fp_offset
= m
->fs
.sp_offset
;
10357 m
->fs
.fp_valid
= true;
10361 int_registers_saved
= (frame
.nregs
== 0);
10363 if (!int_registers_saved
)
10365 /* If saving registers via PUSH, do so now. */
10366 if (!frame
.save_regs_using_mov
)
10368 ix86_emit_save_regs ();
10369 int_registers_saved
= true;
10370 gcc_assert (m
->fs
.sp_offset
== frame
.reg_save_offset
);
10373 /* When using red zone we may start register saving before allocating
10374 the stack frame saving one cycle of the prologue. However, avoid
10375 doing this if we have to probe the stack; at least on x86_64 the
10376 stack probe can turn into a call that clobbers a red zone location. */
10377 else if (ix86_using_red_zone ()
10378 && (! TARGET_STACK_PROBE
10379 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
))
10381 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
10382 int_registers_saved
= true;
10386 if (stack_realign_fp
)
10388 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10389 gcc_assert (align_bytes
> MIN_STACK_BOUNDARY
/ BITS_PER_UNIT
);
10391 /* The computation of the size of the re-aligned stack frame means
10392 that we must allocate the size of the register save area before
10393 performing the actual alignment. Otherwise we cannot guarantee
10394 that there's enough storage above the realignment point. */
10395 if (m
->fs
.sp_offset
!= frame
.sse_reg_save_offset
)
10396 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10397 GEN_INT (m
->fs
.sp_offset
10398 - frame
.sse_reg_save_offset
),
10401 /* Align the stack. */
10402 insn
= emit_insn (ix86_gen_andsp (stack_pointer_rtx
,
10404 GEN_INT (-align_bytes
)));
10406 /* For the purposes of register save area addressing, the stack
10407 pointer is no longer valid. As for the value of sp_offset,
10408 see ix86_compute_frame_layout, which we need to match in order
10409 to pass verification of stack_pointer_offset at the end. */
10410 m
->fs
.sp_offset
= (m
->fs
.sp_offset
+ align_bytes
) & -align_bytes
;
10411 m
->fs
.sp_valid
= false;
10414 allocate
= frame
.stack_pointer_offset
- m
->fs
.sp_offset
;
10416 if (flag_stack_usage_info
)
10418 /* We start to count from ARG_POINTER. */
10419 HOST_WIDE_INT stack_size
= frame
.stack_pointer_offset
;
10421 /* If it was realigned, take into account the fake frame. */
10422 if (stack_realign_drap
)
10424 if (ix86_static_chain_on_stack
)
10425 stack_size
+= UNITS_PER_WORD
;
10427 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
10428 stack_size
+= UNITS_PER_WORD
;
10430 /* This over-estimates by 1 minimal-stack-alignment-unit but
10431 mitigates that by counting in the new return address slot. */
10432 current_function_dynamic_stack_size
10433 += crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10436 current_function_static_stack_size
= stack_size
;
10439 /* The stack has already been decremented by the instruction calling us
10440 so probe if the size is non-negative to preserve the protection area. */
10441 if (allocate
>= 0 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
10443 /* We expect the registers to be saved when probes are used. */
10444 gcc_assert (int_registers_saved
);
10446 if (STACK_CHECK_MOVING_SP
)
10448 ix86_adjust_stack_and_probe (allocate
);
10453 HOST_WIDE_INT size
= allocate
;
10455 if (TARGET_64BIT
&& size
>= (HOST_WIDE_INT
) 0x80000000)
10456 size
= 0x80000000 - STACK_CHECK_PROTECT
- 1;
10458 if (TARGET_STACK_PROBE
)
10459 ix86_emit_probe_stack_range (0, size
+ STACK_CHECK_PROTECT
);
10461 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT
, size
);
10467 else if (!ix86_target_stack_probe ()
10468 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
)
10470 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10471 GEN_INT (-allocate
), -1,
10472 m
->fs
.cfa_reg
== stack_pointer_rtx
);
10476 rtx eax
= gen_rtx_REG (Pmode
, AX_REG
);
10478 rtx (*adjust_stack_insn
)(rtx
, rtx
, rtx
);
10480 bool eax_live
= false;
10481 bool r10_live
= false;
10484 r10_live
= (DECL_STATIC_CHAIN (current_function_decl
) != 0);
10485 if (!TARGET_64BIT_MS_ABI
)
10486 eax_live
= ix86_eax_live_at_start_p ();
10490 emit_insn (gen_push (eax
));
10491 allocate
-= UNITS_PER_WORD
;
10495 r10
= gen_rtx_REG (Pmode
, R10_REG
);
10496 emit_insn (gen_push (r10
));
10497 allocate
-= UNITS_PER_WORD
;
10500 emit_move_insn (eax
, GEN_INT (allocate
));
10501 emit_insn (ix86_gen_allocate_stack_worker (eax
, eax
));
10503 /* Use the fact that AX still contains ALLOCATE. */
10504 adjust_stack_insn
= (TARGET_64BIT
10505 ? gen_pro_epilogue_adjust_stack_di_sub
10506 : gen_pro_epilogue_adjust_stack_si_sub
);
10508 insn
= emit_insn (adjust_stack_insn (stack_pointer_rtx
,
10509 stack_pointer_rtx
, eax
));
10511 /* Note that SEH directives need to continue tracking the stack
10512 pointer even after the frame pointer has been set up. */
10513 if (m
->fs
.cfa_reg
== stack_pointer_rtx
|| TARGET_SEH
)
10515 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10516 m
->fs
.cfa_offset
+= allocate
;
10518 RTX_FRAME_RELATED_P (insn
) = 1;
10519 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
10520 gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10521 plus_constant (stack_pointer_rtx
,
10524 m
->fs
.sp_offset
+= allocate
;
10526 if (r10_live
&& eax_live
)
10528 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
);
10529 emit_move_insn (r10
, gen_frame_mem (Pmode
, t
));
10530 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
- UNITS_PER_WORD
);
10531 emit_move_insn (eax
, gen_frame_mem (Pmode
, t
));
10533 else if (eax_live
|| r10_live
)
10535 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
);
10536 emit_move_insn ((eax_live
? eax
: r10
), gen_frame_mem (Pmode
, t
));
10539 gcc_assert (m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
10541 /* If we havn't already set up the frame pointer, do so now. */
10542 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
10544 insn
= ix86_gen_add3 (hard_frame_pointer_rtx
, stack_pointer_rtx
,
10545 GEN_INT (frame
.stack_pointer_offset
10546 - frame
.hard_frame_pointer_offset
));
10547 insn
= emit_insn (insn
);
10548 RTX_FRAME_RELATED_P (insn
) = 1;
10549 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, NULL
);
10551 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10552 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10553 m
->fs
.fp_offset
= frame
.hard_frame_pointer_offset
;
10554 m
->fs
.fp_valid
= true;
10557 if (!int_registers_saved
)
10558 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
10559 if (frame
.nsseregs
)
10560 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
10562 pic_reg_used
= false;
10563 if (pic_offset_table_rtx
10564 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
10567 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
10569 if (alt_pic_reg_used
!= INVALID_REGNUM
)
10570 SET_REGNO (pic_offset_table_rtx
, alt_pic_reg_used
);
10572 pic_reg_used
= true;
10579 if (ix86_cmodel
== CM_LARGE_PIC
)
10581 rtx tmp_reg
= gen_rtx_REG (DImode
, R11_REG
);
10582 rtx label
= gen_label_rtx ();
10583 emit_label (label
);
10584 LABEL_PRESERVE_P (label
) = 1;
10585 gcc_assert (REGNO (pic_offset_table_rtx
) != REGNO (tmp_reg
));
10586 insn
= emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx
, label
));
10587 insn
= emit_insn (gen_set_got_offset_rex64 (tmp_reg
, label
));
10588 insn
= emit_insn (gen_adddi3 (pic_offset_table_rtx
,
10589 pic_offset_table_rtx
, tmp_reg
));
10592 insn
= emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
10596 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
10597 RTX_FRAME_RELATED_P (insn
) = 1;
10598 add_reg_note (insn
, REG_CFA_FLUSH_QUEUE
, NULL_RTX
);
10602 /* In the pic_reg_used case, make sure that the got load isn't deleted
10603 when mcount needs it. Blockage to avoid call movement across mcount
10604 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10606 if (crtl
->profile
&& !flag_fentry
&& pic_reg_used
)
10607 emit_insn (gen_prologue_use (pic_offset_table_rtx
));
10609 if (crtl
->drap_reg
&& !crtl
->stack_realign_needed
)
10611 /* vDRAP is setup but after reload it turns out stack realign
10612 isn't necessary, here we will emit prologue to setup DRAP
10613 without stack realign adjustment */
10614 t
= choose_baseaddr (0);
10615 emit_insn (gen_rtx_SET (VOIDmode
, crtl
->drap_reg
, t
));
10618 /* Prevent instructions from being scheduled into register save push
10619 sequence when access to the redzone area is done through frame pointer.
10620 The offset between the frame pointer and the stack pointer is calculated
10621 relative to the value of the stack pointer at the end of the function
10622 prologue, and moving instructions that access redzone area via frame
10623 pointer inside push sequence violates this assumption. */
10624 if (frame_pointer_needed
&& frame
.red_zone_size
)
10625 emit_insn (gen_memory_blockage ());
10627 /* Emit cld instruction if stringops are used in the function. */
10628 if (TARGET_CLD
&& ix86_current_function_needs_cld
)
10629 emit_insn (gen_cld ());
10631 /* SEH requires that the prologue end within 256 bytes of the start of
10632 the function. Prevent instruction schedules that would extend that.
10633 Further, prevent alloca modifications to the stack pointer from being
10634 combined with prologue modifications. */
10636 emit_insn (gen_prologue_use (stack_pointer_rtx
));
10639 /* Emit code to restore REG using a POP insn. */
10642 ix86_emit_restore_reg_using_pop (rtx reg
)
10644 struct machine_function
*m
= cfun
->machine
;
10645 rtx insn
= emit_insn (gen_pop (reg
));
10647 ix86_add_cfa_restore_note (insn
, reg
, m
->fs
.sp_offset
);
10648 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
10650 if (m
->fs
.cfa_reg
== crtl
->drap_reg
10651 && REGNO (reg
) == REGNO (crtl
->drap_reg
))
10653 /* Previously we'd represented the CFA as an expression
10654 like *(%ebp - 8). We've just popped that value from
10655 the stack, which means we need to reset the CFA to
10656 the drap register. This will remain until we restore
10657 the stack pointer. */
10658 add_reg_note (insn
, REG_CFA_DEF_CFA
, reg
);
10659 RTX_FRAME_RELATED_P (insn
) = 1;
10661 /* This means that the DRAP register is valid for addressing too. */
10662 m
->fs
.drap_valid
= true;
10666 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10668 rtx x
= plus_constant (stack_pointer_rtx
, UNITS_PER_WORD
);
10669 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
10670 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, x
);
10671 RTX_FRAME_RELATED_P (insn
) = 1;
10673 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
10676 /* When the frame pointer is the CFA, and we pop it, we are
10677 swapping back to the stack pointer as the CFA. This happens
10678 for stack frames that don't allocate other data, so we assume
10679 the stack pointer is now pointing at the return address, i.e.
10680 the function entry state, which makes the offset be 1 word. */
10681 if (reg
== hard_frame_pointer_rtx
)
10683 m
->fs
.fp_valid
= false;
10684 if (m
->fs
.cfa_reg
== hard_frame_pointer_rtx
)
10686 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10687 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
10689 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10690 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
10691 GEN_INT (m
->fs
.cfa_offset
)));
10692 RTX_FRAME_RELATED_P (insn
) = 1;
10697 /* Emit code to restore saved registers using POP insns. */
10700 ix86_emit_restore_regs_using_pop (void)
10702 unsigned int regno
;
10704 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
10705 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, false))
10706 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode
, regno
));
10709 /* Emit code and notes for the LEAVE instruction. */
10712 ix86_emit_leave (void)
10714 struct machine_function
*m
= cfun
->machine
;
10715 rtx insn
= emit_insn (ix86_gen_leave ());
10717 ix86_add_queued_cfa_restore_notes (insn
);
10719 gcc_assert (m
->fs
.fp_valid
);
10720 m
->fs
.sp_valid
= true;
10721 m
->fs
.sp_offset
= m
->fs
.fp_offset
- UNITS_PER_WORD
;
10722 m
->fs
.fp_valid
= false;
10724 if (m
->fs
.cfa_reg
== hard_frame_pointer_rtx
)
10726 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10727 m
->fs
.cfa_offset
= m
->fs
.sp_offset
;
10729 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10730 plus_constant (stack_pointer_rtx
, m
->fs
.sp_offset
));
10731 RTX_FRAME_RELATED_P (insn
) = 1;
10732 ix86_add_cfa_restore_note (insn
, hard_frame_pointer_rtx
,
10737 /* Emit code to restore saved registers using MOV insns.
10738 First register is restored from CFA - CFA_OFFSET. */
10740 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset
,
10741 bool maybe_eh_return
)
10743 struct machine_function
*m
= cfun
->machine
;
10744 unsigned int regno
;
10746 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
10747 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, maybe_eh_return
))
10749 rtx reg
= gen_rtx_REG (Pmode
, regno
);
10752 mem
= choose_baseaddr (cfa_offset
);
10753 mem
= gen_frame_mem (Pmode
, mem
);
10754 insn
= emit_move_insn (reg
, mem
);
10756 if (m
->fs
.cfa_reg
== crtl
->drap_reg
&& regno
== REGNO (crtl
->drap_reg
))
10758 /* Previously we'd represented the CFA as an expression
10759 like *(%ebp - 8). We've just popped that value from
10760 the stack, which means we need to reset the CFA to
10761 the drap register. This will remain until we restore
10762 the stack pointer. */
10763 add_reg_note (insn
, REG_CFA_DEF_CFA
, reg
);
10764 RTX_FRAME_RELATED_P (insn
) = 1;
10766 /* This means that the DRAP register is valid for addressing. */
10767 m
->fs
.drap_valid
= true;
10770 ix86_add_cfa_restore_note (NULL_RTX
, reg
, cfa_offset
);
10772 cfa_offset
-= UNITS_PER_WORD
;
10776 /* Emit code to restore saved registers using MOV insns.
10777 First register is restored from CFA - CFA_OFFSET. */
10779 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset
,
10780 bool maybe_eh_return
)
10782 unsigned int regno
;
10784 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
10785 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, maybe_eh_return
))
10787 rtx reg
= gen_rtx_REG (V4SFmode
, regno
);
10790 mem
= choose_baseaddr (cfa_offset
);
10791 mem
= gen_rtx_MEM (V4SFmode
, mem
);
10792 set_mem_align (mem
, 128);
10793 emit_move_insn (reg
, mem
);
10795 ix86_add_cfa_restore_note (NULL_RTX
, reg
, cfa_offset
);
10801 /* Emit vzeroupper if needed. */
10804 ix86_maybe_emit_epilogue_vzeroupper (void)
10806 if (TARGET_VZEROUPPER
10807 && !TREE_THIS_VOLATILE (cfun
->decl
)
10808 && !cfun
->machine
->caller_return_avx256_p
)
10809 emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256
)));
10812 /* Restore function stack, frame, and registers. */
10815 ix86_expand_epilogue (int style
)
10817 struct machine_function
*m
= cfun
->machine
;
10818 struct machine_frame_state frame_state_save
= m
->fs
;
10819 struct ix86_frame frame
;
10820 bool restore_regs_via_mov
;
10823 ix86_finalize_stack_realign_flags ();
10824 ix86_compute_frame_layout (&frame
);
10826 m
->fs
.sp_valid
= (!frame_pointer_needed
10827 || (current_function_sp_is_unchanging
10828 && !stack_realign_fp
));
10829 gcc_assert (!m
->fs
.sp_valid
10830 || m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
10832 /* The FP must be valid if the frame pointer is present. */
10833 gcc_assert (frame_pointer_needed
== m
->fs
.fp_valid
);
10834 gcc_assert (!m
->fs
.fp_valid
10835 || m
->fs
.fp_offset
== frame
.hard_frame_pointer_offset
);
10837 /* We must have *some* valid pointer to the stack frame. */
10838 gcc_assert (m
->fs
.sp_valid
|| m
->fs
.fp_valid
);
10840 /* The DRAP is never valid at this point. */
10841 gcc_assert (!m
->fs
.drap_valid
);
10843 /* See the comment about red zone and frame
10844 pointer usage in ix86_expand_prologue. */
10845 if (frame_pointer_needed
&& frame
.red_zone_size
)
10846 emit_insn (gen_memory_blockage ());
10848 using_drap
= crtl
->drap_reg
&& crtl
->stack_realign_needed
;
10849 gcc_assert (!using_drap
|| m
->fs
.cfa_reg
== crtl
->drap_reg
);
10851 /* Determine the CFA offset of the end of the red-zone. */
10852 m
->fs
.red_zone_offset
= 0;
10853 if (ix86_using_red_zone () && crtl
->args
.pops_args
< 65536)
10855 /* The red-zone begins below the return address. */
10856 m
->fs
.red_zone_offset
= RED_ZONE_SIZE
+ UNITS_PER_WORD
;
10858 /* When the register save area is in the aligned portion of
10859 the stack, determine the maximum runtime displacement that
10860 matches up with the aligned frame. */
10861 if (stack_realign_drap
)
10862 m
->fs
.red_zone_offset
-= (crtl
->stack_alignment_needed
/ BITS_PER_UNIT
10866 /* Special care must be taken for the normal return case of a function
10867 using eh_return: the eax and edx registers are marked as saved, but
10868 not restored along this path. Adjust the save location to match. */
10869 if (crtl
->calls_eh_return
&& style
!= 2)
10870 frame
.reg_save_offset
-= 2 * UNITS_PER_WORD
;
10872 /* EH_RETURN requires the use of moves to function properly. */
10873 if (crtl
->calls_eh_return
)
10874 restore_regs_via_mov
= true;
10875 /* SEH requires the use of pops to identify the epilogue. */
10876 else if (TARGET_SEH
)
10877 restore_regs_via_mov
= false;
10878 /* If we're only restoring one register and sp is not valid then
10879 using a move instruction to restore the register since it's
10880 less work than reloading sp and popping the register. */
10881 else if (!m
->fs
.sp_valid
&& frame
.nregs
<= 1)
10882 restore_regs_via_mov
= true;
10883 else if (TARGET_EPILOGUE_USING_MOVE
10884 && cfun
->machine
->use_fast_prologue_epilogue
10885 && (frame
.nregs
> 1
10886 || m
->fs
.sp_offset
!= frame
.reg_save_offset
))
10887 restore_regs_via_mov
= true;
10888 else if (frame_pointer_needed
10890 && m
->fs
.sp_offset
!= frame
.reg_save_offset
)
10891 restore_regs_via_mov
= true;
10892 else if (frame_pointer_needed
10893 && TARGET_USE_LEAVE
10894 && cfun
->machine
->use_fast_prologue_epilogue
10895 && frame
.nregs
== 1)
10896 restore_regs_via_mov
= true;
10898 restore_regs_via_mov
= false;
10900 if (restore_regs_via_mov
|| frame
.nsseregs
)
10902 /* Ensure that the entire register save area is addressable via
10903 the stack pointer, if we will restore via sp. */
10905 && m
->fs
.sp_offset
> 0x7fffffff
10906 && !(m
->fs
.fp_valid
|| m
->fs
.drap_valid
)
10907 && (frame
.nsseregs
+ frame
.nregs
) != 0)
10909 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10910 GEN_INT (m
->fs
.sp_offset
10911 - frame
.sse_reg_save_offset
),
10913 m
->fs
.cfa_reg
== stack_pointer_rtx
);
10917 /* If there are any SSE registers to restore, then we have to do it
10918 via moves, since there's obviously no pop for SSE regs. */
10919 if (frame
.nsseregs
)
10920 ix86_emit_restore_sse_regs_using_mov (frame
.sse_reg_save_offset
,
10923 if (restore_regs_via_mov
)
10928 ix86_emit_restore_regs_using_mov (frame
.reg_save_offset
, style
== 2);
10930 /* eh_return epilogues need %ecx added to the stack pointer. */
10933 rtx insn
, sa
= EH_RETURN_STACKADJ_RTX
;
10935 /* Stack align doesn't work with eh_return. */
10936 gcc_assert (!stack_realign_drap
);
10937 /* Neither does regparm nested functions. */
10938 gcc_assert (!ix86_static_chain_on_stack
);
10940 if (frame_pointer_needed
)
10942 t
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
10943 t
= plus_constant (t
, m
->fs
.fp_offset
- UNITS_PER_WORD
);
10944 emit_insn (gen_rtx_SET (VOIDmode
, sa
, t
));
10946 t
= gen_frame_mem (Pmode
, hard_frame_pointer_rtx
);
10947 insn
= emit_move_insn (hard_frame_pointer_rtx
, t
);
10949 /* Note that we use SA as a temporary CFA, as the return
10950 address is at the proper place relative to it. We
10951 pretend this happens at the FP restore insn because
10952 prior to this insn the FP would be stored at the wrong
10953 offset relative to SA, and after this insn we have no
10954 other reasonable register to use for the CFA. We don't
10955 bother resetting the CFA to the SP for the duration of
10956 the return insn. */
10957 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10958 plus_constant (sa
, UNITS_PER_WORD
));
10959 ix86_add_queued_cfa_restore_notes (insn
);
10960 add_reg_note (insn
, REG_CFA_RESTORE
, hard_frame_pointer_rtx
);
10961 RTX_FRAME_RELATED_P (insn
) = 1;
10963 m
->fs
.cfa_reg
= sa
;
10964 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
10965 m
->fs
.fp_valid
= false;
10967 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
10968 const0_rtx
, style
, false);
10972 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
10973 t
= plus_constant (t
, m
->fs
.sp_offset
- UNITS_PER_WORD
);
10974 insn
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
));
10975 ix86_add_queued_cfa_restore_notes (insn
);
10977 gcc_assert (m
->fs
.cfa_reg
== stack_pointer_rtx
);
10978 if (m
->fs
.cfa_offset
!= UNITS_PER_WORD
)
10980 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
10981 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10982 plus_constant (stack_pointer_rtx
,
10984 RTX_FRAME_RELATED_P (insn
) = 1;
10987 m
->fs
.sp_offset
= UNITS_PER_WORD
;
10988 m
->fs
.sp_valid
= true;
10993 /* SEH requires that the function end with (1) a stack adjustment
10994 if necessary, (2) a sequence of pops, and (3) a return or
10995 jump instruction. Prevent insns from the function body from
10996 being scheduled into this sequence. */
10999 /* Prevent a catch region from being adjacent to the standard
11000 epilogue sequence. Unfortuantely crtl->uses_eh_lsda nor
11001 several other flags that would be interesting to test are
11003 if (flag_non_call_exceptions
)
11004 emit_insn (gen_nops (const1_rtx
));
11006 emit_insn (gen_blockage ());
11009 /* First step is to deallocate the stack frame so that we can
11010 pop the registers. */
11011 if (!m
->fs
.sp_valid
)
11013 pro_epilogue_adjust_stack (stack_pointer_rtx
, hard_frame_pointer_rtx
,
11014 GEN_INT (m
->fs
.fp_offset
11015 - frame
.reg_save_offset
),
11018 else if (m
->fs
.sp_offset
!= frame
.reg_save_offset
)
11020 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11021 GEN_INT (m
->fs
.sp_offset
11022 - frame
.reg_save_offset
),
11024 m
->fs
.cfa_reg
== stack_pointer_rtx
);
11027 ix86_emit_restore_regs_using_pop ();
11030 /* If we used a stack pointer and haven't already got rid of it,
11032 if (m
->fs
.fp_valid
)
11034 /* If the stack pointer is valid and pointing at the frame
11035 pointer store address, then we only need a pop. */
11036 if (m
->fs
.sp_valid
&& m
->fs
.sp_offset
== frame
.hfp_save_offset
)
11037 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
11038 /* Leave results in shorter dependency chains on CPUs that are
11039 able to grok it fast. */
11040 else if (TARGET_USE_LEAVE
11041 || optimize_function_for_size_p (cfun
)
11042 || !cfun
->machine
->use_fast_prologue_epilogue
)
11043 ix86_emit_leave ();
11046 pro_epilogue_adjust_stack (stack_pointer_rtx
,
11047 hard_frame_pointer_rtx
,
11048 const0_rtx
, style
, !using_drap
);
11049 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
11055 int param_ptr_offset
= UNITS_PER_WORD
;
11058 gcc_assert (stack_realign_drap
);
11060 if (ix86_static_chain_on_stack
)
11061 param_ptr_offset
+= UNITS_PER_WORD
;
11062 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
11063 param_ptr_offset
+= UNITS_PER_WORD
;
11065 insn
= emit_insn (gen_rtx_SET
11066 (VOIDmode
, stack_pointer_rtx
,
11067 gen_rtx_PLUS (Pmode
,
11069 GEN_INT (-param_ptr_offset
))));
11070 m
->fs
.cfa_reg
= stack_pointer_rtx
;
11071 m
->fs
.cfa_offset
= param_ptr_offset
;
11072 m
->fs
.sp_offset
= param_ptr_offset
;
11073 m
->fs
.realigned
= false;
11075 add_reg_note (insn
, REG_CFA_DEF_CFA
,
11076 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11077 GEN_INT (param_ptr_offset
)));
11078 RTX_FRAME_RELATED_P (insn
) = 1;
11080 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
11081 ix86_emit_restore_reg_using_pop (crtl
->drap_reg
);
11084 /* At this point the stack pointer must be valid, and we must have
11085 restored all of the registers. We may not have deallocated the
11086 entire stack frame. We've delayed this until now because it may
11087 be possible to merge the local stack deallocation with the
11088 deallocation forced by ix86_static_chain_on_stack. */
11089 gcc_assert (m
->fs
.sp_valid
);
11090 gcc_assert (!m
->fs
.fp_valid
);
11091 gcc_assert (!m
->fs
.realigned
);
11092 if (m
->fs
.sp_offset
!= UNITS_PER_WORD
)
11094 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11095 GEN_INT (m
->fs
.sp_offset
- UNITS_PER_WORD
),
11099 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11101 /* Sibcall epilogues don't want a return instruction. */
11104 m
->fs
= frame_state_save
;
11108 /* Emit vzeroupper if needed. */
11109 ix86_maybe_emit_epilogue_vzeroupper ();
11111 if (crtl
->args
.pops_args
&& crtl
->args
.size
)
11113 rtx popc
= GEN_INT (crtl
->args
.pops_args
);
11115 /* i386 can only pop 64K bytes. If asked to pop more, pop return
11116 address, do explicit add, and jump indirectly to the caller. */
11118 if (crtl
->args
.pops_args
>= 65536)
11120 rtx ecx
= gen_rtx_REG (SImode
, CX_REG
);
11123 /* There is no "pascal" calling convention in any 64bit ABI. */
11124 gcc_assert (!TARGET_64BIT
);
11126 insn
= emit_insn (gen_pop (ecx
));
11127 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
11128 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
11130 add_reg_note (insn
, REG_CFA_ADJUST_CFA
,
11131 copy_rtx (XVECEXP (PATTERN (insn
), 0, 1)));
11132 add_reg_note (insn
, REG_CFA_REGISTER
,
11133 gen_rtx_SET (VOIDmode
, ecx
, pc_rtx
));
11134 RTX_FRAME_RELATED_P (insn
) = 1;
11136 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11138 emit_jump_insn (gen_simple_return_indirect_internal (ecx
));
11141 emit_jump_insn (gen_simple_return_pop_internal (popc
));
11144 emit_jump_insn (gen_simple_return_internal ());
11146 /* Restore the state back to the state from the prologue,
11147 so that it's correct for the next epilogue. */
11148 m
->fs
= frame_state_save
;
11151 /* Reset from the function's potential modifications. */
11154 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
11155 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
11157 if (pic_offset_table_rtx
)
11158 SET_REGNO (pic_offset_table_rtx
, REAL_PIC_OFFSET_TABLE_REGNUM
);
11160 /* Mach-O doesn't support labels at the end of objects, so if
11161 it looks like we might want one, insert a NOP. */
11163 rtx insn
= get_last_insn ();
11164 rtx deleted_debug_label
= NULL_RTX
;
11167 && NOTE_KIND (insn
) != NOTE_INSN_DELETED_LABEL
)
11169 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
11170 notes only, instead set their CODE_LABEL_NUMBER to -1,
11171 otherwise there would be code generation differences
11172 in between -g and -g0. */
11173 if (NOTE_P (insn
) && NOTE_KIND (insn
) == NOTE_INSN_DELETED_DEBUG_LABEL
)
11174 deleted_debug_label
= insn
;
11175 insn
= PREV_INSN (insn
);
11180 && NOTE_KIND (insn
) == NOTE_INSN_DELETED_LABEL
)))
11181 fputs ("\tnop\n", file
);
11182 else if (deleted_debug_label
)
11183 for (insn
= deleted_debug_label
; insn
; insn
= NEXT_INSN (insn
))
11184 if (NOTE_KIND (insn
) == NOTE_INSN_DELETED_DEBUG_LABEL
)
11185 CODE_LABEL_NUMBER (insn
) = -1;
11191 /* Return a scratch register to use in the split stack prologue. The
11192 split stack prologue is used for -fsplit-stack. It is the first
11193 instructions in the function, even before the regular prologue.
11194 The scratch register can be any caller-saved register which is not
11195 used for parameters or for the static chain. */
11197 static unsigned int
11198 split_stack_prologue_scratch_regno (void)
11207 is_fastcall
= (lookup_attribute ("fastcall",
11208 TYPE_ATTRIBUTES (TREE_TYPE (cfun
->decl
)))
11210 regparm
= ix86_function_regparm (TREE_TYPE (cfun
->decl
), cfun
->decl
);
11214 if (DECL_STATIC_CHAIN (cfun
->decl
))
11216 sorry ("-fsplit-stack does not support fastcall with "
11217 "nested function");
11218 return INVALID_REGNUM
;
11222 else if (regparm
< 3)
11224 if (!DECL_STATIC_CHAIN (cfun
->decl
))
11230 sorry ("-fsplit-stack does not support 2 register "
11231 " parameters for a nested function");
11232 return INVALID_REGNUM
;
11239 /* FIXME: We could make this work by pushing a register
11240 around the addition and comparison. */
11241 sorry ("-fsplit-stack does not support 3 register parameters");
11242 return INVALID_REGNUM
;
11247 /* A SYMBOL_REF for the function which allocates new stackspace for
11250 static GTY(()) rtx split_stack_fn
;
11252 /* A SYMBOL_REF for the more stack function when using the large
11255 static GTY(()) rtx split_stack_fn_large
;
11257 /* Handle -fsplit-stack. These are the first instructions in the
11258 function, even before the regular prologue. */
11261 ix86_expand_split_stack_prologue (void)
11263 struct ix86_frame frame
;
11264 HOST_WIDE_INT allocate
;
11265 unsigned HOST_WIDE_INT args_size
;
11266 rtx label
, limit
, current
, jump_insn
, allocate_rtx
, call_insn
, call_fusage
;
11267 rtx scratch_reg
= NULL_RTX
;
11268 rtx varargs_label
= NULL_RTX
;
11271 gcc_assert (flag_split_stack
&& reload_completed
);
11273 ix86_finalize_stack_realign_flags ();
11274 ix86_compute_frame_layout (&frame
);
11275 allocate
= frame
.stack_pointer_offset
- INCOMING_FRAME_SP_OFFSET
;
11277 /* This is the label we will branch to if we have enough stack
11278 space. We expect the basic block reordering pass to reverse this
11279 branch if optimizing, so that we branch in the unlikely case. */
11280 label
= gen_label_rtx ();
11282 /* We need to compare the stack pointer minus the frame size with
11283 the stack boundary in the TCB. The stack boundary always gives
11284 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11285 can compare directly. Otherwise we need to do an addition. */
11287 limit
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
11288 UNSPEC_STACK_CHECK
);
11289 limit
= gen_rtx_CONST (Pmode
, limit
);
11290 limit
= gen_rtx_MEM (Pmode
, limit
);
11291 if (allocate
< SPLIT_STACK_AVAILABLE
)
11292 current
= stack_pointer_rtx
;
11295 unsigned int scratch_regno
;
11298 /* We need a scratch register to hold the stack pointer minus
11299 the required frame size. Since this is the very start of the
11300 function, the scratch register can be any caller-saved
11301 register which is not used for parameters. */
11302 offset
= GEN_INT (- allocate
);
11303 scratch_regno
= split_stack_prologue_scratch_regno ();
11304 if (scratch_regno
== INVALID_REGNUM
)
11306 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
11307 if (!TARGET_64BIT
|| x86_64_immediate_operand (offset
, Pmode
))
11309 /* We don't use ix86_gen_add3 in this case because it will
11310 want to split to lea, but when not optimizing the insn
11311 will not be split after this point. */
11312 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11313 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11318 emit_move_insn (scratch_reg
, offset
);
11319 emit_insn (gen_adddi3 (scratch_reg
, scratch_reg
,
11320 stack_pointer_rtx
));
11322 current
= scratch_reg
;
11325 ix86_expand_branch (GEU
, current
, limit
, label
);
11326 jump_insn
= get_last_insn ();
11327 JUMP_LABEL (jump_insn
) = label
;
11329 /* Mark the jump as very likely to be taken. */
11330 add_reg_note (jump_insn
, REG_BR_PROB
,
11331 GEN_INT (REG_BR_PROB_BASE
- REG_BR_PROB_BASE
/ 100));
11333 if (split_stack_fn
== NULL_RTX
)
11334 split_stack_fn
= gen_rtx_SYMBOL_REF (Pmode
, "__morestack");
11335 fn
= split_stack_fn
;
11337 /* Get more stack space. We pass in the desired stack space and the
11338 size of the arguments to copy to the new stack. In 32-bit mode
11339 we push the parameters; __morestack will return on a new stack
11340 anyhow. In 64-bit mode we pass the parameters in r10 and
11342 allocate_rtx
= GEN_INT (allocate
);
11343 args_size
= crtl
->args
.size
>= 0 ? crtl
->args
.size
: 0;
11344 call_fusage
= NULL_RTX
;
11349 reg10
= gen_rtx_REG (Pmode
, R10_REG
);
11350 reg11
= gen_rtx_REG (Pmode
, R11_REG
);
11352 /* If this function uses a static chain, it will be in %r10.
11353 Preserve it across the call to __morestack. */
11354 if (DECL_STATIC_CHAIN (cfun
->decl
))
11358 rax
= gen_rtx_REG (Pmode
, AX_REG
);
11359 emit_move_insn (rax
, reg10
);
11360 use_reg (&call_fusage
, rax
);
11363 if (ix86_cmodel
== CM_LARGE
|| ix86_cmodel
== CM_LARGE_PIC
)
11365 HOST_WIDE_INT argval
;
11367 /* When using the large model we need to load the address
11368 into a register, and we've run out of registers. So we
11369 switch to a different calling convention, and we call a
11370 different function: __morestack_large. We pass the
11371 argument size in the upper 32 bits of r10 and pass the
11372 frame size in the lower 32 bits. */
11373 gcc_assert ((allocate
& (HOST_WIDE_INT
) 0xffffffff) == allocate
);
11374 gcc_assert ((args_size
& 0xffffffff) == args_size
);
11376 if (split_stack_fn_large
== NULL_RTX
)
11377 split_stack_fn_large
=
11378 gen_rtx_SYMBOL_REF (Pmode
, "__morestack_large_model");
11380 if (ix86_cmodel
== CM_LARGE_PIC
)
11384 label
= gen_label_rtx ();
11385 emit_label (label
);
11386 LABEL_PRESERVE_P (label
) = 1;
11387 emit_insn (gen_set_rip_rex64 (reg10
, label
));
11388 emit_insn (gen_set_got_offset_rex64 (reg11
, label
));
11389 emit_insn (gen_adddi3 (reg10
, reg10
, reg11
));
11390 x
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, split_stack_fn_large
),
11392 x
= gen_rtx_CONST (Pmode
, x
);
11393 emit_move_insn (reg11
, x
);
11394 x
= gen_rtx_PLUS (Pmode
, reg10
, reg11
);
11395 x
= gen_const_mem (Pmode
, x
);
11396 emit_move_insn (reg11
, x
);
11399 emit_move_insn (reg11
, split_stack_fn_large
);
11403 argval
= ((args_size
<< 16) << 16) + allocate
;
11404 emit_move_insn (reg10
, GEN_INT (argval
));
11408 emit_move_insn (reg10
, allocate_rtx
);
11409 emit_move_insn (reg11
, GEN_INT (args_size
));
11410 use_reg (&call_fusage
, reg11
);
11413 use_reg (&call_fusage
, reg10
);
11417 emit_insn (gen_push (GEN_INT (args_size
)));
11418 emit_insn (gen_push (allocate_rtx
));
11420 call_insn
= ix86_expand_call (NULL_RTX
, gen_rtx_MEM (QImode
, fn
),
11421 GEN_INT (UNITS_PER_WORD
), constm1_rtx
,
11423 add_function_usage_to (call_insn
, call_fusage
);
11425 /* In order to make call/return prediction work right, we now need
11426 to execute a return instruction. See
11427 libgcc/config/i386/morestack.S for the details on how this works.
11429 For flow purposes gcc must not see this as a return
11430 instruction--we need control flow to continue at the subsequent
11431 label. Therefore, we use an unspec. */
11432 gcc_assert (crtl
->args
.pops_args
< 65536);
11433 emit_insn (gen_split_stack_return (GEN_INT (crtl
->args
.pops_args
)));
11435 /* If we are in 64-bit mode and this function uses a static chain,
11436 we saved %r10 in %rax before calling _morestack. */
11437 if (TARGET_64BIT
&& DECL_STATIC_CHAIN (cfun
->decl
))
11438 emit_move_insn (gen_rtx_REG (Pmode
, R10_REG
),
11439 gen_rtx_REG (Pmode
, AX_REG
));
11441 /* If this function calls va_start, we need to store a pointer to
11442 the arguments on the old stack, because they may not have been
11443 all copied to the new stack. At this point the old stack can be
11444 found at the frame pointer value used by __morestack, because
11445 __morestack has set that up before calling back to us. Here we
11446 store that pointer in a scratch register, and in
11447 ix86_expand_prologue we store the scratch register in a stack
11449 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11451 unsigned int scratch_regno
;
11455 scratch_regno
= split_stack_prologue_scratch_regno ();
11456 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
11457 frame_reg
= gen_rtx_REG (Pmode
, BP_REG
);
11461 return address within this function
11462 return address of caller of this function
11464 So we add three words to get to the stack arguments.
11468 return address within this function
11469 first argument to __morestack
11470 second argument to __morestack
11471 return address of caller of this function
11473 So we add five words to get to the stack arguments.
11475 words
= TARGET_64BIT
? 3 : 5;
11476 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11477 gen_rtx_PLUS (Pmode
, frame_reg
,
11478 GEN_INT (words
* UNITS_PER_WORD
))));
11480 varargs_label
= gen_label_rtx ();
11481 emit_jump_insn (gen_jump (varargs_label
));
11482 JUMP_LABEL (get_last_insn ()) = varargs_label
;
11487 emit_label (label
);
11488 LABEL_NUSES (label
) = 1;
11490 /* If this function calls va_start, we now have to set the scratch
11491 register for the case where we do not call __morestack. In this
11492 case we need to set it based on the stack pointer. */
11493 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11495 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11496 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11497 GEN_INT (UNITS_PER_WORD
))));
11499 emit_label (varargs_label
);
11500 LABEL_NUSES (varargs_label
) = 1;
11504 /* We may have to tell the dataflow pass that the split stack prologue
11505 is initializing a scratch register. */
11508 ix86_live_on_entry (bitmap regs
)
11510 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11512 gcc_assert (flag_split_stack
);
11513 bitmap_set_bit (regs
, split_stack_prologue_scratch_regno ());
11517 /* Determine if op is suitable SUBREG RTX for address. */
11520 ix86_address_subreg_operand (rtx op
)
11522 enum machine_mode mode
;
11527 mode
= GET_MODE (op
);
11529 if (GET_MODE_CLASS (mode
) != MODE_INT
)
11532 /* Don't allow SUBREGs that span more than a word. It can lead to spill
11533 failures when the register is one word out of a two word structure. */
11534 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
11537 /* Allow only SUBREGs of non-eliminable hard registers. */
11538 return register_no_elim_operand (op
, mode
);
11541 /* Extract the parts of an RTL expression that is a valid memory address
11542 for an instruction. Return 0 if the structure of the address is
11543 grossly off. Return -1 if the address contains ASHIFT, so it is not
11544 strictly valid, but still used for computing length of lea instruction. */
11547 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
11549 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
11550 rtx base_reg
, index_reg
;
11551 HOST_WIDE_INT scale
= 1;
11552 rtx scale_rtx
= NULL_RTX
;
11555 enum ix86_address_seg seg
= SEG_DEFAULT
;
11557 /* Allow zero-extended SImode addresses,
11558 they will be emitted with addr32 prefix. */
11559 if (TARGET_64BIT
&& GET_MODE (addr
) == DImode
)
11561 if (GET_CODE (addr
) == ZERO_EXTEND
11562 && GET_MODE (XEXP (addr
, 0)) == SImode
)
11563 addr
= XEXP (addr
, 0);
11564 else if (GET_CODE (addr
) == AND
11565 && const_32bit_mask (XEXP (addr
, 1), DImode
))
11567 addr
= XEXP (addr
, 0);
11569 /* Strip subreg. */
11570 if (GET_CODE (addr
) == SUBREG
11571 && GET_MODE (SUBREG_REG (addr
)) == SImode
)
11572 addr
= SUBREG_REG (addr
);
11578 else if (GET_CODE (addr
) == SUBREG
)
11580 if (ix86_address_subreg_operand (SUBREG_REG (addr
)))
11585 else if (GET_CODE (addr
) == PLUS
)
11587 rtx addends
[4], op
;
11595 addends
[n
++] = XEXP (op
, 1);
11598 while (GET_CODE (op
) == PLUS
);
11603 for (i
= n
; i
>= 0; --i
)
11606 switch (GET_CODE (op
))
11611 index
= XEXP (op
, 0);
11612 scale_rtx
= XEXP (op
, 1);
11618 index
= XEXP (op
, 0);
11619 tmp
= XEXP (op
, 1);
11620 if (!CONST_INT_P (tmp
))
11622 scale
= INTVAL (tmp
);
11623 if ((unsigned HOST_WIDE_INT
) scale
> 3)
11625 scale
= 1 << scale
;
11629 if (XINT (op
, 1) == UNSPEC_TP
11630 && TARGET_TLS_DIRECT_SEG_REFS
11631 && seg
== SEG_DEFAULT
)
11632 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
11638 if (!ix86_address_subreg_operand (SUBREG_REG (op
)))
11665 else if (GET_CODE (addr
) == MULT
)
11667 index
= XEXP (addr
, 0); /* index*scale */
11668 scale_rtx
= XEXP (addr
, 1);
11670 else if (GET_CODE (addr
) == ASHIFT
)
11672 /* We're called for lea too, which implements ashift on occasion. */
11673 index
= XEXP (addr
, 0);
11674 tmp
= XEXP (addr
, 1);
11675 if (!CONST_INT_P (tmp
))
11677 scale
= INTVAL (tmp
);
11678 if ((unsigned HOST_WIDE_INT
) scale
> 3)
11680 scale
= 1 << scale
;
11684 disp
= addr
; /* displacement */
11690 else if (GET_CODE (index
) == SUBREG
11691 && ix86_address_subreg_operand (SUBREG_REG (index
)))
11697 /* Extract the integral value of scale. */
11700 if (!CONST_INT_P (scale_rtx
))
11702 scale
= INTVAL (scale_rtx
);
11705 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
11706 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
11708 /* Avoid useless 0 displacement. */
11709 if (disp
== const0_rtx
&& (base
|| index
))
11712 /* Allow arg pointer and stack pointer as index if there is not scaling. */
11713 if (base_reg
&& index_reg
&& scale
== 1
11714 && (index_reg
== arg_pointer_rtx
11715 || index_reg
== frame_pointer_rtx
11716 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
11719 tmp
= base
, base
= index
, index
= tmp
;
11720 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
11723 /* Special case: %ebp cannot be encoded as a base without a displacement.
11727 && (base_reg
== hard_frame_pointer_rtx
11728 || base_reg
== frame_pointer_rtx
11729 || base_reg
== arg_pointer_rtx
11730 || (REG_P (base_reg
)
11731 && (REGNO (base_reg
) == HARD_FRAME_POINTER_REGNUM
11732 || REGNO (base_reg
) == R13_REG
))))
11735 /* Special case: on K6, [%esi] makes the instruction vector decoded.
11736 Avoid this by transforming to [%esi+0].
11737 Reload calls address legitimization without cfun defined, so we need
11738 to test cfun for being non-NULL. */
11739 if (TARGET_K6
&& cfun
&& optimize_function_for_speed_p (cfun
)
11740 && base_reg
&& !index_reg
&& !disp
11741 && REG_P (base_reg
) && REGNO (base_reg
) == SI_REG
)
11744 /* Special case: encode reg+reg instead of reg*2. */
11745 if (!base
&& index
&& scale
== 2)
11746 base
= index
, base_reg
= index_reg
, scale
= 1;
11748 /* Special case: scaling cannot be encoded without base or displacement. */
11749 if (!base
&& !disp
&& index
&& scale
!= 1)
11753 out
->index
= index
;
11755 out
->scale
= scale
;
11761 /* Return cost of the memory address x.
11762 For i386, it is better to use a complex address than let gcc copy
11763 the address into a reg and make a new pseudo. But not if the address
11764 requires to two regs - that would mean more pseudos with longer
11767 ix86_address_cost (rtx x
, bool speed ATTRIBUTE_UNUSED
)
11769 struct ix86_address parts
;
11771 int ok
= ix86_decompose_address (x
, &parts
);
11775 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
11776 parts
.base
= SUBREG_REG (parts
.base
);
11777 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
11778 parts
.index
= SUBREG_REG (parts
.index
);
11780 /* Attempt to minimize number of registers in the address. */
11782 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
11784 && (!REG_P (parts
.index
)
11785 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
11789 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
11791 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
11792 && parts
.base
!= parts
.index
)
11795 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
11796 since it's predecode logic can't detect the length of instructions
11797 and it degenerates to vector decoded. Increase cost of such
11798 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
11799 to split such addresses or even refuse such addresses at all.
11801 Following addressing modes are affected:
11806 The first and last case may be avoidable by explicitly coding the zero in
11807 memory address, but I don't have AMD-K6 machine handy to check this
11811 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
11812 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
11813 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
11819 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
11820 this is used for to form addresses to local data when -fPIC is in
11824 darwin_local_data_pic (rtx disp
)
11826 return (GET_CODE (disp
) == UNSPEC
11827 && XINT (disp
, 1) == UNSPEC_MACHOPIC_OFFSET
);
11830 /* Determine if a given RTX is a valid constant. We already know this
11831 satisfies CONSTANT_P. */
11834 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
11836 switch (GET_CODE (x
))
11841 if (GET_CODE (x
) == PLUS
)
11843 if (!CONST_INT_P (XEXP (x
, 1)))
11848 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
11851 /* Only some unspecs are valid as "constants". */
11852 if (GET_CODE (x
) == UNSPEC
)
11853 switch (XINT (x
, 1))
11856 case UNSPEC_GOTOFF
:
11857 case UNSPEC_PLTOFF
:
11858 return TARGET_64BIT
;
11860 case UNSPEC_NTPOFF
:
11861 x
= XVECEXP (x
, 0, 0);
11862 return (GET_CODE (x
) == SYMBOL_REF
11863 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
11864 case UNSPEC_DTPOFF
:
11865 x
= XVECEXP (x
, 0, 0);
11866 return (GET_CODE (x
) == SYMBOL_REF
11867 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
11872 /* We must have drilled down to a symbol. */
11873 if (GET_CODE (x
) == LABEL_REF
)
11875 if (GET_CODE (x
) != SYMBOL_REF
)
11880 /* TLS symbols are never valid. */
11881 if (SYMBOL_REF_TLS_MODEL (x
))
11884 /* DLLIMPORT symbols are never valid. */
11885 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11886 && SYMBOL_REF_DLLIMPORT_P (x
))
11890 /* mdynamic-no-pic */
11891 if (MACHO_DYNAMIC_NO_PIC_P
)
11892 return machopic_symbol_defined_p (x
);
11897 if (GET_MODE (x
) == TImode
11898 && x
!= CONST0_RTX (TImode
)
11904 if (!standard_sse_constant_p (x
))
11911 /* Otherwise we handle everything else in the move patterns. */
11915 /* Determine if it's legal to put X into the constant pool. This
11916 is not possible for the address of thread-local symbols, which
11917 is checked above. */
11920 ix86_cannot_force_const_mem (enum machine_mode mode
, rtx x
)
11922 /* We can always put integral constants and vectors in memory. */
11923 switch (GET_CODE (x
))
11933 return !ix86_legitimate_constant_p (mode
, x
);
11937 /* Nonzero if the constant value X is a legitimate general operand
11938 when generating PIC code. It is given that flag_pic is on and
11939 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11942 legitimate_pic_operand_p (rtx x
)
11946 switch (GET_CODE (x
))
11949 inner
= XEXP (x
, 0);
11950 if (GET_CODE (inner
) == PLUS
11951 && CONST_INT_P (XEXP (inner
, 1)))
11952 inner
= XEXP (inner
, 0);
11954 /* Only some unspecs are valid as "constants". */
11955 if (GET_CODE (inner
) == UNSPEC
)
11956 switch (XINT (inner
, 1))
11959 case UNSPEC_GOTOFF
:
11960 case UNSPEC_PLTOFF
:
11961 return TARGET_64BIT
;
11963 x
= XVECEXP (inner
, 0, 0);
11964 return (GET_CODE (x
) == SYMBOL_REF
11965 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
11966 case UNSPEC_MACHOPIC_OFFSET
:
11967 return legitimate_pic_address_disp_p (x
);
11975 return legitimate_pic_address_disp_p (x
);
11982 /* Determine if a given CONST RTX is a valid memory displacement
11986 legitimate_pic_address_disp_p (rtx disp
)
11990 /* In 64bit mode we can allow direct addresses of symbols and labels
11991 when they are not dynamic symbols. */
11994 rtx op0
= disp
, op1
;
11996 switch (GET_CODE (disp
))
12002 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
12004 op0
= XEXP (XEXP (disp
, 0), 0);
12005 op1
= XEXP (XEXP (disp
, 0), 1);
12006 if (!CONST_INT_P (op1
)
12007 || INTVAL (op1
) >= 16*1024*1024
12008 || INTVAL (op1
) < -16*1024*1024)
12010 if (GET_CODE (op0
) == LABEL_REF
)
12012 if (GET_CODE (op0
) != SYMBOL_REF
)
12017 /* TLS references should always be enclosed in UNSPEC. */
12018 if (SYMBOL_REF_TLS_MODEL (op0
))
12020 if (!SYMBOL_REF_FAR_ADDR_P (op0
) && SYMBOL_REF_LOCAL_P (op0
)
12021 && ix86_cmodel
!= CM_LARGE_PIC
)
12029 if (GET_CODE (disp
) != CONST
)
12031 disp
= XEXP (disp
, 0);
12035 /* We are unsafe to allow PLUS expressions. This limit allowed distance
12036 of GOT tables. We should not need these anyway. */
12037 if (GET_CODE (disp
) != UNSPEC
12038 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
12039 && XINT (disp
, 1) != UNSPEC_GOTOFF
12040 && XINT (disp
, 1) != UNSPEC_PCREL
12041 && XINT (disp
, 1) != UNSPEC_PLTOFF
))
12044 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
12045 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
12051 if (GET_CODE (disp
) == PLUS
)
12053 if (!CONST_INT_P (XEXP (disp
, 1)))
12055 disp
= XEXP (disp
, 0);
12059 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
12062 if (GET_CODE (disp
) != UNSPEC
)
12065 switch (XINT (disp
, 1))
12070 /* We need to check for both symbols and labels because VxWorks loads
12071 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12073 return (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
12074 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
);
12075 case UNSPEC_GOTOFF
:
12076 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12077 While ABI specify also 32bit relocation but we don't produce it in
12078 small PIC model at all. */
12079 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
12080 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
12082 return gotoff_operand (XVECEXP (disp
, 0, 0), Pmode
);
12084 case UNSPEC_GOTTPOFF
:
12085 case UNSPEC_GOTNTPOFF
:
12086 case UNSPEC_INDNTPOFF
:
12089 disp
= XVECEXP (disp
, 0, 0);
12090 return (GET_CODE (disp
) == SYMBOL_REF
12091 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
12092 case UNSPEC_NTPOFF
:
12093 disp
= XVECEXP (disp
, 0, 0);
12094 return (GET_CODE (disp
) == SYMBOL_REF
12095 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
12096 case UNSPEC_DTPOFF
:
12097 disp
= XVECEXP (disp
, 0, 0);
12098 return (GET_CODE (disp
) == SYMBOL_REF
12099 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
12105 /* Recognizes RTL expressions that are valid memory addresses for an
12106 instruction. The MODE argument is the machine mode for the MEM
12107 expression that wants to use this address.
12109 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
12110 convert common non-canonical forms to canonical form so that they will
12114 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED
,
12115 rtx addr
, bool strict
)
12117 struct ix86_address parts
;
12118 rtx base
, index
, disp
;
12119 HOST_WIDE_INT scale
;
12121 if (ix86_decompose_address (addr
, &parts
) <= 0)
12122 /* Decomposition failed. */
12126 index
= parts
.index
;
12128 scale
= parts
.scale
;
12130 /* Validate base register. */
12137 else if (GET_CODE (base
) == SUBREG
&& REG_P (SUBREG_REG (base
)))
12138 reg
= SUBREG_REG (base
);
12140 /* Base is not a register. */
12143 if (GET_MODE (base
) != SImode
&& GET_MODE (base
) != DImode
)
12146 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
12147 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
12148 /* Base is not valid. */
12152 /* Validate index register. */
12159 else if (GET_CODE (index
) == SUBREG
&& REG_P (SUBREG_REG (index
)))
12160 reg
= SUBREG_REG (index
);
12162 /* Index is not a register. */
12165 if (GET_MODE (index
) != SImode
&& GET_MODE (index
) != DImode
)
12168 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
12169 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
12170 /* Index is not valid. */
12174 /* Index and base should have the same mode. */
12176 && GET_MODE (base
) != GET_MODE (index
))
12179 /* Validate scale factor. */
12183 /* Scale without index. */
12186 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
12187 /* Scale is not a valid multiplier. */
12191 /* Validate displacement. */
12194 if (GET_CODE (disp
) == CONST
12195 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
12196 && XINT (XEXP (disp
, 0), 1) != UNSPEC_MACHOPIC_OFFSET
)
12197 switch (XINT (XEXP (disp
, 0), 1))
12199 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12200 used. While ABI specify also 32bit relocations, we don't produce
12201 them at all and use IP relative instead. */
12203 case UNSPEC_GOTOFF
:
12204 gcc_assert (flag_pic
);
12206 goto is_legitimate_pic
;
12208 /* 64bit address unspec. */
12211 case UNSPEC_GOTPCREL
:
12213 gcc_assert (flag_pic
);
12214 goto is_legitimate_pic
;
12216 case UNSPEC_GOTTPOFF
:
12217 case UNSPEC_GOTNTPOFF
:
12218 case UNSPEC_INDNTPOFF
:
12219 case UNSPEC_NTPOFF
:
12220 case UNSPEC_DTPOFF
:
12223 case UNSPEC_STACK_CHECK
:
12224 gcc_assert (flag_split_stack
);
12228 /* Invalid address unspec. */
12232 else if (SYMBOLIC_CONST (disp
)
12236 && MACHOPIC_INDIRECT
12237 && !machopic_operand_p (disp
)
12243 if (TARGET_64BIT
&& (index
|| base
))
12245 /* foo@dtpoff(%rX) is ok. */
12246 if (GET_CODE (disp
) != CONST
12247 || GET_CODE (XEXP (disp
, 0)) != PLUS
12248 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
12249 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
12250 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
12251 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
12252 /* Non-constant pic memory reference. */
12255 else if ((!TARGET_MACHO
|| flag_pic
)
12256 && ! legitimate_pic_address_disp_p (disp
))
12257 /* Displacement is an invalid pic construct. */
12260 else if (MACHO_DYNAMIC_NO_PIC_P
12261 && !ix86_legitimate_constant_p (Pmode
, disp
))
12262 /* displacment must be referenced via non_lazy_pointer */
12266 /* This code used to verify that a symbolic pic displacement
12267 includes the pic_offset_table_rtx register.
12269 While this is good idea, unfortunately these constructs may
12270 be created by "adds using lea" optimization for incorrect
12279 This code is nonsensical, but results in addressing
12280 GOT table with pic_offset_table_rtx base. We can't
12281 just refuse it easily, since it gets matched by
12282 "addsi3" pattern, that later gets split to lea in the
12283 case output register differs from input. While this
12284 can be handled by separate addsi pattern for this case
12285 that never results in lea, this seems to be easier and
12286 correct fix for crash to disable this test. */
12288 else if (GET_CODE (disp
) != LABEL_REF
12289 && !CONST_INT_P (disp
)
12290 && (GET_CODE (disp
) != CONST
12291 || !ix86_legitimate_constant_p (Pmode
, disp
))
12292 && (GET_CODE (disp
) != SYMBOL_REF
12293 || !ix86_legitimate_constant_p (Pmode
, disp
)))
12294 /* Displacement is not constant. */
12296 else if (TARGET_64BIT
12297 && !x86_64_immediate_operand (disp
, VOIDmode
))
12298 /* Displacement is out of range. */
12302 /* Everything looks valid. */
12306 /* Determine if a given RTX is a valid constant address. */
12309 constant_address_p (rtx x
)
12311 return CONSTANT_P (x
) && ix86_legitimate_address_p (Pmode
, x
, 1);
12314 /* Return a unique alias set for the GOT. */
12316 static alias_set_type
12317 ix86_GOT_alias_set (void)
12319 static alias_set_type set
= -1;
12321 set
= new_alias_set ();
12325 /* Return a legitimate reference for ORIG (an address) using the
12326 register REG. If REG is 0, a new pseudo is generated.
12328 There are two types of references that must be handled:
12330 1. Global data references must load the address from the GOT, via
12331 the PIC reg. An insn is emitted to do this load, and the reg is
12334 2. Static data references, constant pool addresses, and code labels
12335 compute the address as an offset from the GOT, whose base is in
12336 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12337 differentiate them from global data objects. The returned
12338 address is the PIC reg + an unspec constant.
12340 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12341 reg also appears in the address. */
12344 legitimize_pic_address (rtx orig
, rtx reg
)
12347 rtx new_rtx
= orig
;
12351 if (TARGET_MACHO
&& !TARGET_64BIT
)
12354 reg
= gen_reg_rtx (Pmode
);
12355 /* Use the generic Mach-O PIC machinery. */
12356 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
12360 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
12362 else if (TARGET_64BIT
12363 && ix86_cmodel
!= CM_SMALL_PIC
12364 && gotoff_operand (addr
, Pmode
))
12367 /* This symbol may be referenced via a displacement from the PIC
12368 base address (@GOTOFF). */
12370 if (reload_in_progress
)
12371 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12372 if (GET_CODE (addr
) == CONST
)
12373 addr
= XEXP (addr
, 0);
12374 if (GET_CODE (addr
) == PLUS
)
12376 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
12378 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
12381 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
12382 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12384 tmpreg
= gen_reg_rtx (Pmode
);
12387 emit_move_insn (tmpreg
, new_rtx
);
12391 new_rtx
= expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
12392 tmpreg
, 1, OPTAB_DIRECT
);
12395 else new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
12397 else if (!TARGET_64BIT
&& gotoff_operand (addr
, Pmode
))
12399 /* This symbol may be referenced via a displacement from the PIC
12400 base address (@GOTOFF). */
12402 if (reload_in_progress
)
12403 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12404 if (GET_CODE (addr
) == CONST
)
12405 addr
= XEXP (addr
, 0);
12406 if (GET_CODE (addr
) == PLUS
)
12408 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
12410 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
12413 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
12414 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12415 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12419 emit_move_insn (reg
, new_rtx
);
12423 else if ((GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
12424 /* We can't use @GOTOFF for text labels on VxWorks;
12425 see gotoff_operand. */
12426 || (TARGET_VXWORKS_RTP
&& GET_CODE (addr
) == LABEL_REF
))
12428 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
12430 if (GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_DLLIMPORT_P (addr
))
12431 return legitimize_dllimport_symbol (addr
, true);
12432 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
12433 && GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
12434 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr
, 0), 0)))
12436 rtx t
= legitimize_dllimport_symbol (XEXP (XEXP (addr
, 0), 0), true);
12437 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (addr
, 0), 1));
12441 /* For x64 PE-COFF there is no GOT table. So we use address
12443 if (TARGET_64BIT
&& DEFAULT_ABI
== MS_ABI
)
12445 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_PCREL
);
12446 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12449 reg
= gen_reg_rtx (Pmode
);
12450 emit_move_insn (reg
, new_rtx
);
12453 else if (TARGET_64BIT
&& ix86_cmodel
!= CM_LARGE_PIC
)
12455 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
12456 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12457 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
12458 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
12461 reg
= gen_reg_rtx (Pmode
);
12462 /* Use directly gen_movsi, otherwise the address is loaded
12463 into register for CSE. We don't want to CSE this addresses,
12464 instead we CSE addresses from the GOT table, so skip this. */
12465 emit_insn (gen_movsi (reg
, new_rtx
));
12470 /* This symbol must be referenced via a load from the
12471 Global Offset Table (@GOT). */
12473 if (reload_in_progress
)
12474 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12475 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
12476 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12478 new_rtx
= force_reg (Pmode
, new_rtx
);
12479 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12480 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
12481 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
12484 reg
= gen_reg_rtx (Pmode
);
12485 emit_move_insn (reg
, new_rtx
);
12491 if (CONST_INT_P (addr
)
12492 && !x86_64_immediate_operand (addr
, VOIDmode
))
12496 emit_move_insn (reg
, addr
);
12500 new_rtx
= force_reg (Pmode
, addr
);
12502 else if (GET_CODE (addr
) == CONST
)
12504 addr
= XEXP (addr
, 0);
12506 /* We must match stuff we generate before. Assume the only
12507 unspecs that can get here are ours. Not that we could do
12508 anything with them anyway.... */
12509 if (GET_CODE (addr
) == UNSPEC
12510 || (GET_CODE (addr
) == PLUS
12511 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
12513 gcc_assert (GET_CODE (addr
) == PLUS
);
12515 if (GET_CODE (addr
) == PLUS
)
12517 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
12519 /* Check first to see if this is a constant offset from a @GOTOFF
12520 symbol reference. */
12521 if (gotoff_operand (op0
, Pmode
)
12522 && CONST_INT_P (op1
))
12526 if (reload_in_progress
)
12527 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12528 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
12530 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, op1
);
12531 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12532 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12536 emit_move_insn (reg
, new_rtx
);
12542 if (INTVAL (op1
) < -16*1024*1024
12543 || INTVAL (op1
) >= 16*1024*1024)
12545 if (!x86_64_immediate_operand (op1
, Pmode
))
12546 op1
= force_reg (Pmode
, op1
);
12547 new_rtx
= gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
12553 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
12554 new_rtx
= legitimize_pic_address (XEXP (addr
, 1),
12555 base
== reg
? NULL_RTX
: reg
);
12557 if (CONST_INT_P (new_rtx
))
12558 new_rtx
= plus_constant (base
, INTVAL (new_rtx
));
12561 if (GET_CODE (new_rtx
) == PLUS
&& CONSTANT_P (XEXP (new_rtx
, 1)))
12563 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new_rtx
, 0));
12564 new_rtx
= XEXP (new_rtx
, 1);
12566 new_rtx
= gen_rtx_PLUS (Pmode
, base
, new_rtx
);
12574 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12577 get_thread_pointer (bool to_reg
)
12579 rtx tp
= gen_rtx_UNSPEC (ptr_mode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
12581 if (GET_MODE (tp
) != Pmode
)
12582 tp
= convert_to_mode (Pmode
, tp
, 1);
12585 tp
= copy_addr_to_reg (tp
);
12590 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12592 static GTY(()) rtx ix86_tls_symbol
;
12595 ix86_tls_get_addr (void)
12597 if (!ix86_tls_symbol
)
12600 = ((TARGET_ANY_GNU_TLS
&& !TARGET_64BIT
)
12601 ? "___tls_get_addr" : "__tls_get_addr");
12603 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
, sym
);
12606 return ix86_tls_symbol
;
12609 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12611 static GTY(()) rtx ix86_tls_module_base_symbol
;
12614 ix86_tls_module_base (void)
12616 if (!ix86_tls_module_base_symbol
)
12618 ix86_tls_module_base_symbol
12619 = gen_rtx_SYMBOL_REF (Pmode
, "_TLS_MODULE_BASE_");
12621 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
12622 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
12625 return ix86_tls_module_base_symbol
;
12628 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12629 false if we expect this to be used for a memory address and true if
12630 we expect to load the address into a register. */
12633 legitimize_tls_address (rtx x
, enum tls_model model
, bool for_mov
)
12635 rtx dest
, base
, off
;
12636 rtx pic
= NULL_RTX
, tp
= NULL_RTX
;
12641 case TLS_MODEL_GLOBAL_DYNAMIC
:
12642 dest
= gen_reg_rtx (Pmode
);
12647 pic
= pic_offset_table_rtx
;
12650 pic
= gen_reg_rtx (Pmode
);
12651 emit_insn (gen_set_got (pic
));
12655 if (TARGET_GNU2_TLS
)
12658 emit_insn (gen_tls_dynamic_gnu2_64 (dest
, x
));
12660 emit_insn (gen_tls_dynamic_gnu2_32 (dest
, x
, pic
));
12662 tp
= get_thread_pointer (true);
12663 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
12665 set_unique_reg_note (get_last_insn (), REG_EQUAL
, x
);
12669 rtx caddr
= ix86_tls_get_addr ();
12673 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
), insns
;
12676 emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
, caddr
));
12677 insns
= get_insns ();
12680 RTL_CONST_CALL_P (insns
) = 1;
12681 emit_libcall_block (insns
, dest
, rax
, x
);
12684 emit_insn (gen_tls_global_dynamic_32 (dest
, x
, pic
, caddr
));
12688 case TLS_MODEL_LOCAL_DYNAMIC
:
12689 base
= gen_reg_rtx (Pmode
);
12694 pic
= pic_offset_table_rtx
;
12697 pic
= gen_reg_rtx (Pmode
);
12698 emit_insn (gen_set_got (pic
));
12702 if (TARGET_GNU2_TLS
)
12704 rtx tmp
= ix86_tls_module_base ();
12707 emit_insn (gen_tls_dynamic_gnu2_64 (base
, tmp
));
12709 emit_insn (gen_tls_dynamic_gnu2_32 (base
, tmp
, pic
));
12711 tp
= get_thread_pointer (true);
12712 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
12713 gen_rtx_MINUS (Pmode
, tmp
, tp
));
12717 rtx caddr
= ix86_tls_get_addr ();
12721 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
), insns
, eqv
;
12724 emit_call_insn (gen_tls_local_dynamic_base_64 (rax
, caddr
));
12725 insns
= get_insns ();
12728 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
12729 share the LD_BASE result with other LD model accesses. */
12730 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
12731 UNSPEC_TLS_LD_BASE
);
12733 RTL_CONST_CALL_P (insns
) = 1;
12734 emit_libcall_block (insns
, base
, rax
, eqv
);
12737 emit_insn (gen_tls_local_dynamic_base_32 (base
, pic
, caddr
));
12740 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
12741 off
= gen_rtx_CONST (Pmode
, off
);
12743 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
12745 if (TARGET_GNU2_TLS
)
12747 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
12749 set_unique_reg_note (get_last_insn (), REG_EQUAL
, x
);
12753 case TLS_MODEL_INITIAL_EXEC
:
12756 if (TARGET_SUN_TLS
)
12758 /* The Sun linker took the AMD64 TLS spec literally
12759 and can only handle %rax as destination of the
12760 initial executable code sequence. */
12762 dest
= gen_reg_rtx (Pmode
);
12763 emit_insn (gen_tls_initial_exec_64_sun (dest
, x
));
12768 type
= UNSPEC_GOTNTPOFF
;
12772 if (reload_in_progress
)
12773 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12774 pic
= pic_offset_table_rtx
;
12775 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
12777 else if (!TARGET_ANY_GNU_TLS
)
12779 pic
= gen_reg_rtx (Pmode
);
12780 emit_insn (gen_set_got (pic
));
12781 type
= UNSPEC_GOTTPOFF
;
12786 type
= UNSPEC_INDNTPOFF
;
12789 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
12790 off
= gen_rtx_CONST (Pmode
, off
);
12792 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
12793 off
= gen_const_mem (Pmode
, off
);
12794 set_mem_alias_set (off
, ix86_GOT_alias_set ());
12796 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
12798 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
12799 off
= force_reg (Pmode
, off
);
12800 return gen_rtx_PLUS (Pmode
, base
, off
);
12804 base
= get_thread_pointer (true);
12805 dest
= gen_reg_rtx (Pmode
);
12806 emit_insn (gen_subsi3 (dest
, base
, off
));
12810 case TLS_MODEL_LOCAL_EXEC
:
12811 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
12812 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
12813 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
12814 off
= gen_rtx_CONST (Pmode
, off
);
12816 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
12818 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
12819 return gen_rtx_PLUS (Pmode
, base
, off
);
12823 base
= get_thread_pointer (true);
12824 dest
= gen_reg_rtx (Pmode
);
12825 emit_insn (gen_subsi3 (dest
, base
, off
));
12830 gcc_unreachable ();
12836 /* Create or return the unique __imp_DECL dllimport symbol corresponding
12839 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map
)))
12840 htab_t dllimport_map
;
12843 get_dllimport_decl (tree decl
)
12845 struct tree_map
*h
, in
;
12848 const char *prefix
;
12849 size_t namelen
, prefixlen
;
12854 if (!dllimport_map
)
12855 dllimport_map
= htab_create_ggc (512, tree_map_hash
, tree_map_eq
, 0);
12857 in
.hash
= htab_hash_pointer (decl
);
12858 in
.base
.from
= decl
;
12859 loc
= htab_find_slot_with_hash (dllimport_map
, &in
, in
.hash
, INSERT
);
12860 h
= (struct tree_map
*) *loc
;
12864 *loc
= h
= ggc_alloc_tree_map ();
12866 h
->base
.from
= decl
;
12867 h
->to
= to
= build_decl (DECL_SOURCE_LOCATION (decl
),
12868 VAR_DECL
, NULL
, ptr_type_node
);
12869 DECL_ARTIFICIAL (to
) = 1;
12870 DECL_IGNORED_P (to
) = 1;
12871 DECL_EXTERNAL (to
) = 1;
12872 TREE_READONLY (to
) = 1;
12874 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
12875 name
= targetm
.strip_name_encoding (name
);
12876 prefix
= name
[0] == FASTCALL_PREFIX
|| user_label_prefix
[0] == 0
12877 ? "*__imp_" : "*__imp__";
12878 namelen
= strlen (name
);
12879 prefixlen
= strlen (prefix
);
12880 imp_name
= (char *) alloca (namelen
+ prefixlen
+ 1);
12881 memcpy (imp_name
, prefix
, prefixlen
);
12882 memcpy (imp_name
+ prefixlen
, name
, namelen
+ 1);
12884 name
= ggc_alloc_string (imp_name
, namelen
+ prefixlen
);
12885 rtl
= gen_rtx_SYMBOL_REF (Pmode
, name
);
12886 SET_SYMBOL_REF_DECL (rtl
, to
);
12887 SYMBOL_REF_FLAGS (rtl
) = SYMBOL_FLAG_LOCAL
;
12889 rtl
= gen_const_mem (Pmode
, rtl
);
12890 set_mem_alias_set (rtl
, ix86_GOT_alias_set ());
12892 SET_DECL_RTL (to
, rtl
);
12893 SET_DECL_ASSEMBLER_NAME (to
, get_identifier (name
));
12898 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
12899 true if we require the result be a register. */
12902 legitimize_dllimport_symbol (rtx symbol
, bool want_reg
)
12907 gcc_assert (SYMBOL_REF_DECL (symbol
));
12908 imp_decl
= get_dllimport_decl (SYMBOL_REF_DECL (symbol
));
12910 x
= DECL_RTL (imp_decl
);
12912 x
= force_reg (Pmode
, x
);
12916 /* Try machine-dependent ways of modifying an illegitimate address
12917 to be legitimate. If we find one, return the new, valid address.
12918 This macro is used in only one place: `memory_address' in explow.c.
12920 OLDX is the address as it was before break_out_memory_refs was called.
12921 In some cases it is useful to look at this to decide what needs to be done.
12923 It is always safe for this macro to do nothing. It exists to recognize
12924 opportunities to optimize the output.
12926 For the 80386, we handle X+REG by loading X into a register R and
12927 using R+REG. R will go in a general reg and indexing will be used.
12928 However, if REG is a broken-out memory address or multiplication,
12929 nothing needs to be done because REG can certainly go in a general reg.
12931 When -fpic is used, special handling is needed for symbolic references.
12932 See comments by legitimize_pic_address in i386.c for details. */
12935 ix86_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
12936 enum machine_mode mode
)
12941 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
12943 return legitimize_tls_address (x
, (enum tls_model
) log
, false);
12944 if (GET_CODE (x
) == CONST
12945 && GET_CODE (XEXP (x
, 0)) == PLUS
12946 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
12947 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
12949 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0),
12950 (enum tls_model
) log
, false);
12951 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
12954 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
12956 if (GET_CODE (x
) == SYMBOL_REF
&& SYMBOL_REF_DLLIMPORT_P (x
))
12957 return legitimize_dllimport_symbol (x
, true);
12958 if (GET_CODE (x
) == CONST
12959 && GET_CODE (XEXP (x
, 0)) == PLUS
12960 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
12961 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x
, 0), 0)))
12963 rtx t
= legitimize_dllimport_symbol (XEXP (XEXP (x
, 0), 0), true);
12964 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
12968 if (flag_pic
&& SYMBOLIC_CONST (x
))
12969 return legitimize_pic_address (x
, 0);
12972 if (MACHO_DYNAMIC_NO_PIC_P
&& SYMBOLIC_CONST (x
))
12973 return machopic_indirect_data_reference (x
, 0);
12976 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
12977 if (GET_CODE (x
) == ASHIFT
12978 && CONST_INT_P (XEXP (x
, 1))
12979 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
12982 log
= INTVAL (XEXP (x
, 1));
12983 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
12984 GEN_INT (1 << log
));
12987 if (GET_CODE (x
) == PLUS
)
12989 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12991 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
12992 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
12993 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
12996 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
12997 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
12998 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
12999 GEN_INT (1 << log
));
13002 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
13003 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
13004 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
13007 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
13008 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
13009 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
13010 GEN_INT (1 << log
));
13013 /* Put multiply first if it isn't already. */
13014 if (GET_CODE (XEXP (x
, 1)) == MULT
)
13016 rtx tmp
= XEXP (x
, 0);
13017 XEXP (x
, 0) = XEXP (x
, 1);
13022 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13023 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13024 created by virtual register instantiation, register elimination, and
13025 similar optimizations. */
13026 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
13029 x
= gen_rtx_PLUS (Pmode
,
13030 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
13031 XEXP (XEXP (x
, 1), 0)),
13032 XEXP (XEXP (x
, 1), 1));
13036 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13037 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13038 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
13039 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
13040 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
13041 && CONSTANT_P (XEXP (x
, 1)))
13044 rtx other
= NULL_RTX
;
13046 if (CONST_INT_P (XEXP (x
, 1)))
13048 constant
= XEXP (x
, 1);
13049 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
13051 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
13053 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
13054 other
= XEXP (x
, 1);
13062 x
= gen_rtx_PLUS (Pmode
,
13063 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
13064 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
13065 plus_constant (other
, INTVAL (constant
)));
13069 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
13072 if (GET_CODE (XEXP (x
, 0)) == MULT
)
13075 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
13078 if (GET_CODE (XEXP (x
, 1)) == MULT
)
13081 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
13085 && REG_P (XEXP (x
, 1))
13086 && REG_P (XEXP (x
, 0)))
13089 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
13092 x
= legitimize_pic_address (x
, 0);
13095 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
13098 if (REG_P (XEXP (x
, 0)))
13100 rtx temp
= gen_reg_rtx (Pmode
);
13101 rtx val
= force_operand (XEXP (x
, 1), temp
);
13104 if (GET_MODE (val
) != Pmode
)
13105 val
= convert_to_mode (Pmode
, val
, 1);
13106 emit_move_insn (temp
, val
);
13109 XEXP (x
, 1) = temp
;
13113 else if (REG_P (XEXP (x
, 1)))
13115 rtx temp
= gen_reg_rtx (Pmode
);
13116 rtx val
= force_operand (XEXP (x
, 0), temp
);
13119 if (GET_MODE (val
) != Pmode
)
13120 val
= convert_to_mode (Pmode
, val
, 1);
13121 emit_move_insn (temp
, val
);
13124 XEXP (x
, 0) = temp
;
13132 /* Print an integer constant expression in assembler syntax. Addition
13133 and subtraction are the only arithmetic that may appear in these
13134 expressions. FILE is the stdio stream to write to, X is the rtx, and
13135 CODE is the operand print code from the output string. */
13138 output_pic_addr_const (FILE *file
, rtx x
, int code
)
13142 switch (GET_CODE (x
))
13145 gcc_assert (flag_pic
);
13150 if (TARGET_64BIT
|| ! TARGET_MACHO_BRANCH_ISLANDS
)
13151 output_addr_const (file
, x
);
13154 const char *name
= XSTR (x
, 0);
13156 /* Mark the decl as referenced so that cgraph will
13157 output the function. */
13158 if (SYMBOL_REF_DECL (x
))
13159 mark_decl_referenced (SYMBOL_REF_DECL (x
));
13162 if (MACHOPIC_INDIRECT
13163 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
13164 name
= machopic_indirection_name (x
, /*stub_p=*/true);
13166 assemble_name (file
, name
);
13168 if (!TARGET_MACHO
&& !(TARGET_64BIT
&& DEFAULT_ABI
== MS_ABI
)
13169 && code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
13170 fputs ("@PLT", file
);
13177 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
13178 assemble_name (asm_out_file
, buf
);
13182 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
13186 /* This used to output parentheses around the expression,
13187 but that does not work on the 386 (either ATT or BSD assembler). */
13188 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13192 if (GET_MODE (x
) == VOIDmode
)
13194 /* We can use %d if the number is <32 bits and positive. */
13195 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
13196 fprintf (file
, "0x%lx%08lx",
13197 (unsigned long) CONST_DOUBLE_HIGH (x
),
13198 (unsigned long) CONST_DOUBLE_LOW (x
));
13200 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
13203 /* We can't handle floating point constants;
13204 TARGET_PRINT_OPERAND must handle them. */
13205 output_operand_lossage ("floating constant misused");
13209 /* Some assemblers need integer constants to appear first. */
13210 if (CONST_INT_P (XEXP (x
, 0)))
13212 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13214 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13218 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
13219 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13221 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13227 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
13228 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13230 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13232 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
13236 if (XINT (x
, 1) == UNSPEC_STACK_CHECK
)
13238 bool f
= i386_asm_output_addr_const_extra (file
, x
);
13243 gcc_assert (XVECLEN (x
, 0) == 1);
13244 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
13245 switch (XINT (x
, 1))
13248 fputs ("@GOT", file
);
13250 case UNSPEC_GOTOFF
:
13251 fputs ("@GOTOFF", file
);
13253 case UNSPEC_PLTOFF
:
13254 fputs ("@PLTOFF", file
);
13257 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13258 "(%rip)" : "[rip]", file
);
13260 case UNSPEC_GOTPCREL
:
13261 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13262 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file
);
13264 case UNSPEC_GOTTPOFF
:
13265 /* FIXME: This might be @TPOFF in Sun ld too. */
13266 fputs ("@gottpoff", file
);
13269 fputs ("@tpoff", file
);
13271 case UNSPEC_NTPOFF
:
13273 fputs ("@tpoff", file
);
13275 fputs ("@ntpoff", file
);
13277 case UNSPEC_DTPOFF
:
13278 fputs ("@dtpoff", file
);
13280 case UNSPEC_GOTNTPOFF
:
13282 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13283 "@gottpoff(%rip)": "@gottpoff[rip]", file
);
13285 fputs ("@gotntpoff", file
);
13287 case UNSPEC_INDNTPOFF
:
13288 fputs ("@indntpoff", file
);
13291 case UNSPEC_MACHOPIC_OFFSET
:
13293 machopic_output_function_base_name (file
);
13297 output_operand_lossage ("invalid UNSPEC as operand");
13303 output_operand_lossage ("invalid expression as operand");
13307 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13308 We need to emit DTP-relative relocations. */
13310 static void ATTRIBUTE_UNUSED
13311 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
13313 fputs (ASM_LONG
, file
);
13314 output_addr_const (file
, x
);
13315 fputs ("@dtpoff", file
);
13321 fputs (", 0", file
);
13324 gcc_unreachable ();
13328 /* Return true if X is a representation of the PIC register. This copes
13329 with calls from ix86_find_base_term, where the register might have
13330 been replaced by a cselib value. */
13333 ix86_pic_register_p (rtx x
)
13335 if (GET_CODE (x
) == VALUE
&& CSELIB_VAL_PTR (x
))
13336 return (pic_offset_table_rtx
13337 && rtx_equal_for_cselib_p (x
, pic_offset_table_rtx
));
13339 return REG_P (x
) && REGNO (x
) == PIC_OFFSET_TABLE_REGNUM
;
13342 /* Helper function for ix86_delegitimize_address.
13343 Attempt to delegitimize TLS local-exec accesses. */
13346 ix86_delegitimize_tls_address (rtx orig_x
)
13348 rtx x
= orig_x
, unspec
;
13349 struct ix86_address addr
;
13351 if (!TARGET_TLS_DIRECT_SEG_REFS
)
13355 if (GET_CODE (x
) != PLUS
|| GET_MODE (x
) != Pmode
)
13357 if (ix86_decompose_address (x
, &addr
) == 0
13358 || addr
.seg
!= (TARGET_64BIT
? SEG_FS
: SEG_GS
)
13359 || addr
.disp
== NULL_RTX
13360 || GET_CODE (addr
.disp
) != CONST
)
13362 unspec
= XEXP (addr
.disp
, 0);
13363 if (GET_CODE (unspec
) == PLUS
&& CONST_INT_P (XEXP (unspec
, 1)))
13364 unspec
= XEXP (unspec
, 0);
13365 if (GET_CODE (unspec
) != UNSPEC
|| XINT (unspec
, 1) != UNSPEC_NTPOFF
)
13367 x
= XVECEXP (unspec
, 0, 0);
13368 gcc_assert (GET_CODE (x
) == SYMBOL_REF
);
13369 if (unspec
!= XEXP (addr
.disp
, 0))
13370 x
= gen_rtx_PLUS (Pmode
, x
, XEXP (XEXP (addr
.disp
, 0), 1));
13373 rtx idx
= addr
.index
;
13374 if (addr
.scale
!= 1)
13375 idx
= gen_rtx_MULT (Pmode
, idx
, GEN_INT (addr
.scale
));
13376 x
= gen_rtx_PLUS (Pmode
, idx
, x
);
13379 x
= gen_rtx_PLUS (Pmode
, addr
.base
, x
);
13380 if (MEM_P (orig_x
))
13381 x
= replace_equiv_address_nv (orig_x
, x
);
13385 /* In the name of slightly smaller debug output, and to cater to
13386 general assembler lossage, recognize PIC+GOTOFF and turn it back
13387 into a direct symbol reference.
13389 On Darwin, this is necessary to avoid a crash, because Darwin
13390 has a different PIC label for each routine but the DWARF debugging
13391 information is not associated with any particular routine, so it's
13392 necessary to remove references to the PIC label from RTL stored by
13393 the DWARF output code. */
13396 ix86_delegitimize_address (rtx x
)
13398 rtx orig_x
= delegitimize_mem_from_attrs (x
);
13399 /* addend is NULL or some rtx if x is something+GOTOFF where
13400 something doesn't include the PIC register. */
13401 rtx addend
= NULL_RTX
;
13402 /* reg_addend is NULL or a multiple of some register. */
13403 rtx reg_addend
= NULL_RTX
;
13404 /* const_addend is NULL or a const_int. */
13405 rtx const_addend
= NULL_RTX
;
13406 /* This is the result, or NULL. */
13407 rtx result
= NULL_RTX
;
13416 if (GET_CODE (x
) != CONST
13417 || GET_CODE (XEXP (x
, 0)) != UNSPEC
13418 || (XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
13419 && XINT (XEXP (x
, 0), 1) != UNSPEC_PCREL
)
13420 || !MEM_P (orig_x
))
13421 return ix86_delegitimize_tls_address (orig_x
);
13422 x
= XVECEXP (XEXP (x
, 0), 0, 0);
13423 if (GET_MODE (orig_x
) != GET_MODE (x
))
13425 x
= simplify_gen_subreg (GET_MODE (orig_x
), x
,
13433 if (GET_CODE (x
) != PLUS
13434 || GET_CODE (XEXP (x
, 1)) != CONST
)
13435 return ix86_delegitimize_tls_address (orig_x
);
13437 if (ix86_pic_register_p (XEXP (x
, 0)))
13438 /* %ebx + GOT/GOTOFF */
13440 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
13442 /* %ebx + %reg * scale + GOT/GOTOFF */
13443 reg_addend
= XEXP (x
, 0);
13444 if (ix86_pic_register_p (XEXP (reg_addend
, 0)))
13445 reg_addend
= XEXP (reg_addend
, 1);
13446 else if (ix86_pic_register_p (XEXP (reg_addend
, 1)))
13447 reg_addend
= XEXP (reg_addend
, 0);
13450 reg_addend
= NULL_RTX
;
13451 addend
= XEXP (x
, 0);
13455 addend
= XEXP (x
, 0);
13457 x
= XEXP (XEXP (x
, 1), 0);
13458 if (GET_CODE (x
) == PLUS
13459 && CONST_INT_P (XEXP (x
, 1)))
13461 const_addend
= XEXP (x
, 1);
13465 if (GET_CODE (x
) == UNSPEC
13466 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
) && !addend
)
13467 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))))
13468 result
= XVECEXP (x
, 0, 0);
13470 if (TARGET_MACHO
&& darwin_local_data_pic (x
)
13471 && !MEM_P (orig_x
))
13472 result
= XVECEXP (x
, 0, 0);
13475 return ix86_delegitimize_tls_address (orig_x
);
13478 result
= gen_rtx_CONST (Pmode
, gen_rtx_PLUS (Pmode
, result
, const_addend
));
13480 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
13483 /* If the rest of original X doesn't involve the PIC register, add
13484 addend and subtract pic_offset_table_rtx. This can happen e.g.
13486 leal (%ebx, %ecx, 4), %ecx
13488 movl foo@GOTOFF(%ecx), %edx
13489 in which case we return (%ecx - %ebx) + foo. */
13490 if (pic_offset_table_rtx
)
13491 result
= gen_rtx_PLUS (Pmode
, gen_rtx_MINUS (Pmode
, copy_rtx (addend
),
13492 pic_offset_table_rtx
),
13497 if (GET_MODE (orig_x
) != Pmode
&& MEM_P (orig_x
))
13499 result
= simplify_gen_subreg (GET_MODE (orig_x
), result
, Pmode
, 0);
13500 if (result
== NULL_RTX
)
13506 /* If X is a machine specific address (i.e. a symbol or label being
13507 referenced as a displacement from the GOT implemented using an
13508 UNSPEC), then return the base term. Otherwise return X. */
13511 ix86_find_base_term (rtx x
)
13517 if (GET_CODE (x
) != CONST
)
13519 term
= XEXP (x
, 0);
13520 if (GET_CODE (term
) == PLUS
13521 && (CONST_INT_P (XEXP (term
, 1))
13522 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
13523 term
= XEXP (term
, 0);
13524 if (GET_CODE (term
) != UNSPEC
13525 || (XINT (term
, 1) != UNSPEC_GOTPCREL
13526 && XINT (term
, 1) != UNSPEC_PCREL
))
13529 return XVECEXP (term
, 0, 0);
13532 return ix86_delegitimize_address (x
);
13536 put_condition_code (enum rtx_code code
, enum machine_mode mode
, int reverse
,
13537 int fp
, FILE *file
)
13539 const char *suffix
;
13541 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
13543 code
= ix86_fp_compare_code_to_integer (code
);
13547 code
= reverse_condition (code
);
13598 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
13602 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13603 Those same assemblers have the same but opposite lossage on cmov. */
13604 if (mode
== CCmode
)
13605 suffix
= fp
? "nbe" : "a";
13606 else if (mode
== CCCmode
)
13609 gcc_unreachable ();
13625 gcc_unreachable ();
13629 gcc_assert (mode
== CCmode
|| mode
== CCCmode
);
13646 gcc_unreachable ();
13650 /* ??? As above. */
13651 gcc_assert (mode
== CCmode
|| mode
== CCCmode
);
13652 suffix
= fp
? "nb" : "ae";
13655 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
13659 /* ??? As above. */
13660 if (mode
== CCmode
)
13662 else if (mode
== CCCmode
)
13663 suffix
= fp
? "nb" : "ae";
13665 gcc_unreachable ();
13668 suffix
= fp
? "u" : "p";
13671 suffix
= fp
? "nu" : "np";
13674 gcc_unreachable ();
13676 fputs (suffix
, file
);
13679 /* Print the name of register X to FILE based on its machine mode and number.
13680 If CODE is 'w', pretend the mode is HImode.
13681 If CODE is 'b', pretend the mode is QImode.
13682 If CODE is 'k', pretend the mode is SImode.
13683 If CODE is 'q', pretend the mode is DImode.
13684 If CODE is 'x', pretend the mode is V4SFmode.
13685 If CODE is 't', pretend the mode is V8SFmode.
13686 If CODE is 'h', pretend the reg is the 'high' byte register.
13687 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
13688 If CODE is 'd', duplicate the operand for AVX instruction.
13692 print_reg (rtx x
, int code
, FILE *file
)
13695 bool duplicated
= code
== 'd' && TARGET_AVX
;
13697 gcc_assert (x
== pc_rtx
13698 || (REGNO (x
) != ARG_POINTER_REGNUM
13699 && REGNO (x
) != FRAME_POINTER_REGNUM
13700 && REGNO (x
) != FLAGS_REG
13701 && REGNO (x
) != FPSR_REG
13702 && REGNO (x
) != FPCR_REG
));
13704 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13709 gcc_assert (TARGET_64BIT
);
13710 fputs ("rip", file
);
13714 if (code
== 'w' || MMX_REG_P (x
))
13716 else if (code
== 'b')
13718 else if (code
== 'k')
13720 else if (code
== 'q')
13722 else if (code
== 'y')
13724 else if (code
== 'h')
13726 else if (code
== 'x')
13728 else if (code
== 't')
13731 code
= GET_MODE_SIZE (GET_MODE (x
));
13733 /* Irritatingly, AMD extended registers use different naming convention
13734 from the normal registers: "r%d[bwd]" */
13735 if (REX_INT_REG_P (x
))
13737 gcc_assert (TARGET_64BIT
);
13739 fprint_ul (file
, REGNO (x
) - FIRST_REX_INT_REG
+ 8);
13743 error ("extended registers have no high halves");
13758 error ("unsupported operand size for extended register");
13768 if (STACK_TOP_P (x
))
13777 if (! ANY_FP_REG_P (x
))
13778 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
13783 reg
= hi_reg_name
[REGNO (x
)];
13786 if (REGNO (x
) >= ARRAY_SIZE (qi_reg_name
))
13788 reg
= qi_reg_name
[REGNO (x
)];
13791 if (REGNO (x
) >= ARRAY_SIZE (qi_high_reg_name
))
13793 reg
= qi_high_reg_name
[REGNO (x
)];
13798 gcc_assert (!duplicated
);
13800 fputs (hi_reg_name
[REGNO (x
)] + 1, file
);
13805 gcc_unreachable ();
13811 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13812 fprintf (file
, ", %%%s", reg
);
13814 fprintf (file
, ", %s", reg
);
13818 /* Locate some local-dynamic symbol still in use by this function
13819 so that we can print its name in some tls_local_dynamic_base
13823 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
13827 if (GET_CODE (x
) == SYMBOL_REF
13828 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
13830 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
13837 static const char *
13838 get_some_local_dynamic_name (void)
13842 if (cfun
->machine
->some_ld_name
)
13843 return cfun
->machine
->some_ld_name
;
13845 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
13846 if (NONDEBUG_INSN_P (insn
)
13847 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
13848 return cfun
->machine
->some_ld_name
;
13853 /* Meaning of CODE:
13854 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13855 C -- print opcode suffix for set/cmov insn.
13856 c -- like C, but print reversed condition
13857 F,f -- likewise, but for floating-point.
13858 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13860 R -- print the prefix for register names.
13861 z -- print the opcode suffix for the size of the current operand.
13862 Z -- likewise, with special suffixes for x87 instructions.
13863 * -- print a star (in certain assembler syntax)
13864 A -- print an absolute memory reference.
13865 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13866 s -- print a shift double count, followed by the assemblers argument
13868 b -- print the QImode name of the register for the indicated operand.
13869 %b0 would print %al if operands[0] is reg 0.
13870 w -- likewise, print the HImode name of the register.
13871 k -- likewise, print the SImode name of the register.
13872 q -- likewise, print the DImode name of the register.
13873 x -- likewise, print the V4SFmode name of the register.
13874 t -- likewise, print the V8SFmode name of the register.
13875 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13876 y -- print "st(0)" instead of "st" as a register.
13877 d -- print duplicated register operand for AVX instruction.
13878 D -- print condition for SSE cmp instruction.
13879 P -- if PIC, print an @PLT suffix.
13880 p -- print raw symbol name.
13881 X -- don't print any sort of PIC '@' suffix for a symbol.
13882 & -- print some in-use local-dynamic symbol name.
13883 H -- print a memory address offset by 8; used for sse high-parts
13884 Y -- print condition for XOP pcom* instruction.
13885 + -- print a branch hint as 'cs' or 'ds' prefix
13886 ; -- print a semicolon (after prefixes due to bug in older gas).
13887 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
13888 @ -- print a segment register of thread base pointer load
13892 ix86_print_operand (FILE *file
, rtx x
, int code
)
13899 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13905 const char *name
= get_some_local_dynamic_name ();
13907 output_operand_lossage ("'%%&' used without any "
13908 "local dynamic TLS references");
13910 assemble_name (file
, name
);
13915 switch (ASSEMBLER_DIALECT
)
13922 /* Intel syntax. For absolute addresses, registers should not
13923 be surrounded by braces. */
13927 ix86_print_operand (file
, x
, 0);
13934 gcc_unreachable ();
13937 ix86_print_operand (file
, x
, 0);
13942 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13947 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13952 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13957 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13962 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13967 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13972 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
13974 /* Opcodes don't get size suffixes if using Intel opcodes. */
13975 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
13978 switch (GET_MODE_SIZE (GET_MODE (x
)))
13997 output_operand_lossage
13998 ("invalid operand size for operand code '%c'", code
);
14003 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14005 (0, "non-integer operand used with operand code '%c'", code
);
14009 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14010 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
14013 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
14015 switch (GET_MODE_SIZE (GET_MODE (x
)))
14018 #ifdef HAVE_AS_IX86_FILDS
14028 #ifdef HAVE_AS_IX86_FILDQ
14031 fputs ("ll", file
);
14039 else if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14041 /* 387 opcodes don't get size suffixes
14042 if the operands are registers. */
14043 if (STACK_REG_P (x
))
14046 switch (GET_MODE_SIZE (GET_MODE (x
)))
14067 output_operand_lossage
14068 ("invalid operand type used with operand code '%c'", code
);
14072 output_operand_lossage
14073 ("invalid operand size for operand code '%c'", code
);
14091 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
14093 ix86_print_operand (file
, x
, 0);
14094 fputs (", ", file
);
14099 /* Little bit of braindamage here. The SSE compare instructions
14100 does use completely different names for the comparisons that the
14101 fp conditional moves. */
14104 switch (GET_CODE (x
))
14107 fputs ("eq", file
);
14110 fputs ("eq_us", file
);
14113 fputs ("lt", file
);
14116 fputs ("nge", file
);
14119 fputs ("le", file
);
14122 fputs ("ngt", file
);
14125 fputs ("unord", file
);
14128 fputs ("neq", file
);
14131 fputs ("neq_oq", file
);
14134 fputs ("ge", file
);
14137 fputs ("nlt", file
);
14140 fputs ("gt", file
);
14143 fputs ("nle", file
);
14146 fputs ("ord", file
);
14149 output_operand_lossage ("operand is not a condition code, "
14150 "invalid operand code 'D'");
14156 switch (GET_CODE (x
))
14160 fputs ("eq", file
);
14164 fputs ("lt", file
);
14168 fputs ("le", file
);
14171 fputs ("unord", file
);
14175 fputs ("neq", file
);
14179 fputs ("nlt", file
);
14183 fputs ("nle", file
);
14186 fputs ("ord", file
);
14189 output_operand_lossage ("operand is not a condition code, "
14190 "invalid operand code 'D'");
14196 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14197 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14199 switch (GET_MODE (x
))
14201 case HImode
: putc ('w', file
); break;
14203 case SFmode
: putc ('l', file
); break;
14205 case DFmode
: putc ('q', file
); break;
14206 default: gcc_unreachable ();
14213 if (!COMPARISON_P (x
))
14215 output_operand_lossage ("operand is neither a constant nor a "
14216 "condition code, invalid operand code "
14220 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
14223 if (!COMPARISON_P (x
))
14225 output_operand_lossage ("operand is neither a constant nor a "
14226 "condition code, invalid operand code "
14230 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14231 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14234 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
14237 /* Like above, but reverse condition */
14239 /* Check to see if argument to %c is really a constant
14240 and not a condition code which needs to be reversed. */
14241 if (!COMPARISON_P (x
))
14243 output_operand_lossage ("operand is neither a constant nor a "
14244 "condition code, invalid operand "
14248 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
14251 if (!COMPARISON_P (x
))
14253 output_operand_lossage ("operand is neither a constant nor a "
14254 "condition code, invalid operand "
14258 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14259 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14262 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
14266 /* It doesn't actually matter what mode we use here, as we're
14267 only going to use this for printing. */
14268 x
= adjust_address_nv (x
, DImode
, 8);
14276 || optimize_function_for_size_p (cfun
) || !TARGET_BRANCH_PREDICTION_HINTS
)
14279 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
14282 int pred_val
= INTVAL (XEXP (x
, 0));
14284 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
14285 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
14287 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
14288 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
14290 /* Emit hints only in the case default branch prediction
14291 heuristics would fail. */
14292 if (taken
!= cputaken
)
14294 /* We use 3e (DS) prefix for taken branches and
14295 2e (CS) prefix for not taken branches. */
14297 fputs ("ds ; ", file
);
14299 fputs ("cs ; ", file
);
14307 switch (GET_CODE (x
))
14310 fputs ("neq", file
);
14313 fputs ("eq", file
);
14317 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "ge" : "unlt", file
);
14321 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "gt" : "unle", file
);
14325 fputs ("le", file
);
14329 fputs ("lt", file
);
14332 fputs ("unord", file
);
14335 fputs ("ord", file
);
14338 fputs ("ueq", file
);
14341 fputs ("nlt", file
);
14344 fputs ("nle", file
);
14347 fputs ("ule", file
);
14350 fputs ("ult", file
);
14353 fputs ("une", file
);
14356 output_operand_lossage ("operand is not a condition code, "
14357 "invalid operand code 'Y'");
14363 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14369 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14372 /* The kernel uses a different segment register for performance
14373 reasons; a system call would not have to trash the userspace
14374 segment register, which would be expensive. */
14375 if (TARGET_64BIT
&& ix86_cmodel
!= CM_KERNEL
)
14376 fputs ("fs", file
);
14378 fputs ("gs", file
);
14382 putc (TARGET_AVX2
? 'i' : 'f', file
);
14386 output_operand_lossage ("invalid operand code '%c'", code
);
14391 print_reg (x
, code
, file
);
14393 else if (MEM_P (x
))
14395 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
14396 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P'
14397 && GET_MODE (x
) != BLKmode
)
14400 switch (GET_MODE_SIZE (GET_MODE (x
)))
14402 case 1: size
= "BYTE"; break;
14403 case 2: size
= "WORD"; break;
14404 case 4: size
= "DWORD"; break;
14405 case 8: size
= "QWORD"; break;
14406 case 12: size
= "TBYTE"; break;
14408 if (GET_MODE (x
) == XFmode
)
14413 case 32: size
= "YMMWORD"; break;
14415 gcc_unreachable ();
14418 /* Check for explicit size override (codes 'b', 'w', 'k',
14422 else if (code
== 'w')
14424 else if (code
== 'k')
14426 else if (code
== 'q')
14428 else if (code
== 'x')
14431 fputs (size
, file
);
14432 fputs (" PTR ", file
);
14436 /* Avoid (%rip) for call operands. */
14437 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
14438 && !CONST_INT_P (x
))
14439 output_addr_const (file
, x
);
14440 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
14441 output_operand_lossage ("invalid constraints for operand");
14443 output_address (x
);
14446 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
14451 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
14452 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
14454 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14456 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14458 fprintf (file
, "0x%08llx", (unsigned long long) (int) l
);
14460 fprintf (file
, "0x%08x", (unsigned int) l
);
14463 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
14468 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
14469 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
14471 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14473 fprintf (file
, "0x%lx%08lx", l
[1] & 0xffffffff, l
[0] & 0xffffffff);
14476 /* These float cases don't actually occur as immediate operands. */
14477 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == XFmode
)
14481 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
14482 fputs (dstr
, file
);
14487 /* We have patterns that allow zero sets of memory, for instance.
14488 In 64-bit mode, we should probably support all 8-byte vectors,
14489 since we can in fact encode that into an immediate. */
14490 if (GET_CODE (x
) == CONST_VECTOR
)
14492 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
14496 if (code
!= 'P' && code
!= 'p')
14498 if (CONST_INT_P (x
) || GET_CODE (x
) == CONST_DOUBLE
)
14500 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14503 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
14504 || GET_CODE (x
) == LABEL_REF
)
14506 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14509 fputs ("OFFSET FLAT:", file
);
14512 if (CONST_INT_P (x
))
14513 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
14514 else if (flag_pic
|| MACHOPIC_INDIRECT
)
14515 output_pic_addr_const (file
, x
, code
);
14517 output_addr_const (file
, x
);
14522 ix86_print_operand_punct_valid_p (unsigned char code
)
14524 return (code
== '@' || code
== '*' || code
== '+'
14525 || code
== '&' || code
== ';' || code
== '~');
14528 /* Print a memory operand whose address is ADDR. */
14531 ix86_print_operand_address (FILE *file
, rtx addr
)
14533 struct ix86_address parts
;
14534 rtx base
, index
, disp
;
14539 if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_VSIBADDR
)
14541 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
14542 gcc_assert (parts
.index
== NULL_RTX
);
14543 parts
.index
= XVECEXP (addr
, 0, 1);
14544 parts
.scale
= INTVAL (XVECEXP (addr
, 0, 2));
14545 addr
= XVECEXP (addr
, 0, 0);
14549 ok
= ix86_decompose_address (addr
, &parts
);
14553 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
14555 rtx tmp
= SUBREG_REG (parts
.base
);
14556 parts
.base
= simplify_subreg (GET_MODE (parts
.base
),
14557 tmp
, GET_MODE (tmp
), 0);
14560 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
14562 rtx tmp
= SUBREG_REG (parts
.index
);
14563 parts
.index
= simplify_subreg (GET_MODE (parts
.index
),
14564 tmp
, GET_MODE (tmp
), 0);
14568 index
= parts
.index
;
14570 scale
= parts
.scale
;
14578 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14580 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
14583 gcc_unreachable ();
14586 /* Use one byte shorter RIP relative addressing for 64bit mode. */
14587 if (TARGET_64BIT
&& !base
&& !index
)
14591 if (GET_CODE (disp
) == CONST
14592 && GET_CODE (XEXP (disp
, 0)) == PLUS
14593 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
14594 symbol
= XEXP (XEXP (disp
, 0), 0);
14596 if (GET_CODE (symbol
) == LABEL_REF
14597 || (GET_CODE (symbol
) == SYMBOL_REF
14598 && SYMBOL_REF_TLS_MODEL (symbol
) == 0))
14601 if (!base
&& !index
)
14603 /* Displacement only requires special attention. */
14605 if (CONST_INT_P (disp
))
14607 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
14608 fputs ("ds:", file
);
14609 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
14612 output_pic_addr_const (file
, disp
, 0);
14614 output_addr_const (file
, disp
);
14620 /* Print SImode registers for zero-extended addresses to force
14621 addr32 prefix. Otherwise print DImode registers to avoid it. */
14623 code
= ((GET_CODE (addr
) == ZERO_EXTEND
14624 || GET_CODE (addr
) == AND
)
14628 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14633 output_pic_addr_const (file
, disp
, 0);
14634 else if (GET_CODE (disp
) == LABEL_REF
)
14635 output_asm_label (disp
);
14637 output_addr_const (file
, disp
);
14642 print_reg (base
, code
, file
);
14646 print_reg (index
, vsib
? 0 : code
, file
);
14647 if (scale
!= 1 || vsib
)
14648 fprintf (file
, ",%d", scale
);
14654 rtx offset
= NULL_RTX
;
14658 /* Pull out the offset of a symbol; print any symbol itself. */
14659 if (GET_CODE (disp
) == CONST
14660 && GET_CODE (XEXP (disp
, 0)) == PLUS
14661 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
14663 offset
= XEXP (XEXP (disp
, 0), 1);
14664 disp
= gen_rtx_CONST (VOIDmode
,
14665 XEXP (XEXP (disp
, 0), 0));
14669 output_pic_addr_const (file
, disp
, 0);
14670 else if (GET_CODE (disp
) == LABEL_REF
)
14671 output_asm_label (disp
);
14672 else if (CONST_INT_P (disp
))
14675 output_addr_const (file
, disp
);
14681 print_reg (base
, code
, file
);
14684 if (INTVAL (offset
) >= 0)
14686 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
14690 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
14697 print_reg (index
, vsib
? 0 : code
, file
);
14698 if (scale
!= 1 || vsib
)
14699 fprintf (file
, "*%d", scale
);
14706 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14709 i386_asm_output_addr_const_extra (FILE *file
, rtx x
)
14713 if (GET_CODE (x
) != UNSPEC
)
14716 op
= XVECEXP (x
, 0, 0);
14717 switch (XINT (x
, 1))
14719 case UNSPEC_GOTTPOFF
:
14720 output_addr_const (file
, op
);
14721 /* FIXME: This might be @TPOFF in Sun ld. */
14722 fputs ("@gottpoff", file
);
14725 output_addr_const (file
, op
);
14726 fputs ("@tpoff", file
);
14728 case UNSPEC_NTPOFF
:
14729 output_addr_const (file
, op
);
14731 fputs ("@tpoff", file
);
14733 fputs ("@ntpoff", file
);
14735 case UNSPEC_DTPOFF
:
14736 output_addr_const (file
, op
);
14737 fputs ("@dtpoff", file
);
14739 case UNSPEC_GOTNTPOFF
:
14740 output_addr_const (file
, op
);
14742 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
14743 "@gottpoff(%rip)" : "@gottpoff[rip]", file
);
14745 fputs ("@gotntpoff", file
);
14747 case UNSPEC_INDNTPOFF
:
14748 output_addr_const (file
, op
);
14749 fputs ("@indntpoff", file
);
14752 case UNSPEC_MACHOPIC_OFFSET
:
14753 output_addr_const (file
, op
);
14755 machopic_output_function_base_name (file
);
14759 case UNSPEC_STACK_CHECK
:
14763 gcc_assert (flag_split_stack
);
14765 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14766 offset
= TARGET_THREAD_SPLIT_STACK_OFFSET
;
14768 gcc_unreachable ();
14771 fprintf (file
, "%s:%d", TARGET_64BIT
? "%fs" : "%gs", offset
);
14782 /* Split one or more double-mode RTL references into pairs of half-mode
14783 references. The RTL can be REG, offsettable MEM, integer constant, or
14784 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
14785 split and "num" is its length. lo_half and hi_half are output arrays
14786 that parallel "operands". */
14789 split_double_mode (enum machine_mode mode
, rtx operands
[],
14790 int num
, rtx lo_half
[], rtx hi_half
[])
14792 enum machine_mode half_mode
;
14798 half_mode
= DImode
;
14801 half_mode
= SImode
;
14804 gcc_unreachable ();
14807 byte
= GET_MODE_SIZE (half_mode
);
14811 rtx op
= operands
[num
];
14813 /* simplify_subreg refuse to split volatile memory addresses,
14814 but we still have to handle it. */
14817 lo_half
[num
] = adjust_address (op
, half_mode
, 0);
14818 hi_half
[num
] = adjust_address (op
, half_mode
, byte
);
14822 lo_half
[num
] = simplify_gen_subreg (half_mode
, op
,
14823 GET_MODE (op
) == VOIDmode
14824 ? mode
: GET_MODE (op
), 0);
14825 hi_half
[num
] = simplify_gen_subreg (half_mode
, op
,
14826 GET_MODE (op
) == VOIDmode
14827 ? mode
: GET_MODE (op
), byte
);
14832 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14833 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14834 is the expression of the binary operation. The output may either be
14835 emitted here, or returned to the caller, like all output_* functions.
14837 There is no guarantee that the operands are the same mode, as they
14838 might be within FLOAT or FLOAT_EXTEND expressions. */
14840 #ifndef SYSV386_COMPAT
14841 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14842 wants to fix the assemblers because that causes incompatibility
14843 with gcc. No-one wants to fix gcc because that causes
14844 incompatibility with assemblers... You can use the option of
14845 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14846 #define SYSV386_COMPAT 1
14850 output_387_binary_op (rtx insn
, rtx
*operands
)
14852 static char buf
[40];
14855 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
14857 #ifdef ENABLE_CHECKING
14858 /* Even if we do not want to check the inputs, this documents input
14859 constraints. Which helps in understanding the following code. */
14860 if (STACK_REG_P (operands
[0])
14861 && ((REG_P (operands
[1])
14862 && REGNO (operands
[0]) == REGNO (operands
[1])
14863 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
14864 || (REG_P (operands
[2])
14865 && REGNO (operands
[0]) == REGNO (operands
[2])
14866 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
14867 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
14870 gcc_assert (is_sse
);
14873 switch (GET_CODE (operands
[3]))
14876 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
14877 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
14885 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
14886 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
14894 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
14895 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
14903 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
14904 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
14912 gcc_unreachable ();
14919 strcpy (buf
, ssep
);
14920 if (GET_MODE (operands
[0]) == SFmode
)
14921 strcat (buf
, "ss\t{%2, %1, %0|%0, %1, %2}");
14923 strcat (buf
, "sd\t{%2, %1, %0|%0, %1, %2}");
14927 strcpy (buf
, ssep
+ 1);
14928 if (GET_MODE (operands
[0]) == SFmode
)
14929 strcat (buf
, "ss\t{%2, %0|%0, %2}");
14931 strcat (buf
, "sd\t{%2, %0|%0, %2}");
14937 switch (GET_CODE (operands
[3]))
14941 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
14943 rtx temp
= operands
[2];
14944 operands
[2] = operands
[1];
14945 operands
[1] = temp
;
14948 /* know operands[0] == operands[1]. */
14950 if (MEM_P (operands
[2]))
14956 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
14958 if (STACK_TOP_P (operands
[0]))
14959 /* How is it that we are storing to a dead operand[2]?
14960 Well, presumably operands[1] is dead too. We can't
14961 store the result to st(0) as st(0) gets popped on this
14962 instruction. Instead store to operands[2] (which I
14963 think has to be st(1)). st(1) will be popped later.
14964 gcc <= 2.8.1 didn't have this check and generated
14965 assembly code that the Unixware assembler rejected. */
14966 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14968 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14972 if (STACK_TOP_P (operands
[0]))
14973 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14975 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14980 if (MEM_P (operands
[1]))
14986 if (MEM_P (operands
[2]))
14992 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
14995 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
14996 derived assemblers, confusingly reverse the direction of
14997 the operation for fsub{r} and fdiv{r} when the
14998 destination register is not st(0). The Intel assembler
14999 doesn't have this brain damage. Read !SYSV386_COMPAT to
15000 figure out what the hardware really does. */
15001 if (STACK_TOP_P (operands
[0]))
15002 p
= "{p\t%0, %2|rp\t%2, %0}";
15004 p
= "{rp\t%2, %0|p\t%0, %2}";
15006 if (STACK_TOP_P (operands
[0]))
15007 /* As above for fmul/fadd, we can't store to st(0). */
15008 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15010 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15015 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
15018 if (STACK_TOP_P (operands
[0]))
15019 p
= "{rp\t%0, %1|p\t%1, %0}";
15021 p
= "{p\t%1, %0|rp\t%0, %1}";
15023 if (STACK_TOP_P (operands
[0]))
15024 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
15026 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
15031 if (STACK_TOP_P (operands
[0]))
15033 if (STACK_TOP_P (operands
[1]))
15034 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15036 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
15039 else if (STACK_TOP_P (operands
[1]))
15042 p
= "{\t%1, %0|r\t%0, %1}";
15044 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
15050 p
= "{r\t%2, %0|\t%0, %2}";
15052 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15058 gcc_unreachable ();
15065 /* Return needed mode for entity in optimize_mode_switching pass. */
15068 ix86_mode_needed (int entity
, rtx insn
)
15070 enum attr_i387_cw mode
;
15072 /* The mode UNINITIALIZED is used to store control word after a
15073 function call or ASM pattern. The mode ANY specify that function
15074 has no requirements on the control word and make no changes in the
15075 bits we are interested in. */
15078 || (NONJUMP_INSN_P (insn
)
15079 && (asm_noperands (PATTERN (insn
)) >= 0
15080 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
15081 return I387_CW_UNINITIALIZED
;
15083 if (recog_memoized (insn
) < 0)
15084 return I387_CW_ANY
;
15086 mode
= get_attr_i387_cw (insn
);
15091 if (mode
== I387_CW_TRUNC
)
15096 if (mode
== I387_CW_FLOOR
)
15101 if (mode
== I387_CW_CEIL
)
15106 if (mode
== I387_CW_MASK_PM
)
15111 gcc_unreachable ();
15114 return I387_CW_ANY
;
15117 /* Output code to initialize control word copies used by trunc?f?i and
15118 rounding patterns. CURRENT_MODE is set to current control word,
15119 while NEW_MODE is set to new control word. */
15122 emit_i387_cw_initialization (int mode
)
15124 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
15127 enum ix86_stack_slot slot
;
15129 rtx reg
= gen_reg_rtx (HImode
);
15131 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
15132 emit_move_insn (reg
, copy_rtx (stored_mode
));
15134 if (TARGET_64BIT
|| TARGET_PARTIAL_REG_STALL
15135 || optimize_function_for_size_p (cfun
))
15139 case I387_CW_TRUNC
:
15140 /* round toward zero (truncate) */
15141 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
15142 slot
= SLOT_CW_TRUNC
;
15145 case I387_CW_FLOOR
:
15146 /* round down toward -oo */
15147 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
15148 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
15149 slot
= SLOT_CW_FLOOR
;
15153 /* round up toward +oo */
15154 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
15155 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
15156 slot
= SLOT_CW_CEIL
;
15159 case I387_CW_MASK_PM
:
15160 /* mask precision exception for nearbyint() */
15161 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
15162 slot
= SLOT_CW_MASK_PM
;
15166 gcc_unreachable ();
15173 case I387_CW_TRUNC
:
15174 /* round toward zero (truncate) */
15175 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
15176 slot
= SLOT_CW_TRUNC
;
15179 case I387_CW_FLOOR
:
15180 /* round down toward -oo */
15181 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
15182 slot
= SLOT_CW_FLOOR
;
15186 /* round up toward +oo */
15187 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
15188 slot
= SLOT_CW_CEIL
;
15191 case I387_CW_MASK_PM
:
15192 /* mask precision exception for nearbyint() */
15193 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
15194 slot
= SLOT_CW_MASK_PM
;
15198 gcc_unreachable ();
15202 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
15204 new_mode
= assign_386_stack_local (HImode
, slot
);
15205 emit_move_insn (new_mode
, reg
);
15208 /* Output code for INSN to convert a float to a signed int. OPERANDS
15209 are the insn operands. The output may be [HSD]Imode and the input
15210 operand may be [SDX]Fmode. */
15213 output_fix_trunc (rtx insn
, rtx
*operands
, bool fisttp
)
15215 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
15216 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
15217 int round_mode
= get_attr_i387_cw (insn
);
15219 /* Jump through a hoop or two for DImode, since the hardware has no
15220 non-popping instruction. We used to do this a different way, but
15221 that was somewhat fragile and broke with post-reload splitters. */
15222 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
15223 output_asm_insn ("fld\t%y1", operands
);
15225 gcc_assert (STACK_TOP_P (operands
[1]));
15226 gcc_assert (MEM_P (operands
[0]));
15227 gcc_assert (GET_MODE (operands
[1]) != TFmode
);
15230 output_asm_insn ("fisttp%Z0\t%0", operands
);
15233 if (round_mode
!= I387_CW_ANY
)
15234 output_asm_insn ("fldcw\t%3", operands
);
15235 if (stack_top_dies
|| dimode_p
)
15236 output_asm_insn ("fistp%Z0\t%0", operands
);
15238 output_asm_insn ("fist%Z0\t%0", operands
);
15239 if (round_mode
!= I387_CW_ANY
)
15240 output_asm_insn ("fldcw\t%2", operands
);
15246 /* Output code for x87 ffreep insn. The OPNO argument, which may only
15247 have the values zero or one, indicates the ffreep insn's operand
15248 from the OPERANDS array. */
15250 static const char *
15251 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
15253 if (TARGET_USE_FFREEP
)
15254 #ifdef HAVE_AS_IX86_FFREEP
15255 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
15258 static char retval
[32];
15259 int regno
= REGNO (operands
[opno
]);
15261 gcc_assert (FP_REGNO_P (regno
));
15263 regno
-= FIRST_STACK_REG
;
15265 snprintf (retval
, sizeof (retval
), ASM_SHORT
"0xc%ddf", regno
);
15270 return opno
? "fstp\t%y1" : "fstp\t%y0";
15274 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
15275 should be used. UNORDERED_P is true when fucom should be used. */
15278 output_fp_compare (rtx insn
, rtx
*operands
, bool eflags_p
, bool unordered_p
)
15280 int stack_top_dies
;
15281 rtx cmp_op0
, cmp_op1
;
15282 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
15286 cmp_op0
= operands
[0];
15287 cmp_op1
= operands
[1];
15291 cmp_op0
= operands
[1];
15292 cmp_op1
= operands
[2];
15297 if (GET_MODE (operands
[0]) == SFmode
)
15299 return "%vucomiss\t{%1, %0|%0, %1}";
15301 return "%vcomiss\t{%1, %0|%0, %1}";
15304 return "%vucomisd\t{%1, %0|%0, %1}";
15306 return "%vcomisd\t{%1, %0|%0, %1}";
15309 gcc_assert (STACK_TOP_P (cmp_op0
));
15311 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
15313 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
15315 if (stack_top_dies
)
15317 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
15318 return output_387_ffreep (operands
, 1);
15321 return "ftst\n\tfnstsw\t%0";
15324 if (STACK_REG_P (cmp_op1
)
15326 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
15327 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
15329 /* If both the top of the 387 stack dies, and the other operand
15330 is also a stack register that dies, then this must be a
15331 `fcompp' float compare */
15335 /* There is no double popping fcomi variant. Fortunately,
15336 eflags is immune from the fstp's cc clobbering. */
15338 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
15340 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
15341 return output_387_ffreep (operands
, 0);
15346 return "fucompp\n\tfnstsw\t%0";
15348 return "fcompp\n\tfnstsw\t%0";
15353 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
15355 static const char * const alt
[16] =
15357 "fcom%Z2\t%y2\n\tfnstsw\t%0",
15358 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
15359 "fucom%Z2\t%y2\n\tfnstsw\t%0",
15360 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
15362 "ficom%Z2\t%y2\n\tfnstsw\t%0",
15363 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
15367 "fcomi\t{%y1, %0|%0, %y1}",
15368 "fcomip\t{%y1, %0|%0, %y1}",
15369 "fucomi\t{%y1, %0|%0, %y1}",
15370 "fucomip\t{%y1, %0|%0, %y1}",
15381 mask
= eflags_p
<< 3;
15382 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
15383 mask
|= unordered_p
<< 1;
15384 mask
|= stack_top_dies
;
15386 gcc_assert (mask
< 16);
15395 ix86_output_addr_vec_elt (FILE *file
, int value
)
15397 const char *directive
= ASM_LONG
;
15401 directive
= ASM_QUAD
;
15403 gcc_assert (!TARGET_64BIT
);
15406 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
15410 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
15412 const char *directive
= ASM_LONG
;
15415 if (TARGET_64BIT
&& CASE_VECTOR_MODE
== DImode
)
15416 directive
= ASM_QUAD
;
15418 gcc_assert (!TARGET_64BIT
);
15420 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15421 if (TARGET_64BIT
|| TARGET_VXWORKS_RTP
)
15422 fprintf (file
, "%s%s%d-%s%d\n",
15423 directive
, LPREFIX
, value
, LPREFIX
, rel
);
15424 else if (HAVE_AS_GOTOFF_IN_DATA
)
15425 fprintf (file
, ASM_LONG
"%s%d@GOTOFF\n", LPREFIX
, value
);
15427 else if (TARGET_MACHO
)
15429 fprintf (file
, ASM_LONG
"%s%d-", LPREFIX
, value
);
15430 machopic_output_function_base_name (file
);
15435 asm_fprintf (file
, ASM_LONG
"%U%s+[.-%s%d]\n",
15436 GOT_SYMBOL_NAME
, LPREFIX
, value
);
15439 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
15443 ix86_expand_clear (rtx dest
)
15447 /* We play register width games, which are only valid after reload. */
15448 gcc_assert (reload_completed
);
15450 /* Avoid HImode and its attendant prefix byte. */
15451 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
15452 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
15453 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
15455 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
15456 if (!TARGET_USE_MOV0
|| optimize_insn_for_speed_p ())
15458 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
15459 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
15465 /* X is an unchanging MEM. If it is a constant pool reference, return
15466 the constant pool rtx, else NULL. */
15469 maybe_get_pool_constant (rtx x
)
15471 x
= ix86_delegitimize_address (XEXP (x
, 0));
15473 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
15474 return get_pool_constant (x
);
15480 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
15483 enum tls_model model
;
15488 if (GET_CODE (op1
) == SYMBOL_REF
)
15490 model
= SYMBOL_REF_TLS_MODEL (op1
);
15493 op1
= legitimize_tls_address (op1
, model
, true);
15494 op1
= force_operand (op1
, op0
);
15497 if (GET_MODE (op1
) != mode
)
15498 op1
= convert_to_mode (mode
, op1
, 1);
15500 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15501 && SYMBOL_REF_DLLIMPORT_P (op1
))
15502 op1
= legitimize_dllimport_symbol (op1
, false);
15504 else if (GET_CODE (op1
) == CONST
15505 && GET_CODE (XEXP (op1
, 0)) == PLUS
15506 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
15508 rtx addend
= XEXP (XEXP (op1
, 0), 1);
15509 rtx symbol
= XEXP (XEXP (op1
, 0), 0);
15512 model
= SYMBOL_REF_TLS_MODEL (symbol
);
15514 tmp
= legitimize_tls_address (symbol
, model
, true);
15515 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15516 && SYMBOL_REF_DLLIMPORT_P (symbol
))
15517 tmp
= legitimize_dllimport_symbol (symbol
, true);
15521 tmp
= force_operand (tmp
, NULL
);
15522 tmp
= expand_simple_binop (Pmode
, PLUS
, tmp
, addend
,
15523 op0
, 1, OPTAB_DIRECT
);
15526 if (GET_MODE (tmp
) != mode
)
15527 op1
= convert_to_mode (mode
, tmp
, 1);
15531 if ((flag_pic
|| MACHOPIC_INDIRECT
)
15532 && symbolic_operand (op1
, mode
))
15534 if (TARGET_MACHO
&& !TARGET_64BIT
)
15537 /* dynamic-no-pic */
15538 if (MACHOPIC_INDIRECT
)
15540 rtx temp
= ((reload_in_progress
15541 || ((op0
&& REG_P (op0
))
15543 ? op0
: gen_reg_rtx (Pmode
));
15544 op1
= machopic_indirect_data_reference (op1
, temp
);
15546 op1
= machopic_legitimize_pic_address (op1
, mode
,
15547 temp
== op1
? 0 : temp
);
15549 if (op0
!= op1
&& GET_CODE (op0
) != MEM
)
15551 rtx insn
= gen_rtx_SET (VOIDmode
, op0
, op1
);
15555 if (GET_CODE (op0
) == MEM
)
15556 op1
= force_reg (Pmode
, op1
);
15560 if (GET_CODE (temp
) != REG
)
15561 temp
= gen_reg_rtx (Pmode
);
15562 temp
= legitimize_pic_address (op1
, temp
);
15567 /* dynamic-no-pic */
15573 op1
= force_reg (mode
, op1
);
15574 else if (!(TARGET_64BIT
&& x86_64_movabs_operand (op1
, DImode
)))
15576 rtx reg
= can_create_pseudo_p () ? NULL_RTX
: op0
;
15577 op1
= legitimize_pic_address (op1
, reg
);
15580 if (GET_MODE (op1
) != mode
)
15581 op1
= convert_to_mode (mode
, op1
, 1);
15588 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
15589 || !push_operand (op0
, mode
))
15591 op1
= force_reg (mode
, op1
);
15593 if (push_operand (op0
, mode
)
15594 && ! general_no_elim_operand (op1
, mode
))
15595 op1
= copy_to_mode_reg (mode
, op1
);
15597 /* Force large constants in 64bit compilation into register
15598 to get them CSEed. */
15599 if (can_create_pseudo_p ()
15600 && (mode
== DImode
) && TARGET_64BIT
15601 && immediate_operand (op1
, mode
)
15602 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
15603 && !register_operand (op0
, mode
)
15605 op1
= copy_to_mode_reg (mode
, op1
);
15607 if (can_create_pseudo_p ()
15608 && FLOAT_MODE_P (mode
)
15609 && GET_CODE (op1
) == CONST_DOUBLE
)
15611 /* If we are loading a floating point constant to a register,
15612 force the value to memory now, since we'll get better code
15613 out the back end. */
15615 op1
= validize_mem (force_const_mem (mode
, op1
));
15616 if (!register_operand (op0
, mode
))
15618 rtx temp
= gen_reg_rtx (mode
);
15619 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
15620 emit_move_insn (op0
, temp
);
15626 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
15630 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
15632 rtx op0
= operands
[0], op1
= operands
[1];
15633 unsigned int align
= GET_MODE_ALIGNMENT (mode
);
15635 /* Force constants other than zero into memory. We do not know how
15636 the instructions used to build constants modify the upper 64 bits
15637 of the register, once we have that information we may be able
15638 to handle some of them more efficiently. */
15639 if (can_create_pseudo_p ()
15640 && register_operand (op0
, mode
)
15641 && (CONSTANT_P (op1
)
15642 || (GET_CODE (op1
) == SUBREG
15643 && CONSTANT_P (SUBREG_REG (op1
))))
15644 && !standard_sse_constant_p (op1
))
15645 op1
= validize_mem (force_const_mem (mode
, op1
));
15647 /* We need to check memory alignment for SSE mode since attribute
15648 can make operands unaligned. */
15649 if (can_create_pseudo_p ()
15650 && SSE_REG_MODE_P (mode
)
15651 && ((MEM_P (op0
) && (MEM_ALIGN (op0
) < align
))
15652 || (MEM_P (op1
) && (MEM_ALIGN (op1
) < align
))))
15656 /* ix86_expand_vector_move_misalign() does not like constants ... */
15657 if (CONSTANT_P (op1
)
15658 || (GET_CODE (op1
) == SUBREG
15659 && CONSTANT_P (SUBREG_REG (op1
))))
15660 op1
= validize_mem (force_const_mem (mode
, op1
));
15662 /* ... nor both arguments in memory. */
15663 if (!register_operand (op0
, mode
)
15664 && !register_operand (op1
, mode
))
15665 op1
= force_reg (mode
, op1
);
15667 tmp
[0] = op0
; tmp
[1] = op1
;
15668 ix86_expand_vector_move_misalign (mode
, tmp
);
15672 /* Make operand1 a register if it isn't already. */
15673 if (can_create_pseudo_p ()
15674 && !register_operand (op0
, mode
)
15675 && !register_operand (op1
, mode
))
15677 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
15681 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
15684 /* Split 32-byte AVX unaligned load and store if needed. */
15687 ix86_avx256_split_vector_move_misalign (rtx op0
, rtx op1
)
15690 rtx (*extract
) (rtx
, rtx
, rtx
);
15691 rtx (*move_unaligned
) (rtx
, rtx
);
15692 enum machine_mode mode
;
15694 switch (GET_MODE (op0
))
15697 gcc_unreachable ();
15699 extract
= gen_avx_vextractf128v32qi
;
15700 move_unaligned
= gen_avx_movdqu256
;
15704 extract
= gen_avx_vextractf128v8sf
;
15705 move_unaligned
= gen_avx_movups256
;
15709 extract
= gen_avx_vextractf128v4df
;
15710 move_unaligned
= gen_avx_movupd256
;
15715 if (MEM_P (op1
) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD
)
15717 rtx r
= gen_reg_rtx (mode
);
15718 m
= adjust_address (op1
, mode
, 0);
15719 emit_move_insn (r
, m
);
15720 m
= adjust_address (op1
, mode
, 16);
15721 r
= gen_rtx_VEC_CONCAT (GET_MODE (op0
), r
, m
);
15722 emit_move_insn (op0
, r
);
15724 else if (MEM_P (op0
) && TARGET_AVX256_SPLIT_UNALIGNED_STORE
)
15726 m
= adjust_address (op0
, mode
, 0);
15727 emit_insn (extract (m
, op1
, const0_rtx
));
15728 m
= adjust_address (op0
, mode
, 16);
15729 emit_insn (extract (m
, op1
, const1_rtx
));
15732 emit_insn (move_unaligned (op0
, op1
));
15735 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
15736 straight to ix86_expand_vector_move. */
15737 /* Code generation for scalar reg-reg moves of single and double precision data:
15738 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
15742 if (x86_sse_partial_reg_dependency == true)
15747 Code generation for scalar loads of double precision data:
15748 if (x86_sse_split_regs == true)
15749 movlpd mem, reg (gas syntax)
15753 Code generation for unaligned packed loads of single precision data
15754 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
15755 if (x86_sse_unaligned_move_optimal)
15758 if (x86_sse_partial_reg_dependency == true)
15770 Code generation for unaligned packed loads of double precision data
15771 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
15772 if (x86_sse_unaligned_move_optimal)
15775 if (x86_sse_split_regs == true)
15788 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
15797 switch (GET_MODE_CLASS (mode
))
15799 case MODE_VECTOR_INT
:
15801 switch (GET_MODE_SIZE (mode
))
15804 /* If we're optimizing for size, movups is the smallest. */
15805 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
)
15807 op0
= gen_lowpart (V4SFmode
, op0
);
15808 op1
= gen_lowpart (V4SFmode
, op1
);
15809 emit_insn (gen_sse_movups (op0
, op1
));
15812 op0
= gen_lowpart (V16QImode
, op0
);
15813 op1
= gen_lowpart (V16QImode
, op1
);
15814 emit_insn (gen_sse2_movdqu (op0
, op1
));
15817 op0
= gen_lowpart (V32QImode
, op0
);
15818 op1
= gen_lowpart (V32QImode
, op1
);
15819 ix86_avx256_split_vector_move_misalign (op0
, op1
);
15822 gcc_unreachable ();
15825 case MODE_VECTOR_FLOAT
:
15826 op0
= gen_lowpart (mode
, op0
);
15827 op1
= gen_lowpart (mode
, op1
);
15832 emit_insn (gen_sse_movups (op0
, op1
));
15835 ix86_avx256_split_vector_move_misalign (op0
, op1
);
15838 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
)
15840 op0
= gen_lowpart (V4SFmode
, op0
);
15841 op1
= gen_lowpart (V4SFmode
, op1
);
15842 emit_insn (gen_sse_movups (op0
, op1
));
15845 emit_insn (gen_sse2_movupd (op0
, op1
));
15848 ix86_avx256_split_vector_move_misalign (op0
, op1
);
15851 gcc_unreachable ();
15856 gcc_unreachable ();
15864 /* If we're optimizing for size, movups is the smallest. */
15865 if (optimize_insn_for_size_p ()
15866 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
)
15868 op0
= gen_lowpart (V4SFmode
, op0
);
15869 op1
= gen_lowpart (V4SFmode
, op1
);
15870 emit_insn (gen_sse_movups (op0
, op1
));
15874 /* ??? If we have typed data, then it would appear that using
15875 movdqu is the only way to get unaligned data loaded with
15877 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
15879 op0
= gen_lowpart (V16QImode
, op0
);
15880 op1
= gen_lowpart (V16QImode
, op1
);
15881 emit_insn (gen_sse2_movdqu (op0
, op1
));
15885 if (TARGET_SSE2
&& mode
== V2DFmode
)
15889 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
)
15891 op0
= gen_lowpart (V2DFmode
, op0
);
15892 op1
= gen_lowpart (V2DFmode
, op1
);
15893 emit_insn (gen_sse2_movupd (op0
, op1
));
15897 /* When SSE registers are split into halves, we can avoid
15898 writing to the top half twice. */
15899 if (TARGET_SSE_SPLIT_REGS
)
15901 emit_clobber (op0
);
15906 /* ??? Not sure about the best option for the Intel chips.
15907 The following would seem to satisfy; the register is
15908 entirely cleared, breaking the dependency chain. We
15909 then store to the upper half, with a dependency depth
15910 of one. A rumor has it that Intel recommends two movsd
15911 followed by an unpacklpd, but this is unconfirmed. And
15912 given that the dependency depth of the unpacklpd would
15913 still be one, I'm not sure why this would be better. */
15914 zero
= CONST0_RTX (V2DFmode
);
15917 m
= adjust_address (op1
, DFmode
, 0);
15918 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
15919 m
= adjust_address (op1
, DFmode
, 8);
15920 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
15924 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
)
15926 op0
= gen_lowpart (V4SFmode
, op0
);
15927 op1
= gen_lowpart (V4SFmode
, op1
);
15928 emit_insn (gen_sse_movups (op0
, op1
));
15932 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
15933 emit_move_insn (op0
, CONST0_RTX (mode
));
15935 emit_clobber (op0
);
15937 if (mode
!= V4SFmode
)
15938 op0
= gen_lowpart (V4SFmode
, op0
);
15939 m
= adjust_address (op1
, V2SFmode
, 0);
15940 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
15941 m
= adjust_address (op1
, V2SFmode
, 8);
15942 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
15945 else if (MEM_P (op0
))
15947 /* If we're optimizing for size, movups is the smallest. */
15948 if (optimize_insn_for_size_p ()
15949 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
)
15951 op0
= gen_lowpart (V4SFmode
, op0
);
15952 op1
= gen_lowpart (V4SFmode
, op1
);
15953 emit_insn (gen_sse_movups (op0
, op1
));
15957 /* ??? Similar to above, only less clear because of quote
15958 typeless stores unquote. */
15959 if (TARGET_SSE2
&& !TARGET_SSE_TYPELESS_STORES
15960 && GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
15962 op0
= gen_lowpart (V16QImode
, op0
);
15963 op1
= gen_lowpart (V16QImode
, op1
);
15964 emit_insn (gen_sse2_movdqu (op0
, op1
));
15968 if (TARGET_SSE2
&& mode
== V2DFmode
)
15970 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL
)
15972 op0
= gen_lowpart (V2DFmode
, op0
);
15973 op1
= gen_lowpart (V2DFmode
, op1
);
15974 emit_insn (gen_sse2_movupd (op0
, op1
));
15978 m
= adjust_address (op0
, DFmode
, 0);
15979 emit_insn (gen_sse2_storelpd (m
, op1
));
15980 m
= adjust_address (op0
, DFmode
, 8);
15981 emit_insn (gen_sse2_storehpd (m
, op1
));
15986 if (mode
!= V4SFmode
)
15987 op1
= gen_lowpart (V4SFmode
, op1
);
15989 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL
)
15991 op0
= gen_lowpart (V4SFmode
, op0
);
15992 emit_insn (gen_sse_movups (op0
, op1
));
15996 m
= adjust_address (op0
, V2SFmode
, 0);
15997 emit_insn (gen_sse_storelps (m
, op1
));
15998 m
= adjust_address (op0
, V2SFmode
, 8);
15999 emit_insn (gen_sse_storehps (m
, op1
));
16004 gcc_unreachable ();
16007 /* Expand a push in MODE. This is some mode for which we do not support
16008 proper push instructions, at least from the registers that we expect
16009 the value to live in. */
16012 ix86_expand_push (enum machine_mode mode
, rtx x
)
16016 tmp
= expand_simple_binop (Pmode
, PLUS
, stack_pointer_rtx
,
16017 GEN_INT (-GET_MODE_SIZE (mode
)),
16018 stack_pointer_rtx
, 1, OPTAB_DIRECT
);
16019 if (tmp
!= stack_pointer_rtx
)
16020 emit_move_insn (stack_pointer_rtx
, tmp
);
16022 tmp
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
16024 /* When we push an operand onto stack, it has to be aligned at least
16025 at the function argument boundary. However since we don't have
16026 the argument type, we can't determine the actual argument
16028 emit_move_insn (tmp
, x
);
16031 /* Helper function of ix86_fixup_binary_operands to canonicalize
16032 operand order. Returns true if the operands should be swapped. */
16035 ix86_swap_binary_operands_p (enum rtx_code code
, enum machine_mode mode
,
16038 rtx dst
= operands
[0];
16039 rtx src1
= operands
[1];
16040 rtx src2
= operands
[2];
16042 /* If the operation is not commutative, we can't do anything. */
16043 if (GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
16046 /* Highest priority is that src1 should match dst. */
16047 if (rtx_equal_p (dst
, src1
))
16049 if (rtx_equal_p (dst
, src2
))
16052 /* Next highest priority is that immediate constants come second. */
16053 if (immediate_operand (src2
, mode
))
16055 if (immediate_operand (src1
, mode
))
16058 /* Lowest priority is that memory references should come second. */
16068 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
16069 destination to use for the operation. If different from the true
16070 destination in operands[0], a copy operation will be required. */
16073 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
16076 rtx dst
= operands
[0];
16077 rtx src1
= operands
[1];
16078 rtx src2
= operands
[2];
16080 /* Canonicalize operand order. */
16081 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
16085 /* It is invalid to swap operands of different modes. */
16086 gcc_assert (GET_MODE (src1
) == GET_MODE (src2
));
16093 /* Both source operands cannot be in memory. */
16094 if (MEM_P (src1
) && MEM_P (src2
))
16096 /* Optimization: Only read from memory once. */
16097 if (rtx_equal_p (src1
, src2
))
16099 src2
= force_reg (mode
, src2
);
16103 src2
= force_reg (mode
, src2
);
16106 /* If the destination is memory, and we do not have matching source
16107 operands, do things in registers. */
16108 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
16109 dst
= gen_reg_rtx (mode
);
16111 /* Source 1 cannot be a constant. */
16112 if (CONSTANT_P (src1
))
16113 src1
= force_reg (mode
, src1
);
16115 /* Source 1 cannot be a non-matching memory. */
16116 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
16117 src1
= force_reg (mode
, src1
);
16119 /* Improve address combine. */
16121 && GET_MODE_CLASS (mode
) == MODE_INT
16123 src2
= force_reg (mode
, src2
);
16125 operands
[1] = src1
;
16126 operands
[2] = src2
;
16130 /* Similarly, but assume that the destination has already been
16131 set up properly. */
16134 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
16135 enum machine_mode mode
, rtx operands
[])
16137 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
16138 gcc_assert (dst
== operands
[0]);
16141 /* Attempt to expand a binary operator. Make the expansion closer to the
16142 actual machine, then just general_operand, which will allow 3 separate
16143 memory references (one output, two input) in a single insn. */
16146 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
16149 rtx src1
, src2
, dst
, op
, clob
;
16151 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
16152 src1
= operands
[1];
16153 src2
= operands
[2];
16155 /* Emit the instruction. */
16157 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
16158 if (reload_in_progress
)
16160 /* Reload doesn't know about the flags register, and doesn't know that
16161 it doesn't want to clobber it. We can only do this with PLUS. */
16162 gcc_assert (code
== PLUS
);
16165 else if (reload_completed
16167 && !rtx_equal_p (dst
, src1
))
16169 /* This is going to be an LEA; avoid splitting it later. */
16174 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
16175 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
16178 /* Fix up the destination if needed. */
16179 if (dst
!= operands
[0])
16180 emit_move_insn (operands
[0], dst
);
16183 /* Return TRUE or FALSE depending on whether the binary operator meets the
16184 appropriate constraints. */
16187 ix86_binary_operator_ok (enum rtx_code code
, enum machine_mode mode
,
16190 rtx dst
= operands
[0];
16191 rtx src1
= operands
[1];
16192 rtx src2
= operands
[2];
16194 /* Both source operands cannot be in memory. */
16195 if (MEM_P (src1
) && MEM_P (src2
))
16198 /* Canonicalize operand order for commutative operators. */
16199 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
16206 /* If the destination is memory, we must have a matching source operand. */
16207 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
16210 /* Source 1 cannot be a constant. */
16211 if (CONSTANT_P (src1
))
16214 /* Source 1 cannot be a non-matching memory. */
16215 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
16216 /* Support "andhi/andsi/anddi" as a zero-extending move. */
16217 return (code
== AND
16220 || (TARGET_64BIT
&& mode
== DImode
))
16221 && satisfies_constraint_L (src2
));
16226 /* Attempt to expand a unary operator. Make the expansion closer to the
16227 actual machine, then just general_operand, which will allow 2 separate
16228 memory references (one output, one input) in a single insn. */
16231 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
16234 int matching_memory
;
16235 rtx src
, dst
, op
, clob
;
16240 /* If the destination is memory, and we do not have matching source
16241 operands, do things in registers. */
16242 matching_memory
= 0;
16245 if (rtx_equal_p (dst
, src
))
16246 matching_memory
= 1;
16248 dst
= gen_reg_rtx (mode
);
16251 /* When source operand is memory, destination must match. */
16252 if (MEM_P (src
) && !matching_memory
)
16253 src
= force_reg (mode
, src
);
16255 /* Emit the instruction. */
16257 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
16258 if (reload_in_progress
|| code
== NOT
)
16260 /* Reload doesn't know about the flags register, and doesn't know that
16261 it doesn't want to clobber it. */
16262 gcc_assert (code
== NOT
);
16267 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
16268 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
16271 /* Fix up the destination if needed. */
16272 if (dst
!= operands
[0])
16273 emit_move_insn (operands
[0], dst
);
16276 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
16277 divisor are within the range [0-255]. */
16280 ix86_split_idivmod (enum machine_mode mode
, rtx operands
[],
16283 rtx end_label
, qimode_label
;
16284 rtx insn
, div
, mod
;
16285 rtx scratch
, tmp0
, tmp1
, tmp2
;
16286 rtx (*gen_divmod4_1
) (rtx
, rtx
, rtx
, rtx
);
16287 rtx (*gen_zero_extend
) (rtx
, rtx
);
16288 rtx (*gen_test_ccno_1
) (rtx
, rtx
);
16293 gen_divmod4_1
= signed_p
? gen_divmodsi4_1
: gen_udivmodsi4_1
;
16294 gen_test_ccno_1
= gen_testsi_ccno_1
;
16295 gen_zero_extend
= gen_zero_extendqisi2
;
16298 gen_divmod4_1
= signed_p
? gen_divmoddi4_1
: gen_udivmoddi4_1
;
16299 gen_test_ccno_1
= gen_testdi_ccno_1
;
16300 gen_zero_extend
= gen_zero_extendqidi2
;
16303 gcc_unreachable ();
16306 end_label
= gen_label_rtx ();
16307 qimode_label
= gen_label_rtx ();
16309 scratch
= gen_reg_rtx (mode
);
16311 /* Use 8bit unsigned divimod if dividend and divisor are within
16312 the range [0-255]. */
16313 emit_move_insn (scratch
, operands
[2]);
16314 scratch
= expand_simple_binop (mode
, IOR
, scratch
, operands
[3],
16315 scratch
, 1, OPTAB_DIRECT
);
16316 emit_insn (gen_test_ccno_1 (scratch
, GEN_INT (-0x100)));
16317 tmp0
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
16318 tmp0
= gen_rtx_EQ (VOIDmode
, tmp0
, const0_rtx
);
16319 tmp0
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp0
,
16320 gen_rtx_LABEL_REF (VOIDmode
, qimode_label
),
16322 insn
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp0
));
16323 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
16324 JUMP_LABEL (insn
) = qimode_label
;
16326 /* Generate original signed/unsigned divimod. */
16327 div
= gen_divmod4_1 (operands
[0], operands
[1],
16328 operands
[2], operands
[3]);
16331 /* Branch to the end. */
16332 emit_jump_insn (gen_jump (end_label
));
16335 /* Generate 8bit unsigned divide. */
16336 emit_label (qimode_label
);
16337 /* Don't use operands[0] for result of 8bit divide since not all
16338 registers support QImode ZERO_EXTRACT. */
16339 tmp0
= simplify_gen_subreg (HImode
, scratch
, mode
, 0);
16340 tmp1
= simplify_gen_subreg (HImode
, operands
[2], mode
, 0);
16341 tmp2
= simplify_gen_subreg (QImode
, operands
[3], mode
, 0);
16342 emit_insn (gen_udivmodhiqi3 (tmp0
, tmp1
, tmp2
));
16346 div
= gen_rtx_DIV (SImode
, operands
[2], operands
[3]);
16347 mod
= gen_rtx_MOD (SImode
, operands
[2], operands
[3]);
16351 div
= gen_rtx_UDIV (SImode
, operands
[2], operands
[3]);
16352 mod
= gen_rtx_UMOD (SImode
, operands
[2], operands
[3]);
16355 /* Extract remainder from AH. */
16356 tmp1
= gen_rtx_ZERO_EXTRACT (mode
, tmp0
, GEN_INT (8), GEN_INT (8));
16357 if (REG_P (operands
[1]))
16358 insn
= emit_move_insn (operands
[1], tmp1
);
16361 /* Need a new scratch register since the old one has result
16363 scratch
= gen_reg_rtx (mode
);
16364 emit_move_insn (scratch
, tmp1
);
16365 insn
= emit_move_insn (operands
[1], scratch
);
16367 set_unique_reg_note (insn
, REG_EQUAL
, mod
);
16369 /* Zero extend quotient from AL. */
16370 tmp1
= gen_lowpart (QImode
, tmp0
);
16371 insn
= emit_insn (gen_zero_extend (operands
[0], tmp1
));
16372 set_unique_reg_note (insn
, REG_EQUAL
, div
);
16374 emit_label (end_label
);
16377 #define LEA_MAX_STALL (3)
16378 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
/* Increase given DISTANCE in half-cycles according to
   dependencies between PREV and NEXT instructions.
   Add 1 half-cycle if there is no dependency and
   go to next cycle if there is some dependency.  */
16385 static unsigned int
16386 increase_distance (rtx prev
, rtx next
, unsigned int distance
)
16391 if (!prev
|| !next
)
16392 return distance
+ (distance
& 1) + 2;
16394 if (!DF_INSN_USES (next
) || !DF_INSN_DEFS (prev
))
16395 return distance
+ 1;
16397 for (use_rec
= DF_INSN_USES (next
); *use_rec
; use_rec
++)
16398 for (def_rec
= DF_INSN_DEFS (prev
); *def_rec
; def_rec
++)
16399 if (!DF_REF_IS_ARTIFICIAL (*def_rec
)
16400 && DF_REF_REGNO (*use_rec
) == DF_REF_REGNO (*def_rec
))
16401 return distance
+ (distance
& 1) + 2;
16403 return distance
+ 1;
16406 /* Function checks if instruction INSN defines register number
16407 REGNO1 or REGNO2. */
16410 insn_defines_reg (unsigned int regno1
, unsigned int regno2
,
16415 for (def_rec
= DF_INSN_DEFS (insn
); *def_rec
; def_rec
++)
16416 if (DF_REF_REG_DEF_P (*def_rec
)
16417 && !DF_REF_IS_ARTIFICIAL (*def_rec
)
16418 && (regno1
== DF_REF_REGNO (*def_rec
)
16419 || regno2
== DF_REF_REGNO (*def_rec
)))
16427 /* Function checks if instruction INSN uses register number
16428 REGNO as a part of address expression. */
16431 insn_uses_reg_mem (unsigned int regno
, rtx insn
)
16435 for (use_rec
= DF_INSN_USES (insn
); *use_rec
; use_rec
++)
16436 if (DF_REF_REG_MEM_P (*use_rec
) && regno
== DF_REF_REGNO (*use_rec
))
16442 /* Search backward for non-agu definition of register number REGNO1
16443 or register number REGNO2 in basic block starting from instruction
16444 START up to head of basic block or instruction INSN.
16446 Function puts true value into *FOUND var if definition was found
16447 and false otherwise.
16449 Distance in half-cycles between START and found instruction or head
16450 of BB is added to DISTANCE and returned. */
16453 distance_non_agu_define_in_bb (unsigned int regno1
, unsigned int regno2
,
16454 rtx insn
, int distance
,
16455 rtx start
, bool *found
)
16457 basic_block bb
= start
? BLOCK_FOR_INSN (start
) : NULL
;
16460 enum attr_type insn_type
;
16466 && distance
< LEA_SEARCH_THRESHOLD
)
16468 if (NONDEBUG_INSN_P (prev
) && NONJUMP_INSN_P (prev
))
16470 distance
= increase_distance (prev
, next
, distance
);
16471 if (insn_defines_reg (regno1
, regno2
, prev
))
16473 insn_type
= get_attr_type (prev
);
16474 if (insn_type
!= TYPE_LEA
)
16483 if (prev
== BB_HEAD (bb
))
16486 prev
= PREV_INSN (prev
);
16492 /* Search backward for non-agu definition of register number REGNO1
16493 or register number REGNO2 in INSN's basic block until
16494 1. Pass LEA_SEARCH_THRESHOLD instructions, or
16495 2. Reach neighbour BBs boundary, or
16496 3. Reach agu definition.
16497 Returns the distance between the non-agu definition point and INSN.
16498 If no definition point, returns -1. */
16501 distance_non_agu_define (unsigned int regno1
, unsigned int regno2
,
16504 basic_block bb
= BLOCK_FOR_INSN (insn
);
16506 bool found
= false;
16508 if (insn
!= BB_HEAD (bb
))
16509 distance
= distance_non_agu_define_in_bb (regno1
, regno2
, insn
,
16510 distance
, PREV_INSN (insn
),
16513 if (!found
&& distance
< LEA_SEARCH_THRESHOLD
)
16517 bool simple_loop
= false;
16519 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
16522 simple_loop
= true;
16527 distance
= distance_non_agu_define_in_bb (regno1
, regno2
,
16529 BB_END (bb
), &found
);
16532 int shortest_dist
= -1;
16533 bool found_in_bb
= false;
16535 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
16538 = distance_non_agu_define_in_bb (regno1
, regno2
,
16544 if (shortest_dist
< 0)
16545 shortest_dist
= bb_dist
;
16546 else if (bb_dist
> 0)
16547 shortest_dist
= MIN (bb_dist
, shortest_dist
);
16553 distance
= shortest_dist
;
16557 /* get_attr_type may modify recog data. We want to make sure
16558 that recog data is valid for instruction INSN, on which
16559 distance_non_agu_define is called. INSN is unchanged here. */
16560 extract_insn_cached (insn
);
16565 return distance
>> 1;
16568 /* Return the distance in half-cycles between INSN and the next
16569 insn that uses register number REGNO in memory address added
16570 to DISTANCE. Return -1 if REGNO0 is set.
16572 Put true value into *FOUND if register usage was found and
16574 Put true value into *REDEFINED if register redefinition was
16575 found and false otherwise. */
16578 distance_agu_use_in_bb (unsigned int regno
,
16579 rtx insn
, int distance
, rtx start
,
16580 bool *found
, bool *redefined
)
16582 basic_block bb
= start
? BLOCK_FOR_INSN (start
) : NULL
;
16587 *redefined
= false;
16591 && distance
< LEA_SEARCH_THRESHOLD
)
16593 if (NONDEBUG_INSN_P (next
) && NONJUMP_INSN_P (next
))
16595 distance
= increase_distance(prev
, next
, distance
);
16596 if (insn_uses_reg_mem (regno
, next
))
16598 /* Return DISTANCE if OP0 is used in memory
16599 address in NEXT. */
16604 if (insn_defines_reg (regno
, INVALID_REGNUM
, next
))
16606 /* Return -1 if OP0 is set in NEXT. */
16614 if (next
== BB_END (bb
))
16617 next
= NEXT_INSN (next
);
16623 /* Return the distance between INSN and the next insn that uses
16624 register number REGNO0 in memory address. Return -1 if no such
16625 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
16628 distance_agu_use (unsigned int regno0
, rtx insn
)
16630 basic_block bb
= BLOCK_FOR_INSN (insn
);
16632 bool found
= false;
16633 bool redefined
= false;
16635 if (insn
!= BB_END (bb
))
16636 distance
= distance_agu_use_in_bb (regno0
, insn
, distance
,
16638 &found
, &redefined
);
16640 if (!found
&& !redefined
&& distance
< LEA_SEARCH_THRESHOLD
)
16644 bool simple_loop
= false;
16646 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
16649 simple_loop
= true;
16654 distance
= distance_agu_use_in_bb (regno0
, insn
,
16655 distance
, BB_HEAD (bb
),
16656 &found
, &redefined
);
16659 int shortest_dist
= -1;
16660 bool found_in_bb
= false;
16661 bool redefined_in_bb
= false;
16663 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
16666 = distance_agu_use_in_bb (regno0
, insn
,
16667 distance
, BB_HEAD (e
->dest
),
16668 &found_in_bb
, &redefined_in_bb
);
16671 if (shortest_dist
< 0)
16672 shortest_dist
= bb_dist
;
16673 else if (bb_dist
> 0)
16674 shortest_dist
= MIN (bb_dist
, shortest_dist
);
16680 distance
= shortest_dist
;
16684 if (!found
|| redefined
)
16687 return distance
>> 1;
/* Define this macro to tune LEA priority vs ADD; it takes effect when
   there is a dilemma of choosing LEA or ADD.
   Negative value: ADD is more preferred than LEA.
   Positive value: LEA is more preferred than ADD.  */
16695 #define IX86_LEA_PRIORITY 0
16697 /* Return true if usage of lea INSN has performance advantage
16698 over a sequence of instructions. Instructions sequence has
16699 SPLIT_COST cycles higher latency than lea latency. */
16702 ix86_lea_outperforms (rtx insn
, unsigned int regno0
, unsigned int regno1
,
16703 unsigned int regno2
, unsigned int split_cost
)
16705 int dist_define
, dist_use
;
16707 dist_define
= distance_non_agu_define (regno1
, regno2
, insn
);
16708 dist_use
= distance_agu_use (regno0
, insn
);
16710 if (dist_define
< 0 || dist_define
>= LEA_MAX_STALL
)
16712 /* If there is no non AGU operand definition, no AGU
16713 operand usage and split cost is 0 then both lea
16714 and non lea variants have same priority. Currently
16715 we prefer lea for 64 bit code and non lea on 32 bit
16717 if (dist_use
< 0 && split_cost
== 0)
16718 return TARGET_64BIT
|| IX86_LEA_PRIORITY
;
16723 /* With longer definitions distance lea is more preferable.
16724 Here we change it to take into account splitting cost and
16726 dist_define
+= split_cost
+ IX86_LEA_PRIORITY
;
16728 /* If there is no use in memory addess then we just check
16729 that split cost does not exceed AGU stall. */
16731 return dist_define
>= LEA_MAX_STALL
;
16733 /* If this insn has both backward non-agu dependence and forward
16734 agu dependence, the one with short distance takes effect. */
16735 return dist_define
>= dist_use
;
16738 /* Return true if it is legal to clobber flags by INSN and
16739 false otherwise. */
16742 ix86_ok_to_clobber_flags (rtx insn
)
16744 basic_block bb
= BLOCK_FOR_INSN (insn
);
16750 if (NONDEBUG_INSN_P (insn
))
16752 for (use
= DF_INSN_USES (insn
); *use
; use
++)
16753 if (DF_REF_REG_USE_P (*use
) && DF_REF_REGNO (*use
) == FLAGS_REG
)
16756 if (insn_defines_reg (FLAGS_REG
, INVALID_REGNUM
, insn
))
16760 if (insn
== BB_END (bb
))
16763 insn
= NEXT_INSN (insn
);
16766 live
= df_get_live_out(bb
);
16767 return !REGNO_REG_SET_P (live
, FLAGS_REG
);
16770 /* Return true if we need to split op0 = op1 + op2 into a sequence of
16771 move and add to avoid AGU stalls. */
16774 ix86_avoid_lea_for_add (rtx insn
, rtx operands
[])
16776 unsigned int regno0
= true_regnum (operands
[0]);
16777 unsigned int regno1
= true_regnum (operands
[1]);
16778 unsigned int regno2
= true_regnum (operands
[2]);
16780 /* Check if we need to optimize. */
16781 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
16784 /* Check it is correct to split here. */
16785 if (!ix86_ok_to_clobber_flags(insn
))
16788 /* We need to split only adds with non destructive
16789 destination operand. */
16790 if (regno0
== regno1
|| regno0
== regno2
)
16793 return !ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, 1);
16796 /* Return true if we should emit lea instruction instead of mov
16800 ix86_use_lea_for_mov (rtx insn
, rtx operands
[])
16802 unsigned int regno0
;
16803 unsigned int regno1
;
16805 /* Check if we need to optimize. */
16806 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
16809 /* Use lea for reg to reg moves only. */
16810 if (!REG_P (operands
[0]) || !REG_P (operands
[1]))
16813 regno0
= true_regnum (operands
[0]);
16814 regno1
= true_regnum (operands
[1]);
16816 return ix86_lea_outperforms (insn
, regno0
, regno1
, -1, 0);
16819 /* Return true if we need to split lea into a sequence of
16820 instructions to avoid AGU stalls. */
16823 ix86_avoid_lea_for_addr (rtx insn
, rtx operands
[])
16825 unsigned int regno0
= true_regnum (operands
[0]) ;
16826 unsigned int regno1
= -1;
16827 unsigned int regno2
= -1;
16828 unsigned int split_cost
= 0;
16829 struct ix86_address parts
;
16832 /* Check we need to optimize. */
16833 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
16836 /* Check it is correct to split here. */
16837 if (!ix86_ok_to_clobber_flags(insn
))
16840 ok
= ix86_decompose_address (operands
[1], &parts
);
16843 /* We should not split into add if non legitimate pic
16844 operand is used as displacement. */
16845 if (parts
.disp
&& flag_pic
&& !LEGITIMATE_PIC_OPERAND_P (parts
.disp
))
16849 regno1
= true_regnum (parts
.base
);
16851 regno2
= true_regnum (parts
.index
);
16853 /* Compute how many cycles we will add to execution time
16854 if split lea into a sequence of instructions. */
16855 if (parts
.base
|| parts
.index
)
16857 /* Have to use mov instruction if non desctructive
16858 destination form is used. */
16859 if (regno1
!= regno0
&& regno2
!= regno0
)
16862 /* Have to add index to base if both exist. */
16863 if (parts
.base
&& parts
.index
)
16866 /* Have to use shift and adds if scale is 2 or greater. */
16867 if (parts
.scale
> 1)
16869 if (regno0
!= regno1
)
16871 else if (regno2
== regno0
)
16874 split_cost
+= parts
.scale
;
16877 /* Have to use add instruction with immediate if
16878 disp is non zero. */
16879 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
16882 /* Subtract the price of lea. */
16886 return !ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, split_cost
);
16889 /* Emit x86 binary operand CODE in mode MODE, where the first operand
16890 matches destination. RTX includes clobber of FLAGS_REG. */
16893 ix86_emit_binop (enum rtx_code code
, enum machine_mode mode
,
16898 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, dst
, src
));
16899 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
16901 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
16904 /* Split lea instructions into a sequence of instructions
16905 which are executed on ALU to avoid AGU stalls.
16906 It is assumed that it is allowed to clobber flags register
16907 at lea position. */
16910 ix86_split_lea_for_addr (rtx operands
[], enum machine_mode mode
)
16912 unsigned int regno0
= true_regnum (operands
[0]) ;
16913 unsigned int regno1
= INVALID_REGNUM
;
16914 unsigned int regno2
= INVALID_REGNUM
;
16915 struct ix86_address parts
;
16919 ok
= ix86_decompose_address (operands
[1], &parts
);
16924 if (GET_MODE (parts
.base
) != mode
)
16925 parts
.base
= gen_rtx_SUBREG (mode
, parts
.base
, 0);
16926 regno1
= true_regnum (parts
.base
);
16931 if (GET_MODE (parts
.index
) != mode
)
16932 parts
.index
= gen_rtx_SUBREG (mode
, parts
.index
, 0);
16933 regno2
= true_regnum (parts
.index
);
16936 if (parts
.scale
> 1)
16938 /* Case r1 = r1 + ... */
16939 if (regno1
== regno0
)
16941 /* If we have a case r1 = r1 + C * r1 then we
16942 should use multiplication which is very
16943 expensive. Assume cost model is wrong if we
16944 have such case here. */
16945 gcc_assert (regno2
!= regno0
);
16947 for (adds
= parts
.scale
; adds
> 0; adds
--)
16948 ix86_emit_binop (PLUS
, mode
, operands
[0], parts
.index
);
16952 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
16953 if (regno0
!= regno2
)
16954 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], parts
.index
));
16956 /* Use shift for scaling. */
16957 ix86_emit_binop (ASHIFT
, mode
, operands
[0],
16958 GEN_INT (exact_log2 (parts
.scale
)));
16961 ix86_emit_binop (PLUS
, mode
, operands
[0], parts
.base
);
16963 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
16964 ix86_emit_binop (PLUS
, mode
, operands
[0], parts
.disp
);
16967 else if (!parts
.base
&& !parts
.index
)
16969 gcc_assert(parts
.disp
);
16970 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], parts
.disp
));
16976 if (regno0
!= regno2
)
16977 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], parts
.index
));
16979 else if (!parts
.index
)
16981 if (regno0
!= regno1
)
16982 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], parts
.base
));
16986 if (regno0
== regno1
)
16988 else if (regno0
== regno2
)
16992 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], parts
.base
));
16996 ix86_emit_binop (PLUS
, mode
, operands
[0], tmp
);
16999 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
17000 ix86_emit_binop (PLUS
, mode
, operands
[0], parts
.disp
);
/* Return true if it is ok to optimize an ADD operation to LEA
   operation to avoid flag register consumption.  For most processors,
   ADD is faster than LEA.  For the processors like ATOM, if the
   destination register of LEA holds an actual address which will be
   used soon, LEA is better and otherwise ADD is better.  */
17011 ix86_lea_for_add_ok (rtx insn
, rtx operands
[])
17013 unsigned int regno0
= true_regnum (operands
[0]);
17014 unsigned int regno1
= true_regnum (operands
[1]);
17015 unsigned int regno2
= true_regnum (operands
[2]);
17017 /* If a = b + c, (a!=b && a!=c), must use lea form. */
17018 if (regno0
!= regno1
&& regno0
!= regno2
)
17021 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17024 return ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, 0);
17027 /* Return true if destination reg of SET_BODY is shift count of
17031 ix86_dep_by_shift_count_body (const_rtx set_body
, const_rtx use_body
)
17037 /* Retrieve destination of SET_BODY. */
17038 switch (GET_CODE (set_body
))
17041 set_dest
= SET_DEST (set_body
);
17042 if (!set_dest
|| !REG_P (set_dest
))
17046 for (i
= XVECLEN (set_body
, 0) - 1; i
>= 0; i
--)
17047 if (ix86_dep_by_shift_count_body (XVECEXP (set_body
, 0, i
),
17055 /* Retrieve shift count of USE_BODY. */
17056 switch (GET_CODE (use_body
))
17059 shift_rtx
= XEXP (use_body
, 1);
17062 for (i
= XVECLEN (use_body
, 0) - 1; i
>= 0; i
--)
17063 if (ix86_dep_by_shift_count_body (set_body
,
17064 XVECEXP (use_body
, 0, i
)))
17072 && (GET_CODE (shift_rtx
) == ASHIFT
17073 || GET_CODE (shift_rtx
) == LSHIFTRT
17074 || GET_CODE (shift_rtx
) == ASHIFTRT
17075 || GET_CODE (shift_rtx
) == ROTATE
17076 || GET_CODE (shift_rtx
) == ROTATERT
))
17078 rtx shift_count
= XEXP (shift_rtx
, 1);
17080 /* Return true if shift count is dest of SET_BODY. */
17081 if (REG_P (shift_count
)
17082 && true_regnum (set_dest
) == true_regnum (shift_count
))
17089 /* Return true if destination reg of SET_INSN is shift count of
17093 ix86_dep_by_shift_count (const_rtx set_insn
, const_rtx use_insn
)
17095 return ix86_dep_by_shift_count_body (PATTERN (set_insn
),
17096 PATTERN (use_insn
));
17099 /* Return TRUE or FALSE depending on whether the unary operator meets the
17100 appropriate constraints. */
17103 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
17104 enum machine_mode mode ATTRIBUTE_UNUSED
,
17105 rtx operands
[2] ATTRIBUTE_UNUSED
)
17107 /* If one of operands is memory, source and destination must match. */
17108 if ((MEM_P (operands
[0])
17109 || MEM_P (operands
[1]))
17110 && ! rtx_equal_p (operands
[0], operands
[1]))
17115 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
17116 are ok, keeping in mind the possible movddup alternative. */
17119 ix86_vec_interleave_v2df_operator_ok (rtx operands
[3], bool high
)
17121 if (MEM_P (operands
[0]))
17122 return rtx_equal_p (operands
[0], operands
[1 + high
]);
17123 if (MEM_P (operands
[1]) && MEM_P (operands
[2]))
17124 return TARGET_SSE3
&& rtx_equal_p (operands
[1], operands
[2]);
17128 /* Post-reload splitter for converting an SF or DFmode value in an
17129 SSE register into an unsigned SImode. */
17132 ix86_split_convert_uns_si_sse (rtx operands
[])
17134 enum machine_mode vecmode
;
17135 rtx value
, large
, zero_or_two31
, input
, two31
, x
;
17137 large
= operands
[1];
17138 zero_or_two31
= operands
[2];
17139 input
= operands
[3];
17140 two31
= operands
[4];
17141 vecmode
= GET_MODE (large
);
17142 value
= gen_rtx_REG (vecmode
, REGNO (operands
[0]));
17144 /* Load up the value into the low element. We must ensure that the other
17145 elements are valid floats -- zero is the easiest such value. */
17148 if (vecmode
== V4SFmode
)
17149 emit_insn (gen_vec_setv4sf_0 (value
, CONST0_RTX (V4SFmode
), input
));
17151 emit_insn (gen_sse2_loadlpd (value
, CONST0_RTX (V2DFmode
), input
));
17155 input
= gen_rtx_REG (vecmode
, REGNO (input
));
17156 emit_move_insn (value
, CONST0_RTX (vecmode
));
17157 if (vecmode
== V4SFmode
)
17158 emit_insn (gen_sse_movss (value
, value
, input
));
17160 emit_insn (gen_sse2_movsd (value
, value
, input
));
17163 emit_move_insn (large
, two31
);
17164 emit_move_insn (zero_or_two31
, MEM_P (two31
) ? large
: two31
);
17166 x
= gen_rtx_fmt_ee (LE
, vecmode
, large
, value
);
17167 emit_insn (gen_rtx_SET (VOIDmode
, large
, x
));
17169 x
= gen_rtx_AND (vecmode
, zero_or_two31
, large
);
17170 emit_insn (gen_rtx_SET (VOIDmode
, zero_or_two31
, x
));
17172 x
= gen_rtx_MINUS (vecmode
, value
, zero_or_two31
);
17173 emit_insn (gen_rtx_SET (VOIDmode
, value
, x
));
17175 large
= gen_rtx_REG (V4SImode
, REGNO (large
));
17176 emit_insn (gen_ashlv4si3 (large
, large
, GEN_INT (31)));
17178 x
= gen_rtx_REG (V4SImode
, REGNO (value
));
17179 if (vecmode
== V4SFmode
)
17180 emit_insn (gen_fix_truncv4sfv4si2 (x
, value
));
17182 emit_insn (gen_sse2_cvttpd2dq (x
, value
));
17185 emit_insn (gen_xorv4si3 (value
, value
, large
));
17188 /* Convert an unsigned DImode value into a DFmode, using only SSE.
17189 Expects the 64-bit DImode to be supplied in a pair of integral
17190 registers. Requires SSE2; will use SSE3 if available. For x86_32,
17191 -mfpmath=sse, !optimize_size only. */
17194 ix86_expand_convert_uns_didf_sse (rtx target
, rtx input
)
17196 REAL_VALUE_TYPE bias_lo_rvt
, bias_hi_rvt
;
17197 rtx int_xmm
, fp_xmm
;
17198 rtx biases
, exponents
;
17201 int_xmm
= gen_reg_rtx (V4SImode
);
17202 if (TARGET_INTER_UNIT_MOVES
)
17203 emit_insn (gen_movdi_to_sse (int_xmm
, input
));
17204 else if (TARGET_SSE_SPLIT_REGS
)
17206 emit_clobber (int_xmm
);
17207 emit_move_insn (gen_lowpart (DImode
, int_xmm
), input
);
17211 x
= gen_reg_rtx (V2DImode
);
17212 ix86_expand_vector_init_one_nonzero (false, V2DImode
, x
, input
, 0);
17213 emit_move_insn (int_xmm
, gen_lowpart (V4SImode
, x
));
17216 x
= gen_rtx_CONST_VECTOR (V4SImode
,
17217 gen_rtvec (4, GEN_INT (0x43300000UL
),
17218 GEN_INT (0x45300000UL
),
17219 const0_rtx
, const0_rtx
));
17220 exponents
= validize_mem (force_const_mem (V4SImode
, x
));
17222 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
17223 emit_insn (gen_vec_interleave_lowv4si (int_xmm
, int_xmm
, exponents
));
17225 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
17226 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
17227 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
17228 (0x1.0p84 + double(fp_value_hi_xmm)).
17229 Note these exponents differ by 32. */
17231 fp_xmm
= copy_to_mode_reg (V2DFmode
, gen_lowpart (V2DFmode
, int_xmm
));
17233 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
17234 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
17235 real_ldexp (&bias_lo_rvt
, &dconst1
, 52);
17236 real_ldexp (&bias_hi_rvt
, &dconst1
, 84);
17237 biases
= const_double_from_real_value (bias_lo_rvt
, DFmode
);
17238 x
= const_double_from_real_value (bias_hi_rvt
, DFmode
);
17239 biases
= gen_rtx_CONST_VECTOR (V2DFmode
, gen_rtvec (2, biases
, x
));
17240 biases
= validize_mem (force_const_mem (V2DFmode
, biases
));
17241 emit_insn (gen_subv2df3 (fp_xmm
, fp_xmm
, biases
));
17243 /* Add the upper and lower DFmode values together. */
17245 emit_insn (gen_sse3_haddv2df3 (fp_xmm
, fp_xmm
, fp_xmm
));
17248 x
= copy_to_mode_reg (V2DFmode
, fp_xmm
);
17249 emit_insn (gen_vec_interleave_highv2df (fp_xmm
, fp_xmm
, fp_xmm
));
17250 emit_insn (gen_addv2df3 (fp_xmm
, fp_xmm
, x
));
17253 ix86_expand_vector_extract (false, target
, fp_xmm
, 0);
17256 /* Not used, but eases macroization of patterns. */
17258 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED
,
17259 rtx input ATTRIBUTE_UNUSED
)
17261 gcc_unreachable ();
17264 /* Convert an unsigned SImode value into a DFmode. Only currently used
17265 for SSE, but applicable anywhere. */
17268 ix86_expand_convert_uns_sidf_sse (rtx target
, rtx input
)
17270 REAL_VALUE_TYPE TWO31r
;
17273 x
= expand_simple_binop (SImode
, PLUS
, input
, GEN_INT (-2147483647 - 1),
17274 NULL
, 1, OPTAB_DIRECT
);
17276 fp
= gen_reg_rtx (DFmode
);
17277 emit_insn (gen_floatsidf2 (fp
, x
));
17279 real_ldexp (&TWO31r
, &dconst1
, 31);
17280 x
= const_double_from_real_value (TWO31r
, DFmode
);
17282 x
= expand_simple_binop (DFmode
, PLUS
, fp
, x
, target
, 0, OPTAB_DIRECT
);
17284 emit_move_insn (target
, x
);
17287 /* Convert a signed DImode value into a DFmode. Only used for SSE in
17288 32-bit mode; otherwise we have a direct convert instruction. */
17291 ix86_expand_convert_sign_didf_sse (rtx target
, rtx input
)
17293 REAL_VALUE_TYPE TWO32r
;
17294 rtx fp_lo
, fp_hi
, x
;
17296 fp_lo
= gen_reg_rtx (DFmode
);
17297 fp_hi
= gen_reg_rtx (DFmode
);
17299 emit_insn (gen_floatsidf2 (fp_hi
, gen_highpart (SImode
, input
)));
17301 real_ldexp (&TWO32r
, &dconst1
, 32);
17302 x
= const_double_from_real_value (TWO32r
, DFmode
);
17303 fp_hi
= expand_simple_binop (DFmode
, MULT
, fp_hi
, x
, fp_hi
, 0, OPTAB_DIRECT
);
17305 ix86_expand_convert_uns_sidf_sse (fp_lo
, gen_lowpart (SImode
, input
));
17307 x
= expand_simple_binop (DFmode
, PLUS
, fp_hi
, fp_lo
, target
,
17310 emit_move_insn (target
, x
);
17313 /* Convert an unsigned SImode value into a SFmode, using only SSE.
17314 For x86_32, -mfpmath=sse, !optimize_size only. */
17316 ix86_expand_convert_uns_sisf_sse (rtx target
, rtx input
)
17318 REAL_VALUE_TYPE ONE16r
;
17319 rtx fp_hi
, fp_lo
, int_hi
, int_lo
, x
;
17321 real_ldexp (&ONE16r
, &dconst1
, 16);
17322 x
= const_double_from_real_value (ONE16r
, SFmode
);
17323 int_lo
= expand_simple_binop (SImode
, AND
, input
, GEN_INT(0xffff),
17324 NULL
, 0, OPTAB_DIRECT
);
17325 int_hi
= expand_simple_binop (SImode
, LSHIFTRT
, input
, GEN_INT(16),
17326 NULL
, 0, OPTAB_DIRECT
);
17327 fp_hi
= gen_reg_rtx (SFmode
);
17328 fp_lo
= gen_reg_rtx (SFmode
);
17329 emit_insn (gen_floatsisf2 (fp_hi
, int_hi
));
17330 emit_insn (gen_floatsisf2 (fp_lo
, int_lo
));
17331 fp_hi
= expand_simple_binop (SFmode
, MULT
, fp_hi
, x
, fp_hi
,
17333 fp_hi
= expand_simple_binop (SFmode
, PLUS
, fp_hi
, fp_lo
, target
,
17335 if (!rtx_equal_p (target
, fp_hi
))
17336 emit_move_insn (target
, fp_hi
);
17339 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
17340 a vector of unsigned ints VAL to vector of floats TARGET. */
17343 ix86_expand_vector_convert_uns_vsivsf (rtx target
, rtx val
)
17346 REAL_VALUE_TYPE TWO16r
;
17347 enum machine_mode intmode
= GET_MODE (val
);
17348 enum machine_mode fltmode
= GET_MODE (target
);
17349 rtx (*cvt
) (rtx
, rtx
);
17351 if (intmode
== V4SImode
)
17352 cvt
= gen_floatv4siv4sf2
;
17354 cvt
= gen_floatv8siv8sf2
;
17355 tmp
[0] = ix86_build_const_vector (intmode
, 1, GEN_INT (0xffff));
17356 tmp
[0] = force_reg (intmode
, tmp
[0]);
17357 tmp
[1] = expand_simple_binop (intmode
, AND
, val
, tmp
[0], NULL_RTX
, 1,
17359 tmp
[2] = expand_simple_binop (intmode
, LSHIFTRT
, val
, GEN_INT (16),
17360 NULL_RTX
, 1, OPTAB_DIRECT
);
17361 tmp
[3] = gen_reg_rtx (fltmode
);
17362 emit_insn (cvt (tmp
[3], tmp
[1]));
17363 tmp
[4] = gen_reg_rtx (fltmode
);
17364 emit_insn (cvt (tmp
[4], tmp
[2]));
17365 real_ldexp (&TWO16r
, &dconst1
, 16);
17366 tmp
[5] = const_double_from_real_value (TWO16r
, SFmode
);
17367 tmp
[5] = force_reg (fltmode
, ix86_build_const_vector (fltmode
, 1, tmp
[5]));
17368 tmp
[6] = expand_simple_binop (fltmode
, MULT
, tmp
[4], tmp
[5], NULL_RTX
, 1,
17370 tmp
[7] = expand_simple_binop (fltmode
, PLUS
, tmp
[3], tmp
[6], target
, 1,
17372 if (tmp
[7] != target
)
17373 emit_move_insn (target
, tmp
[7]);
17376 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
17377 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
17378 This is done by doing just signed conversion if < 0x1p31, and otherwise by
17379 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
17382 ix86_expand_adjust_ufix_to_sfix_si (rtx val
, rtx
*xorp
)
17384 REAL_VALUE_TYPE TWO31r
;
17385 rtx two31r
, tmp
[4];
17386 enum machine_mode mode
= GET_MODE (val
);
17387 enum machine_mode scalarmode
= GET_MODE_INNER (mode
);
17388 enum machine_mode intmode
= GET_MODE_SIZE (mode
) == 32 ? V8SImode
: V4SImode
;
17389 rtx (*cmp
) (rtx
, rtx
, rtx
, rtx
);
17392 for (i
= 0; i
< 3; i
++)
17393 tmp
[i
] = gen_reg_rtx (mode
);
17394 real_ldexp (&TWO31r
, &dconst1
, 31);
17395 two31r
= const_double_from_real_value (TWO31r
, scalarmode
);
17396 two31r
= ix86_build_const_vector (mode
, 1, two31r
);
17397 two31r
= force_reg (mode
, two31r
);
17400 case V8SFmode
: cmp
= gen_avx_maskcmpv8sf3
; break;
17401 case V4SFmode
: cmp
= gen_sse_maskcmpv4sf3
; break;
17402 case V4DFmode
: cmp
= gen_avx_maskcmpv4df3
; break;
17403 case V2DFmode
: cmp
= gen_sse2_maskcmpv2df3
; break;
17404 default: gcc_unreachable ();
17406 tmp
[3] = gen_rtx_LE (mode
, two31r
, val
);
17407 emit_insn (cmp (tmp
[0], two31r
, val
, tmp
[3]));
17408 tmp
[1] = expand_simple_binop (mode
, AND
, tmp
[0], two31r
, tmp
[1],
17410 if (intmode
== V4SImode
|| TARGET_AVX2
)
17411 *xorp
= expand_simple_binop (intmode
, ASHIFT
,
17412 gen_lowpart (intmode
, tmp
[0]),
17413 GEN_INT (31), NULL_RTX
, 0,
17417 rtx two31
= GEN_INT ((unsigned HOST_WIDE_INT
) 1 << 31);
17418 two31
= ix86_build_const_vector (intmode
, 1, two31
);
17419 *xorp
= expand_simple_binop (intmode
, AND
,
17420 gen_lowpart (intmode
, tmp
[0]),
17421 two31
, NULL_RTX
, 0,
17424 return expand_simple_binop (mode
, MINUS
, val
, tmp
[1], tmp
[2],
17428 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
17429 then replicate the value for all elements of the vector
17433 ix86_build_const_vector (enum machine_mode mode
, bool vect
, rtx value
)
17437 enum machine_mode scalar_mode
;
17454 n_elt
= GET_MODE_NUNITS (mode
);
17455 v
= rtvec_alloc (n_elt
);
17456 scalar_mode
= GET_MODE_INNER (mode
);
17458 RTVEC_ELT (v
, 0) = value
;
17460 for (i
= 1; i
< n_elt
; ++i
)
17461 RTVEC_ELT (v
, i
) = vect
? value
: CONST0_RTX (scalar_mode
);
17463 return gen_rtx_CONST_VECTOR (mode
, v
);
17466 gcc_unreachable ();
17470 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
17471 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
17472 for an SSE register. If VECT is true, then replicate the mask for
17473 all elements of the vector register. If INVERT is true, then create
17474 a mask excluding the sign bit. */
17477 ix86_build_signbit_mask (enum machine_mode mode
, bool vect
, bool invert
)
17479 enum machine_mode vec_mode
, imode
;
17480 HOST_WIDE_INT hi
, lo
;
17485 /* Find the sign bit, sign extended to 2*HWI. */
17493 mode
= GET_MODE_INNER (mode
);
17495 lo
= 0x80000000, hi
= lo
< 0;
17503 mode
= GET_MODE_INNER (mode
);
17505 if (HOST_BITS_PER_WIDE_INT
>= 64)
17506 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
17508 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
17513 vec_mode
= VOIDmode
;
17514 if (HOST_BITS_PER_WIDE_INT
>= 64)
17517 lo
= 0, hi
= (HOST_WIDE_INT
)1 << shift
;
17524 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
17528 lo
= ~lo
, hi
= ~hi
;
17534 mask
= immed_double_const (lo
, hi
, imode
);
17536 vec
= gen_rtvec (2, v
, mask
);
17537 v
= gen_rtx_CONST_VECTOR (V2DImode
, vec
);
17538 v
= copy_to_mode_reg (mode
, gen_lowpart (mode
, v
));
17545 gcc_unreachable ();
17549 lo
= ~lo
, hi
= ~hi
;
17551 /* Force this value into the low part of a fp vector constant. */
17552 mask
= immed_double_const (lo
, hi
, imode
);
17553 mask
= gen_lowpart (mode
, mask
);
17555 if (vec_mode
== VOIDmode
)
17556 return force_reg (mode
, mask
);
17558 v
= ix86_build_const_vector (vec_mode
, vect
, mask
);
17559 return force_reg (vec_mode
, v
);
17562 /* Generate code for floating point ABS or NEG. */
17565 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
17568 rtx mask
, set
, dst
, src
;
17569 bool use_sse
= false;
17570 bool vector_mode
= VECTOR_MODE_P (mode
);
17571 enum machine_mode vmode
= mode
;
17575 else if (mode
== TFmode
)
17577 else if (TARGET_SSE_MATH
)
17579 use_sse
= SSE_FLOAT_MODE_P (mode
);
17580 if (mode
== SFmode
)
17582 else if (mode
== DFmode
)
17586 /* NEG and ABS performed with SSE use bitwise mask operations.
17587 Create the appropriate mask now. */
17589 mask
= ix86_build_signbit_mask (vmode
, vector_mode
, code
== ABS
);
17596 set
= gen_rtx_fmt_e (code
, mode
, src
);
17597 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
17604 use
= gen_rtx_USE (VOIDmode
, mask
);
17606 par
= gen_rtvec (2, set
, use
);
17609 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
17610 par
= gen_rtvec (3, set
, use
, clob
);
17612 emit_insn (gen_rtx_PARALLEL (VOIDmode
, par
));
17618 /* Expand a copysign operation. Special case operand 0 being a constant. */
17621 ix86_expand_copysign (rtx operands
[])
17623 enum machine_mode mode
, vmode
;
17624 rtx dest
, op0
, op1
, mask
, nmask
;
17626 dest
= operands
[0];
17630 mode
= GET_MODE (dest
);
17632 if (mode
== SFmode
)
17634 else if (mode
== DFmode
)
17639 if (GET_CODE (op0
) == CONST_DOUBLE
)
17641 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
);
17643 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
17644 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
17646 if (mode
== SFmode
|| mode
== DFmode
)
17648 if (op0
== CONST0_RTX (mode
))
17649 op0
= CONST0_RTX (vmode
);
17652 rtx v
= ix86_build_const_vector (vmode
, false, op0
);
17654 op0
= force_reg (vmode
, v
);
17657 else if (op0
!= CONST0_RTX (mode
))
17658 op0
= force_reg (mode
, op0
);
17660 mask
= ix86_build_signbit_mask (vmode
, 0, 0);
17662 if (mode
== SFmode
)
17663 copysign_insn
= gen_copysignsf3_const
;
17664 else if (mode
== DFmode
)
17665 copysign_insn
= gen_copysigndf3_const
;
17667 copysign_insn
= gen_copysigntf3_const
;
17669 emit_insn (copysign_insn (dest
, op0
, op1
, mask
));
17673 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
17675 nmask
= ix86_build_signbit_mask (vmode
, 0, 1);
17676 mask
= ix86_build_signbit_mask (vmode
, 0, 0);
17678 if (mode
== SFmode
)
17679 copysign_insn
= gen_copysignsf3_var
;
17680 else if (mode
== DFmode
)
17681 copysign_insn
= gen_copysigndf3_var
;
17683 copysign_insn
= gen_copysigntf3_var
;
17685 emit_insn (copysign_insn (dest
, NULL_RTX
, op0
, op1
, nmask
, mask
));
17689 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
17690 be a constant, and so has already been expanded into a vector constant. */
17693 ix86_split_copysign_const (rtx operands
[])
17695 enum machine_mode mode
, vmode
;
17696 rtx dest
, op0
, mask
, x
;
17698 dest
= operands
[0];
17700 mask
= operands
[3];
17702 mode
= GET_MODE (dest
);
17703 vmode
= GET_MODE (mask
);
17705 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
17706 x
= gen_rtx_AND (vmode
, dest
, mask
);
17707 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
17709 if (op0
!= CONST0_RTX (vmode
))
17711 x
= gen_rtx_IOR (vmode
, dest
, op0
);
17712 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
17716 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
17717 so we have to do two masks. */
17720 ix86_split_copysign_var (rtx operands
[])
17722 enum machine_mode mode
, vmode
;
17723 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
17725 dest
= operands
[0];
17726 scratch
= operands
[1];
17729 nmask
= operands
[4];
17730 mask
= operands
[5];
17732 mode
= GET_MODE (dest
);
17733 vmode
= GET_MODE (mask
);
17735 if (rtx_equal_p (op0
, op1
))
17737 /* Shouldn't happen often (it's useless, obviously), but when it does
17738 we'd generate incorrect code if we continue below. */
17739 emit_move_insn (dest
, op0
);
17743 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
17745 gcc_assert (REGNO (op1
) == REGNO (scratch
));
17747 x
= gen_rtx_AND (vmode
, scratch
, mask
);
17748 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
17751 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
17752 x
= gen_rtx_NOT (vmode
, dest
);
17753 x
= gen_rtx_AND (vmode
, x
, op0
);
17754 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
17758 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
17760 x
= gen_rtx_AND (vmode
, scratch
, mask
);
17762 else /* alternative 2,4 */
17764 gcc_assert (REGNO (mask
) == REGNO (scratch
));
17765 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
17766 x
= gen_rtx_AND (vmode
, scratch
, op1
);
17768 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
17770 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
17772 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
17773 x
= gen_rtx_AND (vmode
, dest
, nmask
);
17775 else /* alternative 3,4 */
17777 gcc_assert (REGNO (nmask
) == REGNO (dest
));
17779 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
17780 x
= gen_rtx_AND (vmode
, dest
, op0
);
17782 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
17785 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
17786 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
17789 /* Return TRUE or FALSE depending on whether the first SET in INSN
17790 has source and destination with matching CC modes, and that the
17791 CC mode is at least as constrained as REQ_MODE. */
17794 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
17797 enum machine_mode set_mode
;
17799 set
= PATTERN (insn
);
17800 if (GET_CODE (set
) == PARALLEL
)
17801 set
= XVECEXP (set
, 0, 0);
17802 gcc_assert (GET_CODE (set
) == SET
);
17803 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
17805 set_mode
= GET_MODE (SET_DEST (set
));
17809 if (req_mode
!= CCNOmode
17810 && (req_mode
!= CCmode
17811 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
17815 if (req_mode
== CCGCmode
)
17819 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
17823 if (req_mode
== CCZmode
)
17833 if (set_mode
!= req_mode
)
17838 gcc_unreachable ();
17841 return GET_MODE (SET_SRC (set
)) == set_mode
;
17844 /* Generate insn patterns to do an integer compare of OPERANDS. */
17847 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
17849 enum machine_mode cmpmode
;
17852 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
17853 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
17855 /* This is very simple, but making the interface the same as in the
17856 FP case makes the rest of the code easier. */
17857 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
17858 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
17860 /* Return the test that should be put into the flags user, i.e.
17861 the bcc, scc, or cmov instruction. */
17862 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
17865 /* Figure out whether to use ordered or unordered fp comparisons.
17866 Return the appropriate mode to use. */
17869 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
17871 /* ??? In order to make all comparisons reversible, we do all comparisons
17872 non-trapping when compiling for IEEE. Once gcc is able to distinguish
17873 all forms trapping and nontrapping comparisons, we can make inequality
17874 comparisons trapping again, since it results in better code when using
17875 FCOM based compares. */
17876 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
17880 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
17882 enum machine_mode mode
= GET_MODE (op0
);
17884 if (SCALAR_FLOAT_MODE_P (mode
))
17886 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
17887 return ix86_fp_compare_mode (code
);
17892 /* Only zero flag is needed. */
17893 case EQ
: /* ZF=0 */
17894 case NE
: /* ZF!=0 */
17896 /* Codes needing carry flag. */
17897 case GEU
: /* CF=0 */
17898 case LTU
: /* CF=1 */
17899 /* Detect overflow checks. They need just the carry flag. */
17900 if (GET_CODE (op0
) == PLUS
17901 && rtx_equal_p (op1
, XEXP (op0
, 0)))
17905 case GTU
: /* CF=0 & ZF=0 */
17906 case LEU
: /* CF=1 | ZF=1 */
17907 /* Detect overflow checks. They need just the carry flag. */
17908 if (GET_CODE (op0
) == MINUS
17909 && rtx_equal_p (op1
, XEXP (op0
, 0)))
17913 /* Codes possibly doable only with sign flag when
17914 comparing against zero. */
17915 case GE
: /* SF=OF or SF=0 */
17916 case LT
: /* SF<>OF or SF=1 */
17917 if (op1
== const0_rtx
)
17920 /* For other cases Carry flag is not required. */
17922 /* Codes doable only with sign flag when comparing
17923 against zero, but we miss jump instruction for it
17924 so we need to use relational tests against overflow
17925 that thus needs to be zero. */
17926 case GT
: /* ZF=0 & SF=OF */
17927 case LE
: /* ZF=1 | SF<>OF */
17928 if (op1
== const0_rtx
)
17932 /* strcmp pattern do (use flags) and combine may ask us for proper
17937 gcc_unreachable ();
17941 /* Return the fixed registers used for condition codes. */
17944 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
17951 /* If two condition code modes are compatible, return a condition code
17952 mode which is compatible with both. Otherwise, return
17955 static enum machine_mode
17956 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
17961 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
17964 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
17965 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
17971 gcc_unreachable ();
18001 /* These are only compatible with themselves, which we already
18008 /* Return a comparison we can do and that it is equivalent to
18009 swap_condition (code) apart possibly from orderedness.
18010 But, never change orderedness if TARGET_IEEE_FP, returning
18011 UNKNOWN in that case if necessary. */
18013 static enum rtx_code
18014 ix86_fp_swap_condition (enum rtx_code code
)
18018 case GT
: /* GTU - CF=0 & ZF=0 */
18019 return TARGET_IEEE_FP
? UNKNOWN
: UNLT
;
18020 case GE
: /* GEU - CF=0 */
18021 return TARGET_IEEE_FP
? UNKNOWN
: UNLE
;
18022 case UNLT
: /* LTU - CF=1 */
18023 return TARGET_IEEE_FP
? UNKNOWN
: GT
;
18024 case UNLE
: /* LEU - CF=1 | ZF=1 */
18025 return TARGET_IEEE_FP
? UNKNOWN
: GE
;
18027 return swap_condition (code
);
18031 /* Return cost of comparison CODE using the best strategy for performance.
18032 All following functions do use number of instructions as a cost metrics.
18033 In future this should be tweaked to compute bytes for optimize_size and
18034 take into account performance of various instructions on various CPUs. */
18037 ix86_fp_comparison_cost (enum rtx_code code
)
18041 /* The cost of code using bit-twiddling on %ah. */
18058 arith_cost
= TARGET_IEEE_FP
? 5 : 4;
18062 arith_cost
= TARGET_IEEE_FP
? 6 : 4;
18065 gcc_unreachable ();
18068 switch (ix86_fp_comparison_strategy (code
))
18070 case IX86_FPCMP_COMI
:
18071 return arith_cost
> 4 ? 3 : 2;
18072 case IX86_FPCMP_SAHF
:
18073 return arith_cost
> 4 ? 4 : 3;
18079 /* Return strategy to use for floating-point. We assume that fcomi is always
18080 preferrable where available, since that is also true when looking at size
18081 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
18083 enum ix86_fpcmp_strategy
18084 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED
)
18086 /* Do fcomi/sahf based test when profitable. */
18089 return IX86_FPCMP_COMI
;
18091 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_function_for_size_p (cfun
)))
18092 return IX86_FPCMP_SAHF
;
18094 return IX86_FPCMP_ARITH
;
18097 /* Swap, force into registers, or otherwise massage the two operands
18098 to a fp comparison. The operands are updated in place; the new
18099 comparison code is returned. */
18101 static enum rtx_code
18102 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
18104 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
18105 rtx op0
= *pop0
, op1
= *pop1
;
18106 enum machine_mode op_mode
= GET_MODE (op0
);
18107 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
18109 /* All of the unordered compare instructions only work on registers.
18110 The same is true of the fcomi compare instructions. The XFmode
18111 compare instructions require registers except when comparing
18112 against zero or when converting operand 1 from fixed point to
18116 && (fpcmp_mode
== CCFPUmode
18117 || (op_mode
== XFmode
18118 && ! (standard_80387_constant_p (op0
) == 1
18119 || standard_80387_constant_p (op1
) == 1)
18120 && GET_CODE (op1
) != FLOAT
)
18121 || ix86_fp_comparison_strategy (code
) == IX86_FPCMP_COMI
))
18123 op0
= force_reg (op_mode
, op0
);
18124 op1
= force_reg (op_mode
, op1
);
18128 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
18129 things around if they appear profitable, otherwise force op0
18130 into a register. */
18132 if (standard_80387_constant_p (op0
) == 0
18134 && ! (standard_80387_constant_p (op1
) == 0
18137 enum rtx_code new_code
= ix86_fp_swap_condition (code
);
18138 if (new_code
!= UNKNOWN
)
18141 tmp
= op0
, op0
= op1
, op1
= tmp
;
18147 op0
= force_reg (op_mode
, op0
);
18149 if (CONSTANT_P (op1
))
18151 int tmp
= standard_80387_constant_p (op1
);
18153 op1
= validize_mem (force_const_mem (op_mode
, op1
));
18157 op1
= force_reg (op_mode
, op1
);
18160 op1
= force_reg (op_mode
, op1
);
18164 /* Try to rearrange the comparison to make it cheaper. */
18165 if (ix86_fp_comparison_cost (code
)
18166 > ix86_fp_comparison_cost (swap_condition (code
))
18167 && (REG_P (op1
) || can_create_pseudo_p ()))
18170 tmp
= op0
, op0
= op1
, op1
= tmp
;
18171 code
= swap_condition (code
);
18173 op0
= force_reg (op_mode
, op0
);
18181 /* Convert comparison codes we use to represent FP comparison to integer
18182 code that will result in proper branch. Return UNKNOWN if no such code
18186 ix86_fp_compare_code_to_integer (enum rtx_code code
)
18215 /* Generate insn patterns to do a floating point compare of OPERANDS. */
18218 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
)
18220 enum machine_mode fpcmp_mode
, intcmp_mode
;
18223 fpcmp_mode
= ix86_fp_compare_mode (code
);
18224 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
18226 /* Do fcomi/sahf based test when profitable. */
18227 switch (ix86_fp_comparison_strategy (code
))
18229 case IX86_FPCMP_COMI
:
18230 intcmp_mode
= fpcmp_mode
;
18231 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
18232 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
18237 case IX86_FPCMP_SAHF
:
18238 intcmp_mode
= fpcmp_mode
;
18239 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
18240 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
18244 scratch
= gen_reg_rtx (HImode
);
18245 tmp2
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
18246 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, tmp2
)));
18249 case IX86_FPCMP_ARITH
:
18250 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
18251 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
18252 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
18254 scratch
= gen_reg_rtx (HImode
);
18255 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
18257 /* In the unordered case, we have to check C2 for NaN's, which
18258 doesn't happen to work out to anything nice combination-wise.
18259 So do some bit twiddling on the value we've got in AH to come
18260 up with an appropriate set of condition codes. */
18262 intcmp_mode
= CCNOmode
;
18267 if (code
== GT
|| !TARGET_IEEE_FP
)
18269 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
18274 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18275 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
18276 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
18277 intcmp_mode
= CCmode
;
18283 if (code
== LT
&& TARGET_IEEE_FP
)
18285 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18286 emit_insn (gen_cmpqi_ext_3 (scratch
, const1_rtx
));
18287 intcmp_mode
= CCmode
;
18292 emit_insn (gen_testqi_ext_ccno_0 (scratch
, const1_rtx
));
18298 if (code
== GE
|| !TARGET_IEEE_FP
)
18300 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
18305 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18306 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
, const1_rtx
));
18312 if (code
== LE
&& TARGET_IEEE_FP
)
18314 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18315 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
18316 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
18317 intcmp_mode
= CCmode
;
18322 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
18328 if (code
== EQ
&& TARGET_IEEE_FP
)
18330 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18331 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
18332 intcmp_mode
= CCmode
;
18337 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
18343 if (code
== NE
&& TARGET_IEEE_FP
)
18345 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18346 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
18352 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
18358 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
18362 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
18367 gcc_unreachable ();
18375 /* Return the test that should be put into the flags user, i.e.
18376 the bcc, scc, or cmov instruction. */
18377 return gen_rtx_fmt_ee (code
, VOIDmode
,
18378 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
18383 ix86_expand_compare (enum rtx_code code
, rtx op0
, rtx op1
)
18387 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_CC
)
18388 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, op0
, op1
);
18390 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
18392 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0
)));
18393 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
);
18396 ret
= ix86_expand_int_compare (code
, op0
, op1
);
18402 ix86_expand_branch (enum rtx_code code
, rtx op0
, rtx op1
, rtx label
)
18404 enum machine_mode mode
= GET_MODE (op0
);
18416 tmp
= ix86_expand_compare (code
, op0
, op1
);
18417 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
18418 gen_rtx_LABEL_REF (VOIDmode
, label
),
18420 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
18427 /* Expand DImode branch into multiple compare+branch. */
18429 rtx lo
[2], hi
[2], label2
;
18430 enum rtx_code code1
, code2
, code3
;
18431 enum machine_mode submode
;
18433 if (CONSTANT_P (op0
) && !CONSTANT_P (op1
))
18435 tmp
= op0
, op0
= op1
, op1
= tmp
;
18436 code
= swap_condition (code
);
18439 split_double_mode (mode
, &op0
, 1, lo
+0, hi
+0);
18440 split_double_mode (mode
, &op1
, 1, lo
+1, hi
+1);
18442 submode
= mode
== DImode
? SImode
: DImode
;
18444 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
18445 avoid two branches. This costs one extra insn, so disable when
18446 optimizing for size. */
18448 if ((code
== EQ
|| code
== NE
)
18449 && (!optimize_insn_for_size_p ()
18450 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
18455 if (hi
[1] != const0_rtx
)
18456 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
18457 NULL_RTX
, 0, OPTAB_WIDEN
);
18460 if (lo
[1] != const0_rtx
)
18461 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
18462 NULL_RTX
, 0, OPTAB_WIDEN
);
18464 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
18465 NULL_RTX
, 0, OPTAB_WIDEN
);
18467 ix86_expand_branch (code
, tmp
, const0_rtx
, label
);
18471 /* Otherwise, if we are doing less-than or greater-or-equal-than,
18472 op1 is a constant and the low word is zero, then we can just
18473 examine the high word. Similarly for low word -1 and
18474 less-or-equal-than or greater-than. */
18476 if (CONST_INT_P (hi
[1]))
18479 case LT
: case LTU
: case GE
: case GEU
:
18480 if (lo
[1] == const0_rtx
)
18482 ix86_expand_branch (code
, hi
[0], hi
[1], label
);
18486 case LE
: case LEU
: case GT
: case GTU
:
18487 if (lo
[1] == constm1_rtx
)
18489 ix86_expand_branch (code
, hi
[0], hi
[1], label
);
18497 /* Otherwise, we need two or three jumps. */
18499 label2
= gen_label_rtx ();
18502 code2
= swap_condition (code
);
18503 code3
= unsigned_condition (code
);
18507 case LT
: case GT
: case LTU
: case GTU
:
18510 case LE
: code1
= LT
; code2
= GT
; break;
18511 case GE
: code1
= GT
; code2
= LT
; break;
18512 case LEU
: code1
= LTU
; code2
= GTU
; break;
18513 case GEU
: code1
= GTU
; code2
= LTU
; break;
18515 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
18516 case NE
: code2
= UNKNOWN
; break;
18519 gcc_unreachable ();
18524 * if (hi(a) < hi(b)) goto true;
18525 * if (hi(a) > hi(b)) goto false;
18526 * if (lo(a) < lo(b)) goto true;
18530 if (code1
!= UNKNOWN
)
18531 ix86_expand_branch (code1
, hi
[0], hi
[1], label
);
18532 if (code2
!= UNKNOWN
)
18533 ix86_expand_branch (code2
, hi
[0], hi
[1], label2
);
18535 ix86_expand_branch (code3
, lo
[0], lo
[1], label
);
18537 if (code2
!= UNKNOWN
)
18538 emit_label (label2
);
18543 gcc_assert (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_CC
);
18548 /* Split branch based on floating point condition. */
18550 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
18551 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
18556 if (target2
!= pc_rtx
)
18559 code
= reverse_condition_maybe_unordered (code
);
18564 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
18567 /* Remove pushed operand from stack. */
18569 ix86_free_from_memory (GET_MODE (pushed
));
18571 i
= emit_jump_insn (gen_rtx_SET
18573 gen_rtx_IF_THEN_ELSE (VOIDmode
,
18574 condition
, target1
, target2
)));
18575 if (split_branch_probability
>= 0)
18576 add_reg_note (i
, REG_BR_PROB
, GEN_INT (split_branch_probability
));
18580 ix86_expand_setcc (rtx dest
, enum rtx_code code
, rtx op0
, rtx op1
)
18584 gcc_assert (GET_MODE (dest
) == QImode
);
18586 ret
= ix86_expand_compare (code
, op0
, op1
);
18587 PUT_MODE (ret
, QImode
);
18588 emit_insn (gen_rtx_SET (VOIDmode
, dest
, ret
));
18591 /* Expand comparison setting or clearing carry flag. Return true when
18592 successful and set pop for the operation. */
18594 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
18596 enum machine_mode mode
=
18597 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
18599 /* Do not handle double-mode compares that go through special path. */
18600 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
18603 if (SCALAR_FLOAT_MODE_P (mode
))
18605 rtx compare_op
, compare_seq
;
18607 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
18609 /* Shortcut: following common codes never translate
18610 into carry flag compares. */
18611 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
18612 || code
== ORDERED
|| code
== UNORDERED
)
18615 /* These comparisons require zero flag; swap operands so they won't. */
18616 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
18617 && !TARGET_IEEE_FP
)
18622 code
= swap_condition (code
);
18625 /* Try to expand the comparison and verify that we end up with
18626 carry flag based comparison. This fails to be true only when
18627 we decide to expand comparison using arithmetic that is not
18628 too common scenario. */
18630 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
);
18631 compare_seq
= get_insns ();
18634 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
18635 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
18636 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
18638 code
= GET_CODE (compare_op
);
18640 if (code
!= LTU
&& code
!= GEU
)
18643 emit_insn (compare_seq
);
18648 if (!INTEGRAL_MODE_P (mode
))
18657 /* Convert a==0 into (unsigned)a<1. */
18660 if (op1
!= const0_rtx
)
18663 code
= (code
== EQ
? LTU
: GEU
);
18666 /* Convert a>b into b<a or a>=b-1. */
18669 if (CONST_INT_P (op1
))
18671 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
18672 /* Bail out on overflow. We still can swap operands but that
18673 would force loading of the constant into register. */
18674 if (op1
== const0_rtx
18675 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
18677 code
= (code
== GTU
? GEU
: LTU
);
18684 code
= (code
== GTU
? LTU
: GEU
);
18688 /* Convert a>=0 into (unsigned)a<0x80000000. */
18691 if (mode
== DImode
|| op1
!= const0_rtx
)
18693 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
18694 code
= (code
== LT
? GEU
: LTU
);
18698 if (mode
== DImode
|| op1
!= constm1_rtx
)
18700 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
18701 code
= (code
== LE
? GEU
: LTU
);
18707 /* Swapping operands may cause constant to appear as first operand. */
18708 if (!nonimmediate_operand (op0
, VOIDmode
))
18710 if (!can_create_pseudo_p ())
18712 op0
= force_reg (mode
, op0
);
18714 *pop
= ix86_expand_compare (code
, op0
, op1
);
18715 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
18720 ix86_expand_int_movcc (rtx operands
[])
18722 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
18723 rtx compare_seq
, compare_op
;
18724 enum machine_mode mode
= GET_MODE (operands
[0]);
18725 bool sign_bit_compare_p
= false;
18726 rtx op0
= XEXP (operands
[1], 0);
18727 rtx op1
= XEXP (operands
[1], 1);
18730 compare_op
= ix86_expand_compare (code
, op0
, op1
);
18731 compare_seq
= get_insns ();
18734 compare_code
= GET_CODE (compare_op
);
18736 if ((op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
18737 || (op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
18738 sign_bit_compare_p
= true;
18740 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
18741 HImode insns, we'd be swallowed in word prefix ops. */
18743 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
18744 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
18745 && CONST_INT_P (operands
[2])
18746 && CONST_INT_P (operands
[3]))
18748 rtx out
= operands
[0];
18749 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
18750 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
18751 HOST_WIDE_INT diff
;
18754 /* Sign bit compares are better done using shifts than we do by using
18756 if (sign_bit_compare_p
18757 || ix86_expand_carry_flag_compare (code
, op0
, op1
, &compare_op
))
18759 /* Detect overlap between destination and compare sources. */
18762 if (!sign_bit_compare_p
)
18765 bool fpcmp
= false;
18767 compare_code
= GET_CODE (compare_op
);
18769 flags
= XEXP (compare_op
, 0);
18771 if (GET_MODE (flags
) == CCFPmode
18772 || GET_MODE (flags
) == CCFPUmode
)
18776 = ix86_fp_compare_code_to_integer (compare_code
);
18779 /* To simplify rest of code, restrict to the GEU case. */
18780 if (compare_code
== LTU
)
18782 HOST_WIDE_INT tmp
= ct
;
18785 compare_code
= reverse_condition (compare_code
);
18786 code
= reverse_condition (code
);
18791 PUT_CODE (compare_op
,
18792 reverse_condition_maybe_unordered
18793 (GET_CODE (compare_op
)));
18795 PUT_CODE (compare_op
,
18796 reverse_condition (GET_CODE (compare_op
)));
18800 if (reg_overlap_mentioned_p (out
, op0
)
18801 || reg_overlap_mentioned_p (out
, op1
))
18802 tmp
= gen_reg_rtx (mode
);
18804 if (mode
== DImode
)
18805 emit_insn (gen_x86_movdicc_0_m1 (tmp
, flags
, compare_op
));
18807 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
),
18808 flags
, compare_op
));
18812 if (code
== GT
|| code
== GE
)
18813 code
= reverse_condition (code
);
18816 HOST_WIDE_INT tmp
= ct
;
18821 tmp
= emit_store_flag (tmp
, code
, op0
, op1
, VOIDmode
, 0, -1);
18834 tmp
= expand_simple_binop (mode
, PLUS
,
18836 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
18847 tmp
= expand_simple_binop (mode
, IOR
,
18849 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
18851 else if (diff
== -1 && ct
)
18861 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
18863 tmp
= expand_simple_binop (mode
, PLUS
,
18864 copy_rtx (tmp
), GEN_INT (cf
),
18865 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
18873 * andl cf - ct, dest
18883 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
18886 tmp
= expand_simple_binop (mode
, AND
,
18888 gen_int_mode (cf
- ct
, mode
),
18889 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
18891 tmp
= expand_simple_binop (mode
, PLUS
,
18892 copy_rtx (tmp
), GEN_INT (ct
),
18893 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
18896 if (!rtx_equal_p (tmp
, out
))
18897 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
18904 enum machine_mode cmp_mode
= GET_MODE (op0
);
18907 tmp
= ct
, ct
= cf
, cf
= tmp
;
18910 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
18912 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
18914 /* We may be reversing unordered compare to normal compare, that
18915 is not valid in general (we may convert non-trapping condition
18916 to trapping one), however on i386 we currently emit all
18917 comparisons unordered. */
18918 compare_code
= reverse_condition_maybe_unordered (compare_code
);
18919 code
= reverse_condition_maybe_unordered (code
);
18923 compare_code
= reverse_condition (compare_code
);
18924 code
= reverse_condition (code
);
18928 compare_code
= UNKNOWN
;
18929 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
18930 && CONST_INT_P (op1
))
18932 if (op1
== const0_rtx
18933 && (code
== LT
|| code
== GE
))
18934 compare_code
= code
;
18935 else if (op1
== constm1_rtx
)
18939 else if (code
== GT
)
18944 /* Optimize dest = (op0 < 0) ? -1 : cf. */
18945 if (compare_code
!= UNKNOWN
18946 && GET_MODE (op0
) == GET_MODE (out
)
18947 && (cf
== -1 || ct
== -1))
18949 /* If lea code below could be used, only optimize
18950 if it results in a 2 insn sequence. */
18952 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
18953 || diff
== 3 || diff
== 5 || diff
== 9)
18954 || (compare_code
== LT
&& ct
== -1)
18955 || (compare_code
== GE
&& cf
== -1))
18958 * notl op1 (if necessary)
18966 code
= reverse_condition (code
);
18969 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, -1);
18971 out
= expand_simple_binop (mode
, IOR
,
18973 out
, 1, OPTAB_DIRECT
);
18974 if (out
!= operands
[0])
18975 emit_move_insn (operands
[0], out
);
18982 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
18983 || diff
== 3 || diff
== 5 || diff
== 9)
18984 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
18986 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
18992 * lea cf(dest*(ct-cf)),dest
18996 * This also catches the degenerate setcc-only case.
19002 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, 1);
19005 /* On x86_64 the lea instruction operates on Pmode, so we need
19006 to get arithmetics done in proper mode to match. */
19008 tmp
= copy_rtx (out
);
19012 out1
= copy_rtx (out
);
19013 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
19017 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
19023 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
19026 if (!rtx_equal_p (tmp
, out
))
19029 out
= force_operand (tmp
, copy_rtx (out
));
19031 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
19033 if (!rtx_equal_p (out
, operands
[0]))
19034 emit_move_insn (operands
[0], copy_rtx (out
));
19040 * General case: Jumpful:
19041 * xorl dest,dest cmpl op1, op2
19042 * cmpl op1, op2 movl ct, dest
19043 * setcc dest jcc 1f
19044 * decl dest movl cf, dest
19045 * andl (cf-ct),dest 1:
19048 * Size 20. Size 14.
19050 * This is reasonably steep, but branch mispredict costs are
19051 * high on modern cpus, so consider failing only if optimizing
19055 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
19056 && BRANCH_COST (optimize_insn_for_speed_p (),
19061 enum machine_mode cmp_mode
= GET_MODE (op0
);
19066 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
19068 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
19070 /* We may be reversing unordered compare to normal compare,
19071 that is not valid in general (we may convert non-trapping
19072 condition to trapping one), however on i386 we currently
19073 emit all comparisons unordered. */
19074 code
= reverse_condition_maybe_unordered (code
);
19078 code
= reverse_condition (code
);
19079 if (compare_code
!= UNKNOWN
)
19080 compare_code
= reverse_condition (compare_code
);
19084 if (compare_code
!= UNKNOWN
)
19086 /* notl op1 (if needed)
19091 For x < 0 (resp. x <= -1) there will be no notl,
19092 so if possible swap the constants to get rid of the
19094 True/false will be -1/0 while code below (store flag
19095 followed by decrement) is 0/-1, so the constants need
19096 to be exchanged once more. */
19098 if (compare_code
== GE
|| !cf
)
19100 code
= reverse_condition (code
);
19105 HOST_WIDE_INT tmp
= cf
;
19110 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, -1);
19114 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, 1);
19116 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
),
19118 copy_rtx (out
), 1, OPTAB_DIRECT
);
19121 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
19122 gen_int_mode (cf
- ct
, mode
),
19123 copy_rtx (out
), 1, OPTAB_DIRECT
);
19125 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
19126 copy_rtx (out
), 1, OPTAB_DIRECT
);
19127 if (!rtx_equal_p (out
, operands
[0]))
19128 emit_move_insn (operands
[0], copy_rtx (out
));
19134 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
19136 /* Try a few things more with specific constants and a variable. */
19139 rtx var
, orig_out
, out
, tmp
;
19141 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
19144 /* If one of the two operands is an interesting constant, load a
19145 constant with the above and mask it in with a logical operation. */
19147 if (CONST_INT_P (operands
[2]))
19150 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
19151 operands
[3] = constm1_rtx
, op
= and_optab
;
19152 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
19153 operands
[3] = const0_rtx
, op
= ior_optab
;
19157 else if (CONST_INT_P (operands
[3]))
19160 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
19161 operands
[2] = constm1_rtx
, op
= and_optab
;
19162 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
19163 operands
[2] = const0_rtx
, op
= ior_optab
;
19170 orig_out
= operands
[0];
19171 tmp
= gen_reg_rtx (mode
);
19174 /* Recurse to get the constant loaded. */
19175 if (ix86_expand_int_movcc (operands
) == 0)
19178 /* Mask in the interesting variable. */
19179 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
19181 if (!rtx_equal_p (out
, orig_out
))
19182 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
19188 * For comparison with above,
19198 if (! nonimmediate_operand (operands
[2], mode
))
19199 operands
[2] = force_reg (mode
, operands
[2]);
19200 if (! nonimmediate_operand (operands
[3], mode
))
19201 operands
[3] = force_reg (mode
, operands
[3]);
19203 if (! register_operand (operands
[2], VOIDmode
)
19205 || ! register_operand (operands
[3], VOIDmode
)))
19206 operands
[2] = force_reg (mode
, operands
[2]);
19209 && ! register_operand (operands
[3], VOIDmode
))
19210 operands
[3] = force_reg (mode
, operands
[3]);
19212 emit_insn (compare_seq
);
19213 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
19214 gen_rtx_IF_THEN_ELSE (mode
,
19215 compare_op
, operands
[2],
19220 /* Swap, force into registers, or otherwise massage the two operands
19221 to an sse comparison with a mask result. Thus we differ a bit from
19222 ix86_prepare_fp_compare_args which expects to produce a flags result.
19224 The DEST operand exists to help determine whether to commute commutative
19225 operators. The POP0/POP1 operands are updated in place. The new
19226 comparison code is returned, or UNKNOWN if not implementable. */
19228 static enum rtx_code
19229 ix86_prepare_sse_fp_compare_args (rtx dest
, enum rtx_code code
,
19230 rtx
*pop0
, rtx
*pop1
)
19238 /* AVX supports all the needed comparisons. */
19241 /* We have no LTGT as an operator. We could implement it with
19242 NE & ORDERED, but this requires an extra temporary. It's
19243 not clear that it's worth it. */
19250 /* These are supported directly. */
19257 /* AVX has 3 operand comparisons, no need to swap anything. */
19260 /* For commutative operators, try to canonicalize the destination
19261 operand to be first in the comparison - this helps reload to
19262 avoid extra moves. */
19263 if (!dest
|| !rtx_equal_p (dest
, *pop1
))
19271 /* These are not supported directly before AVX, and furthermore
19272 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
19273 comparison operands to transform into something that is
19278 code
= swap_condition (code
);
19282 gcc_unreachable ();
19288 /* Detect conditional moves that exactly match min/max operational
19289 semantics. Note that this is IEEE safe, as long as we don't
19290 interchange the operands.
19292 Returns FALSE if this conditional move doesn't match a MIN/MAX,
19293 and TRUE if the operation is successful and instructions are emitted. */
19296 ix86_expand_sse_fp_minmax (rtx dest
, enum rtx_code code
, rtx cmp_op0
,
19297 rtx cmp_op1
, rtx if_true
, rtx if_false
)
19299 enum machine_mode mode
;
19305 else if (code
== UNGE
)
19308 if_true
= if_false
;
19314 if (rtx_equal_p (cmp_op0
, if_true
) && rtx_equal_p (cmp_op1
, if_false
))
19316 else if (rtx_equal_p (cmp_op1
, if_true
) && rtx_equal_p (cmp_op0
, if_false
))
19321 mode
= GET_MODE (dest
);
19323 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
19324 but MODE may be a vector mode and thus not appropriate. */
19325 if (!flag_finite_math_only
|| !flag_unsafe_math_optimizations
)
19327 int u
= is_min
? UNSPEC_IEEE_MIN
: UNSPEC_IEEE_MAX
;
19330 if_true
= force_reg (mode
, if_true
);
19331 v
= gen_rtvec (2, if_true
, if_false
);
19332 tmp
= gen_rtx_UNSPEC (mode
, v
, u
);
19336 code
= is_min
? SMIN
: SMAX
;
19337 tmp
= gen_rtx_fmt_ee (code
, mode
, if_true
, if_false
);
19340 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
19344 /* Expand an sse vector comparison. Return the register with the result. */
19347 ix86_expand_sse_cmp (rtx dest
, enum rtx_code code
, rtx cmp_op0
, rtx cmp_op1
,
19348 rtx op_true
, rtx op_false
)
19350 enum machine_mode mode
= GET_MODE (dest
);
19351 enum machine_mode cmp_mode
= GET_MODE (cmp_op0
);
19354 cmp_op0
= force_reg (cmp_mode
, cmp_op0
);
19355 if (!nonimmediate_operand (cmp_op1
, cmp_mode
))
19356 cmp_op1
= force_reg (cmp_mode
, cmp_op1
);
19359 || reg_overlap_mentioned_p (dest
, op_true
)
19360 || reg_overlap_mentioned_p (dest
, op_false
))
19361 dest
= gen_reg_rtx (mode
);
19363 x
= gen_rtx_fmt_ee (code
, cmp_mode
, cmp_op0
, cmp_op1
);
19364 if (cmp_mode
!= mode
)
19366 x
= force_reg (cmp_mode
, x
);
19367 convert_move (dest
, x
, false);
19370 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
19375 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
19376 operations. This is used for both scalar and vector conditional moves. */
19379 ix86_expand_sse_movcc (rtx dest
, rtx cmp
, rtx op_true
, rtx op_false
)
19381 enum machine_mode mode
= GET_MODE (dest
);
19384 if (vector_all_ones_operand (op_true
, mode
)
19385 && rtx_equal_p (op_false
, CONST0_RTX (mode
)))
19387 emit_insn (gen_rtx_SET (VOIDmode
, dest
, cmp
));
19389 else if (op_false
== CONST0_RTX (mode
))
19391 op_true
= force_reg (mode
, op_true
);
19392 x
= gen_rtx_AND (mode
, cmp
, op_true
);
19393 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
19395 else if (op_true
== CONST0_RTX (mode
))
19397 op_false
= force_reg (mode
, op_false
);
19398 x
= gen_rtx_NOT (mode
, cmp
);
19399 x
= gen_rtx_AND (mode
, x
, op_false
);
19400 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
19402 else if (INTEGRAL_MODE_P (mode
) && op_true
== CONSTM1_RTX (mode
))
19404 op_false
= force_reg (mode
, op_false
);
19405 x
= gen_rtx_IOR (mode
, cmp
, op_false
);
19406 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
19408 else if (TARGET_XOP
)
19410 op_true
= force_reg (mode
, op_true
);
19412 if (!nonimmediate_operand (op_false
, mode
))
19413 op_false
= force_reg (mode
, op_false
);
19415 emit_insn (gen_rtx_SET (mode
, dest
,
19416 gen_rtx_IF_THEN_ELSE (mode
, cmp
,
19422 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
) = NULL
;
19424 if (!nonimmediate_operand (op_true
, mode
))
19425 op_true
= force_reg (mode
, op_true
);
19427 op_false
= force_reg (mode
, op_false
);
19433 gen
= gen_sse4_1_blendvps
;
19437 gen
= gen_sse4_1_blendvpd
;
19445 gen
= gen_sse4_1_pblendvb
;
19446 dest
= gen_lowpart (V16QImode
, dest
);
19447 op_false
= gen_lowpart (V16QImode
, op_false
);
19448 op_true
= gen_lowpart (V16QImode
, op_true
);
19449 cmp
= gen_lowpart (V16QImode
, cmp
);
19454 gen
= gen_avx_blendvps256
;
19458 gen
= gen_avx_blendvpd256
;
19466 gen
= gen_avx2_pblendvb
;
19467 dest
= gen_lowpart (V32QImode
, dest
);
19468 op_false
= gen_lowpart (V32QImode
, op_false
);
19469 op_true
= gen_lowpart (V32QImode
, op_true
);
19470 cmp
= gen_lowpart (V32QImode
, cmp
);
19478 emit_insn (gen (dest
, op_false
, op_true
, cmp
));
19481 op_true
= force_reg (mode
, op_true
);
19483 t2
= gen_reg_rtx (mode
);
19485 t3
= gen_reg_rtx (mode
);
19489 x
= gen_rtx_AND (mode
, op_true
, cmp
);
19490 emit_insn (gen_rtx_SET (VOIDmode
, t2
, x
));
19492 x
= gen_rtx_NOT (mode
, cmp
);
19493 x
= gen_rtx_AND (mode
, x
, op_false
);
19494 emit_insn (gen_rtx_SET (VOIDmode
, t3
, x
));
19496 x
= gen_rtx_IOR (mode
, t3
, t2
);
19497 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
19502 /* Expand a floating-point conditional move. Return true if successful. */
19505 ix86_expand_fp_movcc (rtx operands
[])
19507 enum machine_mode mode
= GET_MODE (operands
[0]);
19508 enum rtx_code code
= GET_CODE (operands
[1]);
19509 rtx tmp
, compare_op
;
19510 rtx op0
= XEXP (operands
[1], 0);
19511 rtx op1
= XEXP (operands
[1], 1);
19513 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
19515 enum machine_mode cmode
;
19517 /* Since we've no cmove for sse registers, don't force bad register
19518 allocation just to gain access to it. Deny movcc when the
19519 comparison mode doesn't match the move mode. */
19520 cmode
= GET_MODE (op0
);
19521 if (cmode
== VOIDmode
)
19522 cmode
= GET_MODE (op1
);
19526 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
, &op0
, &op1
);
19527 if (code
== UNKNOWN
)
19530 if (ix86_expand_sse_fp_minmax (operands
[0], code
, op0
, op1
,
19531 operands
[2], operands
[3]))
19534 tmp
= ix86_expand_sse_cmp (operands
[0], code
, op0
, op1
,
19535 operands
[2], operands
[3]);
19536 ix86_expand_sse_movcc (operands
[0], tmp
, operands
[2], operands
[3]);
19540 /* The floating point conditional move instructions don't directly
19541 support conditions resulting from a signed integer comparison. */
19543 compare_op
= ix86_expand_compare (code
, op0
, op1
);
19544 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
19546 tmp
= gen_reg_rtx (QImode
);
19547 ix86_expand_setcc (tmp
, code
, op0
, op1
);
19549 compare_op
= ix86_expand_compare (NE
, tmp
, const0_rtx
);
19552 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
19553 gen_rtx_IF_THEN_ELSE (mode
, compare_op
,
19554 operands
[2], operands
[3])));
19559 /* Expand a floating-point vector conditional move; a vcond operation
19560 rather than a movcc operation. */
19563 ix86_expand_fp_vcond (rtx operands
[])
19565 enum rtx_code code
= GET_CODE (operands
[3]);
19568 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
19569 &operands
[4], &operands
[5]);
19570 if (code
== UNKNOWN
)
19573 switch (GET_CODE (operands
[3]))
19576 temp
= ix86_expand_sse_cmp (operands
[0], ORDERED
, operands
[4],
19577 operands
[5], operands
[0], operands
[0]);
19578 cmp
= ix86_expand_sse_cmp (operands
[0], NE
, operands
[4],
19579 operands
[5], operands
[1], operands
[2]);
19583 temp
= ix86_expand_sse_cmp (operands
[0], UNORDERED
, operands
[4],
19584 operands
[5], operands
[0], operands
[0]);
19585 cmp
= ix86_expand_sse_cmp (operands
[0], EQ
, operands
[4],
19586 operands
[5], operands
[1], operands
[2]);
19590 gcc_unreachable ();
19592 cmp
= expand_simple_binop (GET_MODE (cmp
), code
, temp
, cmp
, cmp
, 1,
19594 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
19598 if (ix86_expand_sse_fp_minmax (operands
[0], code
, operands
[4],
19599 operands
[5], operands
[1], operands
[2]))
19602 cmp
= ix86_expand_sse_cmp (operands
[0], code
, operands
[4], operands
[5],
19603 operands
[1], operands
[2]);
19604 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
19608 /* Expand a signed/unsigned integral vector conditional move. */
19611 ix86_expand_int_vcond (rtx operands
[])
19613 enum machine_mode data_mode
= GET_MODE (operands
[0]);
19614 enum machine_mode mode
= GET_MODE (operands
[4]);
19615 enum rtx_code code
= GET_CODE (operands
[3]);
19616 bool negate
= false;
19619 cop0
= operands
[4];
19620 cop1
= operands
[5];
19622 /* XOP supports all of the comparisons on all vector int types. */
19625 /* Canonicalize the comparison to EQ, GT, GTU. */
19636 code
= reverse_condition (code
);
19642 code
= reverse_condition (code
);
19648 code
= swap_condition (code
);
19649 x
= cop0
, cop0
= cop1
, cop1
= x
;
19653 gcc_unreachable ();
19656 /* Only SSE4.1/SSE4.2 supports V2DImode. */
19657 if (mode
== V2DImode
)
19662 /* SSE4.1 supports EQ. */
19663 if (!TARGET_SSE4_1
)
19669 /* SSE4.2 supports GT/GTU. */
19670 if (!TARGET_SSE4_2
)
19675 gcc_unreachable ();
19679 /* Unsigned parallel compare is not supported by the hardware.
19680 Play some tricks to turn this into a signed comparison
19684 cop0
= force_reg (mode
, cop0
);
19694 rtx (*gen_sub3
) (rtx
, rtx
, rtx
);
19698 case V8SImode
: gen_sub3
= gen_subv8si3
; break;
19699 case V4DImode
: gen_sub3
= gen_subv4di3
; break;
19700 case V4SImode
: gen_sub3
= gen_subv4si3
; break;
19701 case V2DImode
: gen_sub3
= gen_subv2di3
; break;
19703 gcc_unreachable ();
19705 /* Subtract (-(INT MAX) - 1) from both operands to make
19707 mask
= ix86_build_signbit_mask (mode
, true, false);
19708 t1
= gen_reg_rtx (mode
);
19709 emit_insn (gen_sub3 (t1
, cop0
, mask
));
19711 t2
= gen_reg_rtx (mode
);
19712 emit_insn (gen_sub3 (t2
, cop1
, mask
));
19724 /* Perform a parallel unsigned saturating subtraction. */
19725 x
= gen_reg_rtx (mode
);
19726 emit_insn (gen_rtx_SET (VOIDmode
, x
,
19727 gen_rtx_US_MINUS (mode
, cop0
, cop1
)));
19730 cop1
= CONST0_RTX (mode
);
19736 gcc_unreachable ();
19741 /* Allow the comparison to be done in one mode, but the movcc to
19742 happen in another mode. */
19743 if (data_mode
== mode
)
19745 x
= ix86_expand_sse_cmp (operands
[0], code
, cop0
, cop1
,
19746 operands
[1+negate
], operands
[2-negate
]);
19750 gcc_assert (GET_MODE_SIZE (data_mode
) == GET_MODE_SIZE (mode
));
19751 x
= ix86_expand_sse_cmp (gen_lowpart (mode
, operands
[0]),
19753 operands
[1+negate
], operands
[2-negate
]);
19754 x
= gen_lowpart (data_mode
, x
);
19757 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
19758 operands
[2-negate
]);
19762 /* Expand a variable vector permutation. */
19765 ix86_expand_vec_perm (rtx operands
[])
19767 rtx target
= operands
[0];
19768 rtx op0
= operands
[1];
19769 rtx op1
= operands
[2];
19770 rtx mask
= operands
[3];
19771 rtx t1
, t2
, t3
, t4
, vt
, vt2
, vec
[32];
19772 enum machine_mode mode
= GET_MODE (op0
);
19773 enum machine_mode maskmode
= GET_MODE (mask
);
19775 bool one_operand_shuffle
= rtx_equal_p (op0
, op1
);
19777 /* Number of elements in the vector. */
19778 w
= GET_MODE_NUNITS (mode
);
19779 e
= GET_MODE_UNIT_SIZE (mode
);
19780 gcc_assert (w
<= 32);
19784 if (mode
== V4DImode
|| mode
== V4DFmode
|| mode
== V16HImode
)
19786 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
19787 an constant shuffle operand. With a tiny bit of effort we can
19788 use VPERMD instead. A re-interpretation stall for V4DFmode is
19789 unfortunate but there's no avoiding it.
19790 Similarly for V16HImode we don't have instructions for variable
19791 shuffling, while for V32QImode we can use after preparing suitable
19792 masks vpshufb; vpshufb; vpermq; vpor. */
19794 if (mode
== V16HImode
)
19796 maskmode
= mode
= V32QImode
;
19802 maskmode
= mode
= V8SImode
;
19806 t1
= gen_reg_rtx (maskmode
);
19808 /* Replicate the low bits of the V4DImode mask into V8SImode:
19810 t1 = { A A B B C C D D }. */
19811 for (i
= 0; i
< w
/ 2; ++i
)
19812 vec
[i
*2 + 1] = vec
[i
*2] = GEN_INT (i
* 2);
19813 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
19814 vt
= force_reg (maskmode
, vt
);
19815 mask
= gen_lowpart (maskmode
, mask
);
19816 if (maskmode
== V8SImode
)
19817 emit_insn (gen_avx2_permvarv8si (t1
, vt
, mask
));
19819 emit_insn (gen_avx2_pshufbv32qi3 (t1
, mask
, vt
));
19821 /* Multiply the shuffle indicies by two. */
19822 t1
= expand_simple_binop (maskmode
, PLUS
, t1
, t1
, t1
, 1,
19825 /* Add one to the odd shuffle indicies:
19826 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
19827 for (i
= 0; i
< w
/ 2; ++i
)
19829 vec
[i
* 2] = const0_rtx
;
19830 vec
[i
* 2 + 1] = const1_rtx
;
19832 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
19833 vt
= force_const_mem (maskmode
, vt
);
19834 t1
= expand_simple_binop (maskmode
, PLUS
, t1
, vt
, t1
, 1,
19837 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
19838 operands
[3] = mask
= t1
;
19839 target
= gen_lowpart (mode
, target
);
19840 op0
= gen_lowpart (mode
, op0
);
19841 op1
= gen_lowpart (mode
, op1
);
19847 /* The VPERMD and VPERMPS instructions already properly ignore
19848 the high bits of the shuffle elements. No need for us to
19849 perform an AND ourselves. */
19850 if (one_operand_shuffle
)
19851 emit_insn (gen_avx2_permvarv8si (target
, mask
, op0
));
19854 t1
= gen_reg_rtx (V8SImode
);
19855 t2
= gen_reg_rtx (V8SImode
);
19856 emit_insn (gen_avx2_permvarv8si (t1
, mask
, op0
));
19857 emit_insn (gen_avx2_permvarv8si (t2
, mask
, op1
));
19863 mask
= gen_lowpart (V8SFmode
, mask
);
19864 if (one_operand_shuffle
)
19865 emit_insn (gen_avx2_permvarv8sf (target
, mask
, op0
));
19868 t1
= gen_reg_rtx (V8SFmode
);
19869 t2
= gen_reg_rtx (V8SFmode
);
19870 emit_insn (gen_avx2_permvarv8sf (t1
, mask
, op0
));
19871 emit_insn (gen_avx2_permvarv8sf (t2
, mask
, op1
));
19877 /* By combining the two 128-bit input vectors into one 256-bit
19878 input vector, we can use VPERMD and VPERMPS for the full
19879 two-operand shuffle. */
19880 t1
= gen_reg_rtx (V8SImode
);
19881 t2
= gen_reg_rtx (V8SImode
);
19882 emit_insn (gen_avx_vec_concatv8si (t1
, op0
, op1
));
19883 emit_insn (gen_avx_vec_concatv8si (t2
, mask
, mask
));
19884 emit_insn (gen_avx2_permvarv8si (t1
, t2
, t1
));
19885 emit_insn (gen_avx_vextractf128v8si (target
, t1
, const0_rtx
));
19889 t1
= gen_reg_rtx (V8SFmode
);
19890 t2
= gen_reg_rtx (V8SFmode
);
19891 mask
= gen_lowpart (V4SFmode
, mask
);
19892 emit_insn (gen_avx_vec_concatv8sf (t1
, op0
, op1
));
19893 emit_insn (gen_avx_vec_concatv8sf (t2
, mask
, mask
));
19894 emit_insn (gen_avx2_permvarv8sf (t1
, t2
, t1
));
19895 emit_insn (gen_avx_vextractf128v8sf (target
, t1
, const0_rtx
));
19899 t1
= gen_reg_rtx (V32QImode
);
19900 t2
= gen_reg_rtx (V32QImode
);
19901 t3
= gen_reg_rtx (V32QImode
);
19902 vt2
= GEN_INT (128);
19903 for (i
= 0; i
< 32; i
++)
19905 vt
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, vec
));
19906 vt
= force_reg (V32QImode
, vt
);
19907 for (i
= 0; i
< 32; i
++)
19908 vec
[i
] = i
< 16 ? vt2
: const0_rtx
;
19909 vt2
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, vec
));
19910 vt2
= force_reg (V32QImode
, vt2
);
19911 /* From mask create two adjusted masks, which contain the same
19912 bits as mask in the low 7 bits of each vector element.
19913 The first mask will have the most significant bit clear
19914 if it requests element from the same 128-bit lane
19915 and MSB set if it requests element from the other 128-bit lane.
19916 The second mask will have the opposite values of the MSB,
19917 and additionally will have its 128-bit lanes swapped.
19918 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
19919 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
19920 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
19921 stands for other 12 bytes. */
19922 /* The bit whether element is from the same lane or the other
19923 lane is bit 4, so shift it up by 3 to the MSB position. */
19924 emit_insn (gen_ashlv4di3 (gen_lowpart (V4DImode
, t1
),
19925 gen_lowpart (V4DImode
, mask
),
19927 /* Clear MSB bits from the mask just in case it had them set. */
19928 emit_insn (gen_avx2_andnotv32qi3 (t2
, vt
, mask
));
19929 /* After this t1 will have MSB set for elements from other lane. */
19930 emit_insn (gen_xorv32qi3 (t1
, t1
, vt2
));
19931 /* Clear bits other than MSB. */
19932 emit_insn (gen_andv32qi3 (t1
, t1
, vt
));
19933 /* Or in the lower bits from mask into t3. */
19934 emit_insn (gen_iorv32qi3 (t3
, t1
, t2
));
19935 /* And invert MSB bits in t1, so MSB is set for elements from the same
19937 emit_insn (gen_xorv32qi3 (t1
, t1
, vt
));
19938 /* Swap 128-bit lanes in t3. */
19939 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t3
),
19940 gen_lowpart (V4DImode
, t3
),
19941 const2_rtx
, GEN_INT (3),
19942 const0_rtx
, const1_rtx
));
19943 /* And or in the lower bits from mask into t1. */
19944 emit_insn (gen_iorv32qi3 (t1
, t1
, t2
));
19945 if (one_operand_shuffle
)
19947 /* Each of these shuffles will put 0s in places where
19948 element from the other 128-bit lane is needed, otherwise
19949 will shuffle in the requested value. */
19950 emit_insn (gen_avx2_pshufbv32qi3 (t3
, op0
, t3
));
19951 emit_insn (gen_avx2_pshufbv32qi3 (t1
, op0
, t1
));
19952 /* For t3 the 128-bit lanes are swapped again. */
19953 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t3
),
19954 gen_lowpart (V4DImode
, t3
),
19955 const2_rtx
, GEN_INT (3),
19956 const0_rtx
, const1_rtx
));
19957 /* And oring both together leads to the result. */
19958 emit_insn (gen_iorv32qi3 (target
, t1
, t3
));
19962 t4
= gen_reg_rtx (V32QImode
);
19963 /* Similarly to the above one_operand_shuffle code,
19964 just for repeated twice for each operand. merge_two:
19965 code will merge the two results together. */
19966 emit_insn (gen_avx2_pshufbv32qi3 (t4
, op0
, t3
));
19967 emit_insn (gen_avx2_pshufbv32qi3 (t3
, op1
, t3
));
19968 emit_insn (gen_avx2_pshufbv32qi3 (t2
, op0
, t1
));
19969 emit_insn (gen_avx2_pshufbv32qi3 (t1
, op1
, t1
));
19970 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t4
),
19971 gen_lowpart (V4DImode
, t4
),
19972 const2_rtx
, GEN_INT (3),
19973 const0_rtx
, const1_rtx
));
19974 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t3
),
19975 gen_lowpart (V4DImode
, t3
),
19976 const2_rtx
, GEN_INT (3),
19977 const0_rtx
, const1_rtx
));
19978 emit_insn (gen_iorv32qi3 (t4
, t2
, t4
));
19979 emit_insn (gen_iorv32qi3 (t3
, t1
, t3
));
19985 gcc_assert (GET_MODE_SIZE (mode
) <= 16);
19992 /* The XOP VPPERM insn supports three inputs. By ignoring the
19993 one_operand_shuffle special case, we avoid creating another
19994 set of constant vectors in memory. */
19995 one_operand_shuffle
= false;
19997 /* mask = mask & {2*w-1, ...} */
19998 vt
= GEN_INT (2*w
- 1);
20002 /* mask = mask & {w-1, ...} */
20003 vt
= GEN_INT (w
- 1);
20006 for (i
= 0; i
< w
; i
++)
20008 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
20009 mask
= expand_simple_binop (maskmode
, AND
, mask
, vt
,
20010 NULL_RTX
, 0, OPTAB_DIRECT
);
20012 /* For non-QImode operations, convert the word permutation control
20013 into a byte permutation control. */
20014 if (mode
!= V16QImode
)
20016 mask
= expand_simple_binop (maskmode
, ASHIFT
, mask
,
20017 GEN_INT (exact_log2 (e
)),
20018 NULL_RTX
, 0, OPTAB_DIRECT
);
20020 /* Convert mask to vector of chars. */
20021 mask
= force_reg (V16QImode
, gen_lowpart (V16QImode
, mask
));
20023 /* Replicate each of the input bytes into byte positions:
20024 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
20025 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
20026 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
20027 for (i
= 0; i
< 16; ++i
)
20028 vec
[i
] = GEN_INT (i
/e
* e
);
20029 vt
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, vec
));
20030 vt
= force_const_mem (V16QImode
, vt
);
20032 emit_insn (gen_xop_pperm (mask
, mask
, mask
, vt
));
20034 emit_insn (gen_ssse3_pshufbv16qi3 (mask
, mask
, vt
));
20036 /* Convert it into the byte positions by doing
20037 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
20038 for (i
= 0; i
< 16; ++i
)
20039 vec
[i
] = GEN_INT (i
% e
);
20040 vt
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, vec
));
20041 vt
= force_const_mem (V16QImode
, vt
);
20042 emit_insn (gen_addv16qi3 (mask
, mask
, vt
));
20045 /* The actual shuffle operations all operate on V16QImode. */
20046 op0
= gen_lowpart (V16QImode
, op0
);
20047 op1
= gen_lowpart (V16QImode
, op1
);
20048 target
= gen_lowpart (V16QImode
, target
);
20052 emit_insn (gen_xop_pperm (target
, op0
, op1
, mask
));
20054 else if (one_operand_shuffle
)
20056 emit_insn (gen_ssse3_pshufbv16qi3 (target
, op0
, mask
));
20063 /* Shuffle the two input vectors independently. */
20064 t1
= gen_reg_rtx (V16QImode
);
20065 t2
= gen_reg_rtx (V16QImode
);
20066 emit_insn (gen_ssse3_pshufbv16qi3 (t1
, op0
, mask
));
20067 emit_insn (gen_ssse3_pshufbv16qi3 (t2
, op1
, mask
));
20070 /* Then merge them together. The key is whether any given control
20071 element contained a bit set that indicates the second word. */
20072 mask
= operands
[3];
20074 if (maskmode
== V2DImode
&& !TARGET_SSE4_1
)
20076 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
20077 more shuffle to convert the V2DI input mask into a V4SI
20078 input mask. At which point the masking that expand_int_vcond
20079 will work as desired. */
20080 rtx t3
= gen_reg_rtx (V4SImode
);
20081 emit_insn (gen_sse2_pshufd_1 (t3
, gen_lowpart (V4SImode
, mask
),
20082 const0_rtx
, const0_rtx
,
20083 const2_rtx
, const2_rtx
));
20085 maskmode
= V4SImode
;
20089 for (i
= 0; i
< w
; i
++)
20091 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
20092 vt
= force_reg (maskmode
, vt
);
20093 mask
= expand_simple_binop (maskmode
, AND
, mask
, vt
,
20094 NULL_RTX
, 0, OPTAB_DIRECT
);
20096 xops
[0] = gen_lowpart (mode
, operands
[0]);
20097 xops
[1] = gen_lowpart (mode
, t2
);
20098 xops
[2] = gen_lowpart (mode
, t1
);
20099 xops
[3] = gen_rtx_EQ (maskmode
, mask
, vt
);
20102 ok
= ix86_expand_int_vcond (xops
);
20107 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
20108 true if we should do zero extension, else sign extension. HIGH_P is
20109 true if we want the N/2 high elements, else the low elements. */
20112 ix86_expand_sse_unpack (rtx operands
[2], bool unsigned_p
, bool high_p
)
20114 enum machine_mode imode
= GET_MODE (operands
[1]);
20119 rtx (*unpack
)(rtx
, rtx
);
20120 rtx (*extract
)(rtx
, rtx
) = NULL
;
20121 enum machine_mode halfmode
= BLKmode
;
20127 unpack
= gen_avx2_zero_extendv16qiv16hi2
;
20129 unpack
= gen_avx2_sign_extendv16qiv16hi2
;
20130 halfmode
= V16QImode
;
20132 = high_p
? gen_vec_extract_hi_v32qi
: gen_vec_extract_lo_v32qi
;
20136 unpack
= gen_avx2_zero_extendv8hiv8si2
;
20138 unpack
= gen_avx2_sign_extendv8hiv8si2
;
20139 halfmode
= V8HImode
;
20141 = high_p
? gen_vec_extract_hi_v16hi
: gen_vec_extract_lo_v16hi
;
20145 unpack
= gen_avx2_zero_extendv4siv4di2
;
20147 unpack
= gen_avx2_sign_extendv4siv4di2
;
20148 halfmode
= V4SImode
;
20150 = high_p
? gen_vec_extract_hi_v8si
: gen_vec_extract_lo_v8si
;
20154 unpack
= gen_sse4_1_zero_extendv8qiv8hi2
;
20156 unpack
= gen_sse4_1_sign_extendv8qiv8hi2
;
20160 unpack
= gen_sse4_1_zero_extendv4hiv4si2
;
20162 unpack
= gen_sse4_1_sign_extendv4hiv4si2
;
20166 unpack
= gen_sse4_1_zero_extendv2siv2di2
;
20168 unpack
= gen_sse4_1_sign_extendv2siv2di2
;
20171 gcc_unreachable ();
20174 if (GET_MODE_SIZE (imode
) == 32)
20176 tmp
= gen_reg_rtx (halfmode
);
20177 emit_insn (extract (tmp
, operands
[1]));
20181 /* Shift higher 8 bytes to lower 8 bytes. */
20182 tmp
= gen_reg_rtx (imode
);
20183 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode
, tmp
),
20184 gen_lowpart (V1TImode
, operands
[1]),
20190 emit_insn (unpack (operands
[0], tmp
));
20194 rtx (*unpack
)(rtx
, rtx
, rtx
);
20200 unpack
= gen_vec_interleave_highv16qi
;
20202 unpack
= gen_vec_interleave_lowv16qi
;
20206 unpack
= gen_vec_interleave_highv8hi
;
20208 unpack
= gen_vec_interleave_lowv8hi
;
20212 unpack
= gen_vec_interleave_highv4si
;
20214 unpack
= gen_vec_interleave_lowv4si
;
20217 gcc_unreachable ();
20220 dest
= gen_lowpart (imode
, operands
[0]);
20223 tmp
= force_reg (imode
, CONST0_RTX (imode
));
20225 tmp
= ix86_expand_sse_cmp (gen_reg_rtx (imode
), GT
, CONST0_RTX (imode
),
20226 operands
[1], pc_rtx
, pc_rtx
);
20228 emit_insn (unpack (dest
, operands
[1], tmp
));
20232 /* Expand conditional increment or decrement using adb/sbb instructions.
20233 The default case using setcc followed by the conditional move can be
20234 done by generic code. */
20236 ix86_expand_int_addcc (rtx operands
[])
20238 enum rtx_code code
= GET_CODE (operands
[1]);
20240 rtx (*insn
)(rtx
, rtx
, rtx
, rtx
, rtx
);
20242 rtx val
= const0_rtx
;
20243 bool fpcmp
= false;
20244 enum machine_mode mode
;
20245 rtx op0
= XEXP (operands
[1], 0);
20246 rtx op1
= XEXP (operands
[1], 1);
20248 if (operands
[3] != const1_rtx
20249 && operands
[3] != constm1_rtx
)
20251 if (!ix86_expand_carry_flag_compare (code
, op0
, op1
, &compare_op
))
20253 code
= GET_CODE (compare_op
);
20255 flags
= XEXP (compare_op
, 0);
20257 if (GET_MODE (flags
) == CCFPmode
20258 || GET_MODE (flags
) == CCFPUmode
)
20261 code
= ix86_fp_compare_code_to_integer (code
);
20268 PUT_CODE (compare_op
,
20269 reverse_condition_maybe_unordered
20270 (GET_CODE (compare_op
)));
20272 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
20275 mode
= GET_MODE (operands
[0]);
20277 /* Construct either adc or sbb insn. */
20278 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
20283 insn
= gen_subqi3_carry
;
20286 insn
= gen_subhi3_carry
;
20289 insn
= gen_subsi3_carry
;
20292 insn
= gen_subdi3_carry
;
20295 gcc_unreachable ();
20303 insn
= gen_addqi3_carry
;
20306 insn
= gen_addhi3_carry
;
20309 insn
= gen_addsi3_carry
;
20312 insn
= gen_adddi3_carry
;
20315 gcc_unreachable ();
20318 emit_insn (insn (operands
[0], operands
[2], val
, flags
, compare_op
));
20324 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
20325 but works for floating pointer parameters and nonoffsetable memories.
20326 For pushes, it returns just stack offsets; the values will be saved
20327 in the right order. Maximally three parts are generated. */
20330 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
20335 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
20337 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
20339 gcc_assert (!REG_P (operand
) || !MMX_REGNO_P (REGNO (operand
)));
20340 gcc_assert (size
>= 2 && size
<= 4);
20342 /* Optimize constant pool reference to immediates. This is used by fp
20343 moves, that force all constants to memory to allow combining. */
20344 if (MEM_P (operand
) && MEM_READONLY_P (operand
))
20346 rtx tmp
= maybe_get_pool_constant (operand
);
20351 if (MEM_P (operand
) && !offsettable_memref_p (operand
))
20353 /* The only non-offsetable memories we handle are pushes. */
20354 int ok
= push_operand (operand
, VOIDmode
);
20358 operand
= copy_rtx (operand
);
20359 PUT_MODE (operand
, Pmode
);
20360 parts
[0] = parts
[1] = parts
[2] = parts
[3] = operand
;
20364 if (GET_CODE (operand
) == CONST_VECTOR
)
20366 enum machine_mode imode
= int_mode_for_mode (mode
);
20367 /* Caution: if we looked through a constant pool memory above,
20368 the operand may actually have a different mode now. That's
20369 ok, since we want to pun this all the way back to an integer. */
20370 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
20371 gcc_assert (operand
!= NULL
);
20377 if (mode
== DImode
)
20378 split_double_mode (mode
, &operand
, 1, &parts
[0], &parts
[1]);
20383 if (REG_P (operand
))
20385 gcc_assert (reload_completed
);
20386 for (i
= 0; i
< size
; i
++)
20387 parts
[i
] = gen_rtx_REG (SImode
, REGNO (operand
) + i
);
20389 else if (offsettable_memref_p (operand
))
20391 operand
= adjust_address (operand
, SImode
, 0);
20392 parts
[0] = operand
;
20393 for (i
= 1; i
< size
; i
++)
20394 parts
[i
] = adjust_address (operand
, SImode
, 4 * i
);
20396 else if (GET_CODE (operand
) == CONST_DOUBLE
)
20401 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
20405 real_to_target (l
, &r
, mode
);
20406 parts
[3] = gen_int_mode (l
[3], SImode
);
20407 parts
[2] = gen_int_mode (l
[2], SImode
);
20410 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
20411 parts
[2] = gen_int_mode (l
[2], SImode
);
20414 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
20417 gcc_unreachable ();
20419 parts
[1] = gen_int_mode (l
[1], SImode
);
20420 parts
[0] = gen_int_mode (l
[0], SImode
);
20423 gcc_unreachable ();
20428 if (mode
== TImode
)
20429 split_double_mode (mode
, &operand
, 1, &parts
[0], &parts
[1]);
20430 if (mode
== XFmode
|| mode
== TFmode
)
20432 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
20433 if (REG_P (operand
))
20435 gcc_assert (reload_completed
);
20436 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
20437 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
20439 else if (offsettable_memref_p (operand
))
20441 operand
= adjust_address (operand
, DImode
, 0);
20442 parts
[0] = operand
;
20443 parts
[1] = adjust_address (operand
, upper_mode
, 8);
20445 else if (GET_CODE (operand
) == CONST_DOUBLE
)
20450 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
20451 real_to_target (l
, &r
, mode
);
20453 /* Do not use shift by 32 to avoid warning on 32bit systems. */
20454 if (HOST_BITS_PER_WIDE_INT
>= 64)
20457 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
20458 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
20461 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
20463 if (upper_mode
== SImode
)
20464 parts
[1] = gen_int_mode (l
[2], SImode
);
20465 else if (HOST_BITS_PER_WIDE_INT
>= 64)
20468 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
20469 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
20472 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
20475 gcc_unreachable ();
20482 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
20483 Return false when normal moves are needed; true when all required
20484 insns have been emitted. Operands 2-4 contain the input values
20485 int the correct order; operands 5-7 contain the output values. */
20488 ix86_split_long_move (rtx operands
[])
20493 int collisions
= 0;
20494 enum machine_mode mode
= GET_MODE (operands
[0]);
20495 bool collisionparts
[4];
20497 /* The DFmode expanders may ask us to move double.
20498 For 64bit target this is single move. By hiding the fact
20499 here we simplify i386.md splitters. */
20500 if (TARGET_64BIT
&& GET_MODE_SIZE (GET_MODE (operands
[0])) == 8)
20502 /* Optimize constant pool reference to immediates. This is used by
20503 fp moves, that force all constants to memory to allow combining. */
20505 if (MEM_P (operands
[1])
20506 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
20507 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
20508 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
20509 if (push_operand (operands
[0], VOIDmode
))
20511 operands
[0] = copy_rtx (operands
[0]);
20512 PUT_MODE (operands
[0], Pmode
);
20515 operands
[0] = gen_lowpart (DImode
, operands
[0]);
20516 operands
[1] = gen_lowpart (DImode
, operands
[1]);
20517 emit_move_insn (operands
[0], operands
[1]);
20521 /* The only non-offsettable memory we handle is push. */
20522 if (push_operand (operands
[0], VOIDmode
))
20525 gcc_assert (!MEM_P (operands
[0])
20526 || offsettable_memref_p (operands
[0]));
20528 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
20529 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
20531 /* When emitting push, take care for source operands on the stack. */
20532 if (push
&& MEM_P (operands
[1])
20533 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
20535 rtx src_base
= XEXP (part
[1][nparts
- 1], 0);
20537 /* Compensate for the stack decrement by 4. */
20538 if (!TARGET_64BIT
&& nparts
== 3
20539 && mode
== XFmode
&& TARGET_128BIT_LONG_DOUBLE
)
20540 src_base
= plus_constant (src_base
, 4);
20542 /* src_base refers to the stack pointer and is
20543 automatically decreased by emitted push. */
20544 for (i
= 0; i
< nparts
; i
++)
20545 part
[1][i
] = change_address (part
[1][i
],
20546 GET_MODE (part
[1][i
]), src_base
);
20549 /* We need to do copy in the right order in case an address register
20550 of the source overlaps the destination. */
20551 if (REG_P (part
[0][0]) && MEM_P (part
[1][0]))
20555 for (i
= 0; i
< nparts
; i
++)
20558 = reg_overlap_mentioned_p (part
[0][i
], XEXP (part
[1][0], 0));
20559 if (collisionparts
[i
])
20563 /* Collision in the middle part can be handled by reordering. */
20564 if (collisions
== 1 && nparts
== 3 && collisionparts
[1])
20566 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
20567 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
20569 else if (collisions
== 1
20571 && (collisionparts
[1] || collisionparts
[2]))
20573 if (collisionparts
[1])
20575 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
20576 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
20580 tmp
= part
[0][2]; part
[0][2] = part
[0][3]; part
[0][3] = tmp
;
20581 tmp
= part
[1][2]; part
[1][2] = part
[1][3]; part
[1][3] = tmp
;
20585 /* If there are more collisions, we can't handle it by reordering.
20586 Do an lea to the last part and use only one colliding move. */
20587 else if (collisions
> 1)
20593 base
= part
[0][nparts
- 1];
20595 /* Handle the case when the last part isn't valid for lea.
20596 Happens in 64-bit mode storing the 12-byte XFmode. */
20597 if (GET_MODE (base
) != Pmode
)
20598 base
= gen_rtx_REG (Pmode
, REGNO (base
));
20600 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
20601 part
[1][0] = replace_equiv_address (part
[1][0], base
);
20602 for (i
= 1; i
< nparts
; i
++)
20604 tmp
= plus_constant (base
, UNITS_PER_WORD
* i
);
20605 part
[1][i
] = replace_equiv_address (part
[1][i
], tmp
);
20616 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
20617 emit_insn (gen_addsi3 (stack_pointer_rtx
,
20618 stack_pointer_rtx
, GEN_INT (-4)));
20619 emit_move_insn (part
[0][2], part
[1][2]);
20621 else if (nparts
== 4)
20623 emit_move_insn (part
[0][3], part
[1][3]);
20624 emit_move_insn (part
[0][2], part
[1][2]);
20629 /* In 64bit mode we don't have 32bit push available. In case this is
20630 register, it is OK - we will just use larger counterpart. We also
20631 retype memory - these comes from attempt to avoid REX prefix on
20632 moving of second half of TFmode value. */
20633 if (GET_MODE (part
[1][1]) == SImode
)
20635 switch (GET_CODE (part
[1][1]))
20638 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
20642 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
20646 gcc_unreachable ();
20649 if (GET_MODE (part
[1][0]) == SImode
)
20650 part
[1][0] = part
[1][1];
20653 emit_move_insn (part
[0][1], part
[1][1]);
20654 emit_move_insn (part
[0][0], part
[1][0]);
20658 /* Choose correct order to not overwrite the source before it is copied. */
20659 if ((REG_P (part
[0][0])
20660 && REG_P (part
[1][1])
20661 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
20663 && REGNO (part
[0][0]) == REGNO (part
[1][2]))
20665 && REGNO (part
[0][0]) == REGNO (part
[1][3]))))
20667 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
20669 for (i
= 0, j
= nparts
- 1; i
< nparts
; i
++, j
--)
20671 operands
[2 + i
] = part
[0][j
];
20672 operands
[6 + i
] = part
[1][j
];
20677 for (i
= 0; i
< nparts
; i
++)
20679 operands
[2 + i
] = part
[0][i
];
20680 operands
[6 + i
] = part
[1][i
];
20684 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
20685 if (optimize_insn_for_size_p ())
20687 for (j
= 0; j
< nparts
- 1; j
++)
20688 if (CONST_INT_P (operands
[6 + j
])
20689 && operands
[6 + j
] != const0_rtx
20690 && REG_P (operands
[2 + j
]))
20691 for (i
= j
; i
< nparts
- 1; i
++)
20692 if (CONST_INT_P (operands
[7 + i
])
20693 && INTVAL (operands
[7 + i
]) == INTVAL (operands
[6 + j
]))
20694 operands
[7 + i
] = operands
[2 + j
];
20697 for (i
= 0; i
< nparts
; i
++)
20698 emit_move_insn (operands
[2 + i
], operands
[6 + i
]);
20703 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
20704 left shift by a constant, either using a single shift or
20705 a sequence of add instructions. */
20708 ix86_expand_ashl_const (rtx operand
, int count
, enum machine_mode mode
)
20710 rtx (*insn
)(rtx
, rtx
, rtx
);
20713 || (count
* ix86_cost
->add
<= ix86_cost
->shift_const
20714 && !optimize_insn_for_size_p ()))
20716 insn
= mode
== DImode
? gen_addsi3
: gen_adddi3
;
20717 while (count
-- > 0)
20718 emit_insn (insn (operand
, operand
, operand
));
20722 insn
= mode
== DImode
? gen_ashlsi3
: gen_ashldi3
;
20723 emit_insn (insn (operand
, operand
, GEN_INT (count
)));
20728 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
20730 rtx (*gen_ashl3
)(rtx
, rtx
, rtx
);
20731 rtx (*gen_shld
)(rtx
, rtx
, rtx
);
20732 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
20734 rtx low
[2], high
[2];
20737 if (CONST_INT_P (operands
[2]))
20739 split_double_mode (mode
, operands
, 2, low
, high
);
20740 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
20742 if (count
>= half_width
)
20744 emit_move_insn (high
[0], low
[1]);
20745 emit_move_insn (low
[0], const0_rtx
);
20747 if (count
> half_width
)
20748 ix86_expand_ashl_const (high
[0], count
- half_width
, mode
);
20752 gen_shld
= mode
== DImode
? gen_x86_shld
: gen_x86_64_shld
;
20754 if (!rtx_equal_p (operands
[0], operands
[1]))
20755 emit_move_insn (operands
[0], operands
[1]);
20757 emit_insn (gen_shld (high
[0], low
[0], GEN_INT (count
)));
20758 ix86_expand_ashl_const (low
[0], count
, mode
);
20763 split_double_mode (mode
, operands
, 1, low
, high
);
20765 gen_ashl3
= mode
== DImode
? gen_ashlsi3
: gen_ashldi3
;
20767 if (operands
[1] == const1_rtx
)
20769 /* Assuming we've chosen a QImode capable registers, then 1 << N
20770 can be done with two 32/64-bit shifts, no branches, no cmoves. */
20771 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
20773 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
20775 ix86_expand_clear (low
[0]);
20776 ix86_expand_clear (high
[0]);
20777 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (half_width
)));
20779 d
= gen_lowpart (QImode
, low
[0]);
20780 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
20781 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
20782 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
20784 d
= gen_lowpart (QImode
, high
[0]);
20785 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
20786 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
20787 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
20790 /* Otherwise, we can get the same results by manually performing
20791 a bit extract operation on bit 5/6, and then performing the two
20792 shifts. The two methods of getting 0/1 into low/high are exactly
20793 the same size. Avoiding the shift in the bit extract case helps
20794 pentium4 a bit; no one else seems to care much either way. */
20797 enum machine_mode half_mode
;
20798 rtx (*gen_lshr3
)(rtx
, rtx
, rtx
);
20799 rtx (*gen_and3
)(rtx
, rtx
, rtx
);
20800 rtx (*gen_xor3
)(rtx
, rtx
, rtx
);
20801 HOST_WIDE_INT bits
;
20804 if (mode
== DImode
)
20806 half_mode
= SImode
;
20807 gen_lshr3
= gen_lshrsi3
;
20808 gen_and3
= gen_andsi3
;
20809 gen_xor3
= gen_xorsi3
;
20814 half_mode
= DImode
;
20815 gen_lshr3
= gen_lshrdi3
;
20816 gen_and3
= gen_anddi3
;
20817 gen_xor3
= gen_xordi3
;
20821 if (TARGET_PARTIAL_REG_STALL
&& !optimize_insn_for_size_p ())
20822 x
= gen_rtx_ZERO_EXTEND (half_mode
, operands
[2]);
20824 x
= gen_lowpart (half_mode
, operands
[2]);
20825 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
20827 emit_insn (gen_lshr3 (high
[0], high
[0], GEN_INT (bits
)));
20828 emit_insn (gen_and3 (high
[0], high
[0], const1_rtx
));
20829 emit_move_insn (low
[0], high
[0]);
20830 emit_insn (gen_xor3 (low
[0], low
[0], const1_rtx
));
20833 emit_insn (gen_ashl3 (low
[0], low
[0], operands
[2]));
20834 emit_insn (gen_ashl3 (high
[0], high
[0], operands
[2]));
20838 if (operands
[1] == constm1_rtx
)
20840 /* For -1 << N, we can avoid the shld instruction, because we
20841 know that we're shifting 0...31/63 ones into a -1. */
20842 emit_move_insn (low
[0], constm1_rtx
);
20843 if (optimize_insn_for_size_p ())
20844 emit_move_insn (high
[0], low
[0]);
20846 emit_move_insn (high
[0], constm1_rtx
);
20850 gen_shld
= mode
== DImode
? gen_x86_shld
: gen_x86_64_shld
;
20852 if (!rtx_equal_p (operands
[0], operands
[1]))
20853 emit_move_insn (operands
[0], operands
[1]);
20855 split_double_mode (mode
, operands
, 1, low
, high
);
20856 emit_insn (gen_shld (high
[0], low
[0], operands
[2]));
20859 emit_insn (gen_ashl3 (low
[0], low
[0], operands
[2]));
20861 if (TARGET_CMOVE
&& scratch
)
20863 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
20864 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
20866 ix86_expand_clear (scratch
);
20867 emit_insn (gen_x86_shift_adj_1 (high
[0], low
[0], operands
[2], scratch
));
20871 rtx (*gen_x86_shift_adj_2
)(rtx
, rtx
, rtx
)
20872 = mode
== DImode
? gen_x86_shiftsi_adj_2
: gen_x86_shiftdi_adj_2
;
20874 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
20879 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
20881 rtx (*gen_ashr3
)(rtx
, rtx
, rtx
)
20882 = mode
== DImode
? gen_ashrsi3
: gen_ashrdi3
;
20883 rtx (*gen_shrd
)(rtx
, rtx
, rtx
);
20884 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
20886 rtx low
[2], high
[2];
20889 if (CONST_INT_P (operands
[2]))
20891 split_double_mode (mode
, operands
, 2, low
, high
);
20892 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
20894 if (count
== GET_MODE_BITSIZE (mode
) - 1)
20896 emit_move_insn (high
[0], high
[1]);
20897 emit_insn (gen_ashr3 (high
[0], high
[0],
20898 GEN_INT (half_width
- 1)));
20899 emit_move_insn (low
[0], high
[0]);
20902 else if (count
>= half_width
)
20904 emit_move_insn (low
[0], high
[1]);
20905 emit_move_insn (high
[0], low
[0]);
20906 emit_insn (gen_ashr3 (high
[0], high
[0],
20907 GEN_INT (half_width
- 1)));
20909 if (count
> half_width
)
20910 emit_insn (gen_ashr3 (low
[0], low
[0],
20911 GEN_INT (count
- half_width
)));
20915 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
20917 if (!rtx_equal_p (operands
[0], operands
[1]))
20918 emit_move_insn (operands
[0], operands
[1]);
20920 emit_insn (gen_shrd (low
[0], high
[0], GEN_INT (count
)));
20921 emit_insn (gen_ashr3 (high
[0], high
[0], GEN_INT (count
)));
20926 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
20928 if (!rtx_equal_p (operands
[0], operands
[1]))
20929 emit_move_insn (operands
[0], operands
[1]);
20931 split_double_mode (mode
, operands
, 1, low
, high
);
20933 emit_insn (gen_shrd (low
[0], high
[0], operands
[2]));
20934 emit_insn (gen_ashr3 (high
[0], high
[0], operands
[2]));
20936 if (TARGET_CMOVE
&& scratch
)
20938 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
20939 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
20941 emit_move_insn (scratch
, high
[0]);
20942 emit_insn (gen_ashr3 (scratch
, scratch
,
20943 GEN_INT (half_width
- 1)));
20944 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
20949 rtx (*gen_x86_shift_adj_3
)(rtx
, rtx
, rtx
)
20950 = mode
== DImode
? gen_x86_shiftsi_adj_3
: gen_x86_shiftdi_adj_3
;
20952 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
20958 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
20960 rtx (*gen_lshr3
)(rtx
, rtx
, rtx
)
20961 = mode
== DImode
? gen_lshrsi3
: gen_lshrdi3
;
20962 rtx (*gen_shrd
)(rtx
, rtx
, rtx
);
20963 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
20965 rtx low
[2], high
[2];
20968 if (CONST_INT_P (operands
[2]))
20970 split_double_mode (mode
, operands
, 2, low
, high
);
20971 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
20973 if (count
>= half_width
)
20975 emit_move_insn (low
[0], high
[1]);
20976 ix86_expand_clear (high
[0]);
20978 if (count
> half_width
)
20979 emit_insn (gen_lshr3 (low
[0], low
[0],
20980 GEN_INT (count
- half_width
)));
20984 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
20986 if (!rtx_equal_p (operands
[0], operands
[1]))
20987 emit_move_insn (operands
[0], operands
[1]);
20989 emit_insn (gen_shrd (low
[0], high
[0], GEN_INT (count
)));
20990 emit_insn (gen_lshr3 (high
[0], high
[0], GEN_INT (count
)));
20995 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
20997 if (!rtx_equal_p (operands
[0], operands
[1]))
20998 emit_move_insn (operands
[0], operands
[1]);
21000 split_double_mode (mode
, operands
, 1, low
, high
);
21002 emit_insn (gen_shrd (low
[0], high
[0], operands
[2]));
21003 emit_insn (gen_lshr3 (high
[0], high
[0], operands
[2]));
21005 if (TARGET_CMOVE
&& scratch
)
21007 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
21008 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
21010 ix86_expand_clear (scratch
);
21011 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
21016 rtx (*gen_x86_shift_adj_2
)(rtx
, rtx
, rtx
)
21017 = mode
== DImode
? gen_x86_shiftsi_adj_2
: gen_x86_shiftdi_adj_2
;
21019 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
21024 /* Predict just emitted jump instruction to be taken with probability PROB. */
21026 predict_jump (int prob
)
21028 rtx insn
= get_last_insn ();
21029 gcc_assert (JUMP_P (insn
));
21030 add_reg_note (insn
, REG_BR_PROB
, GEN_INT (prob
));
21033 /* Helper function for the string operations below. Dest VARIABLE whether
21034 it is aligned to VALUE bytes. If true, jump to the label. */
21036 ix86_expand_aligntest (rtx variable
, int value
, bool epilogue
)
21038 rtx label
= gen_label_rtx ();
21039 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
21040 if (GET_MODE (variable
) == DImode
)
21041 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
21043 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
21044 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
21047 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
21049 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
21053 /* Adjust COUNTER by the VALUE. */
21055 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
21057 rtx (*gen_add
)(rtx
, rtx
, rtx
)
21058 = GET_MODE (countreg
) == DImode
? gen_adddi3
: gen_addsi3
;
21060 emit_insn (gen_add (countreg
, countreg
, GEN_INT (-value
)));
21063 /* Zero extend possibly SImode EXP to Pmode register. */
21065 ix86_zero_extend_to_Pmode (rtx exp
)
21068 if (GET_MODE (exp
) == VOIDmode
)
21069 return force_reg (Pmode
, exp
);
21070 if (GET_MODE (exp
) == Pmode
)
21071 return copy_to_mode_reg (Pmode
, exp
);
21072 r
= gen_reg_rtx (Pmode
);
21073 emit_insn (gen_zero_extendsidi2 (r
, exp
));
21077 /* Divide COUNTREG by SCALE. */
21079 scale_counter (rtx countreg
, int scale
)
21085 if (CONST_INT_P (countreg
))
21086 return GEN_INT (INTVAL (countreg
) / scale
);
21087 gcc_assert (REG_P (countreg
));
21089 sc
= expand_simple_binop (GET_MODE (countreg
), LSHIFTRT
, countreg
,
21090 GEN_INT (exact_log2 (scale
)),
21091 NULL
, 1, OPTAB_DIRECT
);
21095 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
21096 DImode for constant loop counts. */
21098 static enum machine_mode
21099 counter_mode (rtx count_exp
)
21101 if (GET_MODE (count_exp
) != VOIDmode
)
21102 return GET_MODE (count_exp
);
21103 if (!CONST_INT_P (count_exp
))
21105 if (TARGET_64BIT
&& (INTVAL (count_exp
) & ~0xffffffff))
21110 /* Helper function for expand_set_or_movmem_via_loop.
21112 When SRCPTR is non-NULL, output simple loop to move memory
21113 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
21114 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
21115 equivalent loop to set memory by VALUE (supposed to be in MODE).
21117 The size is rounded down to whole number of chunk size moved at once.
21118 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info.
21120 If ITER isn't NULL, than it'll be used in the generated loop without
21121 initialization (that allows to generate several consequent loops using the
21123 If CHANGE_PTRS is specified, DESTPTR and SRCPTR would be increased by
21124 iterator value at the end of the function (as if they iterate in the loop).
21125 Otherwise, their vaules'll stay unchanged.
21127 If EXPECTED_SIZE isn't -1, than it's used to compute branch-probabilities on
21128 the loop backedge. When expected size is unknown (it's -1), the probability
21131 Return value is rtx of iterator, used in the loop - it could be reused in
21132 consequent calls of this function. */
21134 expand_set_or_movmem_via_loop_with_iter (rtx destmem
, rtx srcmem
,
21135 rtx destptr
, rtx srcptr
, rtx value
,
21136 rtx count
, rtx iter
,
21137 enum machine_mode mode
, int unroll
,
21138 int expected_size
, bool change_ptrs
)
21140 rtx out_label
, top_label
, tmp
;
21141 enum machine_mode iter_mode
= counter_mode (count
);
21142 rtx piece_size
= GEN_INT (GET_MODE_SIZE (mode
) * unroll
);
21143 rtx piece_size_mask
= GEN_INT (~((GET_MODE_SIZE (mode
) * unroll
) - 1));
21148 bool reuse_iter
= (iter
!= NULL_RTX
);
21150 top_label
= gen_label_rtx ();
21151 out_label
= gen_label_rtx ();
21152 size
= expand_simple_binop (iter_mode
, AND
, count
, piece_size_mask
,
21153 NULL
, 1, OPTAB_DIRECT
);
21156 iter
= gen_reg_rtx (iter_mode
);
21157 /* Those two should combine. */
21158 if (piece_size
== const1_rtx
)
21160 emit_cmp_and_jump_insns (size
, const0_rtx
, EQ
, NULL_RTX
, iter_mode
,
21162 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
21164 emit_move_insn (iter
, const0_rtx
);
21168 emit_cmp_and_jump_insns (iter
, size
, GE
, NULL_RTX
, iter_mode
,
21172 emit_label (top_label
);
21174 tmp
= convert_modes (Pmode
, iter_mode
, iter
, true);
21175 x_addr
= gen_rtx_PLUS (Pmode
, destptr
, tmp
);
21177 adjust_automodify_address_nv (copy_rtx (destmem
), mode
, x_addr
, 0);
21181 y_addr
= gen_rtx_PLUS (Pmode
, srcptr
, copy_rtx (tmp
));
21183 adjust_automodify_address_nv (copy_rtx (srcmem
), mode
, y_addr
, 0);
21185 /* When unrolling for chips that reorder memory reads and writes,
21186 we can save registers by using single temporary.
21187 Also using 4 temporaries is overkill in 32bit mode. */
21188 if (!TARGET_64BIT
&& 0)
21190 for (i
= 0; i
< unroll
; i
++)
21195 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
21197 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
21199 emit_move_insn (destmem
, srcmem
);
21205 gcc_assert (unroll
<= 4);
21206 for (i
= 0; i
< unroll
; i
++)
21208 tmpreg
[i
] = gen_reg_rtx (mode
);
21212 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
21214 emit_move_insn (tmpreg
[i
], srcmem
);
21216 for (i
= 0; i
< unroll
; i
++)
21221 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
21223 emit_move_insn (destmem
, tmpreg
[i
]);
21228 for (i
= 0; i
< unroll
; i
++)
21232 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
21233 emit_move_insn (destmem
, value
);
21236 tmp
= expand_simple_binop (iter_mode
, PLUS
, iter
, piece_size
, iter
,
21237 true, OPTAB_LIB_WIDEN
);
21239 emit_move_insn (iter
, tmp
);
21241 emit_cmp_and_jump_insns (iter
, size
, LT
, NULL_RTX
, iter_mode
,
21243 if (expected_size
!= -1)
21245 expected_size
/= GET_MODE_SIZE (mode
) * unroll
;
21246 if (expected_size
== 0)
21248 else if (expected_size
> REG_BR_PROB_BASE
)
21249 predict_jump (REG_BR_PROB_BASE
- 1);
21251 predict_jump (REG_BR_PROB_BASE
- (REG_BR_PROB_BASE
+ expected_size
/ 2) / expected_size
);
21254 predict_jump (REG_BR_PROB_BASE
* 80 / 100);
21257 iter
= ix86_zero_extend_to_Pmode (iter
);
21258 tmp
= expand_simple_binop (Pmode
, PLUS
, destptr
, iter
, destptr
,
21259 true, OPTAB_LIB_WIDEN
);
21260 if (tmp
!= destptr
)
21261 emit_move_insn (destptr
, tmp
);
21264 tmp
= expand_simple_binop (Pmode
, PLUS
, srcptr
, iter
, srcptr
,
21265 true, OPTAB_LIB_WIDEN
);
21267 emit_move_insn (srcptr
, tmp
);
21270 emit_label (out_label
);
21274 /* When SRCPTR is non-NULL, output simple loop to move memory
21275 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
21276 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
21277 equivalent loop to set memory by VALUE (supposed to be in MODE).
21279 The size is rounded down to whole number of chunk size moved at once.
21280 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
21283 expand_set_or_movmem_via_loop (rtx destmem
, rtx srcmem
,
21284 rtx destptr
, rtx srcptr
, rtx value
,
21285 rtx count
, enum machine_mode mode
, int unroll
,
21288 expand_set_or_movmem_via_loop_with_iter (destmem
, srcmem
,
21289 destptr
, srcptr
, value
,
21290 count
, NULL_RTX
, mode
, unroll
,
21291 expected_size
, true);
21294 /* Output "rep; mov" instruction.
21295 Arguments have same meaning as for previous function */
21297 expand_movmem_via_rep_mov (rtx destmem
, rtx srcmem
,
21298 rtx destptr
, rtx srcptr
,
21300 enum machine_mode mode
)
21305 HOST_WIDE_INT rounded_count
;
21307 /* If the size is known, it is shorter to use rep movs. */
21308 if (mode
== QImode
&& CONST_INT_P (count
)
21309 && !(INTVAL (count
) & 3))
21312 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
21313 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
21314 if (srcptr
!= XEXP (srcmem
, 0) || GET_MODE (srcmem
) != BLKmode
)
21315 srcmem
= adjust_automodify_address_nv (srcmem
, BLKmode
, srcptr
, 0);
21316 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
21317 if (mode
!= QImode
)
21319 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
21320 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
21321 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
21322 srcexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
21323 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
21324 srcexp
= gen_rtx_PLUS (Pmode
, srcexp
, srcptr
);
21328 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
21329 srcexp
= gen_rtx_PLUS (Pmode
, srcptr
, countreg
);
21331 if (CONST_INT_P (count
))
21333 rounded_count
= (INTVAL (count
)
21334 & ~((HOST_WIDE_INT
) GET_MODE_SIZE (mode
) - 1));
21335 destmem
= shallow_copy_rtx (destmem
);
21336 srcmem
= shallow_copy_rtx (srcmem
);
21337 set_mem_size (destmem
, rounded_count
);
21338 set_mem_size (srcmem
, rounded_count
);
21342 if (MEM_SIZE_KNOWN_P (destmem
))
21343 clear_mem_size (destmem
);
21344 if (MEM_SIZE_KNOWN_P (srcmem
))
21345 clear_mem_size (srcmem
);
21347 emit_insn (gen_rep_mov (destptr
, destmem
, srcptr
, srcmem
, countreg
,
21351 /* Output "rep; stos" instruction.
21352 Arguments have same meaning as for previous function */
21354 expand_setmem_via_rep_stos (rtx destmem
, rtx destptr
, rtx value
,
21355 rtx count
, enum machine_mode mode
,
21360 HOST_WIDE_INT rounded_count
;
21362 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
21363 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
21364 value
= force_reg (mode
, gen_lowpart (mode
, value
));
21365 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
21366 if (mode
!= QImode
)
21368 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
21369 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
21370 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
21373 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
21374 if (orig_value
== const0_rtx
&& CONST_INT_P (count
))
21376 rounded_count
= (INTVAL (count
)
21377 & ~((HOST_WIDE_INT
) GET_MODE_SIZE (mode
) - 1));
21378 destmem
= shallow_copy_rtx (destmem
);
21379 set_mem_size (destmem
, rounded_count
);
21381 else if (MEM_SIZE_KNOWN_P (destmem
))
21382 clear_mem_size (destmem
);
21383 emit_insn (gen_rep_stos (destptr
, countreg
, destmem
, value
, destexp
));
21387 emit_strmov (rtx destmem
, rtx srcmem
,
21388 rtx destptr
, rtx srcptr
, enum machine_mode mode
, int offset
)
21390 rtx src
= adjust_automodify_address_nv (srcmem
, mode
, srcptr
, offset
);
21391 rtx dest
= adjust_automodify_address_nv (destmem
, mode
, destptr
, offset
);
21392 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
21395 /* Emit strset instuction. If RHS is constant, and vector mode will be used,
21396 then move this constant to a vector register before emitting strset. */
21398 emit_strset (rtx destmem
, rtx value
,
21399 rtx destptr
, enum machine_mode mode
, int offset
)
21401 rtx dest
= adjust_automodify_address_nv (destmem
, mode
, destptr
, offset
);
21402 emit_insn (gen_strset (destptr
, dest
, value
));
21405 /* Output code to copy (COUNT % MAX_SIZE) bytes from SRCPTR to DESTPTR.
21406 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
21408 expand_movmem_epilogue (rtx destmem
, rtx srcmem
,
21409 rtx destptr
, rtx srcptr
, rtx count
, int max_size
)
21412 if (CONST_INT_P (count
))
21414 HOST_WIDE_INT countval
= INTVAL (count
);
21417 int remainder_size
= countval
% max_size
;
21418 enum machine_mode move_mode
= Pmode
;
21420 /* Firstly, try to move data with the widest possible mode.
21421 Remaining part we'll move using Pmode and narrower modes. */
21424 if (max_size
>= GET_MODE_SIZE (V4SImode
))
21425 move_mode
= V4SImode
;
21426 else if (max_size
>= GET_MODE_SIZE (DImode
))
21427 move_mode
= DImode
;
21430 while (remainder_size
>= GET_MODE_SIZE (move_mode
))
21432 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, move_mode
, offset
);
21433 offset
+= GET_MODE_SIZE (move_mode
);
21434 remainder_size
-= GET_MODE_SIZE (move_mode
);
21437 /* Move the remaining part of epilogue - its size might be
21438 a size of the widest mode. */
21440 while (remainder_size
>= GET_MODE_SIZE (move_mode
))
21442 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, move_mode
, offset
);
21443 offset
+= GET_MODE_SIZE (move_mode
);
21444 remainder_size
-= GET_MODE_SIZE (move_mode
);
21447 if (remainder_size
>= 4)
21449 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
21451 remainder_size
-= 4;
21453 if (remainder_size
>= 2)
21455 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, HImode
, offset
);
21457 remainder_size
-= 2;
21459 if (remainder_size
>= 1)
21461 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, QImode
, offset
);
21463 remainder_size
-= 1;
21465 gcc_assert (remainder_size
== 0);
21470 count
= expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
21471 count
, 1, OPTAB_DIRECT
);
21472 expand_set_or_movmem_via_loop (destmem
, srcmem
, destptr
, srcptr
, NULL
,
21473 count
, QImode
, 1, 4);
21477 /* When there are stringops, we can cheaply increase dest and src pointers.
21478 Otherwise we save code size by maintaining offset (zero is readily
21479 available from preceding rep operation) and using x86 addressing modes.
21481 if (TARGET_SINGLE_STRINGOP
)
21485 rtx label
= ix86_expand_aligntest (count
, 8, true);
21488 src
= change_address (srcmem
, DImode
, srcptr
);
21489 dest
= change_address (destmem
, DImode
, destptr
);
21490 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
21494 src
= change_address (srcmem
, SImode
, srcptr
);
21495 dest
= change_address (destmem
, SImode
, destptr
);
21496 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
21497 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
21499 emit_label (label
);
21500 LABEL_NUSES (label
) = 1;
21504 rtx label
= ix86_expand_aligntest (count
, 4, true);
21505 src
= change_address (srcmem
, SImode
, srcptr
);
21506 dest
= change_address (destmem
, SImode
, destptr
);
21507 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
21508 emit_label (label
);
21509 LABEL_NUSES (label
) = 1;
21513 rtx label
= ix86_expand_aligntest (count
, 2, true);
21514 src
= change_address (srcmem
, HImode
, srcptr
);
21515 dest
= change_address (destmem
, HImode
, destptr
);
21516 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
21517 emit_label (label
);
21518 LABEL_NUSES (label
) = 1;
21522 rtx label
= ix86_expand_aligntest (count
, 1, true);
21523 src
= change_address (srcmem
, QImode
, srcptr
);
21524 dest
= change_address (destmem
, QImode
, destptr
);
21525 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
21526 emit_label (label
);
21527 LABEL_NUSES (label
) = 1;
21532 rtx offset
= force_reg (Pmode
, const0_rtx
);
21537 rtx label
= ix86_expand_aligntest (count
, 8, true);
21540 src
= change_address (srcmem
, DImode
, srcptr
);
21541 dest
= change_address (destmem
, DImode
, destptr
);
21542 emit_move_insn (dest
, src
);
21543 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (8), NULL
,
21544 true, OPTAB_LIB_WIDEN
);
21548 src
= change_address (srcmem
, SImode
, srcptr
);
21549 dest
= change_address (destmem
, SImode
, destptr
);
21550 emit_move_insn (dest
, src
);
21551 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (4), NULL
,
21552 true, OPTAB_LIB_WIDEN
);
21554 emit_move_insn (offset
, tmp
);
21555 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (4), NULL
,
21556 true, OPTAB_LIB_WIDEN
);
21557 emit_move_insn (dest
, src
);
21560 emit_move_insn (offset
, tmp
);
21561 emit_label (label
);
21562 LABEL_NUSES (label
) = 1;
21566 rtx label
= ix86_expand_aligntest (count
, 4, true);
21567 src
= change_address (srcmem
, SImode
, srcptr
);
21568 dest
= change_address (destmem
, SImode
, destptr
);
21569 emit_move_insn (dest
, src
);
21570 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (4), NULL
,
21571 true, OPTAB_LIB_WIDEN
);
21573 emit_move_insn (offset
, tmp
);
21574 emit_label (label
);
21575 LABEL_NUSES (label
) = 1;
21579 rtx label
= ix86_expand_aligntest (count
, 2, true);
21580 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
21581 src
= change_address (srcmem
, HImode
, tmp
);
21582 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
21583 dest
= change_address (destmem
, HImode
, tmp
);
21584 emit_move_insn (dest
, src
);
21585 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (2), tmp
,
21586 true, OPTAB_LIB_WIDEN
);
21588 emit_move_insn (offset
, tmp
);
21589 emit_label (label
);
21590 LABEL_NUSES (label
) = 1;
21594 rtx label
= ix86_expand_aligntest (count
, 1, true);
21595 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
21596 src
= change_address (srcmem
, QImode
, tmp
);
21597 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
21598 dest
= change_address (destmem
, QImode
, tmp
);
21599 emit_move_insn (dest
, src
);
21600 emit_label (label
);
21601 LABEL_NUSES (label
) = 1;
21606 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
21608 expand_setmem_epilogue_via_loop (rtx destmem
, rtx destptr
, rtx value
,
21609 rtx count
, int max_size
)
21612 expand_simple_binop (counter_mode (count
), AND
, count
,
21613 GEN_INT (max_size
- 1), count
, 1, OPTAB_DIRECT
);
21614 expand_set_or_movmem_via_loop (destmem
, NULL
, destptr
, NULL
,
21615 gen_lowpart (QImode
, value
), count
, QImode
,
21619 /* Output code to set with VALUE at most (COUNT % MAX_SIZE) bytes starting from
21621 DESTMEM provides MEMrtx to feed proper aliasing info.
21622 PROMOTED_TO_GPR_VALUE is rtx representing a GPR containing broadcasted VALUE.
21623 PROMOTED_TO_VECTOR_VALUE is rtx representing a vector register containing
21625 PROMOTED_TO_GPR_VALUE and PROMOTED_TO_VECTOR_VALUE could be NULL if the
21626 promotion hasn't been generated before. */
21628 expand_setmem_epilogue (rtx destmem
, rtx destptr
, rtx promoted_to_vector_value
,
21629 rtx promoted_to_gpr_value
, rtx value
, rtx count
,
21632 if (CONST_INT_P (count
))
21634 HOST_WIDE_INT countval
= INTVAL (count
);
21637 int remainder_size
= countval
% max_size
;
21638 enum machine_mode move_mode
= Pmode
;
21640 /* Firstly, try to move data with the widest possible mode.
21641 Remaining part we'll move using Pmode and narrower modes. */
21643 if (promoted_to_vector_value
)
21645 if (promoted_to_vector_value
)
21647 if (max_size
>= GET_MODE_SIZE (V4SImode
))
21648 move_mode
= V4SImode
;
21649 else if (max_size
>= GET_MODE_SIZE (DImode
))
21650 move_mode
= DImode
;
21652 while (remainder_size
>= GET_MODE_SIZE (move_mode
))
21654 if (GET_MODE (destmem
) != move_mode
)
21655 destmem
= adjust_automodify_address_nv (destmem
, move_mode
,
21657 emit_strset (destmem
,
21658 promoted_to_vector_value
,
21660 move_mode
, offset
);
21662 offset
+= GET_MODE_SIZE (move_mode
);
21663 remainder_size
-= GET_MODE_SIZE (move_mode
);
21667 /* Move the remaining part of epilogue - its size might be
21668 a size of the widest mode. */
21669 while (remainder_size
>= GET_MODE_SIZE (Pmode
))
21671 if (!promoted_to_gpr_value
)
21672 promoted_to_gpr_value
= promote_duplicated_reg (Pmode
, value
);
21673 emit_strset (destmem
, promoted_to_gpr_value
, destptr
, Pmode
, offset
);
21674 offset
+= GET_MODE_SIZE (Pmode
);
21675 remainder_size
-= GET_MODE_SIZE (Pmode
);
21678 if (!promoted_to_gpr_value
&& remainder_size
> 1)
21679 promoted_to_gpr_value
= promote_duplicated_reg (remainder_size
>= 4
21680 ? SImode
: HImode
, value
);
21681 if (remainder_size
>= 4)
21683 emit_strset (destmem
, gen_lowpart (SImode
, promoted_to_gpr_value
), destptr
,
21686 remainder_size
-= 4;
21688 if (remainder_size
>= 2)
21690 emit_strset (destmem
, gen_lowpart (HImode
, promoted_to_gpr_value
), destptr
,
21693 remainder_size
-= 2;
21695 if (remainder_size
>= 1)
21697 emit_strset (destmem
,
21698 promoted_to_gpr_value
? gen_lowpart (QImode
, promoted_to_gpr_value
) : value
,
21702 remainder_size
-= 1;
21704 gcc_assert (remainder_size
== 0);
21708 /* count isn't const. */
21711 expand_setmem_epilogue_via_loop (destmem
, destptr
, value
, count
,
21716 if (!promoted_to_gpr_value
)
21717 promoted_to_gpr_value
= promote_duplicated_reg_to_size (value
,
21718 GET_MODE_SIZE (Pmode
),
21719 GET_MODE_SIZE (Pmode
),
21720 GET_MODE_SIZE (Pmode
));
21724 rtx label
= ix86_expand_aligntest (count
, 16, true);
21725 if (TARGET_SSE
&& promoted_to_vector_value
)
21727 destmem
= change_address (destmem
,
21728 GET_MODE (promoted_to_vector_value
),
21730 emit_insn (gen_strset (destptr
, destmem
, promoted_to_vector_value
));
21732 else if (TARGET_64BIT
)
21734 destmem
= change_address (destmem
, DImode
, destptr
);
21735 emit_insn (gen_strset (destptr
, destmem
, promoted_to_gpr_value
));
21736 emit_insn (gen_strset (destptr
, destmem
, promoted_to_gpr_value
));
21740 destmem
= change_address (destmem
, SImode
, destptr
);
21741 emit_insn (gen_strset (destptr
, destmem
, promoted_to_gpr_value
));
21742 emit_insn (gen_strset (destptr
, destmem
, promoted_to_gpr_value
));
21743 emit_insn (gen_strset (destptr
, destmem
, promoted_to_gpr_value
));
21744 emit_insn (gen_strset (destptr
, destmem
, promoted_to_gpr_value
));
21746 emit_label (label
);
21747 LABEL_NUSES (label
) = 1;
21751 rtx label
= ix86_expand_aligntest (count
, 8, true);
21754 destmem
= change_address (destmem
, DImode
, destptr
);
21755 emit_insn (gen_strset (destptr
, destmem
, promoted_to_gpr_value
));
21757 /* FIXME: When this hunk it output, IRA classifies promoted_to_vector_value
21759 else if (TARGET_SSE
&& promoted_to_vector_value
&& 0)
21761 destmem
= change_address (destmem
, V2SImode
, destptr
);
21762 emit_insn (gen_strset (destptr
, destmem
,
21763 gen_lowpart (V2SImode
, promoted_to_vector_value
)));
21767 destmem
= change_address (destmem
, SImode
, destptr
);
21768 emit_insn (gen_strset (destptr
, destmem
, promoted_to_gpr_value
));
21769 emit_insn (gen_strset (destptr
, destmem
, promoted_to_gpr_value
));
21771 emit_label (label
);
21772 LABEL_NUSES (label
) = 1;
21776 rtx label
= ix86_expand_aligntest (count
, 4, true);
21777 destmem
= change_address (destmem
, SImode
, destptr
);
21778 emit_insn (gen_strset (destptr
, destmem
,
21779 gen_lowpart (SImode
, promoted_to_gpr_value
)));
21780 emit_label (label
);
21781 LABEL_NUSES (label
) = 1;
21785 rtx label
= ix86_expand_aligntest (count
, 2, true);
21786 destmem
= change_address (destmem
, HImode
, destptr
);
21787 emit_insn (gen_strset (destptr
, destmem
,
21788 gen_lowpart (HImode
, promoted_to_gpr_value
)));
21789 emit_label (label
);
21790 LABEL_NUSES (label
) = 1;
21794 rtx label
= ix86_expand_aligntest (count
, 1, true);
21795 destmem
= change_address (destmem
, QImode
, destptr
);
21796 emit_insn (gen_strset (destptr
, destmem
,
21797 gen_lowpart (QImode
, promoted_to_gpr_value
)));
21798 emit_label (label
);
21799 LABEL_NUSES (label
) = 1;
21803 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
21804 DESIRED_ALIGNMENT. */
21806 expand_movmem_prologue (rtx destmem
, rtx srcmem
,
21807 rtx destptr
, rtx srcptr
, rtx count
,
21808 int align
, int desired_alignment
)
21810 if (align
<= 1 && desired_alignment
> 1)
21812 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
21813 srcmem
= adjust_automodify_address_nv (srcmem
, QImode
, srcptr
, 0);
21814 destmem
= adjust_automodify_address_nv (destmem
, QImode
, destptr
, 0);
21815 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
21816 ix86_adjust_counter (count
, 1);
21817 emit_label (label
);
21818 LABEL_NUSES (label
) = 1;
21820 if (align
<= 2 && desired_alignment
> 2)
21822 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
21823 srcmem
= adjust_automodify_address_nv (srcmem
, HImode
, srcptr
, 0);
21824 destmem
= adjust_automodify_address_nv (destmem
, HImode
, destptr
, 0);
21825 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
21826 ix86_adjust_counter (count
, 2);
21827 emit_label (label
);
21828 LABEL_NUSES (label
) = 1;
21830 if (align
<= 4 && desired_alignment
> 4)
21832 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
21833 srcmem
= adjust_automodify_address_nv (srcmem
, SImode
, srcptr
, 0);
21834 destmem
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, 0);
21835 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
21836 ix86_adjust_counter (count
, 4);
21837 emit_label (label
);
21838 LABEL_NUSES (label
) = 1;
21840 if (align
<= 8 && desired_alignment
> 8)
21842 rtx label
= ix86_expand_aligntest (destptr
, 8, false);
21843 if (TARGET_64BIT
|| TARGET_SSE
)
21845 srcmem
= adjust_automodify_address_nv (srcmem
, DImode
, srcptr
, 0);
21846 destmem
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, 0);
21847 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
21851 srcmem
= adjust_automodify_address_nv (srcmem
, SImode
, srcptr
, 0);
21852 destmem
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, 0);
21853 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
21854 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
21856 ix86_adjust_counter (count
, 8);
21857 emit_label (label
);
21858 LABEL_NUSES (label
) = 1;
21860 gcc_assert (desired_alignment
<= 16);
21863 /* Copy enough from DST to SRC to align DST known to DESIRED_ALIGN.
21864 ALIGN_BYTES is how many bytes need to be copied. */
21866 expand_constant_movmem_prologue (rtx dst
, rtx
*srcp
, rtx destreg
, rtx srcreg
,
21867 int desired_align
, int align_bytes
)
21870 rtx orig_dst
= dst
;
21871 rtx orig_src
= src
;
21873 int src_align_bytes
= get_mem_align_offset (src
, desired_align
* BITS_PER_UNIT
);
21874 if (src_align_bytes
>= 0)
21875 src_align_bytes
= desired_align
- src_align_bytes
;
21876 if (align_bytes
& 1)
21878 dst
= adjust_automodify_address_nv (dst
, QImode
, destreg
, 0);
21879 src
= adjust_automodify_address_nv (src
, QImode
, srcreg
, 0);
21881 emit_insn (gen_strmov (destreg
, dst
, srcreg
, src
));
21883 if (align_bytes
& 2)
21885 dst
= adjust_automodify_address_nv (dst
, HImode
, destreg
, off
);
21886 src
= adjust_automodify_address_nv (src
, HImode
, srcreg
, off
);
21887 if (MEM_ALIGN (dst
) < 2 * BITS_PER_UNIT
)
21888 set_mem_align (dst
, 2 * BITS_PER_UNIT
);
21889 if (src_align_bytes
>= 0
21890 && (src_align_bytes
& 1) == (align_bytes
& 1)
21891 && MEM_ALIGN (src
) < 2 * BITS_PER_UNIT
)
21892 set_mem_align (src
, 2 * BITS_PER_UNIT
);
21894 emit_insn (gen_strmov (destreg
, dst
, srcreg
, src
));
21896 if (align_bytes
& 4)
21898 dst
= adjust_automodify_address_nv (dst
, SImode
, destreg
, off
);
21899 src
= adjust_automodify_address_nv (src
, SImode
, srcreg
, off
);
21900 if (MEM_ALIGN (dst
) < 4 * BITS_PER_UNIT
)
21901 set_mem_align (dst
, 4 * BITS_PER_UNIT
);
21902 if (src_align_bytes
>= 0)
21904 unsigned int src_align
= 0;
21905 if ((src_align_bytes
& 3) == (align_bytes
& 3))
21907 else if ((src_align_bytes
& 1) == (align_bytes
& 1))
21909 if (MEM_ALIGN (src
) < src_align
* BITS_PER_UNIT
)
21910 set_mem_align (src
, src_align
* BITS_PER_UNIT
);
21913 emit_insn (gen_strmov (destreg
, dst
, srcreg
, src
));
21915 if (align_bytes
& 8)
21917 if (TARGET_64BIT
|| TARGET_SSE
)
21919 dst
= adjust_automodify_address_nv (dst
, DImode
, destreg
, off
);
21920 src
= adjust_automodify_address_nv (src
, DImode
, srcreg
, off
);
21921 emit_insn (gen_strmov (destreg
, dst
, srcreg
, src
));
21925 dst
= adjust_automodify_address_nv (dst
, SImode
, destreg
, off
);
21926 src
= adjust_automodify_address_nv (src
, SImode
, srcreg
, off
);
21927 emit_insn (gen_strmov (destreg
, dst
, srcreg
, src
));
21928 emit_insn (gen_strmov (destreg
, dst
, srcreg
, src
));
21930 if (MEM_ALIGN (dst
) < 8 * BITS_PER_UNIT
)
21931 set_mem_align (dst
, 8 * BITS_PER_UNIT
);
21932 if (src_align_bytes
>= 0)
21934 unsigned int src_align
= 0;
21935 if ((src_align_bytes
& 7) == (align_bytes
& 7))
21937 else if ((src_align_bytes
& 3) == (align_bytes
& 3))
21939 else if ((src_align_bytes
& 1) == (align_bytes
& 1))
21941 if (MEM_ALIGN (src
) < src_align
* BITS_PER_UNIT
)
21942 set_mem_align (src
, src_align
* BITS_PER_UNIT
);
21946 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
, off
);
21947 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
, off
);
21948 if (MEM_ALIGN (dst
) < (unsigned int) desired_align
* BITS_PER_UNIT
)
21949 set_mem_align (dst
, desired_align
* BITS_PER_UNIT
);
21950 if (src_align_bytes
>= 0)
21952 unsigned int src_align
= 0;
21953 if ((src_align_bytes
& 15) == (align_bytes
& 15))
21955 else if ((src_align_bytes
& 7) == (align_bytes
& 7))
21957 else if ((src_align_bytes
& 3) == (align_bytes
& 3))
21959 else if ((src_align_bytes
& 1) == (align_bytes
& 1))
21961 if (src_align
> (unsigned int) desired_align
)
21962 src_align
= desired_align
;
21963 if (MEM_ALIGN (src
) < src_align
* BITS_PER_UNIT
)
21964 set_mem_align (src
, src_align
* BITS_PER_UNIT
);
21966 if (MEM_SIZE_KNOWN_P (orig_dst
))
21967 set_mem_size (dst
, MEM_SIZE (orig_dst
) - align_bytes
);
21968 if (MEM_SIZE_KNOWN_P (orig_src
))
21969 set_mem_size (src
, MEM_SIZE (orig_src
) - align_bytes
);
21974 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
21975 DESIRED_ALIGNMENT. */
21977 expand_setmem_prologue (rtx destmem
, rtx destptr
, rtx value
, rtx count
,
21978 int align
, int desired_alignment
)
21980 if (align
<= 1 && desired_alignment
> 1)
21982 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
21983 destmem
= adjust_automodify_address_nv (destmem
, QImode
, destptr
, 0);
21984 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (QImode
, value
)));
21985 ix86_adjust_counter (count
, 1);
21986 emit_label (label
);
21987 LABEL_NUSES (label
) = 1;
21989 if (align
<= 2 && desired_alignment
> 2)
21991 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
21992 destmem
= adjust_automodify_address_nv (destmem
, HImode
, destptr
, 0);
21993 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (HImode
, value
)));
21994 ix86_adjust_counter (count
, 2);
21995 emit_label (label
);
21996 LABEL_NUSES (label
) = 1;
21998 if (align
<= 4 && desired_alignment
> 4)
22000 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
22001 destmem
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, 0);
22002 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (SImode
, value
)));
22003 ix86_adjust_counter (count
, 4);
22004 emit_label (label
);
22005 LABEL_NUSES (label
) = 1;
22007 if (align
<= 8 && desired_alignment
> 8)
22009 rtx label
= ix86_expand_aligntest (destptr
, 8, false);
22010 destmem
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, 0);
22011 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (SImode
, value
)));
22012 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (SImode
, value
)));
22013 ix86_adjust_counter (count
, 8);
22014 emit_label (label
);
22015 LABEL_NUSES (label
) = 1;
22017 gcc_assert (desired_alignment
<= 16);
22020 /* Set enough from DST to align DST known to by aligned by ALIGN to
22021 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
22023 expand_constant_setmem_prologue (rtx dst
, rtx destreg
, rtx value
,
22024 int desired_align
, int align_bytes
)
22027 rtx orig_dst
= dst
;
22028 if (align_bytes
& 1)
22030 dst
= adjust_automodify_address_nv (dst
, QImode
, destreg
, 0);
22032 emit_insn (gen_strset (destreg
, dst
,
22033 gen_lowpart (QImode
, value
)));
22035 if (align_bytes
& 2)
22037 dst
= adjust_automodify_address_nv (dst
, HImode
, destreg
, off
);
22038 if (MEM_ALIGN (dst
) < 2 * BITS_PER_UNIT
)
22039 set_mem_align (dst
, 2 * BITS_PER_UNIT
);
22041 emit_insn (gen_strset (destreg
, dst
,
22042 gen_lowpart (HImode
, value
)));
22044 if (align_bytes
& 4)
22046 dst
= adjust_automodify_address_nv (dst
, SImode
, destreg
, off
);
22047 if (MEM_ALIGN (dst
) < 4 * BITS_PER_UNIT
)
22048 set_mem_align (dst
, 4 * BITS_PER_UNIT
);
22050 emit_insn (gen_strset (destreg
, dst
,
22051 gen_lowpart (SImode
, value
)));
22053 if (align_bytes
& 8)
22055 dst
= adjust_automodify_address_nv (dst
, SImode
, destreg
, off
);
22056 emit_insn (gen_strset (destreg
, dst
,
22057 gen_lowpart (SImode
, value
)));
22059 dst
= adjust_automodify_address_nv (dst
, SImode
, destreg
, off
);
22060 emit_insn (gen_strset (destreg
, dst
,
22061 gen_lowpart (SImode
, value
)));
22062 if (MEM_ALIGN (dst
) < 8 * BITS_PER_UNIT
)
22063 set_mem_align (dst
, 8 * BITS_PER_UNIT
);
22066 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
, off
);
22067 if (MEM_ALIGN (dst
) < (unsigned int) desired_align
* BITS_PER_UNIT
)
22068 set_mem_align (dst
, desired_align
* BITS_PER_UNIT
);
22069 if (MEM_SIZE_KNOWN_P (orig_dst
))
22070 set_mem_size (dst
, MEM_SIZE (orig_dst
) - align_bytes
);
22074 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
22075 static enum stringop_alg
22076 decide_alg (HOST_WIDE_INT count
, HOST_WIDE_INT expected_size
, bool memset
,
22077 int *dynamic_check
, bool align_unknown
)
22079 const struct stringop_algs
* algs
;
22080 bool optimize_for_speed
;
22081 /* Algorithms using the rep prefix want at least edi and ecx;
22082 additionally, memset wants eax and memcpy wants esi. Don't
22083 consider such algorithms if the user has appropriated those
22084 registers for their own purposes. */
22085 bool rep_prefix_usable
= !(fixed_regs
[CX_REG
] || fixed_regs
[DI_REG
]
22087 ? fixed_regs
[AX_REG
] : fixed_regs
[SI_REG
]));
22089 #define ALG_USABLE_P(alg) ((rep_prefix_usable \
22090 || (alg != rep_prefix_1_byte \
22091 && alg != rep_prefix_4_byte \
22092 && alg != rep_prefix_8_byte)) \
22093 && (TARGET_SSE2 || alg != sse_loop))
22094 const struct processor_costs
*cost
;
22096 /* Even if the string operation call is cold, we still might spend a lot
22097 of time processing large blocks. */
22098 if (optimize_function_for_size_p (cfun
)
22099 || (optimize_insn_for_size_p ()
22100 && expected_size
!= -1 && expected_size
< 256))
22101 optimize_for_speed
= false;
22103 optimize_for_speed
= true;
22106 return (rep_prefix_usable
? rep_prefix_1_byte
: libcall
);
22108 cost
= optimize_for_speed
? ix86_cost
: &ix86_size_cost
;
22110 *dynamic_check
= -1;
22112 algs
= &cost
->memset
[align_unknown
][TARGET_64BIT
!= 0];
22114 algs
= &cost
->memcpy
[align_unknown
][TARGET_64BIT
!= 0];
22115 if (ix86_stringop_alg
!= no_stringop
&& ALG_USABLE_P (ix86_stringop_alg
))
22116 return ix86_stringop_alg
;
22117 /* rep; movq or rep; movl is the smallest variant. */
22118 else if (!optimize_for_speed
)
22120 if (!count
|| (count
& 3) || memset
)
22121 return rep_prefix_usable
? rep_prefix_1_byte
: libcall
;
22123 return rep_prefix_usable
? rep_prefix_4_byte
: libcall
;
22125 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
22127 else if (expected_size
!= -1 && expected_size
< 4)
22128 return loop_1_byte
;
22129 else if (expected_size
!= -1)
22132 enum stringop_alg alg
= libcall
;
22133 for (i
= 0; i
< MAX_STRINGOP_ALGS
; i
++)
22135 /* We get here if the algorithms that were not libcall-based
22136 were rep-prefix based and we are unable to use rep prefixes
22137 based on global register usage. Break out of the loop and
22138 use the heuristic below. */
22139 if (algs
->size
[i
].max
== 0)
22141 if (algs
->size
[i
].max
>= expected_size
|| algs
->size
[i
].max
== -1)
22143 enum stringop_alg candidate
= algs
->size
[i
].alg
;
22145 if (candidate
!= libcall
&& ALG_USABLE_P (candidate
))
22147 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
22148 last non-libcall inline algorithm. */
22149 if (TARGET_INLINE_ALL_STRINGOPS
)
22151 /* When the current size is best to be copied by a libcall,
22152 but we are still forced to inline, run the heuristic below
22153 that will pick code for medium sized blocks. */
22154 if (alg
!= libcall
)
22158 else if (ALG_USABLE_P (candidate
))
22162 gcc_assert (TARGET_INLINE_ALL_STRINGOPS
|| !rep_prefix_usable
);
22164 /* When asked to inline the call anyway, try to pick meaningful choice.
22165 We look for maximal size of block that is faster to copy by hand and
22166 take blocks of at most of that size guessing that average size will
22167 be roughly half of the block.
22169 If this turns out to be bad, we might simply specify the preferred
22170 choice in ix86_costs. */
22171 if ((TARGET_INLINE_ALL_STRINGOPS
|| TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
22172 && (algs
->unknown_size
== libcall
|| !ALG_USABLE_P (algs
->unknown_size
)))
22175 enum stringop_alg alg
;
22177 bool only_libcall_fits
= true;
22179 for (i
= 0; i
< MAX_STRINGOP_ALGS
; i
++)
22181 enum stringop_alg candidate
= algs
->size
[i
].alg
;
22183 if (candidate
!= libcall
&& candidate
22184 && ALG_USABLE_P (candidate
))
22186 max
= algs
->size
[i
].max
;
22187 only_libcall_fits
= false;
22190 /* If there aren't any usable algorithms, then recursing on
22191 smaller sizes isn't going to find anything. Just return the
22192 simple byte-at-a-time copy loop. */
22193 if (only_libcall_fits
)
22195 /* Pick something reasonable. */
22196 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
22197 *dynamic_check
= 128;
22198 return loop_1_byte
;
22202 alg
= decide_alg (count
, max
/ 2, memset
, dynamic_check
, align_unknown
);
22203 gcc_assert (*dynamic_check
== -1);
22204 gcc_assert (alg
!= libcall
);
22205 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
22206 *dynamic_check
= max
;
22209 return ALG_USABLE_P (algs
->unknown_size
) ? algs
->unknown_size
: libcall
;
22210 #undef ALG_USABLE_P
22213 /* Decide on alignment. We know that the operand is already aligned to ALIGN
22214 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
22216 decide_alignment (int align
,
22217 enum stringop_alg alg
,
22220 int desired_align
= 0;
22224 gcc_unreachable ();
22226 desired_align
= GET_MODE_SIZE (Pmode
);
22228 case unrolled_loop
:
22229 desired_align
= GET_MODE_SIZE (Pmode
);
22232 desired_align
= 16;
22234 case rep_prefix_8_byte
:
22237 case rep_prefix_4_byte
:
22238 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
22239 copying whole cacheline at once. */
22240 if (TARGET_PENTIUMPRO
)
22245 case rep_prefix_1_byte
:
22246 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
22247 copying whole cacheline at once. */
22248 if (TARGET_PENTIUMPRO
)
22262 if (desired_align
< align
)
22263 desired_align
= align
;
22264 if (expected_size
!= -1 && expected_size
< 4)
22265 desired_align
= align
;
22266 return desired_align
;
/* Return the smallest power of 2 greater than VAL.
   NOTE(review): the body was entirely missing from the mangled listing;
   reimplemented from the stated contract (smallest_pow2_greater_than (4)
   == 8, (0) == 1).  Assumes VAL is small enough that the result does not
   overflow int -- callers pass epilogue sizes.  */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}
22279 /* Expand string move (memcpy) operation. Use i386 string operations
22280 when profitable. expand_setmem contains similar code. The code
22281 depends upon architecture, block size and alignment, but always has
22282 the same overall structure:
22284 1) Prologue guard: Conditional that jumps up to epilogues for small
22285 blocks that can be handled by epilogue alone. This is faster
22286 but also needed for correctness, since prologue assume the block
22287 is larger than the desired alignment.
22289 Optional dynamic check for size and libcall for large
22290 blocks is emitted here too, with -minline-stringops-dynamically.
22292 2) Prologue: copy first few bytes in order to get destination
22293 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
22294 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
22295 copied. We emit either a jump tree on power of two sized
22296 blocks, or a byte loop.
22298 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
22299 with specified algorithm.
22301 4) Epilogue: code copying tail of the block that is too small to be
22302 handled by main body (or up to size guarded by prologue guard). */
22305 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
,
22306 rtx expected_align_exp
, rtx expected_size_exp
)
22312 rtx jump_around_label
= NULL
;
22313 HOST_WIDE_INT align
= 1;
22314 unsigned HOST_WIDE_INT count
= 0;
22315 HOST_WIDE_INT expected_size
= -1;
22316 int size_needed
= 0, epilogue_size_needed
;
22317 int desired_align
= 0, align_bytes
= 0;
22318 enum stringop_alg alg
;
22320 bool need_zero_guard
= false;
22321 bool align_unknown
;
22322 unsigned int unroll_factor
;
22323 enum machine_mode move_mode
;
22324 rtx loop_iter
= NULL_RTX
;
22325 int dst_offset
, src_offset
;
22327 if (CONST_INT_P (align_exp
))
22328 align
= INTVAL (align_exp
);
22329 /* i386 can do misaligned access on reasonably increased cost. */
22330 if (CONST_INT_P (expected_align_exp
)
22331 && INTVAL (expected_align_exp
) > align
)
22332 align
= INTVAL (expected_align_exp
);
22333 /* ALIGN is the minimum of destination and source alignment, but we care here
22334 just about destination alignment. */
22335 else if (MEM_ALIGN (dst
) > (unsigned HOST_WIDE_INT
) align
* BITS_PER_UNIT
)
22336 align
= MEM_ALIGN (dst
) / BITS_PER_UNIT
;
22338 if (CONST_INT_P (count_exp
))
22339 count
= expected_size
= INTVAL (count_exp
);
22340 if (CONST_INT_P (expected_size_exp
) && count
== 0)
22341 expected_size
= INTVAL (expected_size_exp
);
22343 /* Make sure we don't need to care about overflow later on. */
22344 if (count
> ((unsigned HOST_WIDE_INT
) 1 << 30))
22347 /* Step 0: Decide on preferred algorithm, desired alignment and
22348 size of chunks to be copied by main loop. */
22349 dst_offset
= get_mem_align_offset (dst
, MOVE_MAX
*BITS_PER_UNIT
);
22350 src_offset
= get_mem_align_offset (src
, MOVE_MAX
*BITS_PER_UNIT
);
22351 align_unknown
= (dst_offset
< 0
22353 || src_offset
!= dst_offset
);
22354 alg
= decide_alg (count
, expected_size
, false, &dynamic_check
, align_unknown
);
22355 desired_align
= decide_alignment (align
, alg
, expected_size
);
22357 desired_align
= align
;
22361 if (!TARGET_ALIGN_STRINGOPS
)
22362 align
= desired_align
;
22364 if (alg
== libcall
)
22366 gcc_assert (alg
!= no_stringop
);
22368 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
22369 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
22370 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
22375 gcc_unreachable ();
22377 need_zero_guard
= true;
22380 size_needed
= GET_MODE_SIZE (move_mode
) * unroll_factor
;
22382 case unrolled_loop
:
22383 need_zero_guard
= true;
22386 /* Select maximal available 1,2 or 4 unroll factor.
22387 In 32bit we can not afford to use 4 registers inside the loop. */
22389 unroll_factor
= TARGET_64BIT
? 4 : 2;
22391 while (GET_MODE_SIZE (move_mode
) * unroll_factor
* 2 < count
22392 && unroll_factor
< (TARGET_64BIT
? 4 :2))
22393 unroll_factor
*= 2;
22394 size_needed
= GET_MODE_SIZE (move_mode
) * unroll_factor
;
22397 need_zero_guard
= true;
22398 /* Use SSE instructions, if possible. */
22399 move_mode
= V4SImode
;
22400 /* Select maximal available 1,2 or 4 unroll factor. */
22404 while (GET_MODE_SIZE (move_mode
) * unroll_factor
* 2 < count
22405 && unroll_factor
< 4)
22406 unroll_factor
*= 2;
22407 size_needed
= GET_MODE_SIZE (move_mode
) * unroll_factor
;
22409 case rep_prefix_8_byte
:
22412 case rep_prefix_4_byte
:
22415 case rep_prefix_1_byte
:
22419 need_zero_guard
= true;
22424 epilogue_size_needed
= size_needed
;
22426 /* Step 1: Prologue guard. */
22428 /* Alignment code needs count to be in register. */
22429 if (CONST_INT_P (count_exp
) && desired_align
> align
)
22431 if (INTVAL (count_exp
) > desired_align
22432 && INTVAL (count_exp
) > size_needed
)
22435 = get_mem_align_offset (dst
, desired_align
* BITS_PER_UNIT
);
22436 if (align_bytes
<= 0)
22439 align_bytes
= desired_align
- align_bytes
;
22441 if (align_bytes
== 0)
22442 count_exp
= force_reg (counter_mode (count_exp
), count_exp
);
22444 gcc_assert (desired_align
>= 1 && align
>= 1);
22446 /* Ensure that alignment prologue won't copy past end of block. */
22447 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
22449 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
22450 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
22451 Make sure it is power of 2. */
22452 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
22456 if (count
< (unsigned HOST_WIDE_INT
)epilogue_size_needed
)
22458 /* If main algorithm works on QImode, no epilogue is needed.
22459 For small sizes just don't align anything. */
22460 if (size_needed
== 1)
22461 desired_align
= align
;
22468 /* SSE and unrolled algs re-use iteration counter in the epilogue. */
22469 if (alg
== sse_loop
|| alg
== unrolled_loop
)
22471 loop_iter
= gen_reg_rtx (counter_mode (count_exp
));
22472 emit_move_insn (loop_iter
, const0_rtx
);
22474 label
= gen_label_rtx ();
22475 emit_cmp_and_jump_insns (count_exp
,
22476 GEN_INT (epilogue_size_needed
),
22477 LTU
, 0, counter_mode (count_exp
), 1, label
);
22478 if (expected_size
== -1 || expected_size
< epilogue_size_needed
)
22479 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
22481 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
22485 /* Emit code to decide on runtime whether library call or inline should be
22487 if (dynamic_check
!= -1)
22489 if (CONST_INT_P (count_exp
))
22491 if (UINTVAL (count_exp
) >= (unsigned HOST_WIDE_INT
)dynamic_check
)
22493 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
22494 count_exp
= const0_rtx
;
22500 rtx hot_label
= gen_label_rtx ();
22501 jump_around_label
= gen_label_rtx ();
22502 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
22503 LEU
, 0, GET_MODE (count_exp
), 1, hot_label
);
22504 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
22505 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
22506 emit_jump (jump_around_label
);
22507 emit_label (hot_label
);
22511 /* Step 2: Alignment prologue. */
22513 if (desired_align
> align
)
22515 if (align_bytes
== 0)
22517 /* Except for the first move in epilogue, we no longer know
22518 constant offset in aliasing info. It don't seems to worth
22519 the pain to maintain it for the first move, so throw away
22521 src
= change_address (src
, BLKmode
, srcreg
);
22522 dst
= change_address (dst
, BLKmode
, destreg
);
22523 expand_movmem_prologue (dst
, src
, destreg
, srcreg
, count_exp
, align
,
22525 set_mem_align (src
, desired_align
*BITS_PER_UNIT
);
22526 set_mem_align (dst
, desired_align
*BITS_PER_UNIT
);
22530 /* If we know how many bytes need to be stored before dst is
22531 sufficiently aligned, maintain aliasing info accurately. */
22532 dst
= expand_constant_movmem_prologue (dst
, &src
, destreg
, srcreg
,
22533 desired_align
, align_bytes
);
22534 count_exp
= plus_constant (count_exp
, -align_bytes
);
22535 count
-= align_bytes
;
22537 if (need_zero_guard
22538 && (count
< (unsigned HOST_WIDE_INT
) size_needed
22539 || (align_bytes
== 0
22540 && count
< ((unsigned HOST_WIDE_INT
) size_needed
22541 + desired_align
- align
))))
22543 /* It is possible that we copied enough so the main loop will not
22545 gcc_assert (size_needed
> 1);
22546 if (label
== NULL_RTX
)
22547 label
= gen_label_rtx ();
22548 emit_cmp_and_jump_insns (count_exp
,
22549 GEN_INT (size_needed
),
22550 LTU
, 0, counter_mode (count_exp
), 1, label
);
22551 if (expected_size
== -1
22552 || expected_size
< (desired_align
- align
) / 2 + size_needed
)
22553 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
22555 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
22558 if (label
&& size_needed
== 1)
22560 emit_label (label
);
22561 LABEL_NUSES (label
) = 1;
22563 epilogue_size_needed
= 1;
22565 else if (label
== NULL_RTX
)
22566 epilogue_size_needed
= size_needed
;
22568 /* Step 3: Main loop. */
22574 gcc_unreachable ();
22576 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
22577 count_exp
, QImode
, 1, expected_size
);
22580 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
22581 count_exp
, Pmode
, 1, expected_size
);
22584 case unrolled_loop
:
22585 /* In some cases we want to use the same iterator in several adjacent
22586 loops, so here we save loop iterator rtx and don't update addresses. */
22587 loop_iter
= expand_set_or_movmem_via_loop_with_iter (dst
, src
, destreg
,
22589 count_exp
, loop_iter
,
22592 expected_size
, false);
22594 case rep_prefix_8_byte
:
22595 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
22598 case rep_prefix_4_byte
:
22599 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
22602 case rep_prefix_1_byte
:
22603 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
22607 /* Adjust properly the offset of src and dest memory for aliasing. */
22608 if (CONST_INT_P (count_exp
))
22610 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
,
22611 (count
/ size_needed
) * size_needed
);
22612 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
22613 (count
/ size_needed
) * size_needed
);
22617 src
= change_address (src
, BLKmode
, srcreg
);
22618 dst
= change_address (dst
, BLKmode
, destreg
);
22621 /* Step 4: Epilogue to copy the remaining bytes. */
22625 /* When the main loop is done, COUNT_EXP might hold original count,
22626 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
22627 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
22628 bytes. Compensate if needed. */
22630 if (size_needed
< epilogue_size_needed
)
22633 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
22634 GEN_INT (size_needed
- 1), count_exp
, 1,
22636 if (tmp
!= count_exp
)
22637 emit_move_insn (count_exp
, tmp
);
22639 emit_label (label
);
22640 LABEL_NUSES (label
) = 1;
22643 /* We haven't updated addresses, so we'll do it now.
22644 Also, if the epilogue seems to be big, we'll generate a loop (not
22645 unrolled) in it. We'll do it only if alignment is unknown, because in
22646 this case in epilogue we have to perform memmove by bytes, which is very
22648 if (alg
== sse_loop
|| alg
== unrolled_loop
)
22651 int remainder_size
= epilogue_size_needed
;
22653 /* We may not need the epilgoue loop at all when the count is known
22654 and alignment is not adjusted. */
22655 if (count
&& desired_align
<= align
)
22656 remainder_size
= count
% epilogue_size_needed
;
22657 if (remainder_size
> 31)
22659 /* Reduce epilogue's size by creating not-unrolled loop. If we won't
22660 do this, we can have very big epilogue - when alignment is statically
22661 unknown we'll have the epilogue byte by byte which may be very slow. */
22662 loop_iter
= expand_set_or_movmem_via_loop_with_iter (dst
, src
, destreg
,
22663 srcreg
, NULL
, count_exp
,
22664 loop_iter
, move_mode
, 1,
22665 expected_size
, false);
22666 src
= change_address (src
, BLKmode
, srcreg
);
22667 dst
= change_address (dst
, BLKmode
, destreg
);
22668 epilogue_size_needed
= GET_MODE_SIZE (move_mode
);
22670 tmp
= expand_simple_binop (Pmode
, PLUS
, destreg
, loop_iter
, destreg
,
22671 true, OPTAB_LIB_WIDEN
);
22672 if (tmp
!= destreg
)
22673 emit_move_insn (destreg
, tmp
);
22675 tmp
= expand_simple_binop (Pmode
, PLUS
, srcreg
, loop_iter
, srcreg
,
22676 true, OPTAB_LIB_WIDEN
);
22678 emit_move_insn (srcreg
, tmp
);
22680 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
22681 expand_movmem_epilogue (dst
, src
, destreg
, srcreg
, count_exp
,
22682 epilogue_size_needed
);
22684 if (jump_around_label
)
22685 emit_label (jump_around_label
);
22689 /* Helper function for memcpy. For QImode value 0xXY produce
22690 0xXYXYXYXY of wide specified by MODE. This is essentially
22691 a * 0x10101010, but we can do slightly better than
22692 synth_mult by unwinding the sequence by hand on CPUs with
22695 promote_duplicated_reg (enum machine_mode mode
, rtx val
)
22697 enum machine_mode valmode
= GET_MODE (val
);
22699 int nops
= mode
== DImode
? 3 : 2;
22701 if (VECTOR_MODE_P (mode
))
22703 enum machine_mode inner
= GET_MODE_INNER (mode
);
22704 rtx promoted_val
, vec_reg
;
22705 if (CONST_INT_P (val
))
22706 return ix86_build_const_vector (mode
, true, val
);
22708 promoted_val
= promote_duplicated_reg (inner
, val
);
22709 vec_reg
= gen_reg_rtx (mode
);
22713 emit_insn (gen_vec_dupv2di (vec_reg
, promoted_val
));
22716 emit_insn (gen_vec_dupv4si (vec_reg
, promoted_val
));
22719 gcc_unreachable ();
22725 gcc_assert (mode
== SImode
|| mode
== DImode
);
22726 if (mode
== DImode
&& !TARGET_64BIT
)
22728 rtx vec_reg
= promote_duplicated_reg (V4SImode
, val
);
22729 vec_reg
= convert_to_mode (V2DImode
, vec_reg
, 1);
22732 if (val
== const0_rtx
)
22733 return copy_to_mode_reg (mode
, const0_rtx
);
22734 if (CONST_INT_P (val
))
22736 HOST_WIDE_INT v
= INTVAL (val
) & 255;
22740 if (mode
== DImode
)
22741 v
|= (v
<< 16) << 16;
22742 return copy_to_mode_reg (mode
, gen_int_mode (v
, mode
));
22745 if (valmode
== VOIDmode
)
22747 if (valmode
!= QImode
)
22748 val
= gen_lowpart (QImode
, val
);
22749 if (mode
== QImode
)
22751 if (!TARGET_PARTIAL_REG_STALL
)
22753 if (ix86_cost
->mult_init
[mode
== DImode
? 3 : 2]
22754 + ix86_cost
->mult_bit
* (mode
== DImode
? 8 : 4)
22755 <= (ix86_cost
->shift_const
+ ix86_cost
->add
) * nops
22756 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL
== 0)))
22758 rtx reg
= convert_modes (mode
, QImode
, val
, true);
22759 tmp
= promote_duplicated_reg (mode
, const1_rtx
);
22760 return expand_simple_binop (mode
, MULT
, reg
, tmp
, NULL
, 1,
22765 rtx reg
= convert_modes (mode
, QImode
, val
, true);
22767 if (!TARGET_PARTIAL_REG_STALL
)
22768 if (mode
== SImode
)
22769 emit_insn (gen_movsi_insv_1 (reg
, reg
));
22771 emit_insn (gen_movdi_insv_1 (reg
, reg
));
22774 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (8),
22775 NULL
, 1, OPTAB_DIRECT
);
22777 expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
22779 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (16),
22780 NULL
, 1, OPTAB_DIRECT
);
22781 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
22782 if (mode
== SImode
)
22784 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (32),
22785 NULL
, 1, OPTAB_DIRECT
);
22786 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
22791 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
22792 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
22793 alignment from ALIGN to DESIRED_ALIGN. */
22795 promote_duplicated_reg_to_size (rtx val
, int size_needed
, int desired_align
, int align
)
22797 rtx promoted_val
= NULL_RTX
;
22799 if (size_needed
> 8)
22801 /* We want to promote to vector register, so we expect that at least SSE
22803 gcc_assert (TARGET_SSE
);
22805 /* In case of promotion to vector register, we expect that val is a
22806 constant or already promoted to GPR value. */
22807 gcc_assert (GET_MODE (val
) == Pmode
|| CONSTANT_P (val
));
22809 promoted_val
= promote_duplicated_reg (V2DImode
, val
);
22811 promoted_val
= promote_duplicated_reg (V4SImode
, val
);
22813 else if (size_needed
> 4)
22815 gcc_assert (TARGET_64BIT
);
22816 promoted_val
= promote_duplicated_reg (DImode
, val
);
22818 else if (size_needed
> 2 || (desired_align
> align
&& desired_align
> 2))
22819 promoted_val
= promote_duplicated_reg (SImode
, val
);
22820 else if (size_needed
> 1 || (desired_align
> align
&& desired_align
> 1))
22821 promoted_val
= promote_duplicated_reg (HImode
, val
);
22823 promoted_val
= val
;
22825 return promoted_val
;
22828 /* Expand string clear operation (bzero). Use i386 string operations when
22829 profitable. See expand_movmem comment for explanation of individual
22830 steps performed. */
22832 ix86_expand_setmem (rtx dst
, rtx count_exp
, rtx val_exp
, rtx align_exp
,
22833 rtx expected_align_exp
, rtx expected_size_exp
)
22838 rtx jump_around_label
= NULL
;
22839 HOST_WIDE_INT align
= 1;
22840 unsigned HOST_WIDE_INT count
= 0;
22841 HOST_WIDE_INT expected_size
= -1;
22842 int size_needed
= 0, epilogue_size_needed
;
22843 int desired_align
= 0, align_bytes
= 0;
22844 enum stringop_alg alg
;
22845 rtx gpr_promoted_val
= NULL
;
22846 rtx vec_promoted_val
= NULL
;
22848 bool need_zero_guard
= false;
22849 bool align_unknown
;
22850 unsigned int unroll_factor
;
22851 enum machine_mode move_mode
;
22852 rtx loop_iter
= NULL_RTX
;
22853 bool early_jump
= false;
22855 if (CONST_INT_P (align_exp
))
22856 align
= INTVAL (align_exp
);
22857 /* i386 can do misaligned access on reasonably increased cost. */
22858 if (CONST_INT_P (expected_align_exp
)
22859 && INTVAL (expected_align_exp
) > align
)
22860 align
= INTVAL (expected_align_exp
);
22861 if (CONST_INT_P (count_exp
))
22862 count
= expected_size
= INTVAL (count_exp
);
22863 if (CONST_INT_P (expected_size_exp
) && count
== 0)
22864 expected_size
= INTVAL (expected_size_exp
);
22866 /* Make sure we don't need to care about overflow later on. */
22867 if (count
> ((unsigned HOST_WIDE_INT
) 1 << 30))
22870 /* Step 0: Decide on preferred algorithm, desired alignment and
22871 size of chunks to be copied by main loop. */
22873 align_unknown
= !(CONST_INT_P (align_exp
) && INTVAL (align_exp
) > 0);
22874 alg
= decide_alg (count
, expected_size
, true, &dynamic_check
, align_unknown
);
22875 desired_align
= decide_alignment (align
, alg
, expected_size
);
22879 if (!TARGET_ALIGN_STRINGOPS
)
22880 align
= desired_align
;
22882 if (alg
== libcall
)
22884 gcc_assert (alg
!= no_stringop
);
22886 count_exp
= copy_to_mode_reg (counter_mode (count_exp
), count_exp
);
22887 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
22892 gcc_unreachable ();
22894 need_zero_guard
= true;
22896 size_needed
= GET_MODE_SIZE (move_mode
) * unroll_factor
;
22898 case unrolled_loop
:
22899 need_zero_guard
= true;
22902 /* Select maximal available 1,2 or 4 unroll factor. */
22906 while (GET_MODE_SIZE (move_mode
) * unroll_factor
* 2 < count
22907 && unroll_factor
< 4)
22908 unroll_factor
*= 2;
22909 size_needed
= GET_MODE_SIZE (move_mode
) * unroll_factor
;
22912 need_zero_guard
= true;
22913 move_mode
= TARGET_64BIT
? V2DImode
: V4SImode
;
22915 /* Select maximal available 1,2 or 4 unroll factor. */
22919 while (GET_MODE_SIZE (move_mode
) * unroll_factor
* 2 < count
22920 && unroll_factor
< 4)
22921 unroll_factor
*= 2;
22922 size_needed
= GET_MODE_SIZE (move_mode
) * unroll_factor
;
22924 case rep_prefix_8_byte
:
22927 case rep_prefix_4_byte
:
22930 case rep_prefix_1_byte
:
22934 need_zero_guard
= true;
22938 epilogue_size_needed
= size_needed
;
22940 /* Step 1: Prologue guard. */
22942 /* Alignment code needs count to be in register. */
22943 if (CONST_INT_P (count_exp
) && desired_align
> align
)
22945 if (INTVAL (count_exp
) > desired_align
22946 && INTVAL (count_exp
) > size_needed
)
22949 = get_mem_align_offset (dst
, desired_align
* BITS_PER_UNIT
);
22950 if (align_bytes
<= 0)
22953 align_bytes
= desired_align
- align_bytes
;
22955 if (align_bytes
== 0)
22957 enum machine_mode mode
= SImode
;
22958 if (TARGET_64BIT
&& (count
& ~0xffffffff))
22960 count_exp
= force_reg (mode
, count_exp
);
22963 /* Do the cheap promotion to allow better CSE across the
22964 main loop and epilogue (ie one load of the big constant in the
22965 front of all code. */
22966 if (CONST_INT_P (val_exp
))
22967 gpr_promoted_val
= promote_duplicated_reg_to_size (val_exp
,
22968 GET_MODE_SIZE (Pmode
),
22969 GET_MODE_SIZE (Pmode
),
22971 /* Ensure that alignment prologue won't copy past end of block. */
22972 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
22974 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
22975 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
22976 Make sure it is power of 2. */
22977 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
22981 if (count
< (unsigned HOST_WIDE_INT
)epilogue_size_needed
)
22983 /* If main algorithm works on QImode, no epilogue is needed.
22984 For small sizes just don't align anything. */
22985 if (size_needed
== 1)
22986 desired_align
= align
;
22993 /* SSE and unrolled_lopo algs re-use iteration counter in the epilogue. */
22994 if (alg
== sse_loop
|| alg
== unrolled_loop
)
22996 loop_iter
= gen_reg_rtx (counter_mode (count_exp
));
22997 emit_move_insn (loop_iter
, const0_rtx
);
22999 label
= gen_label_rtx ();
23001 emit_cmp_and_jump_insns (count_exp
,
23002 GEN_INT (epilogue_size_needed
),
23003 LTU
, 0, counter_mode (count_exp
), 1, label
);
23004 if (expected_size
== -1 || expected_size
<= epilogue_size_needed
)
23005 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
23007 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
23010 if (dynamic_check
!= -1)
23012 rtx hot_label
= gen_label_rtx ();
23013 jump_around_label
= gen_label_rtx ();
23014 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
23015 LEU
, 0, counter_mode (count_exp
), 1, hot_label
);
23016 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
23017 set_storage_via_libcall (dst
, count_exp
, val_exp
, false);
23018 emit_jump (jump_around_label
);
23019 emit_label (hot_label
);
23022 /* Step 2: Alignment prologue. */
23024 /* Do the expensive promotion once we branched off the small blocks. */
23025 if (!gpr_promoted_val
)
23026 gpr_promoted_val
= promote_duplicated_reg_to_size (val_exp
,
23027 GET_MODE_SIZE (Pmode
),
23028 GET_MODE_SIZE (Pmode
),
23030 gcc_assert (desired_align
>= 1 && align
>= 1);
23032 if (desired_align
> align
)
23034 if (align_bytes
== 0)
23036 /* Except for the first move in epilogue, we no longer know
23037 constant offset in aliasing info. It don't seems to worth
23038 the pain to maintain it for the first move, so throw away
23040 dst
= change_address (dst
, BLKmode
, destreg
);
23041 expand_setmem_prologue (dst
, destreg
, gpr_promoted_val
, count_exp
, align
,
23043 set_mem_align (dst
, desired_align
*BITS_PER_UNIT
);
23047 /* If we know how many bytes need to be stored before dst is
23048 sufficiently aligned, maintain aliasing info accurately. */
23049 dst
= expand_constant_setmem_prologue (dst
, destreg
, gpr_promoted_val
,
23050 desired_align
, align_bytes
);
23051 count_exp
= plus_constant (count_exp
, -align_bytes
);
23052 count
-= align_bytes
;
23053 if (count
< (unsigned HOST_WIDE_INT
) size_needed
)
23056 if (need_zero_guard
23057 && (count
< (unsigned HOST_WIDE_INT
) size_needed
23058 || (align_bytes
== 0
23059 && count
< ((unsigned HOST_WIDE_INT
) size_needed
23060 + desired_align
- align
))))
23062 /* It is possible that we copied enough so the main loop will not
23064 gcc_assert (size_needed
> 1);
23065 if (label
== NULL_RTX
)
23066 label
= gen_label_rtx ();
23067 emit_cmp_and_jump_insns (count_exp
,
23068 GEN_INT (size_needed
),
23069 LTU
, 0, counter_mode (count_exp
), 1, label
);
23070 if (expected_size
== -1
23071 || expected_size
< (desired_align
- align
) / 2 + size_needed
)
23072 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
23074 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
23077 if (label
&& size_needed
== 1)
23079 emit_label (label
);
23080 LABEL_NUSES (label
) = 1;
23082 gpr_promoted_val
= val_exp
;
23083 epilogue_size_needed
= 1;
23085 else if (label
== NULL_RTX
)
23086 epilogue_size_needed
= size_needed
;
23088 /* Step 3: Main loop. */
23094 gcc_unreachable ();
23096 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, val_exp
,
23097 count_exp
, QImode
, 1, expected_size
);
23100 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, gpr_promoted_val
,
23101 count_exp
, Pmode
, 1, expected_size
);
23103 case unrolled_loop
:
23104 loop_iter
= expand_set_or_movmem_via_loop_with_iter (dst
, NULL
, destreg
,
23105 NULL
, gpr_promoted_val
, count_exp
,
23106 loop_iter
, move_mode
, unroll_factor
,
23107 expected_size
, false);
23111 promote_duplicated_reg_to_size (gpr_promoted_val
,
23112 GET_MODE_SIZE (move_mode
),
23113 GET_MODE_SIZE (move_mode
), align
);
23114 loop_iter
= expand_set_or_movmem_via_loop_with_iter (dst
, NULL
, destreg
,
23115 NULL
, vec_promoted_val
, count_exp
,
23116 loop_iter
, move_mode
, unroll_factor
,
23117 expected_size
, false);
23119 case rep_prefix_8_byte
:
23120 gcc_assert (TARGET_64BIT
);
23121 expand_setmem_via_rep_stos (dst
, destreg
, gpr_promoted_val
, count_exp
,
23124 case rep_prefix_4_byte
:
23125 expand_setmem_via_rep_stos (dst
, destreg
, gpr_promoted_val
, count_exp
,
23128 case rep_prefix_1_byte
:
23129 expand_setmem_via_rep_stos (dst
, destreg
, gpr_promoted_val
, count_exp
,
23133 /* Adjust properly the offset of src and dest memory for aliasing. */
23134 if (CONST_INT_P (count_exp
))
23135 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
23136 (count
/ size_needed
) * size_needed
);
23138 dst
= change_address (dst
, BLKmode
, destreg
);
23140 /* Step 4: Epilogue to copy the remaining bytes. */
23144 /* When the main loop is done, COUNT_EXP might hold original count,
23145 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
23146 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
23147 bytes. Compensate if needed. */
23149 if (size_needed
< epilogue_size_needed
)
23152 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
23153 GEN_INT (size_needed
- 1), count_exp
, 1,
23155 if (tmp
!= count_exp
)
23156 emit_move_insn (count_exp
, tmp
);
23158 emit_label (label
);
23159 LABEL_NUSES (label
) = 1;
23160 /* We can not rely on fact that promoved value is known. */
23161 vec_promoted_val
= 0;
23163 gpr_promoted_val
= 0;
23166 if (alg
== unrolled_loop
|| alg
== sse_loop
)
23169 int remainder_size
= epilogue_size_needed
;
23170 if (count
&& desired_align
<= align
)
23171 remainder_size
= count
% epilogue_size_needed
;
23172 /* We may not need the epilgoue loop at all when the count is known
23173 and alignment is not adjusted. */
23174 if (remainder_size
> 31
23175 && (alg
== sse_loop
? vec_promoted_val
: gpr_promoted_val
))
23177 /* Reduce epilogue's size by creating not-unrolled loop. If we won't
23178 do this, we can have very big epilogue - when alignment is statically
23179 unknown we'll have the epilogue byte by byte which may be very slow. */
23180 loop_iter
= expand_set_or_movmem_via_loop_with_iter (dst
, NULL
, destreg
,
23181 NULL
, (alg
== sse_loop
? vec_promoted_val
: gpr_promoted_val
), count_exp
,
23182 loop_iter
, move_mode
, 1,
23183 expected_size
, false);
23184 dst
= change_address (dst
, BLKmode
, destreg
);
23185 epilogue_size_needed
= GET_MODE_SIZE (move_mode
);
23187 tmp
= expand_simple_binop (Pmode
, PLUS
, destreg
, loop_iter
, destreg
,
23188 true, OPTAB_LIB_WIDEN
);
23189 if (tmp
!= destreg
)
23190 emit_move_insn (destreg
, tmp
);
23192 if (count_exp
== const0_rtx
|| epilogue_size_needed
<= 1)
23194 else if (!gpr_promoted_val
)
23195 expand_setmem_epilogue_via_loop (dst
, destreg
, val_exp
, count_exp
,
23196 epilogue_size_needed
);
23198 expand_setmem_epilogue (dst
, destreg
, vec_promoted_val
, gpr_promoted_val
,
23199 val_exp
, count_exp
, epilogue_size_needed
);
23200 if (jump_around_label
)
23201 emit_label (jump_around_label
);
23205 /* Expand the appropriate insns for doing strlen if not just doing
23208 out = result, initialized with the start address
23209 align_rtx = alignment of the address.
23210 scratch = scratch register, initialized with the startaddress when
23211 not aligned, otherwise undefined
23213 This is just the body. It needs the initializations mentioned above and
23214 some address computing at the end. These things are done in i386.md. */
23217 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
23221 rtx align_2_label
= NULL_RTX
;
23222 rtx align_3_label
= NULL_RTX
;
23223 rtx align_4_label
= gen_label_rtx ();
23224 rtx end_0_label
= gen_label_rtx ();
23226 rtx tmpreg
= gen_reg_rtx (SImode
);
23227 rtx scratch
= gen_reg_rtx (SImode
);
23231 if (CONST_INT_P (align_rtx
))
23232 align
= INTVAL (align_rtx
);
23234 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
23236 /* Is there a known alignment and is it less than 4? */
23239 rtx scratch1
= gen_reg_rtx (Pmode
);
23240 emit_move_insn (scratch1
, out
);
23241 /* Is there a known alignment and is it not 2? */
23244 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
23245 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
23247 /* Leave just the 3 lower bits. */
23248 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
23249 NULL_RTX
, 0, OPTAB_WIDEN
);
23251 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
23252 Pmode
, 1, align_4_label
);
23253 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
23254 Pmode
, 1, align_2_label
);
23255 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
23256 Pmode
, 1, align_3_label
);
23260 /* Since the alignment is 2, we have to check 2 or 0 bytes;
23261 check if is aligned to 4 - byte. */
23263 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
23264 NULL_RTX
, 0, OPTAB_WIDEN
);
23266 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
23267 Pmode
, 1, align_4_label
);
23270 mem
= change_address (src
, QImode
, out
);
23272 /* Now compare the bytes. */
23274 /* Compare the first n unaligned byte on a byte per byte basis. */
23275 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
23276 QImode
, 1, end_0_label
);
23278 /* Increment the address. */
23279 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
23281 /* Not needed with an alignment of 2 */
23284 emit_label (align_2_label
);
23286 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
23289 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
23291 emit_label (align_3_label
);
23294 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
23297 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
23300 /* Generate loop to check 4 bytes at a time. It is not a good idea to
23301 align this loop. It gives only huge programs, but does not help to
23303 emit_label (align_4_label
);
23305 mem
= change_address (src
, SImode
, out
);
23306 emit_move_insn (scratch
, mem
);
23307 emit_insn (ix86_gen_add3 (out
, out
, GEN_INT (4)));
23309 /* This formula yields a nonzero result iff one of the bytes is zero.
23310 This saves three branches inside loop and many cycles. */
23312 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
23313 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
23314 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
23315 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
23316 gen_int_mode (0x80808080, SImode
)));
23317 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
23322 rtx reg
= gen_reg_rtx (SImode
);
23323 rtx reg2
= gen_reg_rtx (Pmode
);
23324 emit_move_insn (reg
, tmpreg
);
23325 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
23327 /* If zero is not in the first two bytes, move two bytes forward. */
23328 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
23329 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
23330 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
23331 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
23332 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
23335 /* Emit lea manually to avoid clobbering of flags. */
23336 emit_insn (gen_rtx_SET (SImode
, reg2
,
23337 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
23339 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
23340 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
23341 emit_insn (gen_rtx_SET (VOIDmode
, out
,
23342 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
23348 rtx end_2_label
= gen_label_rtx ();
23349 /* Is zero in the first two bytes? */
23351 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
23352 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
23353 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
23354 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
23355 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
23357 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
23358 JUMP_LABEL (tmp
) = end_2_label
;
23360 /* Not in the first two. Move two bytes forward. */
23361 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
23362 emit_insn (ix86_gen_add3 (out
, out
, const2_rtx
));
23364 emit_label (end_2_label
);
23368 /* Avoid branch in fixing the byte. */
23369 tmpreg
= gen_lowpart (QImode
, tmpreg
);
23370 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
23371 tmp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
23372 cmp
= gen_rtx_LTU (VOIDmode
, tmp
, const0_rtx
);
23373 emit_insn (ix86_gen_sub3_carry (out
, out
, GEN_INT (3), tmp
, cmp
));
23375 emit_label (end_0_label
);
23378 /* Expand strlen. */
23381 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
23383 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
23385 /* The generic case of strlen expander is long. Avoid it's
23386 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
23388 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
23389 && !TARGET_INLINE_ALL_STRINGOPS
23390 && !optimize_insn_for_size_p ()
23391 && (!CONST_INT_P (align
) || INTVAL (align
) < 4))
23394 addr
= force_reg (Pmode
, XEXP (src
, 0));
23395 scratch1
= gen_reg_rtx (Pmode
);
23397 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
23398 && !optimize_insn_for_size_p ())
23400 /* Well it seems that some optimizer does not combine a call like
23401 foo(strlen(bar), strlen(bar));
23402 when the move and the subtraction is done here. It does calculate
23403 the length just once when these instructions are done inside of
23404 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
23405 often used and I use one fewer register for the lifetime of
23406 output_strlen_unroll() this is better. */
23408 emit_move_insn (out
, addr
);
23410 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
23412 /* strlensi_unroll_1 returns the address of the zero at the end of
23413 the string, like memchr(), so compute the length by subtracting
23414 the start address. */
23415 emit_insn (ix86_gen_sub3 (out
, out
, addr
));
23421 /* Can't use this if the user has appropriated eax, ecx, or edi. */
23422 if (fixed_regs
[AX_REG
] || fixed_regs
[CX_REG
] || fixed_regs
[DI_REG
])
23425 scratch2
= gen_reg_rtx (Pmode
);
23426 scratch3
= gen_reg_rtx (Pmode
);
23427 scratch4
= force_reg (Pmode
, constm1_rtx
);
23429 emit_move_insn (scratch3
, addr
);
23430 eoschar
= force_reg (QImode
, eoschar
);
23432 src
= replace_equiv_address_nv (src
, scratch3
);
23434 /* If .md starts supporting :P, this can be done in .md. */
23435 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
23436 scratch4
), UNSPEC_SCAS
);
23437 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
23438 emit_insn (ix86_gen_one_cmpl2 (scratch2
, scratch1
));
23439 emit_insn (ix86_gen_add3 (out
, scratch2
, constm1_rtx
));
23444 /* For given symbol (function) construct code to compute address of it's PLT
23445 entry in large x86-64 PIC model. */
23447 construct_plt_address (rtx symbol
)
23449 rtx tmp
= gen_reg_rtx (Pmode
);
23450 rtx unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, symbol
), UNSPEC_PLTOFF
);
23452 gcc_assert (GET_CODE (symbol
) == SYMBOL_REF
);
23453 gcc_assert (ix86_cmodel
== CM_LARGE_PIC
);
23455 emit_move_insn (tmp
, gen_rtx_CONST (Pmode
, unspec
));
23456 emit_insn (gen_adddi3 (tmp
, tmp
, pic_offset_table_rtx
));
23461 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
23463 rtx pop
, bool sibcall
)
23465 /* We need to represent that SI and DI registers are clobbered
23467 static int clobbered_registers
[] = {
23468 XMM6_REG
, XMM7_REG
, XMM8_REG
,
23469 XMM9_REG
, XMM10_REG
, XMM11_REG
,
23470 XMM12_REG
, XMM13_REG
, XMM14_REG
,
23471 XMM15_REG
, SI_REG
, DI_REG
23473 rtx vec
[ARRAY_SIZE (clobbered_registers
) + 3];
23474 rtx use
= NULL
, call
;
23475 unsigned int vec_len
;
23477 if (pop
== const0_rtx
)
23479 gcc_assert (!TARGET_64BIT
|| !pop
);
23481 if (TARGET_MACHO
&& !TARGET_64BIT
)
23484 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
23485 fnaddr
= machopic_indirect_call_target (fnaddr
);
23490 /* Static functions and indirect calls don't need the pic register. */
23491 if (flag_pic
&& (!TARGET_64BIT
|| ix86_cmodel
== CM_LARGE_PIC
)
23492 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
23493 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
23494 use_reg (&use
, pic_offset_table_rtx
);
23497 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
23499 rtx al
= gen_rtx_REG (QImode
, AX_REG
);
23500 emit_move_insn (al
, callarg2
);
23501 use_reg (&use
, al
);
23504 if (ix86_cmodel
== CM_LARGE_PIC
23506 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
23507 && !local_symbolic_operand (XEXP (fnaddr
, 0), VOIDmode
))
23508 fnaddr
= gen_rtx_MEM (QImode
, construct_plt_address (XEXP (fnaddr
, 0)));
23510 ? !sibcall_insn_operand (XEXP (fnaddr
, 0), Pmode
)
23511 : !call_insn_operand (XEXP (fnaddr
, 0), Pmode
))
23513 fnaddr
= XEXP (fnaddr
, 0);
23514 if (GET_MODE (fnaddr
) != Pmode
)
23515 fnaddr
= convert_to_mode (Pmode
, fnaddr
, 1);
23516 fnaddr
= gen_rtx_MEM (QImode
, copy_to_mode_reg (Pmode
, fnaddr
));
23520 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
23522 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
23523 vec
[vec_len
++] = call
;
23527 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
23528 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
23529 vec
[vec_len
++] = pop
;
23532 if (TARGET_64BIT_MS_ABI
23533 && (!callarg2
|| INTVAL (callarg2
) != -2))
23537 vec
[vec_len
++] = gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, const0_rtx
),
23538 UNSPEC_MS_TO_SYSV_CALL
);
23540 for (i
= 0; i
< ARRAY_SIZE (clobbered_registers
); i
++)
23542 = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers
[i
])
23544 gen_rtx_REG (SSE_REGNO_P (clobbered_registers
[i
])
23546 clobbered_registers
[i
]));
23549 /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
23550 if (TARGET_VZEROUPPER
)
23553 if (cfun
->machine
->callee_pass_avx256_p
)
23555 if (cfun
->machine
->callee_return_avx256_p
)
23556 avx256
= callee_return_pass_avx256
;
23558 avx256
= callee_pass_avx256
;
23560 else if (cfun
->machine
->callee_return_avx256_p
)
23561 avx256
= callee_return_avx256
;
23563 avx256
= call_no_avx256
;
23565 if (reload_completed
)
23566 emit_insn (gen_avx_vzeroupper (GEN_INT (avx256
)));
23568 vec
[vec_len
++] = gen_rtx_UNSPEC (VOIDmode
,
23569 gen_rtvec (1, GEN_INT (avx256
)),
23570 UNSPEC_CALL_NEEDS_VZEROUPPER
);
23574 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (vec_len
, vec
));
23575 call
= emit_call_insn (call
);
23577 CALL_INSN_FUNCTION_USAGE (call
) = use
;
23583 ix86_split_call_vzeroupper (rtx insn
, rtx vzeroupper
)
23585 rtx pat
= PATTERN (insn
);
23586 rtvec vec
= XVEC (pat
, 0);
23587 int len
= GET_NUM_ELEM (vec
) - 1;
23589 /* Strip off the last entry of the parallel. */
23590 gcc_assert (GET_CODE (RTVEC_ELT (vec
, len
)) == UNSPEC
);
23591 gcc_assert (XINT (RTVEC_ELT (vec
, len
), 1) == UNSPEC_CALL_NEEDS_VZEROUPPER
);
23593 pat
= RTVEC_ELT (vec
, 0);
23595 pat
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (len
, &RTVEC_ELT (vec
, 0)));
23597 emit_insn (gen_avx_vzeroupper (vzeroupper
));
23598 emit_call_insn (pat
);
23601 /* Output the assembly for a call instruction. */
23604 ix86_output_call_insn (rtx insn
, rtx call_op
)
23606 bool direct_p
= constant_call_address_operand (call_op
, Pmode
);
23607 bool seh_nop_p
= false;
23610 if (SIBLING_CALL_P (insn
))
23614 /* SEH epilogue detection requires the indirect branch case
23615 to include REX.W. */
23616 else if (TARGET_SEH
)
23617 xasm
= "rex.W jmp %A0";
23621 output_asm_insn (xasm
, &call_op
);
23625 /* SEH unwinding can require an extra nop to be emitted in several
23626 circumstances. Determine if we have one of those. */
23631 for (i
= NEXT_INSN (insn
); i
; i
= NEXT_INSN (i
))
23633 /* If we get to another real insn, we don't need the nop. */
23637 /* If we get to the epilogue note, prevent a catch region from
23638 being adjacent to the standard epilogue sequence. If non-
23639 call-exceptions, we'll have done this during epilogue emission. */
23640 if (NOTE_P (i
) && NOTE_KIND (i
) == NOTE_INSN_EPILOGUE_BEG
23641 && !flag_non_call_exceptions
23642 && !can_throw_internal (insn
))
23649 /* If we didn't find a real insn following the call, prevent the
23650 unwinder from looking into the next function. */
23656 xasm
= "call\t%P0";
23658 xasm
= "call\t%A0";
23660 output_asm_insn (xasm
, &call_op
);
23668 /* Clear stack slot assignments remembered from previous functions.
23669 This is called from INIT_EXPANDERS once before RTL is emitted for each
23672 static struct machine_function
*
23673 ix86_init_machine_status (void)
23675 struct machine_function
*f
;
23677 f
= ggc_alloc_cleared_machine_function ();
23678 f
->use_fast_prologue_epilogue_nregs
= -1;
23679 f
->tls_descriptor_call_expanded_p
= 0;
23680 f
->call_abi
= ix86_abi
;
23685 /* Return a MEM corresponding to a stack slot with mode MODE.
23686 Allocate a new slot if necessary.
23688 The RTL for a function can have several slots available: N is
23689 which slot to use. */
23692 assign_386_stack_local (enum machine_mode mode
, enum ix86_stack_slot n
)
23694 struct stack_local_entry
*s
;
23696 gcc_assert (n
< MAX_386_STACK_LOCALS
);
23698 /* Virtual slot is valid only before vregs are instantiated. */
23699 gcc_assert ((n
== SLOT_VIRTUAL
) == !virtuals_instantiated
);
23701 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
23702 if (s
->mode
== mode
&& s
->n
== n
)
23703 return validize_mem (copy_rtx (s
->rtl
));
23705 s
= ggc_alloc_stack_local_entry ();
23708 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
23710 s
->next
= ix86_stack_locals
;
23711 ix86_stack_locals
= s
;
23712 return validize_mem (s
->rtl
);
23715 /* Calculate the length of the memory address in the instruction encoding.
23716 Includes addr32 prefix, does not include the one-byte modrm, opcode,
23717 or other prefixes. */
23720 memory_address_length (rtx addr
)
23722 struct ix86_address parts
;
23723 rtx base
, index
, disp
;
23727 if (GET_CODE (addr
) == PRE_DEC
23728 || GET_CODE (addr
) == POST_INC
23729 || GET_CODE (addr
) == PRE_MODIFY
23730 || GET_CODE (addr
) == POST_MODIFY
)
23733 ok
= ix86_decompose_address (addr
, &parts
);
23736 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
23737 parts
.base
= SUBREG_REG (parts
.base
);
23738 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
23739 parts
.index
= SUBREG_REG (parts
.index
);
23742 index
= parts
.index
;
23745 /* Add length of addr32 prefix. */
23746 len
= (GET_CODE (addr
) == ZERO_EXTEND
23747 || GET_CODE (addr
) == AND
);
23750 - esp as the base always wants an index,
23751 - ebp as the base always wants a displacement,
23752 - r12 as the base always wants an index,
23753 - r13 as the base always wants a displacement. */
23755 /* Register Indirect. */
23756 if (base
&& !index
&& !disp
)
23758 /* esp (for its index) and ebp (for its displacement) need
23759 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
23762 && (addr
== arg_pointer_rtx
23763 || addr
== frame_pointer_rtx
23764 || REGNO (addr
) == SP_REG
23765 || REGNO (addr
) == BP_REG
23766 || REGNO (addr
) == R12_REG
23767 || REGNO (addr
) == R13_REG
))
23771 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
23772 is not disp32, but disp32(%rip), so for disp32
23773 SIB byte is needed, unless print_operand_address
23774 optimizes it into disp32(%rip) or (%rip) is implied
23776 else if (disp
&& !base
&& !index
)
23783 if (GET_CODE (disp
) == CONST
)
23784 symbol
= XEXP (disp
, 0);
23785 if (GET_CODE (symbol
) == PLUS
23786 && CONST_INT_P (XEXP (symbol
, 1)))
23787 symbol
= XEXP (symbol
, 0);
23789 if (GET_CODE (symbol
) != LABEL_REF
23790 && (GET_CODE (symbol
) != SYMBOL_REF
23791 || SYMBOL_REF_TLS_MODEL (symbol
) != 0)
23792 && (GET_CODE (symbol
) != UNSPEC
23793 || (XINT (symbol
, 1) != UNSPEC_GOTPCREL
23794 && XINT (symbol
, 1) != UNSPEC_PCREL
23795 && XINT (symbol
, 1) != UNSPEC_GOTNTPOFF
)))
23802 /* Find the length of the displacement constant. */
23805 if (base
&& satisfies_constraint_K (disp
))
23810 /* ebp always wants a displacement. Similarly r13. */
23811 else if (base
&& REG_P (base
)
23812 && (REGNO (base
) == BP_REG
|| REGNO (base
) == R13_REG
))
23815 /* An index requires the two-byte modrm form.... */
23817 /* ...like esp (or r12), which always wants an index. */
23818 || base
== arg_pointer_rtx
23819 || base
== frame_pointer_rtx
23820 || (base
&& REG_P (base
)
23821 && (REGNO (base
) == SP_REG
|| REGNO (base
) == R12_REG
)))
23838 /* Compute default value for "length_immediate" attribute. When SHORTFORM
23839 is set, expect that insn have 8bit immediate alternative. */
23841 ix86_attr_length_immediate_default (rtx insn
, bool shortform
)
23845 extract_insn_cached (insn
);
23846 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
23847 if (CONSTANT_P (recog_data
.operand
[i
]))
23849 enum attr_mode mode
= get_attr_mode (insn
);
23852 if (shortform
&& CONST_INT_P (recog_data
.operand
[i
]))
23854 HOST_WIDE_INT ival
= INTVAL (recog_data
.operand
[i
]);
23861 ival
= trunc_int_for_mode (ival
, HImode
);
23864 ival
= trunc_int_for_mode (ival
, SImode
);
23869 if (IN_RANGE (ival
, -128, 127))
23886 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
23891 fatal_insn ("unknown insn mode", insn
);
23896 /* Compute default value for "length_address" attribute. */
23898 ix86_attr_length_address_default (rtx insn
)
23902 if (get_attr_type (insn
) == TYPE_LEA
)
23904 rtx set
= PATTERN (insn
), addr
;
23906 if (GET_CODE (set
) == PARALLEL
)
23907 set
= XVECEXP (set
, 0, 0);
23909 gcc_assert (GET_CODE (set
) == SET
);
23911 addr
= SET_SRC (set
);
23912 if (TARGET_64BIT
&& get_attr_mode (insn
) == MODE_SI
)
23914 if (GET_CODE (addr
) == ZERO_EXTEND
)
23915 addr
= XEXP (addr
, 0);
23916 if (GET_CODE (addr
) == SUBREG
)
23917 addr
= SUBREG_REG (addr
);
23920 return memory_address_length (addr
);
23923 extract_insn_cached (insn
);
23924 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
23925 if (MEM_P (recog_data
.operand
[i
]))
23927 constrain_operands_cached (reload_completed
);
23928 if (which_alternative
!= -1)
23930 const char *constraints
= recog_data
.constraints
[i
];
23931 int alt
= which_alternative
;
23933 while (*constraints
== '=' || *constraints
== '+')
23936 while (*constraints
++ != ',')
23938 /* Skip ignored operands. */
23939 if (*constraints
== 'X')
23942 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
23947 /* Compute default value for "length_vex" attribute. It includes
23948 2 or 3 byte VEX prefix and 1 opcode byte. */
23951 ix86_attr_length_vex_default (rtx insn
, bool has_0f_opcode
, bool has_vex_w
)
23955 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
23956 byte VEX prefix. */
23957 if (!has_0f_opcode
|| has_vex_w
)
23960 /* We can always use 2 byte VEX prefix in 32bit. */
23964 extract_insn_cached (insn
);
23966 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
23967 if (REG_P (recog_data
.operand
[i
]))
23969 /* REX.W bit uses 3 byte VEX prefix. */
23970 if (GET_MODE (recog_data
.operand
[i
]) == DImode
23971 && GENERAL_REG_P (recog_data
.operand
[i
]))
23976 /* REX.X or REX.B bits use 3 byte VEX prefix. */
23977 if (MEM_P (recog_data
.operand
[i
])
23978 && x86_extended_reg_mentioned_p (recog_data
.operand
[i
]))
23985 /* Return the maximum number of instructions a cpu can issue. */
23988 ix86_issue_rate (void)
23992 case PROCESSOR_PENTIUM
:
23993 case PROCESSOR_ATOM
:
23997 case PROCESSOR_PENTIUMPRO
:
23998 case PROCESSOR_PENTIUM4
:
23999 case PROCESSOR_CORE2_32
:
24000 case PROCESSOR_CORE2_64
:
24001 case PROCESSOR_COREI7_32
:
24002 case PROCESSOR_COREI7_64
:
24003 case PROCESSOR_ATHLON
:
24005 case PROCESSOR_AMDFAM10
:
24006 case PROCESSOR_NOCONA
:
24007 case PROCESSOR_GENERIC32
:
24008 case PROCESSOR_GENERIC64
:
24009 case PROCESSOR_BDVER1
:
24010 case PROCESSOR_BDVER2
:
24011 case PROCESSOR_BTVER1
:
24019 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
24020 by DEP_INSN and nothing set by DEP_INSN. */
24023 ix86_flags_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
24027 /* Simplify the test for uninteresting insns. */
24028 if (insn_type
!= TYPE_SETCC
24029 && insn_type
!= TYPE_ICMOV
24030 && insn_type
!= TYPE_FCMOV
24031 && insn_type
!= TYPE_IBR
)
24034 if ((set
= single_set (dep_insn
)) != 0)
24036 set
= SET_DEST (set
);
24039 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
24040 && XVECLEN (PATTERN (dep_insn
), 0) == 2
24041 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
24042 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
24044 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
24045 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
24050 if (!REG_P (set
) || REGNO (set
) != FLAGS_REG
)
24053 /* This test is true if the dependent insn reads the flags but
24054 not any other potentially set register. */
24055 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
24058 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
24064 /* Return true iff USE_INSN has a memory address with operands set by
24068 ix86_agi_dependent (rtx set_insn
, rtx use_insn
)
24071 extract_insn_cached (use_insn
);
24072 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
24073 if (MEM_P (recog_data
.operand
[i
]))
24075 rtx addr
= XEXP (recog_data
.operand
[i
], 0);
24076 return modified_in_p (addr
, set_insn
) != 0;
24082 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
24084 enum attr_type insn_type
, dep_insn_type
;
24085 enum attr_memory memory
;
24087 int dep_insn_code_number
;
24089 /* Anti and output dependencies have zero cost on all CPUs. */
24090 if (REG_NOTE_KIND (link
) != 0)
24093 dep_insn_code_number
= recog_memoized (dep_insn
);
24095 /* If we can't recognize the insns, we can't really do anything. */
24096 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
24099 insn_type
= get_attr_type (insn
);
24100 dep_insn_type
= get_attr_type (dep_insn
);
24104 case PROCESSOR_PENTIUM
:
24105 /* Address Generation Interlock adds a cycle of latency. */
24106 if (insn_type
== TYPE_LEA
)
24108 rtx addr
= PATTERN (insn
);
24110 if (GET_CODE (addr
) == PARALLEL
)
24111 addr
= XVECEXP (addr
, 0, 0);
24113 gcc_assert (GET_CODE (addr
) == SET
);
24115 addr
= SET_SRC (addr
);
24116 if (modified_in_p (addr
, dep_insn
))
24119 else if (ix86_agi_dependent (dep_insn
, insn
))
24122 /* ??? Compares pair with jump/setcc. */
24123 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
24126 /* Floating point stores require value to be ready one cycle earlier. */
24127 if (insn_type
== TYPE_FMOV
24128 && get_attr_memory (insn
) == MEMORY_STORE
24129 && !ix86_agi_dependent (dep_insn
, insn
))
24133 case PROCESSOR_PENTIUMPRO
:
24134 memory
= get_attr_memory (insn
);
24136 /* INT->FP conversion is expensive. */
24137 if (get_attr_fp_int_src (dep_insn
))
24140 /* There is one cycle extra latency between an FP op and a store. */
24141 if (insn_type
== TYPE_FMOV
24142 && (set
= single_set (dep_insn
)) != NULL_RTX
24143 && (set2
= single_set (insn
)) != NULL_RTX
24144 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
24145 && MEM_P (SET_DEST (set2
)))
24148 /* Show ability of reorder buffer to hide latency of load by executing
24149 in parallel with previous instruction in case
24150 previous instruction is not needed to compute the address. */
24151 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
24152 && !ix86_agi_dependent (dep_insn
, insn
))
24154 /* Claim moves to take one cycle, as core can issue one load
24155 at time and the next load can start cycle later. */
24156 if (dep_insn_type
== TYPE_IMOV
24157 || dep_insn_type
== TYPE_FMOV
)
24165 memory
= get_attr_memory (insn
);
24167 /* The esp dependency is resolved before the instruction is really
24169 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
24170 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
24173 /* INT->FP conversion is expensive. */
24174 if (get_attr_fp_int_src (dep_insn
))
24177 /* Show ability of reorder buffer to hide latency of load by executing
24178 in parallel with previous instruction in case
24179 previous instruction is not needed to compute the address. */
24180 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
24181 && !ix86_agi_dependent (dep_insn
, insn
))
24183 /* Claim moves to take one cycle, as core can issue one load
24184 at time and the next load can start cycle later. */
24185 if (dep_insn_type
== TYPE_IMOV
24186 || dep_insn_type
== TYPE_FMOV
)
24195 case PROCESSOR_ATHLON
:
24197 case PROCESSOR_AMDFAM10
:
24198 case PROCESSOR_BDVER1
:
24199 case PROCESSOR_BDVER2
:
24200 case PROCESSOR_BTVER1
:
24201 case PROCESSOR_ATOM
:
24202 case PROCESSOR_GENERIC32
:
24203 case PROCESSOR_GENERIC64
:
24204 memory
= get_attr_memory (insn
);
24206 /* Show ability of reorder buffer to hide latency of load by executing
24207 in parallel with previous instruction in case
24208 previous instruction is not needed to compute the address. */
24209 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
24210 && !ix86_agi_dependent (dep_insn
, insn
))
24212 enum attr_unit unit
= get_attr_unit (insn
);
24215 /* Because of the difference between the length of integer and
24216 floating unit pipeline preparation stages, the memory operands
24217 for floating point are cheaper.
24219 ??? For Athlon it the difference is most probably 2. */
24220 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
24223 loadcost
= TARGET_ATHLON
? 2 : 0;
24225 if (cost
>= loadcost
)
24238 /* How many alternative schedules to try. This should be as wide as the
24239 scheduling freedom in the DFA, but no wider. Making this value too
24240 large results extra work for the scheduler. */
24243 ia32_multipass_dfa_lookahead (void)
24247 case PROCESSOR_PENTIUM
:
24250 case PROCESSOR_PENTIUMPRO
:
24254 case PROCESSOR_CORE2_32
:
24255 case PROCESSOR_CORE2_64
:
24256 case PROCESSOR_COREI7_32
:
24257 case PROCESSOR_COREI7_64
:
24258 /* Generally, we want haifa-sched:max_issue() to look ahead as far
24259 as many instructions can be executed on a cycle, i.e.,
24260 issue_rate. I wonder why tuning for many CPUs does not do this. */
24261 return ix86_issue_rate ();
24270 /* Model decoder of Core 2/i7.
24271 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
24272 track the instruction fetch block boundaries and make sure that long
24273 (9+ bytes) instructions are assigned to D0. */
24275 /* Maximum length of an insn that can be handled by
24276 a secondary decoder unit. '8' for Core 2/i7. */
24277 static int core2i7_secondary_decoder_max_insn_size
;
24279 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
24280 '16' for Core 2/i7. */
24281 static int core2i7_ifetch_block_size
;
24283 /* Maximum number of instructions decoder can handle per cycle.
24284 '6' for Core 2/i7. */
24285 static int core2i7_ifetch_block_max_insns
;
24287 typedef struct ix86_first_cycle_multipass_data_
*
24288 ix86_first_cycle_multipass_data_t
;
24289 typedef const struct ix86_first_cycle_multipass_data_
*
24290 const_ix86_first_cycle_multipass_data_t
;
24292 /* A variable to store target state across calls to max_issue within
24294 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data
,
24295 *ix86_first_cycle_multipass_data
= &_ix86_first_cycle_multipass_data
;
24297 /* Initialize DATA. */
24299 core2i7_first_cycle_multipass_init (void *_data
)
24301 ix86_first_cycle_multipass_data_t data
24302 = (ix86_first_cycle_multipass_data_t
) _data
;
24304 data
->ifetch_block_len
= 0;
24305 data
->ifetch_block_n_insns
= 0;
24306 data
->ready_try_change
= NULL
;
24307 data
->ready_try_change_size
= 0;
24310 /* Advancing the cycle; reset ifetch block counts. */
24312 core2i7_dfa_post_advance_cycle (void)
24314 ix86_first_cycle_multipass_data_t data
= ix86_first_cycle_multipass_data
;
24316 gcc_assert (data
->ifetch_block_n_insns
<= core2i7_ifetch_block_max_insns
);
24318 data
->ifetch_block_len
= 0;
24319 data
->ifetch_block_n_insns
= 0;
24322 static int min_insn_size (rtx
);
24324 /* Filter out insns from ready_try that the core will not be able to issue
24325 on current cycle due to decoder. */
24327 core2i7_first_cycle_multipass_filter_ready_try
24328 (const_ix86_first_cycle_multipass_data_t data
,
24329 char *ready_try
, int n_ready
, bool first_cycle_insn_p
)
24336 if (ready_try
[n_ready
])
24339 insn
= get_ready_element (n_ready
);
24340 insn_size
= min_insn_size (insn
);
24342 if (/* If this is a too long an insn for a secondary decoder ... */
24343 (!first_cycle_insn_p
24344 && insn_size
> core2i7_secondary_decoder_max_insn_size
)
24345 /* ... or it would not fit into the ifetch block ... */
24346 || data
->ifetch_block_len
+ insn_size
> core2i7_ifetch_block_size
24347 /* ... or the decoder is full already ... */
24348 || data
->ifetch_block_n_insns
+ 1 > core2i7_ifetch_block_max_insns
)
24349 /* ... mask the insn out. */
24351 ready_try
[n_ready
] = 1;
24353 if (data
->ready_try_change
)
24354 SET_BIT (data
->ready_try_change
, n_ready
);
24359 /* Prepare for a new round of multipass lookahead scheduling. */
24361 core2i7_first_cycle_multipass_begin (void *_data
, char *ready_try
, int n_ready
,
24362 bool first_cycle_insn_p
)
24364 ix86_first_cycle_multipass_data_t data
24365 = (ix86_first_cycle_multipass_data_t
) _data
;
24366 const_ix86_first_cycle_multipass_data_t prev_data
24367 = ix86_first_cycle_multipass_data
;
24369 /* Restore the state from the end of the previous round. */
24370 data
->ifetch_block_len
= prev_data
->ifetch_block_len
;
24371 data
->ifetch_block_n_insns
= prev_data
->ifetch_block_n_insns
;
24373 /* Filter instructions that cannot be issued on current cycle due to
24374 decoder restrictions. */
24375 core2i7_first_cycle_multipass_filter_ready_try (data
, ready_try
, n_ready
,
24376 first_cycle_insn_p
);
24379 /* INSN is being issued in current solution. Account for its impact on
24380 the decoder model. */
24382 core2i7_first_cycle_multipass_issue (void *_data
, char *ready_try
, int n_ready
,
24383 rtx insn
, const void *_prev_data
)
24385 ix86_first_cycle_multipass_data_t data
24386 = (ix86_first_cycle_multipass_data_t
) _data
;
24387 const_ix86_first_cycle_multipass_data_t prev_data
24388 = (const_ix86_first_cycle_multipass_data_t
) _prev_data
;
24390 int insn_size
= min_insn_size (insn
);
24392 data
->ifetch_block_len
= prev_data
->ifetch_block_len
+ insn_size
;
24393 data
->ifetch_block_n_insns
= prev_data
->ifetch_block_n_insns
+ 1;
24394 gcc_assert (data
->ifetch_block_len
<= core2i7_ifetch_block_size
24395 && data
->ifetch_block_n_insns
<= core2i7_ifetch_block_max_insns
);
24397 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
24398 if (!data
->ready_try_change
)
24400 data
->ready_try_change
= sbitmap_alloc (n_ready
);
24401 data
->ready_try_change_size
= n_ready
;
24403 else if (data
->ready_try_change_size
< n_ready
)
24405 data
->ready_try_change
= sbitmap_resize (data
->ready_try_change
,
24407 data
->ready_try_change_size
= n_ready
;
24409 sbitmap_zero (data
->ready_try_change
);
24411 /* Filter out insns from ready_try that the core will not be able to issue
24412 on current cycle due to decoder. */
24413 core2i7_first_cycle_multipass_filter_ready_try (data
, ready_try
, n_ready
,
24417 /* Revert the effect on ready_try. */
24419 core2i7_first_cycle_multipass_backtrack (const void *_data
,
24421 int n_ready ATTRIBUTE_UNUSED
)
24423 const_ix86_first_cycle_multipass_data_t data
24424 = (const_ix86_first_cycle_multipass_data_t
) _data
;
24425 unsigned int i
= 0;
24426 sbitmap_iterator sbi
;
24428 gcc_assert (sbitmap_last_set_bit (data
->ready_try_change
) < n_ready
);
24429 EXECUTE_IF_SET_IN_SBITMAP (data
->ready_try_change
, 0, i
, sbi
)
24435 /* Save the result of multipass lookahead scheduling for the next round. */
24437 core2i7_first_cycle_multipass_end (const void *_data
)
24439 const_ix86_first_cycle_multipass_data_t data
24440 = (const_ix86_first_cycle_multipass_data_t
) _data
;
24441 ix86_first_cycle_multipass_data_t next_data
24442 = ix86_first_cycle_multipass_data
;
24446 next_data
->ifetch_block_len
= data
->ifetch_block_len
;
24447 next_data
->ifetch_block_n_insns
= data
->ifetch_block_n_insns
;
24451 /* Deallocate target data. */
24453 core2i7_first_cycle_multipass_fini (void *_data
)
24455 ix86_first_cycle_multipass_data_t data
24456 = (ix86_first_cycle_multipass_data_t
) _data
;
24458 if (data
->ready_try_change
)
24460 sbitmap_free (data
->ready_try_change
);
24461 data
->ready_try_change
= NULL
;
24462 data
->ready_try_change_size
= 0;
24466 /* Prepare for scheduling pass. */
24468 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED
,
24469 int verbose ATTRIBUTE_UNUSED
,
24470 int max_uid ATTRIBUTE_UNUSED
)
24472 /* Install scheduling hooks for current CPU. Some of these hooks are used
24473 in time-critical parts of the scheduler, so we only set them up when
24474 they are actually used. */
24477 case PROCESSOR_CORE2_32
:
24478 case PROCESSOR_CORE2_64
:
24479 case PROCESSOR_COREI7_32
:
24480 case PROCESSOR_COREI7_64
:
24481 targetm
.sched
.dfa_post_advance_cycle
24482 = core2i7_dfa_post_advance_cycle
;
24483 targetm
.sched
.first_cycle_multipass_init
24484 = core2i7_first_cycle_multipass_init
;
24485 targetm
.sched
.first_cycle_multipass_begin
24486 = core2i7_first_cycle_multipass_begin
;
24487 targetm
.sched
.first_cycle_multipass_issue
24488 = core2i7_first_cycle_multipass_issue
;
24489 targetm
.sched
.first_cycle_multipass_backtrack
24490 = core2i7_first_cycle_multipass_backtrack
;
24491 targetm
.sched
.first_cycle_multipass_end
24492 = core2i7_first_cycle_multipass_end
;
24493 targetm
.sched
.first_cycle_multipass_fini
24494 = core2i7_first_cycle_multipass_fini
;
24496 /* Set decoder parameters. */
24497 core2i7_secondary_decoder_max_insn_size
= 8;
24498 core2i7_ifetch_block_size
= 16;
24499 core2i7_ifetch_block_max_insns
= 6;
24503 targetm
.sched
.dfa_post_advance_cycle
= NULL
;
24504 targetm
.sched
.first_cycle_multipass_init
= NULL
;
24505 targetm
.sched
.first_cycle_multipass_begin
= NULL
;
24506 targetm
.sched
.first_cycle_multipass_issue
= NULL
;
24507 targetm
.sched
.first_cycle_multipass_backtrack
= NULL
;
24508 targetm
.sched
.first_cycle_multipass_end
= NULL
;
24509 targetm
.sched
.first_cycle_multipass_fini
= NULL
;
24515 /* Compute the alignment given to a constant that is being placed in memory.
24516 EXP is the constant and ALIGN is the alignment that the object would
24518 The value of this function is used instead of that alignment to align
24522 ix86_constant_alignment (tree exp
, int align
)
24524 if (TREE_CODE (exp
) == REAL_CST
|| TREE_CODE (exp
) == VECTOR_CST
24525 || TREE_CODE (exp
) == INTEGER_CST
)
24527 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
24529 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
24532 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
24533 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
24534 return BITS_PER_WORD
;
24539 /* Compute the alignment for a static variable.
24540 TYPE is the data type, and ALIGN is the alignment that
24541 the object would ordinarily have. The value of this function is used
24542 instead of that alignment to align the object. */
24545 ix86_data_alignment (tree type
, int align
)
24547 int max_align
= optimize_size
? BITS_PER_WORD
: MIN (256, MAX_OFILE_ALIGNMENT
);
24549 if (AGGREGATE_TYPE_P (type
)
24550 && TYPE_SIZE (type
)
24551 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
24552 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= (unsigned) max_align
24553 || TREE_INT_CST_HIGH (TYPE_SIZE (type
)))
24554 && align
< max_align
)
24557 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
24558 to 16byte boundary. */
24561 if (AGGREGATE_TYPE_P (type
)
24562 && TYPE_SIZE (type
)
24563 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
24564 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
24565 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
24569 if (TREE_CODE (type
) == ARRAY_TYPE
)
24571 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
24573 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
24576 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
24579 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
24581 if ((TYPE_MODE (type
) == XCmode
24582 || TYPE_MODE (type
) == TCmode
) && align
< 128)
24585 else if ((TREE_CODE (type
) == RECORD_TYPE
24586 || TREE_CODE (type
) == UNION_TYPE
24587 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
24588 && TYPE_FIELDS (type
))
24590 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
24592 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
24595 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
24596 || TREE_CODE (type
) == INTEGER_TYPE
)
24598 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
24600 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
24607 /* Compute the alignment for a local variable or a stack slot. EXP is
24608 the data type or decl itself, MODE is the widest mode available and
24609 ALIGN is the alignment that the object would ordinarily have. The
24610 value of this macro is used instead of that alignment to align the
24614 ix86_local_alignment (tree exp
, enum machine_mode mode
,
24615 unsigned int align
)
24619 if (exp
&& DECL_P (exp
))
24621 type
= TREE_TYPE (exp
);
24630 /* Don't do dynamic stack realignment for long long objects with
24631 -mpreferred-stack-boundary=2. */
24634 && ix86_preferred_stack_boundary
< 64
24635 && (mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
24636 && (!type
|| !TYPE_USER_ALIGN (type
))
24637 && (!decl
|| !DECL_USER_ALIGN (decl
)))
24640 /* If TYPE is NULL, we are allocating a stack slot for caller-save
24641 register in MODE. We will return the largest alignment of XF
24645 if (mode
== XFmode
&& align
< GET_MODE_ALIGNMENT (DFmode
))
24646 align
= GET_MODE_ALIGNMENT (DFmode
);
24650 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
24651 to 16byte boundary. Exact wording is:
24653 An array uses the same alignment as its elements, except that a local or
24654 global array variable of length at least 16 bytes or
24655 a C99 variable-length array variable always has alignment of at least 16 bytes.
24657 This was added to allow use of aligned SSE instructions at arrays. This
24658 rule is meant for static storage (where compiler can not do the analysis
24659 by itself). We follow it for automatic variables only when convenient.
24660 We fully control everything in the function compiled and functions from
24661 other unit can not rely on the alignment.
24663 Exclude va_list type. It is the common case of local array where
24664 we can not benefit from the alignment. */
24665 if (TARGET_64BIT
&& optimize_function_for_speed_p (cfun
)
24668 if (AGGREGATE_TYPE_P (type
)
24669 && (va_list_type_node
== NULL_TREE
24670 || (TYPE_MAIN_VARIANT (type
)
24671 != TYPE_MAIN_VARIANT (va_list_type_node
)))
24672 && TYPE_SIZE (type
)
24673 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
24674 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
24675 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
24678 if (TREE_CODE (type
) == ARRAY_TYPE
)
24680 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
24682 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
24685 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
24687 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
24689 if ((TYPE_MODE (type
) == XCmode
24690 || TYPE_MODE (type
) == TCmode
) && align
< 128)
24693 else if ((TREE_CODE (type
) == RECORD_TYPE
24694 || TREE_CODE (type
) == UNION_TYPE
24695 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
24696 && TYPE_FIELDS (type
))
24698 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
24700 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
24703 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
24704 || TREE_CODE (type
) == INTEGER_TYPE
)
24707 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
24709 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
24715 /* Compute the minimum required alignment for dynamic stack realignment
24716 purposes for a local variable, parameter or a stack slot. EXP is
24717 the data type or decl itself, MODE is its mode and ALIGN is the
24718 alignment that the object would ordinarily have. */
24721 ix86_minimum_alignment (tree exp
, enum machine_mode mode
,
24722 unsigned int align
)
24726 if (exp
&& DECL_P (exp
))
24728 type
= TREE_TYPE (exp
);
24737 if (TARGET_64BIT
|| align
!= 64 || ix86_preferred_stack_boundary
>= 64)
24740 /* Don't do dynamic stack realignment for long long objects with
24741 -mpreferred-stack-boundary=2. */
24742 if ((mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
24743 && (!type
|| !TYPE_USER_ALIGN (type
))
24744 && (!decl
|| !DECL_USER_ALIGN (decl
)))
24750 /* Find a location for the static chain incoming to a nested function.
24751 This is a register, unless all free registers are used by arguments. */
24754 ix86_static_chain (const_tree fndecl
, bool incoming_p
)
24758 if (!DECL_STATIC_CHAIN (fndecl
))
24763 /* We always use R10 in 64-bit mode. */
24771 /* By default in 32-bit mode we use ECX to pass the static chain. */
24774 fntype
= TREE_TYPE (fndecl
);
24775 ccvt
= ix86_get_callcvt (fntype
);
24776 if ((ccvt
& (IX86_CALLCVT_FASTCALL
| IX86_CALLCVT_THISCALL
)) != 0)
24778 /* Fastcall functions use ecx/edx for arguments, which leaves
24779 us with EAX for the static chain.
24780 Thiscall functions use ecx for arguments, which also
24781 leaves us with EAX for the static chain. */
24784 else if (ix86_function_regparm (fntype
, fndecl
) == 3)
24786 /* For regparm 3, we have no free call-clobbered registers in
24787 which to store the static chain. In order to implement this,
24788 we have the trampoline push the static chain to the stack.
24789 However, we can't push a value below the return address when
24790 we call the nested function directly, so we have to use an
24791 alternate entry point. For this we use ESI, and have the
24792 alternate entry point push ESI, so that things appear the
24793 same once we're executing the nested function. */
24796 if (fndecl
== current_function_decl
)
24797 ix86_static_chain_on_stack
= true;
24798 return gen_frame_mem (SImode
,
24799 plus_constant (arg_pointer_rtx
, -8));
24805 return gen_rtx_REG (Pmode
, regno
);
24808 /* Emit RTL insns to initialize the variable parts of a trampoline.
24809 FNDECL is the decl of the target address; M_TRAMP is a MEM for
24810 the trampoline, and CHAIN_VALUE is an RTX for the static chain
24811 to be passed to the target function. */
24814 ix86_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
24820 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
24826 /* Load the function address to r11. Try to load address using
24827 the shorter movl instead of movabs. We may want to support
24828 movq for kernel mode, but kernel does not use trampolines at
24830 if (x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
24832 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
24834 mem
= adjust_address (m_tramp
, HImode
, offset
);
24835 emit_move_insn (mem
, gen_int_mode (0xbb41, HImode
));
24837 mem
= adjust_address (m_tramp
, SImode
, offset
+ 2);
24838 emit_move_insn (mem
, gen_lowpart (SImode
, fnaddr
));
24843 mem
= adjust_address (m_tramp
, HImode
, offset
);
24844 emit_move_insn (mem
, gen_int_mode (0xbb49, HImode
));
24846 mem
= adjust_address (m_tramp
, DImode
, offset
+ 2);
24847 emit_move_insn (mem
, fnaddr
);
24851 /* Load static chain using movabs to r10. Use the
24852 shorter movl instead of movabs for x32. */
24864 mem
= adjust_address (m_tramp
, HImode
, offset
);
24865 emit_move_insn (mem
, gen_int_mode (opcode
, HImode
));
24867 mem
= adjust_address (m_tramp
, ptr_mode
, offset
+ 2);
24868 emit_move_insn (mem
, chain_value
);
24871 /* Jump to r11; the last (unused) byte is a nop, only there to
24872 pad the write out to a single 32-bit store. */
24873 mem
= adjust_address (m_tramp
, SImode
, offset
);
24874 emit_move_insn (mem
, gen_int_mode (0x90e3ff49, SImode
));
24881 /* Depending on the static chain location, either load a register
24882 with a constant, or push the constant to the stack. All of the
24883 instructions are the same size. */
24884 chain
= ix86_static_chain (fndecl
, true);
24887 switch (REGNO (chain
))
24890 opcode
= 0xb8; break;
24892 opcode
= 0xb9; break;
24894 gcc_unreachable ();
24900 mem
= adjust_address (m_tramp
, QImode
, offset
);
24901 emit_move_insn (mem
, gen_int_mode (opcode
, QImode
));
24903 mem
= adjust_address (m_tramp
, SImode
, offset
+ 1);
24904 emit_move_insn (mem
, chain_value
);
24907 mem
= adjust_address (m_tramp
, QImode
, offset
);
24908 emit_move_insn (mem
, gen_int_mode (0xe9, QImode
));
24910 mem
= adjust_address (m_tramp
, SImode
, offset
+ 1);
24912 /* Compute offset from the end of the jmp to the target function.
24913 In the case in which the trampoline stores the static chain on
24914 the stack, we need to skip the first insn which pushes the
24915 (call-saved) register static chain; this push is 1 byte. */
24917 disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
24918 plus_constant (XEXP (m_tramp
, 0),
24919 offset
- (MEM_P (chain
) ? 1 : 0)),
24920 NULL_RTX
, 1, OPTAB_DIRECT
);
24921 emit_move_insn (mem
, disp
);
24924 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
24926 #ifdef HAVE_ENABLE_EXECUTE_STACK
24927 #ifdef CHECK_EXECUTE_STACK_ENABLED
24928 if (CHECK_EXECUTE_STACK_ENABLED
)
24930 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
24931 LCT_NORMAL
, VOIDmode
, 1, XEXP (m_tramp
, 0), Pmode
);
24935 /* The following file contains several enumerations and data structures
24936 built from the definitions in i386-builtin-types.def. */
24938 #include "i386-builtin-types.inc"
24940 /* Table for the ix86 builtin non-function types. */
24941 static GTY(()) tree ix86_builtin_type_tab
[(int) IX86_BT_LAST_CPTR
+ 1];
24943 /* Retrieve an element from the above table, building some of
24944 the types lazily. */
24947 ix86_get_builtin_type (enum ix86_builtin_type tcode
)
24949 unsigned int index
;
24952 gcc_assert ((unsigned)tcode
< ARRAY_SIZE(ix86_builtin_type_tab
));
24954 type
= ix86_builtin_type_tab
[(int) tcode
];
24958 gcc_assert (tcode
> IX86_BT_LAST_PRIM
);
24959 if (tcode
<= IX86_BT_LAST_VECT
)
24961 enum machine_mode mode
;
24963 index
= tcode
- IX86_BT_LAST_PRIM
- 1;
24964 itype
= ix86_get_builtin_type (ix86_builtin_type_vect_base
[index
]);
24965 mode
= ix86_builtin_type_vect_mode
[index
];
24967 type
= build_vector_type_for_mode (itype
, mode
);
24973 index
= tcode
- IX86_BT_LAST_VECT
- 1;
24974 if (tcode
<= IX86_BT_LAST_PTR
)
24975 quals
= TYPE_UNQUALIFIED
;
24977 quals
= TYPE_QUAL_CONST
;
24979 itype
= ix86_get_builtin_type (ix86_builtin_type_ptr_base
[index
]);
24980 if (quals
!= TYPE_UNQUALIFIED
)
24981 itype
= build_qualified_type (itype
, quals
);
24983 type
= build_pointer_type (itype
);
24986 ix86_builtin_type_tab
[(int) tcode
] = type
;
24990 /* Table for the ix86 builtin function types. */
24991 static GTY(()) tree ix86_builtin_func_type_tab
[(int) IX86_BT_LAST_ALIAS
+ 1];
24993 /* Retrieve an element from the above table, building some of
24994 the types lazily. */
24997 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode
)
25001 gcc_assert ((unsigned)tcode
< ARRAY_SIZE (ix86_builtin_func_type_tab
));
25003 type
= ix86_builtin_func_type_tab
[(int) tcode
];
25007 if (tcode
<= IX86_BT_LAST_FUNC
)
25009 unsigned start
= ix86_builtin_func_start
[(int) tcode
];
25010 unsigned after
= ix86_builtin_func_start
[(int) tcode
+ 1];
25011 tree rtype
, atype
, args
= void_list_node
;
25014 rtype
= ix86_get_builtin_type (ix86_builtin_func_args
[start
]);
25015 for (i
= after
- 1; i
> start
; --i
)
25017 atype
= ix86_get_builtin_type (ix86_builtin_func_args
[i
]);
25018 args
= tree_cons (NULL
, atype
, args
);
25021 type
= build_function_type (rtype
, args
);
25025 unsigned index
= tcode
- IX86_BT_LAST_FUNC
- 1;
25026 enum ix86_builtin_func_type icode
;
25028 icode
= ix86_builtin_func_alias_base
[index
];
25029 type
= ix86_get_builtin_func_type (icode
);
25032 ix86_builtin_func_type_tab
[(int) tcode
] = type
;
25037 /* Codes for all the SSE/MMX builtins. */
25040 IX86_BUILTIN_ADDPS
,
25041 IX86_BUILTIN_ADDSS
,
25042 IX86_BUILTIN_DIVPS
,
25043 IX86_BUILTIN_DIVSS
,
25044 IX86_BUILTIN_MULPS
,
25045 IX86_BUILTIN_MULSS
,
25046 IX86_BUILTIN_SUBPS
,
25047 IX86_BUILTIN_SUBSS
,
25049 IX86_BUILTIN_CMPEQPS
,
25050 IX86_BUILTIN_CMPLTPS
,
25051 IX86_BUILTIN_CMPLEPS
,
25052 IX86_BUILTIN_CMPGTPS
,
25053 IX86_BUILTIN_CMPGEPS
,
25054 IX86_BUILTIN_CMPNEQPS
,
25055 IX86_BUILTIN_CMPNLTPS
,
25056 IX86_BUILTIN_CMPNLEPS
,
25057 IX86_BUILTIN_CMPNGTPS
,
25058 IX86_BUILTIN_CMPNGEPS
,
25059 IX86_BUILTIN_CMPORDPS
,
25060 IX86_BUILTIN_CMPUNORDPS
,
25061 IX86_BUILTIN_CMPEQSS
,
25062 IX86_BUILTIN_CMPLTSS
,
25063 IX86_BUILTIN_CMPLESS
,
25064 IX86_BUILTIN_CMPNEQSS
,
25065 IX86_BUILTIN_CMPNLTSS
,
25066 IX86_BUILTIN_CMPNLESS
,
25067 IX86_BUILTIN_CMPNGTSS
,
25068 IX86_BUILTIN_CMPNGESS
,
25069 IX86_BUILTIN_CMPORDSS
,
25070 IX86_BUILTIN_CMPUNORDSS
,
25072 IX86_BUILTIN_COMIEQSS
,
25073 IX86_BUILTIN_COMILTSS
,
25074 IX86_BUILTIN_COMILESS
,
25075 IX86_BUILTIN_COMIGTSS
,
25076 IX86_BUILTIN_COMIGESS
,
25077 IX86_BUILTIN_COMINEQSS
,
25078 IX86_BUILTIN_UCOMIEQSS
,
25079 IX86_BUILTIN_UCOMILTSS
,
25080 IX86_BUILTIN_UCOMILESS
,
25081 IX86_BUILTIN_UCOMIGTSS
,
25082 IX86_BUILTIN_UCOMIGESS
,
25083 IX86_BUILTIN_UCOMINEQSS
,
25085 IX86_BUILTIN_CVTPI2PS
,
25086 IX86_BUILTIN_CVTPS2PI
,
25087 IX86_BUILTIN_CVTSI2SS
,
25088 IX86_BUILTIN_CVTSI642SS
,
25089 IX86_BUILTIN_CVTSS2SI
,
25090 IX86_BUILTIN_CVTSS2SI64
,
25091 IX86_BUILTIN_CVTTPS2PI
,
25092 IX86_BUILTIN_CVTTSS2SI
,
25093 IX86_BUILTIN_CVTTSS2SI64
,
25095 IX86_BUILTIN_MAXPS
,
25096 IX86_BUILTIN_MAXSS
,
25097 IX86_BUILTIN_MINPS
,
25098 IX86_BUILTIN_MINSS
,
25100 IX86_BUILTIN_LOADUPS
,
25101 IX86_BUILTIN_STOREUPS
,
25102 IX86_BUILTIN_MOVSS
,
25104 IX86_BUILTIN_MOVHLPS
,
25105 IX86_BUILTIN_MOVLHPS
,
25106 IX86_BUILTIN_LOADHPS
,
25107 IX86_BUILTIN_LOADLPS
,
25108 IX86_BUILTIN_STOREHPS
,
25109 IX86_BUILTIN_STORELPS
,
25111 IX86_BUILTIN_MASKMOVQ
,
25112 IX86_BUILTIN_MOVMSKPS
,
25113 IX86_BUILTIN_PMOVMSKB
,
25115 IX86_BUILTIN_MOVNTPS
,
25116 IX86_BUILTIN_MOVNTQ
,
25118 IX86_BUILTIN_LOADDQU
,
25119 IX86_BUILTIN_STOREDQU
,
25121 IX86_BUILTIN_PACKSSWB
,
25122 IX86_BUILTIN_PACKSSDW
,
25123 IX86_BUILTIN_PACKUSWB
,
25125 IX86_BUILTIN_PADDB
,
25126 IX86_BUILTIN_PADDW
,
25127 IX86_BUILTIN_PADDD
,
25128 IX86_BUILTIN_PADDQ
,
25129 IX86_BUILTIN_PADDSB
,
25130 IX86_BUILTIN_PADDSW
,
25131 IX86_BUILTIN_PADDUSB
,
25132 IX86_BUILTIN_PADDUSW
,
25133 IX86_BUILTIN_PSUBB
,
25134 IX86_BUILTIN_PSUBW
,
25135 IX86_BUILTIN_PSUBD
,
25136 IX86_BUILTIN_PSUBQ
,
25137 IX86_BUILTIN_PSUBSB
,
25138 IX86_BUILTIN_PSUBSW
,
25139 IX86_BUILTIN_PSUBUSB
,
25140 IX86_BUILTIN_PSUBUSW
,
25143 IX86_BUILTIN_PANDN
,
25147 IX86_BUILTIN_PAVGB
,
25148 IX86_BUILTIN_PAVGW
,
25150 IX86_BUILTIN_PCMPEQB
,
25151 IX86_BUILTIN_PCMPEQW
,
25152 IX86_BUILTIN_PCMPEQD
,
25153 IX86_BUILTIN_PCMPGTB
,
25154 IX86_BUILTIN_PCMPGTW
,
25155 IX86_BUILTIN_PCMPGTD
,
25157 IX86_BUILTIN_PMADDWD
,
25159 IX86_BUILTIN_PMAXSW
,
25160 IX86_BUILTIN_PMAXUB
,
25161 IX86_BUILTIN_PMINSW
,
25162 IX86_BUILTIN_PMINUB
,
25164 IX86_BUILTIN_PMULHUW
,
25165 IX86_BUILTIN_PMULHW
,
25166 IX86_BUILTIN_PMULLW
,
25168 IX86_BUILTIN_PSADBW
,
25169 IX86_BUILTIN_PSHUFW
,
25171 IX86_BUILTIN_PSLLW
,
25172 IX86_BUILTIN_PSLLD
,
25173 IX86_BUILTIN_PSLLQ
,
25174 IX86_BUILTIN_PSRAW
,
25175 IX86_BUILTIN_PSRAD
,
25176 IX86_BUILTIN_PSRLW
,
25177 IX86_BUILTIN_PSRLD
,
25178 IX86_BUILTIN_PSRLQ
,
25179 IX86_BUILTIN_PSLLWI
,
25180 IX86_BUILTIN_PSLLDI
,
25181 IX86_BUILTIN_PSLLQI
,
25182 IX86_BUILTIN_PSRAWI
,
25183 IX86_BUILTIN_PSRADI
,
25184 IX86_BUILTIN_PSRLWI
,
25185 IX86_BUILTIN_PSRLDI
,
25186 IX86_BUILTIN_PSRLQI
,
25188 IX86_BUILTIN_PUNPCKHBW
,
25189 IX86_BUILTIN_PUNPCKHWD
,
25190 IX86_BUILTIN_PUNPCKHDQ
,
25191 IX86_BUILTIN_PUNPCKLBW
,
25192 IX86_BUILTIN_PUNPCKLWD
,
25193 IX86_BUILTIN_PUNPCKLDQ
,
25195 IX86_BUILTIN_SHUFPS
,
25197 IX86_BUILTIN_RCPPS
,
25198 IX86_BUILTIN_RCPSS
,
25199 IX86_BUILTIN_RSQRTPS
,
25200 IX86_BUILTIN_RSQRTPS_NR
,
25201 IX86_BUILTIN_RSQRTSS
,
25202 IX86_BUILTIN_RSQRTF
,
25203 IX86_BUILTIN_SQRTPS
,
25204 IX86_BUILTIN_SQRTPS_NR
,
25205 IX86_BUILTIN_SQRTSS
,
25207 IX86_BUILTIN_UNPCKHPS
,
25208 IX86_BUILTIN_UNPCKLPS
,
25210 IX86_BUILTIN_ANDPS
,
25211 IX86_BUILTIN_ANDNPS
,
25213 IX86_BUILTIN_XORPS
,
25216 IX86_BUILTIN_LDMXCSR
,
25217 IX86_BUILTIN_STMXCSR
,
25218 IX86_BUILTIN_SFENCE
,
25220 /* 3DNow! Original */
25221 IX86_BUILTIN_FEMMS
,
25222 IX86_BUILTIN_PAVGUSB
,
25223 IX86_BUILTIN_PF2ID
,
25224 IX86_BUILTIN_PFACC
,
25225 IX86_BUILTIN_PFADD
,
25226 IX86_BUILTIN_PFCMPEQ
,
25227 IX86_BUILTIN_PFCMPGE
,
25228 IX86_BUILTIN_PFCMPGT
,
25229 IX86_BUILTIN_PFMAX
,
25230 IX86_BUILTIN_PFMIN
,
25231 IX86_BUILTIN_PFMUL
,
25232 IX86_BUILTIN_PFRCP
,
25233 IX86_BUILTIN_PFRCPIT1
,
25234 IX86_BUILTIN_PFRCPIT2
,
25235 IX86_BUILTIN_PFRSQIT1
,
25236 IX86_BUILTIN_PFRSQRT
,
25237 IX86_BUILTIN_PFSUB
,
25238 IX86_BUILTIN_PFSUBR
,
25239 IX86_BUILTIN_PI2FD
,
25240 IX86_BUILTIN_PMULHRW
,
25242 /* 3DNow! Athlon Extensions */
25243 IX86_BUILTIN_PF2IW
,
25244 IX86_BUILTIN_PFNACC
,
25245 IX86_BUILTIN_PFPNACC
,
25246 IX86_BUILTIN_PI2FW
,
25247 IX86_BUILTIN_PSWAPDSI
,
25248 IX86_BUILTIN_PSWAPDSF
,
25251 IX86_BUILTIN_ADDPD
,
25252 IX86_BUILTIN_ADDSD
,
25253 IX86_BUILTIN_DIVPD
,
25254 IX86_BUILTIN_DIVSD
,
25255 IX86_BUILTIN_MULPD
,
25256 IX86_BUILTIN_MULSD
,
25257 IX86_BUILTIN_SUBPD
,
25258 IX86_BUILTIN_SUBSD
,
25260 IX86_BUILTIN_CMPEQPD
,
25261 IX86_BUILTIN_CMPLTPD
,
25262 IX86_BUILTIN_CMPLEPD
,
25263 IX86_BUILTIN_CMPGTPD
,
25264 IX86_BUILTIN_CMPGEPD
,
25265 IX86_BUILTIN_CMPNEQPD
,
25266 IX86_BUILTIN_CMPNLTPD
,
25267 IX86_BUILTIN_CMPNLEPD
,
25268 IX86_BUILTIN_CMPNGTPD
,
25269 IX86_BUILTIN_CMPNGEPD
,
25270 IX86_BUILTIN_CMPORDPD
,
25271 IX86_BUILTIN_CMPUNORDPD
,
25272 IX86_BUILTIN_CMPEQSD
,
25273 IX86_BUILTIN_CMPLTSD
,
25274 IX86_BUILTIN_CMPLESD
,
25275 IX86_BUILTIN_CMPNEQSD
,
25276 IX86_BUILTIN_CMPNLTSD
,
25277 IX86_BUILTIN_CMPNLESD
,
25278 IX86_BUILTIN_CMPORDSD
,
25279 IX86_BUILTIN_CMPUNORDSD
,
25281 IX86_BUILTIN_COMIEQSD
,
25282 IX86_BUILTIN_COMILTSD
,
25283 IX86_BUILTIN_COMILESD
,
25284 IX86_BUILTIN_COMIGTSD
,
25285 IX86_BUILTIN_COMIGESD
,
25286 IX86_BUILTIN_COMINEQSD
,
25287 IX86_BUILTIN_UCOMIEQSD
,
25288 IX86_BUILTIN_UCOMILTSD
,
25289 IX86_BUILTIN_UCOMILESD
,
25290 IX86_BUILTIN_UCOMIGTSD
,
25291 IX86_BUILTIN_UCOMIGESD
,
25292 IX86_BUILTIN_UCOMINEQSD
,
25294 IX86_BUILTIN_MAXPD
,
25295 IX86_BUILTIN_MAXSD
,
25296 IX86_BUILTIN_MINPD
,
25297 IX86_BUILTIN_MINSD
,
25299 IX86_BUILTIN_ANDPD
,
25300 IX86_BUILTIN_ANDNPD
,
25302 IX86_BUILTIN_XORPD
,
25304 IX86_BUILTIN_SQRTPD
,
25305 IX86_BUILTIN_SQRTSD
,
25307 IX86_BUILTIN_UNPCKHPD
,
25308 IX86_BUILTIN_UNPCKLPD
,
25310 IX86_BUILTIN_SHUFPD
,
25312 IX86_BUILTIN_LOADUPD
,
25313 IX86_BUILTIN_STOREUPD
,
25314 IX86_BUILTIN_MOVSD
,
25316 IX86_BUILTIN_LOADHPD
,
25317 IX86_BUILTIN_LOADLPD
,
25319 IX86_BUILTIN_CVTDQ2PD
,
25320 IX86_BUILTIN_CVTDQ2PS
,
25322 IX86_BUILTIN_CVTPD2DQ
,
25323 IX86_BUILTIN_CVTPD2PI
,
25324 IX86_BUILTIN_CVTPD2PS
,
25325 IX86_BUILTIN_CVTTPD2DQ
,
25326 IX86_BUILTIN_CVTTPD2PI
,
25328 IX86_BUILTIN_CVTPI2PD
,
25329 IX86_BUILTIN_CVTSI2SD
,
25330 IX86_BUILTIN_CVTSI642SD
,
25332 IX86_BUILTIN_CVTSD2SI
,
25333 IX86_BUILTIN_CVTSD2SI64
,
25334 IX86_BUILTIN_CVTSD2SS
,
25335 IX86_BUILTIN_CVTSS2SD
,
25336 IX86_BUILTIN_CVTTSD2SI
,
25337 IX86_BUILTIN_CVTTSD2SI64
,
25339 IX86_BUILTIN_CVTPS2DQ
,
25340 IX86_BUILTIN_CVTPS2PD
,
25341 IX86_BUILTIN_CVTTPS2DQ
,
25343 IX86_BUILTIN_MOVNTI
,
25344 IX86_BUILTIN_MOVNTI64
,
25345 IX86_BUILTIN_MOVNTPD
,
25346 IX86_BUILTIN_MOVNTDQ
,
25348 IX86_BUILTIN_MOVQ128
,
25351 IX86_BUILTIN_MASKMOVDQU
,
25352 IX86_BUILTIN_MOVMSKPD
,
25353 IX86_BUILTIN_PMOVMSKB128
,
25355 IX86_BUILTIN_PACKSSWB128
,
25356 IX86_BUILTIN_PACKSSDW128
,
25357 IX86_BUILTIN_PACKUSWB128
,
25359 IX86_BUILTIN_PADDB128
,
25360 IX86_BUILTIN_PADDW128
,
25361 IX86_BUILTIN_PADDD128
,
25362 IX86_BUILTIN_PADDQ128
,
25363 IX86_BUILTIN_PADDSB128
,
25364 IX86_BUILTIN_PADDSW128
,
25365 IX86_BUILTIN_PADDUSB128
,
25366 IX86_BUILTIN_PADDUSW128
,
25367 IX86_BUILTIN_PSUBB128
,
25368 IX86_BUILTIN_PSUBW128
,
25369 IX86_BUILTIN_PSUBD128
,
25370 IX86_BUILTIN_PSUBQ128
,
25371 IX86_BUILTIN_PSUBSB128
,
25372 IX86_BUILTIN_PSUBSW128
,
25373 IX86_BUILTIN_PSUBUSB128
,
25374 IX86_BUILTIN_PSUBUSW128
,
25376 IX86_BUILTIN_PAND128
,
25377 IX86_BUILTIN_PANDN128
,
25378 IX86_BUILTIN_POR128
,
25379 IX86_BUILTIN_PXOR128
,
25381 IX86_BUILTIN_PAVGB128
,
25382 IX86_BUILTIN_PAVGW128
,
25384 IX86_BUILTIN_PCMPEQB128
,
25385 IX86_BUILTIN_PCMPEQW128
,
25386 IX86_BUILTIN_PCMPEQD128
,
25387 IX86_BUILTIN_PCMPGTB128
,
25388 IX86_BUILTIN_PCMPGTW128
,
25389 IX86_BUILTIN_PCMPGTD128
,
25391 IX86_BUILTIN_PMADDWD128
,
25393 IX86_BUILTIN_PMAXSW128
,
25394 IX86_BUILTIN_PMAXUB128
,
25395 IX86_BUILTIN_PMINSW128
,
25396 IX86_BUILTIN_PMINUB128
,
25398 IX86_BUILTIN_PMULUDQ
,
25399 IX86_BUILTIN_PMULUDQ128
,
25400 IX86_BUILTIN_PMULHUW128
,
25401 IX86_BUILTIN_PMULHW128
,
25402 IX86_BUILTIN_PMULLW128
,
25404 IX86_BUILTIN_PSADBW128
,
25405 IX86_BUILTIN_PSHUFHW
,
25406 IX86_BUILTIN_PSHUFLW
,
25407 IX86_BUILTIN_PSHUFD
,
25409 IX86_BUILTIN_PSLLDQI128
,
25410 IX86_BUILTIN_PSLLWI128
,
25411 IX86_BUILTIN_PSLLDI128
,
25412 IX86_BUILTIN_PSLLQI128
,
25413 IX86_BUILTIN_PSRAWI128
,
25414 IX86_BUILTIN_PSRADI128
,
25415 IX86_BUILTIN_PSRLDQI128
,
25416 IX86_BUILTIN_PSRLWI128
,
25417 IX86_BUILTIN_PSRLDI128
,
25418 IX86_BUILTIN_PSRLQI128
,
25420 IX86_BUILTIN_PSLLDQ128
,
25421 IX86_BUILTIN_PSLLW128
,
25422 IX86_BUILTIN_PSLLD128
,
25423 IX86_BUILTIN_PSLLQ128
,
25424 IX86_BUILTIN_PSRAW128
,
25425 IX86_BUILTIN_PSRAD128
,
25426 IX86_BUILTIN_PSRLW128
,
25427 IX86_BUILTIN_PSRLD128
,
25428 IX86_BUILTIN_PSRLQ128
,
25430 IX86_BUILTIN_PUNPCKHBW128
,
25431 IX86_BUILTIN_PUNPCKHWD128
,
25432 IX86_BUILTIN_PUNPCKHDQ128
,
25433 IX86_BUILTIN_PUNPCKHQDQ128
,
25434 IX86_BUILTIN_PUNPCKLBW128
,
25435 IX86_BUILTIN_PUNPCKLWD128
,
25436 IX86_BUILTIN_PUNPCKLDQ128
,
25437 IX86_BUILTIN_PUNPCKLQDQ128
,
25439 IX86_BUILTIN_CLFLUSH
,
25440 IX86_BUILTIN_MFENCE
,
25441 IX86_BUILTIN_LFENCE
,
25442 IX86_BUILTIN_PAUSE
,
25444 IX86_BUILTIN_BSRSI
,
25445 IX86_BUILTIN_BSRDI
,
25446 IX86_BUILTIN_RDPMC
,
25447 IX86_BUILTIN_RDTSC
,
25448 IX86_BUILTIN_RDTSCP
,
25449 IX86_BUILTIN_ROLQI
,
25450 IX86_BUILTIN_ROLHI
,
25451 IX86_BUILTIN_RORQI
,
25452 IX86_BUILTIN_RORHI
,
25455 IX86_BUILTIN_ADDSUBPS
,
25456 IX86_BUILTIN_HADDPS
,
25457 IX86_BUILTIN_HSUBPS
,
25458 IX86_BUILTIN_MOVSHDUP
,
25459 IX86_BUILTIN_MOVSLDUP
,
25460 IX86_BUILTIN_ADDSUBPD
,
25461 IX86_BUILTIN_HADDPD
,
25462 IX86_BUILTIN_HSUBPD
,
25463 IX86_BUILTIN_LDDQU
,
25465 IX86_BUILTIN_MONITOR
,
25466 IX86_BUILTIN_MWAIT
,
25469 IX86_BUILTIN_PHADDW
,
25470 IX86_BUILTIN_PHADDD
,
25471 IX86_BUILTIN_PHADDSW
,
25472 IX86_BUILTIN_PHSUBW
,
25473 IX86_BUILTIN_PHSUBD
,
25474 IX86_BUILTIN_PHSUBSW
,
25475 IX86_BUILTIN_PMADDUBSW
,
25476 IX86_BUILTIN_PMULHRSW
,
25477 IX86_BUILTIN_PSHUFB
,
25478 IX86_BUILTIN_PSIGNB
,
25479 IX86_BUILTIN_PSIGNW
,
25480 IX86_BUILTIN_PSIGND
,
25481 IX86_BUILTIN_PALIGNR
,
25482 IX86_BUILTIN_PABSB
,
25483 IX86_BUILTIN_PABSW
,
25484 IX86_BUILTIN_PABSD
,
25486 IX86_BUILTIN_PHADDW128
,
25487 IX86_BUILTIN_PHADDD128
,
25488 IX86_BUILTIN_PHADDSW128
,
25489 IX86_BUILTIN_PHSUBW128
,
25490 IX86_BUILTIN_PHSUBD128
,
25491 IX86_BUILTIN_PHSUBSW128
,
25492 IX86_BUILTIN_PMADDUBSW128
,
25493 IX86_BUILTIN_PMULHRSW128
,
25494 IX86_BUILTIN_PSHUFB128
,
25495 IX86_BUILTIN_PSIGNB128
,
25496 IX86_BUILTIN_PSIGNW128
,
25497 IX86_BUILTIN_PSIGND128
,
25498 IX86_BUILTIN_PALIGNR128
,
25499 IX86_BUILTIN_PABSB128
,
25500 IX86_BUILTIN_PABSW128
,
25501 IX86_BUILTIN_PABSD128
,
25503 /* AMDFAM10 - SSE4A New Instructions. */
25504 IX86_BUILTIN_MOVNTSD
,
25505 IX86_BUILTIN_MOVNTSS
,
25506 IX86_BUILTIN_EXTRQI
,
25507 IX86_BUILTIN_EXTRQ
,
25508 IX86_BUILTIN_INSERTQI
,
25509 IX86_BUILTIN_INSERTQ
,
25512 IX86_BUILTIN_BLENDPD
,
25513 IX86_BUILTIN_BLENDPS
,
25514 IX86_BUILTIN_BLENDVPD
,
25515 IX86_BUILTIN_BLENDVPS
,
25516 IX86_BUILTIN_PBLENDVB128
,
25517 IX86_BUILTIN_PBLENDW128
,
25522 IX86_BUILTIN_INSERTPS128
,
25524 IX86_BUILTIN_MOVNTDQA
,
25525 IX86_BUILTIN_MPSADBW128
,
25526 IX86_BUILTIN_PACKUSDW128
,
25527 IX86_BUILTIN_PCMPEQQ
,
25528 IX86_BUILTIN_PHMINPOSUW128
,
25530 IX86_BUILTIN_PMAXSB128
,
25531 IX86_BUILTIN_PMAXSD128
,
25532 IX86_BUILTIN_PMAXUD128
,
25533 IX86_BUILTIN_PMAXUW128
,
25535 IX86_BUILTIN_PMINSB128
,
25536 IX86_BUILTIN_PMINSD128
,
25537 IX86_BUILTIN_PMINUD128
,
25538 IX86_BUILTIN_PMINUW128
,
25540 IX86_BUILTIN_PMOVSXBW128
,
25541 IX86_BUILTIN_PMOVSXBD128
,
25542 IX86_BUILTIN_PMOVSXBQ128
,
25543 IX86_BUILTIN_PMOVSXWD128
,
25544 IX86_BUILTIN_PMOVSXWQ128
,
25545 IX86_BUILTIN_PMOVSXDQ128
,
25547 IX86_BUILTIN_PMOVZXBW128
,
25548 IX86_BUILTIN_PMOVZXBD128
,
25549 IX86_BUILTIN_PMOVZXBQ128
,
25550 IX86_BUILTIN_PMOVZXWD128
,
25551 IX86_BUILTIN_PMOVZXWQ128
,
25552 IX86_BUILTIN_PMOVZXDQ128
,
25554 IX86_BUILTIN_PMULDQ128
,
25555 IX86_BUILTIN_PMULLD128
,
25557 IX86_BUILTIN_ROUNDSD
,
25558 IX86_BUILTIN_ROUNDSS
,
25560 IX86_BUILTIN_ROUNDPD
,
25561 IX86_BUILTIN_ROUNDPS
,
25563 IX86_BUILTIN_FLOORPD
,
25564 IX86_BUILTIN_CEILPD
,
25565 IX86_BUILTIN_TRUNCPD
,
25566 IX86_BUILTIN_RINTPD
,
25567 IX86_BUILTIN_ROUNDPD_AZ
,
25569 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
,
25570 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
,
25571 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
,
25573 IX86_BUILTIN_FLOORPS
,
25574 IX86_BUILTIN_CEILPS
,
25575 IX86_BUILTIN_TRUNCPS
,
25576 IX86_BUILTIN_RINTPS
,
25577 IX86_BUILTIN_ROUNDPS_AZ
,
25579 IX86_BUILTIN_FLOORPS_SFIX
,
25580 IX86_BUILTIN_CEILPS_SFIX
,
25581 IX86_BUILTIN_ROUNDPS_AZ_SFIX
,
25583 IX86_BUILTIN_PTESTZ
,
25584 IX86_BUILTIN_PTESTC
,
25585 IX86_BUILTIN_PTESTNZC
,
25587 IX86_BUILTIN_VEC_INIT_V2SI
,
25588 IX86_BUILTIN_VEC_INIT_V4HI
,
25589 IX86_BUILTIN_VEC_INIT_V8QI
,
25590 IX86_BUILTIN_VEC_EXT_V2DF
,
25591 IX86_BUILTIN_VEC_EXT_V2DI
,
25592 IX86_BUILTIN_VEC_EXT_V4SF
,
25593 IX86_BUILTIN_VEC_EXT_V4SI
,
25594 IX86_BUILTIN_VEC_EXT_V8HI
,
25595 IX86_BUILTIN_VEC_EXT_V2SI
,
25596 IX86_BUILTIN_VEC_EXT_V4HI
,
25597 IX86_BUILTIN_VEC_EXT_V16QI
,
25598 IX86_BUILTIN_VEC_SET_V2DI
,
25599 IX86_BUILTIN_VEC_SET_V4SF
,
25600 IX86_BUILTIN_VEC_SET_V4SI
,
25601 IX86_BUILTIN_VEC_SET_V8HI
,
25602 IX86_BUILTIN_VEC_SET_V4HI
,
25603 IX86_BUILTIN_VEC_SET_V16QI
,
25605 IX86_BUILTIN_VEC_PACK_SFIX
,
25606 IX86_BUILTIN_VEC_PACK_SFIX256
,
25609 IX86_BUILTIN_CRC32QI
,
25610 IX86_BUILTIN_CRC32HI
,
25611 IX86_BUILTIN_CRC32SI
,
25612 IX86_BUILTIN_CRC32DI
,
25614 IX86_BUILTIN_PCMPESTRI128
,
25615 IX86_BUILTIN_PCMPESTRM128
,
25616 IX86_BUILTIN_PCMPESTRA128
,
25617 IX86_BUILTIN_PCMPESTRC128
,
25618 IX86_BUILTIN_PCMPESTRO128
,
25619 IX86_BUILTIN_PCMPESTRS128
,
25620 IX86_BUILTIN_PCMPESTRZ128
,
25621 IX86_BUILTIN_PCMPISTRI128
,
25622 IX86_BUILTIN_PCMPISTRM128
,
25623 IX86_BUILTIN_PCMPISTRA128
,
25624 IX86_BUILTIN_PCMPISTRC128
,
25625 IX86_BUILTIN_PCMPISTRO128
,
25626 IX86_BUILTIN_PCMPISTRS128
,
25627 IX86_BUILTIN_PCMPISTRZ128
,
25629 IX86_BUILTIN_PCMPGTQ
,
25631 /* AES instructions */
25632 IX86_BUILTIN_AESENC128
,
25633 IX86_BUILTIN_AESENCLAST128
,
25634 IX86_BUILTIN_AESDEC128
,
25635 IX86_BUILTIN_AESDECLAST128
,
25636 IX86_BUILTIN_AESIMC128
,
25637 IX86_BUILTIN_AESKEYGENASSIST128
,
25639 /* PCLMUL instruction */
25640 IX86_BUILTIN_PCLMULQDQ128
,
25643 IX86_BUILTIN_ADDPD256
,
25644 IX86_BUILTIN_ADDPS256
,
25645 IX86_BUILTIN_ADDSUBPD256
,
25646 IX86_BUILTIN_ADDSUBPS256
,
25647 IX86_BUILTIN_ANDPD256
,
25648 IX86_BUILTIN_ANDPS256
,
25649 IX86_BUILTIN_ANDNPD256
,
25650 IX86_BUILTIN_ANDNPS256
,
25651 IX86_BUILTIN_BLENDPD256
,
25652 IX86_BUILTIN_BLENDPS256
,
25653 IX86_BUILTIN_BLENDVPD256
,
25654 IX86_BUILTIN_BLENDVPS256
,
25655 IX86_BUILTIN_DIVPD256
,
25656 IX86_BUILTIN_DIVPS256
,
25657 IX86_BUILTIN_DPPS256
,
25658 IX86_BUILTIN_HADDPD256
,
25659 IX86_BUILTIN_HADDPS256
,
25660 IX86_BUILTIN_HSUBPD256
,
25661 IX86_BUILTIN_HSUBPS256
,
25662 IX86_BUILTIN_MAXPD256
,
25663 IX86_BUILTIN_MAXPS256
,
25664 IX86_BUILTIN_MINPD256
,
25665 IX86_BUILTIN_MINPS256
,
25666 IX86_BUILTIN_MULPD256
,
25667 IX86_BUILTIN_MULPS256
,
25668 IX86_BUILTIN_ORPD256
,
25669 IX86_BUILTIN_ORPS256
,
25670 IX86_BUILTIN_SHUFPD256
,
25671 IX86_BUILTIN_SHUFPS256
,
25672 IX86_BUILTIN_SUBPD256
,
25673 IX86_BUILTIN_SUBPS256
,
25674 IX86_BUILTIN_XORPD256
,
25675 IX86_BUILTIN_XORPS256
,
25676 IX86_BUILTIN_CMPSD
,
25677 IX86_BUILTIN_CMPSS
,
25678 IX86_BUILTIN_CMPPD
,
25679 IX86_BUILTIN_CMPPS
,
25680 IX86_BUILTIN_CMPPD256
,
25681 IX86_BUILTIN_CMPPS256
,
25682 IX86_BUILTIN_CVTDQ2PD256
,
25683 IX86_BUILTIN_CVTDQ2PS256
,
25684 IX86_BUILTIN_CVTPD2PS256
,
25685 IX86_BUILTIN_CVTPS2DQ256
,
25686 IX86_BUILTIN_CVTPS2PD256
,
25687 IX86_BUILTIN_CVTTPD2DQ256
,
25688 IX86_BUILTIN_CVTPD2DQ256
,
25689 IX86_BUILTIN_CVTTPS2DQ256
,
25690 IX86_BUILTIN_EXTRACTF128PD256
,
25691 IX86_BUILTIN_EXTRACTF128PS256
,
25692 IX86_BUILTIN_EXTRACTF128SI256
,
25693 IX86_BUILTIN_VZEROALL
,
25694 IX86_BUILTIN_VZEROUPPER
,
25695 IX86_BUILTIN_VPERMILVARPD
,
25696 IX86_BUILTIN_VPERMILVARPS
,
25697 IX86_BUILTIN_VPERMILVARPD256
,
25698 IX86_BUILTIN_VPERMILVARPS256
,
25699 IX86_BUILTIN_VPERMILPD
,
25700 IX86_BUILTIN_VPERMILPS
,
25701 IX86_BUILTIN_VPERMILPD256
,
25702 IX86_BUILTIN_VPERMILPS256
,
25703 IX86_BUILTIN_VPERMIL2PD
,
25704 IX86_BUILTIN_VPERMIL2PS
,
25705 IX86_BUILTIN_VPERMIL2PD256
,
25706 IX86_BUILTIN_VPERMIL2PS256
,
25707 IX86_BUILTIN_VPERM2F128PD256
,
25708 IX86_BUILTIN_VPERM2F128PS256
,
25709 IX86_BUILTIN_VPERM2F128SI256
,
25710 IX86_BUILTIN_VBROADCASTSS
,
25711 IX86_BUILTIN_VBROADCASTSD256
,
25712 IX86_BUILTIN_VBROADCASTSS256
,
25713 IX86_BUILTIN_VBROADCASTPD256
,
25714 IX86_BUILTIN_VBROADCASTPS256
,
25715 IX86_BUILTIN_VINSERTF128PD256
,
25716 IX86_BUILTIN_VINSERTF128PS256
,
25717 IX86_BUILTIN_VINSERTF128SI256
,
25718 IX86_BUILTIN_LOADUPD256
,
25719 IX86_BUILTIN_LOADUPS256
,
25720 IX86_BUILTIN_STOREUPD256
,
25721 IX86_BUILTIN_STOREUPS256
,
25722 IX86_BUILTIN_LDDQU256
,
25723 IX86_BUILTIN_MOVNTDQ256
,
25724 IX86_BUILTIN_MOVNTPD256
,
25725 IX86_BUILTIN_MOVNTPS256
,
25726 IX86_BUILTIN_LOADDQU256
,
25727 IX86_BUILTIN_STOREDQU256
,
25728 IX86_BUILTIN_MASKLOADPD
,
25729 IX86_BUILTIN_MASKLOADPS
,
25730 IX86_BUILTIN_MASKSTOREPD
,
25731 IX86_BUILTIN_MASKSTOREPS
,
25732 IX86_BUILTIN_MASKLOADPD256
,
25733 IX86_BUILTIN_MASKLOADPS256
,
25734 IX86_BUILTIN_MASKSTOREPD256
,
25735 IX86_BUILTIN_MASKSTOREPS256
,
25736 IX86_BUILTIN_MOVSHDUP256
,
25737 IX86_BUILTIN_MOVSLDUP256
,
25738 IX86_BUILTIN_MOVDDUP256
,
25740 IX86_BUILTIN_SQRTPD256
,
25741 IX86_BUILTIN_SQRTPS256
,
25742 IX86_BUILTIN_SQRTPS_NR256
,
25743 IX86_BUILTIN_RSQRTPS256
,
25744 IX86_BUILTIN_RSQRTPS_NR256
,
25746 IX86_BUILTIN_RCPPS256
,
25748 IX86_BUILTIN_ROUNDPD256
,
25749 IX86_BUILTIN_ROUNDPS256
,
25751 IX86_BUILTIN_FLOORPD256
,
25752 IX86_BUILTIN_CEILPD256
,
25753 IX86_BUILTIN_TRUNCPD256
,
25754 IX86_BUILTIN_RINTPD256
,
25755 IX86_BUILTIN_ROUNDPD_AZ256
,
25757 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
,
25758 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
,
25759 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
,
25761 IX86_BUILTIN_FLOORPS256
,
25762 IX86_BUILTIN_CEILPS256
,
25763 IX86_BUILTIN_TRUNCPS256
,
25764 IX86_BUILTIN_RINTPS256
,
25765 IX86_BUILTIN_ROUNDPS_AZ256
,
25767 IX86_BUILTIN_FLOORPS_SFIX256
,
25768 IX86_BUILTIN_CEILPS_SFIX256
,
25769 IX86_BUILTIN_ROUNDPS_AZ_SFIX256
,
25771 IX86_BUILTIN_UNPCKHPD256
,
25772 IX86_BUILTIN_UNPCKLPD256
,
25773 IX86_BUILTIN_UNPCKHPS256
,
25774 IX86_BUILTIN_UNPCKLPS256
,
25776 IX86_BUILTIN_SI256_SI
,
25777 IX86_BUILTIN_PS256_PS
,
25778 IX86_BUILTIN_PD256_PD
,
25779 IX86_BUILTIN_SI_SI256
,
25780 IX86_BUILTIN_PS_PS256
,
25781 IX86_BUILTIN_PD_PD256
,
25783 IX86_BUILTIN_VTESTZPD
,
25784 IX86_BUILTIN_VTESTCPD
,
25785 IX86_BUILTIN_VTESTNZCPD
,
25786 IX86_BUILTIN_VTESTZPS
,
25787 IX86_BUILTIN_VTESTCPS
,
25788 IX86_BUILTIN_VTESTNZCPS
,
25789 IX86_BUILTIN_VTESTZPD256
,
25790 IX86_BUILTIN_VTESTCPD256
,
25791 IX86_BUILTIN_VTESTNZCPD256
,
25792 IX86_BUILTIN_VTESTZPS256
,
25793 IX86_BUILTIN_VTESTCPS256
,
25794 IX86_BUILTIN_VTESTNZCPS256
,
25795 IX86_BUILTIN_PTESTZ256
,
25796 IX86_BUILTIN_PTESTC256
,
25797 IX86_BUILTIN_PTESTNZC256
,
25799 IX86_BUILTIN_MOVMSKPD256
,
25800 IX86_BUILTIN_MOVMSKPS256
,
25803 IX86_BUILTIN_MPSADBW256
,
25804 IX86_BUILTIN_PABSB256
,
25805 IX86_BUILTIN_PABSW256
,
25806 IX86_BUILTIN_PABSD256
,
25807 IX86_BUILTIN_PACKSSDW256
,
25808 IX86_BUILTIN_PACKSSWB256
,
25809 IX86_BUILTIN_PACKUSDW256
,
25810 IX86_BUILTIN_PACKUSWB256
,
25811 IX86_BUILTIN_PADDB256
,
25812 IX86_BUILTIN_PADDW256
,
25813 IX86_BUILTIN_PADDD256
,
25814 IX86_BUILTIN_PADDQ256
,
25815 IX86_BUILTIN_PADDSB256
,
25816 IX86_BUILTIN_PADDSW256
,
25817 IX86_BUILTIN_PADDUSB256
,
25818 IX86_BUILTIN_PADDUSW256
,
25819 IX86_BUILTIN_PALIGNR256
,
25820 IX86_BUILTIN_AND256I
,
25821 IX86_BUILTIN_ANDNOT256I
,
25822 IX86_BUILTIN_PAVGB256
,
25823 IX86_BUILTIN_PAVGW256
,
25824 IX86_BUILTIN_PBLENDVB256
,
25825 IX86_BUILTIN_PBLENDVW256
,
25826 IX86_BUILTIN_PCMPEQB256
,
25827 IX86_BUILTIN_PCMPEQW256
,
25828 IX86_BUILTIN_PCMPEQD256
,
25829 IX86_BUILTIN_PCMPEQQ256
,
25830 IX86_BUILTIN_PCMPGTB256
,
25831 IX86_BUILTIN_PCMPGTW256
,
25832 IX86_BUILTIN_PCMPGTD256
,
25833 IX86_BUILTIN_PCMPGTQ256
,
25834 IX86_BUILTIN_PHADDW256
,
25835 IX86_BUILTIN_PHADDD256
,
25836 IX86_BUILTIN_PHADDSW256
,
25837 IX86_BUILTIN_PHSUBW256
,
25838 IX86_BUILTIN_PHSUBD256
,
25839 IX86_BUILTIN_PHSUBSW256
,
25840 IX86_BUILTIN_PMADDUBSW256
,
25841 IX86_BUILTIN_PMADDWD256
,
25842 IX86_BUILTIN_PMAXSB256
,
25843 IX86_BUILTIN_PMAXSW256
,
25844 IX86_BUILTIN_PMAXSD256
,
25845 IX86_BUILTIN_PMAXUB256
,
25846 IX86_BUILTIN_PMAXUW256
,
25847 IX86_BUILTIN_PMAXUD256
,
25848 IX86_BUILTIN_PMINSB256
,
25849 IX86_BUILTIN_PMINSW256
,
25850 IX86_BUILTIN_PMINSD256
,
25851 IX86_BUILTIN_PMINUB256
,
25852 IX86_BUILTIN_PMINUW256
,
25853 IX86_BUILTIN_PMINUD256
,
25854 IX86_BUILTIN_PMOVMSKB256
,
25855 IX86_BUILTIN_PMOVSXBW256
,
25856 IX86_BUILTIN_PMOVSXBD256
,
25857 IX86_BUILTIN_PMOVSXBQ256
,
25858 IX86_BUILTIN_PMOVSXWD256
,
25859 IX86_BUILTIN_PMOVSXWQ256
,
25860 IX86_BUILTIN_PMOVSXDQ256
,
25861 IX86_BUILTIN_PMOVZXBW256
,
25862 IX86_BUILTIN_PMOVZXBD256
,
25863 IX86_BUILTIN_PMOVZXBQ256
,
25864 IX86_BUILTIN_PMOVZXWD256
,
25865 IX86_BUILTIN_PMOVZXWQ256
,
25866 IX86_BUILTIN_PMOVZXDQ256
,
25867 IX86_BUILTIN_PMULDQ256
,
25868 IX86_BUILTIN_PMULHRSW256
,
25869 IX86_BUILTIN_PMULHUW256
,
25870 IX86_BUILTIN_PMULHW256
,
25871 IX86_BUILTIN_PMULLW256
,
25872 IX86_BUILTIN_PMULLD256
,
25873 IX86_BUILTIN_PMULUDQ256
,
25874 IX86_BUILTIN_POR256
,
25875 IX86_BUILTIN_PSADBW256
,
25876 IX86_BUILTIN_PSHUFB256
,
25877 IX86_BUILTIN_PSHUFD256
,
25878 IX86_BUILTIN_PSHUFHW256
,
25879 IX86_BUILTIN_PSHUFLW256
,
25880 IX86_BUILTIN_PSIGNB256
,
25881 IX86_BUILTIN_PSIGNW256
,
25882 IX86_BUILTIN_PSIGND256
,
25883 IX86_BUILTIN_PSLLDQI256
,
25884 IX86_BUILTIN_PSLLWI256
,
25885 IX86_BUILTIN_PSLLW256
,
25886 IX86_BUILTIN_PSLLDI256
,
25887 IX86_BUILTIN_PSLLD256
,
25888 IX86_BUILTIN_PSLLQI256
,
25889 IX86_BUILTIN_PSLLQ256
,
25890 IX86_BUILTIN_PSRAWI256
,
25891 IX86_BUILTIN_PSRAW256
,
25892 IX86_BUILTIN_PSRADI256
,
25893 IX86_BUILTIN_PSRAD256
,
25894 IX86_BUILTIN_PSRLDQI256
,
25895 IX86_BUILTIN_PSRLWI256
,
25896 IX86_BUILTIN_PSRLW256
,
25897 IX86_BUILTIN_PSRLDI256
,
25898 IX86_BUILTIN_PSRLD256
,
25899 IX86_BUILTIN_PSRLQI256
,
25900 IX86_BUILTIN_PSRLQ256
,
25901 IX86_BUILTIN_PSUBB256
,
25902 IX86_BUILTIN_PSUBW256
,
25903 IX86_BUILTIN_PSUBD256
,
25904 IX86_BUILTIN_PSUBQ256
,
25905 IX86_BUILTIN_PSUBSB256
,
25906 IX86_BUILTIN_PSUBSW256
,
25907 IX86_BUILTIN_PSUBUSB256
,
25908 IX86_BUILTIN_PSUBUSW256
,
25909 IX86_BUILTIN_PUNPCKHBW256
,
25910 IX86_BUILTIN_PUNPCKHWD256
,
25911 IX86_BUILTIN_PUNPCKHDQ256
,
25912 IX86_BUILTIN_PUNPCKHQDQ256
,
25913 IX86_BUILTIN_PUNPCKLBW256
,
25914 IX86_BUILTIN_PUNPCKLWD256
,
25915 IX86_BUILTIN_PUNPCKLDQ256
,
25916 IX86_BUILTIN_PUNPCKLQDQ256
,
25917 IX86_BUILTIN_PXOR256
,
25918 IX86_BUILTIN_MOVNTDQA256
,
25919 IX86_BUILTIN_VBROADCASTSS_PS
,
25920 IX86_BUILTIN_VBROADCASTSS_PS256
,
25921 IX86_BUILTIN_VBROADCASTSD_PD256
,
25922 IX86_BUILTIN_VBROADCASTSI256
,
25923 IX86_BUILTIN_PBLENDD256
,
25924 IX86_BUILTIN_PBLENDD128
,
25925 IX86_BUILTIN_PBROADCASTB256
,
25926 IX86_BUILTIN_PBROADCASTW256
,
25927 IX86_BUILTIN_PBROADCASTD256
,
25928 IX86_BUILTIN_PBROADCASTQ256
,
25929 IX86_BUILTIN_PBROADCASTB128
,
25930 IX86_BUILTIN_PBROADCASTW128
,
25931 IX86_BUILTIN_PBROADCASTD128
,
25932 IX86_BUILTIN_PBROADCASTQ128
,
25933 IX86_BUILTIN_VPERMVARSI256
,
25934 IX86_BUILTIN_VPERMDF256
,
25935 IX86_BUILTIN_VPERMVARSF256
,
25936 IX86_BUILTIN_VPERMDI256
,
25937 IX86_BUILTIN_VPERMTI256
,
25938 IX86_BUILTIN_VEXTRACT128I256
,
25939 IX86_BUILTIN_VINSERT128I256
,
25940 IX86_BUILTIN_MASKLOADD
,
25941 IX86_BUILTIN_MASKLOADQ
,
25942 IX86_BUILTIN_MASKLOADD256
,
25943 IX86_BUILTIN_MASKLOADQ256
,
25944 IX86_BUILTIN_MASKSTORED
,
25945 IX86_BUILTIN_MASKSTOREQ
,
25946 IX86_BUILTIN_MASKSTORED256
,
25947 IX86_BUILTIN_MASKSTOREQ256
,
25948 IX86_BUILTIN_PSLLVV4DI
,
25949 IX86_BUILTIN_PSLLVV2DI
,
25950 IX86_BUILTIN_PSLLVV8SI
,
25951 IX86_BUILTIN_PSLLVV4SI
,
25952 IX86_BUILTIN_PSRAVV8SI
,
25953 IX86_BUILTIN_PSRAVV4SI
,
25954 IX86_BUILTIN_PSRLVV4DI
,
25955 IX86_BUILTIN_PSRLVV2DI
,
25956 IX86_BUILTIN_PSRLVV8SI
,
25957 IX86_BUILTIN_PSRLVV4SI
,
25959 IX86_BUILTIN_GATHERSIV2DF
,
25960 IX86_BUILTIN_GATHERSIV4DF
,
25961 IX86_BUILTIN_GATHERDIV2DF
,
25962 IX86_BUILTIN_GATHERDIV4DF
,
25963 IX86_BUILTIN_GATHERSIV4SF
,
25964 IX86_BUILTIN_GATHERSIV8SF
,
25965 IX86_BUILTIN_GATHERDIV4SF
,
25966 IX86_BUILTIN_GATHERDIV8SF
,
25967 IX86_BUILTIN_GATHERSIV2DI
,
25968 IX86_BUILTIN_GATHERSIV4DI
,
25969 IX86_BUILTIN_GATHERDIV2DI
,
25970 IX86_BUILTIN_GATHERDIV4DI
,
25971 IX86_BUILTIN_GATHERSIV4SI
,
25972 IX86_BUILTIN_GATHERSIV8SI
,
25973 IX86_BUILTIN_GATHERDIV4SI
,
25974 IX86_BUILTIN_GATHERDIV8SI
,
25976 /* Alternate 4 element gather for the vectorizer where
25977 all operands are 32-byte wide. */
25978 IX86_BUILTIN_GATHERALTSIV4DF
,
25979 IX86_BUILTIN_GATHERALTDIV8SF
,
25980 IX86_BUILTIN_GATHERALTSIV4DI
,
25981 IX86_BUILTIN_GATHERALTDIV8SI
,
25983 /* TFmode support builtins. */
25985 IX86_BUILTIN_HUGE_VALQ
,
25986 IX86_BUILTIN_FABSQ
,
25987 IX86_BUILTIN_COPYSIGNQ
,
25989 /* Vectorizer support builtins. */
25990 IX86_BUILTIN_CPYSGNPS
,
25991 IX86_BUILTIN_CPYSGNPD
,
25992 IX86_BUILTIN_CPYSGNPS256
,
25993 IX86_BUILTIN_CPYSGNPD256
,
25995 /* FMA4 instructions. */
25996 IX86_BUILTIN_VFMADDSS
,
25997 IX86_BUILTIN_VFMADDSD
,
25998 IX86_BUILTIN_VFMADDPS
,
25999 IX86_BUILTIN_VFMADDPD
,
26000 IX86_BUILTIN_VFMADDPS256
,
26001 IX86_BUILTIN_VFMADDPD256
,
26002 IX86_BUILTIN_VFMADDSUBPS
,
26003 IX86_BUILTIN_VFMADDSUBPD
,
26004 IX86_BUILTIN_VFMADDSUBPS256
,
26005 IX86_BUILTIN_VFMADDSUBPD256
,
26007 /* FMA3 instructions. */
26008 IX86_BUILTIN_VFMADDSS3
,
26009 IX86_BUILTIN_VFMADDSD3
,
26011 /* XOP instructions. */
26012 IX86_BUILTIN_VPCMOV
,
26013 IX86_BUILTIN_VPCMOV_V2DI
,
26014 IX86_BUILTIN_VPCMOV_V4SI
,
26015 IX86_BUILTIN_VPCMOV_V8HI
,
26016 IX86_BUILTIN_VPCMOV_V16QI
,
26017 IX86_BUILTIN_VPCMOV_V4SF
,
26018 IX86_BUILTIN_VPCMOV_V2DF
,
26019 IX86_BUILTIN_VPCMOV256
,
26020 IX86_BUILTIN_VPCMOV_V4DI256
,
26021 IX86_BUILTIN_VPCMOV_V8SI256
,
26022 IX86_BUILTIN_VPCMOV_V16HI256
,
26023 IX86_BUILTIN_VPCMOV_V32QI256
,
26024 IX86_BUILTIN_VPCMOV_V8SF256
,
26025 IX86_BUILTIN_VPCMOV_V4DF256
,
26027 IX86_BUILTIN_VPPERM
,
26029 IX86_BUILTIN_VPMACSSWW
,
26030 IX86_BUILTIN_VPMACSWW
,
26031 IX86_BUILTIN_VPMACSSWD
,
26032 IX86_BUILTIN_VPMACSWD
,
26033 IX86_BUILTIN_VPMACSSDD
,
26034 IX86_BUILTIN_VPMACSDD
,
26035 IX86_BUILTIN_VPMACSSDQL
,
26036 IX86_BUILTIN_VPMACSSDQH
,
26037 IX86_BUILTIN_VPMACSDQL
,
26038 IX86_BUILTIN_VPMACSDQH
,
26039 IX86_BUILTIN_VPMADCSSWD
,
26040 IX86_BUILTIN_VPMADCSWD
,
26042 IX86_BUILTIN_VPHADDBW
,
26043 IX86_BUILTIN_VPHADDBD
,
26044 IX86_BUILTIN_VPHADDBQ
,
26045 IX86_BUILTIN_VPHADDWD
,
26046 IX86_BUILTIN_VPHADDWQ
,
26047 IX86_BUILTIN_VPHADDDQ
,
26048 IX86_BUILTIN_VPHADDUBW
,
26049 IX86_BUILTIN_VPHADDUBD
,
26050 IX86_BUILTIN_VPHADDUBQ
,
26051 IX86_BUILTIN_VPHADDUWD
,
26052 IX86_BUILTIN_VPHADDUWQ
,
26053 IX86_BUILTIN_VPHADDUDQ
,
26054 IX86_BUILTIN_VPHSUBBW
,
26055 IX86_BUILTIN_VPHSUBWD
,
26056 IX86_BUILTIN_VPHSUBDQ
,
26058 IX86_BUILTIN_VPROTB
,
26059 IX86_BUILTIN_VPROTW
,
26060 IX86_BUILTIN_VPROTD
,
26061 IX86_BUILTIN_VPROTQ
,
26062 IX86_BUILTIN_VPROTB_IMM
,
26063 IX86_BUILTIN_VPROTW_IMM
,
26064 IX86_BUILTIN_VPROTD_IMM
,
26065 IX86_BUILTIN_VPROTQ_IMM
,
26067 IX86_BUILTIN_VPSHLB
,
26068 IX86_BUILTIN_VPSHLW
,
26069 IX86_BUILTIN_VPSHLD
,
26070 IX86_BUILTIN_VPSHLQ
,
26071 IX86_BUILTIN_VPSHAB
,
26072 IX86_BUILTIN_VPSHAW
,
26073 IX86_BUILTIN_VPSHAD
,
26074 IX86_BUILTIN_VPSHAQ
,
26076 IX86_BUILTIN_VFRCZSS
,
26077 IX86_BUILTIN_VFRCZSD
,
26078 IX86_BUILTIN_VFRCZPS
,
26079 IX86_BUILTIN_VFRCZPD
,
26080 IX86_BUILTIN_VFRCZPS256
,
26081 IX86_BUILTIN_VFRCZPD256
,
26083 IX86_BUILTIN_VPCOMEQUB
,
26084 IX86_BUILTIN_VPCOMNEUB
,
26085 IX86_BUILTIN_VPCOMLTUB
,
26086 IX86_BUILTIN_VPCOMLEUB
,
26087 IX86_BUILTIN_VPCOMGTUB
,
26088 IX86_BUILTIN_VPCOMGEUB
,
26089 IX86_BUILTIN_VPCOMFALSEUB
,
26090 IX86_BUILTIN_VPCOMTRUEUB
,
26092 IX86_BUILTIN_VPCOMEQUW
,
26093 IX86_BUILTIN_VPCOMNEUW
,
26094 IX86_BUILTIN_VPCOMLTUW
,
26095 IX86_BUILTIN_VPCOMLEUW
,
26096 IX86_BUILTIN_VPCOMGTUW
,
26097 IX86_BUILTIN_VPCOMGEUW
,
26098 IX86_BUILTIN_VPCOMFALSEUW
,
26099 IX86_BUILTIN_VPCOMTRUEUW
,
26101 IX86_BUILTIN_VPCOMEQUD
,
26102 IX86_BUILTIN_VPCOMNEUD
,
26103 IX86_BUILTIN_VPCOMLTUD
,
26104 IX86_BUILTIN_VPCOMLEUD
,
26105 IX86_BUILTIN_VPCOMGTUD
,
26106 IX86_BUILTIN_VPCOMGEUD
,
26107 IX86_BUILTIN_VPCOMFALSEUD
,
26108 IX86_BUILTIN_VPCOMTRUEUD
,
26110 IX86_BUILTIN_VPCOMEQUQ
,
26111 IX86_BUILTIN_VPCOMNEUQ
,
26112 IX86_BUILTIN_VPCOMLTUQ
,
26113 IX86_BUILTIN_VPCOMLEUQ
,
26114 IX86_BUILTIN_VPCOMGTUQ
,
26115 IX86_BUILTIN_VPCOMGEUQ
,
26116 IX86_BUILTIN_VPCOMFALSEUQ
,
26117 IX86_BUILTIN_VPCOMTRUEUQ
,
26119 IX86_BUILTIN_VPCOMEQB
,
26120 IX86_BUILTIN_VPCOMNEB
,
26121 IX86_BUILTIN_VPCOMLTB
,
26122 IX86_BUILTIN_VPCOMLEB
,
26123 IX86_BUILTIN_VPCOMGTB
,
26124 IX86_BUILTIN_VPCOMGEB
,
26125 IX86_BUILTIN_VPCOMFALSEB
,
26126 IX86_BUILTIN_VPCOMTRUEB
,
26128 IX86_BUILTIN_VPCOMEQW
,
26129 IX86_BUILTIN_VPCOMNEW
,
26130 IX86_BUILTIN_VPCOMLTW
,
26131 IX86_BUILTIN_VPCOMLEW
,
26132 IX86_BUILTIN_VPCOMGTW
,
26133 IX86_BUILTIN_VPCOMGEW
,
26134 IX86_BUILTIN_VPCOMFALSEW
,
26135 IX86_BUILTIN_VPCOMTRUEW
,
26137 IX86_BUILTIN_VPCOMEQD
,
26138 IX86_BUILTIN_VPCOMNED
,
26139 IX86_BUILTIN_VPCOMLTD
,
26140 IX86_BUILTIN_VPCOMLED
,
26141 IX86_BUILTIN_VPCOMGTD
,
26142 IX86_BUILTIN_VPCOMGED
,
26143 IX86_BUILTIN_VPCOMFALSED
,
26144 IX86_BUILTIN_VPCOMTRUED
,
26146 IX86_BUILTIN_VPCOMEQQ
,
26147 IX86_BUILTIN_VPCOMNEQ
,
26148 IX86_BUILTIN_VPCOMLTQ
,
26149 IX86_BUILTIN_VPCOMLEQ
,
26150 IX86_BUILTIN_VPCOMGTQ
,
26151 IX86_BUILTIN_VPCOMGEQ
,
26152 IX86_BUILTIN_VPCOMFALSEQ
,
26153 IX86_BUILTIN_VPCOMTRUEQ
,
26155 /* LWP instructions. */
26156 IX86_BUILTIN_LLWPCB
,
26157 IX86_BUILTIN_SLWPCB
,
26158 IX86_BUILTIN_LWPVAL32
,
26159 IX86_BUILTIN_LWPVAL64
,
26160 IX86_BUILTIN_LWPINS32
,
26161 IX86_BUILTIN_LWPINS64
,
26165 /* BMI instructions. */
26166 IX86_BUILTIN_BEXTR32
,
26167 IX86_BUILTIN_BEXTR64
,
26170 /* TBM instructions. */
26171 IX86_BUILTIN_BEXTRI32
,
26172 IX86_BUILTIN_BEXTRI64
,
26174 /* BMI2 instructions. */
26175 IX86_BUILTIN_BZHI32
,
26176 IX86_BUILTIN_BZHI64
,
26177 IX86_BUILTIN_PDEP32
,
26178 IX86_BUILTIN_PDEP64
,
26179 IX86_BUILTIN_PEXT32
,
26180 IX86_BUILTIN_PEXT64
,
26182 /* FSGSBASE instructions. */
26183 IX86_BUILTIN_RDFSBASE32
,
26184 IX86_BUILTIN_RDFSBASE64
,
26185 IX86_BUILTIN_RDGSBASE32
,
26186 IX86_BUILTIN_RDGSBASE64
,
26187 IX86_BUILTIN_WRFSBASE32
,
26188 IX86_BUILTIN_WRFSBASE64
,
26189 IX86_BUILTIN_WRGSBASE32
,
26190 IX86_BUILTIN_WRGSBASE64
,
26192 /* RDRND instructions. */
26193 IX86_BUILTIN_RDRAND16_STEP
,
26194 IX86_BUILTIN_RDRAND32_STEP
,
26195 IX86_BUILTIN_RDRAND64_STEP
,
26197 /* F16C instructions. */
26198 IX86_BUILTIN_CVTPH2PS
,
26199 IX86_BUILTIN_CVTPH2PS256
,
26200 IX86_BUILTIN_CVTPS2PH
,
26201 IX86_BUILTIN_CVTPS2PH256
,
26203 /* CFString built-in for darwin */
26204 IX86_BUILTIN_CFSTRING
,
26209 /* Table for the ix86 builtin decls. */
26210 static GTY(()) tree ix86_builtins
[(int) IX86_BUILTIN_MAX
];
26212 /* Table of all of the builtin functions that are possible with different ISA's
26213 but are waiting to be built until a function is declared to use that
26215 struct builtin_isa
{
26216 const char *name
; /* function name */
26217 enum ix86_builtin_func_type tcode
; /* type to use in the declaration */
26218 HOST_WIDE_INT isa
; /* isa_flags this builtin is defined for */
26219 bool const_p
; /* true if the declaration is constant */
26220 bool set_and_not_built_p
;
26223 static struct builtin_isa ix86_builtins_isa
[(int) IX86_BUILTIN_MAX
];
26226 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
26227 of which isa_flags to use in the ix86_builtins_isa array. Stores the
26228 function decl in the ix86_builtins array. Returns the function decl or
26229 NULL_TREE, if the builtin was not added.
26231 If the front end has a special hook for builtin functions, delay adding
26232 builtin functions that aren't in the current ISA until the ISA is changed
26233 with function specific optimization. Doing so, can save about 300K for the
26234 default compiler. When the builtin is expanded, check at that time whether
26237 If the front end doesn't have a special hook, record all builtins, even if
26238 it isn't an instruction set in the current ISA in case the user uses
26239 function specific options for a different ISA, so that we don't get scope
26240 errors if a builtin is added in the middle of a function scope. */
26243 def_builtin (HOST_WIDE_INT mask
, const char *name
,
26244 enum ix86_builtin_func_type tcode
,
26245 enum ix86_builtins code
)
26247 tree decl
= NULL_TREE
;
26249 if (!(mask
& OPTION_MASK_ISA_64BIT
) || TARGET_64BIT
)
26251 ix86_builtins_isa
[(int) code
].isa
= mask
;
26253 mask
&= ~OPTION_MASK_ISA_64BIT
;
26255 || (mask
& ix86_isa_flags
) != 0
26256 || (lang_hooks
.builtin_function
26257 == lang_hooks
.builtin_function_ext_scope
))
26260 tree type
= ix86_get_builtin_func_type (tcode
);
26261 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
26263 ix86_builtins
[(int) code
] = decl
;
26264 ix86_builtins_isa
[(int) code
].set_and_not_built_p
= false;
26268 ix86_builtins
[(int) code
] = NULL_TREE
;
26269 ix86_builtins_isa
[(int) code
].tcode
= tcode
;
26270 ix86_builtins_isa
[(int) code
].name
= name
;
26271 ix86_builtins_isa
[(int) code
].const_p
= false;
26272 ix86_builtins_isa
[(int) code
].set_and_not_built_p
= true;
26279 /* Like def_builtin, but also marks the function decl "const". */
26282 def_builtin_const (HOST_WIDE_INT mask
, const char *name
,
26283 enum ix86_builtin_func_type tcode
, enum ix86_builtins code
)
26285 tree decl
= def_builtin (mask
, name
, tcode
, code
);
26287 TREE_READONLY (decl
) = 1;
26289 ix86_builtins_isa
[(int) code
].const_p
= true;
26294 /* Add any new builtin functions for a given ISA that may not have been
26295 declared. This saves a bit of space compared to adding all of the
26296 declarations to the tree, even if we didn't use them. */
26299 ix86_add_new_builtins (HOST_WIDE_INT isa
)
26303 for (i
= 0; i
< (int)IX86_BUILTIN_MAX
; i
++)
26305 if ((ix86_builtins_isa
[i
].isa
& isa
) != 0
26306 && ix86_builtins_isa
[i
].set_and_not_built_p
)
26310 /* Don't define the builtin again. */
26311 ix86_builtins_isa
[i
].set_and_not_built_p
= false;
26313 type
= ix86_get_builtin_func_type (ix86_builtins_isa
[i
].tcode
);
26314 decl
= add_builtin_function_ext_scope (ix86_builtins_isa
[i
].name
,
26315 type
, i
, BUILT_IN_MD
, NULL
,
26318 ix86_builtins
[i
] = decl
;
26319 if (ix86_builtins_isa
[i
].const_p
)
26320 TREE_READONLY (decl
) = 1;
26325 /* Bits for builtin_description.flag. */
26327 /* Set when we don't support the comparison natively, and should
26328 swap_comparison in order to support it. */
26329 #define BUILTIN_DESC_SWAP_OPERANDS 1
26331 struct builtin_description
26333 const HOST_WIDE_INT mask
;
26334 const enum insn_code icode
;
26335 const char *const name
;
26336 const enum ix86_builtins code
;
26337 const enum rtx_code comparison
;
26341 static const struct builtin_description bdesc_comi
[] =
26343 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
26344 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
26345 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
26346 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
26347 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
26348 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
26349 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
26350 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
26351 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
26352 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
26353 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
26354 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
26355 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
26356 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
26357 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
26358 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
26359 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
26360 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
26361 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
26362 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
26363 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
26364 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
26365 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
26366 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
26369 static const struct builtin_description bdesc_pcmpestr
[] =
26372 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128
, UNKNOWN
, 0 },
26373 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128
, UNKNOWN
, 0 },
26374 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128
, UNKNOWN
, (int) CCAmode
},
26375 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128
, UNKNOWN
, (int) CCCmode
},
26376 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128
, UNKNOWN
, (int) CCOmode
},
26377 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128
, UNKNOWN
, (int) CCSmode
},
26378 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128
, UNKNOWN
, (int) CCZmode
},
26381 static const struct builtin_description bdesc_pcmpistr
[] =
26384 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128
, UNKNOWN
, 0 },
26385 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128
, UNKNOWN
, 0 },
26386 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128
, UNKNOWN
, (int) CCAmode
},
26387 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128
, UNKNOWN
, (int) CCCmode
},
26388 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128
, UNKNOWN
, (int) CCOmode
},
26389 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128
, UNKNOWN
, (int) CCSmode
},
26390 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128
, UNKNOWN
, (int) CCZmode
},
26393 /* Special builtins with variable number of arguments. */
26394 static const struct builtin_description bdesc_special_args
[] =
26396 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rdtsc
, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
26397 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rdtscp
, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP
, UNKNOWN
, (int) UINT64_FTYPE_PUNSIGNED
},
26398 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_pause
, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26401 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_emms
, "__builtin_ia32_emms", IX86_BUILTIN_EMMS
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26404 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_femms
, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26407 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movups
, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
26408 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movntv4sf
, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
26409 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movups
, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS
, UNKNOWN
, (int) V4SF_FTYPE_PCFLOAT
},
26411 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadhps_exp
, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_PCV2SF
},
26412 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadlps_exp
, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_PCV2SF
},
26413 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storehps
, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS
, UNKNOWN
, (int) VOID_FTYPE_PV2SF_V4SF
},
26414 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storelps
, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS
, UNKNOWN
, (int) VOID_FTYPE_PV2SF_V4SF
},
26416 /* SSE or 3DNow!A */
26417 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_sse_sfence
, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26418 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_sse_movntdi
, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ
, UNKNOWN
, (int) VOID_FTYPE_PULONGLONG_ULONGLONG
},
26421 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_lfence
, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26422 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_mfence
, 0, IX86_BUILTIN_MFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26423 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movupd
, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
26424 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movdqu
, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU
, UNKNOWN
, (int) VOID_FTYPE_PCHAR_V16QI
},
26425 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntv2df
, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
26426 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntv2di
, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ
, UNKNOWN
, (int) VOID_FTYPE_PV2DI_V2DI
},
26427 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntisi
, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI
, UNKNOWN
, (int) VOID_FTYPE_PINT_INT
},
26428 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_movntidi
, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64
, UNKNOWN
, (int) VOID_FTYPE_PLONGLONG_LONGLONG
},
26429 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movupd
, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD
, UNKNOWN
, (int) V2DF_FTYPE_PCDOUBLE
},
26430 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movdqu
, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU
, UNKNOWN
, (int) V16QI_FTYPE_PCCHAR
},
26432 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadhpd_exp
, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_PCDOUBLE
},
26433 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadlpd_exp
, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_PCDOUBLE
},
26436 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_lddqu
, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU
, UNKNOWN
, (int) V16QI_FTYPE_PCCHAR
},
26439 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_movntdqa
, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA
, UNKNOWN
, (int) V2DI_FTYPE_PV2DI
},
26442 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_vmmovntv2df
, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
26443 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_vmmovntv4sf
, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
26446 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vzeroall
, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26447 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vzeroupper
, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26449 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv4sf
, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS
, UNKNOWN
, (int) V4SF_FTYPE_PCFLOAT
},
26450 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv4df
, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256
, UNKNOWN
, (int) V4DF_FTYPE_PCDOUBLE
},
26451 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv8sf
, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256
, UNKNOWN
, (int) V8SF_FTYPE_PCFLOAT
},
26452 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vbroadcastf128_v4df
, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCV2DF
},
26453 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vbroadcastf128_v8sf
, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCV4SF
},
26455 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movupd256
, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCDOUBLE
},
26456 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movups256
, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCFLOAT
},
26457 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movupd256
, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V4DF
},
26458 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movups256
, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V8SF
},
26459 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movdqu256
, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256
, UNKNOWN
, (int) V32QI_FTYPE_PCCHAR
},
26460 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movdqu256
, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256
, UNKNOWN
, (int) VOID_FTYPE_PCHAR_V32QI
},
26461 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_lddqu256
, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256
, UNKNOWN
, (int) V32QI_FTYPE_PCCHAR
},
26463 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv4di
, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256
, UNKNOWN
, (int) VOID_FTYPE_PV4DI_V4DI
},
26464 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv4df
, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V4DF
},
26465 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv8sf
, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V8SF
},
26467 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadpd
, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD
, UNKNOWN
, (int) V2DF_FTYPE_PCV2DF_V2DI
},
26468 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadps
, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS
, UNKNOWN
, (int) V4SF_FTYPE_PCV4SF_V4SI
},
26469 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadpd256
, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCV4DF_V4DI
},
26470 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadps256
, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCV8SF_V8SI
},
26471 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstorepd
, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD
, UNKNOWN
, (int) VOID_FTYPE_PV2DF_V2DI_V2DF
},
26472 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstoreps
, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS
, UNKNOWN
, (int) VOID_FTYPE_PV4SF_V4SI_V4SF
},
26473 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstorepd256
, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256
, UNKNOWN
, (int) VOID_FTYPE_PV4DF_V4DI_V4DF
},
26474 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstoreps256
, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256
, UNKNOWN
, (int) VOID_FTYPE_PV8SF_V8SI_V8SF
},
26477 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_movntdqa
, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256
, UNKNOWN
, (int) V4DI_FTYPE_PV4DI
},
26478 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadd
, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD
, UNKNOWN
, (int) V4SI_FTYPE_PCV4SI_V4SI
},
26479 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadq
, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ
, UNKNOWN
, (int) V2DI_FTYPE_PCV2DI_V2DI
},
26480 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadd256
, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256
, UNKNOWN
, (int) V8SI_FTYPE_PCV8SI_V8SI
},
26481 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadq256
, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256
, UNKNOWN
, (int) V4DI_FTYPE_PCV4DI_V4DI
},
26482 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstored
, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED
, UNKNOWN
, (int) VOID_FTYPE_PV4SI_V4SI_V4SI
},
26483 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstoreq
, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ
, UNKNOWN
, (int) VOID_FTYPE_PV2DI_V2DI_V2DI
},
26484 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstored256
, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256
, UNKNOWN
, (int) VOID_FTYPE_PV8SI_V8SI_V8SI
},
26485 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstoreq256
, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256
, UNKNOWN
, (int) VOID_FTYPE_PV4DI_V4DI_V4DI
},
26487 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_llwpcb
, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
26488 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_slwpcb
, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB
, UNKNOWN
, (int) PVOID_FTYPE_VOID
},
26489 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpvalsi3
, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32
, UNKNOWN
, (int) VOID_FTYPE_UINT_UINT_UINT
},
26490 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpvaldi3
, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64
, UNKNOWN
, (int) VOID_FTYPE_UINT64_UINT_UINT
},
26491 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpinssi3
, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32
, UNKNOWN
, (int) UCHAR_FTYPE_UINT_UINT_UINT
},
26492 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpinsdi3
, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64
, UNKNOWN
, (int) UCHAR_FTYPE_UINT64_UINT_UINT
},
26495 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdfsbasesi
, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
26496 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdfsbasedi
, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
26497 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdgsbasesi
, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
26498 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdgsbasedi
, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
26499 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrfsbasesi
, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32
, UNKNOWN
, (int) VOID_FTYPE_UNSIGNED
},
26500 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrfsbasedi
, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64
, UNKNOWN
, (int) VOID_FTYPE_UINT64
},
26501 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrgsbasesi
, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32
, UNKNOWN
, (int) VOID_FTYPE_UNSIGNED
},
26502 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrgsbasedi
, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64
, UNKNOWN
, (int) VOID_FTYPE_UINT64
},
26505 /* Builtins with variable number of arguments. */
26506 static const struct builtin_description bdesc_args
[] =
26508 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_bsr
, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI
, UNKNOWN
, (int) INT_FTYPE_INT
},
26509 { OPTION_MASK_ISA_64BIT
, CODE_FOR_bsr_rex64
, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI
, UNKNOWN
, (int) INT64_FTYPE_INT64
},
26510 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rdpmc
, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC
, UNKNOWN
, (int) UINT64_FTYPE_INT
},
26511 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotlqi3
, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI
, UNKNOWN
, (int) UINT8_FTYPE_UINT8_INT
},
26512 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotlhi3
, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI
, UNKNOWN
, (int) UINT16_FTYPE_UINT16_INT
},
26513 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotrqi3
, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI
, UNKNOWN
, (int) UINT8_FTYPE_UINT8_INT
},
26514 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotrhi3
, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI
, UNKNOWN
, (int) UINT16_FTYPE_UINT16_INT
},
26517 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26518 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26519 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26520 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26521 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26522 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26524 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26525 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26526 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26527 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26528 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26529 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26530 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26531 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26533 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26534 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26536 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26537 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andnotv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26538 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26539 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26541 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26542 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26543 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26544 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26545 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26546 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26548 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26549 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26550 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26551 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26552 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26553 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26555 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packsswb
, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB
, UNKNOWN
, (int) V8QI_FTYPE_V4HI_V4HI
},
26556 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packssdw
, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW
, UNKNOWN
, (int) V4HI_FTYPE_V2SI_V2SI
},
26557 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packuswb
, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB
, UNKNOWN
, (int) V8QI_FTYPE_V4HI_V4HI
},
26559 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_pmaddwd
, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD
, UNKNOWN
, (int) V2SI_FTYPE_V4HI_V4HI
},
26561 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
26562 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
26563 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv1di3
, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_SI_COUNT
},
26564 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
26565 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
26566 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv1di3
, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_COUNT
},
26568 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
26569 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
26570 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv1di3
, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_SI_COUNT
},
26571 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
26572 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
26573 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv1di3
, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_COUNT
},
26575 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
26576 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
26577 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
26578 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
26581 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_pf2id
, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID
, UNKNOWN
, (int) V2SI_FTYPE_V2SF
},
26582 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_floatv2si2
, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD
, UNKNOWN
, (int) V2SF_FTYPE_V2SI
},
26583 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpv2sf2
, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
26584 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rsqrtv2sf2
, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
26586 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26587 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_haddv2sf3
, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26588 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_addv2sf3
, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26589 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_eqv2sf3
, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
26590 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_gev2sf3
, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
26591 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_gtv2sf3
, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
26592 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_smaxv2sf3
, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26593 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_sminv2sf3
, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26594 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_mulv2sf3
, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26595 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpit1v2sf3
, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26596 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpit2v2sf3
, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26597 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rsqit1v2sf3
, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26598 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_subv2sf3
, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26599 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_subrv2sf3
, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26600 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_pmulhrwv4hi3
, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26603 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pf2iw
, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW
, UNKNOWN
, (int) V2SI_FTYPE_V2SF
},
26604 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pi2fw
, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW
, UNKNOWN
, (int) V2SF_FTYPE_V2SI
},
26605 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pswapdv2si2
, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI
},
26606 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pswapdv2sf2
, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
26607 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_hsubv2sf3
, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26608 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_addsubv2sf3
, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26611 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movmskps
, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
26612 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_sqrtv4sf2
, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26613 { OPTION_MASK_ISA_SSE
, CODE_FOR_sqrtv4sf2
, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26614 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rsqrtv4sf2
, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26615 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtv4sf2
, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26616 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rcpv4sf2
, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26617 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtps2pi
, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI
, UNKNOWN
, (int) V2SI_FTYPE_V4SF
},
26618 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtss2si
, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
26619 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtss2siq
, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF
},
26620 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttps2pi
, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI
, UNKNOWN
, (int) V2SI_FTYPE_V4SF
},
26621 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttss2si
, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
26622 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvttss2siq
, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF
},
26624 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_shufps
, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
26626 { OPTION_MASK_ISA_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26627 { OPTION_MASK_ISA_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26628 { OPTION_MASK_ISA_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26629 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26630 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26631 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26632 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26633 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26635 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, (int) V4SF_FTYPE_V4SF_V4SF
},
26636 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF
},
26637 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF
},
26638 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
/* There is no separate GT compare pattern: GT uses the LT code,
   and the _SWAP suffix on the ftype tells the expander to swap
   the two operands (a > b  <=>  b < a).  */
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
26639 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
26640 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
26641 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, (int) V4SF_FTYPE_V4SF_V4SF
},
26642 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
/* NLT ("not less than") is unordered-or-greater-or-equal, so the
   RTX comparison code is UNGE; NaN operands satisfy the predicate,
   matching the SSE cmpnltps instruction semantics.  */
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF
},
26643 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF
},
26644 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
26645 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
26646 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
26647 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, (int) V4SF_FTYPE_V4SF_V4SF
},
26648 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF
},
26649 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF
},
26650 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
26651 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, (int) V4SF_FTYPE_V4SF_V4SF
},
26652 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF
},
26653 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF
},
26654 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
26655 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
26656 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, ORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
26658 { OPTION_MASK_ISA_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26659 { OPTION_MASK_ISA_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26660 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26661 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26663 { OPTION_MASK_ISA_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26664 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_andnotv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26665 { OPTION_MASK_ISA_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26666 { OPTION_MASK_ISA_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26668 { OPTION_MASK_ISA_SSE
, CODE_FOR_copysignv4sf3
, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26670 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26671 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movhlps_exp
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26672 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movlhps_exp
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26673 { OPTION_MASK_ISA_SSE
, CODE_FOR_vec_interleave_highv4sf
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26674 { OPTION_MASK_ISA_SSE
, CODE_FOR_vec_interleave_lowv4sf
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26676 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtpi2ps
, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2SI
},
26677 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtsi2ss
, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_SI
},
26678 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtsi2ssq
, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS
, UNKNOWN
/* (int) cast added for consistency: every other entry in this
   table explicitly casts the ftype enum into the int flag field
   (the conversion is implicit in C but would be ill-formed if
   this file is ever compiled as C++).  */
, (int) V4SF_FTYPE_V4SF_DI
},
26680 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtsf2
, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF
, UNKNOWN
, (int) FLOAT_FTYPE_FLOAT
},
26682 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsqrtv4sf2
, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
26683 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrsqrtv4sf2
, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
26684 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrcpv4sf2
, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
26686 /* SSE MMX or 3Dnow!A */
26687 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26688 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26689 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26691 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26692 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26693 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26694 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26696 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_psadbw
, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW
, UNKNOWN
, (int) V1DI_FTYPE_V8QI_V8QI
},
26697 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB
, UNKNOWN
, (int) INT_FTYPE_V8QI
},
26699 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pshufw
, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_INT
},
26702 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_shufpd
, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
26704 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movmskpd
, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
26705 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmovmskb
, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128
, UNKNOWN
, (int) INT_FTYPE_V16QI
},
26706 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sqrtv2df2
, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF
},
26707 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtdq2pd
, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SI
},
26708 { OPTION_MASK_ISA_SSE2
, CODE_FOR_floatv4siv4sf2
, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SI
},
26710 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2dq
, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
26711 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2pi
, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
26712 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2ps
, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS
, UNKNOWN
, (int) V4SF_FTYPE_V2DF
},
26713 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2dq
, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
26714 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2pi
, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
26716 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpi2pd
, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD
, UNKNOWN
, (int) V2DF_FTYPE_V2SI
},
26718 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2si
, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
26719 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttsd2si
, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
26720 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsd2siq
, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF
},
26721 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvttsd2siq
, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF
},
26723 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2dq
, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
26724 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2pd
, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SF
},
26725 { OPTION_MASK_ISA_SSE2
, CODE_FOR_fix_truncv4sfv4si2
, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
26727 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26728 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26729 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26730 { OPTION_MASK_ISA_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26731 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26732 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26733 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26734 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26736 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, (int) V2DF_FTYPE_V2DF_V2DF
},
26737 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF
},
26738 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF
},
26739 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
26740 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
26741 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
26742 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, (int) V2DF_FTYPE_V2DF_V2DF
},
26743 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF
},
26744 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF
},
26745 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
26746 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
26747 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
26748 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, (int) V2DF_FTYPE_V2DF_V2DF
},
26749 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF
},
26750 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF
},
26751 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
26752 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, (int) V2DF_FTYPE_V2DF_V2DF
},
26753 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF
},
26754 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF
},
26755 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
26757 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26758 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26759 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26760 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26762 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26763 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_andnotv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26764 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26765 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26767 { OPTION_MASK_ISA_SSE2
, CODE_FOR_copysignv2df3
, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26769 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26770 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv2df
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26771 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv2df
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26773 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_pack_sfix_v2df
, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V2DF_V2DF
},
26775 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26776 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26777 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26778 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26779 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26780 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26781 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26782 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26784 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26785 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26786 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26787 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26788 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26789 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26790 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26791 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26793 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26794 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, UNKNOWN
,(int) V8HI_FTYPE_V8HI_V8HI
},
26796 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26797 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_andnotv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26798 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26799 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26801 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26802 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26804 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26805 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26806 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26807 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26808 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26809 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26811 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26812 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26813 { OPTION_MASK_ISA_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26814 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26816 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv16qi
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26817 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv8hi
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26818 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv4si
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26819 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv2di
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26820 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv16qi
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26821 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv8hi
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26822 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv4si
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26823 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv2di
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26825 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, UNKNOWN
, (int) V16QI_FTYPE_V8HI_V8HI
},
26826 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, UNKNOWN
, (int) V8HI_FTYPE_V4SI_V4SI
},
26827 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, UNKNOWN
, (int) V16QI_FTYPE_V8HI_V8HI
},
26829 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26830 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_psadbw
, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI_V16QI
},
26832 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_umulv1siv1di3
, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ
, UNKNOWN
, (int) V1DI_FTYPE_V2SI_V2SI
},
26833 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI_V4SI
},
26835 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmaddwd
, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI_V8HI
},
26837 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsi2sd
, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_SI
},
26838 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_DI
},
26839 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2ss
, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2DF
},
26840 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtss2sd
, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V4SF
},
26842 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ashlv1ti3
, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT_CONVERT
},
26843 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
26844 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
26845 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
26846 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
26847 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
26848 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
26850 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_lshrv1ti3
, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT_CONVERT
},
26851 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
26852 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
26853 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
26854 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
26855 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
26856 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
26858 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
26859 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
26860 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
26861 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
26863 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufd
, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_INT
},
26864 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshuflw
, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
26865 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufhw
, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
26867 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsqrtv2df2
, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_VEC_MERGE
},
26869 { OPTION_MASK_ISA_SSE2
, CODE_FOR_abstf2
, 0, IX86_BUILTIN_FABSQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128
},
26870 { OPTION_MASK_ISA_SSE2
, CODE_FOR_copysigntf3
, 0, IX86_BUILTIN_COPYSIGNQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128
},
26872 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse2_movq128
, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
26875 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mmx_addv1di3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI
},
26876 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mmx_subv1di3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI
},
26879 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_movshdup
, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26880 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_movsldup
, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26882 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26883 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26884 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26885 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26886 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26887 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26890 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv16qi2
, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI
},
26891 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8qi2
, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI
},
26892 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8hi2
, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
26893 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4hi2
, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI
},
26894 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4si2
, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI
},
26895 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv2si2
, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI
},
26897 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv8hi3
, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26898 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv4hi3
, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26899 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv4si3
, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26900 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv2si3
, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26901 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv8hi3
, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26902 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv4hi3
, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26903 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv8hi3
, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26904 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv4hi3
, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26905 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv4si3
, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26906 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv2si3
, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26907 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv8hi3
, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26908 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv4hi3
, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26909 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubsw128
, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI_V16QI
},
26910 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubsw
, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V8QI_V8QI
},
26911 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv8hi3
, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26912 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv4hi3
, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26913 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv16qi3
, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26914 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv8qi3
, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26915 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv16qi3
, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26916 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8qi3
, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26917 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8hi3
, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26918 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4hi3
, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26919 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4si3
, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26920 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv2si3
, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26923 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_palignrti
, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT
},
26924 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_palignrdi
, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT
},
26927 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendpd
, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
26928 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendps
, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
26929 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendvpd
, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_V2DF
},
26930 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendvps
, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_V4SF
},
26931 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_dppd
, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
26932 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_dpps
, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
26933 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_insertps
, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
26934 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_mpsadbw
, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI_INT
},
26935 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_pblendvb
, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI_V16QI
},
26936 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_pblendw
, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_INT
},
26938 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv8qiv8hi2
, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI
},
26939 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv4qiv4si2
, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128
, UNKNOWN
, (int) V4SI_FTYPE_V16QI
},
26940 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2qiv2di2
, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI
},
26941 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv4hiv4si2
, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI
},
26942 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2hiv2di2
, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128
, UNKNOWN
, (int) V2DI_FTYPE_V8HI
},
26943 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2siv2di2
, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI
},
26944 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv8qiv8hi2
, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI
},
26945 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4qiv4si2
, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128
, UNKNOWN
, (int) V4SI_FTYPE_V16QI
},
26946 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2qiv2di2
, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI
},
26947 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4hiv4si2
, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI
},
26948 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2hiv2di2
, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128
, UNKNOWN
, (int) V2DI_FTYPE_V8HI
},
26949 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2siv2di2
, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI
},
26950 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_phminposuw
, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
26952 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_packusdw
, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128
, UNKNOWN
, (int) V8HI_FTYPE_V4SI_V4SI
},
26953 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_eqv2di3
, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26954 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_smaxv16qi3
, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26955 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_smaxv4si3
, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26956 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_umaxv4si3
, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26957 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_umaxv8hi3
, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26958 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sminv16qi3
, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26959 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sminv4si3
, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26960 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_uminv4si3
, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26961 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_uminv8hi3
, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26962 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_mulv2siv2di3
, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI_V4SI
},
26963 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_mulv4si3
, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26966 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_INT
},
26967 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_INT
},
26968 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundsd
, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
26969 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundss
, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
26971 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD
, (enum rtx_code
) ROUND_FLOOR
, (int) V2DF_FTYPE_V2DF_ROUND
},
26972 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD
, (enum rtx_code
) ROUND_CEIL
, (int) V2DF_FTYPE_V2DF_ROUND
},
26973 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD
, (enum rtx_code
) ROUND_TRUNC
, (int) V2DF_FTYPE_V2DF_ROUND
},
26974 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD
, (enum rtx_code
) ROUND_MXCSR
, (int) V2DF_FTYPE_V2DF_ROUND
},
26976 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd_vec_pack_sfix
, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
, (enum rtx_code
) ROUND_FLOOR
, (int) V4SI_FTYPE_V2DF_V2DF_ROUND
},
26977 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd_vec_pack_sfix
, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
, (enum rtx_code
) ROUND_CEIL
, (int) V4SI_FTYPE_V2DF_V2DF_ROUND
},
26979 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv2df2
, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ
, UNKNOWN
, (int) V2DF_FTYPE_V2DF
},
26980 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv2df2_vec_pack_sfix
, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V2DF_V2DF
},
26982 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS
, (enum rtx_code
) ROUND_FLOOR
, (int) V4SF_FTYPE_V4SF_ROUND
},
26983 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS
, (enum rtx_code
) ROUND_CEIL
, (int) V4SF_FTYPE_V4SF_ROUND
},
26984 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS
, (enum rtx_code
) ROUND_TRUNC
, (int) V4SF_FTYPE_V4SF_ROUND
},
26985 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS
, (enum rtx_code
) ROUND_MXCSR
, (int) V4SF_FTYPE_V4SF_ROUND
},
26987 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps_sfix
, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX
, (enum rtx_code
) ROUND_FLOOR
, (int) V4SI_FTYPE_V4SF_ROUND
},
26988 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps_sfix
, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX
, (enum rtx_code
) ROUND_CEIL
, (int) V4SI_FTYPE_V4SF_ROUND
},
26990 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv4sf2
, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26991 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv4sf2_sfix
, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
26993 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ
, EQ
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
26994 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC
, LTU
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
26995 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC
, GTU
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
26998 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_gtv2di3
, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26999 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32qi
, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI
, UNKNOWN
, (int) UINT_FTYPE_UINT_UCHAR
},
27000 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32hi
, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI
, UNKNOWN
, (int) UINT_FTYPE_UINT_USHORT
},
27001 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32si
, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27002 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse4_2_crc32di
, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27005 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_extrqi
, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_UINT_UINT
},
27006 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_extrq
, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V16QI
},
27007 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_insertqi
, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT
},
27008 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_insertq
, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27011 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aeskeygenassist
, 0, IX86_BUILTIN_AESKEYGENASSIST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT
},
27012 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesimc
, 0, IX86_BUILTIN_AESIMC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
27014 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesenc
, 0, IX86_BUILTIN_AESENC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27015 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesenclast
, 0, IX86_BUILTIN_AESENCLAST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27016 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesdec
, 0, IX86_BUILTIN_AESDEC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27017 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesdeclast
, 0, IX86_BUILTIN_AESDECLAST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27020 { OPTION_MASK_ISA_SSE2
, CODE_FOR_pclmulqdq
, 0, IX86_BUILTIN_PCLMULQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_INT
},
27023 { OPTION_MASK_ISA_AVX
, CODE_FOR_addv4df3
, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27024 { OPTION_MASK_ISA_AVX
, CODE_FOR_addv8sf3
, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27025 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_addsubv4df3
, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27026 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_addsubv8sf3
, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27027 { OPTION_MASK_ISA_AVX
, CODE_FOR_andv4df3
, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27028 { OPTION_MASK_ISA_AVX
, CODE_FOR_andv8sf3
, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27029 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_andnotv4df3
, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27030 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_andnotv8sf3
, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27031 { OPTION_MASK_ISA_AVX
, CODE_FOR_divv4df3
, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27032 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_divv8sf3
, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27033 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_haddv4df3
, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27034 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_hsubv8sf3
, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27035 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_hsubv4df3
, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27036 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_haddv8sf3
, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27037 { OPTION_MASK_ISA_AVX
, CODE_FOR_smaxv4df3
, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27038 { OPTION_MASK_ISA_AVX
, CODE_FOR_smaxv8sf3
, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27039 { OPTION_MASK_ISA_AVX
, CODE_FOR_sminv4df3
, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27040 { OPTION_MASK_ISA_AVX
, CODE_FOR_sminv8sf3
, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27041 { OPTION_MASK_ISA_AVX
, CODE_FOR_mulv4df3
, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27042 { OPTION_MASK_ISA_AVX
, CODE_FOR_mulv8sf3
, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27043 { OPTION_MASK_ISA_AVX
, CODE_FOR_iorv4df3
, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27044 { OPTION_MASK_ISA_AVX
, CODE_FOR_iorv8sf3
, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27045 { OPTION_MASK_ISA_AVX
, CODE_FOR_subv4df3
, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27046 { OPTION_MASK_ISA_AVX
, CODE_FOR_subv8sf3
, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27047 { OPTION_MASK_ISA_AVX
, CODE_FOR_xorv4df3
, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27048 { OPTION_MASK_ISA_AVX
, CODE_FOR_xorv8sf3
, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27050 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv2df3
, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DI
},
27051 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv4sf3
, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SI
},
27052 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv4df3
, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DI
},
27053 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv8sf3
, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SI
},
27055 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendpd256
, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
27056 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendps256
, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
27057 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendvpd256
, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_V4DF
},
27058 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendvps256
, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_V8SF
},
27059 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_dpps256
, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
27060 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_shufpd256
, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
27061 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_shufps256
, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
27062 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vmcmpv2df3
, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
27063 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vmcmpv4sf3
, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27064 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv2df3
, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
27065 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv4sf3
, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27066 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv4df3
, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
27067 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv8sf3
, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
27068 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v4df
, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256
, UNKNOWN
, (int) V2DF_FTYPE_V4DF_INT
},
27069 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v8sf
, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256
, UNKNOWN
, (int) V4SF_FTYPE_V8SF_INT
},
27070 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v8si
, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256
, UNKNOWN
, (int) V4SI_FTYPE_V8SI_INT
},
27071 { OPTION_MASK_ISA_AVX
, CODE_FOR_floatv4siv4df2
, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4SI
},
27072 { OPTION_MASK_ISA_AVX
, CODE_FOR_floatv8siv8sf2
, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SI
},
27073 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtpd2ps256
, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256
, UNKNOWN
, (int) V4SF_FTYPE_V4DF
},
27074 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtps2dq256
, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
27075 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtps2pd256
, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4SF
},
27076 { OPTION_MASK_ISA_AVX
, CODE_FOR_fix_truncv4dfv4si2
, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256
, UNKNOWN
, (int) V4SI_FTYPE_V4DF
},
27077 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtpd2dq256
, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256
, UNKNOWN
, (int) V4SI_FTYPE_V4DF
},
27078 { OPTION_MASK_ISA_AVX
, CODE_FOR_fix_truncv8sfv8si2
, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
27079 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v4df3
, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
27080 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v8sf3
, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
27081 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v8si3
, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI_INT
},
27082 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv2df
, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_INT
},
27083 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv4sf
, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_INT
},
27084 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv4df
, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
27085 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv8sf
, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_INT
},
27086 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v4df
, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V2DF_INT
},
27087 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v8sf
, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V4SF_INT
},
27088 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v8si
, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_INT
},
27090 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movshdup256
, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27091 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movsldup256
, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27092 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movddup256
, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
27094 { OPTION_MASK_ISA_AVX
, CODE_FOR_sqrtv4df2
, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
27095 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_sqrtv8sf2
, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27096 { OPTION_MASK_ISA_AVX
, CODE_FOR_sqrtv8sf2
, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27097 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_rsqrtv8sf2
, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27098 { OPTION_MASK_ISA_AVX
, CODE_FOR_rsqrtv8sf2
, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27100 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_rcpv8sf2
, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27102 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
27103 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_INT
},
27105 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256
, (enum rtx_code
) ROUND_FLOOR
, (int) V4DF_FTYPE_V4DF_ROUND
},
27106 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256
, (enum rtx_code
) ROUND_CEIL
, (int) V4DF_FTYPE_V4DF_ROUND
},
27107 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256
, (enum rtx_code
) ROUND_TRUNC
, (int) V4DF_FTYPE_V4DF_ROUND
},
27108 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256
, (enum rtx_code
) ROUND_MXCSR
, (int) V4DF_FTYPE_V4DF_ROUND
},
27110 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv4df2
, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
27111 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv4df2_vec_pack_sfix
, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
, UNKNOWN
, (int) V8SI_FTYPE_V4DF_V4DF
},
27113 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd_vec_pack_sfix256
, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SI_FTYPE_V4DF_V4DF_ROUND
},
27114 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd_vec_pack_sfix256
, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SI_FTYPE_V4DF_V4DF_ROUND
},
27116 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SF_FTYPE_V8SF_ROUND
},
27117 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SF_FTYPE_V8SF_ROUND
},
27118 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256
, (enum rtx_code
) ROUND_TRUNC
, (int) V8SF_FTYPE_V8SF_ROUND
},
27119 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256
, (enum rtx_code
) ROUND_MXCSR
, (int) V8SF_FTYPE_V8SF_ROUND
},
27121 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps_sfix256
, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SI_FTYPE_V8SF_ROUND
},
27122 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps_sfix256
, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SI_FTYPE_V8SF_ROUND
},
27124 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv8sf2
, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27125 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv8sf2_sfix
, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
27127 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpckhpd256
, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27128 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpcklpd256
, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27129 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpckhps256
, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27130 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpcklps256
, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27132 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_si256_si
, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI
, UNKNOWN
, (int) V8SI_FTYPE_V4SI
},
27133 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ps256_ps
, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS
, UNKNOWN
, (int) V8SF_FTYPE_V4SF
},
27134 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_pd256_pd
, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD
, UNKNOWN
, (int) V4DF_FTYPE_V2DF
},
27135 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v8si
, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256
, UNKNOWN
, (int) V4SI_FTYPE_V8SI
},
27136 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v8sf
, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256
, UNKNOWN
, (int) V4SF_FTYPE_V8SF
},
27137 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v4df
, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256
, UNKNOWN
, (int) V2DF_FTYPE_V4DF
},
27139 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD
, EQ
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
27140 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD
, LTU
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
27141 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD
, GTU
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
27142 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS
, EQ
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
27143 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS
, LTU
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
27144 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS
, GTU
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
27145 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256
, EQ
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
27146 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256
, LTU
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
27147 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256
, GTU
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
27148 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256
, EQ
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
27149 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256
, LTU
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
27150 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256
, GTU
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
27151 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256
, EQ
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
27152 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256
, LTU
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
27153 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256
, GTU
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
27155 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movmskpd256
, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256
, UNKNOWN
, (int) INT_FTYPE_V4DF
},
27156 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movmskps256
, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256
, UNKNOWN
, (int) INT_FTYPE_V8SF
},
27158 { OPTION_MASK_ISA_AVX
, CODE_FOR_copysignv8sf3
, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27159 { OPTION_MASK_ISA_AVX
, CODE_FOR_copysignv4df3
, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27161 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_pack_sfix_v4df
, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256
, UNKNOWN
, (int) V8SI_FTYPE_V4DF_V4DF
},
27164 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_mpsadbw
, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI_INT
},
27165 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv32qi2
, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI
},
27166 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv16hi2
, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI
},
27167 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv8si2
, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI
},
27168 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packssdw
, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256
, UNKNOWN
, (int) V16HI_FTYPE_V8SI_V8SI
},
27169 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packsswb
, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256
, UNKNOWN
, (int) V32QI_FTYPE_V16HI_V16HI
},
27170 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packusdw
, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256
, UNKNOWN
, (int) V16HI_FTYPE_V8SI_V8SI
},
27171 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packuswb
, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256
, UNKNOWN
, (int) V32QI_FTYPE_V16HI_V16HI
},
27172 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv32qi3
, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27173 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv16hi3
, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27174 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv8si3
, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27175 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv4di3
, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27176 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ssaddv32qi3
, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27177 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ssaddv16hi3
, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27178 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_usaddv32qi3
, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27179 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_usaddv16hi3
, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27180 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_palignrv2ti
, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT
},
27181 { OPTION_MASK_ISA_AVX2
, CODE_FOR_andv4di3
, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27182 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_andnotv4di3
, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27183 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_uavgv32qi3
, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27184 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_uavgv16hi3
, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27185 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblendvb
, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI_V32QI
},
27186 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblendw
, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI_INT
},
27187 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv32qi3
, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27188 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv16hi3
, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27189 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv8si3
, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27190 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv4di3
, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27191 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv32qi3
, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27192 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv16hi3
, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27193 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv8si3
, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27194 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv4di3
, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27195 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phaddwv16hi3
, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27196 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phadddv8si3
, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27197 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phaddswv16hi3
, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27198 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubwv16hi3
, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27199 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubdv8si3
, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27200 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubswv16hi3
, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27201 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmaddubsw256
, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V32QI_V32QI
},
27202 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmaddwd
, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256
, UNKNOWN
, (int) V8SI_FTYPE_V16HI_V16HI
},
27203 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv32qi3
, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27204 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv16hi3
, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27205 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv8si3
, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27206 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv32qi3
, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27207 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv16hi3
, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27208 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv8si3
, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27209 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv32qi3
, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27210 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv16hi3
, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27211 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv8si3
, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27212 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv32qi3
, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27213 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv16hi3
, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27214 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv8si3
, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27215 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmovmskb
, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256
, UNKNOWN
, (int) INT_FTYPE_V32QI
},
27216 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv16qiv16hi2
, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16QI
},
27217 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv8qiv8si2
, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256
, UNKNOWN
, (int) V8SI_FTYPE_V16QI
},
27218 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4qiv4di2
, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V16QI
},
27219 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv8hiv8si2
, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256
, UNKNOWN
, (int) V8SI_FTYPE_V8HI
},
27220 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4hiv4di2
, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8HI
},
27221 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4siv4di2
, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4SI
},
27222 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv16qiv16hi2
, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16QI
},
27223 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv8qiv8si2
, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256
, UNKNOWN
, (int) V8SI_FTYPE_V16QI
},
27224 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4qiv4di2
, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V16QI
},
27225 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv8hiv8si2
, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256
, UNKNOWN
, (int) V8SI_FTYPE_V8HI
},
27226 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4hiv4di2
, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8HI
},
27227 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4siv4di2
, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4SI
},
27228 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_mulv4siv4di3
, "__builtin_ia32_pmuldq256" , IX86_BUILTIN_PMULDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8SI_V8SI
},
27229 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_umulhrswv16hi3
, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27230 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umulv16hi3_highpart
, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27231 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smulv16hi3_highpart
, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27232 { OPTION_MASK_ISA_AVX2
, CODE_FOR_mulv16hi3
, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27233 { OPTION_MASK_ISA_AVX2
, CODE_FOR_mulv8si3
, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27234 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_umulv4siv4di3
, "__builtin_ia32_pmuludq256" , IX86_BUILTIN_PMULUDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8SI_V8SI
},
27235 { OPTION_MASK_ISA_AVX2
, CODE_FOR_iorv4di3
, "__builtin_ia32_por256", IX86_BUILTIN_POR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27236 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psadbw
, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256
, UNKNOWN
, (int) V16HI_FTYPE_V32QI_V32QI
},
27237 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufbv32qi3
, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27238 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufdv3
, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_INT
},
27239 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufhwv3
, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_INT
},
27240 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshuflwv3
, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_INT
},
27241 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv32qi3
, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27242 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv16hi3
, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27243 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv8si3
, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27244 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlv2ti3
, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_CONVERT
},
27245 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv16hi3
, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
27246 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv16hi3
, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
27247 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv8si3
, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
27248 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv8si3
, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
27249 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv4di3
, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_COUNT
},
27250 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv4di3
, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_COUNT
},
27251 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv16hi3
, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
27252 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv16hi3
, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
27253 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv8si3
, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
27254 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv8si3
, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
27255 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrv2ti3
, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_CONVERT
},
27256 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv16hi3
, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
27257 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv16hi3
, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
27258 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv8si3
, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
27259 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv8si3
, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
27260 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv4di3
, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_COUNT
},
27261 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv4di3
, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_COUNT
},
27262 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv32qi3
, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27263 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv16hi3
, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27264 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv8si3
, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27265 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv4di3
, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27266 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sssubv32qi3
, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27267 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sssubv16hi3
, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27268 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ussubv32qi3
, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27269 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ussubv16hi3
, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27270 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv32qi
, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27271 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv16hi
, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27272 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv8si
, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27273 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv4di
, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27274 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv32qi
, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27275 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv16hi
, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27276 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv8si
, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27277 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv4di
, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27278 { OPTION_MASK_ISA_AVX2
, CODE_FOR_xorv4di3
, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27279 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv4sf
, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27280 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv8sf
, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256
, UNKNOWN
, (int) V8SF_FTYPE_V4SF
},
27281 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv4df
, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256
, UNKNOWN
, (int) V4DF_FTYPE_V2DF
},
27282 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vbroadcasti128_v4di
, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256
, UNKNOWN
, (int) V4DI_FTYPE_V2DI
},
27283 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblenddv4si
, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_INT
},
27284 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblenddv8si
, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI_INT
},
27285 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv32qi
, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256
, UNKNOWN
, (int) V32QI_FTYPE_V16QI
},
27286 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv16hi
, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256
, UNKNOWN
, (int) V16HI_FTYPE_V8HI
},
27287 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv8si
, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256
, UNKNOWN
, (int) V8SI_FTYPE_V4SI
},
27288 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv4di
, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256
, UNKNOWN
, (int) V4DI_FTYPE_V2DI
},
27289 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv16qi
, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI
},
27290 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv8hi
, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
27291 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv4si
, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI
},
27292 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv2di
, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
27293 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permvarv8si
, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27294 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv4df
, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
27295 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permvarv8sf
, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27296 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv4di
, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT
},
27297 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv2ti
, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI_INT
},
27298 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_extracti128
, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256
, UNKNOWN
, (int) V2DI_FTYPE_V4DI_INT
},
27299 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_inserti128
, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_INT
},
27300 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv4di
, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27301 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv2di
, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27302 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv8si
, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27303 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv4si
, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27304 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashrvv8si
, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27305 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashrvv4si
, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27306 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv4di
, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27307 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv2di
, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27308 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv8si
, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27309 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv4si
, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27311 { OPTION_MASK_ISA_LZCNT
, CODE_FOR_clzhi2_lzcnt
, "__builtin_clzs", IX86_BUILTIN_CLZS
, UNKNOWN
, (int) UINT16_FTYPE_UINT16
},
27314 { OPTION_MASK_ISA_BMI
, CODE_FOR_bmi_bextr_si
, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27315 { OPTION_MASK_ISA_BMI
, CODE_FOR_bmi_bextr_di
, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27316 { OPTION_MASK_ISA_BMI
, CODE_FOR_ctzhi2
, "__builtin_ctzs", IX86_BUILTIN_CTZS
, UNKNOWN
, (int) UINT16_FTYPE_UINT16
},
27319 { OPTION_MASK_ISA_TBM
, CODE_FOR_tbm_bextri_si
, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27320 { OPTION_MASK_ISA_TBM
, CODE_FOR_tbm_bextri_di
, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27323 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtph2ps
, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS
, UNKNOWN
, (int) V4SF_FTYPE_V8HI
},
27324 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtph2ps256
, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8HI
},
27325 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtps2ph
, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH
, UNKNOWN
, (int) V8HI_FTYPE_V4SF_INT
},
27326 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtps2ph256
, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256
, UNKNOWN
, (int) V8HI_FTYPE_V8SF_INT
},
27329 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_bzhi_si3
, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27330 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_bzhi_di3
, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27331 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pdep_si3
, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27332 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pdep_di3
, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27333 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pext_si3
, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27334 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pext_di3
, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
/* FMA4 and XOP.  */

/* Function-type codes for the multi-argument (FMA4/XOP) builtins below.
   Naming: MULTI_ARG_<nargs>_<element kind>[2 = 256-bit form][suffix],
   where _IMM takes an immediate shift/rotate count, _CMP is a vector
   compare, and _TF is a trivially-true/false compare.  Each expands to
   the corresponding ix86 function-type enumerator.  */
#define MULTI_ARG_4_DF2_DI_I	V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1	V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I	V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1	V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF		V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF		V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2		V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2		V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI		V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI		V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI	V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI		V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI	V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI		V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2		V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2		V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2		V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2		V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF		V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF		V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI		V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI		V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI		V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI		V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM	V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM	V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM	V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM	V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP	V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP	V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP	V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP	V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF	V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF	V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF	V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF	V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF	V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF	V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF		V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF		V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2		V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2		V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI		V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI		V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI		V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI		V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI	V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI	V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI	V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI	V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI	V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI	V8HI_FTYPE_V16QI
27391 static const struct builtin_description bdesc_multi_arg
[] =
27393 { OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_vmfmadd_v4sf
,
27394 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS
,
27395 UNKNOWN
, (int)MULTI_ARG_3_SF
},
27396 { OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_vmfmadd_v2df
,
27397 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD
,
27398 UNKNOWN
, (int)MULTI_ARG_3_DF
},
27400 { OPTION_MASK_ISA_FMA
, CODE_FOR_fmai_vmfmadd_v4sf
,
27401 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3
,
27402 UNKNOWN
, (int)MULTI_ARG_3_SF
},
27403 { OPTION_MASK_ISA_FMA
, CODE_FOR_fmai_vmfmadd_v2df
,
27404 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3
,
27405 UNKNOWN
, (int)MULTI_ARG_3_DF
},
27407 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v4sf
,
27408 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS
,
27409 UNKNOWN
, (int)MULTI_ARG_3_SF
},
27410 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v2df
,
27411 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD
,
27412 UNKNOWN
, (int)MULTI_ARG_3_DF
},
27413 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v8sf
,
27414 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256
,
27415 UNKNOWN
, (int)MULTI_ARG_3_SF2
},
27416 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v4df
,
27417 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256
,
27418 UNKNOWN
, (int)MULTI_ARG_3_DF2
},
27420 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v4sf
,
27421 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS
,
27422 UNKNOWN
, (int)MULTI_ARG_3_SF
},
27423 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v2df
,
27424 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD
,
27425 UNKNOWN
, (int)MULTI_ARG_3_DF
},
27426 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v8sf
,
27427 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256
,
27428 UNKNOWN
, (int)MULTI_ARG_3_SF2
},
27429 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v4df
,
27430 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256
,
27431 UNKNOWN
, (int)MULTI_ARG_3_DF2
},
27433 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2di
, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV
, UNKNOWN
, (int)MULTI_ARG_3_DI
},
27434 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2di
, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI
, UNKNOWN
, (int)MULTI_ARG_3_DI
},
27435 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4si
, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
27436 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8hi
, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
27437 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v16qi
, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI
,UNKNOWN
, (int)MULTI_ARG_3_QI
},
27438 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2df
, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF
, UNKNOWN
, (int)MULTI_ARG_3_DF
},
27439 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4sf
, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF
, UNKNOWN
, (int)MULTI_ARG_3_SF
},
27441 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4di256
, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256
, UNKNOWN
, (int)MULTI_ARG_3_DI2
},
27442 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4di256
, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256
, UNKNOWN
, (int)MULTI_ARG_3_DI2
},
27443 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8si256
, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256
, UNKNOWN
, (int)MULTI_ARG_3_SI2
},
27444 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v16hi256
, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256
, UNKNOWN
, (int)MULTI_ARG_3_HI2
},
27445 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v32qi256
, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256
, UNKNOWN
, (int)MULTI_ARG_3_QI2
},
27446 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4df256
, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256
, UNKNOWN
, (int)MULTI_ARG_3_DF2
},
27447 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8sf256
, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256
, UNKNOWN
, (int)MULTI_ARG_3_SF2
},
27449 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pperm
, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM
, UNKNOWN
, (int)MULTI_ARG_3_QI
},
27451 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssww
, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
27452 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsww
, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
27453 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsswd
, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
27454 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacswd
, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
27455 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdd
, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
27456 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdd
, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
27457 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdql
, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
27458 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdqh
, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
27459 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdql
, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
27460 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdqh
, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
27461 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmadcsswd
, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
27462 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmadcswd
, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
27464 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv2di3
, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
27465 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv4si3
, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
27466 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv8hi3
, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
27467 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv16qi3
, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
27468 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv2di3
, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM
, UNKNOWN
, (int)MULTI_ARG_2_DI_IMM
},
27469 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv4si3
, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM
, UNKNOWN
, (int)MULTI_ARG_2_SI_IMM
},
27470 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv8hi3
, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM
, UNKNOWN
, (int)MULTI_ARG_2_HI_IMM
},
27471 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv16qi3
, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM
, UNKNOWN
, (int)MULTI_ARG_2_QI_IMM
},
27472 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav2di3
, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
27473 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav4si3
, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
27474 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav8hi3
, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
27475 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav16qi3
, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
27476 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv2di3
, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
27477 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv4si3
, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
27478 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv8hi3
, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
27479 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv16qi3
, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
27481 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vmfrczv4sf2
, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS
, UNKNOWN
, (int)MULTI_ARG_2_SF
},
27482 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vmfrczv2df2
, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD
, UNKNOWN
, (int)MULTI_ARG_2_DF
},
27483 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv4sf2
, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS
, UNKNOWN
, (int)MULTI_ARG_1_SF
},
27484 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv2df2
, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD
, UNKNOWN
, (int)MULTI_ARG_1_DF
},
27485 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv8sf2
, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256
, UNKNOWN
, (int)MULTI_ARG_1_SF2
},
27486 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv4df2
, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256
, UNKNOWN
, (int)MULTI_ARG_1_DF2
},
27488 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbw
, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
27489 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbd
, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD
, UNKNOWN
, (int)MULTI_ARG_1_QI_SI
},
27490 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbq
, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ
, UNKNOWN
, (int)MULTI_ARG_1_QI_DI
},
27491 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddwd
, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
27492 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddwq
, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ
, UNKNOWN
, (int)MULTI_ARG_1_HI_DI
},
27493 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadddq
, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
27494 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubw
, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
27495 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubd
, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD
, UNKNOWN
, (int)MULTI_ARG_1_QI_SI
},
27496 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubq
, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ
, UNKNOWN
, (int)MULTI_ARG_1_QI_DI
},
27497 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadduwd
, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
27498 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadduwq
, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ
, UNKNOWN
, (int)MULTI_ARG_1_HI_DI
},
27499 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddudq
, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
27500 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubbw
, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
27501 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubwd
, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
27502 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubdq
, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
27504 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB
, EQ
, (int)MULTI_ARG_2_QI_CMP
},
27505 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
27506 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
27507 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB
, LT
, (int)MULTI_ARG_2_QI_CMP
},
27508 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB
, LE
, (int)MULTI_ARG_2_QI_CMP
},
27509 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB
, GT
, (int)MULTI_ARG_2_QI_CMP
},
27510 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB
, GE
, (int)MULTI_ARG_2_QI_CMP
},
27512 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW
, EQ
, (int)MULTI_ARG_2_HI_CMP
},
27513 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
27514 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
27515 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW
, LT
, (int)MULTI_ARG_2_HI_CMP
},
27516 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW
, LE
, (int)MULTI_ARG_2_HI_CMP
},
27517 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW
, GT
, (int)MULTI_ARG_2_HI_CMP
},
27518 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW
, GE
, (int)MULTI_ARG_2_HI_CMP
},
27520 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD
, EQ
, (int)MULTI_ARG_2_SI_CMP
},
27521 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED
, NE
, (int)MULTI_ARG_2_SI_CMP
},
27522 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED
, NE
, (int)MULTI_ARG_2_SI_CMP
},
27523 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD
, LT
, (int)MULTI_ARG_2_SI_CMP
},
27524 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED
, LE
, (int)MULTI_ARG_2_SI_CMP
},
27525 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD
, GT
, (int)MULTI_ARG_2_SI_CMP
},
27526 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED
, GE
, (int)MULTI_ARG_2_SI_CMP
},
27528 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ
, EQ
, (int)MULTI_ARG_2_DI_CMP
},
27529 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
27530 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
27531 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ
, LT
, (int)MULTI_ARG_2_DI_CMP
},
27532 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ
, LE
, (int)MULTI_ARG_2_DI_CMP
},
27533 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ
, GT
, (int)MULTI_ARG_2_DI_CMP
},
27534 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ
, GE
, (int)MULTI_ARG_2_DI_CMP
},
27536 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB
, EQ
, (int)MULTI_ARG_2_QI_CMP
},
27537 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
27538 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
27539 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB
, LTU
, (int)MULTI_ARG_2_QI_CMP
},
27540 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB
, LEU
, (int)MULTI_ARG_2_QI_CMP
},
27541 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB
, GTU
, (int)MULTI_ARG_2_QI_CMP
},
27542 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB
, GEU
, (int)MULTI_ARG_2_QI_CMP
},
27544 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW
, EQ
, (int)MULTI_ARG_2_HI_CMP
},
27545 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
27546 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
27547 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW
, LTU
, (int)MULTI_ARG_2_HI_CMP
},
27548 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW
, LEU
, (int)MULTI_ARG_2_HI_CMP
},
27549 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW
, GTU
, (int)MULTI_ARG_2_HI_CMP
},
27550 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW
, GEU
, (int)MULTI_ARG_2_HI_CMP
},
27552 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD
, EQ
, (int)MULTI_ARG_2_SI_CMP
},
27553 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD
, NE
, (int)MULTI_ARG_2_SI_CMP
},
27554 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD
, NE
, (int)MULTI_ARG_2_SI_CMP
},
27555 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD
, LTU
, (int)MULTI_ARG_2_SI_CMP
},
27556 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD
, LEU
, (int)MULTI_ARG_2_SI_CMP
},
27557 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD
, GTU
, (int)MULTI_ARG_2_SI_CMP
},
27558 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD
, GEU
, (int)MULTI_ARG_2_SI_CMP
},
27560 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ
, EQ
, (int)MULTI_ARG_2_DI_CMP
},
27561 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
27562 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
27563 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ
, LTU
, (int)MULTI_ARG_2_DI_CMP
},
27564 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ
, LEU
, (int)MULTI_ARG_2_DI_CMP
},
27565 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ
, GTU
, (int)MULTI_ARG_2_DI_CMP
},
27566 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ
, GEU
, (int)MULTI_ARG_2_DI_CMP
},
27568 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_QI_TF
},
27569 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_HI_TF
},
27570 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_SI_TF
},
27571 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_DI_TF
},
27572 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_QI_TF
},
27573 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_HI_TF
},
27574 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_SI_TF
},
27575 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_DI_TF
},
27577 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_QI_TF
},
27578 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_HI_TF
},
27579 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_SI_TF
},
27580 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_DI_TF
},
27581 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_QI_TF
},
27582 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_HI_TF
},
27583 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_SI_TF
},
27584 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_DI_TF
},
27586 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v2df3
, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD
, UNKNOWN
, (int)MULTI_ARG_4_DF2_DI_I
},
27587 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v4sf3
, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS
, UNKNOWN
, (int)MULTI_ARG_4_SF2_SI_I
},
27588 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v4df3
, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256
, UNKNOWN
, (int)MULTI_ARG_4_DF2_DI_I1
},
27589 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v8sf3
, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256
, UNKNOWN
, (int)MULTI_ARG_4_SF2_SI_I1
},
/* TM vector builtins.  */

/* Reuse the existing x86-specific `struct builtin_description' because
   we're lazy.  Add casts to make them fit.  */
27597 static const struct builtin_description bdesc_tm
[] =
27599 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
27600 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WaRM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
27601 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WaWM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
27602 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
27603 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RaRM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
27604 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RaWM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
27605 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RfWM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
27607 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
27608 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WaRM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
27609 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WaWM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
27610 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
27611 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RaRM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
27612 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RaWM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
27613 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RfWM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
27615 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
27616 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WaRM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
27617 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WaWM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
27618 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
27619 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RaRM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
27620 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RaWM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
27621 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RfWM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
27623 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_LM64", (enum ix86_builtins
) BUILT_IN_TM_LOG_M64
, UNKNOWN
, VOID_FTYPE_PCVOID
},
27624 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_LM128", (enum ix86_builtins
) BUILT_IN_TM_LOG_M128
, UNKNOWN
, VOID_FTYPE_PCVOID
},
27625 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_LM256", (enum ix86_builtins
) BUILT_IN_TM_LOG_M256
, UNKNOWN
, VOID_FTYPE_PCVOID
},
27628 /* TM callbacks. */
27630 /* Return the builtin decl needed to load a vector of TYPE. */
27633 ix86_builtin_tm_load (tree type
)
27635 if (TREE_CODE (type
) == VECTOR_TYPE
)
27637 switch (tree_low_cst (TYPE_SIZE (type
), 1))
27640 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64
);
27642 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128
);
27644 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256
);
27650 /* Return the builtin decl needed to store a vector of TYPE. */
27653 ix86_builtin_tm_store (tree type
)
27655 if (TREE_CODE (type
) == VECTOR_TYPE
)
27657 switch (tree_low_cst (TYPE_SIZE (type
), 1))
27660 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64
);
27662 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128
);
27664 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256
);
27670 /* Initialize the transactional memory vector load/store builtins. */
27673 ix86_init_tm_builtins (void)
27675 enum ix86_builtin_func_type ftype
;
27676 const struct builtin_description
*d
;
27679 tree attrs_load
, attrs_type_load
, attrs_store
, attrs_type_store
;
27680 tree attrs_log
, attrs_type_log
;
27685 /* Use whatever attributes a normal TM load has. */
27686 decl
= builtin_decl_explicit (BUILT_IN_TM_LOAD_1
);
27687 attrs_load
= DECL_ATTRIBUTES (decl
);
27688 attrs_type_load
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
27689 /* Use whatever attributes a normal TM store has. */
27690 decl
= builtin_decl_explicit (BUILT_IN_TM_STORE_1
);
27691 attrs_store
= DECL_ATTRIBUTES (decl
);
27692 attrs_type_store
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
27693 /* Use whatever attributes a normal TM log has. */
27694 decl
= builtin_decl_explicit (BUILT_IN_TM_LOG
);
27695 attrs_log
= DECL_ATTRIBUTES (decl
);
27696 attrs_type_log
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
27698 for (i
= 0, d
= bdesc_tm
;
27699 i
< ARRAY_SIZE (bdesc_tm
);
27702 if ((d
->mask
& ix86_isa_flags
) != 0
27703 || (lang_hooks
.builtin_function
27704 == lang_hooks
.builtin_function_ext_scope
))
27706 tree type
, attrs
, attrs_type
;
27707 enum built_in_function code
= (enum built_in_function
) d
->code
;
27709 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
27710 type
= ix86_get_builtin_func_type (ftype
);
27712 if (BUILTIN_TM_LOAD_P (code
))
27714 attrs
= attrs_load
;
27715 attrs_type
= attrs_type_load
;
27717 else if (BUILTIN_TM_STORE_P (code
))
27719 attrs
= attrs_store
;
27720 attrs_type
= attrs_type_store
;
27725 attrs_type
= attrs_type_log
;
27727 decl
= add_builtin_function (d
->name
, type
, code
, BUILT_IN_NORMAL
,
27728 /* The builtin without the prefix for
27729 calling it directly. */
27730 d
->name
+ strlen ("__builtin_"),
27732 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
27733 set the TYPE_ATTRIBUTES. */
27734 decl_attributes (&TREE_TYPE (decl
), attrs_type
, ATTR_FLAG_BUILT_IN
);
27736 set_builtin_decl (code
, decl
, false);
27741 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
27742 in the current target ISA to allow the user to compile particular modules
27743 with different target specific options that differ from the command line
27746 ix86_init_mmx_sse_builtins (void)
27748 const struct builtin_description
* d
;
27749 enum ix86_builtin_func_type ftype
;
27752 /* Add all special builtins with variable number of operands. */
27753 for (i
= 0, d
= bdesc_special_args
;
27754 i
< ARRAY_SIZE (bdesc_special_args
);
27760 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
27761 def_builtin (d
->mask
, d
->name
, ftype
, d
->code
);
27764 /* Add all builtins with variable number of operands. */
27765 for (i
= 0, d
= bdesc_args
;
27766 i
< ARRAY_SIZE (bdesc_args
);
27772 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
27773 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
27776 /* pcmpestr[im] insns. */
27777 for (i
= 0, d
= bdesc_pcmpestr
;
27778 i
< ARRAY_SIZE (bdesc_pcmpestr
);
27781 if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
27782 ftype
= V16QI_FTYPE_V16QI_INT_V16QI_INT_INT
;
27784 ftype
= INT_FTYPE_V16QI_INT_V16QI_INT_INT
;
27785 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
27788 /* pcmpistr[im] insns. */
27789 for (i
= 0, d
= bdesc_pcmpistr
;
27790 i
< ARRAY_SIZE (bdesc_pcmpistr
);
27793 if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
27794 ftype
= V16QI_FTYPE_V16QI_V16QI_INT
;
27796 ftype
= INT_FTYPE_V16QI_V16QI_INT
;
27797 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
27800 /* comi/ucomi insns. */
27801 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
27803 if (d
->mask
== OPTION_MASK_ISA_SSE2
)
27804 ftype
= INT_FTYPE_V2DF_V2DF
;
27806 ftype
= INT_FTYPE_V4SF_V4SF
;
27807 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
27811 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_ldmxcsr",
27812 VOID_FTYPE_UNSIGNED
, IX86_BUILTIN_LDMXCSR
);
27813 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_stmxcsr",
27814 UNSIGNED_FTYPE_VOID
, IX86_BUILTIN_STMXCSR
);
27816 /* SSE or 3DNow!A */
27817 def_builtin (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
27818 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR
,
27819 IX86_BUILTIN_MASKMOVQ
);
27822 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_maskmovdqu",
27823 VOID_FTYPE_V16QI_V16QI_PCHAR
, IX86_BUILTIN_MASKMOVDQU
);
27825 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_clflush",
27826 VOID_FTYPE_PCVOID
, IX86_BUILTIN_CLFLUSH
);
27827 x86_mfence
= def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_mfence",
27828 VOID_FTYPE_VOID
, IX86_BUILTIN_MFENCE
);
27831 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_monitor",
27832 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED
, IX86_BUILTIN_MONITOR
);
27833 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_mwait",
27834 VOID_FTYPE_UNSIGNED_UNSIGNED
, IX86_BUILTIN_MWAIT
);
27837 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesenc128",
27838 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESENC128
);
27839 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesenclast128",
27840 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESENCLAST128
);
27841 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesdec128",
27842 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESDEC128
);
27843 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesdeclast128",
27844 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESDECLAST128
);
27845 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesimc128",
27846 V2DI_FTYPE_V2DI
, IX86_BUILTIN_AESIMC128
);
27847 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aeskeygenassist128",
27848 V2DI_FTYPE_V2DI_INT
, IX86_BUILTIN_AESKEYGENASSIST128
);
27851 def_builtin_const (OPTION_MASK_ISA_PCLMUL
, "__builtin_ia32_pclmulqdq128",
27852 V2DI_FTYPE_V2DI_V2DI_INT
, IX86_BUILTIN_PCLMULQDQ128
);
27855 def_builtin (OPTION_MASK_ISA_RDRND
, "__builtin_ia32_rdrand16_step",
27856 INT_FTYPE_PUSHORT
, IX86_BUILTIN_RDRAND16_STEP
);
27857 def_builtin (OPTION_MASK_ISA_RDRND
, "__builtin_ia32_rdrand32_step",
27858 INT_FTYPE_PUNSIGNED
, IX86_BUILTIN_RDRAND32_STEP
);
27859 def_builtin (OPTION_MASK_ISA_RDRND
| OPTION_MASK_ISA_64BIT
,
27860 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG
,
27861 IX86_BUILTIN_RDRAND64_STEP
);
27864 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv2df",
27865 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT
,
27866 IX86_BUILTIN_GATHERSIV2DF
);
27868 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4df",
27869 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT
,
27870 IX86_BUILTIN_GATHERSIV4DF
);
27872 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv2df",
27873 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT
,
27874 IX86_BUILTIN_GATHERDIV2DF
);
27876 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4df",
27877 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT
,
27878 IX86_BUILTIN_GATHERDIV4DF
);
27880 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4sf",
27881 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT
,
27882 IX86_BUILTIN_GATHERSIV4SF
);
27884 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv8sf",
27885 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT
,
27886 IX86_BUILTIN_GATHERSIV8SF
);
27888 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4sf",
27889 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT
,
27890 IX86_BUILTIN_GATHERDIV4SF
);
27892 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4sf256",
27893 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT
,
27894 IX86_BUILTIN_GATHERDIV8SF
);
27896 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv2di",
27897 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT
,
27898 IX86_BUILTIN_GATHERSIV2DI
);
27900 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4di",
27901 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT
,
27902 IX86_BUILTIN_GATHERSIV4DI
);
27904 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv2di",
27905 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT
,
27906 IX86_BUILTIN_GATHERDIV2DI
);
27908 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4di",
27909 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT
,
27910 IX86_BUILTIN_GATHERDIV4DI
);
27912 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4si",
27913 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT
,
27914 IX86_BUILTIN_GATHERSIV4SI
);
27916 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv8si",
27917 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT
,
27918 IX86_BUILTIN_GATHERSIV8SI
);
27920 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4si",
27921 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT
,
27922 IX86_BUILTIN_GATHERDIV4SI
);
27924 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4si256",
27925 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT
,
27926 IX86_BUILTIN_GATHERDIV8SI
);
27928 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltsiv4df ",
27929 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT
,
27930 IX86_BUILTIN_GATHERALTSIV4DF
);
27932 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltdiv4sf256 ",
27933 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT
,
27934 IX86_BUILTIN_GATHERALTDIV8SF
);
27936 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltsiv4di ",
27937 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT
,
27938 IX86_BUILTIN_GATHERALTSIV4DI
);
27940 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltdiv4si256 ",
27941 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT
,
27942 IX86_BUILTIN_GATHERALTDIV8SI
);
27944 /* MMX access to the vec_init patterns. */
27945 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v2si",
27946 V2SI_FTYPE_INT_INT
, IX86_BUILTIN_VEC_INIT_V2SI
);
27948 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v4hi",
27949 V4HI_FTYPE_HI_HI_HI_HI
,
27950 IX86_BUILTIN_VEC_INIT_V4HI
);
27952 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v8qi",
27953 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI
,
27954 IX86_BUILTIN_VEC_INIT_V8QI
);
27956 /* Access to the vec_extract patterns. */
27957 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2df",
27958 DOUBLE_FTYPE_V2DF_INT
, IX86_BUILTIN_VEC_EXT_V2DF
);
27959 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2di",
27960 DI_FTYPE_V2DI_INT
, IX86_BUILTIN_VEC_EXT_V2DI
);
27961 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_vec_ext_v4sf",
27962 FLOAT_FTYPE_V4SF_INT
, IX86_BUILTIN_VEC_EXT_V4SF
);
27963 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v4si",
27964 SI_FTYPE_V4SI_INT
, IX86_BUILTIN_VEC_EXT_V4SI
);
27965 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v8hi",
27966 HI_FTYPE_V8HI_INT
, IX86_BUILTIN_VEC_EXT_V8HI
);
27968 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
27969 "__builtin_ia32_vec_ext_v4hi",
27970 HI_FTYPE_V4HI_INT
, IX86_BUILTIN_VEC_EXT_V4HI
);
27972 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_ext_v2si",
27973 SI_FTYPE_V2SI_INT
, IX86_BUILTIN_VEC_EXT_V2SI
);
27975 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v16qi",
27976 QI_FTYPE_V16QI_INT
, IX86_BUILTIN_VEC_EXT_V16QI
);
27978 /* Access to the vec_set patterns. */
27979 def_builtin_const (OPTION_MASK_ISA_SSE4_1
| OPTION_MASK_ISA_64BIT
,
27980 "__builtin_ia32_vec_set_v2di",
27981 V2DI_FTYPE_V2DI_DI_INT
, IX86_BUILTIN_VEC_SET_V2DI
);
27983 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4sf",
27984 V4SF_FTYPE_V4SF_FLOAT_INT
, IX86_BUILTIN_VEC_SET_V4SF
);
27986 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4si",
27987 V4SI_FTYPE_V4SI_SI_INT
, IX86_BUILTIN_VEC_SET_V4SI
);
27989 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_set_v8hi",
27990 V8HI_FTYPE_V8HI_HI_INT
, IX86_BUILTIN_VEC_SET_V8HI
);
27992 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
27993 "__builtin_ia32_vec_set_v4hi",
27994 V4HI_FTYPE_V4HI_HI_INT
, IX86_BUILTIN_VEC_SET_V4HI
);
27996 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v16qi",
27997 V16QI_FTYPE_V16QI_QI_INT
, IX86_BUILTIN_VEC_SET_V16QI
);
27999 /* Add FMA4 multi-arg argument instructions */
28000 for (i
= 0, d
= bdesc_multi_arg
; i
< ARRAY_SIZE (bdesc_multi_arg
); i
++, d
++)
28005 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
28006 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
28010 /* Internal method for ix86_init_builtins. */
28013 ix86_init_builtins_va_builtins_abi (void)
28015 tree ms_va_ref
, sysv_va_ref
;
28016 tree fnvoid_va_end_ms
, fnvoid_va_end_sysv
;
28017 tree fnvoid_va_start_ms
, fnvoid_va_start_sysv
;
28018 tree fnvoid_va_copy_ms
, fnvoid_va_copy_sysv
;
28019 tree fnattr_ms
= NULL_TREE
, fnattr_sysv
= NULL_TREE
;
28023 fnattr_ms
= build_tree_list (get_identifier ("ms_abi"), NULL_TREE
);
28024 fnattr_sysv
= build_tree_list (get_identifier ("sysv_abi"), NULL_TREE
);
28025 ms_va_ref
= build_reference_type (ms_va_list_type_node
);
28027 build_pointer_type (TREE_TYPE (sysv_va_list_type_node
));
28030 build_function_type_list (void_type_node
, ms_va_ref
, NULL_TREE
);
28031 fnvoid_va_start_ms
=
28032 build_varargs_function_type_list (void_type_node
, ms_va_ref
, NULL_TREE
);
28033 fnvoid_va_end_sysv
=
28034 build_function_type_list (void_type_node
, sysv_va_ref
, NULL_TREE
);
28035 fnvoid_va_start_sysv
=
28036 build_varargs_function_type_list (void_type_node
, sysv_va_ref
,
28038 fnvoid_va_copy_ms
=
28039 build_function_type_list (void_type_node
, ms_va_ref
, ms_va_list_type_node
,
28041 fnvoid_va_copy_sysv
=
28042 build_function_type_list (void_type_node
, sysv_va_ref
,
28043 sysv_va_ref
, NULL_TREE
);
28045 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms
,
28046 BUILT_IN_VA_START
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
28047 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms
,
28048 BUILT_IN_VA_END
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
28049 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms
,
28050 BUILT_IN_VA_COPY
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
28051 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv
,
28052 BUILT_IN_VA_START
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
28053 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv
,
28054 BUILT_IN_VA_END
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
28055 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv
,
28056 BUILT_IN_VA_COPY
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
28060 ix86_init_builtin_types (void)
28062 tree float128_type_node
, float80_type_node
;
28064 /* The __float80 type. */
28065 float80_type_node
= long_double_type_node
;
28066 if (TYPE_MODE (float80_type_node
) != XFmode
)
28068 /* The __float80 type. */
28069 float80_type_node
= make_node (REAL_TYPE
);
28071 TYPE_PRECISION (float80_type_node
) = 80;
28072 layout_type (float80_type_node
);
28074 lang_hooks
.types
.register_builtin_type (float80_type_node
, "__float80");
28076 /* The __float128 type. */
28077 float128_type_node
= make_node (REAL_TYPE
);
28078 TYPE_PRECISION (float128_type_node
) = 128;
28079 layout_type (float128_type_node
);
28080 lang_hooks
.types
.register_builtin_type (float128_type_node
, "__float128");
28082 /* This macro is built by i386-builtin-types.awk. */
28083 DEFINE_BUILTIN_PRIMITIVE_TYPES
;
28087 ix86_init_builtins (void)
28091 ix86_init_builtin_types ();
28093 /* TFmode support builtins. */
28094 def_builtin_const (0, "__builtin_infq",
28095 FLOAT128_FTYPE_VOID
, IX86_BUILTIN_INFQ
);
28096 def_builtin_const (0, "__builtin_huge_valq",
28097 FLOAT128_FTYPE_VOID
, IX86_BUILTIN_HUGE_VALQ
);
28099 /* We will expand them to normal call if SSE2 isn't available since
28100 they are used by libgcc. */
28101 t
= ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128
);
28102 t
= add_builtin_function ("__builtin_fabsq", t
, IX86_BUILTIN_FABSQ
,
28103 BUILT_IN_MD
, "__fabstf2", NULL_TREE
);
28104 TREE_READONLY (t
) = 1;
28105 ix86_builtins
[(int) IX86_BUILTIN_FABSQ
] = t
;
28107 t
= ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128
);
28108 t
= add_builtin_function ("__builtin_copysignq", t
, IX86_BUILTIN_COPYSIGNQ
,
28109 BUILT_IN_MD
, "__copysigntf3", NULL_TREE
);
28110 TREE_READONLY (t
) = 1;
28111 ix86_builtins
[(int) IX86_BUILTIN_COPYSIGNQ
] = t
;
28113 ix86_init_tm_builtins ();
28114 ix86_init_mmx_sse_builtins ();
28117 ix86_init_builtins_va_builtins_abi ();
28119 #ifdef SUBTARGET_INIT_BUILTINS
28120 SUBTARGET_INIT_BUILTINS
;
28124 /* Return the ix86 builtin for CODE. */
28127 ix86_builtin_decl (unsigned code
, bool initialize_p ATTRIBUTE_UNUSED
)
28129 if (code
>= IX86_BUILTIN_MAX
)
28130 return error_mark_node
;
28132 return ix86_builtins
[code
];
28135 /* Errors in the source file can cause expand_expr to return const0_rtx
28136 where we expect a vector. To avoid crashing, use one of the vector
28137 clear instructions. */
28139 safe_vector_operand (rtx x
, enum machine_mode mode
)
28141 if (x
== const0_rtx
)
28142 x
= CONST0_RTX (mode
);
28146 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
28149 ix86_expand_binop_builtin (enum insn_code icode
, tree exp
, rtx target
)
28152 tree arg0
= CALL_EXPR_ARG (exp
, 0);
28153 tree arg1
= CALL_EXPR_ARG (exp
, 1);
28154 rtx op0
= expand_normal (arg0
);
28155 rtx op1
= expand_normal (arg1
);
28156 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
28157 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
28158 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
28160 if (VECTOR_MODE_P (mode0
))
28161 op0
= safe_vector_operand (op0
, mode0
);
28162 if (VECTOR_MODE_P (mode1
))
28163 op1
= safe_vector_operand (op1
, mode1
);
28165 if (optimize
|| !target
28166 || GET_MODE (target
) != tmode
28167 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
28168 target
= gen_reg_rtx (tmode
);
28170 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
28172 rtx x
= gen_reg_rtx (V4SImode
);
28173 emit_insn (gen_sse2_loadd (x
, op1
));
28174 op1
= gen_lowpart (TImode
, x
);
28177 if (!insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
28178 op0
= copy_to_mode_reg (mode0
, op0
);
28179 if (!insn_data
[icode
].operand
[2].predicate (op1
, mode1
))
28180 op1
= copy_to_mode_reg (mode1
, op1
);
28182 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
28191 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
28194 ix86_expand_multi_arg_builtin (enum insn_code icode
, tree exp
, rtx target
,
28195 enum ix86_builtin_func_type m_type
,
28196 enum rtx_code sub_code
)
28201 bool comparison_p
= false;
28203 bool last_arg_constant
= false;
28204 int num_memory
= 0;
28207 enum machine_mode mode
;
28210 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
28214 case MULTI_ARG_4_DF2_DI_I
:
28215 case MULTI_ARG_4_DF2_DI_I1
:
28216 case MULTI_ARG_4_SF2_SI_I
:
28217 case MULTI_ARG_4_SF2_SI_I1
:
28219 last_arg_constant
= true;
28222 case MULTI_ARG_3_SF
:
28223 case MULTI_ARG_3_DF
:
28224 case MULTI_ARG_3_SF2
:
28225 case MULTI_ARG_3_DF2
:
28226 case MULTI_ARG_3_DI
:
28227 case MULTI_ARG_3_SI
:
28228 case MULTI_ARG_3_SI_DI
:
28229 case MULTI_ARG_3_HI
:
28230 case MULTI_ARG_3_HI_SI
:
28231 case MULTI_ARG_3_QI
:
28232 case MULTI_ARG_3_DI2
:
28233 case MULTI_ARG_3_SI2
:
28234 case MULTI_ARG_3_HI2
:
28235 case MULTI_ARG_3_QI2
:
28239 case MULTI_ARG_2_SF
:
28240 case MULTI_ARG_2_DF
:
28241 case MULTI_ARG_2_DI
:
28242 case MULTI_ARG_2_SI
:
28243 case MULTI_ARG_2_HI
:
28244 case MULTI_ARG_2_QI
:
28248 case MULTI_ARG_2_DI_IMM
:
28249 case MULTI_ARG_2_SI_IMM
:
28250 case MULTI_ARG_2_HI_IMM
:
28251 case MULTI_ARG_2_QI_IMM
:
28253 last_arg_constant
= true;
28256 case MULTI_ARG_1_SF
:
28257 case MULTI_ARG_1_DF
:
28258 case MULTI_ARG_1_SF2
:
28259 case MULTI_ARG_1_DF2
:
28260 case MULTI_ARG_1_DI
:
28261 case MULTI_ARG_1_SI
:
28262 case MULTI_ARG_1_HI
:
28263 case MULTI_ARG_1_QI
:
28264 case MULTI_ARG_1_SI_DI
:
28265 case MULTI_ARG_1_HI_DI
:
28266 case MULTI_ARG_1_HI_SI
:
28267 case MULTI_ARG_1_QI_DI
:
28268 case MULTI_ARG_1_QI_SI
:
28269 case MULTI_ARG_1_QI_HI
:
28273 case MULTI_ARG_2_DI_CMP
:
28274 case MULTI_ARG_2_SI_CMP
:
28275 case MULTI_ARG_2_HI_CMP
:
28276 case MULTI_ARG_2_QI_CMP
:
28278 comparison_p
= true;
28281 case MULTI_ARG_2_SF_TF
:
28282 case MULTI_ARG_2_DF_TF
:
28283 case MULTI_ARG_2_DI_TF
:
28284 case MULTI_ARG_2_SI_TF
:
28285 case MULTI_ARG_2_HI_TF
:
28286 case MULTI_ARG_2_QI_TF
:
28292 gcc_unreachable ();
28295 if (optimize
|| !target
28296 || GET_MODE (target
) != tmode
28297 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
28298 target
= gen_reg_rtx (tmode
);
28300 gcc_assert (nargs
<= 4);
28302 for (i
= 0; i
< nargs
; i
++)
28304 tree arg
= CALL_EXPR_ARG (exp
, i
);
28305 rtx op
= expand_normal (arg
);
28306 int adjust
= (comparison_p
) ? 1 : 0;
28307 enum machine_mode mode
= insn_data
[icode
].operand
[i
+adjust
+1].mode
;
28309 if (last_arg_constant
&& i
== nargs
- 1)
28311 if (!insn_data
[icode
].operand
[i
+ 1].predicate (op
, mode
))
28313 enum insn_code new_icode
= icode
;
28316 case CODE_FOR_xop_vpermil2v2df3
:
28317 case CODE_FOR_xop_vpermil2v4sf3
:
28318 case CODE_FOR_xop_vpermil2v4df3
:
28319 case CODE_FOR_xop_vpermil2v8sf3
:
28320 error ("the last argument must be a 2-bit immediate");
28321 return gen_reg_rtx (tmode
);
28322 case CODE_FOR_xop_rotlv2di3
:
28323 new_icode
= CODE_FOR_rotlv2di3
;
28325 case CODE_FOR_xop_rotlv4si3
:
28326 new_icode
= CODE_FOR_rotlv4si3
;
28328 case CODE_FOR_xop_rotlv8hi3
:
28329 new_icode
= CODE_FOR_rotlv8hi3
;
28331 case CODE_FOR_xop_rotlv16qi3
:
28332 new_icode
= CODE_FOR_rotlv16qi3
;
28334 if (CONST_INT_P (op
))
28336 int mask
= GET_MODE_BITSIZE (GET_MODE_INNER (tmode
)) - 1;
28337 op
= GEN_INT (INTVAL (op
) & mask
);
28338 gcc_checking_assert
28339 (insn_data
[icode
].operand
[i
+ 1].predicate (op
, mode
));
28343 gcc_checking_assert
28345 && insn_data
[new_icode
].operand
[0].mode
== tmode
28346 && insn_data
[new_icode
].operand
[1].mode
== tmode
28347 && insn_data
[new_icode
].operand
[2].mode
== mode
28348 && insn_data
[new_icode
].operand
[0].predicate
28349 == insn_data
[icode
].operand
[0].predicate
28350 && insn_data
[new_icode
].operand
[1].predicate
28351 == insn_data
[icode
].operand
[1].predicate
);
28357 gcc_unreachable ();
28364 if (VECTOR_MODE_P (mode
))
28365 op
= safe_vector_operand (op
, mode
);
28367 /* If we aren't optimizing, only allow one memory operand to be
28369 if (memory_operand (op
, mode
))
28372 gcc_assert (GET_MODE (op
) == mode
|| GET_MODE (op
) == VOIDmode
);
28375 || !insn_data
[icode
].operand
[i
+adjust
+1].predicate (op
, mode
)
28377 op
= force_reg (mode
, op
);
28381 args
[i
].mode
= mode
;
28387 pat
= GEN_FCN (icode
) (target
, args
[0].op
);
28392 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
,
28393 GEN_INT ((int)sub_code
));
28394 else if (! comparison_p
)
28395 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
);
28398 rtx cmp_op
= gen_rtx_fmt_ee (sub_code
, GET_MODE (target
),
28402 pat
= GEN_FCN (icode
) (target
, cmp_op
, args
[0].op
, args
[1].op
);
28407 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
);
28411 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
, args
[3].op
);
28415 gcc_unreachable ();
28425 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
28426 insns with vec_merge. */
28429 ix86_expand_unop_vec_merge_builtin (enum insn_code icode
, tree exp
,
28433 tree arg0
= CALL_EXPR_ARG (exp
, 0);
28434 rtx op1
, op0
= expand_normal (arg0
);
28435 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
28436 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
28438 if (optimize
|| !target
28439 || GET_MODE (target
) != tmode
28440 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
28441 target
= gen_reg_rtx (tmode
);
28443 if (VECTOR_MODE_P (mode0
))
28444 op0
= safe_vector_operand (op0
, mode0
);
28446 if ((optimize
&& !register_operand (op0
, mode0
))
28447 || !insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
28448 op0
= copy_to_mode_reg (mode0
, op0
);
28451 if (!insn_data
[icode
].operand
[2].predicate (op1
, mode0
))
28452 op1
= copy_to_mode_reg (mode0
, op1
);
28454 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
28461 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
28464 ix86_expand_sse_compare (const struct builtin_description
*d
,
28465 tree exp
, rtx target
, bool swap
)
28468 tree arg0
= CALL_EXPR_ARG (exp
, 0);
28469 tree arg1
= CALL_EXPR_ARG (exp
, 1);
28470 rtx op0
= expand_normal (arg0
);
28471 rtx op1
= expand_normal (arg1
);
28473 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
28474 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
28475 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
28476 enum rtx_code comparison
= d
->comparison
;
28478 if (VECTOR_MODE_P (mode0
))
28479 op0
= safe_vector_operand (op0
, mode0
);
28480 if (VECTOR_MODE_P (mode1
))
28481 op1
= safe_vector_operand (op1
, mode1
);
28483 /* Swap operands if we have a comparison that isn't available in
28487 rtx tmp
= gen_reg_rtx (mode1
);
28488 emit_move_insn (tmp
, op1
);
28493 if (optimize
|| !target
28494 || GET_MODE (target
) != tmode
28495 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
28496 target
= gen_reg_rtx (tmode
);
28498 if ((optimize
&& !register_operand (op0
, mode0
))
28499 || !insn_data
[d
->icode
].operand
[1].predicate (op0
, mode0
))
28500 op0
= copy_to_mode_reg (mode0
, op0
);
28501 if ((optimize
&& !register_operand (op1
, mode1
))
28502 || !insn_data
[d
->icode
].operand
[2].predicate (op1
, mode1
))
28503 op1
= copy_to_mode_reg (mode1
, op1
);
28505 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
28506 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
28513 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
28516 ix86_expand_sse_comi (const struct builtin_description
*d
, tree exp
,
28520 tree arg0
= CALL_EXPR_ARG (exp
, 0);
28521 tree arg1
= CALL_EXPR_ARG (exp
, 1);
28522 rtx op0
= expand_normal (arg0
);
28523 rtx op1
= expand_normal (arg1
);
28524 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
28525 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
28526 enum rtx_code comparison
= d
->comparison
;
28528 if (VECTOR_MODE_P (mode0
))
28529 op0
= safe_vector_operand (op0
, mode0
);
28530 if (VECTOR_MODE_P (mode1
))
28531 op1
= safe_vector_operand (op1
, mode1
);
28533 /* Swap operands if we have a comparison that isn't available in
28535 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
28542 target
= gen_reg_rtx (SImode
);
28543 emit_move_insn (target
, const0_rtx
);
28544 target
= gen_rtx_SUBREG (QImode
, target
, 0);
28546 if ((optimize
&& !register_operand (op0
, mode0
))
28547 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
28548 op0
= copy_to_mode_reg (mode0
, op0
);
28549 if ((optimize
&& !register_operand (op1
, mode1
))
28550 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
28551 op1
= copy_to_mode_reg (mode1
, op1
);
28553 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
28557 emit_insn (gen_rtx_SET (VOIDmode
,
28558 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
28559 gen_rtx_fmt_ee (comparison
, QImode
,
28563 return SUBREG_REG (target
);
28566 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
28569 ix86_expand_sse_round (const struct builtin_description
*d
, tree exp
,
28573 tree arg0
= CALL_EXPR_ARG (exp
, 0);
28574 rtx op1
, op0
= expand_normal (arg0
);
28575 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
28576 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
28578 if (optimize
|| target
== 0
28579 || GET_MODE (target
) != tmode
28580 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
28581 target
= gen_reg_rtx (tmode
);
28583 if (VECTOR_MODE_P (mode0
))
28584 op0
= safe_vector_operand (op0
, mode0
);
28586 if ((optimize
&& !register_operand (op0
, mode0
))
28587 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
28588 op0
= copy_to_mode_reg (mode0
, op0
);
28590 op1
= GEN_INT (d
->comparison
);
28592 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
);
28600 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description
*d
,
28601 tree exp
, rtx target
)
28604 tree arg0
= CALL_EXPR_ARG (exp
, 0);
28605 tree arg1
= CALL_EXPR_ARG (exp
, 1);
28606 rtx op0
= expand_normal (arg0
);
28607 rtx op1
= expand_normal (arg1
);
28609 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
28610 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
28611 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
28613 if (optimize
|| target
== 0
28614 || GET_MODE (target
) != tmode
28615 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
28616 target
= gen_reg_rtx (tmode
);
28618 op0
= safe_vector_operand (op0
, mode0
);
28619 op1
= safe_vector_operand (op1
, mode1
);
28621 if ((optimize
&& !register_operand (op0
, mode0
))
28622 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
28623 op0
= copy_to_mode_reg (mode0
, op0
);
28624 if ((optimize
&& !register_operand (op1
, mode1
))
28625 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
28626 op1
= copy_to_mode_reg (mode1
, op1
);
28628 op2
= GEN_INT (d
->comparison
);
28630 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
28637 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
28640 ix86_expand_sse_ptest (const struct builtin_description
*d
, tree exp
,
28644 tree arg0
= CALL_EXPR_ARG (exp
, 0);
28645 tree arg1
= CALL_EXPR_ARG (exp
, 1);
28646 rtx op0
= expand_normal (arg0
);
28647 rtx op1
= expand_normal (arg1
);
28648 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
28649 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
28650 enum rtx_code comparison
= d
->comparison
;
28652 if (VECTOR_MODE_P (mode0
))
28653 op0
= safe_vector_operand (op0
, mode0
);
28654 if (VECTOR_MODE_P (mode1
))
28655 op1
= safe_vector_operand (op1
, mode1
);
28657 target
= gen_reg_rtx (SImode
);
28658 emit_move_insn (target
, const0_rtx
);
28659 target
= gen_rtx_SUBREG (QImode
, target
, 0);
28661 if ((optimize
&& !register_operand (op0
, mode0
))
28662 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
28663 op0
= copy_to_mode_reg (mode0
, op0
);
28664 if ((optimize
&& !register_operand (op1
, mode1
))
28665 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
28666 op1
= copy_to_mode_reg (mode1
, op1
);
28668 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
28672 emit_insn (gen_rtx_SET (VOIDmode
,
28673 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
28674 gen_rtx_fmt_ee (comparison
, QImode
,
28678 return SUBREG_REG (target
);
28681 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
28684 ix86_expand_sse_pcmpestr (const struct builtin_description
*d
,
28685 tree exp
, rtx target
)
28688 tree arg0
= CALL_EXPR_ARG (exp
, 0);
28689 tree arg1
= CALL_EXPR_ARG (exp
, 1);
28690 tree arg2
= CALL_EXPR_ARG (exp
, 2);
28691 tree arg3
= CALL_EXPR_ARG (exp
, 3);
28692 tree arg4
= CALL_EXPR_ARG (exp
, 4);
28693 rtx scratch0
, scratch1
;
28694 rtx op0
= expand_normal (arg0
);
28695 rtx op1
= expand_normal (arg1
);
28696 rtx op2
= expand_normal (arg2
);
28697 rtx op3
= expand_normal (arg3
);
28698 rtx op4
= expand_normal (arg4
);
28699 enum machine_mode tmode0
, tmode1
, modev2
, modei3
, modev4
, modei5
, modeimm
;
28701 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
28702 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
28703 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
28704 modei3
= insn_data
[d
->icode
].operand
[3].mode
;
28705 modev4
= insn_data
[d
->icode
].operand
[4].mode
;
28706 modei5
= insn_data
[d
->icode
].operand
[5].mode
;
28707 modeimm
= insn_data
[d
->icode
].operand
[6].mode
;
28709 if (VECTOR_MODE_P (modev2
))
28710 op0
= safe_vector_operand (op0
, modev2
);
28711 if (VECTOR_MODE_P (modev4
))
28712 op2
= safe_vector_operand (op2
, modev4
);
28714 if (!insn_data
[d
->icode
].operand
[2].predicate (op0
, modev2
))
28715 op0
= copy_to_mode_reg (modev2
, op0
);
28716 if (!insn_data
[d
->icode
].operand
[3].predicate (op1
, modei3
))
28717 op1
= copy_to_mode_reg (modei3
, op1
);
28718 if ((optimize
&& !register_operand (op2
, modev4
))
28719 || !insn_data
[d
->icode
].operand
[4].predicate (op2
, modev4
))
28720 op2
= copy_to_mode_reg (modev4
, op2
);
28721 if (!insn_data
[d
->icode
].operand
[5].predicate (op3
, modei5
))
28722 op3
= copy_to_mode_reg (modei5
, op3
);
28724 if (!insn_data
[d
->icode
].operand
[6].predicate (op4
, modeimm
))
28726 error ("the fifth argument must be an 8-bit immediate");
28730 if (d
->code
== IX86_BUILTIN_PCMPESTRI128
)
28732 if (optimize
|| !target
28733 || GET_MODE (target
) != tmode0
28734 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode0
))
28735 target
= gen_reg_rtx (tmode0
);
28737 scratch1
= gen_reg_rtx (tmode1
);
28739 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
, op3
, op4
);
28741 else if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
28743 if (optimize
|| !target
28744 || GET_MODE (target
) != tmode1
28745 || !insn_data
[d
->icode
].operand
[1].predicate (target
, tmode1
))
28746 target
= gen_reg_rtx (tmode1
);
28748 scratch0
= gen_reg_rtx (tmode0
);
28750 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
, op3
, op4
);
28754 gcc_assert (d
->flag
);
28756 scratch0
= gen_reg_rtx (tmode0
);
28757 scratch1
= gen_reg_rtx (tmode1
);
28759 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
, op3
, op4
);
28769 target
= gen_reg_rtx (SImode
);
28770 emit_move_insn (target
, const0_rtx
);
28771 target
= gen_rtx_SUBREG (QImode
, target
, 0);
28774 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
28775 gen_rtx_fmt_ee (EQ
, QImode
,
28776 gen_rtx_REG ((enum machine_mode
) d
->flag
,
28779 return SUBREG_REG (target
);
28786 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
28789 ix86_expand_sse_pcmpistr (const struct builtin_description
*d
,
28790 tree exp
, rtx target
)
28793 tree arg0
= CALL_EXPR_ARG (exp
, 0);
28794 tree arg1
= CALL_EXPR_ARG (exp
, 1);
28795 tree arg2
= CALL_EXPR_ARG (exp
, 2);
28796 rtx scratch0
, scratch1
;
28797 rtx op0
= expand_normal (arg0
);
28798 rtx op1
= expand_normal (arg1
);
28799 rtx op2
= expand_normal (arg2
);
28800 enum machine_mode tmode0
, tmode1
, modev2
, modev3
, modeimm
;
28802 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
28803 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
28804 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
28805 modev3
= insn_data
[d
->icode
].operand
[3].mode
;
28806 modeimm
= insn_data
[d
->icode
].operand
[4].mode
;
28808 if (VECTOR_MODE_P (modev2
))
28809 op0
= safe_vector_operand (op0
, modev2
);
28810 if (VECTOR_MODE_P (modev3
))
28811 op1
= safe_vector_operand (op1
, modev3
);
28813 if (!insn_data
[d
->icode
].operand
[2].predicate (op0
, modev2
))
28814 op0
= copy_to_mode_reg (modev2
, op0
);
28815 if ((optimize
&& !register_operand (op1
, modev3
))
28816 || !insn_data
[d
->icode
].operand
[3].predicate (op1
, modev3
))
28817 op1
= copy_to_mode_reg (modev3
, op1
);
28819 if (!insn_data
[d
->icode
].operand
[4].predicate (op2
, modeimm
))
28821 error ("the third argument must be an 8-bit immediate");
28825 if (d
->code
== IX86_BUILTIN_PCMPISTRI128
)
28827 if (optimize
|| !target
28828 || GET_MODE (target
) != tmode0
28829 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode0
))
28830 target
= gen_reg_rtx (tmode0
);
28832 scratch1
= gen_reg_rtx (tmode1
);
28834 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
);
28836 else if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
28838 if (optimize
|| !target
28839 || GET_MODE (target
) != tmode1
28840 || !insn_data
[d
->icode
].operand
[1].predicate (target
, tmode1
))
28841 target
= gen_reg_rtx (tmode1
);
28843 scratch0
= gen_reg_rtx (tmode0
);
28845 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
);
28849 gcc_assert (d
->flag
);
28851 scratch0
= gen_reg_rtx (tmode0
);
28852 scratch1
= gen_reg_rtx (tmode1
);
28854 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
);
28864 target
= gen_reg_rtx (SImode
);
28865 emit_move_insn (target
, const0_rtx
);
28866 target
= gen_rtx_SUBREG (QImode
, target
, 0);
28869 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
28870 gen_rtx_fmt_ee (EQ
, QImode
,
28871 gen_rtx_REG ((enum machine_mode
) d
->flag
,
28874 return SUBREG_REG (target
);
28880 /* Subroutine of ix86_expand_builtin to take care of insns with
28881 variable number of operands. */
28884 ix86_expand_args_builtin (const struct builtin_description
*d
,
28885 tree exp
, rtx target
)
28887 rtx pat
, real_target
;
28888 unsigned int i
, nargs
;
28889 unsigned int nargs_constant
= 0;
28890 int num_memory
= 0;
28894 enum machine_mode mode
;
28896 bool last_arg_count
= false;
28897 enum insn_code icode
= d
->icode
;
28898 const struct insn_data_d
*insn_p
= &insn_data
[icode
];
28899 enum machine_mode tmode
= insn_p
->operand
[0].mode
;
28900 enum machine_mode rmode
= VOIDmode
;
28902 enum rtx_code comparison
= d
->comparison
;
28904 switch ((enum ix86_builtin_func_type
) d
->flag
)
28906 case V2DF_FTYPE_V2DF_ROUND
:
28907 case V4DF_FTYPE_V4DF_ROUND
:
28908 case V4SF_FTYPE_V4SF_ROUND
:
28909 case V8SF_FTYPE_V8SF_ROUND
:
28910 case V4SI_FTYPE_V4SF_ROUND
:
28911 case V8SI_FTYPE_V8SF_ROUND
:
28912 return ix86_expand_sse_round (d
, exp
, target
);
28913 case V4SI_FTYPE_V2DF_V2DF_ROUND
:
28914 case V8SI_FTYPE_V4DF_V4DF_ROUND
:
28915 return ix86_expand_sse_round_vec_pack_sfix (d
, exp
, target
);
28916 case INT_FTYPE_V8SF_V8SF_PTEST
:
28917 case INT_FTYPE_V4DI_V4DI_PTEST
:
28918 case INT_FTYPE_V4DF_V4DF_PTEST
:
28919 case INT_FTYPE_V4SF_V4SF_PTEST
:
28920 case INT_FTYPE_V2DI_V2DI_PTEST
:
28921 case INT_FTYPE_V2DF_V2DF_PTEST
:
28922 return ix86_expand_sse_ptest (d
, exp
, target
);
28923 case FLOAT128_FTYPE_FLOAT128
:
28924 case FLOAT_FTYPE_FLOAT
:
28925 case INT_FTYPE_INT
:
28926 case UINT64_FTYPE_INT
:
28927 case UINT16_FTYPE_UINT16
:
28928 case INT64_FTYPE_INT64
:
28929 case INT64_FTYPE_V4SF
:
28930 case INT64_FTYPE_V2DF
:
28931 case INT_FTYPE_V16QI
:
28932 case INT_FTYPE_V8QI
:
28933 case INT_FTYPE_V8SF
:
28934 case INT_FTYPE_V4DF
:
28935 case INT_FTYPE_V4SF
:
28936 case INT_FTYPE_V2DF
:
28937 case INT_FTYPE_V32QI
:
28938 case V16QI_FTYPE_V16QI
:
28939 case V8SI_FTYPE_V8SF
:
28940 case V8SI_FTYPE_V4SI
:
28941 case V8HI_FTYPE_V8HI
:
28942 case V8HI_FTYPE_V16QI
:
28943 case V8QI_FTYPE_V8QI
:
28944 case V8SF_FTYPE_V8SF
:
28945 case V8SF_FTYPE_V8SI
:
28946 case V8SF_FTYPE_V4SF
:
28947 case V8SF_FTYPE_V8HI
:
28948 case V4SI_FTYPE_V4SI
:
28949 case V4SI_FTYPE_V16QI
:
28950 case V4SI_FTYPE_V4SF
:
28951 case V4SI_FTYPE_V8SI
:
28952 case V4SI_FTYPE_V8HI
:
28953 case V4SI_FTYPE_V4DF
:
28954 case V4SI_FTYPE_V2DF
:
28955 case V4HI_FTYPE_V4HI
:
28956 case V4DF_FTYPE_V4DF
:
28957 case V4DF_FTYPE_V4SI
:
28958 case V4DF_FTYPE_V4SF
:
28959 case V4DF_FTYPE_V2DF
:
28960 case V4SF_FTYPE_V4SF
:
28961 case V4SF_FTYPE_V4SI
:
28962 case V4SF_FTYPE_V8SF
:
28963 case V4SF_FTYPE_V4DF
:
28964 case V4SF_FTYPE_V8HI
:
28965 case V4SF_FTYPE_V2DF
:
28966 case V2DI_FTYPE_V2DI
:
28967 case V2DI_FTYPE_V16QI
:
28968 case V2DI_FTYPE_V8HI
:
28969 case V2DI_FTYPE_V4SI
:
28970 case V2DF_FTYPE_V2DF
:
28971 case V2DF_FTYPE_V4SI
:
28972 case V2DF_FTYPE_V4DF
:
28973 case V2DF_FTYPE_V4SF
:
28974 case V2DF_FTYPE_V2SI
:
28975 case V2SI_FTYPE_V2SI
:
28976 case V2SI_FTYPE_V4SF
:
28977 case V2SI_FTYPE_V2SF
:
28978 case V2SI_FTYPE_V2DF
:
28979 case V2SF_FTYPE_V2SF
:
28980 case V2SF_FTYPE_V2SI
:
28981 case V32QI_FTYPE_V32QI
:
28982 case V32QI_FTYPE_V16QI
:
28983 case V16HI_FTYPE_V16HI
:
28984 case V16HI_FTYPE_V8HI
:
28985 case V8SI_FTYPE_V8SI
:
28986 case V16HI_FTYPE_V16QI
:
28987 case V8SI_FTYPE_V16QI
:
28988 case V4DI_FTYPE_V16QI
:
28989 case V8SI_FTYPE_V8HI
:
28990 case V4DI_FTYPE_V8HI
:
28991 case V4DI_FTYPE_V4SI
:
28992 case V4DI_FTYPE_V2DI
:
28995 case V4SF_FTYPE_V4SF_VEC_MERGE
:
28996 case V2DF_FTYPE_V2DF_VEC_MERGE
:
28997 return ix86_expand_unop_vec_merge_builtin (icode
, exp
, target
);
28998 case FLOAT128_FTYPE_FLOAT128_FLOAT128
:
28999 case V16QI_FTYPE_V16QI_V16QI
:
29000 case V16QI_FTYPE_V8HI_V8HI
:
29001 case V8QI_FTYPE_V8QI_V8QI
:
29002 case V8QI_FTYPE_V4HI_V4HI
:
29003 case V8HI_FTYPE_V8HI_V8HI
:
29004 case V8HI_FTYPE_V16QI_V16QI
:
29005 case V8HI_FTYPE_V4SI_V4SI
:
29006 case V8SF_FTYPE_V8SF_V8SF
:
29007 case V8SF_FTYPE_V8SF_V8SI
:
29008 case V4SI_FTYPE_V4SI_V4SI
:
29009 case V4SI_FTYPE_V8HI_V8HI
:
29010 case V4SI_FTYPE_V4SF_V4SF
:
29011 case V4SI_FTYPE_V2DF_V2DF
:
29012 case V4HI_FTYPE_V4HI_V4HI
:
29013 case V4HI_FTYPE_V8QI_V8QI
:
29014 case V4HI_FTYPE_V2SI_V2SI
:
29015 case V4DF_FTYPE_V4DF_V4DF
:
29016 case V4DF_FTYPE_V4DF_V4DI
:
29017 case V4SF_FTYPE_V4SF_V4SF
:
29018 case V4SF_FTYPE_V4SF_V4SI
:
29019 case V4SF_FTYPE_V4SF_V2SI
:
29020 case V4SF_FTYPE_V4SF_V2DF
:
29021 case V4SF_FTYPE_V4SF_DI
:
29022 case V4SF_FTYPE_V4SF_SI
:
29023 case V2DI_FTYPE_V2DI_V2DI
:
29024 case V2DI_FTYPE_V16QI_V16QI
:
29025 case V2DI_FTYPE_V4SI_V4SI
:
29026 case V2DI_FTYPE_V2DI_V16QI
:
29027 case V2DI_FTYPE_V2DF_V2DF
:
29028 case V2SI_FTYPE_V2SI_V2SI
:
29029 case V2SI_FTYPE_V4HI_V4HI
:
29030 case V2SI_FTYPE_V2SF_V2SF
:
29031 case V2DF_FTYPE_V2DF_V2DF
:
29032 case V2DF_FTYPE_V2DF_V4SF
:
29033 case V2DF_FTYPE_V2DF_V2DI
:
29034 case V2DF_FTYPE_V2DF_DI
:
29035 case V2DF_FTYPE_V2DF_SI
:
29036 case V2SF_FTYPE_V2SF_V2SF
:
29037 case V1DI_FTYPE_V1DI_V1DI
:
29038 case V1DI_FTYPE_V8QI_V8QI
:
29039 case V1DI_FTYPE_V2SI_V2SI
:
29040 case V32QI_FTYPE_V16HI_V16HI
:
29041 case V16HI_FTYPE_V8SI_V8SI
:
29042 case V32QI_FTYPE_V32QI_V32QI
:
29043 case V16HI_FTYPE_V32QI_V32QI
:
29044 case V16HI_FTYPE_V16HI_V16HI
:
29045 case V8SI_FTYPE_V4DF_V4DF
:
29046 case V8SI_FTYPE_V8SI_V8SI
:
29047 case V8SI_FTYPE_V16HI_V16HI
:
29048 case V4DI_FTYPE_V4DI_V4DI
:
29049 case V4DI_FTYPE_V8SI_V8SI
:
29050 if (comparison
== UNKNOWN
)
29051 return ix86_expand_binop_builtin (icode
, exp
, target
);
29054 case V4SF_FTYPE_V4SF_V4SF_SWAP
:
29055 case V2DF_FTYPE_V2DF_V2DF_SWAP
:
29056 gcc_assert (comparison
!= UNKNOWN
);
29060 case V16HI_FTYPE_V16HI_V8HI_COUNT
:
29061 case V16HI_FTYPE_V16HI_SI_COUNT
:
29062 case V8SI_FTYPE_V8SI_V4SI_COUNT
:
29063 case V8SI_FTYPE_V8SI_SI_COUNT
:
29064 case V4DI_FTYPE_V4DI_V2DI_COUNT
:
29065 case V4DI_FTYPE_V4DI_INT_COUNT
:
29066 case V8HI_FTYPE_V8HI_V8HI_COUNT
:
29067 case V8HI_FTYPE_V8HI_SI_COUNT
:
29068 case V4SI_FTYPE_V4SI_V4SI_COUNT
:
29069 case V4SI_FTYPE_V4SI_SI_COUNT
:
29070 case V4HI_FTYPE_V4HI_V4HI_COUNT
:
29071 case V4HI_FTYPE_V4HI_SI_COUNT
:
29072 case V2DI_FTYPE_V2DI_V2DI_COUNT
:
29073 case V2DI_FTYPE_V2DI_SI_COUNT
:
29074 case V2SI_FTYPE_V2SI_V2SI_COUNT
:
29075 case V2SI_FTYPE_V2SI_SI_COUNT
:
29076 case V1DI_FTYPE_V1DI_V1DI_COUNT
:
29077 case V1DI_FTYPE_V1DI_SI_COUNT
:
29079 last_arg_count
= true;
29081 case UINT64_FTYPE_UINT64_UINT64
:
29082 case UINT_FTYPE_UINT_UINT
:
29083 case UINT_FTYPE_UINT_USHORT
:
29084 case UINT_FTYPE_UINT_UCHAR
:
29085 case UINT16_FTYPE_UINT16_INT
:
29086 case UINT8_FTYPE_UINT8_INT
:
29089 case V2DI_FTYPE_V2DI_INT_CONVERT
:
29092 nargs_constant
= 1;
29094 case V4DI_FTYPE_V4DI_INT_CONVERT
:
29097 nargs_constant
= 1;
29099 case V8HI_FTYPE_V8HI_INT
:
29100 case V8HI_FTYPE_V8SF_INT
:
29101 case V8HI_FTYPE_V4SF_INT
:
29102 case V8SF_FTYPE_V8SF_INT
:
29103 case V4SI_FTYPE_V4SI_INT
:
29104 case V4SI_FTYPE_V8SI_INT
:
29105 case V4HI_FTYPE_V4HI_INT
:
29106 case V4DF_FTYPE_V4DF_INT
:
29107 case V4SF_FTYPE_V4SF_INT
:
29108 case V4SF_FTYPE_V8SF_INT
:
29109 case V2DI_FTYPE_V2DI_INT
:
29110 case V2DF_FTYPE_V2DF_INT
:
29111 case V2DF_FTYPE_V4DF_INT
:
29112 case V16HI_FTYPE_V16HI_INT
:
29113 case V8SI_FTYPE_V8SI_INT
:
29114 case V4DI_FTYPE_V4DI_INT
:
29115 case V2DI_FTYPE_V4DI_INT
:
29117 nargs_constant
= 1;
29119 case V16QI_FTYPE_V16QI_V16QI_V16QI
:
29120 case V8SF_FTYPE_V8SF_V8SF_V8SF
:
29121 case V4DF_FTYPE_V4DF_V4DF_V4DF
:
29122 case V4SF_FTYPE_V4SF_V4SF_V4SF
:
29123 case V2DF_FTYPE_V2DF_V2DF_V2DF
:
29124 case V32QI_FTYPE_V32QI_V32QI_V32QI
:
29127 case V32QI_FTYPE_V32QI_V32QI_INT
:
29128 case V16HI_FTYPE_V16HI_V16HI_INT
:
29129 case V16QI_FTYPE_V16QI_V16QI_INT
:
29130 case V4DI_FTYPE_V4DI_V4DI_INT
:
29131 case V8HI_FTYPE_V8HI_V8HI_INT
:
29132 case V8SI_FTYPE_V8SI_V8SI_INT
:
29133 case V8SI_FTYPE_V8SI_V4SI_INT
:
29134 case V8SF_FTYPE_V8SF_V8SF_INT
:
29135 case V8SF_FTYPE_V8SF_V4SF_INT
:
29136 case V4SI_FTYPE_V4SI_V4SI_INT
:
29137 case V4DF_FTYPE_V4DF_V4DF_INT
:
29138 case V4DF_FTYPE_V4DF_V2DF_INT
:
29139 case V4SF_FTYPE_V4SF_V4SF_INT
:
29140 case V2DI_FTYPE_V2DI_V2DI_INT
:
29141 case V4DI_FTYPE_V4DI_V2DI_INT
:
29142 case V2DF_FTYPE_V2DF_V2DF_INT
:
29144 nargs_constant
= 1;
29146 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT
:
29149 nargs_constant
= 1;
29151 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT
:
29154 nargs_constant
= 1;
29156 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT
:
29159 nargs_constant
= 1;
29161 case V2DI_FTYPE_V2DI_UINT_UINT
:
29163 nargs_constant
= 2;
29165 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT
:
29166 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT
:
29167 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT
:
29168 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT
:
29170 nargs_constant
= 1;
29172 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT
:
29174 nargs_constant
= 2;
29177 gcc_unreachable ();
29180 gcc_assert (nargs
<= ARRAY_SIZE (args
));
29182 if (comparison
!= UNKNOWN
)
29184 gcc_assert (nargs
== 2);
29185 return ix86_expand_sse_compare (d
, exp
, target
, swap
);
29188 if (rmode
== VOIDmode
|| rmode
== tmode
)
29192 || GET_MODE (target
) != tmode
29193 || !insn_p
->operand
[0].predicate (target
, tmode
))
29194 target
= gen_reg_rtx (tmode
);
29195 real_target
= target
;
29199 target
= gen_reg_rtx (rmode
);
29200 real_target
= simplify_gen_subreg (tmode
, target
, rmode
, 0);
29203 for (i
= 0; i
< nargs
; i
++)
29205 tree arg
= CALL_EXPR_ARG (exp
, i
);
29206 rtx op
= expand_normal (arg
);
29207 enum machine_mode mode
= insn_p
->operand
[i
+ 1].mode
;
29208 bool match
= insn_p
->operand
[i
+ 1].predicate (op
, mode
);
29210 if (last_arg_count
&& (i
+ 1) == nargs
)
29212 /* SIMD shift insns take either an 8-bit immediate or
29213 register as count. But builtin functions take int as
29214 count. If count doesn't match, we put it in register. */
29217 op
= simplify_gen_subreg (SImode
, op
, GET_MODE (op
), 0);
29218 if (!insn_p
->operand
[i
+ 1].predicate (op
, mode
))
29219 op
= copy_to_reg (op
);
29222 else if ((nargs
- i
) <= nargs_constant
)
29227 case CODE_FOR_avx2_inserti128
:
29228 case CODE_FOR_avx2_extracti128
:
29229 error ("the last argument must be an 1-bit immediate");
29232 case CODE_FOR_sse4_1_roundsd
:
29233 case CODE_FOR_sse4_1_roundss
:
29235 case CODE_FOR_sse4_1_roundpd
:
29236 case CODE_FOR_sse4_1_roundps
:
29237 case CODE_FOR_avx_roundpd256
:
29238 case CODE_FOR_avx_roundps256
:
29240 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix
:
29241 case CODE_FOR_sse4_1_roundps_sfix
:
29242 case CODE_FOR_avx_roundpd_vec_pack_sfix256
:
29243 case CODE_FOR_avx_roundps_sfix256
:
29245 case CODE_FOR_sse4_1_blendps
:
29246 case CODE_FOR_avx_blendpd256
:
29247 case CODE_FOR_avx_vpermilv4df
:
29248 error ("the last argument must be a 4-bit immediate");
29251 case CODE_FOR_sse4_1_blendpd
:
29252 case CODE_FOR_avx_vpermilv2df
:
29253 case CODE_FOR_xop_vpermil2v2df3
:
29254 case CODE_FOR_xop_vpermil2v4sf3
:
29255 case CODE_FOR_xop_vpermil2v4df3
:
29256 case CODE_FOR_xop_vpermil2v8sf3
:
29257 error ("the last argument must be a 2-bit immediate");
29260 case CODE_FOR_avx_vextractf128v4df
:
29261 case CODE_FOR_avx_vextractf128v8sf
:
29262 case CODE_FOR_avx_vextractf128v8si
:
29263 case CODE_FOR_avx_vinsertf128v4df
:
29264 case CODE_FOR_avx_vinsertf128v8sf
:
29265 case CODE_FOR_avx_vinsertf128v8si
:
29266 error ("the last argument must be a 1-bit immediate");
29269 case CODE_FOR_avx_vmcmpv2df3
:
29270 case CODE_FOR_avx_vmcmpv4sf3
:
29271 case CODE_FOR_avx_cmpv2df3
:
29272 case CODE_FOR_avx_cmpv4sf3
:
29273 case CODE_FOR_avx_cmpv4df3
:
29274 case CODE_FOR_avx_cmpv8sf3
:
29275 error ("the last argument must be a 5-bit immediate");
29279 switch (nargs_constant
)
29282 if ((nargs
- i
) == nargs_constant
)
29284 error ("the next to last argument must be an 8-bit immediate");
29288 error ("the last argument must be an 8-bit immediate");
29291 gcc_unreachable ();
29298 if (VECTOR_MODE_P (mode
))
29299 op
= safe_vector_operand (op
, mode
);
29301 /* If we aren't optimizing, only allow one memory operand to
29303 if (memory_operand (op
, mode
))
29306 if (GET_MODE (op
) == mode
|| GET_MODE (op
) == VOIDmode
)
29308 if (optimize
|| !match
|| num_memory
> 1)
29309 op
= copy_to_mode_reg (mode
, op
);
29313 op
= copy_to_reg (op
);
29314 op
= simplify_gen_subreg (mode
, op
, GET_MODE (op
), 0);
29319 args
[i
].mode
= mode
;
29325 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
);
29328 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
);
29331 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
,
29335 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
,
29336 args
[2].op
, args
[3].op
);
29339 gcc_unreachable ();
29349 /* Subroutine of ix86_expand_builtin to take care of special insns
29350 with variable number of operands. */
29353 ix86_expand_special_args_builtin (const struct builtin_description
*d
,
29354 tree exp
, rtx target
)
29358 unsigned int i
, nargs
, arg_adjust
, memory
;
29362 enum machine_mode mode
;
29364 enum insn_code icode
= d
->icode
;
29365 bool last_arg_constant
= false;
29366 const struct insn_data_d
*insn_p
= &insn_data
[icode
];
29367 enum machine_mode tmode
= insn_p
->operand
[0].mode
;
29368 enum { load
, store
} klass
;
29370 switch ((enum ix86_builtin_func_type
) d
->flag
)
29372 case VOID_FTYPE_VOID
:
29373 if (icode
== CODE_FOR_avx_vzeroupper
)
29374 target
= GEN_INT (vzeroupper_intrinsic
);
29375 emit_insn (GEN_FCN (icode
) (target
));
29377 case VOID_FTYPE_UINT64
:
29378 case VOID_FTYPE_UNSIGNED
:
29383 case UINT64_FTYPE_VOID
:
29384 case UNSIGNED_FTYPE_VOID
:
29389 case UINT64_FTYPE_PUNSIGNED
:
29390 case V2DI_FTYPE_PV2DI
:
29391 case V4DI_FTYPE_PV4DI
:
29392 case V32QI_FTYPE_PCCHAR
:
29393 case V16QI_FTYPE_PCCHAR
:
29394 case V8SF_FTYPE_PCV4SF
:
29395 case V8SF_FTYPE_PCFLOAT
:
29396 case V4SF_FTYPE_PCFLOAT
:
29397 case V4DF_FTYPE_PCV2DF
:
29398 case V4DF_FTYPE_PCDOUBLE
:
29399 case V2DF_FTYPE_PCDOUBLE
:
29400 case VOID_FTYPE_PVOID
:
29405 case VOID_FTYPE_PV2SF_V4SF
:
29406 case VOID_FTYPE_PV4DI_V4DI
:
29407 case VOID_FTYPE_PV2DI_V2DI
:
29408 case VOID_FTYPE_PCHAR_V32QI
:
29409 case VOID_FTYPE_PCHAR_V16QI
:
29410 case VOID_FTYPE_PFLOAT_V8SF
:
29411 case VOID_FTYPE_PFLOAT_V4SF
:
29412 case VOID_FTYPE_PDOUBLE_V4DF
:
29413 case VOID_FTYPE_PDOUBLE_V2DF
:
29414 case VOID_FTYPE_PLONGLONG_LONGLONG
:
29415 case VOID_FTYPE_PULONGLONG_ULONGLONG
:
29416 case VOID_FTYPE_PINT_INT
:
29419 /* Reserve memory operand for target. */
29420 memory
= ARRAY_SIZE (args
);
29422 case V4SF_FTYPE_V4SF_PCV2SF
:
29423 case V2DF_FTYPE_V2DF_PCDOUBLE
:
29428 case V8SF_FTYPE_PCV8SF_V8SI
:
29429 case V4DF_FTYPE_PCV4DF_V4DI
:
29430 case V4SF_FTYPE_PCV4SF_V4SI
:
29431 case V2DF_FTYPE_PCV2DF_V2DI
:
29432 case V8SI_FTYPE_PCV8SI_V8SI
:
29433 case V4DI_FTYPE_PCV4DI_V4DI
:
29434 case V4SI_FTYPE_PCV4SI_V4SI
:
29435 case V2DI_FTYPE_PCV2DI_V2DI
:
29440 case VOID_FTYPE_PV8SF_V8SI_V8SF
:
29441 case VOID_FTYPE_PV4DF_V4DI_V4DF
:
29442 case VOID_FTYPE_PV4SF_V4SI_V4SF
:
29443 case VOID_FTYPE_PV2DF_V2DI_V2DF
:
29444 case VOID_FTYPE_PV8SI_V8SI_V8SI
:
29445 case VOID_FTYPE_PV4DI_V4DI_V4DI
:
29446 case VOID_FTYPE_PV4SI_V4SI_V4SI
:
29447 case VOID_FTYPE_PV2DI_V2DI_V2DI
:
29450 /* Reserve memory operand for target. */
29451 memory
= ARRAY_SIZE (args
);
29453 case VOID_FTYPE_UINT_UINT_UINT
:
29454 case VOID_FTYPE_UINT64_UINT_UINT
:
29455 case UCHAR_FTYPE_UINT_UINT_UINT
:
29456 case UCHAR_FTYPE_UINT64_UINT_UINT
:
29459 memory
= ARRAY_SIZE (args
);
29460 last_arg_constant
= true;
29463 gcc_unreachable ();
29466 gcc_assert (nargs
<= ARRAY_SIZE (args
));
29468 if (klass
== store
)
29470 arg
= CALL_EXPR_ARG (exp
, 0);
29471 op
= expand_normal (arg
);
29472 gcc_assert (target
== 0);
29475 if (GET_MODE (op
) != Pmode
)
29476 op
= convert_to_mode (Pmode
, op
, 1);
29477 target
= gen_rtx_MEM (tmode
, force_reg (Pmode
, op
));
29480 target
= force_reg (tmode
, op
);
29488 || GET_MODE (target
) != tmode
29489 || !insn_p
->operand
[0].predicate (target
, tmode
))
29490 target
= gen_reg_rtx (tmode
);
29493 for (i
= 0; i
< nargs
; i
++)
29495 enum machine_mode mode
= insn_p
->operand
[i
+ 1].mode
;
29498 arg
= CALL_EXPR_ARG (exp
, i
+ arg_adjust
);
29499 op
= expand_normal (arg
);
29500 match
= insn_p
->operand
[i
+ 1].predicate (op
, mode
);
29502 if (last_arg_constant
&& (i
+ 1) == nargs
)
29506 if (icode
== CODE_FOR_lwp_lwpvalsi3
29507 || icode
== CODE_FOR_lwp_lwpinssi3
29508 || icode
== CODE_FOR_lwp_lwpvaldi3
29509 || icode
== CODE_FOR_lwp_lwpinsdi3
)
29510 error ("the last argument must be a 32-bit immediate");
29512 error ("the last argument must be an 8-bit immediate");
29520 /* This must be the memory operand. */
29521 if (GET_MODE (op
) != Pmode
)
29522 op
= convert_to_mode (Pmode
, op
, 1);
29523 op
= gen_rtx_MEM (mode
, force_reg (Pmode
, op
));
29524 gcc_assert (GET_MODE (op
) == mode
29525 || GET_MODE (op
) == VOIDmode
);
29529 /* This must be register. */
29530 if (VECTOR_MODE_P (mode
))
29531 op
= safe_vector_operand (op
, mode
);
29533 gcc_assert (GET_MODE (op
) == mode
29534 || GET_MODE (op
) == VOIDmode
);
29535 op
= copy_to_mode_reg (mode
, op
);
29540 args
[i
].mode
= mode
;
29546 pat
= GEN_FCN (icode
) (target
);
29549 pat
= GEN_FCN (icode
) (target
, args
[0].op
);
29552 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
);
29555 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
);
29558 gcc_unreachable ();
29564 return klass
== store
? 0 : target
;
29567 /* Return the integer constant in ARG. Constrain it to be in the range
29568 of the subparts of VEC_TYPE; issue an error if not. */
29571 get_element_number (tree vec_type
, tree arg
)
29573 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
29575 if (!host_integerp (arg
, 1)
29576 || (elt
= tree_low_cst (arg
, 1), elt
> max
))
29578 error ("selector must be an integer constant in the range 0..%wi", max
);
29585 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
29586 ix86_expand_vector_init. We DO have language-level syntax for this, in
29587 the form of (type){ init-list }. Except that since we can't place emms
29588 instructions from inside the compiler, we can't allow the use of MMX
29589 registers unless the user explicitly asks for it. So we do *not* define
29590 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
29591 we have builtins invoked by mmintrin.h that gives us license to emit
29592 these sorts of instructions. */
29595 ix86_expand_vec_init_builtin (tree type
, tree exp
, rtx target
)
29597 enum machine_mode tmode
= TYPE_MODE (type
);
29598 enum machine_mode inner_mode
= GET_MODE_INNER (tmode
);
29599 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
29600 rtvec v
= rtvec_alloc (n_elt
);
29602 gcc_assert (VECTOR_MODE_P (tmode
));
29603 gcc_assert (call_expr_nargs (exp
) == n_elt
);
29605 for (i
= 0; i
< n_elt
; ++i
)
29607 rtx x
= expand_normal (CALL_EXPR_ARG (exp
, i
));
29608 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
29611 if (!target
|| !register_operand (target
, tmode
))
29612 target
= gen_reg_rtx (tmode
);
29614 ix86_expand_vector_init (true, target
, gen_rtx_PARALLEL (tmode
, v
));
29618 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
29619 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
29620 had a language-level syntax for referencing vector elements. */
29623 ix86_expand_vec_ext_builtin (tree exp
, rtx target
)
29625 enum machine_mode tmode
, mode0
;
29630 arg0
= CALL_EXPR_ARG (exp
, 0);
29631 arg1
= CALL_EXPR_ARG (exp
, 1);
29633 op0
= expand_normal (arg0
);
29634 elt
= get_element_number (TREE_TYPE (arg0
), arg1
);
29636 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
29637 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
29638 gcc_assert (VECTOR_MODE_P (mode0
));
29640 op0
= force_reg (mode0
, op0
);
29642 if (optimize
|| !target
|| !register_operand (target
, tmode
))
29643 target
= gen_reg_rtx (tmode
);
29645 ix86_expand_vector_extract (true, target
, op0
, elt
);
29650 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
29651 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
29652 a language-level syntax for referencing vector elements. */
29655 ix86_expand_vec_set_builtin (tree exp
)
29657 enum machine_mode tmode
, mode1
;
29658 tree arg0
, arg1
, arg2
;
29660 rtx op0
, op1
, target
;
29662 arg0
= CALL_EXPR_ARG (exp
, 0);
29663 arg1
= CALL_EXPR_ARG (exp
, 1);
29664 arg2
= CALL_EXPR_ARG (exp
, 2);
29666 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
29667 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
29668 gcc_assert (VECTOR_MODE_P (tmode
));
29670 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, EXPAND_NORMAL
);
29671 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, EXPAND_NORMAL
);
29672 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
29674 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
29675 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
29677 op0
= force_reg (tmode
, op0
);
29678 op1
= force_reg (mode1
, op1
);
29680 /* OP0 is the source of these builtin functions and shouldn't be
29681 modified. Create a copy, use it and return it as target. */
29682 target
= gen_reg_rtx (tmode
);
29683 emit_move_insn (target
, op0
);
29684 ix86_expand_vector_set (true, target
, op1
, elt
);
29689 /* Expand an expression EXP that calls a built-in function,
29690 with result going to TARGET if that's convenient
29691 (and in mode MODE if that's convenient).
29692 SUBTARGET may be used as the target for computing one of EXP's operands.
29693 IGNORE is nonzero if the value is to be ignored. */
29696 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
29697 enum machine_mode mode ATTRIBUTE_UNUSED
,
29698 int ignore ATTRIBUTE_UNUSED
)
29700 const struct builtin_description
*d
;
29702 enum insn_code icode
;
29703 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
29704 tree arg0
, arg1
, arg2
, arg3
, arg4
;
29705 rtx op0
, op1
, op2
, op3
, op4
, pat
;
29706 enum machine_mode mode0
, mode1
, mode2
, mode3
, mode4
;
29707 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
29709 /* Determine whether the builtin function is available under the current ISA.
29710 Originally the builtin was not created if it wasn't applicable to the
29711 current ISA based on the command line switches. With function specific
29712 options, we need to check in the context of the function making the call
29713 whether it is supported. */
29714 if (ix86_builtins_isa
[fcode
].isa
29715 && !(ix86_builtins_isa
[fcode
].isa
& ix86_isa_flags
))
29717 char *opts
= ix86_target_string (ix86_builtins_isa
[fcode
].isa
, 0, NULL
,
29718 NULL
, (enum fpmath_unit
) 0, false);
29721 error ("%qE needs unknown isa option", fndecl
);
29724 gcc_assert (opts
!= NULL
);
29725 error ("%qE needs isa option %s", fndecl
, opts
);
29733 case IX86_BUILTIN_MASKMOVQ
:
29734 case IX86_BUILTIN_MASKMOVDQU
:
29735 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
29736 ? CODE_FOR_mmx_maskmovq
29737 : CODE_FOR_sse2_maskmovdqu
);
29738 /* Note the arg order is different from the operand order. */
29739 arg1
= CALL_EXPR_ARG (exp
, 0);
29740 arg2
= CALL_EXPR_ARG (exp
, 1);
29741 arg0
= CALL_EXPR_ARG (exp
, 2);
29742 op0
= expand_normal (arg0
);
29743 op1
= expand_normal (arg1
);
29744 op2
= expand_normal (arg2
);
29745 mode0
= insn_data
[icode
].operand
[0].mode
;
29746 mode1
= insn_data
[icode
].operand
[1].mode
;
29747 mode2
= insn_data
[icode
].operand
[2].mode
;
29749 if (GET_MODE (op0
) != Pmode
)
29750 op0
= convert_to_mode (Pmode
, op0
, 1);
29751 op0
= gen_rtx_MEM (mode1
, force_reg (Pmode
, op0
));
29753 if (!insn_data
[icode
].operand
[0].predicate (op0
, mode0
))
29754 op0
= copy_to_mode_reg (mode0
, op0
);
29755 if (!insn_data
[icode
].operand
[1].predicate (op1
, mode1
))
29756 op1
= copy_to_mode_reg (mode1
, op1
);
29757 if (!insn_data
[icode
].operand
[2].predicate (op2
, mode2
))
29758 op2
= copy_to_mode_reg (mode2
, op2
);
29759 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
29765 case IX86_BUILTIN_LDMXCSR
:
29766 op0
= expand_normal (CALL_EXPR_ARG (exp
, 0));
29767 target
= assign_386_stack_local (SImode
, SLOT_VIRTUAL
);
29768 emit_move_insn (target
, op0
);
29769 emit_insn (gen_sse_ldmxcsr (target
));
29772 case IX86_BUILTIN_STMXCSR
:
29773 target
= assign_386_stack_local (SImode
, SLOT_VIRTUAL
);
29774 emit_insn (gen_sse_stmxcsr (target
));
29775 return copy_to_mode_reg (SImode
, target
);
29777 case IX86_BUILTIN_CLFLUSH
:
29778 arg0
= CALL_EXPR_ARG (exp
, 0);
29779 op0
= expand_normal (arg0
);
29780 icode
= CODE_FOR_sse2_clflush
;
29781 if (!insn_data
[icode
].operand
[0].predicate (op0
, Pmode
))
29783 if (GET_MODE (op0
) != Pmode
)
29784 op0
= convert_to_mode (Pmode
, op0
, 1);
29785 op0
= force_reg (Pmode
, op0
);
29788 emit_insn (gen_sse2_clflush (op0
));
29791 case IX86_BUILTIN_MONITOR
:
29792 arg0
= CALL_EXPR_ARG (exp
, 0);
29793 arg1
= CALL_EXPR_ARG (exp
, 1);
29794 arg2
= CALL_EXPR_ARG (exp
, 2);
29795 op0
= expand_normal (arg0
);
29796 op1
= expand_normal (arg1
);
29797 op2
= expand_normal (arg2
);
29800 if (GET_MODE (op0
) != Pmode
)
29801 op0
= convert_to_mode (Pmode
, op0
, 1);
29802 op0
= force_reg (Pmode
, op0
);
29805 op1
= copy_to_mode_reg (SImode
, op1
);
29807 op2
= copy_to_mode_reg (SImode
, op2
);
29808 emit_insn (ix86_gen_monitor (op0
, op1
, op2
));
29811 case IX86_BUILTIN_MWAIT
:
29812 arg0
= CALL_EXPR_ARG (exp
, 0);
29813 arg1
= CALL_EXPR_ARG (exp
, 1);
29814 op0
= expand_normal (arg0
);
29815 op1
= expand_normal (arg1
);
29817 op0
= copy_to_mode_reg (SImode
, op0
);
29819 op1
= copy_to_mode_reg (SImode
, op1
);
29820 emit_insn (gen_sse3_mwait (op0
, op1
));
29823 case IX86_BUILTIN_VEC_INIT_V2SI
:
29824 case IX86_BUILTIN_VEC_INIT_V4HI
:
29825 case IX86_BUILTIN_VEC_INIT_V8QI
:
29826 return ix86_expand_vec_init_builtin (TREE_TYPE (exp
), exp
, target
);
29828 case IX86_BUILTIN_VEC_EXT_V2DF
:
29829 case IX86_BUILTIN_VEC_EXT_V2DI
:
29830 case IX86_BUILTIN_VEC_EXT_V4SF
:
29831 case IX86_BUILTIN_VEC_EXT_V4SI
:
29832 case IX86_BUILTIN_VEC_EXT_V8HI
:
29833 case IX86_BUILTIN_VEC_EXT_V2SI
:
29834 case IX86_BUILTIN_VEC_EXT_V4HI
:
29835 case IX86_BUILTIN_VEC_EXT_V16QI
:
29836 return ix86_expand_vec_ext_builtin (exp
, target
);
29838 case IX86_BUILTIN_VEC_SET_V2DI
:
29839 case IX86_BUILTIN_VEC_SET_V4SF
:
29840 case IX86_BUILTIN_VEC_SET_V4SI
:
29841 case IX86_BUILTIN_VEC_SET_V8HI
:
29842 case IX86_BUILTIN_VEC_SET_V4HI
:
29843 case IX86_BUILTIN_VEC_SET_V16QI
:
29844 return ix86_expand_vec_set_builtin (exp
);
29846 case IX86_BUILTIN_INFQ
:
29847 case IX86_BUILTIN_HUGE_VALQ
:
29849 REAL_VALUE_TYPE inf
;
29853 tmp
= CONST_DOUBLE_FROM_REAL_VALUE (inf
, mode
);
29855 tmp
= validize_mem (force_const_mem (mode
, tmp
));
29858 target
= gen_reg_rtx (mode
);
29860 emit_move_insn (target
, tmp
);
29864 case IX86_BUILTIN_LLWPCB
:
29865 arg0
= CALL_EXPR_ARG (exp
, 0);
29866 op0
= expand_normal (arg0
);
29867 icode
= CODE_FOR_lwp_llwpcb
;
29868 if (!insn_data
[icode
].operand
[0].predicate (op0
, Pmode
))
29870 if (GET_MODE (op0
) != Pmode
)
29871 op0
= convert_to_mode (Pmode
, op0
, 1);
29872 op0
= force_reg (Pmode
, op0
);
29874 emit_insn (gen_lwp_llwpcb (op0
));
29877 case IX86_BUILTIN_SLWPCB
:
29878 icode
= CODE_FOR_lwp_slwpcb
;
29880 || !insn_data
[icode
].operand
[0].predicate (target
, Pmode
))
29881 target
= gen_reg_rtx (Pmode
);
29882 emit_insn (gen_lwp_slwpcb (target
));
29885 case IX86_BUILTIN_BEXTRI32
:
29886 case IX86_BUILTIN_BEXTRI64
:
29887 arg0
= CALL_EXPR_ARG (exp
, 0);
29888 arg1
= CALL_EXPR_ARG (exp
, 1);
29889 op0
= expand_normal (arg0
);
29890 op1
= expand_normal (arg1
);
29891 icode
= (fcode
== IX86_BUILTIN_BEXTRI32
29892 ? CODE_FOR_tbm_bextri_si
29893 : CODE_FOR_tbm_bextri_di
);
29894 if (!CONST_INT_P (op1
))
29896 error ("last argument must be an immediate");
29901 unsigned char length
= (INTVAL (op1
) >> 8) & 0xFF;
29902 unsigned char lsb_index
= INTVAL (op1
) & 0xFF;
29903 op1
= GEN_INT (length
);
29904 op2
= GEN_INT (lsb_index
);
29905 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
29911 case IX86_BUILTIN_RDRAND16_STEP
:
29912 icode
= CODE_FOR_rdrandhi_1
;
29916 case IX86_BUILTIN_RDRAND32_STEP
:
29917 icode
= CODE_FOR_rdrandsi_1
;
29921 case IX86_BUILTIN_RDRAND64_STEP
:
29922 icode
= CODE_FOR_rdranddi_1
;
29926 op0
= gen_reg_rtx (mode0
);
29927 emit_insn (GEN_FCN (icode
) (op0
));
29929 arg0
= CALL_EXPR_ARG (exp
, 0);
29930 op1
= expand_normal (arg0
);
29931 if (!address_operand (op1
, VOIDmode
))
29933 op1
= convert_memory_address (Pmode
, op1
);
29934 op1
= copy_addr_to_reg (op1
);
29936 emit_move_insn (gen_rtx_MEM (mode0
, op1
), op0
);
29938 op1
= gen_reg_rtx (SImode
);
29939 emit_move_insn (op1
, CONST1_RTX (SImode
));
29941 /* Emit SImode conditional move. */
29942 if (mode0
== HImode
)
29944 op2
= gen_reg_rtx (SImode
);
29945 emit_insn (gen_zero_extendhisi2 (op2
, op0
));
29947 else if (mode0
== SImode
)
29950 op2
= gen_rtx_SUBREG (SImode
, op0
, 0);
29953 target
= gen_reg_rtx (SImode
);
29955 pat
= gen_rtx_GEU (VOIDmode
, gen_rtx_REG (CCCmode
, FLAGS_REG
),
29957 emit_insn (gen_rtx_SET (VOIDmode
, target
,
29958 gen_rtx_IF_THEN_ELSE (SImode
, pat
, op2
, op1
)));
29961 case IX86_BUILTIN_GATHERSIV2DF
:
29962 icode
= CODE_FOR_avx2_gathersiv2df
;
29964 case IX86_BUILTIN_GATHERSIV4DF
:
29965 icode
= CODE_FOR_avx2_gathersiv4df
;
29967 case IX86_BUILTIN_GATHERDIV2DF
:
29968 icode
= CODE_FOR_avx2_gatherdiv2df
;
29970 case IX86_BUILTIN_GATHERDIV4DF
:
29971 icode
= CODE_FOR_avx2_gatherdiv4df
;
29973 case IX86_BUILTIN_GATHERSIV4SF
:
29974 icode
= CODE_FOR_avx2_gathersiv4sf
;
29976 case IX86_BUILTIN_GATHERSIV8SF
:
29977 icode
= CODE_FOR_avx2_gathersiv8sf
;
29979 case IX86_BUILTIN_GATHERDIV4SF
:
29980 icode
= CODE_FOR_avx2_gatherdiv4sf
;
29982 case IX86_BUILTIN_GATHERDIV8SF
:
29983 icode
= CODE_FOR_avx2_gatherdiv8sf
;
29985 case IX86_BUILTIN_GATHERSIV2DI
:
29986 icode
= CODE_FOR_avx2_gathersiv2di
;
29988 case IX86_BUILTIN_GATHERSIV4DI
:
29989 icode
= CODE_FOR_avx2_gathersiv4di
;
29991 case IX86_BUILTIN_GATHERDIV2DI
:
29992 icode
= CODE_FOR_avx2_gatherdiv2di
;
29994 case IX86_BUILTIN_GATHERDIV4DI
:
29995 icode
= CODE_FOR_avx2_gatherdiv4di
;
29997 case IX86_BUILTIN_GATHERSIV4SI
:
29998 icode
= CODE_FOR_avx2_gathersiv4si
;
30000 case IX86_BUILTIN_GATHERSIV8SI
:
30001 icode
= CODE_FOR_avx2_gathersiv8si
;
30003 case IX86_BUILTIN_GATHERDIV4SI
:
30004 icode
= CODE_FOR_avx2_gatherdiv4si
;
30006 case IX86_BUILTIN_GATHERDIV8SI
:
30007 icode
= CODE_FOR_avx2_gatherdiv8si
;
30009 case IX86_BUILTIN_GATHERALTSIV4DF
:
30010 icode
= CODE_FOR_avx2_gathersiv4df
;
30012 case IX86_BUILTIN_GATHERALTDIV8SF
:
30013 icode
= CODE_FOR_avx2_gatherdiv8sf
;
30015 case IX86_BUILTIN_GATHERALTSIV4DI
:
30016 icode
= CODE_FOR_avx2_gathersiv4df
;
30018 case IX86_BUILTIN_GATHERALTDIV8SI
:
30019 icode
= CODE_FOR_avx2_gatherdiv8si
;
30023 arg0
= CALL_EXPR_ARG (exp
, 0);
30024 arg1
= CALL_EXPR_ARG (exp
, 1);
30025 arg2
= CALL_EXPR_ARG (exp
, 2);
30026 arg3
= CALL_EXPR_ARG (exp
, 3);
30027 arg4
= CALL_EXPR_ARG (exp
, 4);
30028 op0
= expand_normal (arg0
);
30029 op1
= expand_normal (arg1
);
30030 op2
= expand_normal (arg2
);
30031 op3
= expand_normal (arg3
);
30032 op4
= expand_normal (arg4
);
30033 /* Note the arg order is different from the operand order. */
30034 mode0
= insn_data
[icode
].operand
[1].mode
;
30035 mode2
= insn_data
[icode
].operand
[3].mode
;
30036 mode3
= insn_data
[icode
].operand
[4].mode
;
30037 mode4
= insn_data
[icode
].operand
[5].mode
;
30039 if (target
== NULL_RTX
30040 || GET_MODE (target
) != insn_data
[icode
].operand
[0].mode
)
30041 subtarget
= gen_reg_rtx (insn_data
[icode
].operand
[0].mode
);
30043 subtarget
= target
;
30045 if (fcode
== IX86_BUILTIN_GATHERALTSIV4DF
30046 || fcode
== IX86_BUILTIN_GATHERALTSIV4DI
)
30048 rtx half
= gen_reg_rtx (V4SImode
);
30049 if (!nonimmediate_operand (op2
, V8SImode
))
30050 op2
= copy_to_mode_reg (V8SImode
, op2
);
30051 emit_insn (gen_vec_extract_lo_v8si (half
, op2
));
30054 else if (fcode
== IX86_BUILTIN_GATHERALTDIV8SF
30055 || fcode
== IX86_BUILTIN_GATHERALTDIV8SI
)
30057 rtx (*gen
) (rtx
, rtx
);
30058 rtx half
= gen_reg_rtx (mode0
);
30059 if (mode0
== V4SFmode
)
30060 gen
= gen_vec_extract_lo_v8sf
;
30062 gen
= gen_vec_extract_lo_v8si
;
30063 if (!nonimmediate_operand (op0
, GET_MODE (op0
)))
30064 op0
= copy_to_mode_reg (GET_MODE (op0
), op0
);
30065 emit_insn (gen (half
, op0
));
30067 if (!nonimmediate_operand (op3
, GET_MODE (op3
)))
30068 op3
= copy_to_mode_reg (GET_MODE (op3
), op3
);
30069 emit_insn (gen (half
, op3
));
30073 /* Force memory operand only with base register here. But we
30074 don't want to do it on memory operand for other builtin
30076 if (GET_MODE (op1
) != Pmode
)
30077 op1
= convert_to_mode (Pmode
, op1
, 1);
30078 op1
= force_reg (Pmode
, op1
);
30080 if (!insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
30081 op0
= copy_to_mode_reg (mode0
, op0
);
30082 if (!insn_data
[icode
].operand
[2].predicate (op1
, Pmode
))
30083 op1
= copy_to_mode_reg (Pmode
, op1
);
30084 if (!insn_data
[icode
].operand
[3].predicate (op2
, mode2
))
30085 op2
= copy_to_mode_reg (mode2
, op2
);
30086 if (!insn_data
[icode
].operand
[4].predicate (op3
, mode3
))
30087 op3
= copy_to_mode_reg (mode3
, op3
);
30088 if (!insn_data
[icode
].operand
[5].predicate (op4
, mode4
))
30090 error ("last argument must be scale 1, 2, 4, 8");
30094 /* Optimize. If mask is known to have all high bits set,
30095 replace op0 with pc_rtx to signal that the instruction
30096 overwrites the whole destination and doesn't use its
30097 previous contents. */
30100 if (TREE_CODE (arg3
) == VECTOR_CST
)
30103 unsigned int negative
= 0;
30104 for (elt
= TREE_VECTOR_CST_ELTS (arg3
);
30105 elt
; elt
= TREE_CHAIN (elt
))
30107 tree cst
= TREE_VALUE (elt
);
30108 if (TREE_CODE (cst
) == INTEGER_CST
30109 && tree_int_cst_sign_bit (cst
))
30111 else if (TREE_CODE (cst
) == REAL_CST
30112 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst
)))
30115 if (negative
== TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3
)))
30118 else if (TREE_CODE (arg3
) == SSA_NAME
)
30120 /* Recognize also when mask is like:
30121 __v2df src = _mm_setzero_pd ();
30122 __v2df mask = _mm_cmpeq_pd (src, src);
30124 __v8sf src = _mm256_setzero_ps ();
30125 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
30126 as that is a cheaper way to load all ones into
30127 a register than having to load a constant from
30129 gimple def_stmt
= SSA_NAME_DEF_STMT (arg3
);
30130 if (is_gimple_call (def_stmt
))
30132 tree fndecl
= gimple_call_fndecl (def_stmt
);
30134 && DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_MD
)
30135 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl
))
30137 case IX86_BUILTIN_CMPPD
:
30138 case IX86_BUILTIN_CMPPS
:
30139 case IX86_BUILTIN_CMPPD256
:
30140 case IX86_BUILTIN_CMPPS256
:
30141 if (!integer_zerop (gimple_call_arg (def_stmt
, 2)))
30144 case IX86_BUILTIN_CMPEQPD
:
30145 case IX86_BUILTIN_CMPEQPS
:
30146 if (initializer_zerop (gimple_call_arg (def_stmt
, 0))
30147 && initializer_zerop (gimple_call_arg (def_stmt
,
30158 pat
= GEN_FCN (icode
) (subtarget
, op0
, op1
, op2
, op3
, op4
);
30163 if (fcode
== IX86_BUILTIN_GATHERDIV8SF
30164 || fcode
== IX86_BUILTIN_GATHERDIV8SI
)
30166 enum machine_mode tmode
= GET_MODE (subtarget
) == V8SFmode
30167 ? V4SFmode
: V4SImode
;
30168 if (target
== NULL_RTX
)
30169 target
= gen_reg_rtx (tmode
);
30170 if (tmode
== V4SFmode
)
30171 emit_insn (gen_vec_extract_lo_v8sf (target
, subtarget
));
30173 emit_insn (gen_vec_extract_lo_v8si (target
, subtarget
));
30176 target
= subtarget
;
30184 for (i
= 0, d
= bdesc_special_args
;
30185 i
< ARRAY_SIZE (bdesc_special_args
);
30187 if (d
->code
== fcode
)
30188 return ix86_expand_special_args_builtin (d
, exp
, target
);
30190 for (i
= 0, d
= bdesc_args
;
30191 i
< ARRAY_SIZE (bdesc_args
);
30193 if (d
->code
== fcode
)
30196 case IX86_BUILTIN_FABSQ
:
30197 case IX86_BUILTIN_COPYSIGNQ
:
30199 /* Emit a normal call if SSE2 isn't available. */
30200 return expand_call (exp
, target
, ignore
);
30202 return ix86_expand_args_builtin (d
, exp
, target
);
30205 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
30206 if (d
->code
== fcode
)
30207 return ix86_expand_sse_comi (d
, exp
, target
);
30209 for (i
= 0, d
= bdesc_pcmpestr
;
30210 i
< ARRAY_SIZE (bdesc_pcmpestr
);
30212 if (d
->code
== fcode
)
30213 return ix86_expand_sse_pcmpestr (d
, exp
, target
);
30215 for (i
= 0, d
= bdesc_pcmpistr
;
30216 i
< ARRAY_SIZE (bdesc_pcmpistr
);
30218 if (d
->code
== fcode
)
30219 return ix86_expand_sse_pcmpistr (d
, exp
, target
);
30221 for (i
= 0, d
= bdesc_multi_arg
; i
< ARRAY_SIZE (bdesc_multi_arg
); i
++, d
++)
30222 if (d
->code
== fcode
)
30223 return ix86_expand_multi_arg_builtin (d
->icode
, exp
, target
,
30224 (enum ix86_builtin_func_type
)
30225 d
->flag
, d
->comparison
);
30227 gcc_unreachable ();
30230 /* Returns a function decl for a vectorized version of the builtin function
30231 with builtin function code FN and the result vector type TYPE, or NULL_TREE
30232 if it is not available. */
30235 ix86_builtin_vectorized_function (tree fndecl
, tree type_out
,
30238 enum machine_mode in_mode
, out_mode
;
30240 enum built_in_function fn
= DECL_FUNCTION_CODE (fndecl
);
30242 if (TREE_CODE (type_out
) != VECTOR_TYPE
30243 || TREE_CODE (type_in
) != VECTOR_TYPE
30244 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
30247 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
30248 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
30249 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
30250 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
30254 case BUILT_IN_SQRT
:
30255 if (out_mode
== DFmode
&& in_mode
== DFmode
)
30257 if (out_n
== 2 && in_n
== 2)
30258 return ix86_builtins
[IX86_BUILTIN_SQRTPD
];
30259 else if (out_n
== 4 && in_n
== 4)
30260 return ix86_builtins
[IX86_BUILTIN_SQRTPD256
];
30264 case BUILT_IN_SQRTF
:
30265 if (out_mode
== SFmode
&& in_mode
== SFmode
)
30267 if (out_n
== 4 && in_n
== 4)
30268 return ix86_builtins
[IX86_BUILTIN_SQRTPS_NR
];
30269 else if (out_n
== 8 && in_n
== 8)
30270 return ix86_builtins
[IX86_BUILTIN_SQRTPS_NR256
];
30274 case BUILT_IN_IFLOOR
:
30275 case BUILT_IN_LFLOOR
:
30276 case BUILT_IN_LLFLOOR
:
30277 /* The round insn does not trap on denormals. */
30278 if (flag_trapping_math
|| !TARGET_ROUND
)
30281 if (out_mode
== SImode
&& in_mode
== DFmode
)
30283 if (out_n
== 4 && in_n
== 2)
30284 return ix86_builtins
[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
];
30285 else if (out_n
== 8 && in_n
== 4)
30286 return ix86_builtins
[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
];
30290 case BUILT_IN_IFLOORF
:
30291 case BUILT_IN_LFLOORF
:
30292 case BUILT_IN_LLFLOORF
:
30293 /* The round insn does not trap on denormals. */
30294 if (flag_trapping_math
|| !TARGET_ROUND
)
30297 if (out_mode
== SImode
&& in_mode
== SFmode
)
30299 if (out_n
== 4 && in_n
== 4)
30300 return ix86_builtins
[IX86_BUILTIN_FLOORPS_SFIX
];
30301 else if (out_n
== 8 && in_n
== 8)
30302 return ix86_builtins
[IX86_BUILTIN_FLOORPS_SFIX256
];
30306 case BUILT_IN_ICEIL
:
30307 case BUILT_IN_LCEIL
:
30308 case BUILT_IN_LLCEIL
:
30309 /* The round insn does not trap on denormals. */
30310 if (flag_trapping_math
|| !TARGET_ROUND
)
30313 if (out_mode
== SImode
&& in_mode
== DFmode
)
30315 if (out_n
== 4 && in_n
== 2)
30316 return ix86_builtins
[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
];
30317 else if (out_n
== 8 && in_n
== 4)
30318 return ix86_builtins
[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
];
30322 case BUILT_IN_ICEILF
:
30323 case BUILT_IN_LCEILF
:
30324 case BUILT_IN_LLCEILF
:
30325 /* The round insn does not trap on denormals. */
30326 if (flag_trapping_math
|| !TARGET_ROUND
)
30329 if (out_mode
== SImode
&& in_mode
== SFmode
)
30331 if (out_n
== 4 && in_n
== 4)
30332 return ix86_builtins
[IX86_BUILTIN_CEILPS_SFIX
];
30333 else if (out_n
== 8 && in_n
== 8)
30334 return ix86_builtins
[IX86_BUILTIN_CEILPS_SFIX256
];
30338 case BUILT_IN_IRINT
:
30339 case BUILT_IN_LRINT
:
30340 case BUILT_IN_LLRINT
:
30341 if (out_mode
== SImode
&& in_mode
== DFmode
)
30343 if (out_n
== 4 && in_n
== 2)
30344 return ix86_builtins
[IX86_BUILTIN_VEC_PACK_SFIX
];
30345 else if (out_n
== 8 && in_n
== 4)
30346 return ix86_builtins
[IX86_BUILTIN_VEC_PACK_SFIX256
];
30350 case BUILT_IN_IRINTF
:
30351 case BUILT_IN_LRINTF
:
30352 case BUILT_IN_LLRINTF
:
30353 if (out_mode
== SImode
&& in_mode
== SFmode
)
30355 if (out_n
== 4 && in_n
== 4)
30356 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ
];
30357 else if (out_n
== 8 && in_n
== 8)
30358 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ256
];
30362 case BUILT_IN_IROUND
:
30363 case BUILT_IN_LROUND
:
30364 case BUILT_IN_LLROUND
:
30365 /* The round insn does not trap on denormals. */
30366 if (flag_trapping_math
|| !TARGET_ROUND
)
30369 if (out_mode
== SImode
&& in_mode
== DFmode
)
30371 if (out_n
== 4 && in_n
== 2)
30372 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
];
30373 else if (out_n
== 8 && in_n
== 4)
30374 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
];
30378 case BUILT_IN_IROUNDF
:
30379 case BUILT_IN_LROUNDF
:
30380 case BUILT_IN_LLROUNDF
:
30381 /* The round insn does not trap on denormals. */
30382 if (flag_trapping_math
|| !TARGET_ROUND
)
30385 if (out_mode
== SImode
&& in_mode
== SFmode
)
30387 if (out_n
== 4 && in_n
== 4)
30388 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ_SFIX
];
30389 else if (out_n
== 8 && in_n
== 8)
30390 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ_SFIX256
];
30394 case BUILT_IN_COPYSIGN
:
30395 if (out_mode
== DFmode
&& in_mode
== DFmode
)
30397 if (out_n
== 2 && in_n
== 2)
30398 return ix86_builtins
[IX86_BUILTIN_CPYSGNPD
];
30399 else if (out_n
== 4 && in_n
== 4)
30400 return ix86_builtins
[IX86_BUILTIN_CPYSGNPD256
];
30404 case BUILT_IN_COPYSIGNF
:
30405 if (out_mode
== SFmode
&& in_mode
== SFmode
)
30407 if (out_n
== 4 && in_n
== 4)
30408 return ix86_builtins
[IX86_BUILTIN_CPYSGNPS
];
30409 else if (out_n
== 8 && in_n
== 8)
30410 return ix86_builtins
[IX86_BUILTIN_CPYSGNPS256
];
30414 case BUILT_IN_FLOOR
:
30415 /* The round insn does not trap on denormals. */
30416 if (flag_trapping_math
|| !TARGET_ROUND
)
30419 if (out_mode
== DFmode
&& in_mode
== DFmode
)
30421 if (out_n
== 2 && in_n
== 2)
30422 return ix86_builtins
[IX86_BUILTIN_FLOORPD
];
30423 else if (out_n
== 4 && in_n
== 4)
30424 return ix86_builtins
[IX86_BUILTIN_FLOORPD256
];
30428 case BUILT_IN_FLOORF
:
30429 /* The round insn does not trap on denormals. */
30430 if (flag_trapping_math
|| !TARGET_ROUND
)
30433 if (out_mode
== SFmode
&& in_mode
== SFmode
)
30435 if (out_n
== 4 && in_n
== 4)
30436 return ix86_builtins
[IX86_BUILTIN_FLOORPS
];
30437 else if (out_n
== 8 && in_n
== 8)
30438 return ix86_builtins
[IX86_BUILTIN_FLOORPS256
];
30442 case BUILT_IN_CEIL
:
30443 /* The round insn does not trap on denormals. */
30444 if (flag_trapping_math
|| !TARGET_ROUND
)
30447 if (out_mode
== DFmode
&& in_mode
== DFmode
)
30449 if (out_n
== 2 && in_n
== 2)
30450 return ix86_builtins
[IX86_BUILTIN_CEILPD
];
30451 else if (out_n
== 4 && in_n
== 4)
30452 return ix86_builtins
[IX86_BUILTIN_CEILPD256
];
30456 case BUILT_IN_CEILF
:
30457 /* The round insn does not trap on denormals. */
30458 if (flag_trapping_math
|| !TARGET_ROUND
)
30461 if (out_mode
== SFmode
&& in_mode
== SFmode
)
30463 if (out_n
== 4 && in_n
== 4)
30464 return ix86_builtins
[IX86_BUILTIN_CEILPS
];
30465 else if (out_n
== 8 && in_n
== 8)
30466 return ix86_builtins
[IX86_BUILTIN_CEILPS256
];
30470 case BUILT_IN_TRUNC
:
30471 /* The round insn does not trap on denormals. */
30472 if (flag_trapping_math
|| !TARGET_ROUND
)
30475 if (out_mode
== DFmode
&& in_mode
== DFmode
)
30477 if (out_n
== 2 && in_n
== 2)
30478 return ix86_builtins
[IX86_BUILTIN_TRUNCPD
];
30479 else if (out_n
== 4 && in_n
== 4)
30480 return ix86_builtins
[IX86_BUILTIN_TRUNCPD256
];
30484 case BUILT_IN_TRUNCF
:
30485 /* The round insn does not trap on denormals. */
30486 if (flag_trapping_math
|| !TARGET_ROUND
)
30489 if (out_mode
== SFmode
&& in_mode
== SFmode
)
30491 if (out_n
== 4 && in_n
== 4)
30492 return ix86_builtins
[IX86_BUILTIN_TRUNCPS
];
30493 else if (out_n
== 8 && in_n
== 8)
30494 return ix86_builtins
[IX86_BUILTIN_TRUNCPS256
];
30498 case BUILT_IN_RINT
:
30499 /* The round insn does not trap on denormals. */
30500 if (flag_trapping_math
|| !TARGET_ROUND
)
30503 if (out_mode
== DFmode
&& in_mode
== DFmode
)
30505 if (out_n
== 2 && in_n
== 2)
30506 return ix86_builtins
[IX86_BUILTIN_RINTPD
];
30507 else if (out_n
== 4 && in_n
== 4)
30508 return ix86_builtins
[IX86_BUILTIN_RINTPD256
];
30512 case BUILT_IN_RINTF
:
30513 /* The round insn does not trap on denormals. */
30514 if (flag_trapping_math
|| !TARGET_ROUND
)
30517 if (out_mode
== SFmode
&& in_mode
== SFmode
)
30519 if (out_n
== 4 && in_n
== 4)
30520 return ix86_builtins
[IX86_BUILTIN_RINTPS
];
30521 else if (out_n
== 8 && in_n
== 8)
30522 return ix86_builtins
[IX86_BUILTIN_RINTPS256
];
30526 case BUILT_IN_ROUND
:
30527 /* The round insn does not trap on denormals. */
30528 if (flag_trapping_math
|| !TARGET_ROUND
)
30531 if (out_mode
== DFmode
&& in_mode
== DFmode
)
30533 if (out_n
== 2 && in_n
== 2)
30534 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ
];
30535 else if (out_n
== 4 && in_n
== 4)
30536 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ256
];
30540 case BUILT_IN_ROUNDF
:
30541 /* The round insn does not trap on denormals. */
30542 if (flag_trapping_math
|| !TARGET_ROUND
)
30545 if (out_mode
== SFmode
&& in_mode
== SFmode
)
30547 if (out_n
== 4 && in_n
== 4)
30548 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ
];
30549 else if (out_n
== 8 && in_n
== 8)
30550 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ256
];
30555 if (out_mode
== DFmode
&& in_mode
== DFmode
)
30557 if (out_n
== 2 && in_n
== 2)
30558 return ix86_builtins
[IX86_BUILTIN_VFMADDPD
];
30559 if (out_n
== 4 && in_n
== 4)
30560 return ix86_builtins
[IX86_BUILTIN_VFMADDPD256
];
30564 case BUILT_IN_FMAF
:
30565 if (out_mode
== SFmode
&& in_mode
== SFmode
)
30567 if (out_n
== 4 && in_n
== 4)
30568 return ix86_builtins
[IX86_BUILTIN_VFMADDPS
];
30569 if (out_n
== 8 && in_n
== 8)
30570 return ix86_builtins
[IX86_BUILTIN_VFMADDPS256
];
30578 /* Dispatch to a handler for a vectorization library. */
30579 if (ix86_veclib_handler
)
30580 return ix86_veclib_handler ((enum built_in_function
) fn
, type_out
,
30586 /* Handler for an SVML-style interface to
30587 a library with vectorized intrinsics. */
30590 ix86_veclibabi_svml (enum built_in_function fn
, tree type_out
, tree type_in
)
30593 tree fntype
, new_fndecl
, args
;
30596 enum machine_mode el_mode
, in_mode
;
30599 /* The SVML is suitable for unsafe math only. */
30600 if (!flag_unsafe_math_optimizations
)
30603 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
30604 n
= TYPE_VECTOR_SUBPARTS (type_out
);
30605 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
30606 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
30607 if (el_mode
!= in_mode
30615 case BUILT_IN_LOG10
:
30617 case BUILT_IN_TANH
:
30619 case BUILT_IN_ATAN
:
30620 case BUILT_IN_ATAN2
:
30621 case BUILT_IN_ATANH
:
30622 case BUILT_IN_CBRT
:
30623 case BUILT_IN_SINH
:
30625 case BUILT_IN_ASINH
:
30626 case BUILT_IN_ASIN
:
30627 case BUILT_IN_COSH
:
30629 case BUILT_IN_ACOSH
:
30630 case BUILT_IN_ACOS
:
30631 if (el_mode
!= DFmode
|| n
!= 2)
30635 case BUILT_IN_EXPF
:
30636 case BUILT_IN_LOGF
:
30637 case BUILT_IN_LOG10F
:
30638 case BUILT_IN_POWF
:
30639 case BUILT_IN_TANHF
:
30640 case BUILT_IN_TANF
:
30641 case BUILT_IN_ATANF
:
30642 case BUILT_IN_ATAN2F
:
30643 case BUILT_IN_ATANHF
:
30644 case BUILT_IN_CBRTF
:
30645 case BUILT_IN_SINHF
:
30646 case BUILT_IN_SINF
:
30647 case BUILT_IN_ASINHF
:
30648 case BUILT_IN_ASINF
:
30649 case BUILT_IN_COSHF
:
30650 case BUILT_IN_COSF
:
30651 case BUILT_IN_ACOSHF
:
30652 case BUILT_IN_ACOSF
:
30653 if (el_mode
!= SFmode
|| n
!= 4)
30661 bname
= IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn
)));
30663 if (fn
== BUILT_IN_LOGF
)
30664 strcpy (name
, "vmlsLn4");
30665 else if (fn
== BUILT_IN_LOG
)
30666 strcpy (name
, "vmldLn2");
30669 sprintf (name
, "vmls%s", bname
+10);
30670 name
[strlen (name
)-1] = '4';
30673 sprintf (name
, "vmld%s2", bname
+10);
30675 /* Convert to uppercase. */
30679 for (args
= DECL_ARGUMENTS (builtin_decl_implicit (fn
));
30681 args
= TREE_CHAIN (args
))
30685 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
30687 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
30689 /* Build a function declaration for the vectorized function. */
30690 new_fndecl
= build_decl (BUILTINS_LOCATION
,
30691 FUNCTION_DECL
, get_identifier (name
), fntype
);
30692 TREE_PUBLIC (new_fndecl
) = 1;
30693 DECL_EXTERNAL (new_fndecl
) = 1;
30694 DECL_IS_NOVOPS (new_fndecl
) = 1;
30695 TREE_READONLY (new_fndecl
) = 1;
30700 /* Handler for an ACML-style interface to
30701 a library with vectorized intrinsics. */
30704 ix86_veclibabi_acml (enum built_in_function fn
, tree type_out
, tree type_in
)
30706 char name
[20] = "__vr.._";
30707 tree fntype
, new_fndecl
, args
;
30710 enum machine_mode el_mode
, in_mode
;
30713 /* The ACML is 64bits only and suitable for unsafe math only as
30714 it does not correctly support parts of IEEE with the required
30715 precision such as denormals. */
30717 || !flag_unsafe_math_optimizations
)
30720 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
30721 n
= TYPE_VECTOR_SUBPARTS (type_out
);
30722 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
30723 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
30724 if (el_mode
!= in_mode
30734 case BUILT_IN_LOG2
:
30735 case BUILT_IN_LOG10
:
30738 if (el_mode
!= DFmode
30743 case BUILT_IN_SINF
:
30744 case BUILT_IN_COSF
:
30745 case BUILT_IN_EXPF
:
30746 case BUILT_IN_POWF
:
30747 case BUILT_IN_LOGF
:
30748 case BUILT_IN_LOG2F
:
30749 case BUILT_IN_LOG10F
:
30752 if (el_mode
!= SFmode
30761 bname
= IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn
)));
30762 sprintf (name
+ 7, "%s", bname
+10);
30765 for (args
= DECL_ARGUMENTS (builtin_decl_implicit (fn
));
30767 args
= TREE_CHAIN (args
))
30771 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
30773 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
30775 /* Build a function declaration for the vectorized function. */
30776 new_fndecl
= build_decl (BUILTINS_LOCATION
,
30777 FUNCTION_DECL
, get_identifier (name
), fntype
);
30778 TREE_PUBLIC (new_fndecl
) = 1;
30779 DECL_EXTERNAL (new_fndecl
) = 1;
30780 DECL_IS_NOVOPS (new_fndecl
) = 1;
30781 TREE_READONLY (new_fndecl
) = 1;
30786 /* Returns a decl of a function that implements gather load with
30787 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
30788 Return NULL_TREE if it is not available. */
30791 ix86_vectorize_builtin_gather (const_tree mem_vectype
,
30792 const_tree index_type
, int scale
)
30795 enum ix86_builtins code
;
30800 if ((TREE_CODE (index_type
) != INTEGER_TYPE
30801 && !POINTER_TYPE_P (index_type
))
30802 || (TYPE_MODE (index_type
) != SImode
30803 && TYPE_MODE (index_type
) != DImode
))
30806 if (TYPE_PRECISION (index_type
) > POINTER_SIZE
)
30809 /* v*gather* insn sign extends index to pointer mode. */
30810 if (TYPE_PRECISION (index_type
) < POINTER_SIZE
30811 && TYPE_UNSIGNED (index_type
))
30816 || (scale
& (scale
- 1)) != 0)
30819 si
= TYPE_MODE (index_type
) == SImode
;
30820 switch (TYPE_MODE (mem_vectype
))
30823 code
= si
? IX86_BUILTIN_GATHERSIV2DF
: IX86_BUILTIN_GATHERDIV2DF
;
30826 code
= si
? IX86_BUILTIN_GATHERALTSIV4DF
: IX86_BUILTIN_GATHERDIV4DF
;
30829 code
= si
? IX86_BUILTIN_GATHERSIV2DI
: IX86_BUILTIN_GATHERDIV2DI
;
30832 code
= si
? IX86_BUILTIN_GATHERALTSIV4DI
: IX86_BUILTIN_GATHERDIV4DI
;
30835 code
= si
? IX86_BUILTIN_GATHERSIV4SF
: IX86_BUILTIN_GATHERDIV4SF
;
30838 code
= si
? IX86_BUILTIN_GATHERSIV8SF
: IX86_BUILTIN_GATHERALTDIV8SF
;
30841 code
= si
? IX86_BUILTIN_GATHERSIV4SI
: IX86_BUILTIN_GATHERDIV4SI
;
30844 code
= si
? IX86_BUILTIN_GATHERSIV8SI
: IX86_BUILTIN_GATHERALTDIV8SI
;
30850 return ix86_builtins
[code
];
30853 /* Returns a code for a target-specific builtin that implements
30854 reciprocal of the function, or NULL_TREE if not available. */
30857 ix86_builtin_reciprocal (unsigned int fn
, bool md_fn
,
30858 bool sqrt ATTRIBUTE_UNUSED
)
30860 if (! (TARGET_SSE_MATH
&& !optimize_insn_for_size_p ()
30861 && flag_finite_math_only
&& !flag_trapping_math
30862 && flag_unsafe_math_optimizations
))
30866 /* Machine dependent builtins. */
30869 /* Vectorized version of sqrt to rsqrt conversion. */
30870 case IX86_BUILTIN_SQRTPS_NR
:
30871 return ix86_builtins
[IX86_BUILTIN_RSQRTPS_NR
];
30873 case IX86_BUILTIN_SQRTPS_NR256
:
30874 return ix86_builtins
[IX86_BUILTIN_RSQRTPS_NR256
];
30880 /* Normal builtins. */
30883 /* Sqrt to rsqrt conversion. */
30884 case BUILT_IN_SQRTF
:
30885 return ix86_builtins
[IX86_BUILTIN_RSQRTF
];
30892 /* Helper for avx_vpermilps256_operand et al. This is also used by
30893 the expansion functions to turn the parallel back into a mask.
30894 The return value is 0 for no match and the imm8+1 for a match. */
30897 avx_vpermilp_parallel (rtx par
, enum machine_mode mode
)
30899 unsigned i
, nelt
= GET_MODE_NUNITS (mode
);
30901 unsigned char ipar
[8];
30903 if (XVECLEN (par
, 0) != (int) nelt
)
30906 /* Validate that all of the elements are constants, and not totally
30907 out of range. Copy the data into an integral array to make the
30908 subsequent checks easier. */
30909 for (i
= 0; i
< nelt
; ++i
)
30911 rtx er
= XVECEXP (par
, 0, i
);
30912 unsigned HOST_WIDE_INT ei
;
30914 if (!CONST_INT_P (er
))
30925 /* In the 256-bit DFmode case, we can only move elements within
30927 for (i
= 0; i
< 2; ++i
)
30931 mask
|= ipar
[i
] << i
;
30933 for (i
= 2; i
< 4; ++i
)
30937 mask
|= (ipar
[i
] - 2) << i
;
30942 /* In the 256-bit SFmode case, we have full freedom of movement
30943 within the low 128-bit lane, but the high 128-bit lane must
30944 mirror the exact same pattern. */
30945 for (i
= 0; i
< 4; ++i
)
30946 if (ipar
[i
] + 4 != ipar
[i
+ 4])
30953 /* In the 128-bit case, we've full freedom in the placement of
30954 the elements from the source operand. */
30955 for (i
= 0; i
< nelt
; ++i
)
30956 mask
|= ipar
[i
] << (i
* (nelt
/ 2));
30960 gcc_unreachable ();
30963 /* Make sure success has a non-zero value by adding one. */
30967 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
30968 the expansion functions to turn the parallel back into a mask.
30969 The return value is 0 for no match and the imm8+1 for a match. */
30972 avx_vperm2f128_parallel (rtx par
, enum machine_mode mode
)
30974 unsigned i
, nelt
= GET_MODE_NUNITS (mode
), nelt2
= nelt
/ 2;
30976 unsigned char ipar
[8];
30978 if (XVECLEN (par
, 0) != (int) nelt
)
30981 /* Validate that all of the elements are constants, and not totally
30982 out of range. Copy the data into an integral array to make the
30983 subsequent checks easier. */
30984 for (i
= 0; i
< nelt
; ++i
)
30986 rtx er
= XVECEXP (par
, 0, i
);
30987 unsigned HOST_WIDE_INT ei
;
30989 if (!CONST_INT_P (er
))
30992 if (ei
>= 2 * nelt
)
30997 /* Validate that the halves of the permute are halves. */
30998 for (i
= 0; i
< nelt2
- 1; ++i
)
30999 if (ipar
[i
] + 1 != ipar
[i
+ 1])
31001 for (i
= nelt2
; i
< nelt
- 1; ++i
)
31002 if (ipar
[i
] + 1 != ipar
[i
+ 1])
31005 /* Reconstruct the mask. */
31006 for (i
= 0; i
< 2; ++i
)
31008 unsigned e
= ipar
[i
* nelt2
];
31012 mask
|= e
<< (i
* 4);
31015 /* Make sure success has a non-zero value by adding one. */
31019 /* Store OPERAND to the memory after reload is completed. This means
31020 that we can't easily use assign_stack_local. */
31022 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
31026 gcc_assert (reload_completed
);
31027 if (ix86_using_red_zone ())
31029 result
= gen_rtx_MEM (mode
,
31030 gen_rtx_PLUS (Pmode
,
31032 GEN_INT (-RED_ZONE_SIZE
)));
31033 emit_move_insn (result
, operand
);
31035 else if (TARGET_64BIT
)
31041 operand
= gen_lowpart (DImode
, operand
);
31045 gen_rtx_SET (VOIDmode
,
31046 gen_rtx_MEM (DImode
,
31047 gen_rtx_PRE_DEC (DImode
,
31048 stack_pointer_rtx
)),
31052 gcc_unreachable ();
31054 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
31063 split_double_mode (mode
, &operand
, 1, operands
, operands
+ 1);
31065 gen_rtx_SET (VOIDmode
,
31066 gen_rtx_MEM (SImode
,
31067 gen_rtx_PRE_DEC (Pmode
,
31068 stack_pointer_rtx
)),
31071 gen_rtx_SET (VOIDmode
,
31072 gen_rtx_MEM (SImode
,
31073 gen_rtx_PRE_DEC (Pmode
,
31074 stack_pointer_rtx
)),
31079 /* Store HImodes as SImodes. */
31080 operand
= gen_lowpart (SImode
, operand
);
31084 gen_rtx_SET (VOIDmode
,
31085 gen_rtx_MEM (GET_MODE (operand
),
31086 gen_rtx_PRE_DEC (SImode
,
31087 stack_pointer_rtx
)),
31091 gcc_unreachable ();
31093 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
31098 /* Free operand from the memory. */
31100 ix86_free_from_memory (enum machine_mode mode
)
31102 if (!ix86_using_red_zone ())
31106 if (mode
== DImode
|| TARGET_64BIT
)
31110 /* Use LEA to deallocate stack space. In peephole2 it will be converted
31111 to pop or add instruction if registers are available. */
31112 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
31113 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
31118 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
31120 Put float CONST_DOUBLE in the constant pool instead of fp regs.
31121 QImode must go into class Q_REGS.
31122 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
31123 movdf to do mem-to-mem moves through integer regs. */
31126 ix86_preferred_reload_class (rtx x
, reg_class_t regclass
)
31128 enum machine_mode mode
= GET_MODE (x
);
31130 /* We're only allowed to return a subclass of CLASS. Many of the
31131 following checks fail for NO_REGS, so eliminate that early. */
31132 if (regclass
== NO_REGS
)
31135 /* All classes can load zeros. */
31136 if (x
== CONST0_RTX (mode
))
31139 /* Force constants into memory if we are loading a (nonzero) constant into
31140 an MMX or SSE register. This is because there are no MMX/SSE instructions
31141 to load from a constant. */
31143 && (MAYBE_MMX_CLASS_P (regclass
) || MAYBE_SSE_CLASS_P (regclass
)))
31146 /* Prefer SSE regs only, if we can use them for math. */
31147 if (TARGET_SSE_MATH
&& !TARGET_MIX_SSE_I387
&& SSE_FLOAT_MODE_P (mode
))
31148 return SSE_CLASS_P (regclass
) ? regclass
: NO_REGS
;
31150 /* Floating-point constants need more complex checks. */
31151 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
31153 /* General regs can load everything. */
31154 if (reg_class_subset_p (regclass
, GENERAL_REGS
))
31157 /* Floats can load 0 and 1 plus some others. Note that we eliminated
31158 zero above. We only want to wind up preferring 80387 registers if
31159 we plan on doing computation with them. */
31161 && standard_80387_constant_p (x
) > 0)
31163 /* Limit class to non-sse. */
31164 if (regclass
== FLOAT_SSE_REGS
)
31166 if (regclass
== FP_TOP_SSE_REGS
)
31168 if (regclass
== FP_SECOND_SSE_REGS
)
31169 return FP_SECOND_REG
;
31170 if (regclass
== FLOAT_INT_REGS
|| regclass
== FLOAT_REGS
)
31177 /* Generally when we see PLUS here, it's the function invariant
31178 (plus soft-fp const_int). Which can only be computed into general
31180 if (GET_CODE (x
) == PLUS
)
31181 return reg_class_subset_p (regclass
, GENERAL_REGS
) ? regclass
: NO_REGS
;
31183 /* QImode constants are easy to load, but non-constant QImode data
31184 must go into Q_REGS. */
31185 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
31187 if (reg_class_subset_p (regclass
, Q_REGS
))
31189 if (reg_class_subset_p (Q_REGS
, regclass
))
31197 /* Discourage putting floating-point values in SSE registers unless
31198 SSE math is being used, and likewise for the 387 registers. */
31200 ix86_preferred_output_reload_class (rtx x
, reg_class_t regclass
)
31202 enum machine_mode mode
= GET_MODE (x
);
31204 /* Restrict the output reload class to the register bank that we are doing
31205 math on. If we would like not to return a subset of CLASS, reject this
31206 alternative: if reload cannot do this, it will still use its choice. */
31207 mode
= GET_MODE (x
);
31208 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
31209 return MAYBE_SSE_CLASS_P (regclass
) ? SSE_REGS
: NO_REGS
;
31211 if (X87_FLOAT_MODE_P (mode
))
31213 if (regclass
== FP_TOP_SSE_REGS
)
31215 else if (regclass
== FP_SECOND_SSE_REGS
)
31216 return FP_SECOND_REG
;
31218 return FLOAT_CLASS_P (regclass
) ? regclass
: NO_REGS
;
31225 ix86_secondary_reload (bool in_p
, rtx x
, reg_class_t rclass
,
31226 enum machine_mode mode
, secondary_reload_info
*sri
)
31228 /* Double-word spills from general registers to non-offsettable memory
31229 references (zero-extended addresses) require special handling. */
31232 && GET_MODE_SIZE (mode
) > UNITS_PER_WORD
31233 && rclass
== GENERAL_REGS
31234 && !offsettable_memref_p (x
))
31237 ? CODE_FOR_reload_noff_load
31238 : CODE_FOR_reload_noff_store
);
31239 /* Add the cost of moving address to a temporary. */
31240 sri
->extra_cost
= 1;
31245 /* QImode spills from non-QI registers require
31246 intermediate register on 32bit targets. */
31248 && !in_p
&& mode
== QImode
31249 && (rclass
== GENERAL_REGS
31250 || rclass
== LEGACY_REGS
31251 || rclass
== INDEX_REGS
))
31260 if (regno
>= FIRST_PSEUDO_REGISTER
|| GET_CODE (x
) == SUBREG
)
31261 regno
= true_regnum (x
);
31263 /* Return Q_REGS if the operand is in memory. */
31268 /* This condition handles corner case where an expression involving
31269 pointers gets vectorized. We're trying to use the address of a
31270 stack slot as a vector initializer.
31272 (set (reg:V2DI 74 [ vect_cst_.2 ])
31273 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
31275 Eventually frame gets turned into sp+offset like this:
31277 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
31278 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
31279 (const_int 392 [0x188]))))
31281 That later gets turned into:
31283 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
31284 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
31285 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
31287 We'll have the following reload recorded:
31289 Reload 0: reload_in (DI) =
31290 (plus:DI (reg/f:DI 7 sp)
31291 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
31292 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
31293 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
31294 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
31295 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
31296 reload_reg_rtx: (reg:V2DI 22 xmm1)
31298 Which isn't going to work since SSE instructions can't handle scalar
31299 additions. Returning GENERAL_REGS forces the addition into integer
31300 register and reload can handle subsequent reloads without problems. */
31302 if (in_p
&& GET_CODE (x
) == PLUS
31303 && SSE_CLASS_P (rclass
)
31304 && SCALAR_INT_MODE_P (mode
))
31305 return GENERAL_REGS
;
31310 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
31313 ix86_class_likely_spilled_p (reg_class_t rclass
)
31324 case SSE_FIRST_REG
:
31326 case FP_SECOND_REG
:
31336 /* If we are copying between general and FP registers, we need a memory
31337 location. The same is true for SSE and MMX registers.
31339 To optimize register_move_cost performance, allow inline variant.
31341 The macro can't work reliably when one of the CLASSES is class containing
31342 registers from multiple units (SSE, MMX, integer). We avoid this by never
31343 combining those units in single alternative in the machine description.
31344 Ensure that this constraint holds to avoid unexpected surprises.
31346 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
31347 enforce these sanity checks. */
31350 inline_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
31351 enum machine_mode mode
, int strict
)
31353 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
31354 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
31355 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
31356 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
31357 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
31358 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
31360 gcc_assert (!strict
);
31364 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
31367 /* ??? This is a lie. We do have moves between mmx/general, and for
31368 mmx/sse2. But by saying we need secondary memory we discourage the
31369 register allocator from using the mmx registers unless needed. */
31370 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
31373 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
31375 /* SSE1 doesn't have any direct moves from other classes. */
31379 /* If the target says that inter-unit moves are more expensive
31380 than moving through memory, then don't generate them. */
31381 if (!TARGET_INTER_UNIT_MOVES
)
31384 /* Between SSE and general, we have moves no larger than word size. */
31385 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
31393 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
31394 enum machine_mode mode
, int strict
)
31396 return inline_secondary_memory_needed (class1
, class2
, mode
, strict
);
31399 /* Implement the TARGET_CLASS_MAX_NREGS hook.
31401 On the 80386, this is the size of MODE in words,
31402 except in the FP regs, where a single reg is always enough. */
31404 static unsigned char
31405 ix86_class_max_nregs (reg_class_t rclass
, enum machine_mode mode
)
31407 if (MAYBE_INTEGER_CLASS_P (rclass
))
31409 if (mode
== XFmode
)
31410 return (TARGET_64BIT
? 2 : 3);
31411 else if (mode
== XCmode
)
31412 return (TARGET_64BIT
? 4 : 6);
31414 return ((GET_MODE_SIZE (mode
) + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
);
31418 if (COMPLEX_MODE_P (mode
))
31425 /* Return true if the registers in CLASS cannot represent the change from
31426 modes FROM to TO. */
31429 ix86_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
31430 enum reg_class regclass
)
31435 /* x87 registers can't do subreg at all, as all values are reformatted
31436 to extended precision. */
31437 if (MAYBE_FLOAT_CLASS_P (regclass
))
31440 if (MAYBE_SSE_CLASS_P (regclass
) || MAYBE_MMX_CLASS_P (regclass
))
31442 /* Vector registers do not support QI or HImode loads. If we don't
31443 disallow a change to these modes, reload will assume it's ok to
31444 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
31445 the vec_dupv4hi pattern. */
31446 if (GET_MODE_SIZE (from
) < 4)
31449 /* Vector registers do not support subreg with nonzero offsets, which
31450 are otherwise valid for integer registers. Since we can't see
31451 whether we have a nonzero offset from here, prohibit all
31452 nonparadoxical subregs changing size. */
31453 if (GET_MODE_SIZE (to
) < GET_MODE_SIZE (from
))
31460 /* Return the cost of moving data of mode M between a
31461 register and memory. A value of 2 is the default; this cost is
31462 relative to those in `REGISTER_MOVE_COST'.
31464 This function is used extensively by register_move_cost that is used to
31465 build tables at startup. Make it inline in this case.
31466 When IN is 2, return maximum of in and out move cost.
31468 If moving between registers and memory is more expensive than
31469 between two registers, you should define this macro to express the
31472 Model also increased moving costs of QImode registers in non
31476 inline_memory_move_cost (enum machine_mode mode
, enum reg_class regclass
,
31480 if (FLOAT_CLASS_P (regclass
))
31498 return MAX (ix86_cost
->fp_load
[index
], ix86_cost
->fp_store
[index
]);
31499 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
31501 if (SSE_CLASS_P (regclass
))
31504 switch (GET_MODE_SIZE (mode
))
31519 return MAX (ix86_cost
->sse_load
[index
], ix86_cost
->sse_store
[index
]);
31520 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
31522 if (MMX_CLASS_P (regclass
))
31525 switch (GET_MODE_SIZE (mode
))
31537 return MAX (ix86_cost
->mmx_load
[index
], ix86_cost
->mmx_store
[index
]);
31538 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
31540 switch (GET_MODE_SIZE (mode
))
31543 if (Q_CLASS_P (regclass
) || TARGET_64BIT
)
31546 return ix86_cost
->int_store
[0];
31547 if (TARGET_PARTIAL_REG_DEPENDENCY
31548 && optimize_function_for_speed_p (cfun
))
31549 cost
= ix86_cost
->movzbl_load
;
31551 cost
= ix86_cost
->int_load
[0];
31553 return MAX (cost
, ix86_cost
->int_store
[0]);
31559 return MAX (ix86_cost
->movzbl_load
, ix86_cost
->int_store
[0] + 4);
31561 return ix86_cost
->movzbl_load
;
31563 return ix86_cost
->int_store
[0] + 4;
31568 return MAX (ix86_cost
->int_load
[1], ix86_cost
->int_store
[1]);
31569 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
31571 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
31572 if (mode
== TFmode
)
31575 cost
= MAX (ix86_cost
->int_load
[2] , ix86_cost
->int_store
[2]);
31577 cost
= ix86_cost
->int_load
[2];
31579 cost
= ix86_cost
->int_store
[2];
31580 return (cost
* (((int) GET_MODE_SIZE (mode
)
31581 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
31586 ix86_memory_move_cost (enum machine_mode mode
, reg_class_t regclass
,
31589 return inline_memory_move_cost (mode
, (enum reg_class
) regclass
, in
? 1 : 0);
31593 /* Return the cost of moving data from a register in class CLASS1 to
31594 one in class CLASS2.
31596 It is not required that the cost always equal 2 when FROM is the same as TO;
31597 on some machines it is expensive to move between registers if they are not
31598 general registers. */
31601 ix86_register_move_cost (enum machine_mode mode
, reg_class_t class1_i
,
31602 reg_class_t class2_i
)
31604 enum reg_class class1
= (enum reg_class
) class1_i
;
31605 enum reg_class class2
= (enum reg_class
) class2_i
;
31607 /* In case we require secondary memory, compute cost of the store followed
31608 by load. In order to avoid bad register allocation choices, we need
31609 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
31611 if (inline_secondary_memory_needed (class1
, class2
, mode
, 0))
31615 cost
+= inline_memory_move_cost (mode
, class1
, 2);
31616 cost
+= inline_memory_move_cost (mode
, class2
, 2);
31618 /* In case of copying from general_purpose_register we may emit multiple
31619 stores followed by single load causing memory size mismatch stall.
31620 Count this as arbitrarily high cost of 20. */
31621 if (targetm
.class_max_nregs (class1
, mode
)
31622 > targetm
.class_max_nregs (class2
, mode
))
31625 /* In the case of FP/MMX moves, the registers actually overlap, and we
31626 have to switch modes in order to treat them differently. */
31627 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
31628 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
31634 /* Moves between SSE/MMX and integer unit are expensive. */
31635 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
31636 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
31638 /* ??? By keeping returned value relatively high, we limit the number
31639 of moves between integer and MMX/SSE registers for all targets.
31640 Additionally, high value prevents problem with x86_modes_tieable_p(),
31641 where integer modes in MMX/SSE registers are not tieable
31642 because of missing QImode and HImode moves to, from or between
31643 MMX/SSE registers. */
31644 return MAX (8, ix86_cost
->mmxsse_to_integer
);
31646 if (MAYBE_FLOAT_CLASS_P (class1
))
31647 return ix86_cost
->fp_move
;
31648 if (MAYBE_SSE_CLASS_P (class1
))
31649 return ix86_cost
->sse_move
;
31650 if (MAYBE_MMX_CLASS_P (class1
))
31651 return ix86_cost
->mmx_move
;
31655 /* Return TRUE if hard register REGNO can hold a value of machine-mode
31659 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
31661 /* Flags and only flags can only hold CCmode values. */
31662 if (CC_REGNO_P (regno
))
31663 return GET_MODE_CLASS (mode
) == MODE_CC
;
31664 if (GET_MODE_CLASS (mode
) == MODE_CC
31665 || GET_MODE_CLASS (mode
) == MODE_RANDOM
31666 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
31668 if (FP_REGNO_P (regno
))
31669 return VALID_FP_MODE_P (mode
);
31670 if (SSE_REGNO_P (regno
))
31672 /* We implement the move patterns for all vector modes into and
31673 out of SSE registers, even when no operation instructions
31674 are available. OImode move is available only when AVX is
31676 return ((TARGET_AVX
&& mode
== OImode
)
31677 || VALID_AVX256_REG_MODE (mode
)
31678 || VALID_SSE_REG_MODE (mode
)
31679 || VALID_SSE2_REG_MODE (mode
)
31680 || VALID_MMX_REG_MODE (mode
)
31681 || VALID_MMX_REG_MODE_3DNOW (mode
));
31683 if (MMX_REGNO_P (regno
))
31685 /* We implement the move patterns for 3DNOW modes even in MMX mode,
31686 so if the register is available at all, then we can move data of
31687 the given mode into or out of it. */
31688 return (VALID_MMX_REG_MODE (mode
)
31689 || VALID_MMX_REG_MODE_3DNOW (mode
));
31692 if (mode
== QImode
)
31694 /* Take care for QImode values - they can be in non-QI regs,
31695 but then they do cause partial register stalls. */
31696 if (regno
<= BX_REG
|| TARGET_64BIT
)
31698 if (!TARGET_PARTIAL_REG_STALL
)
31700 return !can_create_pseudo_p ();
31702 /* We handle both integer and floats in the general purpose registers. */
31703 else if (VALID_INT_MODE_P (mode
))
31705 else if (VALID_FP_MODE_P (mode
))
31707 else if (VALID_DFP_MODE_P (mode
))
31709 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
31710 on to use that value in smaller contexts, this can easily force a
31711 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
31712 supporting DImode, allow it. */
31713 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
31719 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
31720 tieable integer mode. */
31723 ix86_tieable_integer_mode_p (enum machine_mode mode
)
31732 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
31735 return TARGET_64BIT
;
31742 /* Return true if MODE1 is accessible in a register that can hold MODE2
31743 without copying. That is, all register classes that can hold MODE2
31744 can also hold MODE1. */
31747 ix86_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
31749 if (mode1
== mode2
)
31752 if (ix86_tieable_integer_mode_p (mode1
)
31753 && ix86_tieable_integer_mode_p (mode2
))
31756 /* MODE2 being XFmode implies fp stack or general regs, which means we
31757 can tie any smaller floating point modes to it. Note that we do not
31758 tie this with TFmode. */
31759 if (mode2
== XFmode
)
31760 return mode1
== SFmode
|| mode1
== DFmode
;
31762 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
31763 that we can tie it with SFmode. */
31764 if (mode2
== DFmode
)
31765 return mode1
== SFmode
;
31767 /* If MODE2 is only appropriate for an SSE register, then tie with
31768 any other mode acceptable to SSE registers. */
31769 if (GET_MODE_SIZE (mode2
) == 16
31770 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
31771 return (GET_MODE_SIZE (mode1
) == 16
31772 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
31774 /* If MODE2 is appropriate for an MMX register, then tie
31775 with any other mode acceptable to MMX registers. */
31776 if (GET_MODE_SIZE (mode2
) == 8
31777 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
31778 return (GET_MODE_SIZE (mode1
) == 8
31779 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
));
31784 /* Compute a (partial) cost for rtx X. Return true if the complete
31785 cost has been computed, and false if subexpressions should be
31786 scanned. In either case, *TOTAL contains the cost result. */
31789 ix86_rtx_costs (rtx x
, int code
, int outer_code_i
, int opno
, int *total
,
31792 enum rtx_code outer_code
= (enum rtx_code
) outer_code_i
;
31793 enum machine_mode mode
= GET_MODE (x
);
31794 const struct processor_costs
*cost
= speed
? ix86_cost
: &ix86_size_cost
;
31802 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
31804 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
31806 else if (flag_pic
&& SYMBOLIC_CONST (x
)
31808 || (!GET_CODE (x
) != LABEL_REF
31809 && (GET_CODE (x
) != SYMBOL_REF
31810 || !SYMBOL_REF_LOCAL_P (x
)))))
31817 if (mode
== VOIDmode
)
31820 switch (standard_80387_constant_p (x
))
31825 default: /* Other constants */
31830 /* Start with (MEM (SYMBOL_REF)), since that's where
31831 it'll probably end up. Add a penalty for size. */
31832 *total
= (COSTS_N_INSNS (1)
31833 + (flag_pic
!= 0 && !TARGET_64BIT
)
31834 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
31840 /* The zero extensions is often completely free on x86_64, so make
31841 it as cheap as possible. */
31842 if (TARGET_64BIT
&& mode
== DImode
31843 && GET_MODE (XEXP (x
, 0)) == SImode
)
31845 else if (TARGET_ZERO_EXTEND_WITH_AND
)
31846 *total
= cost
->add
;
31848 *total
= cost
->movzx
;
31852 *total
= cost
->movsx
;
31856 if (CONST_INT_P (XEXP (x
, 1))
31857 && (GET_MODE (XEXP (x
, 0)) != DImode
|| TARGET_64BIT
))
31859 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
31862 *total
= cost
->add
;
31865 if ((value
== 2 || value
== 3)
31866 && cost
->lea
<= cost
->shift_const
)
31868 *total
= cost
->lea
;
31878 if (!TARGET_64BIT
&& GET_MODE (XEXP (x
, 0)) == DImode
)
31880 if (CONST_INT_P (XEXP (x
, 1)))
31882 if (INTVAL (XEXP (x
, 1)) > 32)
31883 *total
= cost
->shift_const
+ COSTS_N_INSNS (2);
31885 *total
= cost
->shift_const
* 2;
31889 if (GET_CODE (XEXP (x
, 1)) == AND
)
31890 *total
= cost
->shift_var
* 2;
31892 *total
= cost
->shift_var
* 6 + COSTS_N_INSNS (2);
31897 if (CONST_INT_P (XEXP (x
, 1)))
31898 *total
= cost
->shift_const
;
31900 *total
= cost
->shift_var
;
31908 gcc_assert (FLOAT_MODE_P (mode
));
31909 gcc_assert (TARGET_FMA
|| TARGET_FMA4
);
31911 /* ??? SSE scalar/vector cost should be used here. */
31912 /* ??? Bald assumption that fma has the same cost as fmul. */
31913 *total
= cost
->fmul
;
31914 *total
+= rtx_cost (XEXP (x
, 1), FMA
, 1, speed
);
31916 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
31918 if (GET_CODE (sub
) == NEG
)
31919 sub
= XEXP (sub
, 0);
31920 *total
+= rtx_cost (sub
, FMA
, 0, speed
);
31923 if (GET_CODE (sub
) == NEG
)
31924 sub
= XEXP (sub
, 0);
31925 *total
+= rtx_cost (sub
, FMA
, 2, speed
);
31930 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
31932 /* ??? SSE scalar cost should be used here. */
31933 *total
= cost
->fmul
;
31936 else if (X87_FLOAT_MODE_P (mode
))
31938 *total
= cost
->fmul
;
31941 else if (FLOAT_MODE_P (mode
))
31943 /* ??? SSE vector cost should be used here. */
31944 *total
= cost
->fmul
;
31949 rtx op0
= XEXP (x
, 0);
31950 rtx op1
= XEXP (x
, 1);
31952 if (CONST_INT_P (XEXP (x
, 1)))
31954 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
31955 for (nbits
= 0; value
!= 0; value
&= value
- 1)
31959 /* This is arbitrary. */
31962 /* Compute costs correctly for widening multiplication. */
31963 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op0
) == ZERO_EXTEND
)
31964 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
31965 == GET_MODE_SIZE (mode
))
31967 int is_mulwiden
= 0;
31968 enum machine_mode inner_mode
= GET_MODE (op0
);
31970 if (GET_CODE (op0
) == GET_CODE (op1
))
31971 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
31972 else if (CONST_INT_P (op1
))
31974 if (GET_CODE (op0
) == SIGN_EXTEND
)
31975 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
31978 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
31982 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
31985 *total
= (cost
->mult_init
[MODE_INDEX (mode
)]
31986 + nbits
* cost
->mult_bit
31987 + rtx_cost (op0
, outer_code
, opno
, speed
)
31988 + rtx_cost (op1
, outer_code
, opno
, speed
));
31997 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
31998 /* ??? SSE cost should be used here. */
31999 *total
= cost
->fdiv
;
32000 else if (X87_FLOAT_MODE_P (mode
))
32001 *total
= cost
->fdiv
;
32002 else if (FLOAT_MODE_P (mode
))
32003 /* ??? SSE vector cost should be used here. */
32004 *total
= cost
->fdiv
;
32006 *total
= cost
->divide
[MODE_INDEX (mode
)];
32010 if (GET_MODE_CLASS (mode
) == MODE_INT
32011 && GET_MODE_BITSIZE (mode
) <= GET_MODE_BITSIZE (Pmode
))
32013 if (GET_CODE (XEXP (x
, 0)) == PLUS
32014 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
32015 && CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 0), 1))
32016 && CONSTANT_P (XEXP (x
, 1)))
32018 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
32019 if (val
== 2 || val
== 4 || val
== 8)
32021 *total
= cost
->lea
;
32022 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1),
32023 outer_code
, opno
, speed
);
32024 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
32025 outer_code
, opno
, speed
);
32026 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
32030 else if (GET_CODE (XEXP (x
, 0)) == MULT
32031 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
32033 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
32034 if (val
== 2 || val
== 4 || val
== 8)
32036 *total
= cost
->lea
;
32037 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0),
32038 outer_code
, opno
, speed
);
32039 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
32043 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
32045 *total
= cost
->lea
;
32046 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0),
32047 outer_code
, opno
, speed
);
32048 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1),
32049 outer_code
, opno
, speed
);
32050 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
32057 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
32059 /* ??? SSE cost should be used here. */
32060 *total
= cost
->fadd
;
32063 else if (X87_FLOAT_MODE_P (mode
))
32065 *total
= cost
->fadd
;
32068 else if (FLOAT_MODE_P (mode
))
32070 /* ??? SSE vector cost should be used here. */
32071 *total
= cost
->fadd
;
32079 if (!TARGET_64BIT
&& mode
== DImode
)
32081 *total
= (cost
->add
* 2
32082 + (rtx_cost (XEXP (x
, 0), outer_code
, opno
, speed
)
32083 << (GET_MODE (XEXP (x
, 0)) != DImode
))
32084 + (rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
)
32085 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
32091 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
32093 /* ??? SSE cost should be used here. */
32094 *total
= cost
->fchs
;
32097 else if (X87_FLOAT_MODE_P (mode
))
32099 *total
= cost
->fchs
;
32102 else if (FLOAT_MODE_P (mode
))
32104 /* ??? SSE vector cost should be used here. */
32105 *total
= cost
->fchs
;
32111 if (!TARGET_64BIT
&& mode
== DImode
)
32112 *total
= cost
->add
* 2;
32114 *total
= cost
->add
;
32118 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
32119 && XEXP (XEXP (x
, 0), 1) == const1_rtx
32120 && CONST_INT_P (XEXP (XEXP (x
, 0), 2))
32121 && XEXP (x
, 1) == const0_rtx
)
32123 /* This kind of construct is implemented using test[bwl].
32124 Treat it as if we had an AND. */
32125 *total
= (cost
->add
32126 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
, opno
, speed
)
32127 + rtx_cost (const1_rtx
, outer_code
, opno
, speed
));
32133 if (!(SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
))
32138 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
32139 /* ??? SSE cost should be used here. */
32140 *total
= cost
->fabs
;
32141 else if (X87_FLOAT_MODE_P (mode
))
32142 *total
= cost
->fabs
;
32143 else if (FLOAT_MODE_P (mode
))
32144 /* ??? SSE vector cost should be used here. */
32145 *total
= cost
->fabs
;
32149 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
32150 /* ??? SSE cost should be used here. */
32151 *total
= cost
->fsqrt
;
32152 else if (X87_FLOAT_MODE_P (mode
))
32153 *total
= cost
->fsqrt
;
32154 else if (FLOAT_MODE_P (mode
))
32155 /* ??? SSE vector cost should be used here. */
32156 *total
= cost
->fsqrt
;
32160 if (XINT (x
, 1) == UNSPEC_TP
)
32167 case VEC_DUPLICATE
:
32168 /* ??? Assume all of these vector manipulation patterns are
32169 recognizable. In which case they all pretty much have the
32171 *total
= COSTS_N_INSNS (1);
32181 static int current_machopic_label_num
;
32183 /* Given a symbol name and its associated stub, write out the
32184 definition of the stub. */
32187 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
32189 unsigned int length
;
32190 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
32191 int label
= ++current_machopic_label_num
;
32193 /* For 64-bit we shouldn't get here. */
32194 gcc_assert (!TARGET_64BIT
);
32196 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
32197 symb
= targetm
.strip_name_encoding (symb
);
32199 length
= strlen (stub
);
32200 binder_name
= XALLOCAVEC (char, length
+ 32);
32201 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
32203 length
= strlen (symb
);
32204 symbol_name
= XALLOCAVEC (char, length
+ 32);
32205 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
32207 sprintf (lazy_ptr_name
, "L%d$lz", label
);
32209 if (MACHOPIC_ATT_STUB
)
32210 switch_to_section (darwin_sections
[machopic_picsymbol_stub3_section
]);
32211 else if (MACHOPIC_PURE
)
32212 switch_to_section (darwin_sections
[machopic_picsymbol_stub2_section
]);
32214 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
32216 fprintf (file
, "%s:\n", stub
);
32217 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
32219 if (MACHOPIC_ATT_STUB
)
32221 fprintf (file
, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
32223 else if (MACHOPIC_PURE
)
32226 /* 25-byte PIC stub using "CALL get_pc_thunk". */
32227 rtx tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
32228 output_set_got (tmp
, NULL_RTX
); /* "CALL ___<cpu>.get_pc_thunk.cx". */
32229 fprintf (file
, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
32230 label
, lazy_ptr_name
, label
);
32231 fprintf (file
, "\tjmp\t*%%ecx\n");
32234 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
32236 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
32237 it needs no stub-binding-helper. */
32238 if (MACHOPIC_ATT_STUB
)
32241 fprintf (file
, "%s:\n", binder_name
);
32245 fprintf (file
, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name
, binder_name
);
32246 fprintf (file
, "\tpushl\t%%ecx\n");
32249 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
32251 fputs ("\tjmp\tdyld_stub_binding_helper\n", file
);
32253 /* N.B. Keep the correspondence of these
32254 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
32255 old-pic/new-pic/non-pic stubs; altering this will break
32256 compatibility with existing dylibs. */
32259 /* 25-byte PIC stub using "CALL get_pc_thunk". */
32260 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr2_section
]);
32263 /* 16-byte -mdynamic-no-pic stub. */
32264 switch_to_section(darwin_sections
[machopic_lazy_symbol_ptr3_section
]);
32266 fprintf (file
, "%s:\n", lazy_ptr_name
);
32267 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
32268 fprintf (file
, ASM_LONG
"%s\n", binder_name
);
32270 #endif /* TARGET_MACHO */
32272 /* Order the registers for register allocator. */
32275 x86_order_regs_for_local_alloc (void)
32280 /* First allocate the local general purpose registers. */
32281 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
32282 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
32283 reg_alloc_order
[pos
++] = i
;
32285 /* Global general purpose registers. */
32286 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
32287 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
32288 reg_alloc_order
[pos
++] = i
;
32290 /* x87 registers come first in case we are doing FP math
32292 if (!TARGET_SSE_MATH
)
32293 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
32294 reg_alloc_order
[pos
++] = i
;
32296 /* SSE registers. */
32297 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
32298 reg_alloc_order
[pos
++] = i
;
32299 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
32300 reg_alloc_order
[pos
++] = i
;
32302 /* x87 registers. */
32303 if (TARGET_SSE_MATH
)
32304 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
32305 reg_alloc_order
[pos
++] = i
;
32307 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
32308 reg_alloc_order
[pos
++] = i
;
32310 /* Initialize the rest of array as we do not allocate some registers
32312 while (pos
< FIRST_PSEUDO_REGISTER
)
32313 reg_alloc_order
[pos
++] = 0;
32316 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
32317 in struct attribute_spec handler. */
32319 ix86_handle_callee_pop_aggregate_return (tree
*node
, tree name
,
32321 int flags ATTRIBUTE_UNUSED
,
32322 bool *no_add_attrs
)
32324 if (TREE_CODE (*node
) != FUNCTION_TYPE
32325 && TREE_CODE (*node
) != METHOD_TYPE
32326 && TREE_CODE (*node
) != FIELD_DECL
32327 && TREE_CODE (*node
) != TYPE_DECL
)
32329 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
32331 *no_add_attrs
= true;
32336 warning (OPT_Wattributes
, "%qE attribute only available for 32-bit",
32338 *no_add_attrs
= true;
32341 if (is_attribute_p ("callee_pop_aggregate_return", name
))
32345 cst
= TREE_VALUE (args
);
32346 if (TREE_CODE (cst
) != INTEGER_CST
)
32348 warning (OPT_Wattributes
,
32349 "%qE attribute requires an integer constant argument",
32351 *no_add_attrs
= true;
32353 else if (compare_tree_int (cst
, 0) != 0
32354 && compare_tree_int (cst
, 1) != 0)
32356 warning (OPT_Wattributes
,
32357 "argument to %qE attribute is neither zero, nor one",
32359 *no_add_attrs
= true;
32368 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
32369 struct attribute_spec.handler. */
32371 ix86_handle_abi_attribute (tree
*node
, tree name
,
32372 tree args ATTRIBUTE_UNUSED
,
32373 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
32375 if (TREE_CODE (*node
) != FUNCTION_TYPE
32376 && TREE_CODE (*node
) != METHOD_TYPE
32377 && TREE_CODE (*node
) != FIELD_DECL
32378 && TREE_CODE (*node
) != TYPE_DECL
)
32380 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
32382 *no_add_attrs
= true;
32386 /* Can combine regparm with all attributes but fastcall. */
32387 if (is_attribute_p ("ms_abi", name
))
32389 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node
)))
32391 error ("ms_abi and sysv_abi attributes are not compatible");
32396 else if (is_attribute_p ("sysv_abi", name
))
32398 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node
)))
32400 error ("ms_abi and sysv_abi attributes are not compatible");
32409 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
32410 struct attribute_spec.handler. */
32412 ix86_handle_struct_attribute (tree
*node
, tree name
,
32413 tree args ATTRIBUTE_UNUSED
,
32414 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
32417 if (DECL_P (*node
))
32419 if (TREE_CODE (*node
) == TYPE_DECL
)
32420 type
= &TREE_TYPE (*node
);
32425 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
32426 || TREE_CODE (*type
) == UNION_TYPE
)))
32428 warning (OPT_Wattributes
, "%qE attribute ignored",
32430 *no_add_attrs
= true;
32433 else if ((is_attribute_p ("ms_struct", name
)
32434 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
32435 || ((is_attribute_p ("gcc_struct", name
)
32436 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
32438 warning (OPT_Wattributes
, "%qE incompatible attribute ignored",
32440 *no_add_attrs
= true;
32447 ix86_handle_fndecl_attribute (tree
*node
, tree name
,
32448 tree args ATTRIBUTE_UNUSED
,
32449 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
32451 if (TREE_CODE (*node
) != FUNCTION_DECL
)
32453 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
32455 *no_add_attrs
= true;
32461 ix86_ms_bitfield_layout_p (const_tree record_type
)
32463 return ((TARGET_MS_BITFIELD_LAYOUT
32464 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
32465 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
)));
32468 /* Returns an expression indicating where the this parameter is
32469 located on entry to the FUNCTION. */
32472 x86_this_parameter (tree function
)
32474 tree type
= TREE_TYPE (function
);
32475 bool aggr
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
32480 const int *parm_regs
;
32482 if (ix86_function_type_abi (type
) == MS_ABI
)
32483 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
32485 parm_regs
= x86_64_int_parameter_registers
;
32486 return gen_rtx_REG (DImode
, parm_regs
[aggr
]);
32489 nregs
= ix86_function_regparm (type
, function
);
32491 if (nregs
> 0 && !stdarg_p (type
))
32494 unsigned int ccvt
= ix86_get_callcvt (type
);
32496 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
32497 regno
= aggr
? DX_REG
: CX_REG
;
32498 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
32502 return gen_rtx_MEM (SImode
,
32503 plus_constant (stack_pointer_rtx
, 4));
32512 return gen_rtx_MEM (SImode
,
32513 plus_constant (stack_pointer_rtx
, 4));
32516 return gen_rtx_REG (SImode
, regno
);
32519 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, aggr
? 8 : 4));
32522 /* Determine whether x86_output_mi_thunk can succeed. */
32525 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED
,
32526 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
32527 HOST_WIDE_INT vcall_offset
, const_tree function
)
32529 /* 64-bit can handle anything. */
32533 /* For 32-bit, everything's fine if we have one free register. */
32534 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
32537 /* Need a free register for vcall_offset. */
32541 /* Need a free register for GOT references. */
32542 if (flag_pic
&& !targetm
.binds_local_p (function
))
32545 /* Otherwise ok. */
32549 /* Output the assembler code for a thunk function. THUNK_DECL is the
32550 declaration for the thunk function itself, FUNCTION is the decl for
32551 the target function. DELTA is an immediate constant offset to be
32552 added to THIS. If VCALL_OFFSET is nonzero, the word at
32553 *(*this + vcall_offset) should be added to THIS. */
32556 x86_output_mi_thunk (FILE *file
,
32557 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
32558 HOST_WIDE_INT vcall_offset
, tree function
)
32560 rtx this_param
= x86_this_parameter (function
);
32561 rtx this_reg
, tmp
, fnaddr
;
32563 emit_note (NOTE_INSN_PROLOGUE_END
);
32565 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
32566 pull it in now and let DELTA benefit. */
32567 if (REG_P (this_param
))
32568 this_reg
= this_param
;
32569 else if (vcall_offset
)
32571 /* Put the this parameter into %eax. */
32572 this_reg
= gen_rtx_REG (Pmode
, AX_REG
);
32573 emit_move_insn (this_reg
, this_param
);
32576 this_reg
= NULL_RTX
;
32578 /* Adjust the this parameter by a fixed constant. */
32581 rtx delta_rtx
= GEN_INT (delta
);
32582 rtx delta_dst
= this_reg
? this_reg
: this_param
;
32586 if (!x86_64_general_operand (delta_rtx
, Pmode
))
32588 tmp
= gen_rtx_REG (Pmode
, R10_REG
);
32589 emit_move_insn (tmp
, delta_rtx
);
32594 ix86_emit_binop (PLUS
, Pmode
, delta_dst
, delta_rtx
);
32597 /* Adjust the this parameter by a value stored in the vtable. */
32600 rtx vcall_addr
, vcall_mem
, this_mem
;
32601 unsigned int tmp_regno
;
32604 tmp_regno
= R10_REG
;
32607 unsigned int ccvt
= ix86_get_callcvt (TREE_TYPE (function
));
32608 if ((ccvt
& (IX86_CALLCVT_FASTCALL
| IX86_CALLCVT_THISCALL
)) != 0)
32609 tmp_regno
= AX_REG
;
32611 tmp_regno
= CX_REG
;
32613 tmp
= gen_rtx_REG (Pmode
, tmp_regno
);
32615 this_mem
= gen_rtx_MEM (ptr_mode
, this_reg
);
32616 if (Pmode
!= ptr_mode
)
32617 this_mem
= gen_rtx_ZERO_EXTEND (Pmode
, this_mem
);
32618 emit_move_insn (tmp
, this_mem
);
32620 /* Adjust the this parameter. */
32621 vcall_addr
= plus_constant (tmp
, vcall_offset
);
32623 && !ix86_legitimate_address_p (ptr_mode
, vcall_addr
, true))
32625 rtx tmp2
= gen_rtx_REG (Pmode
, R11_REG
);
32626 emit_move_insn (tmp2
, GEN_INT (vcall_offset
));
32627 vcall_addr
= gen_rtx_PLUS (Pmode
, tmp
, tmp2
);
32630 vcall_mem
= gen_rtx_MEM (ptr_mode
, vcall_addr
);
32631 if (Pmode
!= ptr_mode
)
32632 emit_insn (gen_addsi_1_zext (this_reg
,
32633 gen_rtx_REG (ptr_mode
,
32637 ix86_emit_binop (PLUS
, Pmode
, this_reg
, vcall_mem
);
32640 /* If necessary, drop THIS back to its stack slot. */
32641 if (this_reg
&& this_reg
!= this_param
)
32642 emit_move_insn (this_param
, this_reg
);
32644 fnaddr
= XEXP (DECL_RTL (function
), 0);
32647 if (!flag_pic
|| targetm
.binds_local_p (function
)
32648 || cfun
->machine
->call_abi
== MS_ABI
)
32652 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fnaddr
), UNSPEC_GOTPCREL
);
32653 tmp
= gen_rtx_CONST (Pmode
, tmp
);
32654 fnaddr
= gen_rtx_MEM (Pmode
, tmp
);
32659 if (!flag_pic
|| targetm
.binds_local_p (function
))
32662 else if (TARGET_MACHO
)
32664 fnaddr
= machopic_indirect_call_target (DECL_RTL (function
));
32665 fnaddr
= XEXP (fnaddr
, 0);
32667 #endif /* TARGET_MACHO */
32670 tmp
= gen_rtx_REG (Pmode
, CX_REG
);
32671 output_set_got (tmp
, NULL_RTX
);
32673 fnaddr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fnaddr
), UNSPEC_GOT
);
32674 fnaddr
= gen_rtx_PLUS (Pmode
, fnaddr
, tmp
);
32675 fnaddr
= gen_rtx_MEM (Pmode
, fnaddr
);
32679 /* Our sibling call patterns do not allow memories, because we have no
32680 predicate that can distinguish between frame and non-frame memory.
32681 For our purposes here, we can get away with (ab)using a jump pattern,
32682 because we're going to do no optimization. */
32683 if (MEM_P (fnaddr
))
32684 emit_jump_insn (gen_indirect_jump (fnaddr
));
32687 tmp
= gen_rtx_MEM (QImode
, fnaddr
);
32688 tmp
= gen_rtx_CALL (VOIDmode
, tmp
, const0_rtx
);
32689 tmp
= emit_call_insn (tmp
);
32690 SIBLING_CALL_P (tmp
) = 1;
32694 /* Emit just enough of rest_of_compilation to get the insns emitted.
32695 Note that use_thunk calls assemble_start_function et al. */
32696 tmp
= get_insns ();
32697 insn_locators_alloc ();
32698 shorten_branches (tmp
);
32699 final_start_function (tmp
, file
, 1);
32700 final (tmp
, file
, 1);
32701 final_end_function ();
32705 x86_file_start (void)
32707 default_file_start ();
32709 darwin_file_start ();
32711 if (X86_FILE_START_VERSION_DIRECTIVE
)
32712 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
32713 if (X86_FILE_START_FLTUSED
)
32714 fputs ("\t.global\t__fltused\n", asm_out_file
);
32715 if (ix86_asm_dialect
== ASM_INTEL
)
32716 fputs ("\t.intel_syntax noprefix\n", asm_out_file
);
32720 x86_field_alignment (tree field
, int computed
)
32722 enum machine_mode mode
;
32723 tree type
= TREE_TYPE (field
);
32725 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
32727 mode
= TYPE_MODE (strip_array_types (type
));
32728 if (mode
== DFmode
|| mode
== DCmode
32729 || GET_MODE_CLASS (mode
) == MODE_INT
32730 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
32731 return MIN (32, computed
);
32735 /* Output assembler code to FILE to increment profiler label # LABELNO
32736 for profiling a function entry. */
32738 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
32740 const char *mcount_name
= (flag_fentry
? MCOUNT_NAME_BEFORE_PROLOGUE
32745 #ifndef NO_PROFILE_COUNTERS
32746 fprintf (file
, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX
, labelno
);
32749 if (DEFAULT_ABI
== SYSV_ABI
&& flag_pic
)
32750 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name
);
32752 fprintf (file
, "\tcall\t%s\n", mcount_name
);
32756 #ifndef NO_PROFILE_COUNTERS
32757 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER
"\n",
32760 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name
);
32764 #ifndef NO_PROFILE_COUNTERS
32765 fprintf (file
, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER
"\n",
32768 fprintf (file
, "\tcall\t%s\n", mcount_name
);
32772 /* We don't have exact information about the insn sizes, but we may assume
32773 quite safely that we are informed about all 1 byte insns and memory
32774 address sizes. This is enough to eliminate unnecessary padding in
32778 min_insn_size (rtx insn
)
32782 if (!INSN_P (insn
) || !active_insn_p (insn
))
32785 /* Discard alignments we've emit and jump instructions. */
32786 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
32787 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
32789 if (JUMP_TABLE_DATA_P (insn
))
32792 /* Important case - calls are always 5 bytes.
32793 It is common to have many calls in the row. */
32795 && symbolic_reference_mentioned_p (PATTERN (insn
))
32796 && !SIBLING_CALL_P (insn
))
32798 len
= get_attr_length (insn
);
32802 /* For normal instructions we rely on get_attr_length being exact,
32803 with a few exceptions. */
32804 if (!JUMP_P (insn
))
32806 enum attr_type type
= get_attr_type (insn
);
32811 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
32812 || asm_noperands (PATTERN (insn
)) >= 0)
32819 /* Otherwise trust get_attr_length. */
32823 l
= get_attr_length_address (insn
);
32824 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
#ifdef ASM_OUTPUT_MAX_SKIP_PAD
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
   window.  Insert padding so that no 16-byte window contains 4 branches.

   NOTE(review): reconstructed from a garbled extraction — window
   bookkeeping follows GCC 4.7; verify against upstream.  */

static void
ix86_avoid_jump_mispredicts (void)
{
  rtx insn, start = get_insns ();
  int nbytes = 0, njumps = 0;
  int isjump = 0;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When the NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16byte page.

     The smallest offset in the page INSN can start is the case where START
     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
     */
  for (insn = start; insn; insn = NEXT_INSN (insn))
    {
      int min_size;

      if (LABEL_P (insn))
	{
	  int align = label_to_alignment (insn);
	  int max_skip = label_to_max_skip (insn);

	  if (max_skip > 15)
	    max_skip = 15;
	  /* If align > 3, only up to 16 - max_skip - 1 bytes can be
	     already in the current 16 byte page, because otherwise
	     ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
	     bytes to reach 16 byte boundary.  */
	  if (align <= 0
	      || (align <= 3 && max_skip != (1 << align) - 1))
	    max_skip = 0;
	  if (dump_file)
	    fprintf (dump_file, "Label %i with max_skip %i\n",
		     INSN_UID (insn), max_skip);
	  if (max_skip)
	    {
	      while (nbytes + max_skip >= 16)
		{
		  start = NEXT_INSN (start);
		  if ((JUMP_P (start)
		       && GET_CODE (PATTERN (start)) != ADDR_VEC
		       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
		      || CALL_P (start))
		    njumps--, isjump = 1;
		  else
		    isjump = 0;
		  nbytes -= min_insn_size (start);
		}
	    }
	  continue;
	}

      min_size = min_insn_size (insn);
      nbytes += min_size;
      if (dump_file)
	fprintf (dump_file, "Insn %i estimated to %i bytes\n",
		 INSN_UID (insn), min_size);
      if ((JUMP_P (insn)
	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
	  || CALL_P (insn))
	njumps++;
      else
	continue;

      while (njumps > 3)
	{
	  start = NEXT_INSN (start);
	  if ((JUMP_P (start)
	       && GET_CODE (PATTERN (start)) != ADDR_VEC
	       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
	      || CALL_P (start))
	    njumps--, isjump = 1;
	  else
	    isjump = 0;
	  nbytes -= min_insn_size (start);
	}
      gcc_assert (njumps >= 0);
      if (dump_file)
	fprintf (dump_file, "Interval %i to %i has %i bytes\n",
		 INSN_UID (start), INSN_UID (insn), nbytes);

      if (njumps == 3 && isjump && nbytes < 16)
	{
	  int padsize = 15 - nbytes + min_insn_size (insn);

	  if (dump_file)
	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
		     INSN_UID (insn), padsize);
	  emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
	}
    }
}
#endif
32937 /* AMD Athlon works faster
32938 when RET is not destination of conditional jump or directly preceded
32939 by other jump instruction. We avoid the penalty by inserting NOP just
32940 before the RET instructions in such cases. */
32942 ix86_pad_returns (void)
32947 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
32949 basic_block bb
= e
->src
;
32950 rtx ret
= BB_END (bb
);
32952 bool replace
= false;
32954 if (!JUMP_P (ret
) || !ANY_RETURN_P (PATTERN (ret
))
32955 || optimize_bb_for_size_p (bb
))
32957 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
32958 if (active_insn_p (prev
) || LABEL_P (prev
))
32960 if (prev
&& LABEL_P (prev
))
32965 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
32966 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
32967 && !(e
->flags
& EDGE_FALLTHRU
))
32972 prev
= prev_active_insn (ret
);
32974 && ((JUMP_P (prev
) && any_condjump_p (prev
))
32977 /* Empty functions get branch mispredict even when
32978 the jump destination is not visible to us. */
32979 if (!prev
&& !optimize_function_for_size_p (cfun
))
32984 emit_jump_insn_before (gen_simple_return_internal_long (), ret
);
32990 /* Count the minimum number of instructions in BB. Return 4 if the
32991 number of instructions >= 4. */
32994 ix86_count_insn_bb (basic_block bb
)
32997 int insn_count
= 0;
32999 /* Count number of instructions in this block. Return 4 if the number
33000 of instructions >= 4. */
33001 FOR_BB_INSNS (bb
, insn
)
33003 /* Only happen in exit blocks. */
33005 && ANY_RETURN_P (PATTERN (insn
)))
33008 if (NONDEBUG_INSN_P (insn
)
33009 && GET_CODE (PATTERN (insn
)) != USE
33010 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
33013 if (insn_count
>= 4)
33022 /* Count the minimum number of instructions in code path in BB.
33023 Return 4 if the number of instructions >= 4. */
33026 ix86_count_insn (basic_block bb
)
33030 int min_prev_count
;
33032 /* Only bother counting instructions along paths with no
33033 more than 2 basic blocks between entry and exit. Given
33034 that BB has an edge to exit, determine if a predecessor
33035 of BB has an edge from entry. If so, compute the number
33036 of instructions in the predecessor block. If there
33037 happen to be multiple such blocks, compute the minimum. */
33038 min_prev_count
= 4;
33039 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
33042 edge_iterator prev_ei
;
33044 if (e
->src
== ENTRY_BLOCK_PTR
)
33046 min_prev_count
= 0;
33049 FOR_EACH_EDGE (prev_e
, prev_ei
, e
->src
->preds
)
33051 if (prev_e
->src
== ENTRY_BLOCK_PTR
)
33053 int count
= ix86_count_insn_bb (e
->src
);
33054 if (count
< min_prev_count
)
33055 min_prev_count
= count
;
33061 if (min_prev_count
< 4)
33062 min_prev_count
+= ix86_count_insn_bb (bb
);
33064 return min_prev_count
;
33067 /* Pad short funtion to 4 instructions. */
33070 ix86_pad_short_function (void)
33075 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
33077 rtx ret
= BB_END (e
->src
);
33078 if (JUMP_P (ret
) && ANY_RETURN_P (PATTERN (ret
)))
33080 int insn_count
= ix86_count_insn (e
->src
);
33082 /* Pad short function. */
33083 if (insn_count
< 4)
33087 /* Find epilogue. */
33090 || NOTE_KIND (insn
) != NOTE_INSN_EPILOGUE_BEG
))
33091 insn
= PREV_INSN (insn
);
33096 /* Two NOPs count as one instruction. */
33097 insn_count
= 2 * (4 - insn_count
);
33098 emit_insn_before (gen_nops (GEN_INT (insn_count
)), insn
);
33104 /* Implement machine specific optimizations. We implement padding of returns
33105 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
33109 /* We are freeing block_for_insn in the toplev to keep compatibility
33110 with old MDEP_REORGS that are not CFG based. Recompute it now. */
33111 compute_bb_for_insn ();
33113 /* Run the vzeroupper optimization if needed. */
33114 if (TARGET_VZEROUPPER
)
33115 move_or_delete_vzeroupper ();
33117 if (optimize
&& optimize_function_for_speed_p (cfun
))
33119 if (TARGET_PAD_SHORT_FUNCTION
)
33120 ix86_pad_short_function ();
33121 else if (TARGET_PAD_RETURNS
)
33122 ix86_pad_returns ();
33123 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
33124 if (TARGET_FOUR_JUMP_LIMIT
)
33125 ix86_avoid_jump_mispredicts ();
33130 /* Return nonzero when QImode register that must be represented via REX prefix
33133 x86_extended_QIreg_mentioned_p (rtx insn
)
33136 extract_insn_cached (insn
);
33137 for (i
= 0; i
< recog_data
.n_operands
; i
++)
33138 if (REG_P (recog_data
.operand
[i
])
33139 && REGNO (recog_data
.operand
[i
]) > BX_REG
)
33144 /* Return nonzero when P points to register encoded via REX prefix.
33145 Called via for_each_rtx. */
33147 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
33149 unsigned int regno
;
33152 regno
= REGNO (*p
);
33153 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
33156 /* Return true when INSN mentions register that must be encoded using REX
33159 x86_extended_reg_mentioned_p (rtx insn
)
33161 return for_each_rtx (INSN_P (insn
) ? &PATTERN (insn
) : &insn
,
33162 extended_reg_mentioned_1
, NULL
);
33165 /* If profitable, negate (without causing overflow) integer constant
33166 of mode MODE at location LOC. Return true in this case. */
33168 x86_maybe_negate_const_int (rtx
*loc
, enum machine_mode mode
)
33172 if (!CONST_INT_P (*loc
))
33178 /* DImode x86_64 constants must fit in 32 bits. */
33179 gcc_assert (x86_64_immediate_operand (*loc
, mode
));
33190 gcc_unreachable ();
33193 /* Avoid overflows. */
33194 if (mode_signbit_p (mode
, *loc
))
33197 val
= INTVAL (*loc
);
33199 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
33200 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
33201 if ((val
< 0 && val
!= -128)
33204 *loc
= GEN_INT (-val
);
33211 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
33212 optabs would emit if we didn't have TFmode patterns. */
33215 x86_emit_floatuns (rtx operands
[2])
33217 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
33218 enum machine_mode mode
, inmode
;
33220 inmode
= GET_MODE (operands
[1]);
33221 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
33224 in
= force_reg (inmode
, operands
[1]);
33225 mode
= GET_MODE (out
);
33226 neglab
= gen_label_rtx ();
33227 donelab
= gen_label_rtx ();
33228 f0
= gen_reg_rtx (mode
);
33230 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, inmode
, 0, neglab
);
33232 expand_float (out
, in
, 0);
33234 emit_jump_insn (gen_jump (donelab
));
33237 emit_label (neglab
);
33239 i0
= expand_simple_binop (inmode
, LSHIFTRT
, in
, const1_rtx
, NULL
,
33241 i1
= expand_simple_binop (inmode
, AND
, in
, const1_rtx
, NULL
,
33243 i0
= expand_simple_binop (inmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
33245 expand_float (f0
, i0
, 0);
33247 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
33249 emit_label (donelab
);
33252 /* AVX2 does support 32-byte integer vector operations,
33253 thus the longest vector we are faced with is V32QImode. */
33254 #define MAX_VECT_LEN 32
33256 struct expand_vec_perm_d
33258 rtx target
, op0
, op1
;
33259 unsigned char perm
[MAX_VECT_LEN
];
33260 enum machine_mode vmode
;
33261 unsigned char nelt
;
33265 static bool expand_vec_perm_1 (struct expand_vec_perm_d
*d
);
33266 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d
*d
);
33268 /* Get a vector mode of the same size as the original but with elements
33269 twice as wide. This is only guaranteed to apply to integral vectors. */
33271 static inline enum machine_mode
33272 get_mode_wider_vector (enum machine_mode o
)
33274 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
33275 enum machine_mode n
= GET_MODE_WIDER_MODE (o
);
33276 gcc_assert (GET_MODE_NUNITS (o
) == GET_MODE_NUNITS (n
) * 2);
33277 gcc_assert (GET_MODE_SIZE (o
) == GET_MODE_SIZE (n
));
33281 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
33282 with all elements equal to VAR. Return true if successful. */
33285 ix86_expand_vector_init_duplicate (bool mmx_ok
, enum machine_mode mode
,
33286 rtx target
, rtx val
)
33309 /* First attempt to recognize VAL as-is. */
33310 dup
= gen_rtx_VEC_DUPLICATE (mode
, val
);
33311 insn
= emit_insn (gen_rtx_SET (VOIDmode
, target
, dup
));
33312 if (recog_memoized (insn
) < 0)
33315 /* If that fails, force VAL into a register. */
33318 XEXP (dup
, 0) = force_reg (GET_MODE_INNER (mode
), val
);
33319 seq
= get_insns ();
33322 emit_insn_before (seq
, insn
);
33324 ok
= recog_memoized (insn
) >= 0;
33333 if (TARGET_SSE
|| TARGET_3DNOW_A
)
33337 val
= gen_lowpart (SImode
, val
);
33338 x
= gen_rtx_TRUNCATE (HImode
, val
);
33339 x
= gen_rtx_VEC_DUPLICATE (mode
, x
);
33340 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
33353 struct expand_vec_perm_d dperm
;
33357 memset (&dperm
, 0, sizeof (dperm
));
33358 dperm
.target
= target
;
33359 dperm
.vmode
= mode
;
33360 dperm
.nelt
= GET_MODE_NUNITS (mode
);
33361 dperm
.op0
= dperm
.op1
= gen_reg_rtx (mode
);
33363 /* Extend to SImode using a paradoxical SUBREG. */
33364 tmp1
= gen_reg_rtx (SImode
);
33365 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
33367 /* Insert the SImode value as low element of a V4SImode vector. */
33368 tmp2
= gen_lowpart (V4SImode
, dperm
.op0
);
33369 emit_insn (gen_vec_setv4si_0 (tmp2
, CONST0_RTX (V4SImode
), tmp1
));
33371 ok
= (expand_vec_perm_1 (&dperm
)
33372 || expand_vec_perm_broadcast_1 (&dperm
));
33384 /* Replicate the value once into the next wider mode and recurse. */
33386 enum machine_mode smode
, wsmode
, wvmode
;
33389 smode
= GET_MODE_INNER (mode
);
33390 wvmode
= get_mode_wider_vector (mode
);
33391 wsmode
= GET_MODE_INNER (wvmode
);
33393 val
= convert_modes (wsmode
, smode
, val
, true);
33394 x
= expand_simple_binop (wsmode
, ASHIFT
, val
,
33395 GEN_INT (GET_MODE_BITSIZE (smode
)),
33396 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
33397 val
= expand_simple_binop (wsmode
, IOR
, val
, x
, x
, 1, OPTAB_LIB_WIDEN
);
33399 x
= gen_lowpart (wvmode
, target
);
33400 ok
= ix86_expand_vector_init_duplicate (mmx_ok
, wvmode
, x
, val
);
33408 enum machine_mode hvmode
= (mode
== V16HImode
? V8HImode
: V16QImode
);
33409 rtx x
= gen_reg_rtx (hvmode
);
33411 ok
= ix86_expand_vector_init_duplicate (false, hvmode
, x
, val
);
33414 x
= gen_rtx_VEC_CONCAT (mode
, x
, x
);
33415 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
33424 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
33425 whose ONE_VAR element is VAR, and other elements are zero. Return true
33429 ix86_expand_vector_init_one_nonzero (bool mmx_ok
, enum machine_mode mode
,
33430 rtx target
, rtx var
, int one_var
)
33432 enum machine_mode vsimode
;
33435 bool use_vector_set
= false;
33440 /* For SSE4.1, we normally use vector set. But if the second
33441 element is zero and inter-unit moves are OK, we use movq
33443 use_vector_set
= (TARGET_64BIT
33445 && !(TARGET_INTER_UNIT_MOVES
33451 use_vector_set
= TARGET_SSE4_1
;
33454 use_vector_set
= TARGET_SSE2
;
33457 use_vector_set
= TARGET_SSE
|| TARGET_3DNOW_A
;
33464 use_vector_set
= TARGET_AVX
;
33467 /* Use ix86_expand_vector_set in 64bit mode only. */
33468 use_vector_set
= TARGET_AVX
&& TARGET_64BIT
;
33474 if (use_vector_set
)
33476 emit_insn (gen_rtx_SET (VOIDmode
, target
, CONST0_RTX (mode
)));
33477 var
= force_reg (GET_MODE_INNER (mode
), var
);
33478 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
33494 var
= force_reg (GET_MODE_INNER (mode
), var
);
33495 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
33496 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
33501 if (!REG_P (target
) || REGNO (target
) < FIRST_PSEUDO_REGISTER
)
33502 new_target
= gen_reg_rtx (mode
);
33504 new_target
= target
;
33505 var
= force_reg (GET_MODE_INNER (mode
), var
);
33506 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
33507 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
33508 emit_insn (gen_rtx_SET (VOIDmode
, new_target
, x
));
33511 /* We need to shuffle the value to the correct position, so
33512 create a new pseudo to store the intermediate result. */
33514 /* With SSE2, we can use the integer shuffle insns. */
33515 if (mode
!= V4SFmode
&& TARGET_SSE2
)
33517 emit_insn (gen_sse2_pshufd_1 (new_target
, new_target
,
33519 GEN_INT (one_var
== 1 ? 0 : 1),
33520 GEN_INT (one_var
== 2 ? 0 : 1),
33521 GEN_INT (one_var
== 3 ? 0 : 1)));
33522 if (target
!= new_target
)
33523 emit_move_insn (target
, new_target
);
33527 /* Otherwise convert the intermediate result to V4SFmode and
33528 use the SSE1 shuffle instructions. */
33529 if (mode
!= V4SFmode
)
33531 tmp
= gen_reg_rtx (V4SFmode
);
33532 emit_move_insn (tmp
, gen_lowpart (V4SFmode
, new_target
));
33537 emit_insn (gen_sse_shufps_v4sf (tmp
, tmp
, tmp
,
33539 GEN_INT (one_var
== 1 ? 0 : 1),
33540 GEN_INT (one_var
== 2 ? 0+4 : 1+4),
33541 GEN_INT (one_var
== 3 ? 0+4 : 1+4)));
33543 if (mode
!= V4SFmode
)
33544 emit_move_insn (target
, gen_lowpart (V4SImode
, tmp
));
33545 else if (tmp
!= target
)
33546 emit_move_insn (target
, tmp
);
33548 else if (target
!= new_target
)
33549 emit_move_insn (target
, new_target
);
33554 vsimode
= V4SImode
;
33560 vsimode
= V2SImode
;
33566 /* Zero extend the variable element to SImode and recurse. */
33567 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
33569 x
= gen_reg_rtx (vsimode
);
33570 if (!ix86_expand_vector_init_one_nonzero (mmx_ok
, vsimode
, x
,
33572 gcc_unreachable ();
33574 emit_move_insn (target
, gen_lowpart (mode
, x
));
33582 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
33583 consisting of the values in VALS. It is known that all elements
33584 except ONE_VAR are constants. Return true if successful. */
33587 ix86_expand_vector_init_one_var (bool mmx_ok
, enum machine_mode mode
,
33588 rtx target
, rtx vals
, int one_var
)
33590 rtx var
= XVECEXP (vals
, 0, one_var
);
33591 enum machine_mode wmode
;
33594 const_vec
= copy_rtx (vals
);
33595 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
33596 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
33604 /* For the two element vectors, it's just as easy to use
33605 the general case. */
33609 /* Use ix86_expand_vector_set in 64bit mode only. */
33632 /* There's no way to set one QImode entry easily. Combine
33633 the variable value with its adjacent constant value, and
33634 promote to an HImode set. */
33635 x
= XVECEXP (vals
, 0, one_var
^ 1);
33638 var
= convert_modes (HImode
, QImode
, var
, true);
33639 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
33640 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
33641 x
= GEN_INT (INTVAL (x
) & 0xff);
33645 var
= convert_modes (HImode
, QImode
, var
, true);
33646 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
33648 if (x
!= const0_rtx
)
33649 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
33650 1, OPTAB_LIB_WIDEN
);
33652 x
= gen_reg_rtx (wmode
);
33653 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
33654 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
33656 emit_move_insn (target
, gen_lowpart (mode
, x
));
33663 emit_move_insn (target
, const_vec
);
33664 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
33668 /* A subroutine of ix86_expand_vector_init_general. Use vector
33669 concatenate to handle the most general case: all values variable,
33670 and none identical. */
33673 ix86_expand_vector_init_concat (enum machine_mode mode
,
33674 rtx target
, rtx
*ops
, int n
)
33676 enum machine_mode cmode
, hmode
= VOIDmode
;
33677 rtx first
[8], second
[4];
33717 gcc_unreachable ();
33720 if (!register_operand (ops
[1], cmode
))
33721 ops
[1] = force_reg (cmode
, ops
[1]);
33722 if (!register_operand (ops
[0], cmode
))
33723 ops
[0] = force_reg (cmode
, ops
[0]);
33724 emit_insn (gen_rtx_SET (VOIDmode
, target
,
33725 gen_rtx_VEC_CONCAT (mode
, ops
[0],
33745 gcc_unreachable ();
33761 gcc_unreachable ();
33766 /* FIXME: We process inputs backward to help RA. PR 36222. */
33769 for (; i
> 0; i
-= 2, j
--)
33771 first
[j
] = gen_reg_rtx (cmode
);
33772 v
= gen_rtvec (2, ops
[i
- 1], ops
[i
]);
33773 ix86_expand_vector_init (false, first
[j
],
33774 gen_rtx_PARALLEL (cmode
, v
));
33780 gcc_assert (hmode
!= VOIDmode
);
33781 for (i
= j
= 0; i
< n
; i
+= 2, j
++)
33783 second
[j
] = gen_reg_rtx (hmode
);
33784 ix86_expand_vector_init_concat (hmode
, second
[j
],
33788 ix86_expand_vector_init_concat (mode
, target
, second
, n
);
33791 ix86_expand_vector_init_concat (mode
, target
, first
, n
);
33795 gcc_unreachable ();
33799 /* A subroutine of ix86_expand_vector_init_general. Use vector
33800 interleave to handle the most general case: all values variable,
33801 and none identical. */
33804 ix86_expand_vector_init_interleave (enum machine_mode mode
,
33805 rtx target
, rtx
*ops
, int n
)
33807 enum machine_mode first_imode
, second_imode
, third_imode
, inner_mode
;
33810 rtx (*gen_load_even
) (rtx
, rtx
, rtx
);
33811 rtx (*gen_interleave_first_low
) (rtx
, rtx
, rtx
);
33812 rtx (*gen_interleave_second_low
) (rtx
, rtx
, rtx
);
33817 gen_load_even
= gen_vec_setv8hi
;
33818 gen_interleave_first_low
= gen_vec_interleave_lowv4si
;
33819 gen_interleave_second_low
= gen_vec_interleave_lowv2di
;
33820 inner_mode
= HImode
;
33821 first_imode
= V4SImode
;
33822 second_imode
= V2DImode
;
33823 third_imode
= VOIDmode
;
33826 gen_load_even
= gen_vec_setv16qi
;
33827 gen_interleave_first_low
= gen_vec_interleave_lowv8hi
;
33828 gen_interleave_second_low
= gen_vec_interleave_lowv4si
;
33829 inner_mode
= QImode
;
33830 first_imode
= V8HImode
;
33831 second_imode
= V4SImode
;
33832 third_imode
= V2DImode
;
33835 gcc_unreachable ();
33838 for (i
= 0; i
< n
; i
++)
33840 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
33841 op0
= gen_reg_rtx (SImode
);
33842 emit_move_insn (op0
, gen_lowpart (SImode
, ops
[i
+ i
]));
33844 /* Insert the SImode value as low element of V4SImode vector. */
33845 op1
= gen_reg_rtx (V4SImode
);
33846 op0
= gen_rtx_VEC_MERGE (V4SImode
,
33847 gen_rtx_VEC_DUPLICATE (V4SImode
,
33849 CONST0_RTX (V4SImode
),
33851 emit_insn (gen_rtx_SET (VOIDmode
, op1
, op0
));
33853 /* Cast the V4SImode vector back to a vector in orignal mode. */
33854 op0
= gen_reg_rtx (mode
);
33855 emit_move_insn (op0
, gen_lowpart (mode
, op1
));
33857 /* Load even elements into the second positon. */
33858 emit_insn (gen_load_even (op0
,
33859 force_reg (inner_mode
,
33863 /* Cast vector to FIRST_IMODE vector. */
33864 ops
[i
] = gen_reg_rtx (first_imode
);
33865 emit_move_insn (ops
[i
], gen_lowpart (first_imode
, op0
));
33868 /* Interleave low FIRST_IMODE vectors. */
33869 for (i
= j
= 0; i
< n
; i
+= 2, j
++)
33871 op0
= gen_reg_rtx (first_imode
);
33872 emit_insn (gen_interleave_first_low (op0
, ops
[i
], ops
[i
+ 1]));
33874 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
33875 ops
[j
] = gen_reg_rtx (second_imode
);
33876 emit_move_insn (ops
[j
], gen_lowpart (second_imode
, op0
));
33879 /* Interleave low SECOND_IMODE vectors. */
33880 switch (second_imode
)
33883 for (i
= j
= 0; i
< n
/ 2; i
+= 2, j
++)
33885 op0
= gen_reg_rtx (second_imode
);
33886 emit_insn (gen_interleave_second_low (op0
, ops
[i
],
33889 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
33891 ops
[j
] = gen_reg_rtx (third_imode
);
33892 emit_move_insn (ops
[j
], gen_lowpart (third_imode
, op0
));
33894 second_imode
= V2DImode
;
33895 gen_interleave_second_low
= gen_vec_interleave_lowv2di
;
33899 op0
= gen_reg_rtx (second_imode
);
33900 emit_insn (gen_interleave_second_low (op0
, ops
[0],
33903 /* Cast the SECOND_IMODE vector back to a vector on original
33905 emit_insn (gen_rtx_SET (VOIDmode
, target
,
33906 gen_lowpart (mode
, op0
)));
33910 gcc_unreachable ();
33914 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
33915 all values variable, and none identical. */
33918 ix86_expand_vector_init_general (bool mmx_ok
, enum machine_mode mode
,
33919 rtx target
, rtx vals
)
33921 rtx ops
[32], op0
, op1
;
33922 enum machine_mode half_mode
= VOIDmode
;
33929 if (!mmx_ok
&& !TARGET_SSE
)
33941 n
= GET_MODE_NUNITS (mode
);
33942 for (i
= 0; i
< n
; i
++)
33943 ops
[i
] = XVECEXP (vals
, 0, i
);
33944 ix86_expand_vector_init_concat (mode
, target
, ops
, n
);
33948 half_mode
= V16QImode
;
33952 half_mode
= V8HImode
;
33956 n
= GET_MODE_NUNITS (mode
);
33957 for (i
= 0; i
< n
; i
++)
33958 ops
[i
] = XVECEXP (vals
, 0, i
);
33959 op0
= gen_reg_rtx (half_mode
);
33960 op1
= gen_reg_rtx (half_mode
);
33961 ix86_expand_vector_init_interleave (half_mode
, op0
, ops
,
33963 ix86_expand_vector_init_interleave (half_mode
, op1
,
33964 &ops
[n
>> 1], n
>> 2);
33965 emit_insn (gen_rtx_SET (VOIDmode
, target
,
33966 gen_rtx_VEC_CONCAT (mode
, op0
, op1
)));
33970 if (!TARGET_SSE4_1
)
33978 /* Don't use ix86_expand_vector_init_interleave if we can't
33979 move from GPR to SSE register directly. */
33980 if (!TARGET_INTER_UNIT_MOVES
)
33983 n
= GET_MODE_NUNITS (mode
);
33984 for (i
= 0; i
< n
; i
++)
33985 ops
[i
] = XVECEXP (vals
, 0, i
);
33986 ix86_expand_vector_init_interleave (mode
, target
, ops
, n
>> 1);
33994 gcc_unreachable ();
33998 int i
, j
, n_elts
, n_words
, n_elt_per_word
;
33999 enum machine_mode inner_mode
;
34000 rtx words
[4], shift
;
34002 inner_mode
= GET_MODE_INNER (mode
);
34003 n_elts
= GET_MODE_NUNITS (mode
);
34004 n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
34005 n_elt_per_word
= n_elts
/ n_words
;
34006 shift
= GEN_INT (GET_MODE_BITSIZE (inner_mode
));
34008 for (i
= 0; i
< n_words
; ++i
)
34010 rtx word
= NULL_RTX
;
34012 for (j
= 0; j
< n_elt_per_word
; ++j
)
34014 rtx elt
= XVECEXP (vals
, 0, (i
+1)*n_elt_per_word
- j
- 1);
34015 elt
= convert_modes (word_mode
, inner_mode
, elt
, true);
34021 word
= expand_simple_binop (word_mode
, ASHIFT
, word
, shift
,
34022 word
, 1, OPTAB_LIB_WIDEN
);
34023 word
= expand_simple_binop (word_mode
, IOR
, word
, elt
,
34024 word
, 1, OPTAB_LIB_WIDEN
);
34032 emit_move_insn (target
, gen_lowpart (mode
, words
[0]));
34033 else if (n_words
== 2)
34035 rtx tmp
= gen_reg_rtx (mode
);
34036 emit_clobber (tmp
);
34037 emit_move_insn (gen_lowpart (word_mode
, tmp
), words
[0]);
34038 emit_move_insn (gen_highpart (word_mode
, tmp
), words
[1]);
34039 emit_move_insn (target
, tmp
);
34041 else if (n_words
== 4)
34043 rtx tmp
= gen_reg_rtx (V4SImode
);
34044 gcc_assert (word_mode
== SImode
);
34045 vals
= gen_rtx_PARALLEL (V4SImode
, gen_rtvec_v (4, words
));
34046 ix86_expand_vector_init_general (false, V4SImode
, tmp
, vals
);
34047 emit_move_insn (target
, gen_lowpart (mode
, tmp
));
34050 gcc_unreachable ();
34054 /* Initialize vector TARGET via VALS. Suppress the use of MMX
34055 instructions unless MMX_OK is true. */
34058 ix86_expand_vector_init (bool mmx_ok
, rtx target
, rtx vals
)
34060 enum machine_mode mode
= GET_MODE (target
);
34061 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
34062 int n_elts
= GET_MODE_NUNITS (mode
);
34063 int n_var
= 0, one_var
= -1;
34064 bool all_same
= true, all_const_zero
= true;
34068 for (i
= 0; i
< n_elts
; ++i
)
34070 x
= XVECEXP (vals
, 0, i
);
34071 if (!(CONST_INT_P (x
)
34072 || GET_CODE (x
) == CONST_DOUBLE
34073 || GET_CODE (x
) == CONST_FIXED
))
34074 n_var
++, one_var
= i
;
34075 else if (x
!= CONST0_RTX (inner_mode
))
34076 all_const_zero
= false;
34077 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
34081 /* Constants are best loaded from the constant pool. */
34084 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
34088 /* If all values are identical, broadcast the value. */
34090 && ix86_expand_vector_init_duplicate (mmx_ok
, mode
, target
,
34091 XVECEXP (vals
, 0, 0)))
34094 /* Values where only one field is non-constant are best loaded from
34095 the pool and overwritten via move later. */
34099 && ix86_expand_vector_init_one_nonzero (mmx_ok
, mode
, target
,
34100 XVECEXP (vals
, 0, one_var
),
34104 if (ix86_expand_vector_init_one_var (mmx_ok
, mode
, target
, vals
, one_var
))
34108 ix86_expand_vector_init_general (mmx_ok
, mode
, target
, vals
);
34112 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
34114 enum machine_mode mode
= GET_MODE (target
);
34115 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
34116 enum machine_mode half_mode
;
34117 bool use_vec_merge
= false;
34119 static rtx (*gen_extract
[6][2]) (rtx
, rtx
)
34121 { gen_vec_extract_lo_v32qi
, gen_vec_extract_hi_v32qi
},
34122 { gen_vec_extract_lo_v16hi
, gen_vec_extract_hi_v16hi
},
34123 { gen_vec_extract_lo_v8si
, gen_vec_extract_hi_v8si
},
34124 { gen_vec_extract_lo_v4di
, gen_vec_extract_hi_v4di
},
34125 { gen_vec_extract_lo_v8sf
, gen_vec_extract_hi_v8sf
},
34126 { gen_vec_extract_lo_v4df
, gen_vec_extract_hi_v4df
}
34128 static rtx (*gen_insert
[6][2]) (rtx
, rtx
, rtx
)
34130 { gen_vec_set_lo_v32qi
, gen_vec_set_hi_v32qi
},
34131 { gen_vec_set_lo_v16hi
, gen_vec_set_hi_v16hi
},
34132 { gen_vec_set_lo_v8si
, gen_vec_set_hi_v8si
},
34133 { gen_vec_set_lo_v4di
, gen_vec_set_hi_v4di
},
34134 { gen_vec_set_lo_v8sf
, gen_vec_set_hi_v8sf
},
34135 { gen_vec_set_lo_v4df
, gen_vec_set_hi_v4df
}
34145 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
34146 ix86_expand_vector_extract (true, tmp
, target
, 1 - elt
);
34148 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
34150 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
34151 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
34157 use_vec_merge
= TARGET_SSE4_1
&& TARGET_64BIT
;
34161 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
34162 ix86_expand_vector_extract (false, tmp
, target
, 1 - elt
);
34164 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
34166 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
34167 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
34174 /* For the two element vectors, we implement a VEC_CONCAT with
34175 the extraction of the other element. */
34177 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (1 - elt
)));
34178 tmp
= gen_rtx_VEC_SELECT (inner_mode
, target
, tmp
);
34181 op0
= val
, op1
= tmp
;
34183 op0
= tmp
, op1
= val
;
34185 tmp
= gen_rtx_VEC_CONCAT (mode
, op0
, op1
);
34186 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
34191 use_vec_merge
= TARGET_SSE4_1
;
34198 use_vec_merge
= true;
34202 /* tmp = target = A B C D */
34203 tmp
= copy_to_reg (target
);
34204 /* target = A A B B */
34205 emit_insn (gen_vec_interleave_lowv4sf (target
, target
, target
));
34206 /* target = X A B B */
34207 ix86_expand_vector_set (false, target
, val
, 0);
34208 /* target = A X C D */
34209 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
34210 const1_rtx
, const0_rtx
,
34211 GEN_INT (2+4), GEN_INT (3+4)));
34215 /* tmp = target = A B C D */
34216 tmp
= copy_to_reg (target
);
34217 /* tmp = X B C D */
34218 ix86_expand_vector_set (false, tmp
, val
, 0);
34219 /* target = A B X D */
34220 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
34221 const0_rtx
, const1_rtx
,
34222 GEN_INT (0+4), GEN_INT (3+4)));
34226 /* tmp = target = A B C D */
34227 tmp
= copy_to_reg (target
);
34228 /* tmp = X B C D */
34229 ix86_expand_vector_set (false, tmp
, val
, 0);
34230 /* target = A B X D */
34231 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
34232 const0_rtx
, const1_rtx
,
34233 GEN_INT (2+4), GEN_INT (0+4)));
34237 gcc_unreachable ();
34242 use_vec_merge
= TARGET_SSE4_1
;
34246 /* Element 0 handled by vec_merge below. */
34249 use_vec_merge
= true;
34255 /* With SSE2, use integer shuffles to swap element 0 and ELT,
34256 store into element 0, then shuffle them back. */
34260 order
[0] = GEN_INT (elt
);
34261 order
[1] = const1_rtx
;
34262 order
[2] = const2_rtx
;
34263 order
[3] = GEN_INT (3);
34264 order
[elt
] = const0_rtx
;
34266 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
34267 order
[1], order
[2], order
[3]));
34269 ix86_expand_vector_set (false, target
, val
, 0);
34271 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
34272 order
[1], order
[2], order
[3]));
34276 /* For SSE1, we have to reuse the V4SF code. */
34277 ix86_expand_vector_set (false, gen_lowpart (V4SFmode
, target
),
34278 gen_lowpart (SFmode
, val
), elt
);
34283 use_vec_merge
= TARGET_SSE2
;
34286 use_vec_merge
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
34290 use_vec_merge
= TARGET_SSE4_1
;
34297 half_mode
= V16QImode
;
34303 half_mode
= V8HImode
;
34309 half_mode
= V4SImode
;
34315 half_mode
= V2DImode
;
34321 half_mode
= V4SFmode
;
34327 half_mode
= V2DFmode
;
34333 /* Compute offset. */
34337 gcc_assert (i
<= 1);
34339 /* Extract the half. */
34340 tmp
= gen_reg_rtx (half_mode
);
34341 emit_insn (gen_extract
[j
][i
] (tmp
, target
));
34343 /* Put val in tmp at elt. */
34344 ix86_expand_vector_set (false, tmp
, val
, elt
);
34347 emit_insn (gen_insert
[j
][i
] (target
, target
, tmp
));
34356 tmp
= gen_rtx_VEC_DUPLICATE (mode
, val
);
34357 tmp
= gen_rtx_VEC_MERGE (mode
, tmp
, target
, GEN_INT (1 << elt
));
34358 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
34362 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
34364 emit_move_insn (mem
, target
);
34366 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
34367 emit_move_insn (tmp
, val
);
34369 emit_move_insn (target
, mem
);
34374 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
34376 enum machine_mode mode
= GET_MODE (vec
);
34377 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
34378 bool use_vec_extr
= false;
34391 use_vec_extr
= true;
34395 use_vec_extr
= TARGET_SSE4_1
;
34407 tmp
= gen_reg_rtx (mode
);
34408 emit_insn (gen_sse_shufps_v4sf (tmp
, vec
, vec
,
34409 GEN_INT (elt
), GEN_INT (elt
),
34410 GEN_INT (elt
+4), GEN_INT (elt
+4)));
34414 tmp
= gen_reg_rtx (mode
);
34415 emit_insn (gen_vec_interleave_highv4sf (tmp
, vec
, vec
));
34419 gcc_unreachable ();
34422 use_vec_extr
= true;
34427 use_vec_extr
= TARGET_SSE4_1
;
34441 tmp
= gen_reg_rtx (mode
);
34442 emit_insn (gen_sse2_pshufd_1 (tmp
, vec
,
34443 GEN_INT (elt
), GEN_INT (elt
),
34444 GEN_INT (elt
), GEN_INT (elt
)));
34448 tmp
= gen_reg_rtx (mode
);
34449 emit_insn (gen_vec_interleave_highv4si (tmp
, vec
, vec
));
34453 gcc_unreachable ();
34456 use_vec_extr
= true;
34461 /* For SSE1, we have to reuse the V4SF code. */
34462 ix86_expand_vector_extract (false, gen_lowpart (SFmode
, target
),
34463 gen_lowpart (V4SFmode
, vec
), elt
);
34469 use_vec_extr
= TARGET_SSE2
;
34472 use_vec_extr
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
34476 use_vec_extr
= TARGET_SSE4_1
;
34482 tmp
= gen_reg_rtx (V4SFmode
);
34484 emit_insn (gen_vec_extract_lo_v8sf (tmp
, vec
));
34486 emit_insn (gen_vec_extract_hi_v8sf (tmp
, vec
));
34487 ix86_expand_vector_extract (false, target
, tmp
, elt
& 3);
34495 tmp
= gen_reg_rtx (V2DFmode
);
34497 emit_insn (gen_vec_extract_lo_v4df (tmp
, vec
));
34499 emit_insn (gen_vec_extract_hi_v4df (tmp
, vec
));
34500 ix86_expand_vector_extract (false, target
, tmp
, elt
& 1);
34508 tmp
= gen_reg_rtx (V16QImode
);
34510 emit_insn (gen_vec_extract_lo_v32qi (tmp
, vec
));
34512 emit_insn (gen_vec_extract_hi_v32qi (tmp
, vec
));
34513 ix86_expand_vector_extract (false, target
, tmp
, elt
& 15);
34521 tmp
= gen_reg_rtx (V8HImode
);
34523 emit_insn (gen_vec_extract_lo_v16hi (tmp
, vec
));
34525 emit_insn (gen_vec_extract_hi_v16hi (tmp
, vec
));
34526 ix86_expand_vector_extract (false, target
, tmp
, elt
& 7);
34534 tmp
= gen_reg_rtx (V4SImode
);
34536 emit_insn (gen_vec_extract_lo_v8si (tmp
, vec
));
34538 emit_insn (gen_vec_extract_hi_v8si (tmp
, vec
));
34539 ix86_expand_vector_extract (false, target
, tmp
, elt
& 3);
34547 tmp
= gen_reg_rtx (V2DImode
);
34549 emit_insn (gen_vec_extract_lo_v4di (tmp
, vec
));
34551 emit_insn (gen_vec_extract_hi_v4di (tmp
, vec
));
34552 ix86_expand_vector_extract (false, target
, tmp
, elt
& 1);
34558 /* ??? Could extract the appropriate HImode element and shift. */
34565 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (elt
)));
34566 tmp
= gen_rtx_VEC_SELECT (inner_mode
, vec
, tmp
);
34568 /* Let the rtl optimizers know about the zero extension performed. */
34569 if (inner_mode
== QImode
|| inner_mode
== HImode
)
34571 tmp
= gen_rtx_ZERO_EXTEND (SImode
, tmp
);
34572 target
= gen_lowpart (SImode
, target
);
34575 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
34579 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
34581 emit_move_insn (mem
, vec
);
34583 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
34584 emit_move_insn (target
, tmp
);
34588 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
34589 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
34590 The upper bits of DEST are undefined, though they shouldn't cause
34591 exceptions (some bits from src or all zeros are ok). */
34594 emit_reduc_half (rtx dest
, rtx src
, int i
)
34597 switch (GET_MODE (src
))
34601 tem
= gen_sse_movhlps (dest
, src
, src
);
34603 tem
= gen_sse_shufps_v4sf (dest
, src
, src
, const1_rtx
, const1_rtx
,
34604 GEN_INT (1 + 4), GEN_INT (1 + 4));
34607 tem
= gen_vec_interleave_highv2df (dest
, src
, src
);
34613 tem
= gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode
, dest
),
34614 gen_lowpart (V1TImode
, src
),
34619 tem
= gen_avx_vperm2f128v8sf3 (dest
, src
, src
, const1_rtx
);
34621 tem
= gen_avx_shufps256 (dest
, src
, src
,
34622 GEN_INT (i
== 128 ? 2 + (3 << 2) : 1));
34626 tem
= gen_avx_vperm2f128v4df3 (dest
, src
, src
, const1_rtx
);
34628 tem
= gen_avx_shufpd256 (dest
, src
, src
, const1_rtx
);
34635 tem
= gen_avx2_permv2ti (gen_lowpart (V4DImode
, dest
),
34636 gen_lowpart (V4DImode
, src
),
34637 gen_lowpart (V4DImode
, src
),
34640 tem
= gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode
, dest
),
34641 gen_lowpart (V2TImode
, src
),
34645 gcc_unreachable ();
34650 /* Expand a vector reduction. FN is the binary pattern to reduce;
34651 DEST is the destination; IN is the input vector. */
34654 ix86_expand_reduc (rtx (*fn
) (rtx
, rtx
, rtx
), rtx dest
, rtx in
)
34656 rtx half
, dst
, vec
= in
;
34657 enum machine_mode mode
= GET_MODE (in
);
34660 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
34662 && mode
== V8HImode
34663 && fn
== gen_uminv8hi3
)
34665 emit_insn (gen_sse4_1_phminposuw (dest
, in
));
34669 for (i
= GET_MODE_BITSIZE (mode
);
34670 i
> GET_MODE_BITSIZE (GET_MODE_INNER (mode
));
34673 half
= gen_reg_rtx (mode
);
34674 emit_reduc_half (half
, vec
, i
);
34675 if (i
== GET_MODE_BITSIZE (GET_MODE_INNER (mode
)) * 2)
34678 dst
= gen_reg_rtx (mode
);
34679 emit_insn (fn (dst
, half
, vec
));
34684 /* Target hook for scalar_mode_supported_p. */
34686 ix86_scalar_mode_supported_p (enum machine_mode mode
)
34688 if (DECIMAL_FLOAT_MODE_P (mode
))
34689 return default_decimal_float_supported_p ();
34690 else if (mode
== TFmode
)
34693 return default_scalar_mode_supported_p (mode
);
34696 /* Implements target hook vector_mode_supported_p. */
34698 ix86_vector_mode_supported_p (enum machine_mode mode
)
34700 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
34702 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
34704 if (TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
34706 if (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
))
34708 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
34713 /* Target hook for c_mode_for_suffix. */
34714 static enum machine_mode
34715 ix86_c_mode_for_suffix (char suffix
)
34725 /* Worker function for TARGET_MD_ASM_CLOBBERS.
34727 We do this in the new i386 backend to maintain source compatibility
34728 with the old cc0-based compiler. */
34731 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED
,
34732 tree inputs ATTRIBUTE_UNUSED
,
34735 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
34737 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
34742 /* Implements target vector targetm.asm.encode_section_info. */
34744 static void ATTRIBUTE_UNUSED
34745 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
34747 default_encode_section_info (decl
, rtl
, first
);
34749 if (TREE_CODE (decl
) == VAR_DECL
34750 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
))
34751 && ix86_in_large_data_p (decl
))
34752 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
34755 /* Worker function for REVERSE_CONDITION. */
34758 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
34760 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
34761 ? reverse_condition (code
)
34762 : reverse_condition_maybe_unordered (code
));
34765 /* Output code to perform an x87 FP register move, from OPERANDS[1]
34769 output_387_reg_move (rtx insn
, rtx
*operands
)
34771 if (REG_P (operands
[0]))
34773 if (REG_P (operands
[1])
34774 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
34776 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
34777 return output_387_ffreep (operands
, 0);
34778 return "fstp\t%y0";
34780 if (STACK_TOP_P (operands
[0]))
34781 return "fld%Z1\t%y1";
34784 else if (MEM_P (operands
[0]))
34786 gcc_assert (REG_P (operands
[1]));
34787 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
34788 return "fstp%Z0\t%y0";
34791 /* There is no non-popping store to memory for XFmode.
34792 So if we need one, follow the store with a load. */
34793 if (GET_MODE (operands
[0]) == XFmode
)
34794 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
34796 return "fst%Z0\t%y0";
34803 /* Output code to perform a conditional jump to LABEL, if C2 flag in
34804 FP status register is set. */
34807 ix86_emit_fp_unordered_jump (rtx label
)
34809 rtx reg
= gen_reg_rtx (HImode
);
34812 emit_insn (gen_x86_fnstsw_1 (reg
));
34814 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_insn_for_size_p ()))
34816 emit_insn (gen_x86_sahf_1 (reg
));
34818 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
34819 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
34823 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
34825 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
34826 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
34829 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
34830 gen_rtx_LABEL_REF (VOIDmode
, label
),
34832 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
34834 emit_jump_insn (temp
);
34835 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
34838 /* Output code to perform a log1p XFmode calculation. */
34840 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
34842 rtx label1
= gen_label_rtx ();
34843 rtx label2
= gen_label_rtx ();
34845 rtx tmp
= gen_reg_rtx (XFmode
);
34846 rtx tmp2
= gen_reg_rtx (XFmode
);
34849 emit_insn (gen_absxf2 (tmp
, op1
));
34850 test
= gen_rtx_GE (VOIDmode
, tmp
,
34851 CONST_DOUBLE_FROM_REAL_VALUE (
34852 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
34854 emit_jump_insn (gen_cbranchxf4 (test
, XEXP (test
, 0), XEXP (test
, 1), label1
));
34856 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
34857 emit_insn (gen_fyl2xp1xf3_i387 (op0
, op1
, tmp2
));
34858 emit_jump (label2
);
34860 emit_label (label1
);
34861 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
34862 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
34863 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
34864 emit_insn (gen_fyl2xxf3_i387 (op0
, tmp
, tmp2
));
34866 emit_label (label2
);
34869 /* Emit code for round calculation. */
34870 void ix86_emit_i387_round (rtx op0
, rtx op1
)
34872 enum machine_mode inmode
= GET_MODE (op1
);
34873 enum machine_mode outmode
= GET_MODE (op0
);
34874 rtx e1
, e2
, res
, tmp
, tmp1
, half
;
34875 rtx scratch
= gen_reg_rtx (HImode
);
34876 rtx flags
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
34877 rtx jump_label
= gen_label_rtx ();
34879 rtx (*gen_abs
) (rtx
, rtx
);
34880 rtx (*gen_neg
) (rtx
, rtx
);
34885 gen_abs
= gen_abssf2
;
34888 gen_abs
= gen_absdf2
;
34891 gen_abs
= gen_absxf2
;
34894 gcc_unreachable ();
34900 gen_neg
= gen_negsf2
;
34903 gen_neg
= gen_negdf2
;
34906 gen_neg
= gen_negxf2
;
34909 gen_neg
= gen_neghi2
;
34912 gen_neg
= gen_negsi2
;
34915 gen_neg
= gen_negdi2
;
34918 gcc_unreachable ();
34921 e1
= gen_reg_rtx (inmode
);
34922 e2
= gen_reg_rtx (inmode
);
34923 res
= gen_reg_rtx (outmode
);
34925 half
= CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf
, inmode
);
34927 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
34929 /* scratch = fxam(op1) */
34930 emit_insn (gen_rtx_SET (VOIDmode
, scratch
,
34931 gen_rtx_UNSPEC (HImode
, gen_rtvec (1, op1
),
34933 /* e1 = fabs(op1) */
34934 emit_insn (gen_abs (e1
, op1
));
34936 /* e2 = e1 + 0.5 */
34937 half
= force_reg (inmode
, half
);
34938 emit_insn (gen_rtx_SET (VOIDmode
, e2
,
34939 gen_rtx_PLUS (inmode
, e1
, half
)));
34941 /* res = floor(e2) */
34942 if (inmode
!= XFmode
)
34944 tmp1
= gen_reg_rtx (XFmode
);
34946 emit_insn (gen_rtx_SET (VOIDmode
, tmp1
,
34947 gen_rtx_FLOAT_EXTEND (XFmode
, e2
)));
34957 rtx tmp0
= gen_reg_rtx (XFmode
);
34959 emit_insn (gen_frndintxf2_floor (tmp0
, tmp1
));
34961 emit_insn (gen_rtx_SET (VOIDmode
, res
,
34962 gen_rtx_UNSPEC (outmode
, gen_rtvec (1, tmp0
),
34963 UNSPEC_TRUNC_NOOP
)));
34967 emit_insn (gen_frndintxf2_floor (res
, tmp1
));
34970 emit_insn (gen_lfloorxfhi2 (res
, tmp1
));
34973 emit_insn (gen_lfloorxfsi2 (res
, tmp1
));
34976 emit_insn (gen_lfloorxfdi2 (res
, tmp1
));
34979 gcc_unreachable ();
34982 /* flags = signbit(a) */
34983 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x02)));
34985 /* if (flags) then res = -res */
34986 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
,
34987 gen_rtx_EQ (VOIDmode
, flags
, const0_rtx
),
34988 gen_rtx_LABEL_REF (VOIDmode
, jump_label
),
34990 insn
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
34991 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
34992 JUMP_LABEL (insn
) = jump_label
;
34994 emit_insn (gen_neg (res
, res
));
34996 emit_label (jump_label
);
34997 LABEL_NUSES (jump_label
) = 1;
34999 emit_move_insn (op0
, res
);
35002 /* Output code to perform a Newton-Rhapson approximation of a single precision
35003 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
35005 void ix86_emit_swdivsf (rtx res
, rtx a
, rtx b
, enum machine_mode mode
)
35007 rtx x0
, x1
, e0
, e1
;
35009 x0
= gen_reg_rtx (mode
);
35010 e0
= gen_reg_rtx (mode
);
35011 e1
= gen_reg_rtx (mode
);
35012 x1
= gen_reg_rtx (mode
);
35014 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
35016 b
= force_reg (mode
, b
);
35018 /* x0 = rcp(b) estimate */
35019 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
35020 gen_rtx_UNSPEC (mode
, gen_rtvec (1, b
),
35023 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
35024 gen_rtx_MULT (mode
, x0
, b
)));
35027 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
35028 gen_rtx_MULT (mode
, x0
, e0
)));
35031 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
35032 gen_rtx_PLUS (mode
, x0
, x0
)));
35035 emit_insn (gen_rtx_SET (VOIDmode
, x1
,
35036 gen_rtx_MINUS (mode
, e1
, e0
)));
35039 emit_insn (gen_rtx_SET (VOIDmode
, res
,
35040 gen_rtx_MULT (mode
, a
, x1
)));
35043 /* Output code to perform a Newton-Rhapson approximation of a
35044 single precision floating point [reciprocal] square root. */
35046 void ix86_emit_swsqrtsf (rtx res
, rtx a
, enum machine_mode mode
,
35049 rtx x0
, e0
, e1
, e2
, e3
, mthree
, mhalf
;
35052 x0
= gen_reg_rtx (mode
);
35053 e0
= gen_reg_rtx (mode
);
35054 e1
= gen_reg_rtx (mode
);
35055 e2
= gen_reg_rtx (mode
);
35056 e3
= gen_reg_rtx (mode
);
35058 real_from_integer (&r
, VOIDmode
, -3, -1, 0);
35059 mthree
= CONST_DOUBLE_FROM_REAL_VALUE (r
, SFmode
);
35061 real_arithmetic (&r
, NEGATE_EXPR
, &dconsthalf
, NULL
);
35062 mhalf
= CONST_DOUBLE_FROM_REAL_VALUE (r
, SFmode
);
35064 if (VECTOR_MODE_P (mode
))
35066 mthree
= ix86_build_const_vector (mode
, true, mthree
);
35067 mhalf
= ix86_build_const_vector (mode
, true, mhalf
);
35070 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
35071 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
35073 a
= force_reg (mode
, a
);
35075 /* x0 = rsqrt(a) estimate */
35076 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
35077 gen_rtx_UNSPEC (mode
, gen_rtvec (1, a
),
35080 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
35085 zero
= gen_reg_rtx (mode
);
35086 mask
= gen_reg_rtx (mode
);
35088 zero
= force_reg (mode
, CONST0_RTX(mode
));
35089 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
35090 gen_rtx_NE (mode
, zero
, a
)));
35092 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
35093 gen_rtx_AND (mode
, x0
, mask
)));
35097 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
35098 gen_rtx_MULT (mode
, x0
, a
)));
35100 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
35101 gen_rtx_MULT (mode
, e0
, x0
)));
35104 mthree
= force_reg (mode
, mthree
);
35105 emit_insn (gen_rtx_SET (VOIDmode
, e2
,
35106 gen_rtx_PLUS (mode
, e1
, mthree
)));
35108 mhalf
= force_reg (mode
, mhalf
);
35110 /* e3 = -.5 * x0 */
35111 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
35112 gen_rtx_MULT (mode
, x0
, mhalf
)));
35114 /* e3 = -.5 * e0 */
35115 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
35116 gen_rtx_MULT (mode
, e0
, mhalf
)));
35117 /* ret = e2 * e3 */
35118 emit_insn (gen_rtx_SET (VOIDmode
, res
,
35119 gen_rtx_MULT (mode
, e2
, e3
)));
35122 #ifdef TARGET_SOLARIS
35123 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
35126 i386_solaris_elf_named_section (const char *name
, unsigned int flags
,
35129 /* With Binutils 2.15, the "@unwind" marker must be specified on
35130 every occurrence of the ".eh_frame" section, not just the first
35133 && strcmp (name
, ".eh_frame") == 0)
35135 fprintf (asm_out_file
, "\t.section\t%s,\"%s\",@unwind\n", name
,
35136 flags
& SECTION_WRITE
? "aw" : "a");
35141 if (HAVE_COMDAT_GROUP
&& flags
& SECTION_LINKONCE
)
35143 solaris_elf_asm_comdat_section (name
, flags
, decl
);
35148 default_elf_asm_named_section (name
, flags
, decl
);
35150 #endif /* TARGET_SOLARIS */
35152 /* Return the mangling of TYPE if it is an extended fundamental type. */
35154 static const char *
35155 ix86_mangle_type (const_tree type
)
35157 type
= TYPE_MAIN_VARIANT (type
);
35159 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
35160 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
)
35163 switch (TYPE_MODE (type
))
35166 /* __float128 is "g". */
35169 /* "long double" or __float80 is "e". */
35176 /* For 32-bit code we can save PIC register setup by using
35177 __stack_chk_fail_local hidden function instead of calling
35178 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
35179 register, so it is better to call __stack_chk_fail directly. */
35181 static tree ATTRIBUTE_UNUSED
35182 ix86_stack_protect_fail (void)
35184 return TARGET_64BIT
35185 ? default_external_stack_protect_fail ()
35186 : default_hidden_stack_protect_fail ();
35189 /* Select a format to encode pointers in exception handling data. CODE
35190 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
35191 true if the symbol may be affected by dynamic relocations.
35193 ??? All x86 object file formats are capable of representing this.
35194 After all, the relocation needed is the same as for the call insn.
35195 Whether or not a particular assembler allows us to enter such, I
35196 guess we'll have to see. */
35198 asm_preferred_eh_data_format (int code
, int global
)
35202 int type
= DW_EH_PE_sdata8
;
35204 || ix86_cmodel
== CM_SMALL_PIC
35205 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
35206 type
= DW_EH_PE_sdata4
;
35207 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
35209 if (ix86_cmodel
== CM_SMALL
35210 || (ix86_cmodel
== CM_MEDIUM
&& code
))
35211 return DW_EH_PE_udata4
;
35212 return DW_EH_PE_absptr
;
35215 /* Expand copysign from SIGN to the positive value ABS_VALUE
35216 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
35219 ix86_sse_copysign_to_positive (rtx result
, rtx abs_value
, rtx sign
, rtx mask
)
35221 enum machine_mode mode
= GET_MODE (sign
);
35222 rtx sgn
= gen_reg_rtx (mode
);
35223 if (mask
== NULL_RTX
)
35225 enum machine_mode vmode
;
35227 if (mode
== SFmode
)
35229 else if (mode
== DFmode
)
35234 mask
= ix86_build_signbit_mask (vmode
, VECTOR_MODE_P (mode
), false);
35235 if (!VECTOR_MODE_P (mode
))
35237 /* We need to generate a scalar mode mask in this case. */
35238 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
35239 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
35240 mask
= gen_reg_rtx (mode
);
35241 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
35245 mask
= gen_rtx_NOT (mode
, mask
);
35246 emit_insn (gen_rtx_SET (VOIDmode
, sgn
,
35247 gen_rtx_AND (mode
, mask
, sign
)));
35248 emit_insn (gen_rtx_SET (VOIDmode
, result
,
35249 gen_rtx_IOR (mode
, abs_value
, sgn
)));
35252 /* Expand fabs (OP0) and return a new rtx that holds the result. The
35253 mask for masking out the sign-bit is stored in *SMASK, if that is
35256 ix86_expand_sse_fabs (rtx op0
, rtx
*smask
)
35258 enum machine_mode vmode
, mode
= GET_MODE (op0
);
35261 xa
= gen_reg_rtx (mode
);
35262 if (mode
== SFmode
)
35264 else if (mode
== DFmode
)
35268 mask
= ix86_build_signbit_mask (vmode
, VECTOR_MODE_P (mode
), true);
35269 if (!VECTOR_MODE_P (mode
))
35271 /* We need to generate a scalar mode mask in this case. */
35272 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
35273 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
35274 mask
= gen_reg_rtx (mode
);
35275 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
35277 emit_insn (gen_rtx_SET (VOIDmode
, xa
,
35278 gen_rtx_AND (mode
, op0
, mask
)));
35286 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
35287 swapping the operands if SWAP_OPERANDS is true. The expanded
35288 code is a forward jump to a newly created label in case the
35289 comparison is true. The generated label rtx is returned. */
35291 ix86_expand_sse_compare_and_jump (enum rtx_code code
, rtx op0
, rtx op1
,
35292 bool swap_operands
)
35303 label
= gen_label_rtx ();
35304 tmp
= gen_rtx_REG (CCFPUmode
, FLAGS_REG
);
35305 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
35306 gen_rtx_COMPARE (CCFPUmode
, op0
, op1
)));
35307 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
, tmp
, const0_rtx
);
35308 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
35309 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
35310 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
35311 JUMP_LABEL (tmp
) = label
;
35316 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
35317 using comparison code CODE. Operands are swapped for the comparison if
35318 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
35320 ix86_expand_sse_compare_mask (enum rtx_code code
, rtx op0
, rtx op1
,
35321 bool swap_operands
)
35323 rtx (*insn
)(rtx
, rtx
, rtx
, rtx
);
35324 enum machine_mode mode
= GET_MODE (op0
);
35325 rtx mask
= gen_reg_rtx (mode
);
35334 insn
= mode
== DFmode
? gen_setcc_df_sse
: gen_setcc_sf_sse
;
35336 emit_insn (insn (mask
, op0
, op1
,
35337 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
35341 /* Generate and return a rtx of mode MODE for 2**n where n is the number
35342 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
35344 ix86_gen_TWO52 (enum machine_mode mode
)
35346 REAL_VALUE_TYPE TWO52r
;
35349 real_ldexp (&TWO52r
, &dconst1
, mode
== DFmode
? 52 : 23);
35350 TWO52
= const_double_from_real_value (TWO52r
, mode
);
35351 TWO52
= force_reg (mode
, TWO52
);
35356 /* Expand SSE sequence for computing lround from OP1 storing
35359 ix86_expand_lround (rtx op0
, rtx op1
)
35361 /* C code for the stuff we're doing below:
35362 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
35365 enum machine_mode mode
= GET_MODE (op1
);
35366 const struct real_format
*fmt
;
35367 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
35370 /* load nextafter (0.5, 0.0) */
35371 fmt
= REAL_MODE_FORMAT (mode
);
35372 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
35373 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
35375 /* adj = copysign (0.5, op1) */
35376 adj
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
35377 ix86_sse_copysign_to_positive (adj
, adj
, force_reg (mode
, op1
), NULL_RTX
);
35379 /* adj = op1 + adj */
35380 adj
= expand_simple_binop (mode
, PLUS
, adj
, op1
, NULL_RTX
, 0, OPTAB_DIRECT
);
35382 /* op0 = (imode)adj */
35383 expand_fix (op0
, adj
, 0);
35386 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
35389 ix86_expand_lfloorceil (rtx op0
, rtx op1
, bool do_floor
)
35391 /* C code for the stuff we're doing below (for do_floor):
35393 xi -= (double)xi > op1 ? 1 : 0;
35396 enum machine_mode fmode
= GET_MODE (op1
);
35397 enum machine_mode imode
= GET_MODE (op0
);
35398 rtx ireg
, freg
, label
, tmp
;
35400 /* reg = (long)op1 */
35401 ireg
= gen_reg_rtx (imode
);
35402 expand_fix (ireg
, op1
, 0);
35404 /* freg = (double)reg */
35405 freg
= gen_reg_rtx (fmode
);
35406 expand_float (freg
, ireg
, 0);
35408 /* ireg = (freg > op1) ? ireg - 1 : ireg */
35409 label
= ix86_expand_sse_compare_and_jump (UNLE
,
35410 freg
, op1
, !do_floor
);
35411 tmp
= expand_simple_binop (imode
, do_floor
? MINUS
: PLUS
,
35412 ireg
, const1_rtx
, NULL_RTX
, 0, OPTAB_DIRECT
);
35413 emit_move_insn (ireg
, tmp
);
35415 emit_label (label
);
35416 LABEL_NUSES (label
) = 1;
35418 emit_move_insn (op0
, ireg
);
35421 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
35422 result in OPERAND0. */
35424 ix86_expand_rint (rtx operand0
, rtx operand1
)
35426 /* C code for the stuff we're doing below:
35427 xa = fabs (operand1);
35428 if (!isless (xa, 2**52))
35430 xa = xa + 2**52 - 2**52;
35431 return copysign (xa, operand1);
35433 enum machine_mode mode
= GET_MODE (operand0
);
35434 rtx res
, xa
, label
, TWO52
, mask
;
35436 res
= gen_reg_rtx (mode
);
35437 emit_move_insn (res
, operand1
);
35439 /* xa = abs (operand1) */
35440 xa
= ix86_expand_sse_fabs (res
, &mask
);
35442 /* if (!isless (xa, TWO52)) goto label; */
35443 TWO52
= ix86_gen_TWO52 (mode
);
35444 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
35446 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
35447 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
35449 ix86_sse_copysign_to_positive (res
, xa
, res
, mask
);
35451 emit_label (label
);
35452 LABEL_NUSES (label
) = 1;
35454 emit_move_insn (operand0
, res
);
35457 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
35460 ix86_expand_floorceildf_32 (rtx operand0
, rtx operand1
, bool do_floor
)
35462 /* C code for the stuff we expand below.
35463 double xa = fabs (x), x2;
35464 if (!isless (xa, TWO52))
35466 xa = xa + TWO52 - TWO52;
35467 x2 = copysign (xa, x);
35476 enum machine_mode mode
= GET_MODE (operand0
);
35477 rtx xa
, TWO52
, tmp
, label
, one
, res
, mask
;
35479 TWO52
= ix86_gen_TWO52 (mode
);
35481 /* Temporary for holding the result, initialized to the input
35482 operand to ease control flow. */
35483 res
= gen_reg_rtx (mode
);
35484 emit_move_insn (res
, operand1
);
35486 /* xa = abs (operand1) */
35487 xa
= ix86_expand_sse_fabs (res
, &mask
);
35489 /* if (!isless (xa, TWO52)) goto label; */
35490 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
35492 /* xa = xa + TWO52 - TWO52; */
35493 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
35494 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
35496 /* xa = copysign (xa, operand1) */
35497 ix86_sse_copysign_to_positive (xa
, xa
, res
, mask
);
35499 /* generate 1.0 or -1.0 */
35500 one
= force_reg (mode
,
35501 const_double_from_real_value (do_floor
35502 ? dconst1
: dconstm1
, mode
));
35504 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
35505 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
35506 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
35507 gen_rtx_AND (mode
, one
, tmp
)));
35508 /* We always need to subtract here to preserve signed zero. */
35509 tmp
= expand_simple_binop (mode
, MINUS
,
35510 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
35511 emit_move_insn (res
, tmp
);
35513 emit_label (label
);
35514 LABEL_NUSES (label
) = 1;
35516 emit_move_insn (operand0
, res
);
35519 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
35522 ix86_expand_floorceil (rtx operand0
, rtx operand1
, bool do_floor
)
35524 /* C code for the stuff we expand below.
35525 double xa = fabs (x), x2;
35526 if (!isless (xa, TWO52))
35528 x2 = (double)(long)x;
35535 if (HONOR_SIGNED_ZEROS (mode))
35536 return copysign (x2, x);
35539 enum machine_mode mode
= GET_MODE (operand0
);
35540 rtx xa
, xi
, TWO52
, tmp
, label
, one
, res
, mask
;
35542 TWO52
= ix86_gen_TWO52 (mode
);
35544 /* Temporary for holding the result, initialized to the input
35545 operand to ease control flow. */
35546 res
= gen_reg_rtx (mode
);
35547 emit_move_insn (res
, operand1
);
35549 /* xa = abs (operand1) */
35550 xa
= ix86_expand_sse_fabs (res
, &mask
);
35552 /* if (!isless (xa, TWO52)) goto label; */
35553 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
35555 /* xa = (double)(long)x */
35556 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
35557 expand_fix (xi
, res
, 0);
35558 expand_float (xa
, xi
, 0);
35561 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
35563 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
35564 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
35565 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
35566 gen_rtx_AND (mode
, one
, tmp
)));
35567 tmp
= expand_simple_binop (mode
, do_floor
? MINUS
: PLUS
,
35568 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
35569 emit_move_insn (res
, tmp
);
35571 if (HONOR_SIGNED_ZEROS (mode
))
35572 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
35574 emit_label (label
);
35575 LABEL_NUSES (label
) = 1;
35577 emit_move_insn (operand0
, res
);
35580 /* Expand SSE sequence for computing round from OPERAND1 storing
35581 into OPERAND0. Sequence that works without relying on DImode truncation
35582 via cvttsd2siq that is only available on 64bit targets. */
35584 ix86_expand_rounddf_32 (rtx operand0
, rtx operand1
)
35586 /* C code for the stuff we expand below.
35587 double xa = fabs (x), xa2, x2;
35588 if (!isless (xa, TWO52))
35590 Using the absolute value and copying back sign makes
35591 -0.0 -> -0.0 correct.
35592 xa2 = xa + TWO52 - TWO52;
35597 else if (dxa > 0.5)
35599 x2 = copysign (xa2, x);
35602 enum machine_mode mode
= GET_MODE (operand0
);
35603 rtx xa
, xa2
, dxa
, TWO52
, tmp
, label
, half
, mhalf
, one
, res
, mask
;
35605 TWO52
= ix86_gen_TWO52 (mode
);
35607 /* Temporary for holding the result, initialized to the input
35608 operand to ease control flow. */
35609 res
= gen_reg_rtx (mode
);
35610 emit_move_insn (res
, operand1
);
35612 /* xa = abs (operand1) */
35613 xa
= ix86_expand_sse_fabs (res
, &mask
);
35615 /* if (!isless (xa, TWO52)) goto label; */
35616 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
35618 /* xa2 = xa + TWO52 - TWO52; */
35619 xa2
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
35620 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
35622 /* dxa = xa2 - xa; */
35623 dxa
= expand_simple_binop (mode
, MINUS
, xa2
, xa
, NULL_RTX
, 0, OPTAB_DIRECT
);
35625 /* generate 0.5, 1.0 and -0.5 */
35626 half
= force_reg (mode
, const_double_from_real_value (dconsthalf
, mode
));
35627 one
= expand_simple_binop (mode
, PLUS
, half
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
35628 mhalf
= expand_simple_binop (mode
, MINUS
, half
, one
, NULL_RTX
,
35632 tmp
= gen_reg_rtx (mode
);
35633 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
35634 tmp
= ix86_expand_sse_compare_mask (UNGT
, dxa
, half
, false);
35635 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
35636 gen_rtx_AND (mode
, one
, tmp
)));
35637 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
35638 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
35639 tmp
= ix86_expand_sse_compare_mask (UNGE
, mhalf
, dxa
, false);
35640 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
35641 gen_rtx_AND (mode
, one
, tmp
)));
35642 xa2
= expand_simple_binop (mode
, PLUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
35644 /* res = copysign (xa2, operand1) */
35645 ix86_sse_copysign_to_positive (res
, xa2
, force_reg (mode
, operand1
), mask
);
35647 emit_label (label
);
35648 LABEL_NUSES (label
) = 1;
35650 emit_move_insn (operand0
, res
);
35653 /* Expand SSE sequence for computing trunc from OPERAND1 storing
35656 ix86_expand_trunc (rtx operand0
, rtx operand1
)
35658 /* C code for SSE variant we expand below.
35659 double xa = fabs (x), x2;
35660 if (!isless (xa, TWO52))
35662 x2 = (double)(long)x;
35663 if (HONOR_SIGNED_ZEROS (mode))
35664 return copysign (x2, x);
35667 enum machine_mode mode
= GET_MODE (operand0
);
35668 rtx xa
, xi
, TWO52
, label
, res
, mask
;
35670 TWO52
= ix86_gen_TWO52 (mode
);
35672 /* Temporary for holding the result, initialized to the input
35673 operand to ease control flow. */
35674 res
= gen_reg_rtx (mode
);
35675 emit_move_insn (res
, operand1
);
35677 /* xa = abs (operand1) */
35678 xa
= ix86_expand_sse_fabs (res
, &mask
);
35680 /* if (!isless (xa, TWO52)) goto label; */
35681 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
35683 /* x = (double)(long)x */
35684 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
35685 expand_fix (xi
, res
, 0);
35686 expand_float (res
, xi
, 0);
35688 if (HONOR_SIGNED_ZEROS (mode
))
35689 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
35691 emit_label (label
);
35692 LABEL_NUSES (label
) = 1;
35694 emit_move_insn (operand0
, res
);
35697 /* Expand SSE sequence for computing trunc from OPERAND1 storing
35700 ix86_expand_truncdf_32 (rtx operand0
, rtx operand1
)
35702 enum machine_mode mode
= GET_MODE (operand0
);
35703 rtx xa
, mask
, TWO52
, label
, one
, res
, smask
, tmp
;
35705 /* C code for SSE variant we expand below.
35706 double xa = fabs (x), x2;
35707 if (!isless (xa, TWO52))
35709 xa2 = xa + TWO52 - TWO52;
35713 x2 = copysign (xa2, x);
35717 TWO52
= ix86_gen_TWO52 (mode
);
35719 /* Temporary for holding the result, initialized to the input
35720 operand to ease control flow. */
35721 res
= gen_reg_rtx (mode
);
35722 emit_move_insn (res
, operand1
);
35724 /* xa = abs (operand1) */
35725 xa
= ix86_expand_sse_fabs (res
, &smask
);
35727 /* if (!isless (xa, TWO52)) goto label; */
35728 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
35730 /* res = xa + TWO52 - TWO52; */
35731 tmp
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
35732 tmp
= expand_simple_binop (mode
, MINUS
, tmp
, TWO52
, tmp
, 0, OPTAB_DIRECT
);
35733 emit_move_insn (res
, tmp
);
35736 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
35738 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
35739 mask
= ix86_expand_sse_compare_mask (UNGT
, res
, xa
, false);
35740 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
35741 gen_rtx_AND (mode
, mask
, one
)));
35742 tmp
= expand_simple_binop (mode
, MINUS
,
35743 res
, mask
, NULL_RTX
, 0, OPTAB_DIRECT
);
35744 emit_move_insn (res
, tmp
);
35746 /* res = copysign (res, operand1) */
35747 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), smask
);
35749 emit_label (label
);
35750 LABEL_NUSES (label
) = 1;
35752 emit_move_insn (operand0
, res
);
35755 /* Expand SSE sequence for computing round from OPERAND1 storing
35758 ix86_expand_round (rtx operand0
, rtx operand1
)
35760 /* C code for the stuff we're doing below:
35761 double xa = fabs (x);
35762 if (!isless (xa, TWO52))
35764 xa = (double)(long)(xa + nextafter (0.5, 0.0));
35765 return copysign (xa, x);
35767 enum machine_mode mode
= GET_MODE (operand0
);
35768 rtx res
, TWO52
, xa
, label
, xi
, half
, mask
;
35769 const struct real_format
*fmt
;
35770 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
35772 /* Temporary for holding the result, initialized to the input
35773 operand to ease control flow. */
35774 res
= gen_reg_rtx (mode
);
35775 emit_move_insn (res
, operand1
);
35777 TWO52
= ix86_gen_TWO52 (mode
);
35778 xa
= ix86_expand_sse_fabs (res
, &mask
);
35779 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
35781 /* load nextafter (0.5, 0.0) */
35782 fmt
= REAL_MODE_FORMAT (mode
);
35783 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
35784 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
35786 /* xa = xa + 0.5 */
35787 half
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
35788 xa
= expand_simple_binop (mode
, PLUS
, xa
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
35790 /* xa = (double)(int64_t)xa */
35791 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
35792 expand_fix (xi
, xa
, 0);
35793 expand_float (xa
, xi
, 0);
35795 /* res = copysign (xa, operand1) */
35796 ix86_sse_copysign_to_positive (res
, xa
, force_reg (mode
, operand1
), mask
);
35798 emit_label (label
);
35799 LABEL_NUSES (label
) = 1;
35801 emit_move_insn (operand0
, res
);
35804 /* Expand SSE sequence for computing round
35805 from OP1 storing into OP0 using sse4 round insn. */
35807 ix86_expand_round_sse4 (rtx op0
, rtx op1
)
35809 enum machine_mode mode
= GET_MODE (op0
);
35810 rtx e1
, e2
, res
, half
;
35811 const struct real_format
*fmt
;
35812 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
35813 rtx (*gen_copysign
) (rtx
, rtx
, rtx
);
35814 rtx (*gen_round
) (rtx
, rtx
, rtx
);
35819 gen_copysign
= gen_copysignsf3
;
35820 gen_round
= gen_sse4_1_roundsf2
;
35823 gen_copysign
= gen_copysigndf3
;
35824 gen_round
= gen_sse4_1_rounddf2
;
35827 gcc_unreachable ();
35830 /* round (a) = trunc (a + copysign (0.5, a)) */
35832 /* load nextafter (0.5, 0.0) */
35833 fmt
= REAL_MODE_FORMAT (mode
);
35834 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
35835 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
35836 half
= const_double_from_real_value (pred_half
, mode
);
35838 /* e1 = copysign (0.5, op1) */
35839 e1
= gen_reg_rtx (mode
);
35840 emit_insn (gen_copysign (e1
, half
, op1
));
35842 /* e2 = op1 + e1 */
35843 e2
= expand_simple_binop (mode
, PLUS
, op1
, e1
, NULL_RTX
, 0, OPTAB_DIRECT
);
35845 /* res = trunc (e2) */
35846 res
= gen_reg_rtx (mode
);
35847 emit_insn (gen_round (res
, e2
, GEN_INT (ROUND_TRUNC
)));
35849 emit_move_insn (op0
, res
);
35853 /* Table of valid machine attributes. */
35854 static const struct attribute_spec ix86_attribute_table
[] =
35856 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
35857 affects_type_identity } */
35858 /* Stdcall attribute says callee is responsible for popping arguments
35859 if they are not variable. */
35860 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
35862 /* Fastcall attribute says callee is responsible for popping arguments
35863 if they are not variable. */
35864 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
35866 /* Thiscall attribute says callee is responsible for popping arguments
35867 if they are not variable. */
35868 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
35870 /* Cdecl attribute says the callee is a normal C declaration */
35871 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
35873 /* Regparm attribute specifies how many integer arguments are to be
35874 passed in registers. */
35875 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute
,
35877 /* Sseregparm attribute says we are using x86_64 calling conventions
35878 for FP arguments. */
35879 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
35881 /* The transactional memory builtins are implicitly regparm or fastcall
35882 depending on the ABI. Override the generic do-nothing attribute that
35883 these builtins were declared with. */
35884 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute
,
35886 /* force_align_arg_pointer says this function realigns the stack at entry. */
35887 { (const char *)&ix86_force_align_arg_pointer_string
, 0, 0,
35888 false, true, true, ix86_handle_cconv_attribute
, false },
35889 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
35890 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
, false },
35891 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
, false },
35892 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
,
35895 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
,
35897 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
,
35899 #ifdef SUBTARGET_ATTRIBUTE_TABLE
35900 SUBTARGET_ATTRIBUTE_TABLE
,
35902 /* ms_abi and sysv_abi calling convention function attributes. */
35903 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute
, true },
35904 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute
, true },
35905 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute
,
35907 { "callee_pop_aggregate_return", 1, 1, false, true, true,
35908 ix86_handle_callee_pop_aggregate_return
, true },
35910 { NULL
, 0, 0, false, false, false, NULL
, false }
35913 /* Implement targetm.vectorize.builtin_vectorization_cost. */
35915 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
35916 tree vectype ATTRIBUTE_UNUSED
,
35917 int misalign ATTRIBUTE_UNUSED
)
35919 switch (type_of_cost
)
35922 return ix86_cost
->scalar_stmt_cost
;
35925 return ix86_cost
->scalar_load_cost
;
35928 return ix86_cost
->scalar_store_cost
;
35931 return ix86_cost
->vec_stmt_cost
;
35934 return ix86_cost
->vec_align_load_cost
;
35937 return ix86_cost
->vec_store_cost
;
35939 case vec_to_scalar
:
35940 return ix86_cost
->vec_to_scalar_cost
;
35942 case scalar_to_vec
:
35943 return ix86_cost
->scalar_to_vec_cost
;
35945 case unaligned_load
:
35946 case unaligned_store
:
35947 return ix86_cost
->vec_unalign_load_cost
;
35949 case cond_branch_taken
:
35950 return ix86_cost
->cond_taken_branch_cost
;
35952 case cond_branch_not_taken
:
35953 return ix86_cost
->cond_not_taken_branch_cost
;
35959 gcc_unreachable ();
35964 /* Return a vector mode with twice as many elements as VMODE. */
35965 /* ??? Consider moving this to a table generated by genmodes.c. */
35967 static enum machine_mode
35968 doublesize_vector_mode (enum machine_mode vmode
)
35972 case V2SFmode
: return V4SFmode
;
35973 case V1DImode
: return V2DImode
;
35974 case V2SImode
: return V4SImode
;
35975 case V4HImode
: return V8HImode
;
35976 case V8QImode
: return V16QImode
;
35978 case V2DFmode
: return V4DFmode
;
35979 case V4SFmode
: return V8SFmode
;
35980 case V2DImode
: return V4DImode
;
35981 case V4SImode
: return V8SImode
;
35982 case V8HImode
: return V16HImode
;
35983 case V16QImode
: return V32QImode
;
35985 case V4DFmode
: return V8DFmode
;
35986 case V8SFmode
: return V16SFmode
;
35987 case V4DImode
: return V8DImode
;
35988 case V8SImode
: return V16SImode
;
35989 case V16HImode
: return V32HImode
;
35990 case V32QImode
: return V64QImode
;
35993 gcc_unreachable ();
35997 /* Construct (set target (vec_select op0 (parallel perm))) and
35998 return true if that's a valid instruction in the active ISA. */
36001 expand_vselect (rtx target
, rtx op0
, const unsigned char *perm
, unsigned nelt
)
36003 rtx rperm
[MAX_VECT_LEN
], x
;
36006 for (i
= 0; i
< nelt
; ++i
)
36007 rperm
[i
] = GEN_INT (perm
[i
]);
36009 x
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (nelt
, rperm
));
36010 x
= gen_rtx_VEC_SELECT (GET_MODE (target
), op0
, x
);
36011 x
= gen_rtx_SET (VOIDmode
, target
, x
);
36014 if (recog_memoized (x
) < 0)
36022 /* Similar, but generate a vec_concat from op0 and op1 as well. */
36025 expand_vselect_vconcat (rtx target
, rtx op0
, rtx op1
,
36026 const unsigned char *perm
, unsigned nelt
)
36028 enum machine_mode v2mode
;
36031 v2mode
= doublesize_vector_mode (GET_MODE (op0
));
36032 x
= gen_rtx_VEC_CONCAT (v2mode
, op0
, op1
);
36033 return expand_vselect (target
, x
, perm
, nelt
);
36036 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
36037 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
36040 expand_vec_perm_blend (struct expand_vec_perm_d
*d
)
36042 enum machine_mode vmode
= d
->vmode
;
36043 unsigned i
, mask
, nelt
= d
->nelt
;
36044 rtx target
, op0
, op1
, x
;
36045 rtx rperm
[32], vperm
;
36047 if (d
->op0
== d
->op1
)
36049 if (TARGET_AVX2
&& GET_MODE_SIZE (vmode
) == 32)
36051 else if (TARGET_AVX
&& (vmode
== V4DFmode
|| vmode
== V8SFmode
))
36053 else if (TARGET_SSE4_1
&& GET_MODE_SIZE (vmode
) == 16)
36058 /* This is a blend, not a permute. Elements must stay in their
36059 respective lanes. */
36060 for (i
= 0; i
< nelt
; ++i
)
36062 unsigned e
= d
->perm
[i
];
36063 if (!(e
== i
|| e
== i
+ nelt
))
36070 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
36071 decision should be extracted elsewhere, so that we only try that
36072 sequence once all budget==3 options have been tried. */
36073 target
= d
->target
;
36086 for (i
= 0; i
< nelt
; ++i
)
36087 mask
|= (d
->perm
[i
] >= nelt
) << i
;
36091 for (i
= 0; i
< 2; ++i
)
36092 mask
|= (d
->perm
[i
] >= 2 ? 15 : 0) << (i
* 4);
36097 for (i
= 0; i
< 4; ++i
)
36098 mask
|= (d
->perm
[i
] >= 4 ? 3 : 0) << (i
* 2);
36103 /* See if bytes move in pairs so we can use pblendw with
36104 an immediate argument, rather than pblendvb with a vector
36106 for (i
= 0; i
< 16; i
+= 2)
36107 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
36110 for (i
= 0; i
< nelt
; ++i
)
36111 rperm
[i
] = (d
->perm
[i
] < nelt
? const0_rtx
: constm1_rtx
);
36114 vperm
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
36115 vperm
= force_reg (vmode
, vperm
);
36117 if (GET_MODE_SIZE (vmode
) == 16)
36118 emit_insn (gen_sse4_1_pblendvb (target
, op0
, op1
, vperm
));
36120 emit_insn (gen_avx2_pblendvb (target
, op0
, op1
, vperm
));
36124 for (i
= 0; i
< 8; ++i
)
36125 mask
|= (d
->perm
[i
* 2] >= 16) << i
;
36130 target
= gen_lowpart (vmode
, target
);
36131 op0
= gen_lowpart (vmode
, op0
);
36132 op1
= gen_lowpart (vmode
, op1
);
36136 /* See if bytes move in pairs. If not, vpblendvb must be used. */
36137 for (i
= 0; i
< 32; i
+= 2)
36138 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
36140 /* See if bytes move in quadruplets. If yes, vpblendd
36141 with immediate can be used. */
36142 for (i
= 0; i
< 32; i
+= 4)
36143 if (d
->perm
[i
] + 2 != d
->perm
[i
+ 2])
36147 /* See if bytes move the same in both lanes. If yes,
36148 vpblendw with immediate can be used. */
36149 for (i
= 0; i
< 16; i
+= 2)
36150 if (d
->perm
[i
] + 16 != d
->perm
[i
+ 16])
36153 /* Use vpblendw. */
36154 for (i
= 0; i
< 16; ++i
)
36155 mask
|= (d
->perm
[i
* 2] >= 32) << i
;
36160 /* Use vpblendd. */
36161 for (i
= 0; i
< 8; ++i
)
36162 mask
|= (d
->perm
[i
* 4] >= 32) << i
;
36167 /* See if words move in pairs. If yes, vpblendd can be used. */
36168 for (i
= 0; i
< 16; i
+= 2)
36169 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
36173 /* See if words move the same in both lanes. If not,
36174 vpblendvb must be used. */
36175 for (i
= 0; i
< 8; i
++)
36176 if (d
->perm
[i
] + 8 != d
->perm
[i
+ 8])
36178 /* Use vpblendvb. */
36179 for (i
= 0; i
< 32; ++i
)
36180 rperm
[i
] = (d
->perm
[i
/ 2] < 16 ? const0_rtx
: constm1_rtx
);
36184 target
= gen_lowpart (vmode
, target
);
36185 op0
= gen_lowpart (vmode
, op0
);
36186 op1
= gen_lowpart (vmode
, op1
);
36187 goto finish_pblendvb
;
36190 /* Use vpblendw. */
36191 for (i
= 0; i
< 16; ++i
)
36192 mask
|= (d
->perm
[i
] >= 16) << i
;
36196 /* Use vpblendd. */
36197 for (i
= 0; i
< 8; ++i
)
36198 mask
|= (d
->perm
[i
* 2] >= 16) << i
;
36203 /* Use vpblendd. */
36204 for (i
= 0; i
< 4; ++i
)
36205 mask
|= (d
->perm
[i
] >= 4 ? 3 : 0) << (i
* 2);
36210 gcc_unreachable ();
36213 /* This matches five different patterns with the different modes. */
36214 x
= gen_rtx_VEC_MERGE (vmode
, op1
, op0
, GEN_INT (mask
));
36215 x
= gen_rtx_SET (VOIDmode
, target
, x
);
36221 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
36222 in terms of the variable form of vpermilps.
36224 Note that we will have already failed the immediate input vpermilps,
36225 which requires that the high and low part shuffle be identical; the
36226 variable form doesn't require that. */
36229 expand_vec_perm_vpermil (struct expand_vec_perm_d
*d
)
36231 rtx rperm
[8], vperm
;
36234 if (!TARGET_AVX
|| d
->vmode
!= V8SFmode
|| d
->op0
!= d
->op1
)
36237 /* We can only permute within the 128-bit lane. */
36238 for (i
= 0; i
< 8; ++i
)
36240 unsigned e
= d
->perm
[i
];
36241 if (i
< 4 ? e
>= 4 : e
< 4)
36248 for (i
= 0; i
< 8; ++i
)
36250 unsigned e
= d
->perm
[i
];
36252 /* Within each 128-bit lane, the elements of op0 are numbered
36253 from 0 and the elements of op1 are numbered from 4. */
36259 rperm
[i
] = GEN_INT (e
);
36262 vperm
= gen_rtx_CONST_VECTOR (V8SImode
, gen_rtvec_v (8, rperm
));
36263 vperm
= force_reg (V8SImode
, vperm
);
36264 emit_insn (gen_avx_vpermilvarv8sf3 (d
->target
, d
->op0
, vperm
));
36269 /* Return true if permutation D can be performed as VMODE permutation
36273 valid_perm_using_mode_p (enum machine_mode vmode
, struct expand_vec_perm_d
*d
)
36275 unsigned int i
, j
, chunk
;
36277 if (GET_MODE_CLASS (vmode
) != MODE_VECTOR_INT
36278 || GET_MODE_CLASS (d
->vmode
) != MODE_VECTOR_INT
36279 || GET_MODE_SIZE (vmode
) != GET_MODE_SIZE (d
->vmode
))
36282 if (GET_MODE_NUNITS (vmode
) >= d
->nelt
)
36285 chunk
= d
->nelt
/ GET_MODE_NUNITS (vmode
);
36286 for (i
= 0; i
< d
->nelt
; i
+= chunk
)
36287 if (d
->perm
[i
] & (chunk
- 1))
36290 for (j
= 1; j
< chunk
; ++j
)
36291 if (d
->perm
[i
] + j
!= d
->perm
[i
+ j
])
36297 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
36298 in terms of pshufb, vpperm, vpermq, vpermd or vperm2i128. */
36301 expand_vec_perm_pshufb (struct expand_vec_perm_d
*d
)
36303 unsigned i
, nelt
, eltsz
, mask
;
36304 unsigned char perm
[32];
36305 enum machine_mode vmode
= V16QImode
;
36306 rtx rperm
[32], vperm
, target
, op0
, op1
;
36310 if (d
->op0
!= d
->op1
)
36312 if (!TARGET_XOP
|| GET_MODE_SIZE (d
->vmode
) != 16)
36315 && valid_perm_using_mode_p (V2TImode
, d
))
36320 /* Use vperm2i128 insn. The pattern uses
36321 V4DImode instead of V2TImode. */
36322 target
= gen_lowpart (V4DImode
, d
->target
);
36323 op0
= gen_lowpart (V4DImode
, d
->op0
);
36324 op1
= gen_lowpart (V4DImode
, d
->op1
);
36326 = GEN_INT (((d
->perm
[0] & (nelt
/ 2)) ? 1 : 0)
36327 || ((d
->perm
[nelt
/ 2] & (nelt
/ 2)) ? 2 : 0));
36328 emit_insn (gen_avx2_permv2ti (target
, op0
, op1
, rperm
[0]));
36336 if (GET_MODE_SIZE (d
->vmode
) == 16)
36341 else if (GET_MODE_SIZE (d
->vmode
) == 32)
36346 /* V4DImode should be already handled through
36347 expand_vselect by vpermq instruction. */
36348 gcc_assert (d
->vmode
!= V4DImode
);
36351 if (d
->vmode
== V8SImode
36352 || d
->vmode
== V16HImode
36353 || d
->vmode
== V32QImode
)
36355 /* First see if vpermq can be used for
36356 V8SImode/V16HImode/V32QImode. */
36357 if (valid_perm_using_mode_p (V4DImode
, d
))
36359 for (i
= 0; i
< 4; i
++)
36360 perm
[i
] = (d
->perm
[i
* nelt
/ 4] * 4 / nelt
) & 3;
36363 return expand_vselect (gen_lowpart (V4DImode
, d
->target
),
36364 gen_lowpart (V4DImode
, d
->op0
),
36368 /* Next see if vpermd can be used. */
36369 if (valid_perm_using_mode_p (V8SImode
, d
))
36373 if (vmode
== V32QImode
)
36375 /* vpshufb only works intra lanes, it is not
36376 possible to shuffle bytes in between the lanes. */
36377 for (i
= 0; i
< nelt
; ++i
)
36378 if ((d
->perm
[i
] ^ i
) & (nelt
/ 2))
36389 if (vmode
== V8SImode
)
36390 for (i
= 0; i
< 8; ++i
)
36391 rperm
[i
] = GEN_INT ((d
->perm
[i
* nelt
/ 8] * 8 / nelt
) & 7);
36394 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
36395 if (d
->op0
!= d
->op1
)
36396 mask
= 2 * nelt
- 1;
36397 else if (vmode
== V16QImode
)
36400 mask
= nelt
/ 2 - 1;
36402 for (i
= 0; i
< nelt
; ++i
)
36404 unsigned j
, e
= d
->perm
[i
] & mask
;
36405 for (j
= 0; j
< eltsz
; ++j
)
36406 rperm
[i
* eltsz
+ j
] = GEN_INT (e
* eltsz
+ j
);
36410 vperm
= gen_rtx_CONST_VECTOR (vmode
,
36411 gen_rtvec_v (GET_MODE_NUNITS (vmode
), rperm
));
36412 vperm
= force_reg (vmode
, vperm
);
36414 target
= gen_lowpart (vmode
, d
->target
);
36415 op0
= gen_lowpart (vmode
, d
->op0
);
36416 if (d
->op0
== d
->op1
)
36418 if (vmode
== V16QImode
)
36419 emit_insn (gen_ssse3_pshufbv16qi3 (target
, op0
, vperm
));
36420 else if (vmode
== V32QImode
)
36421 emit_insn (gen_avx2_pshufbv32qi3 (target
, op0
, vperm
));
36423 emit_insn (gen_avx2_permvarv8si (target
, vperm
, op0
));
36427 op1
= gen_lowpart (vmode
, d
->op1
);
36428 emit_insn (gen_xop_pperm (target
, op0
, op1
, vperm
));
36434 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
36435 in a single instruction. */
36438 expand_vec_perm_1 (struct expand_vec_perm_d
*d
)
36440 unsigned i
, nelt
= d
->nelt
;
36441 unsigned char perm2
[MAX_VECT_LEN
];
36443 /* Check plain VEC_SELECT first, because AVX has instructions that could
36444 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
36445 input where SEL+CONCAT may not. */
36446 if (d
->op0
== d
->op1
)
36448 int mask
= nelt
- 1;
36449 bool identity_perm
= true;
36450 bool broadcast_perm
= true;
36452 for (i
= 0; i
< nelt
; i
++)
36454 perm2
[i
] = d
->perm
[i
] & mask
;
36456 identity_perm
= false;
36458 broadcast_perm
= false;
36464 emit_move_insn (d
->target
, d
->op0
);
36467 else if (broadcast_perm
&& TARGET_AVX2
)
36469 /* Use vpbroadcast{b,w,d}. */
36470 rtx op
= d
->op0
, (*gen
) (rtx
, rtx
) = NULL
;
36474 op
= gen_lowpart (V16QImode
, op
);
36475 gen
= gen_avx2_pbroadcastv32qi
;
36478 op
= gen_lowpart (V8HImode
, op
);
36479 gen
= gen_avx2_pbroadcastv16hi
;
36482 op
= gen_lowpart (V4SImode
, op
);
36483 gen
= gen_avx2_pbroadcastv8si
;
36486 gen
= gen_avx2_pbroadcastv16qi
;
36489 gen
= gen_avx2_pbroadcastv8hi
;
36491 /* For other modes prefer other shuffles this function creates. */
36497 emit_insn (gen (d
->target
, op
));
36502 if (expand_vselect (d
->target
, d
->op0
, perm2
, nelt
))
36505 /* There are plenty of patterns in sse.md that are written for
36506 SEL+CONCAT and are not replicated for a single op. Perhaps
36507 that should be changed, to avoid the nastiness here. */
36509 /* Recognize interleave style patterns, which means incrementing
36510 every other permutation operand. */
36511 for (i
= 0; i
< nelt
; i
+= 2)
36513 perm2
[i
] = d
->perm
[i
] & mask
;
36514 perm2
[i
+ 1] = (d
->perm
[i
+ 1] & mask
) + nelt
;
36516 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op0
, perm2
, nelt
))
36519 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
36522 for (i
= 0; i
< nelt
; i
+= 4)
36524 perm2
[i
+ 0] = d
->perm
[i
+ 0] & mask
;
36525 perm2
[i
+ 1] = d
->perm
[i
+ 1] & mask
;
36526 perm2
[i
+ 2] = (d
->perm
[i
+ 2] & mask
) + nelt
;
36527 perm2
[i
+ 3] = (d
->perm
[i
+ 3] & mask
) + nelt
;
36530 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op0
, perm2
, nelt
))
36535 /* Finally, try the fully general two operand permute. */
36536 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op1
, d
->perm
, nelt
))
36539 /* Recognize interleave style patterns with reversed operands. */
36540 if (d
->op0
!= d
->op1
)
36542 for (i
= 0; i
< nelt
; ++i
)
36544 unsigned e
= d
->perm
[i
];
36552 if (expand_vselect_vconcat (d
->target
, d
->op1
, d
->op0
, perm2
, nelt
))
36556 /* Try the SSE4.1 blend variable merge instructions. */
36557 if (expand_vec_perm_blend (d
))
36560 /* Try one of the AVX vpermil variable permutations. */
36561 if (expand_vec_perm_vpermil (d
))
36564 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
36565 vpshufb, vpermd or vpermq variable permutation. */
36566 if (expand_vec_perm_pshufb (d
))
36572 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
36573 in terms of a pair of pshuflw + pshufhw instructions. */
36576 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d
*d
)
36578 unsigned char perm2
[MAX_VECT_LEN
];
36582 if (d
->vmode
!= V8HImode
|| d
->op0
!= d
->op1
)
36585 /* The two permutations only operate in 64-bit lanes. */
36586 for (i
= 0; i
< 4; ++i
)
36587 if (d
->perm
[i
] >= 4)
36589 for (i
= 4; i
< 8; ++i
)
36590 if (d
->perm
[i
] < 4)
36596 /* Emit the pshuflw. */
36597 memcpy (perm2
, d
->perm
, 4);
36598 for (i
= 4; i
< 8; ++i
)
36600 ok
= expand_vselect (d
->target
, d
->op0
, perm2
, 8);
36603 /* Emit the pshufhw. */
36604 memcpy (perm2
+ 4, d
->perm
+ 4, 4);
36605 for (i
= 0; i
< 4; ++i
)
36607 ok
= expand_vselect (d
->target
, d
->target
, perm2
, 8);
36613 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
36614 the permutation using the SSSE3 palignr instruction. This succeeds
36615 when all of the elements in PERM fit within one vector and we merely
36616 need to shift them down so that a single vector permutation has a
36617 chance to succeed. */
36620 expand_vec_perm_palignr (struct expand_vec_perm_d
*d
)
36622 unsigned i
, nelt
= d
->nelt
;
36627 /* Even with AVX, palignr only operates on 128-bit vectors. */
36628 if (!TARGET_SSSE3
|| GET_MODE_SIZE (d
->vmode
) != 16)
36631 min
= nelt
, max
= 0;
36632 for (i
= 0; i
< nelt
; ++i
)
36634 unsigned e
= d
->perm
[i
];
36640 if (min
== 0 || max
- min
>= nelt
)
36643 /* Given that we have SSSE3, we know we'll be able to implement the
36644 single operand permutation after the palignr with pshufb. */
36648 shift
= GEN_INT (min
* GET_MODE_BITSIZE (GET_MODE_INNER (d
->vmode
)));
36649 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode
, d
->target
),
36650 gen_lowpart (TImode
, d
->op1
),
36651 gen_lowpart (TImode
, d
->op0
), shift
));
36653 d
->op0
= d
->op1
= d
->target
;
36656 for (i
= 0; i
< nelt
; ++i
)
36658 unsigned e
= d
->perm
[i
] - min
;
36664 /* Test for the degenerate case where the alignment by itself
36665 produces the desired permutation. */
36669 ok
= expand_vec_perm_1 (d
);
36675 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
36676 a two vector permutation into a single vector permutation by using
36677 an interleave operation to merge the vectors. */
36680 expand_vec_perm_interleave2 (struct expand_vec_perm_d
*d
)
36682 struct expand_vec_perm_d dremap
, dfinal
;
36683 unsigned i
, nelt
= d
->nelt
, nelt2
= nelt
/ 2;
36684 unsigned HOST_WIDE_INT contents
;
36685 unsigned char remap
[2 * MAX_VECT_LEN
];
36687 bool ok
, same_halves
= false;
36689 if (GET_MODE_SIZE (d
->vmode
) == 16)
36691 if (d
->op0
== d
->op1
)
36694 else if (GET_MODE_SIZE (d
->vmode
) == 32)
36698 /* For 32-byte modes allow even d->op0 == d->op1.
36699 The lack of cross-lane shuffling in some instructions
36700 might prevent a single insn shuffle. */
36705 /* Examine from whence the elements come. */
36707 for (i
= 0; i
< nelt
; ++i
)
36708 contents
|= ((unsigned HOST_WIDE_INT
) 1) << d
->perm
[i
];
36710 memset (remap
, 0xff, sizeof (remap
));
36713 if (GET_MODE_SIZE (d
->vmode
) == 16)
36715 unsigned HOST_WIDE_INT h1
, h2
, h3
, h4
;
36717 /* Split the two input vectors into 4 halves. */
36718 h1
= (((unsigned HOST_WIDE_INT
) 1) << nelt2
) - 1;
36723 /* If the elements from the low halves use interleave low, and similarly
36724 for interleave high. If the elements are from mis-matched halves, we
36725 can use shufps for V4SF/V4SI or do a DImode shuffle. */
36726 if ((contents
& (h1
| h3
)) == contents
)
36729 for (i
= 0; i
< nelt2
; ++i
)
36732 remap
[i
+ nelt
] = i
* 2 + 1;
36733 dremap
.perm
[i
* 2] = i
;
36734 dremap
.perm
[i
* 2 + 1] = i
+ nelt
;
36736 if (!TARGET_SSE2
&& d
->vmode
== V4SImode
)
36737 dremap
.vmode
= V4SFmode
;
36739 else if ((contents
& (h2
| h4
)) == contents
)
36742 for (i
= 0; i
< nelt2
; ++i
)
36744 remap
[i
+ nelt2
] = i
* 2;
36745 remap
[i
+ nelt
+ nelt2
] = i
* 2 + 1;
36746 dremap
.perm
[i
* 2] = i
+ nelt2
;
36747 dremap
.perm
[i
* 2 + 1] = i
+ nelt
+ nelt2
;
36749 if (!TARGET_SSE2
&& d
->vmode
== V4SImode
)
36750 dremap
.vmode
= V4SFmode
;
36752 else if ((contents
& (h1
| h4
)) == contents
)
36755 for (i
= 0; i
< nelt2
; ++i
)
36758 remap
[i
+ nelt
+ nelt2
] = i
+ nelt2
;
36759 dremap
.perm
[i
] = i
;
36760 dremap
.perm
[i
+ nelt2
] = i
+ nelt
+ nelt2
;
36765 dremap
.vmode
= V2DImode
;
36767 dremap
.perm
[0] = 0;
36768 dremap
.perm
[1] = 3;
36771 else if ((contents
& (h2
| h3
)) == contents
)
36774 for (i
= 0; i
< nelt2
; ++i
)
36776 remap
[i
+ nelt2
] = i
;
36777 remap
[i
+ nelt
] = i
+ nelt2
;
36778 dremap
.perm
[i
] = i
+ nelt2
;
36779 dremap
.perm
[i
+ nelt2
] = i
+ nelt
;
36784 dremap
.vmode
= V2DImode
;
36786 dremap
.perm
[0] = 1;
36787 dremap
.perm
[1] = 2;
36795 unsigned int nelt4
= nelt
/ 4, nzcnt
= 0;
36796 unsigned HOST_WIDE_INT q
[8];
36797 unsigned int nonzero_halves
[4];
36799 /* Split the two input vectors into 8 quarters. */
36800 q
[0] = (((unsigned HOST_WIDE_INT
) 1) << nelt4
) - 1;
36801 for (i
= 1; i
< 8; ++i
)
36802 q
[i
] = q
[0] << (nelt4
* i
);
36803 for (i
= 0; i
< 4; ++i
)
36804 if (((q
[2 * i
] | q
[2 * i
+ 1]) & contents
) != 0)
36806 nonzero_halves
[nzcnt
] = i
;
36812 gcc_assert (d
->op0
== d
->op1
);
36813 nonzero_halves
[1] = nonzero_halves
[0];
36814 same_halves
= true;
36816 else if (d
->op0
== d
->op1
)
36818 gcc_assert (nonzero_halves
[0] == 0);
36819 gcc_assert (nonzero_halves
[1] == 1);
36824 if (d
->perm
[0] / nelt2
== nonzero_halves
[1])
36826 /* Attempt to increase the likelyhood that dfinal
36827 shuffle will be intra-lane. */
36828 char tmph
= nonzero_halves
[0];
36829 nonzero_halves
[0] = nonzero_halves
[1];
36830 nonzero_halves
[1] = tmph
;
36833 /* vperm2f128 or vperm2i128. */
36834 for (i
= 0; i
< nelt2
; ++i
)
36836 remap
[i
+ nonzero_halves
[1] * nelt2
] = i
+ nelt2
;
36837 remap
[i
+ nonzero_halves
[0] * nelt2
] = i
;
36838 dremap
.perm
[i
+ nelt2
] = i
+ nonzero_halves
[1] * nelt2
;
36839 dremap
.perm
[i
] = i
+ nonzero_halves
[0] * nelt2
;
36842 if (d
->vmode
!= V8SFmode
36843 && d
->vmode
!= V4DFmode
36844 && d
->vmode
!= V8SImode
)
36846 dremap
.vmode
= V8SImode
;
36848 for (i
= 0; i
< 4; ++i
)
36850 dremap
.perm
[i
] = i
+ nonzero_halves
[0] * 4;
36851 dremap
.perm
[i
+ 4] = i
+ nonzero_halves
[1] * 4;
36855 else if (d
->op0
== d
->op1
)
36857 else if (TARGET_AVX2
36858 && (contents
& (q
[0] | q
[2] | q
[4] | q
[6])) == contents
)
36861 for (i
= 0; i
< nelt4
; ++i
)
36864 remap
[i
+ nelt
] = i
* 2 + 1;
36865 remap
[i
+ nelt2
] = i
* 2 + nelt2
;
36866 remap
[i
+ nelt
+ nelt2
] = i
* 2 + nelt2
+ 1;
36867 dremap
.perm
[i
* 2] = i
;
36868 dremap
.perm
[i
* 2 + 1] = i
+ nelt
;
36869 dremap
.perm
[i
* 2 + nelt2
] = i
+ nelt2
;
36870 dremap
.perm
[i
* 2 + nelt2
+ 1] = i
+ nelt
+ nelt2
;
36873 else if (TARGET_AVX2
36874 && (contents
& (q
[1] | q
[3] | q
[5] | q
[7])) == contents
)
36877 for (i
= 0; i
< nelt4
; ++i
)
36879 remap
[i
+ nelt4
] = i
* 2;
36880 remap
[i
+ nelt
+ nelt4
] = i
* 2 + 1;
36881 remap
[i
+ nelt2
+ nelt4
] = i
* 2 + nelt2
;
36882 remap
[i
+ nelt
+ nelt2
+ nelt4
] = i
* 2 + nelt2
+ 1;
36883 dremap
.perm
[i
* 2] = i
+ nelt4
;
36884 dremap
.perm
[i
* 2 + 1] = i
+ nelt
+ nelt4
;
36885 dremap
.perm
[i
* 2 + nelt2
] = i
+ nelt2
+ nelt4
;
36886 dremap
.perm
[i
* 2 + nelt2
+ 1] = i
+ nelt
+ nelt2
+ nelt4
;
36893 /* Use the remapping array set up above to move the elements from their
36894 swizzled locations into their final destinations. */
36896 for (i
= 0; i
< nelt
; ++i
)
36898 unsigned e
= remap
[d
->perm
[i
]];
36899 gcc_assert (e
< nelt
);
36900 /* If same_halves is true, both halves of the remapped vector are the
36901 same. Avoid cross-lane accesses if possible. */
36902 if (same_halves
&& i
>= nelt2
)
36904 gcc_assert (e
< nelt2
);
36905 dfinal
.perm
[i
] = e
+ nelt2
;
36908 dfinal
.perm
[i
] = e
;
36910 dfinal
.op0
= gen_reg_rtx (dfinal
.vmode
);
36911 dfinal
.op1
= dfinal
.op0
;
36912 dremap
.target
= dfinal
.op0
;
36914 /* Test if the final remap can be done with a single insn. For V4SFmode or
36915 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
36917 ok
= expand_vec_perm_1 (&dfinal
);
36918 seq
= get_insns ();
36927 if (dremap
.vmode
!= dfinal
.vmode
)
36929 dremap
.target
= gen_lowpart (dremap
.vmode
, dremap
.target
);
36930 dremap
.op0
= gen_lowpart (dremap
.vmode
, dremap
.op0
);
36931 dremap
.op1
= gen_lowpart (dremap
.vmode
, dremap
.op1
);
36934 ok
= expand_vec_perm_1 (&dremap
);
36941 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
36942 a single vector cross-lane permutation into vpermq followed
36943 by any of the single insn permutations. */
36946 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d
*d
)
36948 struct expand_vec_perm_d dremap
, dfinal
;
36949 unsigned i
, j
, nelt
= d
->nelt
, nelt2
= nelt
/ 2, nelt4
= nelt
/ 4;
36950 unsigned contents
[2];
36954 && (d
->vmode
== V32QImode
|| d
->vmode
== V16HImode
)
36955 && d
->op0
== d
->op1
))
36960 for (i
= 0; i
< nelt2
; ++i
)
36962 contents
[0] |= 1u << (d
->perm
[i
] / nelt4
);
36963 contents
[1] |= 1u << (d
->perm
[i
+ nelt2
] / nelt4
);
36966 for (i
= 0; i
< 2; ++i
)
36968 unsigned int cnt
= 0;
36969 for (j
= 0; j
< 4; ++j
)
36970 if ((contents
[i
] & (1u << j
)) != 0 && ++cnt
> 2)
36978 dremap
.vmode
= V4DImode
;
36980 dremap
.target
= gen_reg_rtx (V4DImode
);
36981 dremap
.op0
= gen_lowpart (V4DImode
, d
->op0
);
36982 dremap
.op1
= dremap
.op0
;
36983 for (i
= 0; i
< 2; ++i
)
36985 unsigned int cnt
= 0;
36986 for (j
= 0; j
< 4; ++j
)
36987 if ((contents
[i
] & (1u << j
)) != 0)
36988 dremap
.perm
[2 * i
+ cnt
++] = j
;
36989 for (; cnt
< 2; ++cnt
)
36990 dremap
.perm
[2 * i
+ cnt
] = 0;
36994 dfinal
.op0
= gen_lowpart (dfinal
.vmode
, dremap
.target
);
36995 dfinal
.op1
= dfinal
.op0
;
36996 for (i
= 0, j
= 0; i
< nelt
; ++i
)
37000 dfinal
.perm
[i
] = (d
->perm
[i
] & (nelt4
- 1)) | (j
? nelt2
: 0);
37001 if ((d
->perm
[i
] / nelt4
) == dremap
.perm
[j
])
37003 else if ((d
->perm
[i
] / nelt4
) == dremap
.perm
[j
+ 1])
37004 dfinal
.perm
[i
] |= nelt4
;
37006 gcc_unreachable ();
37009 ok
= expand_vec_perm_1 (&dremap
);
37012 ok
= expand_vec_perm_1 (&dfinal
);
37018 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
37019 a two vector permutation using 2 intra-lane interleave insns
37020 and cross-lane shuffle for 32-byte vectors. */
37023 expand_vec_perm_interleave3 (struct expand_vec_perm_d
*d
)
37026 rtx (*gen
) (rtx
, rtx
, rtx
);
37028 if (d
->op0
== d
->op1
)
37030 if (TARGET_AVX2
&& GET_MODE_SIZE (d
->vmode
) == 32)
37032 else if (TARGET_AVX
&& (d
->vmode
== V8SFmode
|| d
->vmode
== V4DFmode
))
37038 if (d
->perm
[0] != 0 && d
->perm
[0] != nelt
/ 2)
37040 for (i
= 0; i
< nelt
; i
+= 2)
37041 if (d
->perm
[i
] != d
->perm
[0] + i
/ 2
37042 || d
->perm
[i
+ 1] != d
->perm
[0] + i
/ 2 + nelt
)
37052 gen
= gen_vec_interleave_highv32qi
;
37054 gen
= gen_vec_interleave_lowv32qi
;
37058 gen
= gen_vec_interleave_highv16hi
;
37060 gen
= gen_vec_interleave_lowv16hi
;
37064 gen
= gen_vec_interleave_highv8si
;
37066 gen
= gen_vec_interleave_lowv8si
;
37070 gen
= gen_vec_interleave_highv4di
;
37072 gen
= gen_vec_interleave_lowv4di
;
37076 gen
= gen_vec_interleave_highv8sf
;
37078 gen
= gen_vec_interleave_lowv8sf
;
37082 gen
= gen_vec_interleave_highv4df
;
37084 gen
= gen_vec_interleave_lowv4df
;
37087 gcc_unreachable ();
37090 emit_insn (gen (d
->target
, d
->op0
, d
->op1
));
37094 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
37095 permutation with two pshufb insns and an ior. We should have already
37096 failed all two instruction sequences. */
37099 expand_vec_perm_pshufb2 (struct expand_vec_perm_d
*d
)
37101 rtx rperm
[2][16], vperm
, l
, h
, op
, m128
;
37102 unsigned int i
, nelt
, eltsz
;
37104 if (!TARGET_SSSE3
|| GET_MODE_SIZE (d
->vmode
) != 16)
37106 gcc_assert (d
->op0
!= d
->op1
);
37109 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
37111 /* Generate two permutation masks. If the required element is within
37112 the given vector it is shuffled into the proper lane. If the required
37113 element is in the other vector, force a zero into the lane by setting
37114 bit 7 in the permutation mask. */
37115 m128
= GEN_INT (-128);
37116 for (i
= 0; i
< nelt
; ++i
)
37118 unsigned j
, e
= d
->perm
[i
];
37119 unsigned which
= (e
>= nelt
);
37123 for (j
= 0; j
< eltsz
; ++j
)
37125 rperm
[which
][i
*eltsz
+ j
] = GEN_INT (e
*eltsz
+ j
);
37126 rperm
[1-which
][i
*eltsz
+ j
] = m128
;
37130 vperm
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, rperm
[0]));
37131 vperm
= force_reg (V16QImode
, vperm
);
37133 l
= gen_reg_rtx (V16QImode
);
37134 op
= gen_lowpart (V16QImode
, d
->op0
);
37135 emit_insn (gen_ssse3_pshufbv16qi3 (l
, op
, vperm
));
37137 vperm
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, rperm
[1]));
37138 vperm
= force_reg (V16QImode
, vperm
);
37140 h
= gen_reg_rtx (V16QImode
);
37141 op
= gen_lowpart (V16QImode
, d
->op1
);
37142 emit_insn (gen_ssse3_pshufbv16qi3 (h
, op
, vperm
));
37144 op
= gen_lowpart (V16QImode
, d
->target
);
37145 emit_insn (gen_iorv16qi3 (op
, l
, h
));
37150 /* Implement arbitrary permutation of one V32QImode and V16QImode operand
37151 with two vpshufb insns, vpermq and vpor. We should have already failed
37152 all two or three instruction sequences. */
37155 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d
*d
)
37157 rtx rperm
[2][32], vperm
, l
, h
, hp
, op
, m128
;
37158 unsigned int i
, nelt
, eltsz
;
37161 || d
->op0
!= d
->op1
37162 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
37169 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
37171 /* Generate two permutation masks. If the required element is within
37172 the same lane, it is shuffled in. If the required element from the
37173 other lane, force a zero by setting bit 7 in the permutation mask.
37174 In the other mask the mask has non-negative elements if element
37175 is requested from the other lane, but also moved to the other lane,
37176 so that the result of vpshufb can have the two V2TImode halves
37178 m128
= GEN_INT (-128);
37179 for (i
= 0; i
< nelt
; ++i
)
37181 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
37182 unsigned which
= ((d
->perm
[i
] ^ i
) & (nelt
/ 2)) * eltsz
;
37184 for (j
= 0; j
< eltsz
; ++j
)
37186 rperm
[!!which
][(i
* eltsz
+ j
) ^ which
] = GEN_INT (e
* eltsz
+ j
);
37187 rperm
[!which
][(i
* eltsz
+ j
) ^ (which
^ 16)] = m128
;
37191 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[1]));
37192 vperm
= force_reg (V32QImode
, vperm
);
37194 h
= gen_reg_rtx (V32QImode
);
37195 op
= gen_lowpart (V32QImode
, d
->op0
);
37196 emit_insn (gen_avx2_pshufbv32qi3 (h
, op
, vperm
));
37198 /* Swap the 128-byte lanes of h into hp. */
37199 hp
= gen_reg_rtx (V4DImode
);
37200 op
= gen_lowpart (V4DImode
, h
);
37201 emit_insn (gen_avx2_permv4di_1 (hp
, op
, const2_rtx
, GEN_INT (3), const0_rtx
,
37204 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[0]));
37205 vperm
= force_reg (V32QImode
, vperm
);
37207 l
= gen_reg_rtx (V32QImode
);
37208 op
= gen_lowpart (V32QImode
, d
->op0
);
37209 emit_insn (gen_avx2_pshufbv32qi3 (l
, op
, vperm
));
37211 op
= gen_lowpart (V32QImode
, d
->target
);
37212 emit_insn (gen_iorv32qi3 (op
, l
, gen_lowpart (V32QImode
, hp
)));
37217 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
37218 and extract-odd permutations of two V32QImode and V16QImode operand
37219 with two vpshufb insns, vpor and vpermq. We should have already
37220 failed all two or three instruction sequences. */
37223 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d
*d
)
37225 rtx rperm
[2][32], vperm
, l
, h
, ior
, op
, m128
;
37226 unsigned int i
, nelt
, eltsz
;
37229 || d
->op0
== d
->op1
37230 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
37233 for (i
= 0; i
< d
->nelt
; ++i
)
37234 if ((d
->perm
[i
] ^ (i
* 2)) & (3 * d
->nelt
/ 2))
37241 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
37243 /* Generate two permutation masks. In the first permutation mask
37244 the first quarter will contain indexes for the first half
37245 of the op0, the second quarter will contain bit 7 set, third quarter
37246 will contain indexes for the second half of the op0 and the
37247 last quarter bit 7 set. In the second permutation mask
37248 the first quarter will contain bit 7 set, the second quarter
37249 indexes for the first half of the op1, the third quarter bit 7 set
37250 and last quarter indexes for the second half of the op1.
37251 I.e. the first mask e.g. for V32QImode extract even will be:
37252 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
37253 (all values masked with 0xf except for -128) and second mask
37254 for extract even will be
37255 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
37256 m128
= GEN_INT (-128);
37257 for (i
= 0; i
< nelt
; ++i
)
37259 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
37260 unsigned which
= d
->perm
[i
] >= nelt
;
37261 unsigned xorv
= (i
>= nelt
/ 4 && i
< 3 * nelt
/ 4) ? 24 : 0;
37263 for (j
= 0; j
< eltsz
; ++j
)
37265 rperm
[which
][(i
* eltsz
+ j
) ^ xorv
] = GEN_INT (e
* eltsz
+ j
);
37266 rperm
[1 - which
][(i
* eltsz
+ j
) ^ xorv
] = m128
;
37270 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[0]));
37271 vperm
= force_reg (V32QImode
, vperm
);
37273 l
= gen_reg_rtx (V32QImode
);
37274 op
= gen_lowpart (V32QImode
, d
->op0
);
37275 emit_insn (gen_avx2_pshufbv32qi3 (l
, op
, vperm
));
37277 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[1]));
37278 vperm
= force_reg (V32QImode
, vperm
);
37280 h
= gen_reg_rtx (V32QImode
);
37281 op
= gen_lowpart (V32QImode
, d
->op1
);
37282 emit_insn (gen_avx2_pshufbv32qi3 (h
, op
, vperm
));
37284 ior
= gen_reg_rtx (V32QImode
);
37285 emit_insn (gen_iorv32qi3 (ior
, l
, h
));
37287 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
37288 op
= gen_lowpart (V4DImode
, d
->target
);
37289 ior
= gen_lowpart (V4DImode
, ior
);
37290 emit_insn (gen_avx2_permv4di_1 (op
, ior
, const0_rtx
, const2_rtx
,
37291 const1_rtx
, GEN_INT (3)));
37296 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
37297 and extract-odd permutations. */
37300 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d
*d
, unsigned odd
)
37307 t1
= gen_reg_rtx (V4DFmode
);
37308 t2
= gen_reg_rtx (V4DFmode
);
37310 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
37311 emit_insn (gen_avx_vperm2f128v4df3 (t1
, d
->op0
, d
->op1
, GEN_INT (0x20)));
37312 emit_insn (gen_avx_vperm2f128v4df3 (t2
, d
->op0
, d
->op1
, GEN_INT (0x31)));
37314 /* Now an unpck[lh]pd will produce the result required. */
37316 t3
= gen_avx_unpckhpd256 (d
->target
, t1
, t2
);
37318 t3
= gen_avx_unpcklpd256 (d
->target
, t1
, t2
);
37324 int mask
= odd
? 0xdd : 0x88;
37326 t1
= gen_reg_rtx (V8SFmode
);
37327 t2
= gen_reg_rtx (V8SFmode
);
37328 t3
= gen_reg_rtx (V8SFmode
);
37330 /* Shuffle within the 128-bit lanes to produce:
37331 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
37332 emit_insn (gen_avx_shufps256 (t1
, d
->op0
, d
->op1
,
37335 /* Shuffle the lanes around to produce:
37336 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
37337 emit_insn (gen_avx_vperm2f128v8sf3 (t2
, t1
, t1
,
37340 /* Shuffle within the 128-bit lanes to produce:
37341 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
37342 emit_insn (gen_avx_shufps256 (t3
, t1
, t2
, GEN_INT (0x44)));
37344 /* Shuffle within the 128-bit lanes to produce:
37345 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
37346 emit_insn (gen_avx_shufps256 (t2
, t1
, t2
, GEN_INT (0xee)));
37348 /* Shuffle the lanes around to produce:
37349 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
37350 emit_insn (gen_avx_vperm2f128v8sf3 (d
->target
, t3
, t2
,
37359 /* These are always directly implementable by expand_vec_perm_1. */
37360 gcc_unreachable ();
37364 return expand_vec_perm_pshufb2 (d
);
37367 /* We need 2*log2(N)-1 operations to achieve odd/even
37368 with interleave. */
37369 t1
= gen_reg_rtx (V8HImode
);
37370 t2
= gen_reg_rtx (V8HImode
);
37371 emit_insn (gen_vec_interleave_highv8hi (t1
, d
->op0
, d
->op1
));
37372 emit_insn (gen_vec_interleave_lowv8hi (d
->target
, d
->op0
, d
->op1
));
37373 emit_insn (gen_vec_interleave_highv8hi (t2
, d
->target
, t1
));
37374 emit_insn (gen_vec_interleave_lowv8hi (d
->target
, d
->target
, t1
));
37376 t3
= gen_vec_interleave_highv8hi (d
->target
, d
->target
, t2
);
37378 t3
= gen_vec_interleave_lowv8hi (d
->target
, d
->target
, t2
);
37385 return expand_vec_perm_pshufb2 (d
);
37388 t1
= gen_reg_rtx (V16QImode
);
37389 t2
= gen_reg_rtx (V16QImode
);
37390 t3
= gen_reg_rtx (V16QImode
);
37391 emit_insn (gen_vec_interleave_highv16qi (t1
, d
->op0
, d
->op1
));
37392 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->op0
, d
->op1
));
37393 emit_insn (gen_vec_interleave_highv16qi (t2
, d
->target
, t1
));
37394 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t1
));
37395 emit_insn (gen_vec_interleave_highv16qi (t3
, d
->target
, t2
));
37396 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t2
));
37398 t3
= gen_vec_interleave_highv16qi (d
->target
, d
->target
, t3
);
37400 t3
= gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t3
);
37407 return expand_vec_perm_vpshufb2_vpermq_even_odd (d
);
37412 struct expand_vec_perm_d d_copy
= *d
;
37413 d_copy
.vmode
= V4DFmode
;
37414 d_copy
.target
= gen_lowpart (V4DFmode
, d
->target
);
37415 d_copy
.op0
= gen_lowpart (V4DFmode
, d
->op0
);
37416 d_copy
.op1
= gen_lowpart (V4DFmode
, d
->op1
);
37417 return expand_vec_perm_even_odd_1 (&d_copy
, odd
);
37420 t1
= gen_reg_rtx (V4DImode
);
37421 t2
= gen_reg_rtx (V4DImode
);
37423 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
37424 emit_insn (gen_avx2_permv2ti (t1
, d
->op0
, d
->op1
, GEN_INT (0x20)));
37425 emit_insn (gen_avx2_permv2ti (t2
, d
->op0
, d
->op1
, GEN_INT (0x31)));
37427 /* Now an vpunpck[lh]qdq will produce the result required. */
37429 t3
= gen_avx2_interleave_highv4di (d
->target
, t1
, t2
);
37431 t3
= gen_avx2_interleave_lowv4di (d
->target
, t1
, t2
);
37438 struct expand_vec_perm_d d_copy
= *d
;
37439 d_copy
.vmode
= V8SFmode
;
37440 d_copy
.target
= gen_lowpart (V8SFmode
, d
->target
);
37441 d_copy
.op0
= gen_lowpart (V8SFmode
, d
->op0
);
37442 d_copy
.op1
= gen_lowpart (V8SFmode
, d
->op1
);
37443 return expand_vec_perm_even_odd_1 (&d_copy
, odd
);
37446 t1
= gen_reg_rtx (V8SImode
);
37447 t2
= gen_reg_rtx (V8SImode
);
37449 /* Shuffle the lanes around into
37450 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
37451 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode
, t1
),
37452 gen_lowpart (V4DImode
, d
->op0
),
37453 gen_lowpart (V4DImode
, d
->op1
),
37455 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode
, t2
),
37456 gen_lowpart (V4DImode
, d
->op0
),
37457 gen_lowpart (V4DImode
, d
->op1
),
37460 /* Swap the 2nd and 3rd position in each lane into
37461 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
37462 emit_insn (gen_avx2_pshufdv3 (t1
, t1
,
37463 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
37464 emit_insn (gen_avx2_pshufdv3 (t2
, t2
,
37465 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
37467 /* Now an vpunpck[lh]qdq will produce
37468 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
37470 t3
= gen_avx2_interleave_highv4di (gen_lowpart (V4DImode
, d
->target
),
37471 gen_lowpart (V4DImode
, t1
),
37472 gen_lowpart (V4DImode
, t2
));
37474 t3
= gen_avx2_interleave_lowv4di (gen_lowpart (V4DImode
, d
->target
),
37475 gen_lowpart (V4DImode
, t1
),
37476 gen_lowpart (V4DImode
, t2
));
37481 gcc_unreachable ();
37487 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
37488 extract-even and extract-odd permutations. */
37491 expand_vec_perm_even_odd (struct expand_vec_perm_d
*d
)
37493 unsigned i
, odd
, nelt
= d
->nelt
;
37496 if (odd
!= 0 && odd
!= 1)
37499 for (i
= 1; i
< nelt
; ++i
)
37500 if (d
->perm
[i
] != 2 * i
+ odd
)
37503 return expand_vec_perm_even_odd_1 (d
, odd
);
37506 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
37507 permutations. We assume that expand_vec_perm_1 has already failed. */
37510 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d
*d
)
37512 unsigned elt
= d
->perm
[0], nelt2
= d
->nelt
/ 2;
37513 enum machine_mode vmode
= d
->vmode
;
37514 unsigned char perm2
[4];
37522 /* These are special-cased in sse.md so that we can optionally
37523 use the vbroadcast instruction. They expand to two insns
37524 if the input happens to be in a register. */
37525 gcc_unreachable ();
37531 /* These are always implementable using standard shuffle patterns. */
37532 gcc_unreachable ();
37536 /* These can be implemented via interleave. We save one insn by
37537 stopping once we have promoted to V4SImode and then use pshufd. */
37540 optab otab
= vec_interleave_low_optab
;
37544 otab
= vec_interleave_high_optab
;
37549 op0
= expand_binop (vmode
, otab
, op0
, op0
, NULL
, 0, OPTAB_DIRECT
);
37550 vmode
= get_mode_wider_vector (vmode
);
37551 op0
= gen_lowpart (vmode
, op0
);
37553 while (vmode
!= V4SImode
);
37555 memset (perm2
, elt
, 4);
37556 ok
= expand_vselect (gen_lowpart (V4SImode
, d
->target
), op0
, perm2
, 4);
37564 /* For AVX2 broadcasts of the first element vpbroadcast* or
37565 vpermq should be used by expand_vec_perm_1. */
37566 gcc_assert (!TARGET_AVX2
|| d
->perm
[0]);
37570 gcc_unreachable ();
37574 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
37575 broadcast permutations. */
37578 expand_vec_perm_broadcast (struct expand_vec_perm_d
*d
)
37580 unsigned i
, elt
, nelt
= d
->nelt
;
37582 if (d
->op0
!= d
->op1
)
37586 for (i
= 1; i
< nelt
; ++i
)
37587 if (d
->perm
[i
] != elt
)
37590 return expand_vec_perm_broadcast_1 (d
);
37593 /* Implement arbitrary permutation of two V32QImode and V16QImode operands
37594 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
37595 all the shorter instruction sequences. */
37598 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d
*d
)
37600 rtx rperm
[4][32], vperm
, l
[2], h
[2], op
, m128
;
37601 unsigned int i
, nelt
, eltsz
;
37605 || d
->op0
== d
->op1
37606 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
37613 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
37615 /* Generate 4 permutation masks. If the required element is within
37616 the same lane, it is shuffled in. If the required element from the
37617 other lane, force a zero by setting bit 7 in the permutation mask.
37618 In the other mask the mask has non-negative elements if element
37619 is requested from the other lane, but also moved to the other lane,
37620 so that the result of vpshufb can have the two V2TImode halves
37622 m128
= GEN_INT (-128);
37623 for (i
= 0; i
< 32; ++i
)
37625 rperm
[0][i
] = m128
;
37626 rperm
[1][i
] = m128
;
37627 rperm
[2][i
] = m128
;
37628 rperm
[3][i
] = m128
;
37634 for (i
= 0; i
< nelt
; ++i
)
37636 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
37637 unsigned xlane
= ((d
->perm
[i
] ^ i
) & (nelt
/ 2)) * eltsz
;
37638 unsigned int which
= ((d
->perm
[i
] & nelt
) ? 2 : 0) + (xlane
? 1 : 0);
37640 for (j
= 0; j
< eltsz
; ++j
)
37641 rperm
[which
][(i
* eltsz
+ j
) ^ xlane
] = GEN_INT (e
* eltsz
+ j
);
37642 used
[which
] = true;
37645 for (i
= 0; i
< 2; ++i
)
37647 if (!used
[2 * i
+ 1])
37652 vperm
= gen_rtx_CONST_VECTOR (V32QImode
,
37653 gen_rtvec_v (32, rperm
[2 * i
+ 1]));
37654 vperm
= force_reg (V32QImode
, vperm
);
37655 h
[i
] = gen_reg_rtx (V32QImode
);
37656 op
= gen_lowpart (V32QImode
, i
? d
->op1
: d
->op0
);
37657 emit_insn (gen_avx2_pshufbv32qi3 (h
[i
], op
, vperm
));
37660 /* Swap the 128-byte lanes of h[X]. */
37661 for (i
= 0; i
< 2; ++i
)
37663 if (h
[i
] == NULL_RTX
)
37665 op
= gen_reg_rtx (V4DImode
);
37666 emit_insn (gen_avx2_permv4di_1 (op
, gen_lowpart (V4DImode
, h
[i
]),
37667 const2_rtx
, GEN_INT (3), const0_rtx
,
37669 h
[i
] = gen_lowpart (V32QImode
, op
);
37672 for (i
= 0; i
< 2; ++i
)
37679 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[2 * i
]));
37680 vperm
= force_reg (V32QImode
, vperm
);
37681 l
[i
] = gen_reg_rtx (V32QImode
);
37682 op
= gen_lowpart (V32QImode
, i
? d
->op1
: d
->op0
);
37683 emit_insn (gen_avx2_pshufbv32qi3 (l
[i
], op
, vperm
));
37686 for (i
= 0; i
< 2; ++i
)
37690 op
= gen_reg_rtx (V32QImode
);
37691 emit_insn (gen_iorv32qi3 (op
, l
[i
], h
[i
]));
37698 gcc_assert (l
[0] && l
[1]);
37699 op
= gen_lowpart (V32QImode
, d
->target
);
37700 emit_insn (gen_iorv32qi3 (op
, l
[0], l
[1]));
37704 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
37705 With all of the interface bits taken care of, perform the expansion
37706 in D and return true on success. */
37709 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
37711 /* Try a single instruction expansion. */
37712 if (expand_vec_perm_1 (d
))
37715 /* Try sequences of two instructions. */
37717 if (expand_vec_perm_pshuflw_pshufhw (d
))
37720 if (expand_vec_perm_palignr (d
))
37723 if (expand_vec_perm_interleave2 (d
))
37726 if (expand_vec_perm_broadcast (d
))
37729 if (expand_vec_perm_vpermq_perm_1 (d
))
37732 /* Try sequences of three instructions. */
37734 if (expand_vec_perm_pshufb2 (d
))
37737 if (expand_vec_perm_interleave3 (d
))
37740 /* Try sequences of four instructions. */
37742 if (expand_vec_perm_vpshufb2_vpermq (d
))
37745 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d
))
37748 /* ??? Look for narrow permutations whose element orderings would
37749 allow the promotion to a wider mode. */
37751 /* ??? Look for sequences of interleave or a wider permute that place
37752 the data into the correct lanes for a half-vector shuffle like
37753 pshuf[lh]w or vpermilps. */
37755 /* ??? Look for sequences of interleave that produce the desired results.
37756 The combinatorics of punpck[lh] get pretty ugly... */
37758 if (expand_vec_perm_even_odd (d
))
37761 /* Even longer sequences. */
37762 if (expand_vec_perm_vpshufb4_vpermq2 (d
))
37769 ix86_expand_vec_perm_const (rtx operands
[4])
37771 struct expand_vec_perm_d d
;
37772 unsigned char perm
[MAX_VECT_LEN
];
37773 int i
, nelt
, which
;
37776 d
.target
= operands
[0];
37777 d
.op0
= operands
[1];
37778 d
.op1
= operands
[2];
37781 d
.vmode
= GET_MODE (d
.target
);
37782 gcc_assert (VECTOR_MODE_P (d
.vmode
));
37783 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
37784 d
.testing_p
= false;
37786 gcc_assert (GET_CODE (sel
) == CONST_VECTOR
);
37787 gcc_assert (XVECLEN (sel
, 0) == nelt
);
37788 gcc_checking_assert (sizeof (d
.perm
) == sizeof (perm
));
37790 for (i
= which
= 0; i
< nelt
; ++i
)
37792 rtx e
= XVECEXP (sel
, 0, i
);
37793 int ei
= INTVAL (e
) & (2 * nelt
- 1);
37795 which
|= (ei
< nelt
? 1 : 2);
37806 if (!rtx_equal_p (d
.op0
, d
.op1
))
37809 /* The elements of PERM do not suggest that only the first operand
37810 is used, but both operands are identical. Allow easier matching
37811 of the permutation by folding the permutation into the single
37813 for (i
= 0; i
< nelt
; ++i
)
37814 if (d
.perm
[i
] >= nelt
)
37823 for (i
= 0; i
< nelt
; ++i
)
37829 if (ix86_expand_vec_perm_const_1 (&d
))
37832 /* If the mask says both arguments are needed, but they are the same,
37833 the above tried to expand with d.op0 == d.op1. If that didn't work,
37834 retry with d.op0 != d.op1 as that is what testing has been done with. */
37835 if (which
== 3 && d
.op0
== d
.op1
)
37840 memcpy (d
.perm
, perm
, sizeof (perm
));
37841 d
.op1
= gen_reg_rtx (d
.vmode
);
37843 ok
= ix86_expand_vec_perm_const_1 (&d
);
37844 seq
= get_insns ();
37848 emit_move_insn (d
.op1
, d
.op0
);
37857 /* Implement targetm.vectorize.vec_perm_const_ok. */
37860 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode
,
37861 const unsigned char *sel
)
37863 struct expand_vec_perm_d d
;
37864 unsigned int i
, nelt
, which
;
37868 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
37869 d
.testing_p
= true;
37871 /* Given sufficient ISA support we can just return true here
37872 for selected vector modes. */
37873 if (GET_MODE_SIZE (d
.vmode
) == 16)
37875 /* All implementable with a single vpperm insn. */
37878 /* All implementable with 2 pshufb + 1 ior. */
37881 /* All implementable with shufpd or unpck[lh]pd. */
37886 /* Extract the values from the vector CST into the permutation
37888 memcpy (d
.perm
, sel
, nelt
);
37889 for (i
= which
= 0; i
< nelt
; ++i
)
37891 unsigned char e
= d
.perm
[i
];
37892 gcc_assert (e
< 2 * nelt
);
37893 which
|= (e
< nelt
? 1 : 2);
37896 /* For all elements from second vector, fold the elements to first. */
37898 for (i
= 0; i
< nelt
; ++i
)
37901 /* Check whether the mask can be applied to the vector type. */
37902 one_vec
= (which
!= 3);
37904 /* Implementable with shufps or pshufd. */
37905 if (one_vec
&& (d
.vmode
== V4SFmode
|| d
.vmode
== V4SImode
))
37908 /* Otherwise we have to go through the motions and see if we can
37909 figure out how to generate the requested permutation. */
37910 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
37911 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
37913 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
37916 ret
= ix86_expand_vec_perm_const_1 (&d
);
37923 ix86_expand_vec_extract_even_odd (rtx targ
, rtx op0
, rtx op1
, unsigned odd
)
37925 struct expand_vec_perm_d d
;
37931 d
.vmode
= GET_MODE (targ
);
37932 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
37933 d
.testing_p
= false;
37935 for (i
= 0; i
< nelt
; ++i
)
37936 d
.perm
[i
] = i
* 2 + odd
;
37938 /* We'll either be able to implement the permutation directly... */
37939 if (expand_vec_perm_1 (&d
))
37942 /* ... or we use the special-case patterns. */
37943 expand_vec_perm_even_odd_1 (&d
, odd
);
37946 /* Expand an insert into a vector register through pinsr insn.
37947 Return true if successful. */
37950 ix86_expand_pinsr (rtx
*operands
)
37952 rtx dst
= operands
[0];
37953 rtx src
= operands
[3];
37955 unsigned int size
= INTVAL (operands
[1]);
37956 unsigned int pos
= INTVAL (operands
[2]);
37958 if (GET_CODE (dst
) == SUBREG
)
37960 pos
+= SUBREG_BYTE (dst
) * BITS_PER_UNIT
;
37961 dst
= SUBREG_REG (dst
);
37964 if (GET_CODE (src
) == SUBREG
)
37965 src
= SUBREG_REG (src
);
37967 switch (GET_MODE (dst
))
37974 enum machine_mode srcmode
, dstmode
;
37975 rtx (*pinsr
)(rtx
, rtx
, rtx
, rtx
);
37977 srcmode
= mode_for_size (size
, MODE_INT
, 0);
37982 if (!TARGET_SSE4_1
)
37984 dstmode
= V16QImode
;
37985 pinsr
= gen_sse4_1_pinsrb
;
37991 dstmode
= V8HImode
;
37992 pinsr
= gen_sse2_pinsrw
;
37996 if (!TARGET_SSE4_1
)
37998 dstmode
= V4SImode
;
37999 pinsr
= gen_sse4_1_pinsrd
;
38003 gcc_assert (TARGET_64BIT
);
38004 if (!TARGET_SSE4_1
)
38006 dstmode
= V2DImode
;
38007 pinsr
= gen_sse4_1_pinsrq
;
38014 dst
= gen_lowpart (dstmode
, dst
);
38015 src
= gen_lowpart (srcmode
, src
);
38019 emit_insn (pinsr (dst
, dst
, src
, GEN_INT (1 << pos
)));
38028 /* This function returns the calling abi specific va_list type node.
38029 It returns the FNDECL specific va_list type. */
38032 ix86_fn_abi_va_list (tree fndecl
)
38035 return va_list_type_node
;
38036 gcc_assert (fndecl
!= NULL_TREE
);
38038 if (ix86_function_abi ((const_tree
) fndecl
) == MS_ABI
)
38039 return ms_va_list_type_node
;
38041 return sysv_va_list_type_node
;
38044 /* Returns the canonical va_list type specified by TYPE. If there
38045 is no valid TYPE provided, it return NULL_TREE. */
38048 ix86_canonical_va_list_type (tree type
)
38052 /* Resolve references and pointers to va_list type. */
38053 if (TREE_CODE (type
) == MEM_REF
)
38054 type
= TREE_TYPE (type
);
38055 else if (POINTER_TYPE_P (type
) && POINTER_TYPE_P (TREE_TYPE(type
)))
38056 type
= TREE_TYPE (type
);
38057 else if (POINTER_TYPE_P (type
) && TREE_CODE (TREE_TYPE (type
)) == ARRAY_TYPE
)
38058 type
= TREE_TYPE (type
);
38060 if (TARGET_64BIT
&& va_list_type_node
!= NULL_TREE
)
38062 wtype
= va_list_type_node
;
38063 gcc_assert (wtype
!= NULL_TREE
);
38065 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
38067 /* If va_list is an array type, the argument may have decayed
38068 to a pointer type, e.g. by being passed to another function.
38069 In that case, unwrap both types so that we can compare the
38070 underlying records. */
38071 if (TREE_CODE (htype
) == ARRAY_TYPE
38072 || POINTER_TYPE_P (htype
))
38074 wtype
= TREE_TYPE (wtype
);
38075 htype
= TREE_TYPE (htype
);
38078 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
38079 return va_list_type_node
;
38080 wtype
= sysv_va_list_type_node
;
38081 gcc_assert (wtype
!= NULL_TREE
);
38083 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
38085 /* If va_list is an array type, the argument may have decayed
38086 to a pointer type, e.g. by being passed to another function.
38087 In that case, unwrap both types so that we can compare the
38088 underlying records. */
38089 if (TREE_CODE (htype
) == ARRAY_TYPE
38090 || POINTER_TYPE_P (htype
))
38092 wtype
= TREE_TYPE (wtype
);
38093 htype
= TREE_TYPE (htype
);
38096 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
38097 return sysv_va_list_type_node
;
38098 wtype
= ms_va_list_type_node
;
38099 gcc_assert (wtype
!= NULL_TREE
);
38101 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
38103 /* If va_list is an array type, the argument may have decayed
38104 to a pointer type, e.g. by being passed to another function.
38105 In that case, unwrap both types so that we can compare the
38106 underlying records. */
38107 if (TREE_CODE (htype
) == ARRAY_TYPE
38108 || POINTER_TYPE_P (htype
))
38110 wtype
= TREE_TYPE (wtype
);
38111 htype
= TREE_TYPE (htype
);
38114 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
38115 return ms_va_list_type_node
;
38118 return std_canonical_va_list_type (type
);
38121 /* Iterate through the target-specific builtin types for va_list.
38122 IDX denotes the iterator, *PTREE is set to the result type of
38123 the va_list builtin, and *PNAME to its internal type.
38124 Returns zero if there is no element for this index, otherwise
38125 IDX should be increased upon the next call.
38126 Note, do not iterate a base builtin's name like __builtin_va_list.
38127 Used from c_common_nodes_and_builtins. */
38130 ix86_enum_va_list (int idx
, const char **pname
, tree
*ptree
)
38140 *ptree
= ms_va_list_type_node
;
38141 *pname
= "__builtin_ms_va_list";
38145 *ptree
= sysv_va_list_type_node
;
38146 *pname
= "__builtin_sysv_va_list";
38154 #undef TARGET_SCHED_DISPATCH
38155 #define TARGET_SCHED_DISPATCH has_dispatch
38156 #undef TARGET_SCHED_DISPATCH_DO
38157 #define TARGET_SCHED_DISPATCH_DO do_dispatch
38158 #undef TARGET_SCHED_REASSOCIATION_WIDTH
38159 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
38161 /* The size of the dispatch window is the total number of bytes of
38162 object code allowed in a window. */
38163 #define DISPATCH_WINDOW_SIZE 16
38165 /* Number of dispatch windows considered for scheduling. */
38166 #define MAX_DISPATCH_WINDOWS 3
38168 /* Maximum number of instructions in a window. */
38171 /* Maximum number of immediate operands in a window. */
38174 /* Maximum number of immediate bits allowed in a window. */
38175 #define MAX_IMM_SIZE 128
38177 /* Maximum number of 32 bit immediates allowed in a window. */
38178 #define MAX_IMM_32 4
38180 /* Maximum number of 64 bit immediates allowed in a window. */
38181 #define MAX_IMM_64 2
38183 /* Maximum total of loads or prefetches allowed in a window. */
38186 /* Maximum total of stores allowed in a window. */
38187 #define MAX_STORE 1
38193 /* Dispatch groups. Istructions that affect the mix in a dispatch window. */
38194 enum dispatch_group
{
38209 /* Number of allowable groups in a dispatch window. It is an array
38210 indexed by dispatch_group enum. 100 is used as a big number,
38211 because the number of these kind of operations does not have any
38212 effect in dispatch window, but we need them for other reasons in
38214 static unsigned int num_allowable_groups
[disp_last
] = {
38215 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG
, BIG
38218 char group_name
[disp_last
+ 1][16] = {
38219 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
38220 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
38221 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
38224 /* Instruction path. */
38227 path_single
, /* Single micro op. */
38228 path_double
, /* Double micro op. */
38229 path_multi
, /* Instructions with more than 2 micro op.. */
38233 /* sched_insn_info defines a window to the instructions scheduled in
38234 the basic block. It contains a pointer to the insn_info table and
38235 the instruction scheduled.
38237 Windows are allocated for each basic block and are linked
38239 typedef struct sched_insn_info_s
{
38241 enum dispatch_group group
;
38242 enum insn_path path
;
38247 /* Linked list of dispatch windows. This is a two way list of
38248 dispatch windows of a basic block. It contains information about
38249 the number of uops in the window and the total number of
38250 instructions and of bytes in the object code for this dispatch
38252 typedef struct dispatch_windows_s
{
38253 int num_insn
; /* Number of insn in the window. */
38254 int num_uops
; /* Number of uops in the window. */
38255 int window_size
; /* Number of bytes in the window. */
38256 int window_num
; /* Window number between 0 or 1. */
38257 int num_imm
; /* Number of immediates in an insn. */
38258 int num_imm_32
; /* Number of 32 bit immediates in an insn. */
38259 int num_imm_64
; /* Number of 64 bit immediates in an insn. */
38260 int imm_size
; /* Total immediates in the window. */
38261 int num_loads
; /* Total memory loads in the window. */
38262 int num_stores
; /* Total memory stores in the window. */
38263 int violation
; /* Violation exists in window. */
38264 sched_insn_info
*window
; /* Pointer to the window. */
38265 struct dispatch_windows_s
*next
;
38266 struct dispatch_windows_s
*prev
;
38267 } dispatch_windows
;
38269 /* Immediate valuse used in an insn. */
38270 typedef struct imm_info_s
38277 static dispatch_windows
*dispatch_window_list
;
38278 static dispatch_windows
*dispatch_window_list1
;
38280 /* Get dispatch group of insn. */
38282 static enum dispatch_group
38283 get_mem_group (rtx insn
)
38285 enum attr_memory memory
;
38287 if (INSN_CODE (insn
) < 0)
38288 return disp_no_group
;
38289 memory
= get_attr_memory (insn
);
38290 if (memory
== MEMORY_STORE
)
38293 if (memory
== MEMORY_LOAD
)
38296 if (memory
== MEMORY_BOTH
)
38297 return disp_load_store
;
38299 return disp_no_group
;
38302 /* Return true if insn is a compare instruction. */
38307 enum attr_type type
;
38309 type
= get_attr_type (insn
);
38310 return (type
== TYPE_TEST
38311 || type
== TYPE_ICMP
38312 || type
== TYPE_FCMP
38313 || GET_CODE (PATTERN (insn
)) == COMPARE
);
38316 /* Return true if a dispatch violation encountered. */
38319 dispatch_violation (void)
38321 if (dispatch_window_list
->next
)
38322 return dispatch_window_list
->next
->violation
;
38323 return dispatch_window_list
->violation
;
38326 /* Return true if insn is a branch instruction. */
38329 is_branch (rtx insn
)
38331 return (CALL_P (insn
) || JUMP_P (insn
));
38334 /* Return true if insn is a prefetch instruction. */
38337 is_prefetch (rtx insn
)
38339 return NONJUMP_INSN_P (insn
) && GET_CODE (PATTERN (insn
)) == PREFETCH
;
38342 /* This function initializes a dispatch window and the list container holding a
38343 pointer to the window. */
38346 init_window (int window_num
)
38349 dispatch_windows
*new_list
;
38351 if (window_num
== 0)
38352 new_list
= dispatch_window_list
;
38354 new_list
= dispatch_window_list1
;
38356 new_list
->num_insn
= 0;
38357 new_list
->num_uops
= 0;
38358 new_list
->window_size
= 0;
38359 new_list
->next
= NULL
;
38360 new_list
->prev
= NULL
;
38361 new_list
->window_num
= window_num
;
38362 new_list
->num_imm
= 0;
38363 new_list
->num_imm_32
= 0;
38364 new_list
->num_imm_64
= 0;
38365 new_list
->imm_size
= 0;
38366 new_list
->num_loads
= 0;
38367 new_list
->num_stores
= 0;
38368 new_list
->violation
= false;
38370 for (i
= 0; i
< MAX_INSN
; i
++)
38372 new_list
->window
[i
].insn
= NULL
;
38373 new_list
->window
[i
].group
= disp_no_group
;
38374 new_list
->window
[i
].path
= no_path
;
38375 new_list
->window
[i
].byte_len
= 0;
38376 new_list
->window
[i
].imm_bytes
= 0;
38381 /* This function allocates and initializes a dispatch window and the
38382 list container holding a pointer to the window. */
38384 static dispatch_windows
*
38385 allocate_window (void)
38387 dispatch_windows
*new_list
= XNEW (struct dispatch_windows_s
);
38388 new_list
->window
= XNEWVEC (struct sched_insn_info_s
, MAX_INSN
+ 1);
38393 /* This routine initializes the dispatch scheduling information. It
38394 initiates building dispatch scheduler tables and constructs the
38395 first dispatch window. */
38398 init_dispatch_sched (void)
38400 /* Allocate a dispatch list and a window. */
38401 dispatch_window_list
= allocate_window ();
38402 dispatch_window_list1
= allocate_window ();
38407 /* This function returns true if a branch is detected. End of a basic block
38408 does not have to be a branch, but here we assume only branches end a
38412 is_end_basic_block (enum dispatch_group group
)
38414 return group
== disp_branch
;
38417 /* This function is called when the end of a window processing is reached. */
38420 process_end_window (void)
38422 gcc_assert (dispatch_window_list
->num_insn
<= MAX_INSN
);
38423 if (dispatch_window_list
->next
)
38425 gcc_assert (dispatch_window_list1
->num_insn
<= MAX_INSN
);
38426 gcc_assert (dispatch_window_list
->window_size
38427 + dispatch_window_list1
->window_size
<= 48);
38433 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
38434 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
38435 for 48 bytes of instructions. Note that these windows are not dispatch
38436 windows that their sizes are DISPATCH_WINDOW_SIZE. */
38438 static dispatch_windows
*
38439 allocate_next_window (int window_num
)
38441 if (window_num
== 0)
38443 if (dispatch_window_list
->next
)
38446 return dispatch_window_list
;
38449 dispatch_window_list
->next
= dispatch_window_list1
;
38450 dispatch_window_list1
->prev
= dispatch_window_list
;
38452 return dispatch_window_list1
;
38455 /* Increment the number of immediate operands of an instruction. */
38458 find_constant_1 (rtx
*in_rtx
, imm_info
*imm_values
)
38463 switch ( GET_CODE (*in_rtx
))
38468 (imm_values
->imm
)++;
38469 if (x86_64_immediate_operand (*in_rtx
, SImode
))
38470 (imm_values
->imm32
)++;
38472 (imm_values
->imm64
)++;
38476 (imm_values
->imm
)++;
38477 (imm_values
->imm64
)++;
38481 if (LABEL_KIND (*in_rtx
) == LABEL_NORMAL
)
38483 (imm_values
->imm
)++;
38484 (imm_values
->imm32
)++;
38495 /* Compute number of immediate operands of an instruction. */
38498 find_constant (rtx in_rtx
, imm_info
*imm_values
)
38500 for_each_rtx (INSN_P (in_rtx
) ? &PATTERN (in_rtx
) : &in_rtx
,
38501 (rtx_function
) find_constant_1
, (void *) imm_values
);
38504 /* Return total size of immediate operands of an instruction along with number
38505 of corresponding immediate-operands. It initializes its parameters to zero
38506 befor calling FIND_CONSTANT.
38507 INSN is the input instruction. IMM is the total of immediates.
38508 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
38512 get_num_immediates (rtx insn
, int *imm
, int *imm32
, int *imm64
)
38514 imm_info imm_values
= {0, 0, 0};
38516 find_constant (insn
, &imm_values
);
38517 *imm
= imm_values
.imm
;
38518 *imm32
= imm_values
.imm32
;
38519 *imm64
= imm_values
.imm64
;
38520 return imm_values
.imm32
* 4 + imm_values
.imm64
* 8;
38523 /* This function indicates if an operand of an instruction is an
38527 has_immediate (rtx insn
)
38529 int num_imm_operand
;
38530 int num_imm32_operand
;
38531 int num_imm64_operand
;
38534 return get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
38535 &num_imm64_operand
);
38539 /* Return single or double path for instructions. */
38541 static enum insn_path
38542 get_insn_path (rtx insn
)
38544 enum attr_amdfam10_decode path
= get_attr_amdfam10_decode (insn
);
38546 if ((int)path
== 0)
38547 return path_single
;
38549 if ((int)path
== 1)
38550 return path_double
;
38555 /* Return insn dispatch group. */
38557 static enum dispatch_group
38558 get_insn_group (rtx insn
)
38560 enum dispatch_group group
= get_mem_group (insn
);
38564 if (is_branch (insn
))
38565 return disp_branch
;
38570 if (has_immediate (insn
))
38573 if (is_prefetch (insn
))
38574 return disp_prefetch
;
38576 return disp_no_group
;
38579 /* Count number of GROUP restricted instructions in a dispatch
38580 window WINDOW_LIST. */
38583 count_num_restricted (rtx insn
, dispatch_windows
*window_list
)
38585 enum dispatch_group group
= get_insn_group (insn
);
38587 int num_imm_operand
;
38588 int num_imm32_operand
;
38589 int num_imm64_operand
;
38591 if (group
== disp_no_group
)
38594 if (group
== disp_imm
)
38596 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
38597 &num_imm64_operand
);
38598 if (window_list
->imm_size
+ imm_size
> MAX_IMM_SIZE
38599 || num_imm_operand
+ window_list
->num_imm
> MAX_IMM
38600 || (num_imm32_operand
> 0
38601 && (window_list
->num_imm_32
+ num_imm32_operand
> MAX_IMM_32
38602 || window_list
->num_imm_64
* 2 + num_imm32_operand
> MAX_IMM_32
))
38603 || (num_imm64_operand
> 0
38604 && (window_list
->num_imm_64
+ num_imm64_operand
> MAX_IMM_64
38605 || window_list
->num_imm_32
+ num_imm64_operand
* 2 > MAX_IMM_32
))
38606 || (window_list
->imm_size
+ imm_size
== MAX_IMM_SIZE
38607 && num_imm64_operand
> 0
38608 && ((window_list
->num_imm_64
> 0
38609 && window_list
->num_insn
>= 2)
38610 || window_list
->num_insn
>= 3)))
38616 if ((group
== disp_load_store
38617 && (window_list
->num_loads
>= MAX_LOAD
38618 || window_list
->num_stores
>= MAX_STORE
))
38619 || ((group
== disp_load
38620 || group
== disp_prefetch
)
38621 && window_list
->num_loads
>= MAX_LOAD
)
38622 || (group
== disp_store
38623 && window_list
->num_stores
>= MAX_STORE
))
38629 /* This function returns true if insn satisfies dispatch rules on the
38630 last window scheduled. */
38633 fits_dispatch_window (rtx insn
)
38635 dispatch_windows
*window_list
= dispatch_window_list
;
38636 dispatch_windows
*window_list_next
= dispatch_window_list
->next
;
38637 unsigned int num_restrict
;
38638 enum dispatch_group group
= get_insn_group (insn
);
38639 enum insn_path path
= get_insn_path (insn
);
38642 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
38643 instructions should be given the lowest priority in the
38644 scheduling process in Haifa scheduler to make sure they will be
38645 scheduled in the same dispatch window as the refrence to them. */
38646 if (group
== disp_jcc
|| group
== disp_cmp
)
38649 /* Check nonrestricted. */
38650 if (group
== disp_no_group
|| group
== disp_branch
)
38653 /* Get last dispatch window. */
38654 if (window_list_next
)
38655 window_list
= window_list_next
;
38657 if (window_list
->window_num
== 1)
38659 sum
= window_list
->prev
->window_size
+ window_list
->window_size
;
38662 || (min_insn_size (insn
) + sum
) >= 48)
38663 /* Window 1 is full. Go for next window. */
38667 num_restrict
= count_num_restricted (insn
, window_list
);
38669 if (num_restrict
> num_allowable_groups
[group
])
38672 /* See if it fits in the first window. */
38673 if (window_list
->window_num
== 0)
38675 /* The first widow should have only single and double path
38677 if (path
== path_double
38678 && (window_list
->num_uops
+ 2) > MAX_INSN
)
38680 else if (path
!= path_single
)
38686 /* Add an instruction INSN with NUM_UOPS micro-operations to the
38687 dispatch window WINDOW_LIST. */
38690 add_insn_window (rtx insn
, dispatch_windows
*window_list
, int num_uops
)
38692 int byte_len
= min_insn_size (insn
);
38693 int num_insn
= window_list
->num_insn
;
38695 sched_insn_info
*window
= window_list
->window
;
38696 enum dispatch_group group
= get_insn_group (insn
);
38697 enum insn_path path
= get_insn_path (insn
);
38698 int num_imm_operand
;
38699 int num_imm32_operand
;
38700 int num_imm64_operand
;
38702 if (!window_list
->violation
&& group
!= disp_cmp
38703 && !fits_dispatch_window (insn
))
38704 window_list
->violation
= true;
38706 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
38707 &num_imm64_operand
);
38709 /* Initialize window with new instruction. */
38710 window
[num_insn
].insn
= insn
;
38711 window
[num_insn
].byte_len
= byte_len
;
38712 window
[num_insn
].group
= group
;
38713 window
[num_insn
].path
= path
;
38714 window
[num_insn
].imm_bytes
= imm_size
;
38716 window_list
->window_size
+= byte_len
;
38717 window_list
->num_insn
= num_insn
+ 1;
38718 window_list
->num_uops
= window_list
->num_uops
+ num_uops
;
38719 window_list
->imm_size
+= imm_size
;
38720 window_list
->num_imm
+= num_imm_operand
;
38721 window_list
->num_imm_32
+= num_imm32_operand
;
38722 window_list
->num_imm_64
+= num_imm64_operand
;
38724 if (group
== disp_store
)
38725 window_list
->num_stores
+= 1;
38726 else if (group
== disp_load
38727 || group
== disp_prefetch
)
38728 window_list
->num_loads
+= 1;
38729 else if (group
== disp_load_store
)
38731 window_list
->num_stores
+= 1;
38732 window_list
->num_loads
+= 1;
38736 /* Adds a scheduled instruction, INSN, to the current dispatch window.
38737 If the total bytes of instructions or the number of instructions in
38738 the window exceed allowable, it allocates a new window. */
38741 add_to_dispatch_window (rtx insn
)
38744 dispatch_windows
*window_list
;
38745 dispatch_windows
*next_list
;
38746 dispatch_windows
*window0_list
;
38747 enum insn_path path
;
38748 enum dispatch_group insn_group
;
38756 if (INSN_CODE (insn
) < 0)
38759 byte_len
= min_insn_size (insn
);
38760 window_list
= dispatch_window_list
;
38761 next_list
= window_list
->next
;
38762 path
= get_insn_path (insn
);
38763 insn_group
= get_insn_group (insn
);
38765 /* Get the last dispatch window. */
38767 window_list
= dispatch_window_list
->next
;
38769 if (path
== path_single
)
38771 else if (path
== path_double
)
38774 insn_num_uops
= (int) path
;
38776 /* If current window is full, get a new window.
38777 Window number zero is full, if MAX_INSN uops are scheduled in it.
38778 Window number one is full, if window zero's bytes plus window
38779 one's bytes is 32, or if the bytes of the new instruction added
38780 to the total makes it greater than 48, or it has already MAX_INSN
38781 instructions in it. */
38782 num_insn
= window_list
->num_insn
;
38783 num_uops
= window_list
->num_uops
;
38784 window_num
= window_list
->window_num
;
38785 insn_fits
= fits_dispatch_window (insn
);
38787 if (num_insn
>= MAX_INSN
38788 || num_uops
+ insn_num_uops
> MAX_INSN
38791 window_num
= ~window_num
& 1;
38792 window_list
= allocate_next_window (window_num
);
38795 if (window_num
== 0)
38797 add_insn_window (insn
, window_list
, insn_num_uops
);
38798 if (window_list
->num_insn
>= MAX_INSN
38799 && insn_group
== disp_branch
)
38801 process_end_window ();
38805 else if (window_num
== 1)
38807 window0_list
= window_list
->prev
;
38808 sum
= window0_list
->window_size
+ window_list
->window_size
;
38810 || (byte_len
+ sum
) >= 48)
38812 process_end_window ();
38813 window_list
= dispatch_window_list
;
38816 add_insn_window (insn
, window_list
, insn_num_uops
);
38819 gcc_unreachable ();
38821 if (is_end_basic_block (insn_group
))
38823 /* End of basic block is reached do end-basic-block process. */
38824 process_end_window ();
38829 /* Print the dispatch window, WINDOW_NUM, to FILE. */
38831 DEBUG_FUNCTION
static void
38832 debug_dispatch_window_file (FILE *file
, int window_num
)
38834 dispatch_windows
*list
;
38837 if (window_num
== 0)
38838 list
= dispatch_window_list
;
38840 list
= dispatch_window_list1
;
38842 fprintf (file
, "Window #%d:\n", list
->window_num
);
38843 fprintf (file
, " num_insn = %d, num_uops = %d, window_size = %d\n",
38844 list
->num_insn
, list
->num_uops
, list
->window_size
);
38845 fprintf (file
, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
38846 list
->num_imm
, list
->num_imm_32
, list
->num_imm_64
, list
->imm_size
);
38848 fprintf (file
, " num_loads = %d, num_stores = %d\n", list
->num_loads
,
38850 fprintf (file
, " insn info:\n");
38852 for (i
= 0; i
< MAX_INSN
; i
++)
38854 if (!list
->window
[i
].insn
)
38856 fprintf (file
, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
38857 i
, group_name
[list
->window
[i
].group
],
38858 i
, (void *)list
->window
[i
].insn
,
38859 i
, list
->window
[i
].path
,
38860 i
, list
->window
[i
].byte_len
,
38861 i
, list
->window
[i
].imm_bytes
);
38865 /* Print to stdout a dispatch window. */
38867 DEBUG_FUNCTION
void
38868 debug_dispatch_window (int window_num
)
38870 debug_dispatch_window_file (stdout
, window_num
);
38873 /* Print INSN dispatch information to FILE. */
38875 DEBUG_FUNCTION
static void
38876 debug_insn_dispatch_info_file (FILE *file
, rtx insn
)
38879 enum insn_path path
;
38880 enum dispatch_group group
;
38882 int num_imm_operand
;
38883 int num_imm32_operand
;
38884 int num_imm64_operand
;
38886 if (INSN_CODE (insn
) < 0)
38889 byte_len
= min_insn_size (insn
);
38890 path
= get_insn_path (insn
);
38891 group
= get_insn_group (insn
);
38892 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
38893 &num_imm64_operand
);
38895 fprintf (file
, " insn info:\n");
38896 fprintf (file
, " group = %s, path = %d, byte_len = %d\n",
38897 group_name
[group
], path
, byte_len
);
38898 fprintf (file
, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
38899 num_imm_operand
, num_imm32_operand
, num_imm64_operand
, imm_size
);
38902 /* Print to STDERR the status of the ready list with respect to
38903 dispatch windows. */
38905 DEBUG_FUNCTION
void
38906 debug_ready_dispatch (void)
38909 int no_ready
= number_in_ready ();
38911 fprintf (stdout
, "Number of ready: %d\n", no_ready
);
38913 for (i
= 0; i
< no_ready
; i
++)
38914 debug_insn_dispatch_info_file (stdout
, get_ready_element (i
));
38917 /* This routine is the driver of the dispatch scheduler. */
38920 do_dispatch (rtx insn
, int mode
)
38922 if (mode
== DISPATCH_INIT
)
38923 init_dispatch_sched ();
38924 else if (mode
== ADD_TO_DISPATCH_WINDOW
)
38925 add_to_dispatch_window (insn
);
38928 /* Return TRUE if Dispatch Scheduling is supported. */
38931 has_dispatch (rtx insn
, int action
)
38933 if ((ix86_tune
== PROCESSOR_BDVER1
|| ix86_tune
== PROCESSOR_BDVER2
)
38934 && flag_dispatch_scheduler
)
38940 case IS_DISPATCH_ON
:
38945 return is_cmp (insn
);
38947 case DISPATCH_VIOLATION
:
38948 return dispatch_violation ();
38950 case FITS_DISPATCH_WINDOW
:
38951 return fits_dispatch_window (insn
);
38957 /* Implementation of reassociation_width target hook used by
38958 reassoc phase to identify parallelism level in reassociated
38959 tree. Statements tree_code is passed in OPC. Arguments type
38962 Currently parallel reassociation is enabled for Atom
38963 processors only and we set reassociation width to be 2
38964 because Atom may issue up to 2 instructions per cycle.
38966 Return value should be fixed if parallel reassociation is
38967 enabled for other processors. */
38970 ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED
,
38971 enum machine_mode mode
)
38975 if (INTEGRAL_MODE_P (mode
) && TARGET_REASSOC_INT_TO_PARALLEL
)
38977 else if (FLOAT_MODE_P (mode
) && TARGET_REASSOC_FP_TO_PARALLEL
)
38983 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
38984 place emms and femms instructions. */
38986 static enum machine_mode
38987 ix86_preferred_simd_mode (enum machine_mode mode
)
38995 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V32QImode
: V16QImode
;
38997 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V16HImode
: V8HImode
;
38999 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V8SImode
: V4SImode
;
39001 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V4DImode
: V2DImode
;
39004 if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
39010 if (!TARGET_VECTORIZE_DOUBLE
)
39012 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
39014 else if (TARGET_SSE2
)
39023 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
39026 static unsigned int
39027 ix86_autovectorize_vector_sizes (void)
39029 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? 32 | 16 : 0;
39032 /* Initialize the GCC target structure. */
39033 #undef TARGET_RETURN_IN_MEMORY
39034 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
39036 #undef TARGET_LEGITIMIZE_ADDRESS
39037 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
39039 #undef TARGET_ATTRIBUTE_TABLE
39040 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
39041 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
39042 # undef TARGET_MERGE_DECL_ATTRIBUTES
39043 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
39046 #undef TARGET_COMP_TYPE_ATTRIBUTES
39047 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
39049 #undef TARGET_INIT_BUILTINS
39050 #define TARGET_INIT_BUILTINS ix86_init_builtins
39051 #undef TARGET_BUILTIN_DECL
39052 #define TARGET_BUILTIN_DECL ix86_builtin_decl
39053 #undef TARGET_EXPAND_BUILTIN
39054 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
39056 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
39057 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
39058 ix86_builtin_vectorized_function
39060 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
39061 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
39063 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
39064 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
39066 #undef TARGET_VECTORIZE_BUILTIN_GATHER
39067 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
39069 #undef TARGET_BUILTIN_RECIPROCAL
39070 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
39072 #undef TARGET_ASM_FUNCTION_EPILOGUE
39073 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
39075 #undef TARGET_ENCODE_SECTION_INFO
39076 #ifndef SUBTARGET_ENCODE_SECTION_INFO
39077 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
39079 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
39082 #undef TARGET_ASM_OPEN_PAREN
39083 #define TARGET_ASM_OPEN_PAREN ""
39084 #undef TARGET_ASM_CLOSE_PAREN
39085 #define TARGET_ASM_CLOSE_PAREN ""
39087 #undef TARGET_ASM_BYTE_OP
39088 #define TARGET_ASM_BYTE_OP ASM_BYTE
39090 #undef TARGET_ASM_ALIGNED_HI_OP
39091 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
39092 #undef TARGET_ASM_ALIGNED_SI_OP
39093 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
39095 #undef TARGET_ASM_ALIGNED_DI_OP
39096 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
39099 #undef TARGET_PROFILE_BEFORE_PROLOGUE
39100 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
39102 #undef TARGET_ASM_UNALIGNED_HI_OP
39103 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
39104 #undef TARGET_ASM_UNALIGNED_SI_OP
39105 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
39106 #undef TARGET_ASM_UNALIGNED_DI_OP
39107 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
39109 #undef TARGET_PRINT_OPERAND
39110 #define TARGET_PRINT_OPERAND ix86_print_operand
39111 #undef TARGET_PRINT_OPERAND_ADDRESS
39112 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
39113 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
39114 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
39115 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
39116 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
39118 #undef TARGET_SCHED_INIT_GLOBAL
39119 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
39120 #undef TARGET_SCHED_ADJUST_COST
39121 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
39122 #undef TARGET_SCHED_ISSUE_RATE
39123 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
39124 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
39125 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
39126 ia32_multipass_dfa_lookahead
39128 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
39129 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
39132 #undef TARGET_HAVE_TLS
39133 #define TARGET_HAVE_TLS true
39135 #undef TARGET_CANNOT_FORCE_CONST_MEM
39136 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
39137 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
39138 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
39140 #undef TARGET_DELEGITIMIZE_ADDRESS
39141 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
39143 #undef TARGET_MS_BITFIELD_LAYOUT_P
39144 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
39147 #undef TARGET_BINDS_LOCAL_P
39148 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
39150 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
39151 #undef TARGET_BINDS_LOCAL_P
39152 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
39155 #undef TARGET_ASM_OUTPUT_MI_THUNK
39156 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
39157 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
39158 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
39160 #undef TARGET_ASM_FILE_START
39161 #define TARGET_ASM_FILE_START x86_file_start
39163 #undef TARGET_OPTION_OVERRIDE
39164 #define TARGET_OPTION_OVERRIDE ix86_option_override
39166 #undef TARGET_REGISTER_MOVE_COST
39167 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
39168 #undef TARGET_MEMORY_MOVE_COST
39169 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
39170 #undef TARGET_RTX_COSTS
39171 #define TARGET_RTX_COSTS ix86_rtx_costs
39172 #undef TARGET_ADDRESS_COST
39173 #define TARGET_ADDRESS_COST ix86_address_cost
39175 #undef TARGET_FIXED_CONDITION_CODE_REGS
39176 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
39177 #undef TARGET_CC_MODES_COMPATIBLE
39178 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
39180 #undef TARGET_MACHINE_DEPENDENT_REORG
39181 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
39183 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
39184 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
39186 #undef TARGET_BUILD_BUILTIN_VA_LIST
39187 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
39189 #undef TARGET_ENUM_VA_LIST_P
39190 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
39192 #undef TARGET_FN_ABI_VA_LIST
39193 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
39195 #undef TARGET_CANONICAL_VA_LIST_TYPE
39196 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
39198 #undef TARGET_EXPAND_BUILTIN_VA_START
39199 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
39201 #undef TARGET_MD_ASM_CLOBBERS
39202 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
39204 #undef TARGET_PROMOTE_PROTOTYPES
39205 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
39206 #undef TARGET_STRUCT_VALUE_RTX
39207 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
39208 #undef TARGET_SETUP_INCOMING_VARARGS
39209 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
39210 #undef TARGET_MUST_PASS_IN_STACK
39211 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
39212 #undef TARGET_FUNCTION_ARG_ADVANCE
39213 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
39214 #undef TARGET_FUNCTION_ARG
39215 #define TARGET_FUNCTION_ARG ix86_function_arg
39216 #undef TARGET_FUNCTION_ARG_BOUNDARY
39217 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
39218 #undef TARGET_PASS_BY_REFERENCE
39219 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
39220 #undef TARGET_INTERNAL_ARG_POINTER
39221 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
39222 #undef TARGET_UPDATE_STACK_BOUNDARY
39223 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
39224 #undef TARGET_GET_DRAP_RTX
39225 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
39226 #undef TARGET_STRICT_ARGUMENT_NAMING
39227 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
39228 #undef TARGET_STATIC_CHAIN
39229 #define TARGET_STATIC_CHAIN ix86_static_chain
39230 #undef TARGET_TRAMPOLINE_INIT
39231 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
39232 #undef TARGET_RETURN_POPS_ARGS
39233 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
39235 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
39236 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
39238 #undef TARGET_SCALAR_MODE_SUPPORTED_P
39239 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
39241 #undef TARGET_VECTOR_MODE_SUPPORTED_P
39242 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
39244 #undef TARGET_C_MODE_FOR_SUFFIX
39245 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
39248 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
39249 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
39252 #ifdef SUBTARGET_INSERT_ATTRIBUTES
39253 #undef TARGET_INSERT_ATTRIBUTES
39254 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
39257 #undef TARGET_MANGLE_TYPE
39258 #define TARGET_MANGLE_TYPE ix86_mangle_type
39260 #ifndef TARGET_MACHO
39261 #undef TARGET_STACK_PROTECT_FAIL
39262 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
39265 #undef TARGET_FUNCTION_VALUE
39266 #define TARGET_FUNCTION_VALUE ix86_function_value
39268 #undef TARGET_FUNCTION_VALUE_REGNO_P
39269 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
39271 #undef TARGET_PROMOTE_FUNCTION_MODE
39272 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
39274 #undef TARGET_SECONDARY_RELOAD
39275 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
39277 #undef TARGET_CLASS_MAX_NREGS
39278 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
39280 #undef TARGET_PREFERRED_RELOAD_CLASS
39281 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
39282 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
39283 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
39284 #undef TARGET_CLASS_LIKELY_SPILLED_P
39285 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
39287 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
39288 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
39289 ix86_builtin_vectorization_cost
39290 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
39291 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
39292 ix86_vectorize_vec_perm_const_ok
39293 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
39294 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
39295 ix86_preferred_simd_mode
39296 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
39297 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
39298 ix86_autovectorize_vector_sizes
39300 #undef TARGET_SET_CURRENT_FUNCTION
39301 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
39303 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
39304 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
39306 #undef TARGET_OPTION_SAVE
39307 #define TARGET_OPTION_SAVE ix86_function_specific_save
39309 #undef TARGET_OPTION_RESTORE
39310 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
39312 #undef TARGET_OPTION_PRINT
39313 #define TARGET_OPTION_PRINT ix86_function_specific_print
39315 #undef TARGET_CAN_INLINE_P
39316 #define TARGET_CAN_INLINE_P ix86_can_inline_p
39318 #undef TARGET_EXPAND_TO_RTL_HOOK
39319 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
39321 #undef TARGET_LEGITIMATE_ADDRESS_P
39322 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
39324 #undef TARGET_LEGITIMATE_CONSTANT_P
39325 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
39327 #undef TARGET_FRAME_POINTER_REQUIRED
39328 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
39330 #undef TARGET_CAN_ELIMINATE
39331 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
39333 #undef TARGET_EXTRA_LIVE_ON_ENTRY
39334 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
39336 #undef TARGET_ASM_CODE_END
39337 #define TARGET_ASM_CODE_END ix86_code_end
39339 #undef TARGET_CONDITIONAL_REGISTER_USAGE
39340 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
39343 #undef TARGET_INIT_LIBFUNCS
39344 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
/* The global target hook vector for the i386 back end.
   TARGET_INITIALIZER expands to an initializer built from the
   TARGET_xxx macros #defined above.  */
struct gcc_target targetm = TARGET_INITIALIZER;
39349 #include "gt-i386.h"