1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
3 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
4 Free Software Foundation, Inc.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "output.h"
34 #include "insn-codes.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "except.h"
38 #include "function.h"
39 #include "recog.h"
40 #include "expr.h"
41 #include "optabs.h"
42 #include "diagnostic-core.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "common/common-target.h"
49 #include "langhooks.h"
50 #include "cgraph.h"
51 #include "gimple.h"
52 #include "dwarf2.h"
53 #include "df.h"
54 #include "tm-constrs.h"
55 #include "params.h"
56 #include "cselib.h"
57 #include "debug.h"
58 #include "sched-int.h"
59 #include "sbitmap.h"
60 #include "fibheap.h"
61 #include "opts.h"
62 #include "diagnostic.h"
63
64 enum upper_128bits_state
65 {
66 unknown = 0,
67 unused,
68 used
69 };
70
71 typedef struct block_info_def
72 {
73 /* State of the upper 128bits of AVX registers at exit. */
74 enum upper_128bits_state state;
75 /* TRUE if state of the upper 128bits of AVX registers is unchanged
76 in this block. */
77 bool unchanged;
78 /* TRUE if block has been processed. */
79 bool processed;
80 /* TRUE if block has been scanned. */
81 bool scanned;
82 /* Previous state of the upper 128bits of AVX registers at entry. */
83 enum upper_128bits_state prev;
84 } *block_info;
85
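/* Per-basic-block vzeroupper state is kept in the block's AUX field,
   allocated by alloc_aux_for_blocks and freed by free_aux_for_blocks in
   move_or_delete_vzeroupper below.  */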
86 #define BLOCK_INFO(B) ((block_info) (B)->aux)
87
88 enum call_avx256_state
89 {
90 /* Callee returns 256bit AVX register. */
91 callee_return_avx256 = -1,
92 /* Callee returns and passes 256bit AVX register. */
93 callee_return_pass_avx256,
94 /* Callee passes 256bit AVX register. */
95 callee_pass_avx256,
 96   /* Callee neither returns nor passes a 256bit AVX register, or no
 97      256bit AVX register is used in the function return.  */
98 call_no_avx256,
99 /* vzeroupper intrinsic. */
100 vzeroupper_intrinsic
101 };
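/* The chosen value is encoded as the first operand of the UNSPECV_VZEROUPPER
   pattern; move_or_delete_vzeroupper_2 reads it back with INTVAL to decide
   whether the vzeroupper can be deleted or must stay.  */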
102
103 /* Check if a 256bit AVX register is referenced in stores. */
104
105 static void
106 check_avx256_stores (rtx dest, const_rtx set, void *data)
107 {
108 if ((REG_P (dest)
109 && VALID_AVX256_REG_MODE (GET_MODE (dest)))
110 || (GET_CODE (set) == SET
111 && REG_P (SET_SRC (set))
112 && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
113 {
114 enum upper_128bits_state *state
115 = (enum upper_128bits_state *) data;
116 *state = used;
117 }
118 }
119
120 /* Helper function for move_or_delete_vzeroupper_1. Look for vzeroupper
121 in basic block BB. Delete it if upper 128bit AVX registers are
122 unused. If it isn't deleted, move it to just before a jump insn.
123
124 STATE is state of the upper 128bits of AVX registers at entry. */
125
126 static void
127 move_or_delete_vzeroupper_2 (basic_block bb,
128 enum upper_128bits_state state)
129 {
130 rtx insn, bb_end;
131 rtx vzeroupper_insn = NULL_RTX;
132 rtx pat;
133 int avx256;
134 bool unchanged;
135
136 if (BLOCK_INFO (bb)->unchanged)
137 {
138 if (dump_file)
139 fprintf (dump_file, " [bb %i] unchanged: upper 128bits: %d\n",
140 bb->index, state);
141
142 BLOCK_INFO (bb)->state = state;
143 return;
144 }
145
146 if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
147 {
148 if (dump_file)
149 fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
150 bb->index, BLOCK_INFO (bb)->state);
151 return;
152 }
153
154 BLOCK_INFO (bb)->prev = state;
155
156 if (dump_file)
157 fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
158 bb->index, state);
159
160 unchanged = true;
161
 162   /* BB_END changes when the last insn is deleted, so cache the current end.  */
163 bb_end = BB_END (bb);
164 insn = BB_HEAD (bb);
165 while (insn != bb_end)
166 {
167 insn = NEXT_INSN (insn);
168
169 if (!NONDEBUG_INSN_P (insn))
170 continue;
171
172 /* Move vzeroupper before jump/call. */
173 if (JUMP_P (insn) || CALL_P (insn))
174 {
175 if (!vzeroupper_insn)
176 continue;
177
178 if (PREV_INSN (insn) != vzeroupper_insn)
179 {
180 if (dump_file)
181 {
182 fprintf (dump_file, "Move vzeroupper after:\n");
183 print_rtl_single (dump_file, PREV_INSN (insn));
184 fprintf (dump_file, "before:\n");
185 print_rtl_single (dump_file, insn);
186 }
187 reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
188 PREV_INSN (insn));
189 }
190 vzeroupper_insn = NULL_RTX;
191 continue;
192 }
193
194 pat = PATTERN (insn);
195
196 /* Check insn for vzeroupper intrinsic. */
197 if (GET_CODE (pat) == UNSPEC_VOLATILE
198 && XINT (pat, 1) == UNSPECV_VZEROUPPER)
199 {
200 if (dump_file)
201 {
202 /* Found vzeroupper intrinsic. */
203 fprintf (dump_file, "Found vzeroupper:\n");
204 print_rtl_single (dump_file, insn);
205 }
206 }
207 else
208 {
209 /* Check insn for vzeroall intrinsic. */
210 if (GET_CODE (pat) == PARALLEL
211 && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
212 && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
213 {
214 state = unused;
215 unchanged = false;
216
217 /* Delete pending vzeroupper insertion. */
218 if (vzeroupper_insn)
219 {
220 delete_insn (vzeroupper_insn);
221 vzeroupper_insn = NULL_RTX;
222 }
223 }
224 else if (state != used)
225 {
226 note_stores (pat, check_avx256_stores, &state);
227 if (state == used)
228 unchanged = false;
229 }
230 continue;
231 }
232
233 /* Process vzeroupper intrinsic. */
234 avx256 = INTVAL (XVECEXP (pat, 0, 0));
235
236 if (state == unused)
237 {
238 /* Since the upper 128bits are cleared, callee must not pass
239 256bit AVX register. We only need to check if callee
240 returns 256bit AVX register. */
241 if (avx256 == callee_return_avx256)
242 {
243 state = used;
244 unchanged = false;
245 }
246
247 /* Remove unnecessary vzeroupper since upper 128bits are
248 cleared. */
249 if (dump_file)
250 {
251 fprintf (dump_file, "Delete redundant vzeroupper:\n");
252 print_rtl_single (dump_file, insn);
253 }
254 delete_insn (insn);
255 }
256 else
257 {
258 /* Set state to UNUSED if callee doesn't return 256bit AVX
259 register. */
260 if (avx256 != callee_return_pass_avx256)
261 state = unused;
262
263 if (avx256 == callee_return_pass_avx256
264 || avx256 == callee_pass_avx256)
265 {
266 /* Must remove vzeroupper since callee passes in 256bit
267 AVX register. */
268 if (dump_file)
269 {
270 fprintf (dump_file, "Delete callee pass vzeroupper:\n");
271 print_rtl_single (dump_file, insn);
272 }
273 delete_insn (insn);
274 }
275 else
276 {
277 vzeroupper_insn = insn;
278 unchanged = false;
279 }
280 }
281 }
282
283 BLOCK_INFO (bb)->state = state;
284 BLOCK_INFO (bb)->unchanged = unchanged;
285 BLOCK_INFO (bb)->scanned = true;
286
287 if (dump_file)
288 fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
289 bb->index, unchanged ? "unchanged" : "changed",
290 state);
291 }
292
293 /* Helper function for move_or_delete_vzeroupper. Process vzeroupper
294 in BLOCK and check its predecessor blocks. Treat UNKNOWN state
 295    as UNUSED if UNKNOWN_IS_UNUSED is true.  Return TRUE if the exit
296 state is changed. */
297
298 static bool
299 move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
300 {
301 edge e;
302 edge_iterator ei;
303 enum upper_128bits_state state, old_state, new_state;
304 bool seen_unknown;
305
306 if (dump_file)
307 fprintf (dump_file, " Process [bb %i]: status: %d\n",
308 block->index, BLOCK_INFO (block)->processed);
309
310 if (BLOCK_INFO (block)->processed)
311 return false;
312
313 state = unused;
314
315 /* Check all predecessor edges of this block. */
316 seen_unknown = false;
317 FOR_EACH_EDGE (e, ei, block->preds)
318 {
319 if (e->src == block)
320 continue;
321 switch (BLOCK_INFO (e->src)->state)
322 {
323 case unknown:
324 if (!unknown_is_unused)
325 seen_unknown = true;
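	  /* FALLTHRU */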
326 case unused:
327 break;
328 case used:
329 state = used;
330 goto done;
331 }
332 }
333
334 if (seen_unknown)
335 state = unknown;
336
337 done:
338 old_state = BLOCK_INFO (block)->state;
339 move_or_delete_vzeroupper_2 (block, state);
340 new_state = BLOCK_INFO (block)->state;
341
342 if (state != unknown || new_state == used)
343 BLOCK_INFO (block)->processed = true;
344
345 /* Need to rescan if the upper 128bits of AVX registers are changed
346 to USED at exit. */
347 if (new_state != old_state)
348 {
349 if (new_state == used)
350 cfun->machine->rescan_vzeroupper_p = 1;
351 return true;
352 }
353 else
354 return false;
355 }
356
357 /* Go through the instruction stream looking for vzeroupper. Delete
358 it if upper 128bit AVX registers are unused. If it isn't deleted,
359 move it to just before a jump insn. */
360
361 static void
362 move_or_delete_vzeroupper (void)
363 {
364 edge e;
365 edge_iterator ei;
366 basic_block bb;
367 fibheap_t worklist, pending, fibheap_swap;
368 sbitmap visited, in_worklist, in_pending, sbitmap_swap;
369 int *bb_order;
370 int *rc_order;
371 int i;
372
373 /* Set up block info for each basic block. */
374 alloc_aux_for_blocks (sizeof (struct block_info_def));
375
376 /* Process outgoing edges of entry point. */
377 if (dump_file)
378 fprintf (dump_file, "Process outgoing edges of entry point\n");
379
380 FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
381 {
382 move_or_delete_vzeroupper_2 (e->dest,
383 cfun->machine->caller_pass_avx256_p
384 ? used : unused);
385 BLOCK_INFO (e->dest)->processed = true;
386 }
387
388 /* Compute reverse completion order of depth first search of the CFG
389 so that the data-flow runs faster. */
390 rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
391 bb_order = XNEWVEC (int, last_basic_block);
392 pre_and_rev_post_order_compute (NULL, rc_order, false);
393 for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
394 bb_order[rc_order[i]] = i;
395 free (rc_order);
396
397 worklist = fibheap_new ();
398 pending = fibheap_new ();
399 visited = sbitmap_alloc (last_basic_block);
400 in_worklist = sbitmap_alloc (last_basic_block);
401 in_pending = sbitmap_alloc (last_basic_block);
402 sbitmap_zero (in_worklist);
403
404 /* Don't check outgoing edges of entry point. */
405 sbitmap_ones (in_pending);
406 FOR_EACH_BB (bb)
407 if (BLOCK_INFO (bb)->processed)
408 RESET_BIT (in_pending, bb->index);
409 else
410 {
411 move_or_delete_vzeroupper_1 (bb, false);
412 fibheap_insert (pending, bb_order[bb->index], bb);
413 }
414
415 if (dump_file)
416 fprintf (dump_file, "Check remaining basic blocks\n");
417
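  /* Dataflow iteration over two priority queues: PENDING holds blocks that
     still need processing in a later round and WORKLIST the blocks of the
     current round.  Each round swaps the two, visits blocks in the order
     computed above, and re-queues successors whose entry state changed.  The
     loop stops early once no block requires another vzeroupper rescan.  */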
418 while (!fibheap_empty (pending))
419 {
420 fibheap_swap = pending;
421 pending = worklist;
422 worklist = fibheap_swap;
423 sbitmap_swap = in_pending;
424 in_pending = in_worklist;
425 in_worklist = sbitmap_swap;
426
427 sbitmap_zero (visited);
428
429 cfun->machine->rescan_vzeroupper_p = 0;
430
431 while (!fibheap_empty (worklist))
432 {
433 bb = (basic_block) fibheap_extract_min (worklist);
434 RESET_BIT (in_worklist, bb->index);
435 gcc_assert (!TEST_BIT (visited, bb->index));
436 if (!TEST_BIT (visited, bb->index))
437 {
438 edge_iterator ei;
439
440 SET_BIT (visited, bb->index);
441
442 if (move_or_delete_vzeroupper_1 (bb, false))
443 FOR_EACH_EDGE (e, ei, bb->succs)
444 {
445 if (e->dest == EXIT_BLOCK_PTR
446 || BLOCK_INFO (e->dest)->processed)
447 continue;
448
449 if (TEST_BIT (visited, e->dest->index))
450 {
451 if (!TEST_BIT (in_pending, e->dest->index))
452 {
453 /* Send E->DEST to next round. */
454 SET_BIT (in_pending, e->dest->index);
455 fibheap_insert (pending,
456 bb_order[e->dest->index],
457 e->dest);
458 }
459 }
460 else if (!TEST_BIT (in_worklist, e->dest->index))
461 {
462 /* Add E->DEST to current round. */
463 SET_BIT (in_worklist, e->dest->index);
464 fibheap_insert (worklist, bb_order[e->dest->index],
465 e->dest);
466 }
467 }
468 }
469 }
470
471 if (!cfun->machine->rescan_vzeroupper_p)
472 break;
473 }
474
475 free (bb_order);
476 fibheap_delete (worklist);
477 fibheap_delete (pending);
478 sbitmap_free (visited);
479 sbitmap_free (in_worklist);
480 sbitmap_free (in_pending);
481
482 if (dump_file)
483 fprintf (dump_file, "Process remaining basic blocks\n");
484
485 FOR_EACH_BB (bb)
486 move_or_delete_vzeroupper_1 (bb, true);
487
488 free_aux_for_blocks ();
489 }
490
491 static rtx legitimize_dllimport_symbol (rtx, bool);
492
493 #ifndef CHECK_STACK_LIMIT
494 #define CHECK_STACK_LIMIT (-1)
495 #endif
496
497 /* Return index of given mode in mult and division cost tables. */
498 #define MODE_INDEX(mode) \
499 ((mode) == QImode ? 0 \
500 : (mode) == HImode ? 1 \
501 : (mode) == SImode ? 2 \
502 : (mode) == DImode ? 3 \
503 : 4)
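/* E.g. MODE_INDEX (SImode) is 2, picking the SI entry out of the five
   per-mode entries (QI, HI, SI, DI, other) in the multiply and divide cost
   arrays of the processor cost tables below.  */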
504
505 /* Processor costs (relative to an add) */
506 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
507 #define COSTS_N_BYTES(N) ((N) * 2)
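/* Under that assumption COSTS_N_INSNS (1) == 4 and COSTS_N_BYTES (2) == 4,
   so a 2-byte addition in the size table below costs exactly as much as one
   insn does in the speed-oriented tables.  */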
508
509 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
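/* In the stringop tables below, the leading algorithm is used when the block
   size is not known at compile time; the {max, alg} pairs that follow cover
   constant sizes up to MAX, with a max of -1 acting as the terminator.  */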
510
511 const
512 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
513 COSTS_N_BYTES (2), /* cost of an add instruction */
514 COSTS_N_BYTES (3), /* cost of a lea instruction */
515 COSTS_N_BYTES (2), /* variable shift costs */
516 COSTS_N_BYTES (3), /* constant shift costs */
517 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
518 COSTS_N_BYTES (3), /* HI */
519 COSTS_N_BYTES (3), /* SI */
520 COSTS_N_BYTES (3), /* DI */
521 COSTS_N_BYTES (5)}, /* other */
522 0, /* cost of multiply per each bit set */
523 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
524 COSTS_N_BYTES (3), /* HI */
525 COSTS_N_BYTES (3), /* SI */
526 COSTS_N_BYTES (3), /* DI */
527 COSTS_N_BYTES (5)}, /* other */
528 COSTS_N_BYTES (3), /* cost of movsx */
529 COSTS_N_BYTES (3), /* cost of movzx */
530 0, /* "large" insn */
531 2, /* MOVE_RATIO */
532 2, /* cost for loading QImode using movzbl */
533 {2, 2, 2}, /* cost of loading integer registers
534 in QImode, HImode and SImode.
535 Relative to reg-reg move (2). */
536 {2, 2, 2}, /* cost of storing integer registers */
537 2, /* cost of reg,reg fld/fst */
538 {2, 2, 2}, /* cost of loading fp registers
539 in SFmode, DFmode and XFmode */
540 {2, 2, 2}, /* cost of storing fp registers
541 in SFmode, DFmode and XFmode */
542 3, /* cost of moving MMX register */
543 {3, 3}, /* cost of loading MMX registers
544 in SImode and DImode */
545 {3, 3}, /* cost of storing MMX registers
546 in SImode and DImode */
547 3, /* cost of moving SSE register */
548 {3, 3, 3}, /* cost of loading SSE registers
549 in SImode, DImode and TImode */
550 {3, 3, 3}, /* cost of storing SSE registers
551 in SImode, DImode and TImode */
552 3, /* MMX or SSE register to integer */
553 0, /* size of l1 cache */
554 0, /* size of l2 cache */
555 0, /* size of prefetch block */
556 0, /* number of parallel prefetches */
557 2, /* Branch cost */
558 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
559 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
560 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
561 COSTS_N_BYTES (2), /* cost of FABS instruction. */
562 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
563 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
564 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
565 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
566 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
567 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
568 1, /* scalar_stmt_cost. */
569 1, /* scalar load_cost. */
570 1, /* scalar_store_cost. */
571 1, /* vec_stmt_cost. */
572 1, /* vec_to_scalar_cost. */
573 1, /* scalar_to_vec_cost. */
574 1, /* vec_align_load_cost. */
575 1, /* vec_unalign_load_cost. */
576 1, /* vec_store_cost. */
577 1, /* cond_taken_branch_cost. */
578 1, /* cond_not_taken_branch_cost. */
579 };
580
581 /* Processor costs (relative to an add) */
582 static const
583 struct processor_costs i386_cost = { /* 386 specific costs */
584 COSTS_N_INSNS (1), /* cost of an add instruction */
585 COSTS_N_INSNS (1), /* cost of a lea instruction */
586 COSTS_N_INSNS (3), /* variable shift costs */
587 COSTS_N_INSNS (2), /* constant shift costs */
588 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
589 COSTS_N_INSNS (6), /* HI */
590 COSTS_N_INSNS (6), /* SI */
591 COSTS_N_INSNS (6), /* DI */
592 COSTS_N_INSNS (6)}, /* other */
593 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
594 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
595 COSTS_N_INSNS (23), /* HI */
596 COSTS_N_INSNS (23), /* SI */
597 COSTS_N_INSNS (23), /* DI */
598 COSTS_N_INSNS (23)}, /* other */
599 COSTS_N_INSNS (3), /* cost of movsx */
600 COSTS_N_INSNS (2), /* cost of movzx */
601 15, /* "large" insn */
602 3, /* MOVE_RATIO */
603 4, /* cost for loading QImode using movzbl */
604 {2, 4, 2}, /* cost of loading integer registers
605 in QImode, HImode and SImode.
606 Relative to reg-reg move (2). */
607 {2, 4, 2}, /* cost of storing integer registers */
608 2, /* cost of reg,reg fld/fst */
609 {8, 8, 8}, /* cost of loading fp registers
610 in SFmode, DFmode and XFmode */
611 {8, 8, 8}, /* cost of storing fp registers
612 in SFmode, DFmode and XFmode */
613 2, /* cost of moving MMX register */
614 {4, 8}, /* cost of loading MMX registers
615 in SImode and DImode */
616 {4, 8}, /* cost of storing MMX registers
617 in SImode and DImode */
618 2, /* cost of moving SSE register */
619 {4, 8, 16}, /* cost of loading SSE registers
620 in SImode, DImode and TImode */
621 {4, 8, 16}, /* cost of storing SSE registers
622 in SImode, DImode and TImode */
623 3, /* MMX or SSE register to integer */
624 0, /* size of l1 cache */
625 0, /* size of l2 cache */
626 0, /* size of prefetch block */
627 0, /* number of parallel prefetches */
628 1, /* Branch cost */
629 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
630 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
631 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
632 COSTS_N_INSNS (22), /* cost of FABS instruction. */
633 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
634 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
635 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
636 DUMMY_STRINGOP_ALGS},
637 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
638 DUMMY_STRINGOP_ALGS},
639 1, /* scalar_stmt_cost. */
640 1, /* scalar load_cost. */
641 1, /* scalar_store_cost. */
642 1, /* vec_stmt_cost. */
643 1, /* vec_to_scalar_cost. */
644 1, /* scalar_to_vec_cost. */
645 1, /* vec_align_load_cost. */
646 2, /* vec_unalign_load_cost. */
647 1, /* vec_store_cost. */
648 3, /* cond_taken_branch_cost. */
649 1, /* cond_not_taken_branch_cost. */
650 };
651
652 static const
653 struct processor_costs i486_cost = { /* 486 specific costs */
654 COSTS_N_INSNS (1), /* cost of an add instruction */
655 COSTS_N_INSNS (1), /* cost of a lea instruction */
656 COSTS_N_INSNS (3), /* variable shift costs */
657 COSTS_N_INSNS (2), /* constant shift costs */
658 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
659 COSTS_N_INSNS (12), /* HI */
660 COSTS_N_INSNS (12), /* SI */
661 COSTS_N_INSNS (12), /* DI */
662 COSTS_N_INSNS (12)}, /* other */
663 1, /* cost of multiply per each bit set */
664 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
665 COSTS_N_INSNS (40), /* HI */
666 COSTS_N_INSNS (40), /* SI */
667 COSTS_N_INSNS (40), /* DI */
668 COSTS_N_INSNS (40)}, /* other */
669 COSTS_N_INSNS (3), /* cost of movsx */
670 COSTS_N_INSNS (2), /* cost of movzx */
671 15, /* "large" insn */
672 3, /* MOVE_RATIO */
673 4, /* cost for loading QImode using movzbl */
674 {2, 4, 2}, /* cost of loading integer registers
675 in QImode, HImode and SImode.
676 Relative to reg-reg move (2). */
677 {2, 4, 2}, /* cost of storing integer registers */
678 2, /* cost of reg,reg fld/fst */
679 {8, 8, 8}, /* cost of loading fp registers
680 in SFmode, DFmode and XFmode */
681 {8, 8, 8}, /* cost of storing fp registers
682 in SFmode, DFmode and XFmode */
683 2, /* cost of moving MMX register */
684 {4, 8}, /* cost of loading MMX registers
685 in SImode and DImode */
686 {4, 8}, /* cost of storing MMX registers
687 in SImode and DImode */
688 2, /* cost of moving SSE register */
689 {4, 8, 16}, /* cost of loading SSE registers
690 in SImode, DImode and TImode */
691 {4, 8, 16}, /* cost of storing SSE registers
692 in SImode, DImode and TImode */
693 3, /* MMX or SSE register to integer */
694 4, /* size of l1 cache. 486 has 8kB cache
695 shared for code and data, so 4kB is
696 not really precise. */
697 4, /* size of l2 cache */
698 0, /* size of prefetch block */
699 0, /* number of parallel prefetches */
700 1, /* Branch cost */
701 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
702 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
703 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
704 COSTS_N_INSNS (3), /* cost of FABS instruction. */
705 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
706 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
707 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
708 DUMMY_STRINGOP_ALGS},
709 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
710 DUMMY_STRINGOP_ALGS},
711 1, /* scalar_stmt_cost. */
712 1, /* scalar load_cost. */
713 1, /* scalar_store_cost. */
714 1, /* vec_stmt_cost. */
715 1, /* vec_to_scalar_cost. */
716 1, /* scalar_to_vec_cost. */
717 1, /* vec_align_load_cost. */
718 2, /* vec_unalign_load_cost. */
719 1, /* vec_store_cost. */
720 3, /* cond_taken_branch_cost. */
721 1, /* cond_not_taken_branch_cost. */
722 };
723
724 static const
725 struct processor_costs pentium_cost = {
726 COSTS_N_INSNS (1), /* cost of an add instruction */
727 COSTS_N_INSNS (1), /* cost of a lea instruction */
728 COSTS_N_INSNS (4), /* variable shift costs */
729 COSTS_N_INSNS (1), /* constant shift costs */
730 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
731 COSTS_N_INSNS (11), /* HI */
732 COSTS_N_INSNS (11), /* SI */
733 COSTS_N_INSNS (11), /* DI */
734 COSTS_N_INSNS (11)}, /* other */
735 0, /* cost of multiply per each bit set */
736 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
737 COSTS_N_INSNS (25), /* HI */
738 COSTS_N_INSNS (25), /* SI */
739 COSTS_N_INSNS (25), /* DI */
740 COSTS_N_INSNS (25)}, /* other */
741 COSTS_N_INSNS (3), /* cost of movsx */
742 COSTS_N_INSNS (2), /* cost of movzx */
743 8, /* "large" insn */
744 6, /* MOVE_RATIO */
745 6, /* cost for loading QImode using movzbl */
746 {2, 4, 2}, /* cost of loading integer registers
747 in QImode, HImode and SImode.
748 Relative to reg-reg move (2). */
749 {2, 4, 2}, /* cost of storing integer registers */
750 2, /* cost of reg,reg fld/fst */
751 {2, 2, 6}, /* cost of loading fp registers
752 in SFmode, DFmode and XFmode */
753 {4, 4, 6}, /* cost of storing fp registers
754 in SFmode, DFmode and XFmode */
755 8, /* cost of moving MMX register */
756 {8, 8}, /* cost of loading MMX registers
757 in SImode and DImode */
758 {8, 8}, /* cost of storing MMX registers
759 in SImode and DImode */
760 2, /* cost of moving SSE register */
761 {4, 8, 16}, /* cost of loading SSE registers
762 in SImode, DImode and TImode */
763 {4, 8, 16}, /* cost of storing SSE registers
764 in SImode, DImode and TImode */
765 3, /* MMX or SSE register to integer */
766 8, /* size of l1 cache. */
767 8, /* size of l2 cache */
768 0, /* size of prefetch block */
769 0, /* number of parallel prefetches */
770 2, /* Branch cost */
771 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
772 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
773 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
774 COSTS_N_INSNS (1), /* cost of FABS instruction. */
775 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
776 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
777 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
778 DUMMY_STRINGOP_ALGS},
779 {{libcall, {{-1, rep_prefix_4_byte}}},
780 DUMMY_STRINGOP_ALGS},
781 1, /* scalar_stmt_cost. */
782 1, /* scalar load_cost. */
783 1, /* scalar_store_cost. */
784 1, /* vec_stmt_cost. */
785 1, /* vec_to_scalar_cost. */
786 1, /* scalar_to_vec_cost. */
787 1, /* vec_align_load_cost. */
788 2, /* vec_unalign_load_cost. */
789 1, /* vec_store_cost. */
790 3, /* cond_taken_branch_cost. */
791 1, /* cond_not_taken_branch_cost. */
792 };
793
794 static const
795 struct processor_costs pentiumpro_cost = {
796 COSTS_N_INSNS (1), /* cost of an add instruction */
797 COSTS_N_INSNS (1), /* cost of a lea instruction */
798 COSTS_N_INSNS (1), /* variable shift costs */
799 COSTS_N_INSNS (1), /* constant shift costs */
800 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
801 COSTS_N_INSNS (4), /* HI */
802 COSTS_N_INSNS (4), /* SI */
803 COSTS_N_INSNS (4), /* DI */
804 COSTS_N_INSNS (4)}, /* other */
805 0, /* cost of multiply per each bit set */
806 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
807 COSTS_N_INSNS (17), /* HI */
808 COSTS_N_INSNS (17), /* SI */
809 COSTS_N_INSNS (17), /* DI */
810 COSTS_N_INSNS (17)}, /* other */
811 COSTS_N_INSNS (1), /* cost of movsx */
812 COSTS_N_INSNS (1), /* cost of movzx */
813 8, /* "large" insn */
814 6, /* MOVE_RATIO */
815 2, /* cost for loading QImode using movzbl */
816 {4, 4, 4}, /* cost of loading integer registers
817 in QImode, HImode and SImode.
818 Relative to reg-reg move (2). */
819 {2, 2, 2}, /* cost of storing integer registers */
820 2, /* cost of reg,reg fld/fst */
821 {2, 2, 6}, /* cost of loading fp registers
822 in SFmode, DFmode and XFmode */
823 {4, 4, 6}, /* cost of storing fp registers
824 in SFmode, DFmode and XFmode */
825 2, /* cost of moving MMX register */
826 {2, 2}, /* cost of loading MMX registers
827 in SImode and DImode */
828 {2, 2}, /* cost of storing MMX registers
829 in SImode and DImode */
830 2, /* cost of moving SSE register */
831 {2, 2, 8}, /* cost of loading SSE registers
832 in SImode, DImode and TImode */
833 {2, 2, 8}, /* cost of storing SSE registers
834 in SImode, DImode and TImode */
835 3, /* MMX or SSE register to integer */
836 8, /* size of l1 cache. */
837 256, /* size of l2 cache */
838 32, /* size of prefetch block */
839 6, /* number of parallel prefetches */
840 2, /* Branch cost */
841 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
842 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
843 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
844 COSTS_N_INSNS (2), /* cost of FABS instruction. */
845 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
846 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
 847   /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
 848      (we ensure the alignment).  For small blocks an inline loop is still a
 849      noticeable win; for bigger blocks either rep movsl or rep movsb is
 850      the way to go.  Rep movsb apparently has a more expensive startup time
 851      in the CPU, but after 4K the difference is down in the noise.  */
852 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
853 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
854 DUMMY_STRINGOP_ALGS},
855 {{rep_prefix_4_byte, {{1024, unrolled_loop},
856 {8192, rep_prefix_4_byte}, {-1, libcall}}},
857 DUMMY_STRINGOP_ALGS},
858 1, /* scalar_stmt_cost. */
859 1, /* scalar load_cost. */
860 1, /* scalar_store_cost. */
861 1, /* vec_stmt_cost. */
862 1, /* vec_to_scalar_cost. */
863 1, /* scalar_to_vec_cost. */
864 1, /* vec_align_load_cost. */
865 2, /* vec_unalign_load_cost. */
866 1, /* vec_store_cost. */
867 3, /* cond_taken_branch_cost. */
868 1, /* cond_not_taken_branch_cost. */
869 };
870
871 static const
872 struct processor_costs geode_cost = {
873 COSTS_N_INSNS (1), /* cost of an add instruction */
874 COSTS_N_INSNS (1), /* cost of a lea instruction */
875 COSTS_N_INSNS (2), /* variable shift costs */
876 COSTS_N_INSNS (1), /* constant shift costs */
877 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
878 COSTS_N_INSNS (4), /* HI */
879 COSTS_N_INSNS (7), /* SI */
880 COSTS_N_INSNS (7), /* DI */
881 COSTS_N_INSNS (7)}, /* other */
882 0, /* cost of multiply per each bit set */
883 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
884 COSTS_N_INSNS (23), /* HI */
885 COSTS_N_INSNS (39), /* SI */
886 COSTS_N_INSNS (39), /* DI */
887 COSTS_N_INSNS (39)}, /* other */
888 COSTS_N_INSNS (1), /* cost of movsx */
889 COSTS_N_INSNS (1), /* cost of movzx */
890 8, /* "large" insn */
891 4, /* MOVE_RATIO */
892 1, /* cost for loading QImode using movzbl */
893 {1, 1, 1}, /* cost of loading integer registers
894 in QImode, HImode and SImode.
895 Relative to reg-reg move (2). */
896 {1, 1, 1}, /* cost of storing integer registers */
897 1, /* cost of reg,reg fld/fst */
898 {1, 1, 1}, /* cost of loading fp registers
899 in SFmode, DFmode and XFmode */
900 {4, 6, 6}, /* cost of storing fp registers
901 in SFmode, DFmode and XFmode */
902
903 1, /* cost of moving MMX register */
904 {1, 1}, /* cost of loading MMX registers
905 in SImode and DImode */
906 {1, 1}, /* cost of storing MMX registers
907 in SImode and DImode */
908 1, /* cost of moving SSE register */
909 {1, 1, 1}, /* cost of loading SSE registers
910 in SImode, DImode and TImode */
911 {1, 1, 1}, /* cost of storing SSE registers
912 in SImode, DImode and TImode */
913 1, /* MMX or SSE register to integer */
914 64, /* size of l1 cache. */
915 128, /* size of l2 cache. */
916 32, /* size of prefetch block */
917 1, /* number of parallel prefetches */
918 1, /* Branch cost */
919 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
920 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
921 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
922 COSTS_N_INSNS (1), /* cost of FABS instruction. */
923 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
924 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
925 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
926 DUMMY_STRINGOP_ALGS},
927 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
928 DUMMY_STRINGOP_ALGS},
929 1, /* scalar_stmt_cost. */
930 1, /* scalar load_cost. */
931 1, /* scalar_store_cost. */
932 1, /* vec_stmt_cost. */
933 1, /* vec_to_scalar_cost. */
934 1, /* scalar_to_vec_cost. */
935 1, /* vec_align_load_cost. */
936 2, /* vec_unalign_load_cost. */
937 1, /* vec_store_cost. */
938 3, /* cond_taken_branch_cost. */
939 1, /* cond_not_taken_branch_cost. */
940 };
941
942 static const
943 struct processor_costs k6_cost = {
944 COSTS_N_INSNS (1), /* cost of an add instruction */
945 COSTS_N_INSNS (2), /* cost of a lea instruction */
946 COSTS_N_INSNS (1), /* variable shift costs */
947 COSTS_N_INSNS (1), /* constant shift costs */
948 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
949 COSTS_N_INSNS (3), /* HI */
950 COSTS_N_INSNS (3), /* SI */
951 COSTS_N_INSNS (3), /* DI */
952 COSTS_N_INSNS (3)}, /* other */
953 0, /* cost of multiply per each bit set */
954 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
955 COSTS_N_INSNS (18), /* HI */
956 COSTS_N_INSNS (18), /* SI */
957 COSTS_N_INSNS (18), /* DI */
958 COSTS_N_INSNS (18)}, /* other */
959 COSTS_N_INSNS (2), /* cost of movsx */
960 COSTS_N_INSNS (2), /* cost of movzx */
961 8, /* "large" insn */
962 4, /* MOVE_RATIO */
963 3, /* cost for loading QImode using movzbl */
964 {4, 5, 4}, /* cost of loading integer registers
965 in QImode, HImode and SImode.
966 Relative to reg-reg move (2). */
967 {2, 3, 2}, /* cost of storing integer registers */
968 4, /* cost of reg,reg fld/fst */
969 {6, 6, 6}, /* cost of loading fp registers
970 in SFmode, DFmode and XFmode */
971 {4, 4, 4}, /* cost of storing fp registers
972 in SFmode, DFmode and XFmode */
973 2, /* cost of moving MMX register */
974 {2, 2}, /* cost of loading MMX registers
975 in SImode and DImode */
976 {2, 2}, /* cost of storing MMX registers
977 in SImode and DImode */
978 2, /* cost of moving SSE register */
979 {2, 2, 8}, /* cost of loading SSE registers
980 in SImode, DImode and TImode */
981 {2, 2, 8}, /* cost of storing SSE registers
982 in SImode, DImode and TImode */
983 6, /* MMX or SSE register to integer */
984 32, /* size of l1 cache. */
985 32, /* size of l2 cache. Some models
986 have integrated l2 cache, but
987 optimizing for k6 is not important
988 enough to worry about that. */
989 32, /* size of prefetch block */
990 1, /* number of parallel prefetches */
991 1, /* Branch cost */
992 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
993 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
994 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
995 COSTS_N_INSNS (2), /* cost of FABS instruction. */
996 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
997 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
998 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
999 DUMMY_STRINGOP_ALGS},
1000 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
1001 DUMMY_STRINGOP_ALGS},
1002 1, /* scalar_stmt_cost. */
1003 1, /* scalar load_cost. */
1004 1, /* scalar_store_cost. */
1005 1, /* vec_stmt_cost. */
1006 1, /* vec_to_scalar_cost. */
1007 1, /* scalar_to_vec_cost. */
1008 1, /* vec_align_load_cost. */
1009 2, /* vec_unalign_load_cost. */
1010 1, /* vec_store_cost. */
1011 3, /* cond_taken_branch_cost. */
1012 1, /* cond_not_taken_branch_cost. */
1013 };
1014
1015 static const
1016 struct processor_costs athlon_cost = {
1017 COSTS_N_INSNS (1), /* cost of an add instruction */
1018 COSTS_N_INSNS (2), /* cost of a lea instruction */
1019 COSTS_N_INSNS (1), /* variable shift costs */
1020 COSTS_N_INSNS (1), /* constant shift costs */
1021 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
1022 COSTS_N_INSNS (5), /* HI */
1023 COSTS_N_INSNS (5), /* SI */
1024 COSTS_N_INSNS (5), /* DI */
1025 COSTS_N_INSNS (5)}, /* other */
1026 0, /* cost of multiply per each bit set */
1027 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1028 COSTS_N_INSNS (26), /* HI */
1029 COSTS_N_INSNS (42), /* SI */
1030 COSTS_N_INSNS (74), /* DI */
1031 COSTS_N_INSNS (74)}, /* other */
1032 COSTS_N_INSNS (1), /* cost of movsx */
1033 COSTS_N_INSNS (1), /* cost of movzx */
1034 8, /* "large" insn */
1035 9, /* MOVE_RATIO */
1036 4, /* cost for loading QImode using movzbl */
1037 {3, 4, 3}, /* cost of loading integer registers
1038 in QImode, HImode and SImode.
1039 Relative to reg-reg move (2). */
1040 {3, 4, 3}, /* cost of storing integer registers */
1041 4, /* cost of reg,reg fld/fst */
1042 {4, 4, 12}, /* cost of loading fp registers
1043 in SFmode, DFmode and XFmode */
1044 {6, 6, 8}, /* cost of storing fp registers
1045 in SFmode, DFmode and XFmode */
1046 2, /* cost of moving MMX register */
1047 {4, 4}, /* cost of loading MMX registers
1048 in SImode and DImode */
1049 {4, 4}, /* cost of storing MMX registers
1050 in SImode and DImode */
1051 2, /* cost of moving SSE register */
1052 {4, 4, 6}, /* cost of loading SSE registers
1053 in SImode, DImode and TImode */
1054 {4, 4, 5}, /* cost of storing SSE registers
1055 in SImode, DImode and TImode */
1056 5, /* MMX or SSE register to integer */
1057 64, /* size of l1 cache. */
1058 256, /* size of l2 cache. */
1059 64, /* size of prefetch block */
1060 6, /* number of parallel prefetches */
1061 5, /* Branch cost */
1062 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1063 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1064 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
1065 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1066 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1067 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1068 /* For some reason, Athlon deals better with REP prefix (relative to loops)
1069 compared to K8. Alignment becomes important after 8 bytes for memcpy and
1070 128 bytes for memset. */
1071 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
1072 DUMMY_STRINGOP_ALGS},
1073 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
1074 DUMMY_STRINGOP_ALGS},
1075 1, /* scalar_stmt_cost. */
1076 1, /* scalar load_cost. */
1077 1, /* scalar_store_cost. */
1078 1, /* vec_stmt_cost. */
1079 1, /* vec_to_scalar_cost. */
1080 1, /* scalar_to_vec_cost. */
1081 1, /* vec_align_load_cost. */
1082 2, /* vec_unalign_load_cost. */
1083 1, /* vec_store_cost. */
1084 3, /* cond_taken_branch_cost. */
1085 1, /* cond_not_taken_branch_cost. */
1086 };
1087
1088 static const
1089 struct processor_costs k8_cost = {
1090 COSTS_N_INSNS (1), /* cost of an add instruction */
1091 COSTS_N_INSNS (2), /* cost of a lea instruction */
1092 COSTS_N_INSNS (1), /* variable shift costs */
1093 COSTS_N_INSNS (1), /* constant shift costs */
1094 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1095 COSTS_N_INSNS (4), /* HI */
1096 COSTS_N_INSNS (3), /* SI */
1097 COSTS_N_INSNS (4), /* DI */
1098 COSTS_N_INSNS (5)}, /* other */
1099 0, /* cost of multiply per each bit set */
1100 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1101 COSTS_N_INSNS (26), /* HI */
1102 COSTS_N_INSNS (42), /* SI */
1103 COSTS_N_INSNS (74), /* DI */
1104 COSTS_N_INSNS (74)}, /* other */
1105 COSTS_N_INSNS (1), /* cost of movsx */
1106 COSTS_N_INSNS (1), /* cost of movzx */
1107 8, /* "large" insn */
1108 9, /* MOVE_RATIO */
1109 4, /* cost for loading QImode using movzbl */
1110 {3, 4, 3}, /* cost of loading integer registers
1111 in QImode, HImode and SImode.
1112 Relative to reg-reg move (2). */
1113 {3, 4, 3}, /* cost of storing integer registers */
1114 4, /* cost of reg,reg fld/fst */
1115 {4, 4, 12}, /* cost of loading fp registers
1116 in SFmode, DFmode and XFmode */
1117 {6, 6, 8}, /* cost of storing fp registers
1118 in SFmode, DFmode and XFmode */
1119 2, /* cost of moving MMX register */
1120 {3, 3}, /* cost of loading MMX registers
1121 in SImode and DImode */
1122 {4, 4}, /* cost of storing MMX registers
1123 in SImode and DImode */
1124 2, /* cost of moving SSE register */
1125 {4, 3, 6}, /* cost of loading SSE registers
1126 in SImode, DImode and TImode */
1127 {4, 4, 5}, /* cost of storing SSE registers
1128 in SImode, DImode and TImode */
1129 5, /* MMX or SSE register to integer */
1130 64, /* size of l1 cache. */
1131 512, /* size of l2 cache. */
1132 64, /* size of prefetch block */
1133 /* New AMD processors never drop prefetches; if they cannot be performed
1134 immediately, they are queued. We set number of simultaneous prefetches
1135 to a large constant to reflect this (it probably is not a good idea not
1136 to limit number of prefetches at all, as their execution also takes some
1137 time). */
1138 100, /* number of parallel prefetches */
1139 3, /* Branch cost */
1140 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1141 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1142 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1143 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1144 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1145 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1146 /* K8 has optimized REP instruction for medium sized blocks, but for very
1147 small blocks it is better to use loop. For large blocks, libcall can
 1148      do nontemporal accesses and beat inline considerably.  */
1149 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1150 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1151 {{libcall, {{8, loop}, {24, unrolled_loop},
1152 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1153 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1154 4, /* scalar_stmt_cost. */
1155 2, /* scalar load_cost. */
1156 2, /* scalar_store_cost. */
1157 5, /* vec_stmt_cost. */
1158 0, /* vec_to_scalar_cost. */
1159 2, /* scalar_to_vec_cost. */
1160 2, /* vec_align_load_cost. */
1161 3, /* vec_unalign_load_cost. */
1162 3, /* vec_store_cost. */
1163 3, /* cond_taken_branch_cost. */
1164 2, /* cond_not_taken_branch_cost. */
1165 };
1166
1167 struct processor_costs amdfam10_cost = {
1168 COSTS_N_INSNS (1), /* cost of an add instruction */
1169 COSTS_N_INSNS (2), /* cost of a lea instruction */
1170 COSTS_N_INSNS (1), /* variable shift costs */
1171 COSTS_N_INSNS (1), /* constant shift costs */
1172 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1173 COSTS_N_INSNS (4), /* HI */
1174 COSTS_N_INSNS (3), /* SI */
1175 COSTS_N_INSNS (4), /* DI */
1176 COSTS_N_INSNS (5)}, /* other */
1177 0, /* cost of multiply per each bit set */
1178 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1179 COSTS_N_INSNS (35), /* HI */
1180 COSTS_N_INSNS (51), /* SI */
1181 COSTS_N_INSNS (83), /* DI */
1182 COSTS_N_INSNS (83)}, /* other */
1183 COSTS_N_INSNS (1), /* cost of movsx */
1184 COSTS_N_INSNS (1), /* cost of movzx */
1185 8, /* "large" insn */
1186 9, /* MOVE_RATIO */
1187 4, /* cost for loading QImode using movzbl */
1188 {3, 4, 3}, /* cost of loading integer registers
1189 in QImode, HImode and SImode.
1190 Relative to reg-reg move (2). */
1191 {3, 4, 3}, /* cost of storing integer registers */
1192 4, /* cost of reg,reg fld/fst */
1193 {4, 4, 12}, /* cost of loading fp registers
1194 in SFmode, DFmode and XFmode */
1195 {6, 6, 8}, /* cost of storing fp registers
1196 in SFmode, DFmode and XFmode */
1197 2, /* cost of moving MMX register */
1198 {3, 3}, /* cost of loading MMX registers
1199 in SImode and DImode */
1200 {4, 4}, /* cost of storing MMX registers
1201 in SImode and DImode */
1202 2, /* cost of moving SSE register */
1203 {4, 4, 3}, /* cost of loading SSE registers
1204 in SImode, DImode and TImode */
1205 {4, 4, 5}, /* cost of storing SSE registers
1206 in SImode, DImode and TImode */
1207 3, /* MMX or SSE register to integer */
1208 /* On K8:
1209 MOVD reg64, xmmreg Double FSTORE 4
1210 MOVD reg32, xmmreg Double FSTORE 4
1211 On AMDFAM10:
1212 MOVD reg64, xmmreg Double FADD 3
1213 1/1 1/1
1214 MOVD reg32, xmmreg Double FADD 3
1215 1/1 1/1 */
1216 64, /* size of l1 cache. */
1217 512, /* size of l2 cache. */
1218 64, /* size of prefetch block */
1219 /* New AMD processors never drop prefetches; if they cannot be performed
1220 immediately, they are queued. We set number of simultaneous prefetches
1221 to a large constant to reflect this (it probably is not a good idea not
1222 to limit number of prefetches at all, as their execution also takes some
1223 time). */
1224 100, /* number of parallel prefetches */
1225 2, /* Branch cost */
1226 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1227 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1228 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1229 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1230 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1231 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1232
1233 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
1234 very small blocks it is better to use loop. For large blocks, libcall can
 1235      do nontemporal accesses and beat inline considerably.  */
1236 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1237 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1238 {{libcall, {{8, loop}, {24, unrolled_loop},
1239 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1240 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1241 4, /* scalar_stmt_cost. */
1242 2, /* scalar load_cost. */
1243 2, /* scalar_store_cost. */
1244 6, /* vec_stmt_cost. */
1245 0, /* vec_to_scalar_cost. */
1246 2, /* scalar_to_vec_cost. */
1247 2, /* vec_align_load_cost. */
1248 2, /* vec_unalign_load_cost. */
1249 2, /* vec_store_cost. */
1250 2, /* cond_taken_branch_cost. */
1251 1, /* cond_not_taken_branch_cost. */
1252 };
1253
1254 struct processor_costs bdver1_cost = {
1255 COSTS_N_INSNS (1), /* cost of an add instruction */
1256 COSTS_N_INSNS (1), /* cost of a lea instruction */
1257 COSTS_N_INSNS (1), /* variable shift costs */
1258 COSTS_N_INSNS (1), /* constant shift costs */
1259 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1260 COSTS_N_INSNS (4), /* HI */
1261 COSTS_N_INSNS (4), /* SI */
1262 COSTS_N_INSNS (6), /* DI */
1263 COSTS_N_INSNS (6)}, /* other */
1264 0, /* cost of multiply per each bit set */
1265 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1266 COSTS_N_INSNS (35), /* HI */
1267 COSTS_N_INSNS (51), /* SI */
1268 COSTS_N_INSNS (83), /* DI */
1269 COSTS_N_INSNS (83)}, /* other */
1270 COSTS_N_INSNS (1), /* cost of movsx */
1271 COSTS_N_INSNS (1), /* cost of movzx */
1272 8, /* "large" insn */
1273 9, /* MOVE_RATIO */
1274 4, /* cost for loading QImode using movzbl */
1275 {5, 5, 4}, /* cost of loading integer registers
1276 in QImode, HImode and SImode.
1277 Relative to reg-reg move (2). */
1278 {4, 4, 4}, /* cost of storing integer registers */
1279 2, /* cost of reg,reg fld/fst */
1280 {5, 5, 12}, /* cost of loading fp registers
1281 in SFmode, DFmode and XFmode */
1282 {4, 4, 8}, /* cost of storing fp registers
1283 in SFmode, DFmode and XFmode */
1284 2, /* cost of moving MMX register */
1285 {4, 4}, /* cost of loading MMX registers
1286 in SImode and DImode */
1287 {4, 4}, /* cost of storing MMX registers
1288 in SImode and DImode */
1289 2, /* cost of moving SSE register */
1290 {4, 4, 4}, /* cost of loading SSE registers
1291 in SImode, DImode and TImode */
1292 {4, 4, 4}, /* cost of storing SSE registers
1293 in SImode, DImode and TImode */
1294 2, /* MMX or SSE register to integer */
1295 /* On K8:
1296 MOVD reg64, xmmreg Double FSTORE 4
1297 MOVD reg32, xmmreg Double FSTORE 4
1298 On AMDFAM10:
1299 MOVD reg64, xmmreg Double FADD 3
1300 1/1 1/1
1301 MOVD reg32, xmmreg Double FADD 3
1302 1/1 1/1 */
1303 16, /* size of l1 cache. */
1304 2048, /* size of l2 cache. */
1305 64, /* size of prefetch block */
1306 /* New AMD processors never drop prefetches; if they cannot be performed
1307 immediately, they are queued. We set number of simultaneous prefetches
1308 to a large constant to reflect this (it probably is not a good idea not
1309 to limit number of prefetches at all, as their execution also takes some
1310 time). */
1311 100, /* number of parallel prefetches */
1312 2, /* Branch cost */
1313 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1314 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1315 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1316 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1317 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1318 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1319
1320 /* BDVER1 has optimized REP instruction for medium sized blocks, but for
1321 very small blocks it is better to use loop. For large blocks, libcall
 1322      can do nontemporal accesses and beat inline considerably.  */
1323 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1324 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1325 {{libcall, {{8, loop}, {24, unrolled_loop},
1326 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1327 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1328 6, /* scalar_stmt_cost. */
1329 4, /* scalar load_cost. */
1330 4, /* scalar_store_cost. */
1331 6, /* vec_stmt_cost. */
1332 0, /* vec_to_scalar_cost. */
1333 2, /* scalar_to_vec_cost. */
1334 4, /* vec_align_load_cost. */
1335 4, /* vec_unalign_load_cost. */
1336 4, /* vec_store_cost. */
1337 2, /* cond_taken_branch_cost. */
1338 1, /* cond_not_taken_branch_cost. */
1339 };
1340
1341 struct processor_costs bdver2_cost = {
1342 COSTS_N_INSNS (1), /* cost of an add instruction */
1343 COSTS_N_INSNS (1), /* cost of a lea instruction */
1344 COSTS_N_INSNS (1), /* variable shift costs */
1345 COSTS_N_INSNS (1), /* constant shift costs */
1346 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1347 COSTS_N_INSNS (4), /* HI */
1348 COSTS_N_INSNS (4), /* SI */
1349 COSTS_N_INSNS (6), /* DI */
1350 COSTS_N_INSNS (6)}, /* other */
1351 0, /* cost of multiply per each bit set */
1352 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1353 COSTS_N_INSNS (35), /* HI */
1354 COSTS_N_INSNS (51), /* SI */
1355 COSTS_N_INSNS (83), /* DI */
1356 COSTS_N_INSNS (83)}, /* other */
1357 COSTS_N_INSNS (1), /* cost of movsx */
1358 COSTS_N_INSNS (1), /* cost of movzx */
1359 8, /* "large" insn */
1360 9, /* MOVE_RATIO */
1361 4, /* cost for loading QImode using movzbl */
1362 {5, 5, 4}, /* cost of loading integer registers
1363 in QImode, HImode and SImode.
1364 Relative to reg-reg move (2). */
1365 {4, 4, 4}, /* cost of storing integer registers */
1366 2, /* cost of reg,reg fld/fst */
1367 {5, 5, 12}, /* cost of loading fp registers
1368 in SFmode, DFmode and XFmode */
1369 {4, 4, 8}, /* cost of storing fp registers
1370 in SFmode, DFmode and XFmode */
1371 2, /* cost of moving MMX register */
1372 {4, 4}, /* cost of loading MMX registers
1373 in SImode and DImode */
1374 {4, 4}, /* cost of storing MMX registers
1375 in SImode and DImode */
1376 2, /* cost of moving SSE register */
1377 {4, 4, 4}, /* cost of loading SSE registers
1378 in SImode, DImode and TImode */
1379 {4, 4, 4}, /* cost of storing SSE registers
1380 in SImode, DImode and TImode */
1381 2, /* MMX or SSE register to integer */
1382 /* On K8:
1383 MOVD reg64, xmmreg Double FSTORE 4
1384 MOVD reg32, xmmreg Double FSTORE 4
1385 On AMDFAM10:
1386 MOVD reg64, xmmreg Double FADD 3
1387 1/1 1/1
1388 MOVD reg32, xmmreg Double FADD 3
1389 1/1 1/1 */
1390 16, /* size of l1 cache. */
1391 2048, /* size of l2 cache. */
1392 64, /* size of prefetch block */
1393 /* New AMD processors never drop prefetches; if they cannot be performed
1394 immediately, they are queued. We set number of simultaneous prefetches
1395 to a large constant to reflect this (it probably is not a good idea not
1396 to limit number of prefetches at all, as their execution also takes some
1397 time). */
1398 100, /* number of parallel prefetches */
1399 2, /* Branch cost */
1400 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1401 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1402 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1403 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1404 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1405 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1406
1407 /* BDVER2 has optimized REP instruction for medium sized blocks, but for
1408 very small blocks it is better to use loop. For large blocks, libcall
 1409      can do nontemporal accesses and beat inline considerably.  */
1410 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1411 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1412 {{libcall, {{8, loop}, {24, unrolled_loop},
1413 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1414 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1415 6, /* scalar_stmt_cost. */
1416 4, /* scalar load_cost. */
1417 4, /* scalar_store_cost. */
1418 6, /* vec_stmt_cost. */
1419 0, /* vec_to_scalar_cost. */
1420 2, /* scalar_to_vec_cost. */
1421 4, /* vec_align_load_cost. */
1422 4, /* vec_unalign_load_cost. */
1423 4, /* vec_store_cost. */
1424 2, /* cond_taken_branch_cost. */
1425 1, /* cond_not_taken_branch_cost. */
1426 };
1427
1428 struct processor_costs btver1_cost = {
1429 COSTS_N_INSNS (1), /* cost of an add instruction */
1430 COSTS_N_INSNS (2), /* cost of a lea instruction */
1431 COSTS_N_INSNS (1), /* variable shift costs */
1432 COSTS_N_INSNS (1), /* constant shift costs */
1433 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1434 COSTS_N_INSNS (4), /* HI */
1435 COSTS_N_INSNS (3), /* SI */
1436 COSTS_N_INSNS (4), /* DI */
1437 COSTS_N_INSNS (5)}, /* other */
1438 0, /* cost of multiply per each bit set */
1439 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1440 COSTS_N_INSNS (35), /* HI */
1441 COSTS_N_INSNS (51), /* SI */
1442 COSTS_N_INSNS (83), /* DI */
1443 COSTS_N_INSNS (83)}, /* other */
1444 COSTS_N_INSNS (1), /* cost of movsx */
1445 COSTS_N_INSNS (1), /* cost of movzx */
1446 8, /* "large" insn */
1447 9, /* MOVE_RATIO */
1448 4, /* cost for loading QImode using movzbl */
1449 {3, 4, 3}, /* cost of loading integer registers
1450 in QImode, HImode and SImode.
1451 Relative to reg-reg move (2). */
1452 {3, 4, 3}, /* cost of storing integer registers */
1453 4, /* cost of reg,reg fld/fst */
1454 {4, 4, 12}, /* cost of loading fp registers
1455 in SFmode, DFmode and XFmode */
1456 {6, 6, 8}, /* cost of storing fp registers
1457 in SFmode, DFmode and XFmode */
1458 2, /* cost of moving MMX register */
1459 {3, 3}, /* cost of loading MMX registers
1460 in SImode and DImode */
1461 {4, 4}, /* cost of storing MMX registers
1462 in SImode and DImode */
1463 2, /* cost of moving SSE register */
1464 {4, 4, 3}, /* cost of loading SSE registers
1465 in SImode, DImode and TImode */
1466 {4, 4, 5}, /* cost of storing SSE registers
1467 in SImode, DImode and TImode */
1468 3, /* MMX or SSE register to integer */
1469 /* On K8:
1470 MOVD reg64, xmmreg Double FSTORE 4
1471 MOVD reg32, xmmreg Double FSTORE 4
1472 On AMDFAM10:
1473 MOVD reg64, xmmreg Double FADD 3
1474 1/1 1/1
1475 MOVD reg32, xmmreg Double FADD 3
1476 1/1 1/1 */
1477 32, /* size of l1 cache. */
1478 512, /* size of l2 cache. */
1479 64, /* size of prefetch block */
1480 100, /* number of parallel prefetches */
1481 2, /* Branch cost */
1482 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1483 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1484 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1485 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1486 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1487 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1488
1489 /* BTVER1 has optimized REP instruction for medium sized blocks, but for
1490 very small blocks it is better to use loop. For large blocks, libcall can
 1491      do nontemporal accesses and beat inline considerably.  */
1492 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1493 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1494 {{libcall, {{8, loop}, {24, unrolled_loop},
1495 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1496 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1497 4, /* scalar_stmt_cost. */
1498 2, /* scalar load_cost. */
1499 2, /* scalar_store_cost. */
1500 6, /* vec_stmt_cost. */
1501 0, /* vec_to_scalar_cost. */
1502 2, /* scalar_to_vec_cost. */
1503 2, /* vec_align_load_cost. */
1504 2, /* vec_unalign_load_cost. */
1505 2, /* vec_store_cost. */
1506 2, /* cond_taken_branch_cost. */
1507 1, /* cond_not_taken_branch_cost. */
1508 };
1509
1510 static const
1511 struct processor_costs pentium4_cost = {
1512 COSTS_N_INSNS (1), /* cost of an add instruction */
1513 COSTS_N_INSNS (3), /* cost of a lea instruction */
1514 COSTS_N_INSNS (4), /* variable shift costs */
1515 COSTS_N_INSNS (4), /* constant shift costs */
1516 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1517 COSTS_N_INSNS (15), /* HI */
1518 COSTS_N_INSNS (15), /* SI */
1519 COSTS_N_INSNS (15), /* DI */
1520 COSTS_N_INSNS (15)}, /* other */
1521 0, /* cost of multiply per each bit set */
1522 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1523 COSTS_N_INSNS (56), /* HI */
1524 COSTS_N_INSNS (56), /* SI */
1525 COSTS_N_INSNS (56), /* DI */
1526 COSTS_N_INSNS (56)}, /* other */
1527 COSTS_N_INSNS (1), /* cost of movsx */
1528 COSTS_N_INSNS (1), /* cost of movzx */
1529 16, /* "large" insn */
1530 6, /* MOVE_RATIO */
1531 2, /* cost for loading QImode using movzbl */
1532 {4, 5, 4}, /* cost of loading integer registers
1533 in QImode, HImode and SImode.
1534 Relative to reg-reg move (2). */
1535 {2, 3, 2}, /* cost of storing integer registers */
1536 2, /* cost of reg,reg fld/fst */
1537 {2, 2, 6}, /* cost of loading fp registers
1538 in SFmode, DFmode and XFmode */
1539 {4, 4, 6}, /* cost of storing fp registers
1540 in SFmode, DFmode and XFmode */
1541 2, /* cost of moving MMX register */
1542 {2, 2}, /* cost of loading MMX registers
1543 in SImode and DImode */
1544 {2, 2}, /* cost of storing MMX registers
1545 in SImode and DImode */
1546 12, /* cost of moving SSE register */
1547 {12, 12, 12}, /* cost of loading SSE registers
1548 in SImode, DImode and TImode */
1549 {2, 2, 8}, /* cost of storing SSE registers
1550 in SImode, DImode and TImode */
1551 10, /* MMX or SSE register to integer */
1552 8, /* size of l1 cache. */
1553 256, /* size of l2 cache. */
1554 64, /* size of prefetch block */
1555 6, /* number of parallel prefetches */
1556 2, /* Branch cost */
1557 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1558 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1559 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1560 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1561 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1562 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1563 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
1564 DUMMY_STRINGOP_ALGS},
1565 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
1566 {-1, libcall}}},
1567 DUMMY_STRINGOP_ALGS},
1568 1, /* scalar_stmt_cost. */
1569 1, /* scalar load_cost. */
1570 1, /* scalar_store_cost. */
1571 1, /* vec_stmt_cost. */
1572 1, /* vec_to_scalar_cost. */
1573 1, /* scalar_to_vec_cost. */
1574 1, /* vec_align_load_cost. */
1575 2, /* vec_unalign_load_cost. */
1576 1, /* vec_store_cost. */
1577 3, /* cond_taken_branch_cost. */
1578 1, /* cond_not_taken_branch_cost. */
1579 };
1580
1581 static const
1582 struct processor_costs nocona_cost = {
1583 COSTS_N_INSNS (1), /* cost of an add instruction */
1584 COSTS_N_INSNS (1), /* cost of a lea instruction */
1585 COSTS_N_INSNS (1), /* variable shift costs */
1586 COSTS_N_INSNS (1), /* constant shift costs */
1587 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1588 COSTS_N_INSNS (10), /* HI */
1589 COSTS_N_INSNS (10), /* SI */
1590 COSTS_N_INSNS (10), /* DI */
1591 COSTS_N_INSNS (10)}, /* other */
1592 0, /* cost of multiply per each bit set */
1593 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1594 COSTS_N_INSNS (66), /* HI */
1595 COSTS_N_INSNS (66), /* SI */
1596 COSTS_N_INSNS (66), /* DI */
1597 COSTS_N_INSNS (66)}, /* other */
1598 COSTS_N_INSNS (1), /* cost of movsx */
1599 COSTS_N_INSNS (1), /* cost of movzx */
1600 16, /* "large" insn */
1601 17, /* MOVE_RATIO */
1602 4, /* cost for loading QImode using movzbl */
1603 {4, 4, 4}, /* cost of loading integer registers
1604 in QImode, HImode and SImode.
1605 Relative to reg-reg move (2). */
1606 {4, 4, 4}, /* cost of storing integer registers */
1607 3, /* cost of reg,reg fld/fst */
1608 {12, 12, 12}, /* cost of loading fp registers
1609 in SFmode, DFmode and XFmode */
1610 {4, 4, 4}, /* cost of storing fp registers
1611 in SFmode, DFmode and XFmode */
1612 6, /* cost of moving MMX register */
1613 {12, 12}, /* cost of loading MMX registers
1614 in SImode and DImode */
1615 {12, 12}, /* cost of storing MMX registers
1616 in SImode and DImode */
1617 6, /* cost of moving SSE register */
1618 {12, 12, 12}, /* cost of loading SSE registers
1619 in SImode, DImode and TImode */
1620 {12, 12, 12}, /* cost of storing SSE registers
1621 in SImode, DImode and TImode */
1622 8, /* MMX or SSE register to integer */
1623 8, /* size of l1 cache. */
1624 1024, /* size of l2 cache. */
1625 128, /* size of prefetch block */
1626 8, /* number of parallel prefetches */
1627 1, /* Branch cost */
1628 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1629 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1630 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1631 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1632 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1633 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1634 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
1635 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
1636 {100000, unrolled_loop}, {-1, libcall}}}},
1637 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
1638 {-1, libcall}}},
1639 {libcall, {{24, loop}, {64, unrolled_loop},
1640 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1641 1, /* scalar_stmt_cost. */
1642 1, /* scalar load_cost. */
1643 1, /* scalar_store_cost. */
1644 1, /* vec_stmt_cost. */
1645 1, /* vec_to_scalar_cost. */
1646 1, /* scalar_to_vec_cost. */
1647 1, /* vec_align_load_cost. */
1648 2, /* vec_unalign_load_cost. */
1649 1, /* vec_store_cost. */
1650 3, /* cond_taken_branch_cost. */
1651 1, /* cond_not_taken_branch_cost. */
1652 };
1653
1654 static const
1655 struct processor_costs atom_cost = {
1656 COSTS_N_INSNS (1), /* cost of an add instruction */
1657 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1658 COSTS_N_INSNS (1), /* variable shift costs */
1659 COSTS_N_INSNS (1), /* constant shift costs */
1660 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1661 COSTS_N_INSNS (4), /* HI */
1662 COSTS_N_INSNS (3), /* SI */
1663 COSTS_N_INSNS (4), /* DI */
1664 COSTS_N_INSNS (2)}, /* other */
1665 0, /* cost of multiply per each bit set */
1666 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1667 COSTS_N_INSNS (26), /* HI */
1668 COSTS_N_INSNS (42), /* SI */
1669 COSTS_N_INSNS (74), /* DI */
1670 COSTS_N_INSNS (74)}, /* other */
1671 COSTS_N_INSNS (1), /* cost of movsx */
1672 COSTS_N_INSNS (1), /* cost of movzx */
1673 8, /* "large" insn */
1674 17, /* MOVE_RATIO */
1675 2, /* cost for loading QImode using movzbl */
1676 {4, 4, 4}, /* cost of loading integer registers
1677 in QImode, HImode and SImode.
1678 Relative to reg-reg move (2). */
1679 {4, 4, 4}, /* cost of storing integer registers */
1680 4, /* cost of reg,reg fld/fst */
1681 {12, 12, 12}, /* cost of loading fp registers
1682 in SFmode, DFmode and XFmode */
1683 {6, 6, 8}, /* cost of storing fp registers
1684 in SFmode, DFmode and XFmode */
1685 2, /* cost of moving MMX register */
1686 {8, 8}, /* cost of loading MMX registers
1687 in SImode and DImode */
1688 {8, 8}, /* cost of storing MMX registers
1689 in SImode and DImode */
1690 2, /* cost of moving SSE register */
1691 {8, 8, 8}, /* cost of loading SSE registers
1692 in SImode, DImode and TImode */
1693 {8, 8, 8}, /* cost of storing SSE registers
1694 in SImode, DImode and TImode */
1695 5, /* MMX or SSE register to integer */
1696 32, /* size of l1 cache. */
1697 256, /* size of l2 cache. */
1698 64, /* size of prefetch block */
1699 6, /* number of parallel prefetches */
1700 3, /* Branch cost */
1701 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1702 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1703 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1704 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1705 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1706 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1707 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1708 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1709 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1710 {{libcall, {{8, loop}, {15, unrolled_loop},
1711 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1712 {libcall, {{24, loop}, {32, unrolled_loop},
1713 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1714 1, /* scalar_stmt_cost. */
1715 1, /* scalar load_cost. */
1716 1, /* scalar_store_cost. */
1717 1, /* vec_stmt_cost. */
1718 1, /* vec_to_scalar_cost. */
1719 1, /* scalar_to_vec_cost. */
1720 1, /* vec_align_load_cost. */
1721 2, /* vec_unalign_load_cost. */
1722 1, /* vec_store_cost. */
1723 3, /* cond_taken_branch_cost. */
1724 1, /* cond_not_taken_branch_cost. */
1725 };
1726
1727 /* Generic64 should produce code tuned for Nocona and K8. */
1728 static const
1729 struct processor_costs generic64_cost = {
1730 COSTS_N_INSNS (1), /* cost of an add instruction */
1731 /* On all chips taken into consideration, lea is 2 cycles or more. With
1732 this cost, however, our current implementation of synth_mult results in
1733 the use of unnecessary temporary registers, causing regressions on several
1734 SPECfp benchmarks. */
1735 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1736 COSTS_N_INSNS (1), /* variable shift costs */
1737 COSTS_N_INSNS (1), /* constant shift costs */
1738 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1739 COSTS_N_INSNS (4), /* HI */
1740 COSTS_N_INSNS (3), /* SI */
1741 COSTS_N_INSNS (4), /* DI */
1742 COSTS_N_INSNS (2)}, /* other */
1743 0, /* cost of multiply per each bit set */
1744 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1745 COSTS_N_INSNS (26), /* HI */
1746 COSTS_N_INSNS (42), /* SI */
1747 COSTS_N_INSNS (74), /* DI */
1748 COSTS_N_INSNS (74)}, /* other */
1749 COSTS_N_INSNS (1), /* cost of movsx */
1750 COSTS_N_INSNS (1), /* cost of movzx */
1751 8, /* "large" insn */
1752 17, /* MOVE_RATIO */
1753 4, /* cost for loading QImode using movzbl */
1754 {4, 4, 4}, /* cost of loading integer registers
1755 in QImode, HImode and SImode.
1756 Relative to reg-reg move (2). */
1757 {4, 4, 4}, /* cost of storing integer registers */
1758 4, /* cost of reg,reg fld/fst */
1759 {12, 12, 12}, /* cost of loading fp registers
1760 in SFmode, DFmode and XFmode */
1761 {6, 6, 8}, /* cost of storing fp registers
1762 in SFmode, DFmode and XFmode */
1763 2, /* cost of moving MMX register */
1764 {8, 8}, /* cost of loading MMX registers
1765 in SImode and DImode */
1766 {8, 8}, /* cost of storing MMX registers
1767 in SImode and DImode */
1768 2, /* cost of moving SSE register */
1769 {8, 8, 8}, /* cost of loading SSE registers
1770 in SImode, DImode and TImode */
1771 {8, 8, 8}, /* cost of storing SSE registers
1772 in SImode, DImode and TImode */
1773 5, /* MMX or SSE register to integer */
1774 32, /* size of l1 cache. */
1775 512, /* size of l2 cache. */
1776 64, /* size of prefetch block */
1777 6, /* number of parallel prefetches */
1778 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1779 value is increased to the perhaps more appropriate value of 5. */
1780 3, /* Branch cost */
1781 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1782 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1783 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1784 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1785 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1786 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1787 {DUMMY_STRINGOP_ALGS,
1788 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1789 {DUMMY_STRINGOP_ALGS,
1790 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1791 1, /* scalar_stmt_cost. */
1792 1, /* scalar load_cost. */
1793 1, /* scalar_store_cost. */
1794 1, /* vec_stmt_cost. */
1795 1, /* vec_to_scalar_cost. */
1796 1, /* scalar_to_vec_cost. */
1797 1, /* vec_align_load_cost. */
1798 2, /* vec_unalign_load_cost. */
1799 1, /* vec_store_cost. */
1800 3, /* cond_taken_branch_cost. */
1801 1, /* cond_not_taken_branch_cost. */
1802 };
1803
1804 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
1805 Athlon and K8. */
1806 static const
1807 struct processor_costs generic32_cost = {
1808 COSTS_N_INSNS (1), /* cost of an add instruction */
1809 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1810 COSTS_N_INSNS (1), /* variable shift costs */
1811 COSTS_N_INSNS (1), /* constant shift costs */
1812 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1813 COSTS_N_INSNS (4), /* HI */
1814 COSTS_N_INSNS (3), /* SI */
1815 COSTS_N_INSNS (4), /* DI */
1816 COSTS_N_INSNS (2)}, /* other */
1817 0, /* cost of multiply per each bit set */
1818 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1819 COSTS_N_INSNS (26), /* HI */
1820 COSTS_N_INSNS (42), /* SI */
1821 COSTS_N_INSNS (74), /* DI */
1822 COSTS_N_INSNS (74)}, /* other */
1823 COSTS_N_INSNS (1), /* cost of movsx */
1824 COSTS_N_INSNS (1), /* cost of movzx */
1825 8, /* "large" insn */
1826 17, /* MOVE_RATIO */
1827 4, /* cost for loading QImode using movzbl */
1828 {4, 4, 4}, /* cost of loading integer registers
1829 in QImode, HImode and SImode.
1830 Relative to reg-reg move (2). */
1831 {4, 4, 4}, /* cost of storing integer registers */
1832 4, /* cost of reg,reg fld/fst */
1833 {12, 12, 12}, /* cost of loading fp registers
1834 in SFmode, DFmode and XFmode */
1835 {6, 6, 8}, /* cost of storing fp registers
1836 in SFmode, DFmode and XFmode */
1837 2, /* cost of moving MMX register */
1838 {8, 8}, /* cost of loading MMX registers
1839 in SImode and DImode */
1840 {8, 8}, /* cost of storing MMX registers
1841 in SImode and DImode */
1842 2, /* cost of moving SSE register */
1843 {8, 8, 8}, /* cost of loading SSE registers
1844 in SImode, DImode and TImode */
1845 {8, 8, 8}, /* cost of storing SSE registers
1846 in SImode, DImode and TImode */
1847 5, /* MMX or SSE register to integer */
1848 32, /* size of l1 cache. */
1849 256, /* size of l2 cache. */
1850 64, /* size of prefetch block */
1851 6, /* number of parallel prefetches */
1852 3, /* Branch cost */
1853 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1854 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1855 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1856 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1857 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1858 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1859 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1860 DUMMY_STRINGOP_ALGS},
1861 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1862 DUMMY_STRINGOP_ALGS},
1863 1, /* scalar_stmt_cost. */
1864 1, /* scalar load_cost. */
1865 1, /* scalar_store_cost. */
1866 1, /* vec_stmt_cost. */
1867 1, /* vec_to_scalar_cost. */
1868 1, /* scalar_to_vec_cost. */
1869 1, /* vec_align_load_cost. */
1870 2, /* vec_unalign_load_cost. */
1871 1, /* vec_store_cost. */
1872 3, /* cond_taken_branch_cost. */
1873 1, /* cond_not_taken_branch_cost. */
1874 };
1875
1876 const struct processor_costs *ix86_cost = &pentium_cost;
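/* Editorial note: pentium_cost here is only an initial placeholder; the
   option-override code later repoints ix86_cost at the table for the selected
   -mtune CPU, roughly as sketched below (the exact assignment, including the
   optimize-for-size case, lives in ix86_option_override_internal):

     ix86_cost = processor_target_table[ix86_tune].cost;
*/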
1877
1878 /* Processor feature/optimization bitmasks. */
1879 #define m_386 (1<<PROCESSOR_I386)
1880 #define m_486 (1<<PROCESSOR_I486)
1881 #define m_PENT (1<<PROCESSOR_PENTIUM)
1882 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1883 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1884 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1885 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
1886 #define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
1887 #define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
1888 #define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
1889 #define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
1890 #define m_COREI7 (m_COREI7_32 | m_COREI7_64)
1891 #define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
1892 #define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
1893 #define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
1894 #define m_ATOM (1<<PROCESSOR_ATOM)
1895
1896 #define m_GEODE (1<<PROCESSOR_GEODE)
1897 #define m_K6 (1<<PROCESSOR_K6)
1898 #define m_K6_GEODE (m_K6 | m_GEODE)
1899 #define m_K8 (1<<PROCESSOR_K8)
1900 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1901 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1902 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1903 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1904 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
1905 #define m_BDVER (m_BDVER1 | m_BDVER2)
1906 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
1907 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1)
1908
1909 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1910 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1911
1912 /* Generic instruction choice should be the common subset of supported CPUs
1913 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1914 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1915
1916 /* Feature tests against the various tunings. */
1917 unsigned char ix86_tune_features[X86_TUNE_LAST];
1918
1919 /* Feature tests against the various tunings used to create ix86_tune_features
1920 based on the processor mask. */
1921 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1922 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1923 negatively, so enabling it for Generic64 seems like a good code-size
1924 tradeoff. We can't enable it for 32bit generic because it does not
1925 work well with PPro-based chips. */
1926 m_386 | m_CORE2I7_64 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC64,
1927
1928 /* X86_TUNE_PUSH_MEMORY */
1929 m_386 | m_P4_NOCONA | m_CORE2I7 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1930
1931 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1932 m_486 | m_PENT,
1933
1934 /* X86_TUNE_UNROLL_STRLEN */
1935 m_486 | m_PENT | m_PPRO | m_ATOM | m_CORE2I7 | m_K6 | m_AMD_MULTIPLE | m_GENERIC,
1936
1937 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were added to the P4 based
1938 on simulation results, but after the P4 shipped, no performance benefit
1939 was observed from branch hints, and they increase code size.
1940 As a result, icc never generates branch hints. */
1941 0,
1942
1943 /* X86_TUNE_DOUBLE_WITH_ADD */
1944 ~m_386,
1945
1946 /* X86_TUNE_USE_SAHF */
1947 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC,
1948
1949 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1950 partial dependencies. */
1951 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1952
1953 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1954 register stalls on the Generic32 compilation setting as well. However,
1955 in the current implementation partial register stalls are not eliminated
1956 very well - they can be introduced via subregs synthesized by combine
1957 and can happen in caller/callee saving sequences. Because this option
1958 pays back little on PPro-based chips and conflicts with the partial reg
1959 dependencies used by Athlon/P4-based chips, it is better to leave it off
1960 for generic32 for now. */
1961 m_PPRO,
1962
1963 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1964 m_CORE2I7 | m_GENERIC,
1965
1966 /* X86_TUNE_USE_HIMODE_FIOP */
1967 m_386 | m_486 | m_K6_GEODE,
1968
1969 /* X86_TUNE_USE_SIMODE_FIOP */
1970 ~(m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC),
1971
1972 /* X86_TUNE_USE_MOV0 */
1973 m_K6,
1974
1975 /* X86_TUNE_USE_CLTD */
1976 ~(m_PENT | m_CORE2I7 | m_ATOM | m_K6 | m_GENERIC),
1977
1978 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1979 m_PENT4,
1980
1981 /* X86_TUNE_SPLIT_LONG_MOVES */
1982 m_PPRO,
1983
1984 /* X86_TUNE_READ_MODIFY_WRITE */
1985 ~m_PENT,
1986
1987 /* X86_TUNE_READ_MODIFY */
1988 ~(m_PENT | m_PPRO),
1989
1990 /* X86_TUNE_PROMOTE_QIMODE */
1991 m_386 | m_486 | m_PENT | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1992
1993 /* X86_TUNE_FAST_PREFIX */
1994 ~(m_386 | m_486 | m_PENT),
1995
1996 /* X86_TUNE_SINGLE_STRINGOP */
1997 m_386 | m_P4_NOCONA,
1998
1999 /* X86_TUNE_QIMODE_MATH */
2000 ~0,
2001
2002 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
2003 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL, this option
2004 might be considered for Generic32 if our scheme for avoiding partial
2005 stalls were more effective. */
2006 ~m_PPRO,
2007
2008 /* X86_TUNE_PROMOTE_QI_REGS */
2009 0,
2010
2011 /* X86_TUNE_PROMOTE_HI_REGS */
2012 m_PPRO,
2013
2014 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
2015 over esp addition. */
2016 m_386 | m_486 | m_PENT | m_PPRO,
2017
2018 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
2019 over esp addition. */
2020 m_PENT,
2021
2022 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
2023 over esp subtraction. */
2024 m_386 | m_486 | m_PENT | m_K6_GEODE,
2025
2026 /* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred
2027 over esp subtraction. */
2028 m_PENT | m_K6_GEODE,
2029
2030 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
2031 for DFmode copies */
2032 ~(m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC),
2033
2034 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
2035 m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2036
2037 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
2038 conflict here between PPro/Pentium4 based chips that treat 128bit
2039 SSE registers as single units and K8 based chips that divide SSE
2040 registers into two 64bit halves. This knob promotes all store destinations
2041 to be 128bit to allow register renaming on 128bit SSE units, but usually
2042 results in one extra microop on 64bit SSE units. Experimental results
2043 show that disabling this option on P4 brings over a 20% SPECfp regression,
2044 while enabling it on K8 brings roughly a 2.4% regression that can be partly
2045 masked by careful scheduling of moves. */
2046 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMDFAM10 | m_BDVER | m_GENERIC,
2047
2048 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
2049 m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER1,
2050
2051 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
2052 m_COREI7 | m_BDVER,
2053
2054 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
2055 m_BDVER,
2056
2057 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where types and dependencies
2058 are resolved on SSE register parts instead of whole registers, so we may
2059 maintain just the lower part of scalar values in the proper format, leaving
2060 the upper part undefined. */
2061 m_ATHLON_K8,
2062
2063 /* X86_TUNE_SSE_TYPELESS_STORES */
2064 m_AMD_MULTIPLE,
2065
2066 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
2067 m_PPRO | m_P4_NOCONA,
2068
2069 /* X86_TUNE_MEMORY_MISMATCH_STALL */
2070 m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2071
2072 /* X86_TUNE_PROLOGUE_USING_MOVE */
2073 m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
2074
2075 /* X86_TUNE_EPILOGUE_USING_MOVE */
2076 m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
2077
2078 /* X86_TUNE_SHIFT1 */
2079 ~m_486,
2080
2081 /* X86_TUNE_USE_FFREEP */
2082 m_AMD_MULTIPLE,
2083
2084 /* X86_TUNE_INTER_UNIT_MOVES */
2085 ~(m_AMD_MULTIPLE | m_GENERIC),
2086
2087 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
2088 ~(m_AMDFAM10 | m_BDVER),
2089
2090 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
2091 than 4 branch instructions in a 16-byte window. */
2092 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2093
2094 /* X86_TUNE_SCHEDULE */
2095 m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
2096
2097 /* X86_TUNE_USE_BT */
2098 m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2099
2100 /* X86_TUNE_USE_INCDEC */
2101 ~(m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GENERIC),
2102
2103 /* X86_TUNE_PAD_RETURNS */
2104 m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC,
2105
2106 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short functions. */
2107 m_ATOM,
2108
2109 /* X86_TUNE_EXT_80387_CONSTANTS */
2110 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC,
2111
2112 /* X86_TUNE_SHORTEN_X87_SSE */
2113 ~m_K8,
2114
2115 /* X86_TUNE_AVOID_VECTOR_DECODE */
2116 m_CORE2I7_64 | m_K8 | m_GENERIC64,
2117
2118 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
2119 and SImode multiplies, but the 386 and 486 do HImode multiplies faster. */
2120 ~(m_386 | m_486),
2121
2122 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
2123 vector path on AMD machines. */
2124 m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
2125
2126 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
2127 machines. */
2128 m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
2129
2130 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
2131 than a MOV. */
2132 m_PENT,
2133
2134 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
2135 but one byte longer. */
2136 m_PENT,
2137
2138 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
2139 operand that cannot be represented using a modRM byte. The XOR
2140 replacement is long decoded, so this split helps here as well. */
2141 m_K6,
2142
2143 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
2144 from FP to FP. */
2145 m_CORE2I7 | m_AMDFAM10 | m_GENERIC,
2146
2147 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
2148 from integer to FP. */
2149 m_AMDFAM10,
2150
2151 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
2152 with a subsequent conditional jump instruction into a single
2153 compare-and-branch uop. */
2154 m_BDVER,
2155
2156 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
2157 will impact LEA instruction selection. */
2158 m_ATOM,
2159
2160 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
2161 instructions. */
2162 ~m_ATOM,
2163
2164 /* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching
2165 at -O3. For the moment, the prefetching seems badly tuned for Intel
2166 chips. */
2167 m_K6_GEODE | m_AMD_MULTIPLE,
2168
2169 /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
2170 the auto-vectorizer. */
2171 m_BDVER,
2172
2173 /* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations
2174 during reassociation of integer computation. */
2175 m_ATOM,
2176
2177 /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
2178 during reassociation of fp computation. */
2179 m_ATOM
2180 };
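/* Editorial note: a minimal sketch of how the masks above become the
   per-feature booleans in ix86_tune_features[].  The real code lives in
   ix86_option_override_internal and may differ in detail:

     ix86_tune_mask = 1u << ix86_tune;
     for (i = 0; i < X86_TUNE_LAST; i++)
       ix86_tune_features[i]
         = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
*/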
2181
2182 /* Feature tests against the various architecture variations. */
2183 unsigned char ix86_arch_features[X86_ARCH_LAST];
2184
2185 /* Feature tests against the various architecture variations, used to create
2186 ix86_arch_features based on the processor mask. */
2187 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2188 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
2189 ~(m_386 | m_486 | m_PENT | m_K6),
2190
2191 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2192 ~m_386,
2193
2194 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2195 ~(m_386 | m_486),
2196
2197 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2198 ~m_386,
2199
2200 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2201 ~m_386,
2202 };
2203
2204 static const unsigned int x86_accumulate_outgoing_args
2205 = m_PPRO | m_P4_NOCONA | m_ATOM | m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC;
2206
2207 static const unsigned int x86_arch_always_fancy_math_387
2208 = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC;
2209
2210 static const unsigned int x86_avx256_split_unaligned_load
2211 = m_COREI7 | m_GENERIC;
2212
2213 static const unsigned int x86_avx256_split_unaligned_store
2214 = m_COREI7 | m_BDVER | m_GENERIC;
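/* Editorial note: these two masks are tested against the selected -mtune CPU
   and, unless the user already set the options explicitly, enable the
   matching -mavx256-split-unaligned-{load,store} flags.  A hedged sketch of
   the idea (the actual test in ix86_option_override_internal also checks
   whether the user gave the flag on the command line):

     if (x86_avx256_split_unaligned_load & ix86_tune_mask)
       target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
*/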
2215
2216 /* If the average insn count for a single function invocation is
2217 lower than this constant, emit fast (but longer) prologue and
2218 epilogue code. */
2219 #define FAST_PROLOGUE_INSN_COUNT 20
2220
2221 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
2222 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2223 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2224 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2225
2226 /* Array of the smallest class containing reg number REGNO, indexed by
2227 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2228
2229 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2230 {
2231 /* ax, dx, cx, bx */
2232 AREG, DREG, CREG, BREG,
2233 /* si, di, bp, sp */
2234 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2235 /* FP registers */
2236 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2237 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2238 /* arg pointer */
2239 NON_Q_REGS,
2240 /* flags, fpsr, fpcr, frame */
2241 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2242 /* SSE registers */
2243 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2244 SSE_REGS, SSE_REGS,
2245 /* MMX registers */
2246 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2247 MMX_REGS, MMX_REGS,
2248 /* REX registers */
2249 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2250 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2251 /* SSE REX registers */
2252 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2253 SSE_REGS, SSE_REGS,
2254 };
2255
2256 /* The "default" register map used in 32bit mode. */
2257
2258 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2259 {
2260 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2261 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2262 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2263 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2264 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2265 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2266 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2267 };
2268
2269 /* The "default" register map used in 64bit mode. */
2270
2271 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2272 {
2273 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2274 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2275 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2276 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2277 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2278 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
2279 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2280 };
2281
2282 /* Define the register numbers to be used in Dwarf debugging information.
2283 The SVR4 reference port C compiler uses the following register numbers
2284 in its Dwarf output code:
2285 0 for %eax (gcc regno = 0)
2286 1 for %ecx (gcc regno = 2)
2287 2 for %edx (gcc regno = 1)
2288 3 for %ebx (gcc regno = 3)
2289 4 for %esp (gcc regno = 7)
2290 5 for %ebp (gcc regno = 6)
2291 6 for %esi (gcc regno = 4)
2292 7 for %edi (gcc regno = 5)
2293 The following three DWARF register numbers are never generated by
2294 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2295 believes these numbers have these meanings.
2296 8 for %eip (no gcc equivalent)
2297 9 for %eflags (gcc regno = 17)
2298 10 for %trapno (no gcc equivalent)
2299 It is not at all clear how we should number the FP stack registers
2300 for the x86 architecture. If the version of SDB on x86/svr4 were
2301 a bit less brain dead with respect to floating-point then we would
2302 have a precedent to follow with respect to DWARF register numbers
2303 for x86 FP registers, but the SDB on x86/svr4 is so completely
2304 broken with respect to FP registers that it is hardly worth thinking
2305 of it as something to strive for compatibility with.
2306 The version of x86/svr4 SDB I have at the moment does (partially)
2307 seem to believe that DWARF register number 11 is associated with
2308 the x86 register %st(0), but that's about all. Higher DWARF
2309 register numbers don't seem to be associated with anything in
2310 particular, and even for DWARF regno 11, SDB only seems to under-
2311 stand that it should say that a variable lives in %st(0) (when
2312 asked via an `=' command) if we said it was in DWARF regno 11,
2313 but SDB still prints garbage when asked for the value of the
2314 variable in question (via a `/' command).
2315 (Also note that the labels SDB prints for various FP stack regs
2316 when doing an `x' command are all wrong.)
2317 Note that these problems generally don't affect the native SVR4
2318 C compiler because it doesn't allow the use of -O with -g and
2319 because when it is *not* optimizing, it allocates a memory
2320 location for each floating-point variable, and the memory
2321 location is what gets described in the DWARF AT_location
2322 attribute for the variable in question.
2323 Regardless of the severe mental illness of the x86/svr4 SDB, we
2324 do something sensible here and we use the following DWARF
2325 register numbers. Note that these are all stack-top-relative
2326 numbers.
2327 11 for %st(0) (gcc regno = 8)
2328 12 for %st(1) (gcc regno = 9)
2329 13 for %st(2) (gcc regno = 10)
2330 14 for %st(3) (gcc regno = 11)
2331 15 for %st(4) (gcc regno = 12)
2332 16 for %st(5) (gcc regno = 13)
2333 17 for %st(6) (gcc regno = 14)
2334 18 for %st(7) (gcc regno = 15)
2335 */
2336 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2337 {
2338 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2339 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2340 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2341 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2342 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2343 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2344 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2345 };
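/* Editorial note: the map above is indexed by gcc register number and yields
   the DWARF register number documented in the comment; e.g. gcc regno 4
   (%esi) maps to DWARF regno 6, and gcc regno 8 (%st(0)) maps to DWARF
   regno 11.  A -1 entry marks a register with no assigned DWARF number.  */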
2346
2347 /* Define parameter passing and return registers. */
2348
2349 static int const x86_64_int_parameter_registers[6] =
2350 {
2351 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2352 };
2353
2354 static int const x86_64_ms_abi_int_parameter_registers[4] =
2355 {
2356 CX_REG, DX_REG, R8_REG, R9_REG
2357 };
2358
2359 static int const x86_64_int_return_registers[4] =
2360 {
2361 AX_REG, DX_REG, DI_REG, SI_REG
2362 };
2363
2364 /* Define the structure for the machine field in struct function. */
2365
2366 struct GTY(()) stack_local_entry {
2367 unsigned short mode;
2368 unsigned short n;
2369 rtx rtl;
2370 struct stack_local_entry *next;
2371 };
2372
2373 /* Structure describing stack frame layout.
2374 Stack grows downward:
2375
2376 [arguments]
2377 <- ARG_POINTER
2378 saved pc
2379
2380 saved static chain if ix86_static_chain_on_stack
2381
2382 saved frame pointer if frame_pointer_needed
2383 <- HARD_FRAME_POINTER
2384 [saved regs]
2385 <- regs_save_offset
2386 [padding0]
2387
2388 [saved SSE regs]
2389 <- sse_regs_save_offset
2390 [padding1] |
2391 | <- FRAME_POINTER
2392 [va_arg registers] |
2393 |
2394 [frame] |
2395 |
2396 [padding2] | = to_allocate
2397 <- STACK_POINTER
2398 */
2399 struct ix86_frame
2400 {
2401 int nsseregs;
2402 int nregs;
2403 int va_arg_size;
2404 int red_zone_size;
2405 int outgoing_arguments_size;
2406 HOST_WIDE_INT frame;
2407
2408 /* The offsets relative to ARG_POINTER. */
2409 HOST_WIDE_INT frame_pointer_offset;
2410 HOST_WIDE_INT hard_frame_pointer_offset;
2411 HOST_WIDE_INT stack_pointer_offset;
2412 HOST_WIDE_INT hfp_save_offset;
2413 HOST_WIDE_INT reg_save_offset;
2414 HOST_WIDE_INT sse_reg_save_offset;
2415
2416 /* When save_regs_using_mov is set, emit prologue using
2417 move instead of push instructions. */
2418 bool save_regs_using_mov;
2419 };
2420
2421 /* Which cpu are we scheduling for. */
2422 enum attr_cpu ix86_schedule;
2423
2424 /* Which cpu are we optimizing for. */
2425 enum processor_type ix86_tune;
2426
2427 /* Which instruction set architecture to use. */
2428 enum processor_type ix86_arch;
2429
2430 /* True if the SSE prefetch instruction is not a NOP. */
2431 int x86_prefetch_sse;
2432
2433 /* -mstackrealign option */
2434 static const char ix86_force_align_arg_pointer_string[]
2435 = "force_align_arg_pointer";
2436
2437 static rtx (*ix86_gen_leave) (void);
2438 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2439 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2440 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2441 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2442 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2443 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2444 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2445 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2446 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2447
2448 /* Preferred alignment for stack boundary in bits. */
2449 unsigned int ix86_preferred_stack_boundary;
2450
2451 /* Alignment for incoming stack boundary in bits specified at
2452 command line. */
2453 static unsigned int ix86_user_incoming_stack_boundary;
2454
2455 /* Default alignment for incoming stack boundary in bits. */
2456 static unsigned int ix86_default_incoming_stack_boundary;
2457
2458 /* Alignment for incoming stack boundary in bits. */
2459 unsigned int ix86_incoming_stack_boundary;
2460
2461 /* Calling abi specific va_list type nodes. */
2462 static GTY(()) tree sysv_va_list_type_node;
2463 static GTY(()) tree ms_va_list_type_node;
2464
2465 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2466 char internal_label_prefix[16];
2467 int internal_label_prefix_len;
2468
2469 /* Fence to use after loop using movnt. */
2470 tree x86_mfence;
2471
2472 /* Register class used for passing a given 64bit part of the argument.
2473 These represent classes as documented by the psABI, with the exception
2474 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
2475 uses SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
2476
2477 Similarly, we play games with INTEGERSI_CLASS to use cheaper SImode moves
2478 whenever possible (the upper half contains padding). */
2479 enum x86_64_reg_class
2480 {
2481 X86_64_NO_CLASS,
2482 X86_64_INTEGER_CLASS,
2483 X86_64_INTEGERSI_CLASS,
2484 X86_64_SSE_CLASS,
2485 X86_64_SSESF_CLASS,
2486 X86_64_SSEDF_CLASS,
2487 X86_64_SSEUP_CLASS,
2488 X86_64_X87_CLASS,
2489 X86_64_X87UP_CLASS,
2490 X86_64_COMPLEX_X87_CLASS,
2491 X86_64_MEMORY_CLASS
2492 };
2493
2494 #define MAX_CLASSES 4
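/* Editorial note: an illustrative (non-authoritative) example of how these
   classes are used.  Classification assigns one class per eightbyte of an
   argument; e.g. a struct { double d; int i; } passed by value would
   typically classify as { X86_64_SSEDF_CLASS, X86_64_INTEGERSI_CLASS }, so
   the double travels in an SSE register (moved as DFmode) and the int in a
   general register (moved as SImode).  MAX_CLASSES bounds the number of
   eightbytes classified for a single argument.  */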
2495
2496 /* Table of constants used by fldpi, fldln2, etc. */
2497 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2498 static bool ext_80387_constants_init = 0;
2499
2500 \f
2501 static struct machine_function * ix86_init_machine_status (void);
2502 static rtx ix86_function_value (const_tree, const_tree, bool);
2503 static bool ix86_function_value_regno_p (const unsigned int);
2504 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2505 const_tree);
2506 static rtx ix86_static_chain (const_tree, bool);
2507 static int ix86_function_regparm (const_tree, const_tree);
2508 static void ix86_compute_frame_layout (struct ix86_frame *);
2509 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2510 rtx, rtx, int);
2511 static void ix86_add_new_builtins (HOST_WIDE_INT);
2512 static rtx ix86_expand_vec_perm_builtin (tree);
2513 static tree ix86_canonical_va_list_type (tree);
2514 static void predict_jump (int);
2515 static unsigned int split_stack_prologue_scratch_regno (void);
2516 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2517
2518 enum ix86_function_specific_strings
2519 {
2520 IX86_FUNCTION_SPECIFIC_ARCH,
2521 IX86_FUNCTION_SPECIFIC_TUNE,
2522 IX86_FUNCTION_SPECIFIC_MAX
2523 };
2524
2525 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2526 const char *, enum fpmath_unit, bool);
2527 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2528 static void ix86_function_specific_save (struct cl_target_option *);
2529 static void ix86_function_specific_restore (struct cl_target_option *);
2530 static void ix86_function_specific_print (FILE *, int,
2531 struct cl_target_option *);
2532 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2533 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2534 struct gcc_options *);
2535 static bool ix86_can_inline_p (tree, tree);
2536 static void ix86_set_current_function (tree);
2537 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2538
2539 static enum calling_abi ix86_function_abi (const_tree);
2540
2541 \f
2542 #ifndef SUBTARGET32_DEFAULT_CPU
2543 #define SUBTARGET32_DEFAULT_CPU "i386"
2544 #endif
2545
2546 /* The svr4 ABI for the i386 says that records and unions are returned
2547 in memory. */
2548 #ifndef DEFAULT_PCC_STRUCT_RETURN
2549 #define DEFAULT_PCC_STRUCT_RETURN 1
2550 #endif
2551
2552 /* Whether -mtune= or -march= were specified */
2553 static int ix86_tune_defaulted;
2554 static int ix86_arch_specified;
2555
2556 /* Vectorization library interface and handlers. */
2557 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2558
2559 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2560 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2561
2562 /* Processor target table, indexed by processor number */
2563 struct ptt
2564 {
2565 const struct processor_costs *cost; /* Processor costs */
2566 const int align_loop; /* Default alignments. */
2567 const int align_loop_max_skip;
2568 const int align_jump;
2569 const int align_jump_max_skip;
2570 const int align_func;
2571 };
2572
2573 static const struct ptt processor_target_table[PROCESSOR_max] =
2574 {
2575 {&i386_cost, 4, 3, 4, 3, 4},
2576 {&i486_cost, 16, 15, 16, 15, 16},
2577 {&pentium_cost, 16, 7, 16, 7, 16},
2578 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2579 {&geode_cost, 0, 0, 0, 0, 0},
2580 {&k6_cost, 32, 7, 32, 7, 32},
2581 {&athlon_cost, 16, 7, 16, 7, 16},
2582 {&pentium4_cost, 0, 0, 0, 0, 0},
2583 {&k8_cost, 16, 7, 16, 7, 16},
2584 {&nocona_cost, 0, 0, 0, 0, 0},
2585 /* Core 2 32-bit. */
2586 {&generic32_cost, 16, 10, 16, 10, 16},
2587 /* Core 2 64-bit. */
2588 {&generic64_cost, 16, 10, 16, 10, 16},
2589 /* Core i7 32-bit. */
2590 {&generic32_cost, 16, 10, 16, 10, 16},
2591 /* Core i7 64-bit. */
2592 {&generic64_cost, 16, 10, 16, 10, 16},
2593 {&generic32_cost, 16, 7, 16, 7, 16},
2594 {&generic64_cost, 16, 10, 16, 10, 16},
2595 {&amdfam10_cost, 32, 24, 32, 7, 32},
2596 {&bdver1_cost, 32, 24, 32, 7, 32},
2597 {&bdver2_cost, 32, 24, 32, 7, 32},
2598 {&btver1_cost, 32, 24, 32, 7, 32},
2599 {&atom_cost, 16, 7, 16, 7, 16}
2600 };
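/* Editorial note: processor_target_table entries follow the struct ptt field
   order above: { cost table, align_loop, align_loop_max_skip, align_jump,
   align_jump_max_skip, align_func }.  For example, the atom entry
   {&atom_cost, 16, 7, 16, 7, 16} means Atom uses atom_cost and defaults to
   16-byte alignment for loops, jumps and functions, skipping at most 7 bytes
   of padding for loops and jumps.  These act as defaults, typically applied
   only when the user did not give explicit -falign-* options.  */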
2601
2602 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2603 {
2604 "generic",
2605 "i386",
2606 "i486",
2607 "pentium",
2608 "pentium-mmx",
2609 "pentiumpro",
2610 "pentium2",
2611 "pentium3",
2612 "pentium4",
2613 "pentium-m",
2614 "prescott",
2615 "nocona",
2616 "core2",
2617 "corei7",
2618 "atom",
2619 "geode",
2620 "k6",
2621 "k6-2",
2622 "k6-3",
2623 "athlon",
2624 "athlon-4",
2625 "k8",
2626 "amdfam10",
2627 "bdver1",
2628 "bdver2",
2629 "btver1"
2630 };
2631 \f
2632 /* Return true if a red-zone is in use. */
2633
2634 static inline bool
2635 ix86_using_red_zone (void)
2636 {
2637 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2638 }
2639 \f
2640 /* Return a string that documents the current -m options. The caller is
2641 responsible for freeing the string. */
2642
2643 static char *
2644 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2645 const char *tune, enum fpmath_unit fpmath,
2646 bool add_nl_p)
2647 {
2648 struct ix86_target_opts
2649 {
2650 const char *option; /* option string */
2651 HOST_WIDE_INT mask; /* isa mask options */
2652 };
2653
2654 /* This table is ordered so that options like -msse4.2 that imply
2655 preceding options will match those first. */
2656 static struct ix86_target_opts isa_opts[] =
2657 {
2658 { "-m64", OPTION_MASK_ISA_64BIT },
2659 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2660 { "-mfma", OPTION_MASK_ISA_FMA },
2661 { "-mxop", OPTION_MASK_ISA_XOP },
2662 { "-mlwp", OPTION_MASK_ISA_LWP },
2663 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2664 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2665 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2666 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2667 { "-msse3", OPTION_MASK_ISA_SSE3 },
2668 { "-msse2", OPTION_MASK_ISA_SSE2 },
2669 { "-msse", OPTION_MASK_ISA_SSE },
2670 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2671 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2672 { "-mmmx", OPTION_MASK_ISA_MMX },
2673 { "-mabm", OPTION_MASK_ISA_ABM },
2674 { "-mbmi", OPTION_MASK_ISA_BMI },
2675 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2676 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2677 { "-mtbm", OPTION_MASK_ISA_TBM },
2678 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2679 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2680 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2681 { "-maes", OPTION_MASK_ISA_AES },
2682 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2683 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2684 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2685 { "-mf16c", OPTION_MASK_ISA_F16C },
2686 };
2687
2688 /* Flag options. */
2689 static struct ix86_target_opts flag_opts[] =
2690 {
2691 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2692 { "-m80387", MASK_80387 },
2693 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2694 { "-malign-double", MASK_ALIGN_DOUBLE },
2695 { "-mcld", MASK_CLD },
2696 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2697 { "-mieee-fp", MASK_IEEE_FP },
2698 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2699 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2700 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2701 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2702 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2703 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2704 { "-mno-red-zone", MASK_NO_RED_ZONE },
2705 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2706 { "-mrecip", MASK_RECIP },
2707 { "-mrtd", MASK_RTD },
2708 { "-msseregparm", MASK_SSEREGPARM },
2709 { "-mstack-arg-probe", MASK_STACK_PROBE },
2710 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2711 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2712 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2713 { "-mvzeroupper", MASK_VZEROUPPER },
2714 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2715 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2716 { "-mprefer-avx128", MASK_PREFER_AVX128},
2717 };
2718
2719 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2720
2721 char isa_other[40];
2722 char target_other[40];
2723 unsigned num = 0;
2724 unsigned i, j;
2725 char *ret;
2726 char *ptr;
2727 size_t len;
2728 size_t line_len;
2729 size_t sep_len;
2730
2731 memset (opts, '\0', sizeof (opts));
2732
2733 /* Add -march= option. */
2734 if (arch)
2735 {
2736 opts[num][0] = "-march=";
2737 opts[num++][1] = arch;
2738 }
2739
2740 /* Add -mtune= option. */
2741 if (tune)
2742 {
2743 opts[num][0] = "-mtune=";
2744 opts[num++][1] = tune;
2745 }
2746
2747 /* Pick out the options in isa options. */
2748 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2749 {
2750 if ((isa & isa_opts[i].mask) != 0)
2751 {
2752 opts[num++][0] = isa_opts[i].option;
2753 isa &= ~ isa_opts[i].mask;
2754 }
2755 }
2756
2757 if (isa && add_nl_p)
2758 {
2759 opts[num++][0] = isa_other;
2760 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2761 isa);
2762 }
2763
2764 /* Add flag options. */
2765 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2766 {
2767 if ((flags & flag_opts[i].mask) != 0)
2768 {
2769 opts[num++][0] = flag_opts[i].option;
2770 flags &= ~ flag_opts[i].mask;
2771 }
2772 }
2773
2774 if (flags && add_nl_p)
2775 {
2776 opts[num++][0] = target_other;
2777 sprintf (target_other, "(other flags: %#x)", flags);
2778 }
2779
2780 /* Add -fpmath= option. */
2781 if (fpmath)
2782 {
2783 opts[num][0] = "-mfpmath=";
2784 switch ((int) fpmath)
2785 {
2786 case FPMATH_387:
2787 opts[num++][1] = "387";
2788 break;
2789
2790 case FPMATH_SSE:
2791 opts[num++][1] = "sse";
2792 break;
2793
2794 case FPMATH_387 | FPMATH_SSE:
2795 opts[num++][1] = "sse+387";
2796 break;
2797
2798 default:
2799 gcc_unreachable ();
2800 }
2801 }
2802
2803 /* Any options? */
2804 if (num == 0)
2805 return NULL;
2806
2807 gcc_assert (num < ARRAY_SIZE (opts));
2808
2809 /* Size the string. */
2810 len = 0;
2811 sep_len = (add_nl_p) ? 3 : 1;
2812 for (i = 0; i < num; i++)
2813 {
2814 len += sep_len;
2815 for (j = 0; j < 2; j++)
2816 if (opts[i][j])
2817 len += strlen (opts[i][j]);
2818 }
2819
2820 /* Build the string. */
2821 ret = ptr = (char *) xmalloc (len);
2822 line_len = 0;
2823
2824 for (i = 0; i < num; i++)
2825 {
2826 size_t len2[2];
2827
2828 for (j = 0; j < 2; j++)
2829 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2830
2831 if (i != 0)
2832 {
2833 *ptr++ = ' ';
2834 line_len++;
2835
2836 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2837 {
2838 *ptr++ = '\\';
2839 *ptr++ = '\n';
2840 line_len = 0;
2841 }
2842 }
2843
2844 for (j = 0; j < 2; j++)
2845 if (opts[i][j])
2846 {
2847 memcpy (ptr, opts[i][j], len2[j]);
2848 ptr += len2[j];
2849 line_len += len2[j];
2850 }
2851 }
2852
2853 *ptr = '\0';
2854 gcc_assert (ret + len >= ptr);
2855
2856 return ret;
2857 }
2858
2859 /* Return true if profiling code should be emitted before the
2860 prologue; otherwise return false.
2861 Note: for x86 with "hotfix" this is taken care of. */
2862 static bool
2863 ix86_profile_before_prologue (void)
2864 {
2865 return flag_fentry != 0;
2866 }
2867
2868 /* Function that is callable from the debugger to print the current
2869 options. */
2870 void
2871 ix86_debug_options (void)
2872 {
2873 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2874 ix86_arch_string, ix86_tune_string,
2875 ix86_fpmath, true);
2876
2877 if (opts)
2878 {
2879 fprintf (stderr, "%s\n\n", opts);
2880 free (opts);
2881 }
2882 else
2883 fputs ("<no options>\n\n", stderr);
2884
2885 return;
2886 }
2887 \f
2888 /* Override various settings based on options. If MAIN_ARGS_P, the
2889 options are from the command line, otherwise they are from
2890 attributes. */
2891
2892 static void
2893 ix86_option_override_internal (bool main_args_p)
2894 {
2895 int i;
2896 unsigned int ix86_arch_mask, ix86_tune_mask;
2897 const bool ix86_tune_specified = (ix86_tune_string != NULL);
2898 const char *prefix;
2899 const char *suffix;
2900 const char *sw;
2901
2902 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
2903 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
2904 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
2905 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
2906 #define PTA_AES (HOST_WIDE_INT_1 << 4)
2907 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
2908 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
2909 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
2910 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
2911 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
2912 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
2913 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
2914 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
2915 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
2916 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
2917 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
2918 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
2919 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
2920 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
2921 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
2922 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
2923 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
2924 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
2925 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
2926 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
2927 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
2928 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
2929 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
2930 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
2931 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
2932 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
2933 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
2934 /* If this reaches 64, we need to widen the struct pta flags below. */
2935
2936 static struct pta
2937 {
2938 const char *const name; /* processor name or nickname. */
2939 const enum processor_type processor;
2940 const enum attr_cpu schedule;
2941 const unsigned HOST_WIDE_INT flags;
2942 }
2943 const processor_alias_table[] =
2944 {
2945 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2946 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2947 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2948 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2949 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2950 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2951 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2952 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2953 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2954 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2955 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2956 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2957 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2958 PTA_MMX | PTA_SSE},
2959 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2960 PTA_MMX | PTA_SSE},
2961 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2962 PTA_MMX | PTA_SSE | PTA_SSE2},
2963 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2964 PTA_MMX | PTA_SSE | PTA_SSE2},
2965 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2966 PTA_MMX | PTA_SSE | PTA_SSE2},
2967 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2968 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2969 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2970 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2971 | PTA_CX16 | PTA_NO_SAHF},
2972 {"core2", PROCESSOR_CORE2_64, CPU_CORE2,
2973 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2974 | PTA_SSSE3 | PTA_CX16},
2975 {"corei7", PROCESSOR_COREI7_64, CPU_COREI7,
2976 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2977 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16},
2978 {"corei7-avx", PROCESSOR_COREI7_64, CPU_COREI7,
2979 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2980 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
2981 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL},
2982 {"core-avx-i", PROCESSOR_COREI7_64, CPU_COREI7,
2983 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2984 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
2985 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
2986 | PTA_RDRND | PTA_F16C},
2987 {"core-avx2", PROCESSOR_COREI7_64, CPU_COREI7,
2988 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2989 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX | PTA_AVX2
2990 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
2991 | PTA_RDRND | PTA_F16C | PTA_BMI | PTA_BMI2 | PTA_LZCNT
2992 | PTA_FMA | PTA_MOVBE},
2993 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2994 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2995 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2996 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2997 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2998 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2999 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3000 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3001 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3002 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3003 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3004 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3005 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3006 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3007 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3008 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3009 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3010 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3011 {"x86-64", PROCESSOR_K8, CPU_K8,
3012 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
3013 {"k8", PROCESSOR_K8, CPU_K8,
3014 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3015 | PTA_SSE2 | PTA_NO_SAHF},
3016 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3017 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3018 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3019 {"opteron", PROCESSOR_K8, CPU_K8,
3020 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3021 | PTA_SSE2 | PTA_NO_SAHF},
3022 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3023 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3024 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3025 {"athlon64", PROCESSOR_K8, CPU_K8,
3026 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3027 | PTA_SSE2 | PTA_NO_SAHF},
3028 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3029 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3030 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3031 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3032 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3033 | PTA_SSE2 | PTA_NO_SAHF},
3034 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3035 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3036 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3037 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3038 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3039 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3040 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3041 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3042 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3043 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3044 | PTA_XOP | PTA_LWP},
3045 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3046 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3047 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3048 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3049 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3050 | PTA_FMA},
3051 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
3052 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3053 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16},
3054 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
3055 0 /* flags are only used for -march switch. */ },
3056 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
3057 PTA_64BIT /* flags are only used for -march switch. */ },
3058 };
3059
3060 int const pta_size = ARRAY_SIZE (processor_alias_table);
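/* Editorial note: a minimal sketch of how processor_alias_table is consumed
   further down in this function; the real loop also handles the -mtune
   lookup and lets explicit -m<isa> options override the PTA_* defaults:

     for (i = 0; i < pta_size; i++)
       if (!strcmp (ix86_arch_string, processor_alias_table[i].name))
         {
           ix86_schedule = processor_alias_table[i].schedule;
           ix86_arch = processor_alias_table[i].processor;
           ... translate PTA_* bits into OPTION_MASK_ISA_* bits ...
           break;
         }
*/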
3061
3062 /* Set up prefix/suffix so the error messages refer to either the command
3063 line argument, or the attribute(target). */
3064 if (main_args_p)
3065 {
3066 prefix = "-m";
3067 suffix = "";
3068 sw = "switch";
3069 }
3070 else
3071 {
3072 prefix = "option(\"";
3073 suffix = "\")";
3074 sw = "attribute";
3075 }
3076
3077 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3078 SUBTARGET_OVERRIDE_OPTIONS;
3079 #endif
3080
3081 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3082 SUBSUBTARGET_OVERRIDE_OPTIONS;
3083 #endif
3084
3085 if (TARGET_X32)
3086 ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3087
3088 /* -fPIC is the default for x86_64. */
3089 if (TARGET_MACHO && TARGET_64BIT)
3090 flag_pic = 2;
3091
3092 /* Need to check -mtune=generic first. */
3093 if (ix86_tune_string)
3094 {
3095 if (!strcmp (ix86_tune_string, "generic")
3096 || !strcmp (ix86_tune_string, "i686")
3097 /* As special support for cross compilers we read -mtune=native
3098 as -mtune=generic. With native compilers we won't see the
3099 -mtune=native, as it was changed by the driver. */
3100 || !strcmp (ix86_tune_string, "native"))
3101 {
3102 if (TARGET_64BIT)
3103 ix86_tune_string = "generic64";
3104 else
3105 ix86_tune_string = "generic32";
3106 }
3107 /* If this call is for setting the option attribute, allow the
3108 generic32/generic64 that was previously set. */
3109 else if (!main_args_p
3110 && (!strcmp (ix86_tune_string, "generic32")
3111 || !strcmp (ix86_tune_string, "generic64")))
3112 ;
3113 else if (!strncmp (ix86_tune_string, "generic", 7))
3114 error ("bad value (%s) for %stune=%s %s",
3115 ix86_tune_string, prefix, suffix, sw);
3116 else if (!strcmp (ix86_tune_string, "x86-64"))
3117 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3118 "%stune=k8%s or %stune=generic%s instead as appropriate",
3119 prefix, suffix, prefix, suffix, prefix, suffix);
3120 }
3121 else
3122 {
3123 if (ix86_arch_string)
3124 ix86_tune_string = ix86_arch_string;
3125 if (!ix86_tune_string)
3126 {
3127 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3128 ix86_tune_defaulted = 1;
3129 }
3130
3131 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3132 need to use a sensible tune option. */
3133 if (!strcmp (ix86_tune_string, "generic")
3134 || !strcmp (ix86_tune_string, "x86-64")
3135 || !strcmp (ix86_tune_string, "i686"))
3136 {
3137 if (TARGET_64BIT)
3138 ix86_tune_string = "generic64";
3139 else
3140 ix86_tune_string = "generic32";
3141 }
3142 }
3143
3144 if (ix86_stringop_alg == rep_prefix_8_byte && !TARGET_64BIT)
3145 {
3146 /* rep; movq isn't available in 32-bit code. */
3147 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3148 ix86_stringop_alg = no_stringop;
3149 }
3150
3151 if (!ix86_arch_string)
3152 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3153 else
3154 ix86_arch_specified = 1;
3155
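  /* Default the calling-convention ABI if it was not given explicitly.  */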
3156 if (!global_options_set.x_ix86_abi)
3157 ix86_abi = DEFAULT_ABI;
3158
3159 if (global_options_set.x_ix86_cmodel)
3160 {
3161 switch (ix86_cmodel)
3162 {
3163 case CM_SMALL:
3164 case CM_SMALL_PIC:
3165 if (flag_pic)
3166 ix86_cmodel = CM_SMALL_PIC;
3167 if (!TARGET_64BIT)
3168 error ("code model %qs not supported in the %s bit mode",
3169 "small", "32");
3170 break;
3171
3172 case CM_MEDIUM:
3173 case CM_MEDIUM_PIC:
3174 if (flag_pic)
3175 ix86_cmodel = CM_MEDIUM_PIC;
3176 if (!TARGET_64BIT)
3177 error ("code model %qs not supported in the %s bit mode",
3178 "medium", "32");
3179 else if (TARGET_X32)
3180 error ("code model %qs not supported in x32 mode",
3181 "medium");
3182 break;
3183
3184 case CM_LARGE:
3185 case CM_LARGE_PIC:
3186 if (flag_pic)
3187 ix86_cmodel = CM_LARGE_PIC;
3188 if (!TARGET_64BIT)
3189 error ("code model %qs not supported in the %s bit mode",
3190 "large", "32");
3191 else if (TARGET_X32)
3192 error ("code model %qs not supported in x32 mode",
3193 "medium");
3194 break;
3195
3196 case CM_32:
3197 if (flag_pic)
3198 error ("code model %s does not support PIC mode", "32");
3199 if (TARGET_64BIT)
3200 error ("code model %qs not supported in the %s bit mode",
3201 "32", "64");
3202 break;
3203
3204 case CM_KERNEL:
3205 if (flag_pic)
3206 {
3207 error ("code model %s does not support PIC mode", "kernel");
3208 ix86_cmodel = CM_32;
3209 }
3210 if (!TARGET_64BIT)
3211 error ("code model %qs not supported in the %s bit mode",
3212 "kernel", "32");
3213 break;
3214
3215 default:
3216 gcc_unreachable ();
3217 }
3218 }
3219 else
3220 {
3221 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3222 use of rip-relative addressing. This eliminates fixups that
3223 would otherwise be needed if this object is to be placed in a
3224 DLL, and is essentially just as efficient as direct addressing. */
3225 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3226 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3227 else if (TARGET_64BIT)
3228 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3229 else
3230 ix86_cmodel = CM_32;
3231 }
3232 if (TARGET_MACHO && ix86_asm_dialect == ASM_INTEL)
3233 {
3234 error ("-masm=intel not supported in this configuration");
3235 ix86_asm_dialect = ASM_ATT;
3236 }
3237 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3238 sorry ("%i-bit mode not compiled in",
3239 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3240
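  /* Look up -march= in the alias table.  The matching entry supplies the
     scheduling model and the set of ISA extensions the CPU implies; an
     implied extension is only turned on if the user did not explicitly
     enable or disable it (tracked in ix86_isa_flags_explicit).  */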
3241 for (i = 0; i < pta_size; i++)
3242 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3243 {
3244 ix86_schedule = processor_alias_table[i].schedule;
3245 ix86_arch = processor_alias_table[i].processor;
3246 /* Default cpu tuning to the architecture. */
3247 ix86_tune = ix86_arch;
3248
3249 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3250 error ("CPU you selected does not support x86-64 "
3251 "instruction set");
3252
3253 if (processor_alias_table[i].flags & PTA_MMX
3254 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3255 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3256 if (processor_alias_table[i].flags & PTA_3DNOW
3257 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3258 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3259 if (processor_alias_table[i].flags & PTA_3DNOW_A
3260 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3261 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3262 if (processor_alias_table[i].flags & PTA_SSE
3263 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3264 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3265 if (processor_alias_table[i].flags & PTA_SSE2
3266 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3267 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3268 if (processor_alias_table[i].flags & PTA_SSE3
3269 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3270 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3271 if (processor_alias_table[i].flags & PTA_SSSE3
3272 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3273 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3274 if (processor_alias_table[i].flags & PTA_SSE4_1
3275 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3276 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3277 if (processor_alias_table[i].flags & PTA_SSE4_2
3278 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3279 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3280 if (processor_alias_table[i].flags & PTA_AVX
3281 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3282 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3283 if (processor_alias_table[i].flags & PTA_AVX2
3284 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3285 ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3286 if (processor_alias_table[i].flags & PTA_FMA
3287 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3288 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3289 if (processor_alias_table[i].flags & PTA_SSE4A
3290 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3291 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3292 if (processor_alias_table[i].flags & PTA_FMA4
3293 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3294 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3295 if (processor_alias_table[i].flags & PTA_XOP
3296 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3297 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3298 if (processor_alias_table[i].flags & PTA_LWP
3299 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3300 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3301 if (processor_alias_table[i].flags & PTA_ABM
3302 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3303 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3304 if (processor_alias_table[i].flags & PTA_BMI
3305 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3306 ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3307 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3308 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3309 ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3310 if (processor_alias_table[i].flags & PTA_TBM
3311 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3312 ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3313 if (processor_alias_table[i].flags & PTA_BMI2
3314 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3315 ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3316 if (processor_alias_table[i].flags & PTA_CX16
3317 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3318 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3319 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3320 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3321 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3322 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3323 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3324 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3325 if (processor_alias_table[i].flags & PTA_MOVBE
3326 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3327 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3328 if (processor_alias_table[i].flags & PTA_AES
3329 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3330 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3331 if (processor_alias_table[i].flags & PTA_PCLMUL
3332 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3333 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3334 if (processor_alias_table[i].flags & PTA_FSGSBASE
3335 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3336 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3337 if (processor_alias_table[i].flags & PTA_RDRND
3338 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3339 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3340 if (processor_alias_table[i].flags & PTA_F16C
3341 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3342 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3343 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3344 x86_prefetch_sse = true;
3345
3346 break;
3347 }
3348
3349 if (!strcmp (ix86_arch_string, "generic"))
3350 error ("generic CPU can be used only for %stune=%s %s",
3351 prefix, suffix, sw);
3352 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3353 error ("bad value (%s) for %sarch=%s %s",
3354 ix86_arch_string, prefix, suffix, sw);
3355
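  /* Cache the per-architecture feature tests as booleans indexed by the
     X86_ARCH_* enumeration.  */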
3356 ix86_arch_mask = 1u << ix86_arch;
3357 for (i = 0; i < X86_ARCH_LAST; ++i)
3358 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3359
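  /* Likewise look up -mtune= to pick the scheduling model and tuning
     target; only the prefetch-related ISA bits matter here.  */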
3360 for (i = 0; i < pta_size; i++)
3361 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3362 {
3363 ix86_schedule = processor_alias_table[i].schedule;
3364 ix86_tune = processor_alias_table[i].processor;
3365 if (TARGET_64BIT)
3366 {
3367 if (!(processor_alias_table[i].flags & PTA_64BIT))
3368 {
3369 if (ix86_tune_defaulted)
3370 {
3371 ix86_tune_string = "x86-64";
3372 for (i = 0; i < pta_size; i++)
3373 if (! strcmp (ix86_tune_string,
3374 processor_alias_table[i].name))
3375 break;
3376 ix86_schedule = processor_alias_table[i].schedule;
3377 ix86_tune = processor_alias_table[i].processor;
3378 }
3379 else
3380 error ("CPU you selected does not support x86-64 "
3381 "instruction set");
3382 }
3383 }
3384 else
3385 {
3386 /* Adjust tuning when compiling for 32-bit ABI. */
3387 switch (ix86_tune)
3388 {
3389 case PROCESSOR_GENERIC64:
3390 ix86_tune = PROCESSOR_GENERIC32;
3391 ix86_schedule = CPU_PENTIUMPRO;
3392 break;
3393
3394 case PROCESSOR_CORE2_64:
3395 ix86_tune = PROCESSOR_CORE2_32;
3396 break;
3397
3398 case PROCESSOR_COREI7_64:
3399 ix86_tune = PROCESSOR_COREI7_32;
3400 break;
3401
3402 default:
3403 break;
3404 }
3405 }
3406 /* Intel CPUs have always interpreted SSE prefetch instructions as
3407 NOPs; so, we can enable SSE prefetch instructions even when
3408 -mtune (rather than -march) points us to a processor that has them.
3409 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3410 higher processors. */
3411 if (TARGET_CMOVE
3412 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3413 x86_prefetch_sse = true;
3414 break;
3415 }
3416
3417 if (ix86_tune_specified && i == pta_size)
3418 error ("bad value (%s) for %stune=%s %s",
3419 ix86_tune_string, prefix, suffix, sw);
3420
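  /* Cache the per-tuning feature tests as booleans indexed by X86_TUNE_*.  */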
3421 ix86_tune_mask = 1u << ix86_tune;
3422 for (i = 0; i < X86_TUNE_LAST; ++i)
3423 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3424
3425 #ifndef USE_IX86_FRAME_POINTER
3426 #define USE_IX86_FRAME_POINTER 0
3427 #endif
3428
3429 #ifndef USE_X86_64_FRAME_POINTER
3430 #define USE_X86_64_FRAME_POINTER 0
3431 #endif
3432
3433 /* Set the default values for switches whose default depends on TARGET_64BIT
3434 in case they weren't overwritten by command line options. */
3435 if (TARGET_64BIT)
3436 {
3437 if (optimize > 1 && !global_options_set.x_flag_zee)
3438 flag_zee = 1;
3439 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3440 flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3441 if (flag_asynchronous_unwind_tables == 2)
3442 flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
3443 if (flag_pcc_struct_return == 2)
3444 flag_pcc_struct_return = 0;
3445 }
3446 else
3447 {
3448 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3449 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3450 if (flag_asynchronous_unwind_tables == 2)
3451 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3452 if (flag_pcc_struct_return == 2)
3453 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3454 }
3455
3456 if (optimize_size)
3457 ix86_cost = &ix86_size_cost;
3458 else
3459 ix86_cost = processor_target_table[ix86_tune].cost;
3460
3461 /* Arrange to set up i386_stack_locals for all functions. */
3462 init_machine_status = ix86_init_machine_status;
3463
3464 /* Validate -mregparm= value. */
3465 if (global_options_set.x_ix86_regparm)
3466 {
3467 if (TARGET_64BIT)
3468 warning (0, "-mregparm is ignored in 64-bit mode");
3469 if (ix86_regparm > REGPARM_MAX)
3470 {
3471 error ("-mregparm=%d is not between 0 and %d",
3472 ix86_regparm, REGPARM_MAX);
3473 ix86_regparm = 0;
3474 }
3475 }
3476 if (TARGET_64BIT)
3477 ix86_regparm = REGPARM_MAX;
3478
3479 /* Default align_* from the processor table. */
3480 if (align_loops == 0)
3481 {
3482 align_loops = processor_target_table[ix86_tune].align_loop;
3483 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3484 }
3485 if (align_jumps == 0)
3486 {
3487 align_jumps = processor_target_table[ix86_tune].align_jump;
3488 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3489 }
3490 if (align_functions == 0)
3491 {
3492 align_functions = processor_target_table[ix86_tune].align_func;
3493 }
3494
3495 /* Provide default for -mbranch-cost= value. */
3496 if (!global_options_set.x_ix86_branch_cost)
3497 ix86_branch_cost = ix86_cost->branch_cost;
3498
3499 if (TARGET_64BIT)
3500 {
3501 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3502
3503 /* Enable by default the SSE and MMX builtins. Do allow the user to
3504 explicitly disable any of these. In particular, disabling SSE and
3505 MMX for kernel code is extremely useful. */
3506 if (!ix86_arch_specified)
3507 ix86_isa_flags
3508 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3509 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3510
3511 if (TARGET_RTD)
3512 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3513 }
3514 else
3515 {
3516 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3517
3518 if (!ix86_arch_specified)
3519 ix86_isa_flags
3520 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3521
3522       /* The i386 ABI does not specify a red zone.  It still makes sense to use
3523 	 one when the programmer takes care to keep the stack from being destroyed.  */
3524 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3525 target_flags |= MASK_NO_RED_ZONE;
3526 }
3527
3528 /* Keep nonleaf frame pointers. */
3529 if (flag_omit_frame_pointer)
3530 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3531 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3532 flag_omit_frame_pointer = 1;
3533
3534 /* If we're doing fast math, we don't care about comparison order
3535 wrt NaNs. This lets us use a shorter comparison sequence. */
3536 if (flag_finite_math_only)
3537 target_flags &= ~MASK_IEEE_FP;
3538
3539 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3540 since the insns won't need emulation. */
3541 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3542 target_flags &= ~MASK_NO_FANCY_MATH_387;
3543
3544 /* Likewise, if the target doesn't have a 387, or we've specified
3545 software floating point, don't use 387 inline intrinsics. */
3546 if (!TARGET_80387)
3547 target_flags |= MASK_NO_FANCY_MATH_387;
3548
3549 /* Turn on MMX builtins for -msse. */
3550 if (TARGET_SSE)
3551 {
3552 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3553 x86_prefetch_sse = true;
3554 }
3555
3556 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3557 if (TARGET_SSE4_2 || TARGET_ABM)
3558 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3559
3560 /* Turn on lzcnt instruction for -mabm. */
3561 if (TARGET_ABM)
3562 ix86_isa_flags |= OPTION_MASK_ISA_LZCNT & ~ix86_isa_flags_explicit;
3563
3564 /* Validate -mpreferred-stack-boundary= value or default it to
3565 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3566 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3567 if (global_options_set.x_ix86_preferred_stack_boundary_arg)
3568 {
3569 int min = (TARGET_64BIT ? 4 : 2);
3570 int max = (TARGET_SEH ? 4 : 12);
3571
3572 if (ix86_preferred_stack_boundary_arg < min
3573 || ix86_preferred_stack_boundary_arg > max)
3574 {
3575 if (min == max)
3576 error ("-mpreferred-stack-boundary is not supported "
3577 "for this target");
3578 else
3579 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3580 ix86_preferred_stack_boundary_arg, min, max);
3581 }
3582 else
3583 ix86_preferred_stack_boundary
3584 = (1 << ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
3585 }
3586
3587 /* Set the default value for -mstackrealign. */
3588 if (ix86_force_align_arg_pointer == -1)
3589 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3590
3591 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3592
3593 /* Validate -mincoming-stack-boundary= value or default it to
3594 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3595 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3596 if (global_options_set.x_ix86_incoming_stack_boundary_arg)
3597 {
3598 if (ix86_incoming_stack_boundary_arg < (TARGET_64BIT ? 4 : 2)
3599 || ix86_incoming_stack_boundary_arg > 12)
3600 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3601 ix86_incoming_stack_boundary_arg, TARGET_64BIT ? 4 : 2);
3602 else
3603 {
3604 ix86_user_incoming_stack_boundary
3605 = (1 << ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3606 ix86_incoming_stack_boundary
3607 = ix86_user_incoming_stack_boundary;
3608 }
3609 }
3610
3611 /* Accept -msseregparm only if at least SSE support is enabled. */
3612 if (TARGET_SSEREGPARM
3613 && ! TARGET_SSE)
3614 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3615
3616 if (global_options_set.x_ix86_fpmath)
3617 {
3618 if (ix86_fpmath & FPMATH_SSE)
3619 {
3620 if (!TARGET_SSE)
3621 {
3622 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3623 ix86_fpmath = FPMATH_387;
3624 }
3625 else if ((ix86_fpmath & FPMATH_387) && !TARGET_80387)
3626 {
3627 warning (0, "387 instruction set disabled, using SSE arithmetics");
3628 ix86_fpmath = FPMATH_SSE;
3629 }
3630 }
3631 }
3632 else
3633 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3634
3635 /* If the i387 is disabled, then do not return values in it. */
3636 if (!TARGET_80387)
3637 target_flags &= ~MASK_FLOAT_RETURNS;
3638
3639   /* Use an external vectorized library for vectorizing intrinsics.  */
3640 if (global_options_set.x_ix86_veclibabi_type)
3641 switch (ix86_veclibabi_type)
3642 {
3643 case ix86_veclibabi_type_svml:
3644 ix86_veclib_handler = ix86_veclibabi_svml;
3645 break;
3646
3647 case ix86_veclibabi_type_acml:
3648 ix86_veclib_handler = ix86_veclibabi_acml;
3649 break;
3650
3651 default:
3652 gcc_unreachable ();
3653 }
3654
3655 if ((!USE_IX86_FRAME_POINTER
3656 || (x86_accumulate_outgoing_args & ix86_tune_mask))
3657 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3658 && !optimize_size)
3659 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3660
3661 /* ??? Unwind info is not correct around the CFG unless either a frame
3662 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3663 unwind info generation to be aware of the CFG and propagating states
3664 around edges. */
3665 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3666 || flag_exceptions || flag_non_call_exceptions)
3667 && flag_omit_frame_pointer
3668 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3669 {
3670 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3671 warning (0, "unwind tables currently require either a frame pointer "
3672 "or %saccumulate-outgoing-args%s for correctness",
3673 prefix, suffix);
3674 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3675 }
3676
3677 /* If stack probes are required, the space used for large function
3678 arguments on the stack must also be probed, so enable
3679 -maccumulate-outgoing-args so this happens in the prologue. */
3680 if (TARGET_STACK_PROBE
3681 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3682 {
3683 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3684 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3685 "for correctness", prefix, suffix);
3686 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3687 }
3688
3689   /* For sane SSE instruction set generation we need the fcomi instruction.
3690      It is safe to enable all CMOVE instructions.  Also, the RDRAND intrinsic
3691      expands to a sequence that includes a conditional move.  */
3692 if (TARGET_SSE || TARGET_RDRND)
3693 TARGET_CMOVE = 1;
3694
3695 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3696 {
3697 char *p;
3698 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3699 p = strchr (internal_label_prefix, 'X');
3700 internal_label_prefix_len = p - internal_label_prefix;
3701 *p = '\0';
3702 }
3703
3704   /* When the scheduling description is not available, disable the scheduler
3705      pass so it won't slow down compilation and make x87 code slower.  */
3706 if (!TARGET_SCHEDULE)
3707 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3708
3709 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3710 ix86_cost->simultaneous_prefetches,
3711 global_options.x_param_values,
3712 global_options_set.x_param_values);
3713 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
3714 global_options.x_param_values,
3715 global_options_set.x_param_values);
3716 maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
3717 global_options.x_param_values,
3718 global_options_set.x_param_values);
3719 maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
3720 global_options.x_param_values,
3721 global_options_set.x_param_values);
3722
3723   /* Enable software prefetching at -O3 for CPUs where prefetching is helpful.  */
3724 if (flag_prefetch_loop_arrays < 0
3725 && HAVE_prefetch
3726 && optimize >= 3
3727 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
3728 flag_prefetch_loop_arrays = 1;
3729
3730 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3731 can be optimized to ap = __builtin_next_arg (0). */
3732 if (!TARGET_64BIT && !flag_split_stack)
3733 targetm.expand_builtin_va_start = NULL;
3734
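  /* Select the DImode or SImode flavors of a few commonly used insn
     generator functions, depending on the target word size.  */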
3735 if (TARGET_64BIT)
3736 {
3737 ix86_gen_leave = gen_leave_rex64;
3738 ix86_gen_add3 = gen_adddi3;
3739 ix86_gen_sub3 = gen_subdi3;
3740 ix86_gen_sub3_carry = gen_subdi3_carry;
3741 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3742 ix86_gen_monitor = gen_sse3_monitor64;
3743 ix86_gen_andsp = gen_anddi3;
3744 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
3745 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
3746 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
3747 }
3748 else
3749 {
3750 ix86_gen_leave = gen_leave;
3751 ix86_gen_add3 = gen_addsi3;
3752 ix86_gen_sub3 = gen_subsi3;
3753 ix86_gen_sub3_carry = gen_subsi3_carry;
3754 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3755 ix86_gen_monitor = gen_sse3_monitor;
3756 ix86_gen_andsp = gen_andsi3;
3757 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
3758 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
3759 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
3760 }
3761
3762 #ifdef USE_IX86_CLD
3763 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3764 if (!TARGET_64BIT)
3765 target_flags |= MASK_CLD & ~target_flags_explicit;
3766 #endif
3767
3768 if (!TARGET_64BIT && flag_pic)
3769 {
3770 if (flag_fentry > 0)
3771 sorry ("-mfentry isn%'t supported for 32-bit in combination "
3772 "with -fpic");
3773 flag_fentry = 0;
3774 }
3775 else if (TARGET_SEH)
3776 {
3777 if (flag_fentry == 0)
3778 sorry ("-mno-fentry isn%'t compatible with SEH");
3779 flag_fentry = 1;
3780 }
3781 else if (flag_fentry < 0)
3782 {
3783 #if defined(PROFILE_BEFORE_PROLOGUE)
3784 flag_fentry = 1;
3785 #else
3786 flag_fentry = 0;
3787 #endif
3788 }
3789
3790 if (TARGET_AVX)
3791 {
3792       /* When not optimizing for size, enable vzeroupper optimization for
3793 TARGET_AVX with -fexpensive-optimizations and split 32-byte
3794 AVX unaligned load/store. */
3795 if (!optimize_size)
3796 {
3797 if (flag_expensive_optimizations
3798 && !(target_flags_explicit & MASK_VZEROUPPER))
3799 target_flags |= MASK_VZEROUPPER;
3800 if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
3801 && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
3802 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
3803 if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
3804 && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
3805 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
3806 /* Enable 128-bit AVX instruction generation for the auto-vectorizer. */
3807 if (TARGET_AVX128_OPTIMAL && !(target_flags_explicit & MASK_PREFER_AVX128))
3808 target_flags |= MASK_PREFER_AVX128;
3809 }
3810 }
3811 else
3812 {
3813 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
3814 target_flags &= ~MASK_VZEROUPPER;
3815 }
3816
3817   /* Save the initial options in case the user uses function-specific
3818      options.  */
3819 if (main_args_p)
3820 target_option_default_node = target_option_current_node
3821 = build_target_option_node ();
3822 }
3823
3824 /* Return TRUE if VAL is passed in register with 256bit AVX modes. */
3825
3826 static bool
3827 function_pass_avx256_p (const_rtx val)
3828 {
3829 if (!val)
3830 return false;
3831
3832 if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
3833 return true;
3834
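  /* Values passed or returned in several registers are represented as a
     PARALLEL of EXPR_LISTs; check each piece for an OImode or 256-bit
     AVX register.  */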
3835 if (GET_CODE (val) == PARALLEL)
3836 {
3837 int i;
3838 rtx r;
3839
3840 for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
3841 {
3842 r = XVECEXP (val, 0, i);
3843 if (GET_CODE (r) == EXPR_LIST
3844 && XEXP (r, 0)
3845 && REG_P (XEXP (r, 0))
3846 && (GET_MODE (XEXP (r, 0)) == OImode
3847 || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
3848 return true;
3849 }
3850 }
3851
3852 return false;
3853 }
3854
3855 /* Implement the TARGET_OPTION_OVERRIDE hook. */
3856
3857 static void
3858 ix86_option_override (void)
3859 {
3860 ix86_option_override_internal (true);
3861 }
3862
3863 /* Update register usage after having seen the compiler flags. */
3864
3865 static void
3866 ix86_conditional_register_usage (void)
3867 {
3868 int i;
3869 unsigned int j;
3870
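  /* Entries greater than 1 in the initial fixed/call-used register tables
     are mode dependent: resolve them now that we know whether we are
     generating 64-bit code.  */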
3871 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3872 {
3873 if (fixed_regs[i] > 1)
3874 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3875 if (call_used_regs[i] > 1)
3876 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3877 }
3878
3879 /* The PIC register, if it exists, is fixed. */
3880 j = PIC_OFFSET_TABLE_REGNUM;
3881 if (j != INVALID_REGNUM)
3882 fixed_regs[j] = call_used_regs[j] = 1;
3883
3884 /* The 64-bit MS_ABI changes the set of call-used registers. */
3885 if (TARGET_64BIT_MS_ABI)
3886 {
3887 call_used_regs[SI_REG] = 0;
3888 call_used_regs[DI_REG] = 0;
3889 call_used_regs[XMM6_REG] = 0;
3890 call_used_regs[XMM7_REG] = 0;
3891 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3892 call_used_regs[i] = 0;
3893 }
3894
3895 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3896 other call-clobbered regs for 64-bit. */
3897 if (TARGET_64BIT)
3898 {
3899 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3900
3901 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3902 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3903 && call_used_regs[i])
3904 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3905 }
3906
3907 /* If MMX is disabled, squash the registers. */
3908 if (! TARGET_MMX)
3909 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3910 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3911 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3912
3913 /* If SSE is disabled, squash the registers. */
3914 if (! TARGET_SSE)
3915 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3916 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3917 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3918
3919 /* If the FPU is disabled, squash the registers. */
3920 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3921 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3922 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3923 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3924
3925 /* If 32-bit, squash the 64-bit registers. */
3926 if (! TARGET_64BIT)
3927 {
3928 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3929 reg_names[i] = "";
3930 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3931 reg_names[i] = "";
3932 }
3933 }
3934
3935 \f
3936 /* Save the current options.  */
3937
3938 static void
3939 ix86_function_specific_save (struct cl_target_option *ptr)
3940 {
3941 ptr->arch = ix86_arch;
3942 ptr->schedule = ix86_schedule;
3943 ptr->tune = ix86_tune;
3944 ptr->branch_cost = ix86_branch_cost;
3945 ptr->tune_defaulted = ix86_tune_defaulted;
3946 ptr->arch_specified = ix86_arch_specified;
3947 ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3948 ptr->ix86_target_flags_explicit = target_flags_explicit;
3949
3950 /* The fields are char but the variables are not; make sure the
3951 values fit in the fields. */
3952 gcc_assert (ptr->arch == ix86_arch);
3953 gcc_assert (ptr->schedule == ix86_schedule);
3954 gcc_assert (ptr->tune == ix86_tune);
3955 gcc_assert (ptr->branch_cost == ix86_branch_cost);
3956 }
3957
3958 /* Restore the current options.  */
3959
3960 static void
3961 ix86_function_specific_restore (struct cl_target_option *ptr)
3962 {
3963 enum processor_type old_tune = ix86_tune;
3964 enum processor_type old_arch = ix86_arch;
3965 unsigned int ix86_arch_mask, ix86_tune_mask;
3966 int i;
3967
3968 ix86_arch = (enum processor_type) ptr->arch;
3969 ix86_schedule = (enum attr_cpu) ptr->schedule;
3970 ix86_tune = (enum processor_type) ptr->tune;
3971 ix86_branch_cost = ptr->branch_cost;
3972 ix86_tune_defaulted = ptr->tune_defaulted;
3973 ix86_arch_specified = ptr->arch_specified;
3974 ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
3975 target_flags_explicit = ptr->ix86_target_flags_explicit;
3976
3977 /* Recreate the arch feature tests if the arch changed */
3978 if (old_arch != ix86_arch)
3979 {
3980 ix86_arch_mask = 1u << ix86_arch;
3981 for (i = 0; i < X86_ARCH_LAST; ++i)
3982 ix86_arch_features[i]
3983 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3984 }
3985
3986 /* Recreate the tune optimization tests */
3987 if (old_tune != ix86_tune)
3988 {
3989 ix86_tune_mask = 1u << ix86_tune;
3990 for (i = 0; i < X86_TUNE_LAST; ++i)
3991 ix86_tune_features[i]
3992 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3993 }
3994 }
3995
3996 /* Print the current options.  */
3997
3998 static void
3999 ix86_function_specific_print (FILE *file, int indent,
4000 struct cl_target_option *ptr)
4001 {
4002 char *target_string
4003 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4004 NULL, NULL, ptr->x_ix86_fpmath, false);
4005
4006 fprintf (file, "%*sarch = %d (%s)\n",
4007 indent, "",
4008 ptr->arch,
4009 ((ptr->arch < TARGET_CPU_DEFAULT_max)
4010 ? cpu_names[ptr->arch]
4011 : "<unknown>"));
4012
4013 fprintf (file, "%*stune = %d (%s)\n",
4014 indent, "",
4015 ptr->tune,
4016 ((ptr->tune < TARGET_CPU_DEFAULT_max)
4017 ? cpu_names[ptr->tune]
4018 : "<unknown>"));
4019
4020 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4021
4022 if (target_string)
4023 {
4024 fprintf (file, "%*s%s\n", indent, "", target_string);
4025 free (target_string);
4026 }
4027 }
4028
4029 \f
4030 /* Inner function to process the attribute((target(...))): take an argument
4031    and set the current options from that argument.  If we have a list,
4032    recursively process each element of the list.  */
4033
4034 static bool
4035 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4036 struct gcc_options *enum_opts_set)
4037 {
4038 char *next_optstr;
4039 bool ret = true;
4040
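  /* Helper macros for the table of recognized attribute(target(...)) options
     below: S is the option string, O the corresponding command-line option
     enumerator, and M a target_flags mask for simple on/off flags.  */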
4041 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4042 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4043 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4044 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4045 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4046
4047 enum ix86_opt_type
4048 {
4049 ix86_opt_unknown,
4050 ix86_opt_yes,
4051 ix86_opt_no,
4052 ix86_opt_str,
4053 ix86_opt_enum,
4054 ix86_opt_isa
4055 };
4056
4057 static const struct
4058 {
4059 const char *string;
4060 size_t len;
4061 enum ix86_opt_type type;
4062 int opt;
4063 int mask;
4064 } attrs[] = {
4065 /* isa options */
4066 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4067 IX86_ATTR_ISA ("abm", OPT_mabm),
4068 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4069 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4070 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4071 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4072 IX86_ATTR_ISA ("aes", OPT_maes),
4073 IX86_ATTR_ISA ("avx", OPT_mavx),
4074 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4075 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4076 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4077 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4078 IX86_ATTR_ISA ("sse", OPT_msse),
4079 IX86_ATTR_ISA ("sse2", OPT_msse2),
4080 IX86_ATTR_ISA ("sse3", OPT_msse3),
4081 IX86_ATTR_ISA ("sse4", OPT_msse4),
4082 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4083 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4084 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4085 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4086 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4087 IX86_ATTR_ISA ("fma", OPT_mfma),
4088 IX86_ATTR_ISA ("xop", OPT_mxop),
4089 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4090 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4091 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4092 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4093
4094 /* enum options */
4095 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4096
4097 /* string options */
4098 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4099 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4100
4101 /* flag options */
4102 IX86_ATTR_YES ("cld",
4103 OPT_mcld,
4104 MASK_CLD),
4105
4106 IX86_ATTR_NO ("fancy-math-387",
4107 OPT_mfancy_math_387,
4108 MASK_NO_FANCY_MATH_387),
4109
4110 IX86_ATTR_YES ("ieee-fp",
4111 OPT_mieee_fp,
4112 MASK_IEEE_FP),
4113
4114 IX86_ATTR_YES ("inline-all-stringops",
4115 OPT_minline_all_stringops,
4116 MASK_INLINE_ALL_STRINGOPS),
4117
4118 IX86_ATTR_YES ("inline-stringops-dynamically",
4119 OPT_minline_stringops_dynamically,
4120 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4121
4122 IX86_ATTR_NO ("align-stringops",
4123 OPT_mno_align_stringops,
4124 MASK_NO_ALIGN_STRINGOPS),
4125
4126 IX86_ATTR_YES ("recip",
4127 OPT_mrecip,
4128 MASK_RECIP),
4129
4130 };
4131
4132 /* If this is a list, recurse to get the options. */
4133 if (TREE_CODE (args) == TREE_LIST)
4134 {
4135 bool ret = true;
4136
4137 for (; args; args = TREE_CHAIN (args))
4138 if (TREE_VALUE (args)
4139 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4140 p_strings, enum_opts_set))
4141 ret = false;
4142
4143 return ret;
4144 }
4145
4146 else if (TREE_CODE (args) != STRING_CST)
4147 gcc_unreachable ();
4148
4149 /* Handle multiple arguments separated by commas. */
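  /* For example, attribute((target("sse4.1,no-avx,arch=core2"))) arrives
     here as the string "sse4.1,no-avx,arch=core2"; each comma-separated
     piece is matched against the table above, with a "no-" prefix
     negating the option.  */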
4150 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4151
4152 while (next_optstr && *next_optstr != '\0')
4153 {
4154 char *p = next_optstr;
4155 char *orig_p = p;
4156 char *comma = strchr (next_optstr, ',');
4157 const char *opt_string;
4158 size_t len, opt_len;
4159 int opt;
4160 bool opt_set_p;
4161 char ch;
4162 unsigned i;
4163 enum ix86_opt_type type = ix86_opt_unknown;
4164 int mask = 0;
4165
4166 if (comma)
4167 {
4168 *comma = '\0';
4169 len = comma - next_optstr;
4170 next_optstr = comma + 1;
4171 }
4172 else
4173 {
4174 len = strlen (p);
4175 next_optstr = NULL;
4176 }
4177
4178 /* Recognize no-xxx. */
4179 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4180 {
4181 opt_set_p = false;
4182 p += 3;
4183 len -= 3;
4184 }
4185 else
4186 opt_set_p = true;
4187
4188 /* Find the option. */
4189 ch = *p;
4190 opt = N_OPTS;
4191 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4192 {
4193 type = attrs[i].type;
4194 opt_len = attrs[i].len;
4195 if (ch == attrs[i].string[0]
4196 && ((type != ix86_opt_str && type != ix86_opt_enum)
4197 ? len == opt_len
4198 : len > opt_len)
4199 && memcmp (p, attrs[i].string, opt_len) == 0)
4200 {
4201 opt = attrs[i].opt;
4202 mask = attrs[i].mask;
4203 opt_string = attrs[i].string;
4204 break;
4205 }
4206 }
4207
4208 /* Process the option. */
4209 if (opt == N_OPTS)
4210 {
4211 error ("attribute(target(\"%s\")) is unknown", orig_p);
4212 ret = false;
4213 }
4214
4215 else if (type == ix86_opt_isa)
4216 {
4217 struct cl_decoded_option decoded;
4218
4219 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4220 ix86_handle_option (&global_options, &global_options_set,
4221 &decoded, input_location);
4222 }
4223
4224 else if (type == ix86_opt_yes || type == ix86_opt_no)
4225 {
4226 if (type == ix86_opt_no)
4227 opt_set_p = !opt_set_p;
4228
4229 if (opt_set_p)
4230 target_flags |= mask;
4231 else
4232 target_flags &= ~mask;
4233 }
4234
4235 else if (type == ix86_opt_str)
4236 {
4237 if (p_strings[opt])
4238 {
4239 error ("option(\"%s\") was already specified", opt_string);
4240 ret = false;
4241 }
4242 else
4243 p_strings[opt] = xstrdup (p + opt_len);
4244 }
4245
4246 else if (type == ix86_opt_enum)
4247 {
4248 bool arg_ok;
4249 int value;
4250
4251 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4252 if (arg_ok)
4253 set_option (&global_options, enum_opts_set, opt, value,
4254 p + opt_len, DK_UNSPECIFIED, input_location,
4255 global_dc);
4256 else
4257 {
4258 error ("attribute(target(\"%s\")) is unknown", orig_p);
4259 ret = false;
4260 }
4261 }
4262
4263 else
4264 gcc_unreachable ();
4265 }
4266
4267 return ret;
4268 }
4269
4270 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4271
4272 tree
4273 ix86_valid_target_attribute_tree (tree args)
4274 {
4275 const char *orig_arch_string = ix86_arch_string;
4276 const char *orig_tune_string = ix86_tune_string;
4277 enum fpmath_unit orig_fpmath_set = global_options_set.x_ix86_fpmath;
4278 int orig_tune_defaulted = ix86_tune_defaulted;
4279 int orig_arch_specified = ix86_arch_specified;
4280 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4281 tree t = NULL_TREE;
4282 int i;
4283 struct cl_target_option *def
4284 = TREE_TARGET_OPTION (target_option_default_node);
4285 struct gcc_options enum_opts_set;
4286
4287 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4288
4289 /* Process each of the options on the chain. */
4290 if (! ix86_valid_target_attribute_inner_p (args, option_strings,
4291 &enum_opts_set))
4292 return NULL_TREE;
4293
4294 /* If the changed options are different from the default, rerun
4295 ix86_option_override_internal, and then save the options away.
4296      The string options are attribute options, and will be undone
4297 when we copy the save structure. */
4298 if (ix86_isa_flags != def->x_ix86_isa_flags
4299 || target_flags != def->x_target_flags
4300 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4301 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4302 || enum_opts_set.x_ix86_fpmath)
4303 {
4304 /* If we are using the default tune= or arch=, undo the string assigned,
4305 and use the default. */
4306 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4307 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4308 else if (!orig_arch_specified)
4309 ix86_arch_string = NULL;
4310
4311 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4312 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4313 else if (orig_tune_defaulted)
4314 ix86_tune_string = NULL;
4315
4316 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4317 if (enum_opts_set.x_ix86_fpmath)
4318 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4319 else if (!TARGET_64BIT && TARGET_SSE)
4320 {
4321 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4322 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4323 }
4324
4325 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4326 ix86_option_override_internal (false);
4327
4328 /* Add any builtin functions with the new isa if any. */
4329 ix86_add_new_builtins (ix86_isa_flags);
4330
4331 /* Save the current options unless we are validating options for
4332 #pragma. */
4333 t = build_target_option_node ();
4334
4335 ix86_arch_string = orig_arch_string;
4336 ix86_tune_string = orig_tune_string;
4337 global_options_set.x_ix86_fpmath = orig_fpmath_set;
4338
4339 /* Free up memory allocated to hold the strings */
4340 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4341 free (option_strings[i]);
4342 }
4343
4344 return t;
4345 }
4346
4347 /* Hook to validate attribute((target("string"))). */
4348
4349 static bool
4350 ix86_valid_target_attribute_p (tree fndecl,
4351 tree ARG_UNUSED (name),
4352 tree args,
4353 int ARG_UNUSED (flags))
4354 {
4355 struct cl_target_option cur_target;
4356 bool ret = true;
4357 tree old_optimize = build_optimization_node ();
4358 tree new_target, new_optimize;
4359 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4360
4361 /* If the function changed the optimization levels as well as setting target
4362 options, start with the optimizations specified. */
4363 if (func_optimize && func_optimize != old_optimize)
4364 cl_optimization_restore (&global_options,
4365 TREE_OPTIMIZATION (func_optimize));
4366
4367 /* The target attributes may also change some optimization flags, so update
4368 the optimization options if necessary. */
4369 cl_target_option_save (&cur_target, &global_options);
4370 new_target = ix86_valid_target_attribute_tree (args);
4371 new_optimize = build_optimization_node ();
4372
4373 if (!new_target)
4374 ret = false;
4375
4376 else if (fndecl)
4377 {
4378 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4379
4380 if (old_optimize != new_optimize)
4381 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4382 }
4383
4384 cl_target_option_restore (&global_options, &cur_target);
4385
4386 if (old_optimize != new_optimize)
4387 cl_optimization_restore (&global_options,
4388 TREE_OPTIMIZATION (old_optimize));
4389
4390 return ret;
4391 }
4392
4393 \f
4394 /* Hook to determine if one function can safely inline another. */
4395
4396 static bool
4397 ix86_can_inline_p (tree caller, tree callee)
4398 {
4399 bool ret = false;
4400 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4401 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4402
4403 /* If callee has no option attributes, then it is ok to inline. */
4404 if (!callee_tree)
4405 ret = true;
4406
4407 /* If caller has no option attributes, but callee does then it is not ok to
4408 inline. */
4409 else if (!caller_tree)
4410 ret = false;
4411
4412 else
4413 {
4414 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4415 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4416
4417       /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
4418 	 function can inline an SSE2 function but an SSE2 function can't inline
4419 	 an SSE4 function.  */
4420 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4421 != callee_opts->x_ix86_isa_flags)
4422 ret = false;
4423
4424 /* See if we have the same non-isa options. */
4425 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4426 ret = false;
4427
4428 /* See if arch, tune, etc. are the same. */
4429 else if (caller_opts->arch != callee_opts->arch)
4430 ret = false;
4431
4432 else if (caller_opts->tune != callee_opts->tune)
4433 ret = false;
4434
4435 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4436 ret = false;
4437
4438 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4439 ret = false;
4440
4441 else
4442 ret = true;
4443 }
4444
4445 return ret;
4446 }
4447
4448 \f
4449 /* Remember the last target of ix86_set_current_function. */
4450 static GTY(()) tree ix86_previous_fndecl;
4451
4452 /* Establish appropriate back-end context for processing the function
4453 FNDECL. The argument might be NULL to indicate processing at top
4454 level, outside of any function scope. */
4455 static void
4456 ix86_set_current_function (tree fndecl)
4457 {
4458 /* Only change the context if the function changes. This hook is called
4459 several times in the course of compiling a function, and we don't want to
4460 slow things down too much or call target_reinit when it isn't safe. */
4461 if (fndecl && fndecl != ix86_previous_fndecl)
4462 {
4463 tree old_tree = (ix86_previous_fndecl
4464 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4465 : NULL_TREE);
4466
4467 tree new_tree = (fndecl
4468 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4469 : NULL_TREE);
4470
4471 ix86_previous_fndecl = fndecl;
4472 if (old_tree == new_tree)
4473 ;
4474
4475 else if (new_tree)
4476 {
4477 cl_target_option_restore (&global_options,
4478 TREE_TARGET_OPTION (new_tree));
4479 target_reinit ();
4480 }
4481
4482 else if (old_tree)
4483 {
4484 struct cl_target_option *def
4485 = TREE_TARGET_OPTION (target_option_current_node);
4486
4487 cl_target_option_restore (&global_options, def);
4488 target_reinit ();
4489 }
4490 }
4491 }
4492
4493 \f
4494 /* Return true if this goes in large data/bss. */
4495
4496 static bool
4497 ix86_in_large_data_p (tree exp)
4498 {
4499 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4500 return false;
4501
4502 /* Functions are never large data. */
4503 if (TREE_CODE (exp) == FUNCTION_DECL)
4504 return false;
4505
4506 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4507 {
4508 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4509 if (strcmp (section, ".ldata") == 0
4510 || strcmp (section, ".lbss") == 0)
4511 return true;
4512 return false;
4513 }
4514 else
4515 {
4516 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4517
4518 /* If this is an incomplete type with size 0, then we can't put it
4519 in data because it might be too big when completed. */
4520 if (!size || size > ix86_section_threshold)
4521 return true;
4522 }
4523
4524 return false;
4525 }
4526
4527 /* Switch to the appropriate section for output of DECL.
4528 DECL is either a `VAR_DECL' node or a constant of some sort.
4529 RELOC indicates whether forming the initial value of DECL requires
4530 link-time relocations. */
4531
4532 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4533 ATTRIBUTE_UNUSED;
4534
4535 static section *
4536 x86_64_elf_select_section (tree decl, int reloc,
4537 unsigned HOST_WIDE_INT align)
4538 {
4539 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4540 && ix86_in_large_data_p (decl))
4541 {
4542 const char *sname = NULL;
4543 unsigned int flags = SECTION_WRITE;
4544 switch (categorize_decl_for_section (decl, reloc))
4545 {
4546 case SECCAT_DATA:
4547 sname = ".ldata";
4548 break;
4549 case SECCAT_DATA_REL:
4550 sname = ".ldata.rel";
4551 break;
4552 case SECCAT_DATA_REL_LOCAL:
4553 sname = ".ldata.rel.local";
4554 break;
4555 case SECCAT_DATA_REL_RO:
4556 sname = ".ldata.rel.ro";
4557 break;
4558 case SECCAT_DATA_REL_RO_LOCAL:
4559 sname = ".ldata.rel.ro.local";
4560 break;
4561 case SECCAT_BSS:
4562 sname = ".lbss";
4563 flags |= SECTION_BSS;
4564 break;
4565 case SECCAT_RODATA:
4566 case SECCAT_RODATA_MERGE_STR:
4567 case SECCAT_RODATA_MERGE_STR_INIT:
4568 case SECCAT_RODATA_MERGE_CONST:
4569 sname = ".lrodata";
4570 flags = 0;
4571 break;
4572 case SECCAT_SRODATA:
4573 case SECCAT_SDATA:
4574 case SECCAT_SBSS:
4575 gcc_unreachable ();
4576 case SECCAT_TEXT:
4577 case SECCAT_TDATA:
4578 case SECCAT_TBSS:
4579 	  /* We don't split these for the medium model.  Place them into
4580 	     default sections and hope for the best.  */
4581 break;
4582 }
4583 if (sname)
4584 {
4585 /* We might get called with string constants, but get_named_section
4586 doesn't like them as they are not DECLs. Also, we need to set
4587 flags in that case. */
4588 if (!DECL_P (decl))
4589 return get_section (sname, flags, NULL);
4590 return get_named_section (decl, sname, reloc);
4591 }
4592 }
4593 return default_elf_select_section (decl, reloc, align);
4594 }
4595
4596 /* Build up a unique section name, expressed as a
4597 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4598 RELOC indicates whether the initial value of EXP requires
4599 link-time relocations. */
4600
4601 static void ATTRIBUTE_UNUSED
4602 x86_64_elf_unique_section (tree decl, int reloc)
4603 {
4604 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4605 && ix86_in_large_data_p (decl))
4606 {
4607 const char *prefix = NULL;
4608 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4609 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4610
4611 switch (categorize_decl_for_section (decl, reloc))
4612 {
4613 case SECCAT_DATA:
4614 case SECCAT_DATA_REL:
4615 case SECCAT_DATA_REL_LOCAL:
4616 case SECCAT_DATA_REL_RO:
4617 case SECCAT_DATA_REL_RO_LOCAL:
4618 prefix = one_only ? ".ld" : ".ldata";
4619 break;
4620 case SECCAT_BSS:
4621 prefix = one_only ? ".lb" : ".lbss";
4622 break;
4623 case SECCAT_RODATA:
4624 case SECCAT_RODATA_MERGE_STR:
4625 case SECCAT_RODATA_MERGE_STR_INIT:
4626 case SECCAT_RODATA_MERGE_CONST:
4627 prefix = one_only ? ".lr" : ".lrodata";
4628 break;
4629 case SECCAT_SRODATA:
4630 case SECCAT_SDATA:
4631 case SECCAT_SBSS:
4632 gcc_unreachable ();
4633 case SECCAT_TEXT:
4634 case SECCAT_TDATA:
4635 case SECCAT_TBSS:
4636 	  /* We don't split these for the medium model.  Place them into
4637 	     default sections and hope for the best.  */
4638 break;
4639 }
4640 if (prefix)
4641 {
4642 const char *name, *linkonce;
4643 char *string;
4644
4645 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4646 name = targetm.strip_name_encoding (name);
4647
4648 /* If we're using one_only, then there needs to be a .gnu.linkonce
4649 prefix to the section name. */
4650 linkonce = one_only ? ".gnu.linkonce" : "";
4651
4652 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4653
4654 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4655 return;
4656 }
4657 }
4658 default_unique_section (decl, reloc);
4659 }
4660
4661 #ifdef COMMON_ASM_OP
4662 /* This says how to output assembler code to declare an
4663 uninitialized external linkage data object.
4664
4665    For medium-model x86-64 we need to use the .largecomm directive for
4666 large objects. */
4667 void
4668 x86_elf_aligned_common (FILE *file,
4669 const char *name, unsigned HOST_WIDE_INT size,
4670 int align)
4671 {
4672 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4673 && size > (unsigned int)ix86_section_threshold)
4674 fputs (".largecomm\t", file);
4675 else
4676 fputs (COMMON_ASM_OP, file);
4677 assemble_name (file, name);
4678 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4679 size, align / BITS_PER_UNIT);
4680 }
4681 #endif
4682
4683 /* Utility function for targets to use in implementing
4684 ASM_OUTPUT_ALIGNED_BSS. */
4685
4686 void
4687 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4688 const char *name, unsigned HOST_WIDE_INT size,
4689 int align)
4690 {
4691 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4692 && size > (unsigned int)ix86_section_threshold)
4693 switch_to_section (get_named_section (decl, ".lbss", 0));
4694 else
4695 switch_to_section (bss_section);
4696 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4697 #ifdef ASM_DECLARE_OBJECT_NAME
4698 last_assemble_variable_decl = decl;
4699 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4700 #else
4701   /* The standard thing is just to output a label for the object.  */
4702 ASM_OUTPUT_LABEL (file, name);
4703 #endif /* ASM_DECLARE_OBJECT_NAME */
4704 ASM_OUTPUT_SKIP (file, size ? size : 1);
4705 }
4706 \f
4707 /* Decide whether we must probe the stack before any space allocation
4708 on this target. It's essentially TARGET_STACK_PROBE except when
4709 -fstack-check causes the stack to be already probed differently. */
4710
4711 bool
4712 ix86_target_stack_probe (void)
4713 {
4714 /* Do not probe the stack twice if static stack checking is enabled. */
4715 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
4716 return false;
4717
4718 return TARGET_STACK_PROBE;
4719 }
4720 \f
4721 /* Decide whether we can make a sibling call to a function. DECL is the
4722 declaration of the function being targeted by the call and EXP is the
4723 CALL_EXPR representing the call. */
4724
4725 static bool
4726 ix86_function_ok_for_sibcall (tree decl, tree exp)
4727 {
4728 tree type, decl_or_type;
4729 rtx a, b;
4730
4731 /* If we are generating position-independent code, we cannot sibcall
4732 optimize any indirect call, or a direct call to a global function,
4733 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
4734 if (!TARGET_MACHO
4735 && !TARGET_64BIT
4736 && flag_pic
4737 && (!decl || !targetm.binds_local_p (decl)))
4738 return false;
4739
4740 /* If we need to align the outgoing stack, then sibcalling would
4741 unalign the stack, which may break the called function. */
4742 if (ix86_minimum_incoming_stack_boundary (true)
4743 < PREFERRED_STACK_BOUNDARY)
4744 return false;
4745
4746 if (decl)
4747 {
4748 decl_or_type = decl;
4749 type = TREE_TYPE (decl);
4750 }
4751 else
4752 {
4753 /* We're looking at the CALL_EXPR, we need the type of the function. */
4754 type = CALL_EXPR_FN (exp); /* pointer expression */
4755 type = TREE_TYPE (type); /* pointer type */
4756 type = TREE_TYPE (type); /* function type */
4757 decl_or_type = type;
4758 }
4759
4760   /* Check that the return value locations are the same.  For example,
4761      if we are returning floats on the 80387 register stack, we cannot
4762 make a sibcall from a function that doesn't return a float to a
4763 function that does or, conversely, from a function that does return
4764 a float to a function that doesn't; the necessary stack adjustment
4765 would not be executed. This is also the place we notice
4766 differences in the return value ABI. Note that it is ok for one
4767 of the functions to have void return type as long as the return
4768 value of the other is passed in a register. */
4769 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4770 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4771 cfun->decl, false);
4772 if (STACK_REG_P (a) || STACK_REG_P (b))
4773 {
4774 if (!rtx_equal_p (a, b))
4775 return false;
4776 }
4777 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4778 {
4779 /* Disable sibcall if we need to generate vzeroupper after
4780 callee returns. */
4781 if (TARGET_VZEROUPPER
4782 && cfun->machine->callee_return_avx256_p
4783 && !cfun->machine->caller_return_avx256_p)
4784 return false;
4785 }
4786 else if (!rtx_equal_p (a, b))
4787 return false;
4788
4789 if (TARGET_64BIT)
4790 {
4791 /* The SYSV ABI has more call-clobbered registers;
4792 disallow sibcalls from MS to SYSV. */
4793 if (cfun->machine->call_abi == MS_ABI
4794 && ix86_function_type_abi (type) == SYSV_ABI)
4795 return false;
4796 }
4797 else
4798 {
4799 /* If this call is indirect, we'll need to be able to use a
4800 call-clobbered register for the address of the target function.
4801 Make sure that all such registers are not used for passing
4802 parameters. Note that DLLIMPORT functions are indirect. */
4803 if (!decl
4804 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4805 {
4806 if (ix86_function_regparm (type, NULL) >= 3)
4807 {
4808 /* ??? Need to count the actual number of registers to be used,
4809 not the possible number of registers. Fix later. */
4810 return false;
4811 }
4812 }
4813 }
4814
4815 /* Otherwise okay. That also includes certain types of indirect calls. */
4816 return true;
4817 }
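
/* As a rough illustration of the PIC rule above (the function names here
   are made up, not taken from GCC): compiling with -m32 -fPIC -O2,

     extern int global_target (int);
     static int local_target (int x) { return x + 1; }

     int call_global (int x) { return global_target (x); }
     int call_local (int x) { return local_target (x); }

   call_global is normally rejected as a sibcall because the call goes
   through the PLT and %ebx must stay live, while call_local may be
   sibcalled since targetm.binds_local_p is true for the static function.  */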
4818
4819 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4820 and "sseregparm" calling convention attributes;
4821 arguments as in struct attribute_spec.handler. */
4822
4823 static tree
4824 ix86_handle_cconv_attribute (tree *node, tree name,
4825 tree args,
4826 int flags ATTRIBUTE_UNUSED,
4827 bool *no_add_attrs)
4828 {
4829 if (TREE_CODE (*node) != FUNCTION_TYPE
4830 && TREE_CODE (*node) != METHOD_TYPE
4831 && TREE_CODE (*node) != FIELD_DECL
4832 && TREE_CODE (*node) != TYPE_DECL)
4833 {
4834 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4835 name);
4836 *no_add_attrs = true;
4837 return NULL_TREE;
4838 }
4839
4840 /* Can combine regparm with all attributes but fastcall and thiscall. */
4841 if (is_attribute_p ("regparm", name))
4842 {
4843 tree cst;
4844
4845 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4846 {
4847 error ("fastcall and regparm attributes are not compatible");
4848 }
4849
4850 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4851 {
4852 error ("regparam and thiscall attributes are not compatible");
4853 }
4854
4855 cst = TREE_VALUE (args);
4856 if (TREE_CODE (cst) != INTEGER_CST)
4857 {
4858 warning (OPT_Wattributes,
4859 "%qE attribute requires an integer constant argument",
4860 name);
4861 *no_add_attrs = true;
4862 }
4863 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4864 {
4865 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4866 name, REGPARM_MAX);
4867 *no_add_attrs = true;
4868 }
4869
4870 return NULL_TREE;
4871 }
4872
4873 if (TARGET_64BIT)
4874 {
4875 /* Do not warn when emulating the MS ABI. */
4876 if ((TREE_CODE (*node) != FUNCTION_TYPE
4877 && TREE_CODE (*node) != METHOD_TYPE)
4878 || ix86_function_type_abi (*node) != MS_ABI)
4879 warning (OPT_Wattributes, "%qE attribute ignored",
4880 name);
4881 *no_add_attrs = true;
4882 return NULL_TREE;
4883 }
4884
4885 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4886 if (is_attribute_p ("fastcall", name))
4887 {
4888 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4889 {
4890 error ("fastcall and cdecl attributes are not compatible");
4891 }
4892 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4893 {
4894 error ("fastcall and stdcall attributes are not compatible");
4895 }
4896 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4897 {
4898 error ("fastcall and regparm attributes are not compatible");
4899 }
4900 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4901 {
4902 error ("fastcall and thiscall attributes are not compatible");
4903 }
4904 }
4905
4906 /* Can combine stdcall with fastcall (redundant), regparm and
4907 sseregparm. */
4908 else if (is_attribute_p ("stdcall", name))
4909 {
4910 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4911 {
4912 error ("stdcall and cdecl attributes are not compatible");
4913 }
4914 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4915 {
4916 error ("stdcall and fastcall attributes are not compatible");
4917 }
4918 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4919 {
4920 error ("stdcall and thiscall attributes are not compatible");
4921 }
4922 }
4923
4924 /* Can combine cdecl with regparm and sseregparm. */
4925 else if (is_attribute_p ("cdecl", name))
4926 {
4927 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4928 {
4929 error ("stdcall and cdecl attributes are not compatible");
4930 }
4931 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4932 {
4933 error ("fastcall and cdecl attributes are not compatible");
4934 }
4935 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4936 {
4937 error ("cdecl and thiscall attributes are not compatible");
4938 }
4939 }
4940 else if (is_attribute_p ("thiscall", name))
4941 {
4942 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
4943 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
4944 name);
4945 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4946 {
4947 error ("stdcall and thiscall attributes are not compatible");
4948 }
4949 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4950 {
4951 error ("fastcall and thiscall attributes are not compatible");
4952 }
4953 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4954 {
4955 error ("cdecl and thiscall attributes are not compatible");
4956 }
4957 }
4958
4959 /* Can combine sseregparm with all attributes. */
4960
4961 return NULL_TREE;
4962 }
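
/* A sketch of how the checks above behave on user code (the declarations
   are hypothetical):

     void __attribute__ ((fastcall, cdecl)) f (int);
     void __attribute__ ((stdcall, sseregparm)) g (float);

   The first declaration reaches this handler once per attribute and is
   diagnosed with "fastcall and cdecl attributes are not compatible"; the
   second is accepted, since sseregparm combines with any of the base
   conventions.  */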
4963
4964 /* This function determines from TYPE the calling-convention. */
4965
4966 unsigned int
4967 ix86_get_callcvt (const_tree type)
4968 {
4969 unsigned int ret = 0;
4970 bool is_stdarg;
4971 tree attrs;
4972
4973 if (TARGET_64BIT)
4974 return IX86_CALLCVT_CDECL;
4975
4976 attrs = TYPE_ATTRIBUTES (type);
4977 if (attrs != NULL_TREE)
4978 {
4979 if (lookup_attribute ("cdecl", attrs))
4980 ret |= IX86_CALLCVT_CDECL;
4981 else if (lookup_attribute ("stdcall", attrs))
4982 ret |= IX86_CALLCVT_STDCALL;
4983 else if (lookup_attribute ("fastcall", attrs))
4984 ret |= IX86_CALLCVT_FASTCALL;
4985 else if (lookup_attribute ("thiscall", attrs))
4986 ret |= IX86_CALLCVT_THISCALL;
4987
4988 /* Regparm isn't allowed for thiscall and fastcall. */
4989 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
4990 {
4991 if (lookup_attribute ("regparm", attrs))
4992 ret |= IX86_CALLCVT_REGPARM;
4993 if (lookup_attribute ("sseregparm", attrs))
4994 ret |= IX86_CALLCVT_SSEREGPARM;
4995 }
4996
4997 if (IX86_BASE_CALLCVT(ret) != 0)
4998 return ret;
4999 }
5000
5001 is_stdarg = stdarg_p (type);
5002 if (TARGET_RTD && !is_stdarg)
5003 return IX86_CALLCVT_STDCALL | ret;
5004
5005 if (ret != 0
5006 || is_stdarg
5007 || TREE_CODE (type) != METHOD_TYPE
5008 || ix86_function_type_abi (type) != MS_ABI)
5009 return IX86_CALLCVT_CDECL | ret;
5010
5011 return IX86_CALLCVT_THISCALL;
5012 }
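
/* For instance, with -mrtd (TARGET_RTD) a non-variadic function with no
   convention attribute is treated as IX86_CALLCVT_STDCALL here, whereas
   the same function declared with an ellipsis, e.g.

     int log_msg (const char *fmt, ...);

   still gets IX86_CALLCVT_CDECL, because the callee cannot know how many
   argument bytes to pop.  (log_msg is just an illustrative name.)  */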
5013
5014 /* Return 0 if the attributes for two types are incompatible, 1 if they
5015 are compatible, and 2 if they are nearly compatible (which causes a
5016 warning to be generated). */
5017
5018 static int
5019 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5020 {
5021 unsigned int ccvt1, ccvt2;
5022
5023 if (TREE_CODE (type1) != FUNCTION_TYPE
5024 && TREE_CODE (type1) != METHOD_TYPE)
5025 return 1;
5026
5027 ccvt1 = ix86_get_callcvt (type1);
5028 ccvt2 = ix86_get_callcvt (type2);
5029 if (ccvt1 != ccvt2)
5030 return 0;
5031 if (ix86_function_regparm (type1, NULL)
5032 != ix86_function_regparm (type2, NULL))
5033 return 0;
5034
5035 return 1;
5036 }
5037 \f
5038 /* Return the regparm value for a function with the indicated TYPE and DECL.
5039 DECL may be NULL when calling function indirectly
5040 or considering a libcall. */
5041
5042 static int
5043 ix86_function_regparm (const_tree type, const_tree decl)
5044 {
5045 tree attr;
5046 int regparm;
5047 unsigned int ccvt;
5048
5049 if (TARGET_64BIT)
5050 return (ix86_function_type_abi (type) == SYSV_ABI
5051 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5052 ccvt = ix86_get_callcvt (type);
5053 regparm = ix86_regparm;
5054
5055 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5056 {
5057 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5058 if (attr)
5059 {
5060 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5061 return regparm;
5062 }
5063 }
5064 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5065 return 2;
5066 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5067 return 1;
5068
5069 /* Use register calling convention for local functions when possible. */
5070 if (decl
5071 && TREE_CODE (decl) == FUNCTION_DECL
5072 && optimize
5073 && !(profile_flag && !flag_fentry))
5074 {
5075 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5076 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5077 if (i && i->local && i->can_change_signature)
5078 {
5079 int local_regparm, globals = 0, regno;
5080
5081 /* Make sure no regparm register is taken by a
5082 fixed register variable. */
5083 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5084 if (fixed_regs[local_regparm])
5085 break;
5086
5087 /* We don't want to use regparm(3) for nested functions as
5088 these use a static chain pointer in the third argument. */
5089 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5090 local_regparm = 2;
5091
5092 /* In 32-bit mode save a register for the split stack. */
5093 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5094 local_regparm = 2;
5095
5096 /* Each fixed register usage increases register pressure,
5097 so fewer registers should be used for argument passing.
5098 This functionality can be overridden by an explicit
5099 regparm value. */
5100 for (regno = 0; regno <= DI_REG; regno++)
5101 if (fixed_regs[regno])
5102 globals++;
5103
5104 local_regparm
5105 = globals < local_regparm ? local_regparm - globals : 0;
5106
5107 if (local_regparm > regparm)
5108 regparm = local_regparm;
5109 }
5110 }
5111
5112 return regparm;
5113 }
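
/* A sketch of the effect (the declaration is hypothetical):

     int __attribute__ ((regparm (3))) add3 (int a, int b, int c);

   makes the first three integral arguments arrive in %eax, %edx and %ecx
   instead of on the stack.  For a local (static) function whose signature
   the compiler is free to change, the code above applies the same
   promotion automatically when optimizing, minus any registers claimed by
   fixed register variables, the static chain or -fsplit-stack.  */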
5114
5115 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5116 DFmode (2) arguments in SSE registers for a function with the
5117 indicated TYPE and DECL. DECL may be NULL when calling function
5118 indirectly or considering a libcall. Otherwise return 0. */
5119
5120 static int
5121 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5122 {
5123 gcc_assert (!TARGET_64BIT);
5124
5125 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5126 by the sseregparm attribute. */
5127 if (TARGET_SSEREGPARM
5128 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5129 {
5130 if (!TARGET_SSE)
5131 {
5132 if (warn)
5133 {
5134 if (decl)
5135 error ("calling %qD with attribute sseregparm without "
5136 "SSE/SSE2 enabled", decl);
5137 else
5138 error ("calling %qT with attribute sseregparm without "
5139 "SSE/SSE2 enabled", type);
5140 }
5141 return 0;
5142 }
5143
5144 return 2;
5145 }
5146
5147 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5148 (and DFmode for SSE2) arguments in SSE registers. */
5149 if (decl && TARGET_SSE_MATH && optimize
5150 && !(profile_flag && !flag_fentry))
5151 {
5152 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5153 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5154 if (i && i->local && i->can_change_signature)
5155 return TARGET_SSE2 ? 2 : 1;
5156 }
5157
5158 return 0;
5159 }
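
/* For example (hypothetical declaration), with -m32 -msse2

     float __attribute__ ((sseregparm)) scale (float x, float y);

   passes x and y in SSE registers (this function returns 2), while the
   same declaration without SSE enabled is rejected with the
   "calling ... with attribute sseregparm without SSE/SSE2 enabled" error
   above.  */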
5160
5161 /* Return true if EAX is live at the start of the function. Used by
5162 ix86_expand_prologue to determine if we need special help before
5163 calling allocate_stack_worker. */
5164
5165 static bool
5166 ix86_eax_live_at_start_p (void)
5167 {
5168 /* Cheat. Don't bother working forward from ix86_function_regparm
5169 to the function type to whether an actual argument is located in
5170 eax. Instead just look at cfg info, which is still close enough
5171 to correct at this point. This gives false positives for broken
5172 functions that might use uninitialized data that happens to be
5173 allocated in eax, but who cares? */
5174 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
5175 }
5176
5177 static bool
5178 ix86_keep_aggregate_return_pointer (tree fntype)
5179 {
5180 tree attr;
5181
5182 if (!TARGET_64BIT)
5183 {
5184 attr = lookup_attribute ("callee_pop_aggregate_return",
5185 TYPE_ATTRIBUTES (fntype));
5186 if (attr)
5187 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5188
5189 /* For 32-bit MS-ABI the default is to keep aggregate
5190 return pointer. */
5191 if (ix86_function_type_abi (fntype) == MS_ABI)
5192 return true;
5193 }
5194 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5195 }
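
/* In other words, for 32-bit code a declaration such as

     struct big { int v[4]; };
     struct big __attribute__ ((callee_pop_aggregate_return (0))) get (void);

   (names are illustrative) makes this predicate true, so the hidden
   return-slot pointer is left for the caller to pop, while an argument of
   1 makes it false; without the attribute the 32-bit MS ABI defaults to
   keeping the pointer and other cases follow
   KEEP_AGGREGATE_RETURN_POINTER.  */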
5196
5197 /* Value is the number of bytes of arguments automatically
5198 popped when returning from a subroutine call.
5199 FUNDECL is the declaration node of the function (as a tree),
5200 FUNTYPE is the data type of the function (as a tree),
5201 or for a library call it is an identifier node for the subroutine name.
5202 SIZE is the number of bytes of arguments passed on the stack.
5203
5204 On the 80386, the RTD insn may be used to pop them if the number
5205 of args is fixed, but if the number is variable then the caller
5206 must pop them all. RTD can't be used for library calls now
5207 because the library is compiled with the Unix compiler.
5208 Use of RTD is a selectable option, since it is incompatible with
5209 standard Unix calling sequences. If the option is not selected,
5210 the caller must always pop the args.
5211
5212 The attribute stdcall is equivalent to RTD on a per module basis. */
5213
5214 static int
5215 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5216 {
5217 unsigned int ccvt;
5218
5219 /* None of the 64-bit ABIs pop arguments. */
5220 if (TARGET_64BIT)
5221 return 0;
5222
5223 ccvt = ix86_get_callcvt (funtype);
5224
5225 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5226 | IX86_CALLCVT_THISCALL)) != 0
5227 && ! stdarg_p (funtype))
5228 return size;
5229
5230 /* Lose any fake structure return argument if it is passed on the stack. */
5231 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5232 && !ix86_keep_aggregate_return_pointer (funtype))
5233 {
5234 int nregs = ix86_function_regparm (funtype, fundecl);
5235 if (nregs == 0)
5236 return GET_MODE_SIZE (Pmode);
5237 }
5238
5239 return 0;
5240 }
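
/* Concretely, for 32-bit code (declarations are illustrative only):

     int __attribute__ ((stdcall)) f (int a, int b);
     int g (int a, int b);

   f makes this hook return 8, so the callee pops its 8 bytes of arguments
   (a "ret $8"), while g returns 0 and the caller pops.  A variadic
   function never pops its arguments this way, since the callee cannot
   know how many bytes were pushed.  */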
5241 \f
5242 /* Argument support functions. */
5243
5244 /* Return true when register may be used to pass function parameters. */
5245 bool
5246 ix86_function_arg_regno_p (int regno)
5247 {
5248 int i;
5249 const int *parm_regs;
5250
5251 if (!TARGET_64BIT)
5252 {
5253 if (TARGET_MACHO)
5254 return (regno < REGPARM_MAX
5255 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5256 else
5257 return (regno < REGPARM_MAX
5258 || (TARGET_MMX && MMX_REGNO_P (regno)
5259 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5260 || (TARGET_SSE && SSE_REGNO_P (regno)
5261 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5262 }
5263
5264 if (TARGET_MACHO)
5265 {
5266 if (SSE_REGNO_P (regno) && TARGET_SSE)
5267 return true;
5268 }
5269 else
5270 {
5271 if (TARGET_SSE && SSE_REGNO_P (regno)
5272 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5273 return true;
5274 }
5275
5276 /* TODO: The function should depend on current function ABI but
5277 builtins.c would need updating then. Therefore we use the
5278 default ABI. */
5279
5280 /* RAX is used as hidden argument to va_arg functions. */
5281 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5282 return true;
5283
5284 if (ix86_abi == MS_ABI)
5285 parm_regs = x86_64_ms_abi_int_parameter_registers;
5286 else
5287 parm_regs = x86_64_int_parameter_registers;
5288 for (i = 0; i < (ix86_abi == MS_ABI
5289 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5290 if (regno == parm_regs[i])
5291 return true;
5292 return false;
5293 }
5294
5295 /* Return true if we do not know how to pass TYPE solely in registers. */
5296
5297 static bool
5298 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5299 {
5300 if (must_pass_in_stack_var_size_or_pad (mode, type))
5301 return true;
5302
5303 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5304 The layout_type routine is crafty and tries to trick us into passing
5305 currently unsupported vector types on the stack by using TImode. */
5306 return (!TARGET_64BIT && mode == TImode
5307 && type && TREE_CODE (type) != VECTOR_TYPE);
5308 }
5309
5310 /* Return the size, in bytes, of the area reserved for arguments passed
5311 in registers for the function represented by FNDECL, depending on the
5312 ABI used. */
5313 int
5314 ix86_reg_parm_stack_space (const_tree fndecl)
5315 {
5316 enum calling_abi call_abi = SYSV_ABI;
5317 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5318 call_abi = ix86_function_abi (fndecl);
5319 else
5320 call_abi = ix86_function_type_abi (fndecl);
5321 if (TARGET_64BIT && call_abi == MS_ABI)
5322 return 32;
5323 return 0;
5324 }
5325
5326 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
5327 call ABI used. */
5328 enum calling_abi
5329 ix86_function_type_abi (const_tree fntype)
5330 {
5331 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
5332 {
5333 enum calling_abi abi = ix86_abi;
5334 if (abi == SYSV_ABI)
5335 {
5336 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5337 abi = MS_ABI;
5338 }
5339 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5340 abi = SYSV_ABI;
5341 return abi;
5342 }
5343 return ix86_abi;
5344 }
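
/* For example, on a target whose default ix86_abi is SYSV_ABI,

     void __attribute__ ((ms_abi)) w64_callback (void *);

   (a hypothetical declaration) is reported as MS_ABI here, and on a
   target defaulting to MS_ABI the sysv_abi attribute selects SYSV_ABI in
   the same way.  */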
5345
5346 static bool
5347 ix86_function_ms_hook_prologue (const_tree fn)
5348 {
5349 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5350 {
5351 if (decl_function_context (fn) != NULL_TREE)
5352 error_at (DECL_SOURCE_LOCATION (fn),
5353 "ms_hook_prologue is not compatible with nested function");
5354 else
5355 return true;
5356 }
5357 return false;
5358 }
5359
5360 static enum calling_abi
5361 ix86_function_abi (const_tree fndecl)
5362 {
5363 if (! fndecl)
5364 return ix86_abi;
5365 return ix86_function_type_abi (TREE_TYPE (fndecl));
5366 }
5367
5368 /* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the
5369 call ABI used. */
5370 enum calling_abi
5371 ix86_cfun_abi (void)
5372 {
5373 if (! cfun)
5374 return ix86_abi;
5375 return cfun->machine->call_abi;
5376 }
5377
5378 /* Write the extra assembler code needed to declare a function properly. */
5379
5380 void
5381 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5382 tree decl)
5383 {
5384 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5385
5386 if (is_ms_hook)
5387 {
5388 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5389 unsigned int filler_cc = 0xcccccccc;
5390
5391 for (i = 0; i < filler_count; i += 4)
5392 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5393 }
5394
5395 #ifdef SUBTARGET_ASM_UNWIND_INIT
5396 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
5397 #endif
5398
5399 ASM_OUTPUT_LABEL (asm_out_file, fname);
5400
5401 /* Output magic byte marker, if hot-patch attribute is set. */
5402 if (is_ms_hook)
5403 {
5404 if (TARGET_64BIT)
5405 {
5406 /* leaq [%rsp + 0], %rsp */
5407 asm_fprintf (asm_out_file, ASM_BYTE
5408 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5409 }
5410 else
5411 {
5412 /* movl.s %edi, %edi
5413 push %ebp
5414 movl.s %esp, %ebp */
5415 asm_fprintf (asm_out_file, ASM_BYTE
5416 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5417 }
5418 }
5419 }
5420
5421 /* regclass.c */
5422 extern void init_regs (void);
5423
5424 /* Implementation of the call ABI switching target hook. The call
5425 register sets specific to FNDECL are selected here. See also
5426 ix86_conditional_register_usage for more details. */
5427 void
5428 ix86_call_abi_override (const_tree fndecl)
5429 {
5430 if (fndecl == NULL_TREE)
5431 cfun->machine->call_abi = ix86_abi;
5432 else
5433 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5434 }
5435
5436 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers.
5437 Avoid the expensive re-initialization of init_regs each time we switch
5438 function context, since this is needed only during RTL expansion. */
5439 static void
5440 ix86_maybe_switch_abi (void)
5441 {
5442 if (TARGET_64BIT &&
5443 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5444 reinit_regs ();
5445 }
5446
5447 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5448 for a call to a function whose data type is FNTYPE.
5449 For a library call, FNTYPE is 0. */
5450
5451 void
5452 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5453 tree fntype, /* tree ptr for function decl */
5454 rtx libname, /* SYMBOL_REF of library name or 0 */
5455 tree fndecl,
5456 int caller)
5457 {
5458 struct cgraph_local_info *i;
5459 tree fnret_type;
5460
5461 memset (cum, 0, sizeof (*cum));
5462
5463 /* Initialize for the current callee. */
5464 if (caller)
5465 {
5466 cfun->machine->callee_pass_avx256_p = false;
5467 cfun->machine->callee_return_avx256_p = false;
5468 }
5469
5470 if (fndecl)
5471 {
5472 i = cgraph_local_info (fndecl);
5473 cum->call_abi = ix86_function_abi (fndecl);
5474 fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
5475 }
5476 else
5477 {
5478 i = NULL;
5479 cum->call_abi = ix86_function_type_abi (fntype);
5480 if (fntype)
5481 fnret_type = TREE_TYPE (fntype);
5482 else
5483 fnret_type = NULL;
5484 }
5485
5486 if (TARGET_VZEROUPPER && fnret_type)
5487 {
5488 rtx fnret_value = ix86_function_value (fnret_type, fntype,
5489 false);
5490 if (function_pass_avx256_p (fnret_value))
5491 {
5492 /* The return value of this function uses 256bit AVX modes. */
5493 if (caller)
5494 cfun->machine->callee_return_avx256_p = true;
5495 else
5496 cfun->machine->caller_return_avx256_p = true;
5497 }
5498 }
5499
5500 cum->caller = caller;
5501
5502 /* Set up the number of registers to use for passing arguments. */
5503
5504 if (TARGET_64BIT && cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5505 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5506 "or subtarget optimization implying it");
5507 cum->nregs = ix86_regparm;
5508 if (TARGET_64BIT)
5509 {
5510 cum->nregs = (cum->call_abi == SYSV_ABI
5511 ? X86_64_REGPARM_MAX
5512 : X86_64_MS_REGPARM_MAX);
5513 }
5514 if (TARGET_SSE)
5515 {
5516 cum->sse_nregs = SSE_REGPARM_MAX;
5517 if (TARGET_64BIT)
5518 {
5519 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5520 ? X86_64_SSE_REGPARM_MAX
5521 : X86_64_MS_SSE_REGPARM_MAX);
5522 }
5523 }
5524 if (TARGET_MMX)
5525 cum->mmx_nregs = MMX_REGPARM_MAX;
5526 cum->warn_avx = true;
5527 cum->warn_sse = true;
5528 cum->warn_mmx = true;
5529
5530 /* Because the type might mismatch between caller and callee, we need to
5531 use the actual type of the function for local calls.
5532 FIXME: cgraph_analyze can be told to actually record if a function uses
5533 va_start, so for local functions maybe_vaarg can be made more aggressive,
5534 helping K&R code.
5535 FIXME: once the type system is fixed, we won't need this code anymore. */
5536 if (i && i->local && i->can_change_signature)
5537 fntype = TREE_TYPE (fndecl);
5538 cum->maybe_vaarg = (fntype
5539 ? (!prototype_p (fntype) || stdarg_p (fntype))
5540 : !libname);
5541
5542 if (!TARGET_64BIT)
5543 {
5544 /* If there are variable arguments, then we won't pass anything
5545 in registers in 32-bit mode. */
5546 if (stdarg_p (fntype))
5547 {
5548 cum->nregs = 0;
5549 cum->sse_nregs = 0;
5550 cum->mmx_nregs = 0;
5551 cum->warn_avx = 0;
5552 cum->warn_sse = 0;
5553 cum->warn_mmx = 0;
5554 return;
5555 }
5556
5557 /* Use ecx and edx registers if function has fastcall attribute,
5558 else look for regparm information. */
5559 if (fntype)
5560 {
5561 unsigned int ccvt = ix86_get_callcvt (fntype);
5562 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5563 {
5564 cum->nregs = 1;
5565 cum->fastcall = 1; /* Same first register as in fastcall. */
5566 }
5567 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5568 {
5569 cum->nregs = 2;
5570 cum->fastcall = 1;
5571 }
5572 else
5573 cum->nregs = ix86_function_regparm (fntype, fndecl);
5574 }
5575
5576 /* Set up the number of SSE registers used for passing SFmode
5577 and DFmode arguments. Warn for mismatching ABI. */
5578 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
5579 }
5580 }
5581
5582 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5583 But in the case of vector types, it is some vector mode.
5584
5585 When we have only some of our vector isa extensions enabled, then there
5586 are some modes for which vector_mode_supported_p is false. For these
5587 modes, the generic vector support in gcc will choose some non-vector mode
5588 in order to implement the type. By computing the natural mode, we'll
5589 select the proper ABI location for the operand and not depend on whatever
5590 the middle-end decides to do with these vector types.
5591
5592 The middle-end can't deal with vector types larger than 16 bytes. In
5593 that case, we return the original mode and warn about the ABI change if
5594 CUM isn't NULL. */
5595
5596 static enum machine_mode
5597 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5598 {
5599 enum machine_mode mode = TYPE_MODE (type);
5600
5601 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5602 {
5603 HOST_WIDE_INT size = int_size_in_bytes (type);
5604 if ((size == 8 || size == 16 || size == 32)
5605 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5606 && TYPE_VECTOR_SUBPARTS (type) > 1)
5607 {
5608 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5609
5610 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5611 mode = MIN_MODE_VECTOR_FLOAT;
5612 else
5613 mode = MIN_MODE_VECTOR_INT;
5614
5615 /* Get the mode which has this inner mode and number of units. */
5616 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5617 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5618 && GET_MODE_INNER (mode) == innermode)
5619 {
5620 if (size == 32 && !TARGET_AVX)
5621 {
5622 static bool warnedavx;
5623
5624 if (cum
5625 && !warnedavx
5626 && cum->warn_avx)
5627 {
5628 warnedavx = true;
5629 warning (0, "AVX vector argument without AVX "
5630 "enabled changes the ABI");
5631 }
5632 return TYPE_MODE (type);
5633 }
5634 else
5635 return mode;
5636 }
5637
5638 gcc_unreachable ();
5639 }
5640 }
5641
5642 return mode;
5643 }
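
/* As an illustration (the typedef is hypothetical), for

     typedef int v8si __attribute__ ((vector_size (32)));

   this returns V8SImode when AVX is enabled, but without -mavx it falls
   back to TYPE_MODE (type) and, the first time, emits the "AVX vector
   argument without AVX enabled changes the ABI" warning, provided CUM is
   non-NULL and warn_avx is set.  */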
5644
5645 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5646 this may not agree with the mode that the type system has chosen for the
5647 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5648 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5649
5650 static rtx
5651 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5652 unsigned int regno)
5653 {
5654 rtx tmp;
5655
5656 if (orig_mode != BLKmode)
5657 tmp = gen_rtx_REG (orig_mode, regno);
5658 else
5659 {
5660 tmp = gen_rtx_REG (mode, regno);
5661 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5662 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5663 }
5664
5665 return tmp;
5666 }
5667
5668 /* x86-64 register passing implementation. See the x86-64 ABI for details.
5669 The goal of this code is to classify each 8 bytes of an incoming argument
5670 by register class and assign registers accordingly. */
5671
5672 /* Return the union class of CLASS1 and CLASS2.
5673 See the x86-64 PS ABI for details. */
5674
5675 static enum x86_64_reg_class
5676 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5677 {
5678 /* Rule #1: If both classes are equal, this is the resulting class. */
5679 if (class1 == class2)
5680 return class1;
5681
5682 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5683 the other class. */
5684 if (class1 == X86_64_NO_CLASS)
5685 return class2;
5686 if (class2 == X86_64_NO_CLASS)
5687 return class1;
5688
5689 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5690 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5691 return X86_64_MEMORY_CLASS;
5692
5693 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5694 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5695 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5696 return X86_64_INTEGERSI_CLASS;
5697 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5698 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5699 return X86_64_INTEGER_CLASS;
5700
5701 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5702 MEMORY is used. */
5703 if (class1 == X86_64_X87_CLASS
5704 || class1 == X86_64_X87UP_CLASS
5705 || class1 == X86_64_COMPLEX_X87_CLASS
5706 || class2 == X86_64_X87_CLASS
5707 || class2 == X86_64_X87UP_CLASS
5708 || class2 == X86_64_COMPLEX_X87_CLASS)
5709 return X86_64_MEMORY_CLASS;
5710
5711 /* Rule #6: Otherwise class SSE is used. */
5712 return X86_64_SSE_CLASS;
5713 }
5714
5715 /* Classify the argument of type TYPE and mode MODE.
5716 CLASSES will be filled by the register class used to pass each word
5717 of the operand. The number of words is returned. In case the parameter
5718 should be passed in memory, 0 is returned. As a special case for zero
5719 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5720
5721 BIT_OFFSET is used internally for handling records and specifies the
5722 offset in bits modulo 256, to avoid overflow cases.
5723
5724 See the x86-64 PS ABI for details.
5725 */
5726
5727 static int
5728 classify_argument (enum machine_mode mode, const_tree type,
5729 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5730 {
5731 HOST_WIDE_INT bytes =
5732 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5733 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5734
5735 /* Variable sized entities are always passed/returned in memory. */
5736 if (bytes < 0)
5737 return 0;
5738
5739 if (mode != VOIDmode
5740 && targetm.calls.must_pass_in_stack (mode, type))
5741 return 0;
5742
5743 if (type && AGGREGATE_TYPE_P (type))
5744 {
5745 int i;
5746 tree field;
5747 enum x86_64_reg_class subclasses[MAX_CLASSES];
5748
5749 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5750 if (bytes > 32)
5751 return 0;
5752
5753 for (i = 0; i < words; i++)
5754 classes[i] = X86_64_NO_CLASS;
5755
5756 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
5757 signal the memory class, so handle this as a special case. */
5758 if (!words)
5759 {
5760 classes[0] = X86_64_NO_CLASS;
5761 return 1;
5762 }
5763
5764 /* Classify each field of record and merge classes. */
5765 switch (TREE_CODE (type))
5766 {
5767 case RECORD_TYPE:
5768 /* And now merge the fields of structure. */
5769 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5770 {
5771 if (TREE_CODE (field) == FIELD_DECL)
5772 {
5773 int num;
5774
5775 if (TREE_TYPE (field) == error_mark_node)
5776 continue;
5777
5778 /* Bitfields are always classified as integer. Handle them
5779 early, since later code would consider them to be
5780 misaligned integers. */
5781 if (DECL_BIT_FIELD (field))
5782 {
5783 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5784 i < ((int_bit_position (field) + (bit_offset % 64))
5785 + tree_low_cst (DECL_SIZE (field), 0)
5786 + 63) / 8 / 8; i++)
5787 classes[i] =
5788 merge_classes (X86_64_INTEGER_CLASS,
5789 classes[i]);
5790 }
5791 else
5792 {
5793 int pos;
5794
5795 type = TREE_TYPE (field);
5796
5797 /* Flexible array member is ignored. */
5798 if (TYPE_MODE (type) == BLKmode
5799 && TREE_CODE (type) == ARRAY_TYPE
5800 && TYPE_SIZE (type) == NULL_TREE
5801 && TYPE_DOMAIN (type) != NULL_TREE
5802 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5803 == NULL_TREE))
5804 {
5805 static bool warned;
5806
5807 if (!warned && warn_psabi)
5808 {
5809 warned = true;
5810 inform (input_location,
5811 "the ABI of passing struct with"
5812 " a flexible array member has"
5813 " changed in GCC 4.4");
5814 }
5815 continue;
5816 }
5817 num = classify_argument (TYPE_MODE (type), type,
5818 subclasses,
5819 (int_bit_position (field)
5820 + bit_offset) % 256);
5821 if (!num)
5822 return 0;
5823 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5824 for (i = 0; i < num && (i + pos) < words; i++)
5825 classes[i + pos] =
5826 merge_classes (subclasses[i], classes[i + pos]);
5827 }
5828 }
5829 }
5830 break;
5831
5832 case ARRAY_TYPE:
5833 /* Arrays are handled as small records. */
5834 {
5835 int num;
5836 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5837 TREE_TYPE (type), subclasses, bit_offset);
5838 if (!num)
5839 return 0;
5840
5841 /* The partial classes are now full classes. */
5842 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5843 subclasses[0] = X86_64_SSE_CLASS;
5844 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5845 && !((bit_offset % 64) == 0 && bytes == 4))
5846 subclasses[0] = X86_64_INTEGER_CLASS;
5847
5848 for (i = 0; i < words; i++)
5849 classes[i] = subclasses[i % num];
5850
5851 break;
5852 }
5853 case UNION_TYPE:
5854 case QUAL_UNION_TYPE:
5855 /* Unions are similar to RECORD_TYPE, but the offset is
5856 always 0. */
5857 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5858 {
5859 if (TREE_CODE (field) == FIELD_DECL)
5860 {
5861 int num;
5862
5863 if (TREE_TYPE (field) == error_mark_node)
5864 continue;
5865
5866 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5867 TREE_TYPE (field), subclasses,
5868 bit_offset);
5869 if (!num)
5870 return 0;
5871 for (i = 0; i < num; i++)
5872 classes[i] = merge_classes (subclasses[i], classes[i]);
5873 }
5874 }
5875 break;
5876
5877 default:
5878 gcc_unreachable ();
5879 }
5880
5881 if (words > 2)
5882 {
5883 /* When the size is greater than 16 bytes, if the first class isn't
5884 X86_64_SSE_CLASS or any of the remaining classes isn't
5885 X86_64_SSEUP_CLASS, everything should be passed in
5886 memory. */
5887 if (classes[0] != X86_64_SSE_CLASS)
5888 return 0;
5889
5890 for (i = 1; i < words; i++)
5891 if (classes[i] != X86_64_SSEUP_CLASS)
5892 return 0;
5893 }
5894
5895 /* Final merger cleanup. */
5896 for (i = 0; i < words; i++)
5897 {
5898 /* If one class is MEMORY, everything should be passed in
5899 memory. */
5900 if (classes[i] == X86_64_MEMORY_CLASS)
5901 return 0;
5902
5903 /* X86_64_SSEUP_CLASS should always be preceded by
5904 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5905 if (classes[i] == X86_64_SSEUP_CLASS
5906 && classes[i - 1] != X86_64_SSE_CLASS
5907 && classes[i - 1] != X86_64_SSEUP_CLASS)
5908 {
5909 /* The first one should never be X86_64_SSEUP_CLASS. */
5910 gcc_assert (i != 0);
5911 classes[i] = X86_64_SSE_CLASS;
5912 }
5913
5914 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5915 everything should be passed in memory. */
5916 if (classes[i] == X86_64_X87UP_CLASS
5917 && (classes[i - 1] != X86_64_X87_CLASS))
5918 {
5919 static bool warned;
5920
5921 /* The first one should never be X86_64_X87UP_CLASS. */
5922 gcc_assert (i != 0);
5923 if (!warned && warn_psabi)
5924 {
5925 warned = true;
5926 inform (input_location,
5927 "the ABI of passing union with long double"
5928 " has changed in GCC 4.4");
5929 }
5930 return 0;
5931 }
5932 }
5933 return words;
5934 }
5935
5936 /* Compute the alignment needed. We align all types to their natural
5937 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
5938 if (mode != VOIDmode && mode != BLKmode)
5939 {
5940 int mode_alignment = GET_MODE_BITSIZE (mode);
5941
5942 if (mode == XFmode)
5943 mode_alignment = 128;
5944 else if (mode == XCmode)
5945 mode_alignment = 256;
5946 if (COMPLEX_MODE_P (mode))
5947 mode_alignment /= 2;
5948 /* Misaligned fields are always returned in memory. */
5949 if (bit_offset % mode_alignment)
5950 return 0;
5951 }
5952
5953 /* For V1xx modes, just use the base mode. */
5954 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
5955 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5956 mode = GET_MODE_INNER (mode);
5957
5958 /* Classification of atomic types. */
5959 switch (mode)
5960 {
5961 case SDmode:
5962 case DDmode:
5963 classes[0] = X86_64_SSE_CLASS;
5964 return 1;
5965 case TDmode:
5966 classes[0] = X86_64_SSE_CLASS;
5967 classes[1] = X86_64_SSEUP_CLASS;
5968 return 2;
5969 case DImode:
5970 case SImode:
5971 case HImode:
5972 case QImode:
5973 case CSImode:
5974 case CHImode:
5975 case CQImode:
5976 {
5977 int size = (bit_offset % 64)+ (int) GET_MODE_BITSIZE (mode);
5978
5979 if (size <= 32)
5980 {
5981 classes[0] = X86_64_INTEGERSI_CLASS;
5982 return 1;
5983 }
5984 else if (size <= 64)
5985 {
5986 classes[0] = X86_64_INTEGER_CLASS;
5987 return 1;
5988 }
5989 else if (size <= 64+32)
5990 {
5991 classes[0] = X86_64_INTEGER_CLASS;
5992 classes[1] = X86_64_INTEGERSI_CLASS;
5993 return 2;
5994 }
5995 else if (size <= 64+64)
5996 {
5997 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5998 return 2;
5999 }
6000 else
6001 gcc_unreachable ();
6002 }
6003 case CDImode:
6004 case TImode:
6005 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6006 return 2;
6007 case COImode:
6008 case OImode:
6009 /* OImode shouldn't be used directly. */
6010 gcc_unreachable ();
6011 case CTImode:
6012 return 0;
6013 case SFmode:
6014 if (!(bit_offset % 64))
6015 classes[0] = X86_64_SSESF_CLASS;
6016 else
6017 classes[0] = X86_64_SSE_CLASS;
6018 return 1;
6019 case DFmode:
6020 classes[0] = X86_64_SSEDF_CLASS;
6021 return 1;
6022 case XFmode:
6023 classes[0] = X86_64_X87_CLASS;
6024 classes[1] = X86_64_X87UP_CLASS;
6025 return 2;
6026 case TFmode:
6027 classes[0] = X86_64_SSE_CLASS;
6028 classes[1] = X86_64_SSEUP_CLASS;
6029 return 2;
6030 case SCmode:
6031 classes[0] = X86_64_SSE_CLASS;
6032 if (!(bit_offset % 64))
6033 return 1;
6034 else
6035 {
6036 static bool warned;
6037
6038 if (!warned && warn_psabi)
6039 {
6040 warned = true;
6041 inform (input_location,
6042 "the ABI of passing structure with complex float"
6043 " member has changed in GCC 4.4");
6044 }
6045 classes[1] = X86_64_SSESF_CLASS;
6046 return 2;
6047 }
6048 case DCmode:
6049 classes[0] = X86_64_SSEDF_CLASS;
6050 classes[1] = X86_64_SSEDF_CLASS;
6051 return 2;
6052 case XCmode:
6053 classes[0] = X86_64_COMPLEX_X87_CLASS;
6054 return 1;
6055 case TCmode:
6056 /* This mode is larger than 16 bytes. */
6057 return 0;
6058 case V8SFmode:
6059 case V8SImode:
6060 case V32QImode:
6061 case V16HImode:
6062 case V4DFmode:
6063 case V4DImode:
6064 classes[0] = X86_64_SSE_CLASS;
6065 classes[1] = X86_64_SSEUP_CLASS;
6066 classes[2] = X86_64_SSEUP_CLASS;
6067 classes[3] = X86_64_SSEUP_CLASS;
6068 return 4;
6069 case V4SFmode:
6070 case V4SImode:
6071 case V16QImode:
6072 case V8HImode:
6073 case V2DFmode:
6074 case V2DImode:
6075 classes[0] = X86_64_SSE_CLASS;
6076 classes[1] = X86_64_SSEUP_CLASS;
6077 return 2;
6078 case V1TImode:
6079 case V1DImode:
6080 case V2SFmode:
6081 case V2SImode:
6082 case V4HImode:
6083 case V8QImode:
6084 classes[0] = X86_64_SSE_CLASS;
6085 return 1;
6086 case BLKmode:
6087 case VOIDmode:
6088 return 0;
6089 default:
6090 gcc_assert (VECTOR_MODE_P (mode));
6091
6092 if (bytes > 16)
6093 return 0;
6094
6095 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6096
6097 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6098 classes[0] = X86_64_INTEGERSI_CLASS;
6099 else
6100 classes[0] = X86_64_INTEGER_CLASS;
6101 classes[1] = X86_64_INTEGER_CLASS;
6102 return 1 + (bytes > 8);
6103 }
6104 }
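
/* A worked example of the classification (struct names are made up):

     struct s1 { long l; double d; };   two eightbytes; classes[0] becomes
       X86_64_INTEGER_CLASS and classes[1] X86_64_SSEDF_CLASS, so 2 is
       returned and the struct is passed in one integer and one SSE
       register.

     struct s2 { int a; float b; };     a single eightbyte whose halves
       classify as INTEGERSI and SSE; merge_classes folds that to
       X86_64_INTEGER_CLASS, so the whole struct travels in one integer
       register.

   Zero-sized aggregates come back as a single X86_64_NO_CLASS word, as
   described above.  */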
6105
6106 /* Examine the argument and set the number of registers required in each
6107 class. Return 0 iff the parameter should be passed in memory. */
6108 static int
6109 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6110 int *int_nregs, int *sse_nregs)
6111 {
6112 enum x86_64_reg_class regclass[MAX_CLASSES];
6113 int n = classify_argument (mode, type, regclass, 0);
6114
6115 *int_nregs = 0;
6116 *sse_nregs = 0;
6117 if (!n)
6118 return 0;
6119 for (n--; n >= 0; n--)
6120 switch (regclass[n])
6121 {
6122 case X86_64_INTEGER_CLASS:
6123 case X86_64_INTEGERSI_CLASS:
6124 (*int_nregs)++;
6125 break;
6126 case X86_64_SSE_CLASS:
6127 case X86_64_SSESF_CLASS:
6128 case X86_64_SSEDF_CLASS:
6129 (*sse_nregs)++;
6130 break;
6131 case X86_64_NO_CLASS:
6132 case X86_64_SSEUP_CLASS:
6133 break;
6134 case X86_64_X87_CLASS:
6135 case X86_64_X87UP_CLASS:
6136 if (!in_return)
6137 return 0;
6138 break;
6139 case X86_64_COMPLEX_X87_CLASS:
6140 return in_return ? 2 : 0;
6141 case X86_64_MEMORY_CLASS:
6142 gcc_unreachable ();
6143 }
6144 return 1;
6145 }
6146
6147 /* Construct container for the argument used by GCC interface. See
6148 FUNCTION_ARG for the detailed description. */
6149
6150 static rtx
6151 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6152 const_tree type, int in_return, int nintregs, int nsseregs,
6153 const int *intreg, int sse_regno)
6154 {
6155 /* The following variables hold the static issued_error state. */
6156 static bool issued_sse_arg_error;
6157 static bool issued_sse_ret_error;
6158 static bool issued_x87_ret_error;
6159
6160 enum machine_mode tmpmode;
6161 int bytes =
6162 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6163 enum x86_64_reg_class regclass[MAX_CLASSES];
6164 int n;
6165 int i;
6166 int nexps = 0;
6167 int needed_sseregs, needed_intregs;
6168 rtx exp[MAX_CLASSES];
6169 rtx ret;
6170
6171 n = classify_argument (mode, type, regclass, 0);
6172 if (!n)
6173 return NULL;
6174 if (!examine_argument (mode, type, in_return, &needed_intregs,
6175 &needed_sseregs))
6176 return NULL;
6177 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6178 return NULL;
6179
6180 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6181 some less clueful developer tries to use floating-point anyway. */
6182 if (needed_sseregs && !TARGET_SSE)
6183 {
6184 if (in_return)
6185 {
6186 if (!issued_sse_ret_error)
6187 {
6188 error ("SSE register return with SSE disabled");
6189 issued_sse_ret_error = true;
6190 }
6191 }
6192 else if (!issued_sse_arg_error)
6193 {
6194 error ("SSE register argument with SSE disabled");
6195 issued_sse_arg_error = true;
6196 }
6197 return NULL;
6198 }
6199
6200 /* Likewise, error if the ABI requires us to return values in the
6201 x87 registers and the user specified -mno-80387. */
6202 if (!TARGET_80387 && in_return)
6203 for (i = 0; i < n; i++)
6204 if (regclass[i] == X86_64_X87_CLASS
6205 || regclass[i] == X86_64_X87UP_CLASS
6206 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6207 {
6208 if (!issued_x87_ret_error)
6209 {
6210 error ("x87 register return with x87 disabled");
6211 issued_x87_ret_error = true;
6212 }
6213 return NULL;
6214 }
6215
6216 /* First construct simple cases. Avoid SCmode, since we want to use
6217 a single register to pass this type. */
6218 if (n == 1 && mode != SCmode)
6219 switch (regclass[0])
6220 {
6221 case X86_64_INTEGER_CLASS:
6222 case X86_64_INTEGERSI_CLASS:
6223 return gen_rtx_REG (mode, intreg[0]);
6224 case X86_64_SSE_CLASS:
6225 case X86_64_SSESF_CLASS:
6226 case X86_64_SSEDF_CLASS:
6227 if (mode != BLKmode)
6228 return gen_reg_or_parallel (mode, orig_mode,
6229 SSE_REGNO (sse_regno));
6230 break;
6231 case X86_64_X87_CLASS:
6232 case X86_64_COMPLEX_X87_CLASS:
6233 return gen_rtx_REG (mode, FIRST_STACK_REG);
6234 case X86_64_NO_CLASS:
6235 /* Zero sized array, struct or class. */
6236 return NULL;
6237 default:
6238 gcc_unreachable ();
6239 }
6240 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
6241 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
6242 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6243 if (n == 4
6244 && regclass[0] == X86_64_SSE_CLASS
6245 && regclass[1] == X86_64_SSEUP_CLASS
6246 && regclass[2] == X86_64_SSEUP_CLASS
6247 && regclass[3] == X86_64_SSEUP_CLASS
6248 && mode != BLKmode)
6249 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6250
6251 if (n == 2
6252 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
6253 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6254 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
6255 && regclass[1] == X86_64_INTEGER_CLASS
6256 && (mode == CDImode || mode == TImode || mode == TFmode)
6257 && intreg[0] + 1 == intreg[1])
6258 return gen_rtx_REG (mode, intreg[0]);
6259
6260 /* Otherwise figure out the entries of the PARALLEL. */
6261 for (i = 0; i < n; i++)
6262 {
6263 int pos;
6264
6265 switch (regclass[i])
6266 {
6267 case X86_64_NO_CLASS:
6268 break;
6269 case X86_64_INTEGER_CLASS:
6270 case X86_64_INTEGERSI_CLASS:
6271 /* Merge TImodes on aligned occasions here too. */
6272 if (i * 8 + 8 > bytes)
6273 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6274 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6275 tmpmode = SImode;
6276 else
6277 tmpmode = DImode;
6278 /* We've requested 24 bytes we don't have a mode for. Use DImode. */
6279 if (tmpmode == BLKmode)
6280 tmpmode = DImode;
6281 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6282 gen_rtx_REG (tmpmode, *intreg),
6283 GEN_INT (i*8));
6284 intreg++;
6285 break;
6286 case X86_64_SSESF_CLASS:
6287 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6288 gen_rtx_REG (SFmode,
6289 SSE_REGNO (sse_regno)),
6290 GEN_INT (i*8));
6291 sse_regno++;
6292 break;
6293 case X86_64_SSEDF_CLASS:
6294 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6295 gen_rtx_REG (DFmode,
6296 SSE_REGNO (sse_regno)),
6297 GEN_INT (i*8));
6298 sse_regno++;
6299 break;
6300 case X86_64_SSE_CLASS:
6301 pos = i;
6302 switch (n)
6303 {
6304 case 1:
6305 tmpmode = DImode;
6306 break;
6307 case 2:
6308 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6309 {
6310 tmpmode = TImode;
6311 i++;
6312 }
6313 else
6314 tmpmode = DImode;
6315 break;
6316 case 4:
6317 gcc_assert (i == 0
6318 && regclass[1] == X86_64_SSEUP_CLASS
6319 && regclass[2] == X86_64_SSEUP_CLASS
6320 && regclass[3] == X86_64_SSEUP_CLASS);
6321 tmpmode = OImode;
6322 i += 3;
6323 break;
6324 default:
6325 gcc_unreachable ();
6326 }
6327 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6328 gen_rtx_REG (tmpmode,
6329 SSE_REGNO (sse_regno)),
6330 GEN_INT (pos*8));
6331 sse_regno++;
6332 break;
6333 default:
6334 gcc_unreachable ();
6335 }
6336 }
6337
6338 /* Empty aligned struct, union or class. */
6339 if (nexps == 0)
6340 return NULL;
6341
6342 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6343 for (i = 0; i < nexps; i++)
6344 XVECEXP (ret, 0, i) = exp [i];
6345 return ret;
6346 }
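
/* Continuing the struct s1 { long l; double d; } example above, when it is
   the first argument of a SysV function this builds a PARALLEL along the
   lines of

     (parallel [(expr_list (reg:DI di) (const_int 0))
                (expr_list (reg:DF xmm0) (const_int 8))])

   i.e. the integer eightbyte is placed in the next free integer register
   at offset 0 and the SSE eightbyte in the next free SSE register at
   offset 8.  A 32-byte type classified SSE/SSEUP/SSEUP/SSEUP instead ends
   up as a single register reference (OImode in the BLKmode case).  */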
6347
6348 /* Update the data in CUM to advance over an argument of mode MODE
6349 and data type TYPE. (TYPE is null for libcalls where that information
6350 may not be available.) */
6351
6352 static void
6353 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6354 const_tree type, HOST_WIDE_INT bytes,
6355 HOST_WIDE_INT words)
6356 {
6357 switch (mode)
6358 {
6359 default:
6360 break;
6361
6362 case BLKmode:
6363 if (bytes < 0)
6364 break;
6365 /* FALLTHRU */
6366
6367 case DImode:
6368 case SImode:
6369 case HImode:
6370 case QImode:
6371 cum->words += words;
6372 cum->nregs -= words;
6373 cum->regno += words;
6374
6375 if (cum->nregs <= 0)
6376 {
6377 cum->nregs = 0;
6378 cum->regno = 0;
6379 }
6380 break;
6381
6382 case OImode:
6383 /* OImode shouldn't be used directly. */
6384 gcc_unreachable ();
6385
6386 case DFmode:
6387 if (cum->float_in_sse < 2)
6388 break;
6389 case SFmode:
6390 if (cum->float_in_sse < 1)
6391 break;
6392 /* FALLTHRU */
6393
6394 case V8SFmode:
6395 case V8SImode:
6396 case V32QImode:
6397 case V16HImode:
6398 case V4DFmode:
6399 case V4DImode:
6400 case TImode:
6401 case V16QImode:
6402 case V8HImode:
6403 case V4SImode:
6404 case V2DImode:
6405 case V4SFmode:
6406 case V2DFmode:
6407 if (!type || !AGGREGATE_TYPE_P (type))
6408 {
6409 cum->sse_words += words;
6410 cum->sse_nregs -= 1;
6411 cum->sse_regno += 1;
6412 if (cum->sse_nregs <= 0)
6413 {
6414 cum->sse_nregs = 0;
6415 cum->sse_regno = 0;
6416 }
6417 }
6418 break;
6419
6420 case V8QImode:
6421 case V4HImode:
6422 case V2SImode:
6423 case V2SFmode:
6424 case V1TImode:
6425 case V1DImode:
6426 if (!type || !AGGREGATE_TYPE_P (type))
6427 {
6428 cum->mmx_words += words;
6429 cum->mmx_nregs -= 1;
6430 cum->mmx_regno += 1;
6431 if (cum->mmx_nregs <= 0)
6432 {
6433 cum->mmx_nregs = 0;
6434 cum->mmx_regno = 0;
6435 }
6436 }
6437 break;
6438 }
6439 }
6440
6441 static void
6442 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6443 const_tree type, HOST_WIDE_INT words, bool named)
6444 {
6445 int int_nregs, sse_nregs;
6446
6447 /* Unnamed 256bit vector mode parameters are passed on the stack. */
6448 if (!named && VALID_AVX256_REG_MODE (mode))
6449 return;
6450
6451 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6452 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6453 {
6454 cum->nregs -= int_nregs;
6455 cum->sse_nregs -= sse_nregs;
6456 cum->regno += int_nregs;
6457 cum->sse_regno += sse_nregs;
6458 }
6459 else
6460 {
6461 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6462 cum->words = (cum->words + align - 1) & ~(align - 1);
6463 cum->words += words;
6464 }
6465 }
6466
6467 static void
6468 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6469 HOST_WIDE_INT words)
6470 {
6471 /* Otherwise, this should be passed indirectly. */
6472 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6473
6474 cum->words += words;
6475 if (cum->nregs > 0)
6476 {
6477 cum->nregs -= 1;
6478 cum->regno += 1;
6479 }
6480 }
6481
6482 /* Update the data in CUM to advance over an argument of mode MODE and
6483 data type TYPE. (TYPE is null for libcalls where that information
6484 may not be available.) */
6485
6486 static void
6487 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
6488 const_tree type, bool named)
6489 {
6490 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6491 HOST_WIDE_INT bytes, words;
6492
6493 if (mode == BLKmode)
6494 bytes = int_size_in_bytes (type);
6495 else
6496 bytes = GET_MODE_SIZE (mode);
6497 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6498
6499 if (type)
6500 mode = type_natural_mode (type, NULL);
6501
6502 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6503 function_arg_advance_ms_64 (cum, bytes, words);
6504 else if (TARGET_64BIT)
6505 function_arg_advance_64 (cum, mode, type, words, named);
6506 else
6507 function_arg_advance_32 (cum, mode, type, bytes, words);
6508 }
6509
6510 /* Define where to put the arguments to a function.
6511 Value is zero to push the argument on the stack,
6512 or a hard register in which to store the argument.
6513
6514 MODE is the argument's machine mode.
6515 TYPE is the data type of the argument (as a tree).
6516 This is null for libcalls where that information may
6517 not be available.
6518 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6519 the preceding args and about the function being called.
6520 NAMED is nonzero if this argument is a named parameter
6521 (otherwise it is an extra parameter matching an ellipsis). */
6522
6523 static rtx
6524 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6525 enum machine_mode orig_mode, const_tree type,
6526 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6527 {
6528 static bool warnedsse, warnedmmx;
6529
6530 /* Avoid the AL settings for the Unix64 ABI. */
6531 if (mode == VOIDmode)
6532 return constm1_rtx;
6533
6534 switch (mode)
6535 {
6536 default:
6537 break;
6538
6539 case BLKmode:
6540 if (bytes < 0)
6541 break;
6542 /* FALLTHRU */
6543 case DImode:
6544 case SImode:
6545 case HImode:
6546 case QImode:
6547 if (words <= cum->nregs)
6548 {
6549 int regno = cum->regno;
6550
6551 /* Fastcall allocates the first two DWORD (SImode) or
6552 smaller arguments to ECX and EDX if it isn't an
6553 aggregate type. */
6554 if (cum->fastcall)
6555 {
6556 if (mode == BLKmode
6557 || mode == DImode
6558 || (type && AGGREGATE_TYPE_P (type)))
6559 break;
6560
6561 /* ECX, not EAX, is the first allocated register. */
6562 if (regno == AX_REG)
6563 regno = CX_REG;
6564 }
6565 return gen_rtx_REG (mode, regno);
6566 }
6567 break;
6568
6569 case DFmode:
6570 if (cum->float_in_sse < 2)
6571 break;
6572 case SFmode:
6573 if (cum->float_in_sse < 1)
6574 break;
6575 /* FALLTHRU */
6576 case TImode:
6577 /* In 32bit, we pass TImode in xmm registers. */
6578 case V16QImode:
6579 case V8HImode:
6580 case V4SImode:
6581 case V2DImode:
6582 case V4SFmode:
6583 case V2DFmode:
6584 if (!type || !AGGREGATE_TYPE_P (type))
6585 {
6586 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6587 {
6588 warnedsse = true;
6589 warning (0, "SSE vector argument without SSE enabled "
6590 "changes the ABI");
6591 }
6592 if (cum->sse_nregs)
6593 return gen_reg_or_parallel (mode, orig_mode,
6594 cum->sse_regno + FIRST_SSE_REG);
6595 }
6596 break;
6597
6598 case OImode:
6599 /* OImode shouldn't be used directly. */
6600 gcc_unreachable ();
6601
6602 case V8SFmode:
6603 case V8SImode:
6604 case V32QImode:
6605 case V16HImode:
6606 case V4DFmode:
6607 case V4DImode:
6608 if (!type || !AGGREGATE_TYPE_P (type))
6609 {
6610 if (cum->sse_nregs)
6611 return gen_reg_or_parallel (mode, orig_mode,
6612 cum->sse_regno + FIRST_SSE_REG);
6613 }
6614 break;
6615
6616 case V8QImode:
6617 case V4HImode:
6618 case V2SImode:
6619 case V2SFmode:
6620 case V1TImode:
6621 case V1DImode:
6622 if (!type || !AGGREGATE_TYPE_P (type))
6623 {
6624 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6625 {
6626 warnedmmx = true;
6627 warning (0, "MMX vector argument without MMX enabled "
6628 "changes the ABI");
6629 }
6630 if (cum->mmx_nregs)
6631 return gen_reg_or_parallel (mode, orig_mode,
6632 cum->mmx_regno + FIRST_MMX_REG);
6633 }
6634 break;
6635 }
6636
6637 return NULL_RTX;
6638 }
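
/* For instance, under the 32-bit fastcall convention (hypothetical
   declaration)

     int __attribute__ ((fastcall)) mix (int a, int b, int c);

   a and b are returned here as ECX and EDX respectively, and c falls back
   to the stack once cum->nregs is exhausted, while SFmode/DFmode values
   only move into SSE registers when cum->float_in_sse asks for it (the
   sseregparm paths above).  */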
6639
6640 static rtx
6641 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6642 enum machine_mode orig_mode, const_tree type, bool named)
6643 {
6644 /* Handle a hidden AL argument containing number of registers
6645 for varargs x86-64 functions. */
6646 if (mode == VOIDmode)
6647 return GEN_INT (cum->maybe_vaarg
6648 ? (cum->sse_nregs < 0
6649 ? X86_64_SSE_REGPARM_MAX
6650 : cum->sse_regno)
6651 : -1);
6652
6653 switch (mode)
6654 {
6655 default:
6656 break;
6657
6658 case V8SFmode:
6659 case V8SImode:
6660 case V32QImode:
6661 case V16HImode:
6662 case V4DFmode:
6663 case V4DImode:
6664 /* Unnamed 256bit vector mode parameters are passed on the stack. */
6665 if (!named)
6666 return NULL;
6667 break;
6668 }
6669
6670 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6671 cum->sse_nregs,
6672 &x86_64_int_parameter_registers [cum->regno],
6673 cum->sse_regno);
6674 }
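
/* The VOIDmode case handles the SysV varargs convention: for a call such
   as printf ("%g\n", x) with x a double, one SSE register has been used,
   so the hidden value returned here is 1 and the caller sets %al to 1
   before the call; for a prototyped, non-variadic callee the result is -1
   and no %al setup is needed.  */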
6675
6676 static rtx
6677 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6678 enum machine_mode orig_mode, bool named,
6679 HOST_WIDE_INT bytes)
6680 {
6681 unsigned int regno;
6682
6683 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
6684 We use the value -2 to specify that the current function call is MSABI. */
6685 if (mode == VOIDmode)
6686 return GEN_INT (-2);
6687
6688 /* If we've run out of registers, it goes on the stack. */
6689 if (cum->nregs == 0)
6690 return NULL_RTX;
6691
6692 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6693
6694 /* Only floating point modes are passed in anything but integer regs. */
6695 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6696 {
6697 if (named)
6698 regno = cum->regno + FIRST_SSE_REG;
6699 else
6700 {
6701 rtx t1, t2;
6702
6703 /* Unnamed floating parameters are passed in both the
6704 SSE and integer registers. */
6705 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6706 t2 = gen_rtx_REG (mode, regno);
6707 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6708 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6709 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
6710 }
6711 }
6712 /* Handle aggregate types passed in registers. */
6713 if (orig_mode == BLKmode)
6714 {
6715 if (bytes > 0 && bytes <= 8)
6716 mode = (bytes > 4 ? DImode : SImode);
6717 if (mode == BLKmode)
6718 mode = DImode;
6719 }
6720
6721 return gen_reg_or_parallel (mode, orig_mode, regno);
6722 }
6723
6724 /* Return where to put the arguments to a function.
6725 Return zero to push the argument on the stack, or a hard register in which to store the argument.
6726
6727 MODE is the argument's machine mode. TYPE is the data type of the
6728 argument. It is null for libcalls where that information may not be
6729 available. CUM gives information about the preceding args and about
6730 the function being called. NAMED is nonzero if this argument is a
6731 named parameter (otherwise it is an extra parameter matching an
6732 ellipsis). */
6733
6734 static rtx
6735 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
6736 const_tree type, bool named)
6737 {
6738 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6739 enum machine_mode mode = omode;
6740 HOST_WIDE_INT bytes, words;
6741 rtx arg;
6742
6743 if (mode == BLKmode)
6744 bytes = int_size_in_bytes (type);
6745 else
6746 bytes = GET_MODE_SIZE (mode);
6747 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6748
6749 /* To simplify the code below, represent vector types with a vector mode
6750 even if MMX/SSE are not active. */
6751 if (type && TREE_CODE (type) == VECTOR_TYPE)
6752 mode = type_natural_mode (type, cum);
6753
6754 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6755 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
6756 else if (TARGET_64BIT)
6757 arg = function_arg_64 (cum, mode, omode, type, named);
6758 else
6759 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
6760
6761 if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
6762 {
6763 /* This argument uses 256bit AVX modes. */
6764 if (cum->caller)
6765 cfun->machine->callee_pass_avx256_p = true;
6766 else
6767 cfun->machine->caller_pass_avx256_p = true;
6768 }
6769
6770 return arg;
6771 }
6772
6773 /* A C expression that indicates when an argument must be passed by
6774 reference. If nonzero for an argument, a copy of that argument is
6775 made in memory and a pointer to the argument is passed instead of
6776 the argument itself. The pointer is passed in whatever way is
6777 appropriate for passing a pointer to that type. */
6778
6779 static bool
6780 ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
6781 enum machine_mode mode ATTRIBUTE_UNUSED,
6782 const_tree type, bool named ATTRIBUTE_UNUSED)
6783 {
6784 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6785
6786 /* See Windows x64 Software Convention. */
6787 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6788 {
6789 int msize = (int) GET_MODE_SIZE (mode);
6790 if (type)
6791 {
6792 /* Arrays are passed by reference. */
6793 if (TREE_CODE (type) == ARRAY_TYPE)
6794 return true;
6795
6796 if (AGGREGATE_TYPE_P (type))
6797 {
6798 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6799 are passed by reference. */
6800 msize = int_size_in_bytes (type);
6801 }
6802 }
6803
6804 /* __m128 is passed by reference. */
6805 switch (msize) {
6806 case 1: case 2: case 4: case 8:
6807 break;
6808 default:
6809 return true;
6810 }
6811 }
6812 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6813 return 1;
6814
6815 return 0;
6816 }
6817
6818 /* Return true when TYPE should be 128bit aligned for 32bit argument
6819 passing ABI. XXX: This function is obsolete and is only used for
6820 checking psABI compatibility with previous versions of GCC. */
6821
6822 static bool
6823 ix86_compat_aligned_value_p (const_tree type)
6824 {
6825 enum machine_mode mode = TYPE_MODE (type);
6826 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6827 || mode == TDmode
6828 || mode == TFmode
6829 || mode == TCmode)
6830 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6831 return true;
6832 if (TYPE_ALIGN (type) < 128)
6833 return false;
6834
6835 if (AGGREGATE_TYPE_P (type))
6836 {
6837 /* Walk the aggregates recursively. */
6838 switch (TREE_CODE (type))
6839 {
6840 case RECORD_TYPE:
6841 case UNION_TYPE:
6842 case QUAL_UNION_TYPE:
6843 {
6844 tree field;
6845
6846 /* Walk all the structure fields. */
6847 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6848 {
6849 if (TREE_CODE (field) == FIELD_DECL
6850 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
6851 return true;
6852 }
6853 break;
6854 }
6855
6856 case ARRAY_TYPE:
6857 /* Just in case some languages pass arrays by value. */
6858 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
6859 return true;
6860 break;
6861
6862 default:
6863 gcc_unreachable ();
6864 }
6865 }
6866 return false;
6867 }
6868
6869 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
6870 XXX: This function is obsolete and is only used for checking psABI
6871 compatibility with previous versions of GCC. */
6872
6873 static unsigned int
6874 ix86_compat_function_arg_boundary (enum machine_mode mode,
6875 const_tree type, unsigned int align)
6876 {
6877 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6878 natural boundaries. */
6879 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6880 {
6881 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6882 make an exception for SSE modes since these require 128bit
6883 alignment.
6884
6885 The handling here differs from field_alignment. ICC aligns MMX
6886 arguments to 4 byte boundaries, while structure fields are aligned
6887 to 8 byte boundaries. */
6888 if (!type)
6889 {
6890 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6891 align = PARM_BOUNDARY;
6892 }
6893 else
6894 {
6895 if (!ix86_compat_aligned_value_p (type))
6896 align = PARM_BOUNDARY;
6897 }
6898 }
6899 if (align > BIGGEST_ALIGNMENT)
6900 align = BIGGEST_ALIGNMENT;
6901 return align;
6902 }
6903
6904 /* Return true when TYPE should be 128bit aligned for 32bit argument
6905 passing ABI. */
6906
6907 static bool
6908 ix86_contains_aligned_value_p (const_tree type)
6909 {
6910 enum machine_mode mode = TYPE_MODE (type);
6911
6912 if (mode == XFmode || mode == XCmode)
6913 return false;
6914
6915 if (TYPE_ALIGN (type) < 128)
6916 return false;
6917
6918 if (AGGREGATE_TYPE_P (type))
6919 {
6920 /* Walk the aggregates recursively. */
6921 switch (TREE_CODE (type))
6922 {
6923 case RECORD_TYPE:
6924 case UNION_TYPE:
6925 case QUAL_UNION_TYPE:
6926 {
6927 tree field;
6928
6929 /* Walk all the structure fields. */
6930 for (field = TYPE_FIELDS (type);
6931 field;
6932 field = DECL_CHAIN (field))
6933 {
6934 if (TREE_CODE (field) == FIELD_DECL
6935 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
6936 return true;
6937 }
6938 break;
6939 }
6940
6941 case ARRAY_TYPE:
6942 /* Just in case some languages pass arrays by value. */
6943 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
6944 return true;
6945 break;
6946
6947 default:
6948 gcc_unreachable ();
6949 }
6950 }
6951 else
6952 return TYPE_ALIGN (type) >= 128;
6953
6954 return false;
6955 }
6956
6957 /* Gives the alignment boundary, in bits, of an argument with the
6958 specified mode and type. */
6959
6960 static unsigned int
6961 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
6962 {
6963 unsigned int align;
6964 if (type)
6965 {
6966 /* Since the main variant type is used for the call, convert the
6967 type to its main variant. */
6968 type = TYPE_MAIN_VARIANT (type);
6969 align = TYPE_ALIGN (type);
6970 }
6971 else
6972 align = GET_MODE_ALIGNMENT (mode);
6973 if (align < PARM_BOUNDARY)
6974 align = PARM_BOUNDARY;
6975 else
6976 {
6977 static bool warned;
6978 unsigned int saved_align = align;
6979
6980 if (!TARGET_64BIT)
6981 {
6982 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
6983 if (!type)
6984 {
6985 if (mode == XFmode || mode == XCmode)
6986 align = PARM_BOUNDARY;
6987 }
6988 else if (!ix86_contains_aligned_value_p (type))
6989 align = PARM_BOUNDARY;
6990
6991 if (align < 128)
6992 align = PARM_BOUNDARY;
6993 }
6994
6995 if (warn_psabi
6996 && !warned
6997 && align != ix86_compat_function_arg_boundary (mode, type,
6998 saved_align))
6999 {
7000 warned = true;
7001 inform (input_location,
7002 "The ABI for passing parameters with %d-byte"
7003 " alignment has changed in GCC 4.6",
7004 align / BITS_PER_UNIT);
7005 }
7006 }
7007
7008 return align;
7009 }
7010
7011 /* Return true if N is a possible register number of function value. */
7012
7013 static bool
7014 ix86_function_value_regno_p (const unsigned int regno)
7015 {
7016 switch (regno)
7017 {
7018 case AX_REG:
7019 return true;
7020
7021 case FIRST_FLOAT_REG:
7022 /* TODO: The function should depend on current function ABI but
7023 builtins.c would need updating then. Therefore we use the
7024 default ABI. */
7025 if (TARGET_64BIT && ix86_abi == MS_ABI)
7026 return false;
7027 return TARGET_FLOAT_RETURNS_IN_80387;
7028
7029 case FIRST_SSE_REG:
7030 return TARGET_SSE;
7031
7032 case FIRST_MMX_REG:
7033 if (TARGET_MACHO || TARGET_64BIT)
7034 return false;
7035 return TARGET_MMX;
7036 }
7037
7038 return false;
7039 }
7040
7041 /* Define how to find the value returned by a function.
7042 VALTYPE is the data type of the value (as a tree).
7043 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7044 otherwise, FUNC is 0. */
7045
7046 static rtx
7047 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7048 const_tree fntype, const_tree fn)
7049 {
7050 unsigned int regno;
7051
7052 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7053 we normally prevent this case when mmx is not available. However
7054 some ABIs may require the result to be returned like DImode. */
7055 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7056 regno = FIRST_MMX_REG;
7057
7058 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7059 we prevent this case when sse is not available. However some ABIs
7060 may require the result to be returned like integer TImode. */
7061 else if (mode == TImode
7062 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7063 regno = FIRST_SSE_REG;
7064
7065 /* 32-byte vector modes in %ymm0. */
7066 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7067 regno = FIRST_SSE_REG;
7068
7069 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7070 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7071 regno = FIRST_FLOAT_REG;
7072 else
7073 /* Most things go in %eax. */
7074 regno = AX_REG;
7075
7076 /* Override FP return register with %xmm0 for local functions when
7077 SSE math is enabled or for functions with sseregparm attribute. */
7078 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7079 {
7080 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7081 if ((sse_level >= 1 && mode == SFmode)
7082 || (sse_level == 2 && mode == DFmode))
7083 regno = FIRST_SSE_REG;
7084 }
7085
7086 /* OImode shouldn't be used directly. */
7087 gcc_assert (mode != OImode);
7088
7089 return gen_rtx_REG (orig_mode, regno);
7090 }
7091
7092 static rtx
7093 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7094 const_tree valtype)
7095 {
7096 rtx ret;
7097
7098 /* Handle libcalls, which don't provide a type node. */
7099 if (valtype == NULL)
7100 {
7101 unsigned int regno;
7102
7103 switch (mode)
7104 {
7105 case SFmode:
7106 case SCmode:
7107 case DFmode:
7108 case DCmode:
7109 case TFmode:
7110 case SDmode:
7111 case DDmode:
7112 case TDmode:
7113 regno = FIRST_SSE_REG;
7114 break;
7115 case XFmode:
7116 case XCmode:
7117 regno = FIRST_FLOAT_REG;
7118 break;
7119 case TCmode:
7120 return NULL;
7121 default:
7122 regno = AX_REG;
7123 }
7124
7125 return gen_rtx_REG (mode, regno);
7126 }
7127 else if (POINTER_TYPE_P (valtype))
7128 {
7129 /* Pointers are always returned in Pmode. */
7130 mode = Pmode;
7131 }
7132
7133 ret = construct_container (mode, orig_mode, valtype, 1,
7134 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7135 x86_64_int_return_registers, 0);
7136
7137 /* For zero-sized structures, construct_container returns NULL, but we
7138 need to keep the rest of the compiler happy by returning a meaningful value. */
7139 if (!ret)
7140 ret = gen_rtx_REG (orig_mode, AX_REG);
7141
7142 return ret;
7143 }
7144
7145 static rtx
7146 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
7147 {
7148 unsigned int regno = AX_REG;
7149
7150 if (TARGET_SSE)
7151 {
7152 switch (GET_MODE_SIZE (mode))
7153 {
7154 case 16:
7155 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7156 && !COMPLEX_MODE_P (mode))
7157 regno = FIRST_SSE_REG;
7158 break;
7159 case 8:
7160 case 4:
7161 if (mode == SFmode || mode == DFmode)
7162 regno = FIRST_SSE_REG;
7163 break;
7164 default:
7165 break;
7166 }
7167 }
7168 return gen_rtx_REG (orig_mode, regno);
7169 }
7170
7171 static rtx
7172 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7173 enum machine_mode orig_mode, enum machine_mode mode)
7174 {
7175 const_tree fn, fntype;
7176
7177 fn = NULL_TREE;
7178 if (fntype_or_decl && DECL_P (fntype_or_decl))
7179 fn = fntype_or_decl;
7180 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7181
7182 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7183 return function_value_ms_64 (orig_mode, mode);
7184 else if (TARGET_64BIT)
7185 return function_value_64 (orig_mode, mode, valtype);
7186 else
7187 return function_value_32 (orig_mode, mode, fntype, fn);
7188 }
7189
7190 static rtx
7191 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7192 bool outgoing ATTRIBUTE_UNUSED)
7193 {
7194 enum machine_mode mode, orig_mode;
7195
7196 orig_mode = TYPE_MODE (valtype);
7197 mode = type_natural_mode (valtype, NULL);
7198 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7199 }
7200
7201 /* Pointer function arguments and return values are promoted to Pmode. */
7202
7203 static enum machine_mode
7204 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
7205 int *punsignedp, const_tree fntype,
7206 int for_return)
7207 {
7208 if (type != NULL_TREE && POINTER_TYPE_P (type))
7209 {
7210 *punsignedp = POINTERS_EXTEND_UNSIGNED;
7211 return Pmode;
7212 }
7213 return default_promote_function_mode (type, mode, punsignedp, fntype,
7214 for_return);
7215 }
7216
7217 rtx
7218 ix86_libcall_value (enum machine_mode mode)
7219 {
7220 return ix86_function_value_1 (NULL, NULL, mode, mode);
7221 }
7222
7223 /* Return true iff type is returned in memory. */
7224
7225 static bool ATTRIBUTE_UNUSED
7226 return_in_memory_32 (const_tree type, enum machine_mode mode)
7227 {
7228 HOST_WIDE_INT size;
7229
7230 if (mode == BLKmode)
7231 return true;
7232
7233 size = int_size_in_bytes (type);
7234
7235 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
7236 return false;
7237
7238 if (VECTOR_MODE_P (mode) || mode == TImode)
7239 {
7240 /* User-created vectors small enough to fit in EAX. */
7241 if (size < 8)
7242 return false;
7243
7244 /* MMX/3dNow values are returned in MM0,
7245 except when it doesn't exist or the ABI prescribes otherwise. */
7246 if (size == 8)
7247 return !TARGET_MMX || TARGET_VECT8_RETURNS;
7248
7249 /* SSE values are returned in XMM0, except when it doesn't exist. */
7250 if (size == 16)
7251 return !TARGET_SSE;
7252
7253 /* AVX values are returned in YMM0, except when it doesn't exist. */
7254 if (size == 32)
7255 return !TARGET_AVX;
7256 }
7257
7258 if (mode == XFmode)
7259 return false;
7260
7261 if (size > 12)
7262 return true;
7263
7264 /* OImode shouldn't be used directly. */
7265 gcc_assert (mode != OImode);
7266
7267 return false;
7268 }
7269
7270 static bool ATTRIBUTE_UNUSED
7271 return_in_memory_64 (const_tree type, enum machine_mode mode)
7272 {
7273 int needed_intregs, needed_sseregs;
7274 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
7275 }
7276
7277 static bool ATTRIBUTE_UNUSED
7278 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
7279 {
7280 HOST_WIDE_INT size = int_size_in_bytes (type);
7281
7282 /* __m128 is returned in xmm0. */
7283 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7284 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
7285 return false;
7286
7287 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
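/* For example, an 8-byte aggregate comes back in %rax, while a 12-byte
   one is returned in memory. */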
7288 return size != 1 && size != 2 && size != 4 && size != 8;
7289 }
7290
7291 static bool
7292 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7293 {
7294 #ifdef SUBTARGET_RETURN_IN_MEMORY
7295 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
7296 #else
7297 const enum machine_mode mode = type_natural_mode (type, NULL);
7298
7299 if (TARGET_64BIT)
7300 {
7301 if (ix86_function_type_abi (fntype) == MS_ABI)
7302 return return_in_memory_ms_64 (type, mode);
7303 else
7304 return return_in_memory_64 (type, mode);
7305 }
7306 else
7307 return return_in_memory_32 (type, mode);
7308 #endif
7309 }
7310
7311 /* When returning SSE vector types, we have a choice of either
7312 (1) being abi incompatible with a -march switch, or
7313 (2) generating an error.
7314 Given no good solution, I think the safest thing is one warning.
7315 The user won't be able to use -Werror, but....
7316
7317 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7318 called in response to actually generating a caller or callee that
7319 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7320 via aggregate_value_p for general type probing from tree-ssa. */
7321
7322 static rtx
7323 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
7324 {
7325 static bool warnedsse, warnedmmx;
7326
7327 if (!TARGET_64BIT && type)
7328 {
7329 /* Look at the return type of the function, not the function type. */
7330 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
7331
7332 if (!TARGET_SSE && !warnedsse)
7333 {
7334 if (mode == TImode
7335 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7336 {
7337 warnedsse = true;
7338 warning (0, "SSE vector return without SSE enabled "
7339 "changes the ABI");
7340 }
7341 }
7342
7343 if (!TARGET_MMX && !warnedmmx)
7344 {
7345 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7346 {
7347 warnedmmx = true;
7348 warning (0, "MMX vector return without MMX enabled "
7349 "changes the ABI");
7350 }
7351 }
7352 }
7353
7354 return NULL;
7355 }
7356
7357 \f
7358 /* Create the va_list data type. */
7359
7360 /* Returns the calling convention specific va_list data type.
7361 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7362
7363 static tree
7364 ix86_build_builtin_va_list_abi (enum calling_abi abi)
7365 {
7366 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
7367
7368 /* For i386 we use plain pointer to argument area. */
7369 if (!TARGET_64BIT || abi == MS_ABI)
7370 return build_pointer_type (char_type_node);
7371
7372 record = lang_hooks.types.make_type (RECORD_TYPE);
7373 type_decl = build_decl (BUILTINS_LOCATION,
7374 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7375
7376 f_gpr = build_decl (BUILTINS_LOCATION,
7377 FIELD_DECL, get_identifier ("gp_offset"),
7378 unsigned_type_node);
7379 f_fpr = build_decl (BUILTINS_LOCATION,
7380 FIELD_DECL, get_identifier ("fp_offset"),
7381 unsigned_type_node);
7382 f_ovf = build_decl (BUILTINS_LOCATION,
7383 FIELD_DECL, get_identifier ("overflow_arg_area"),
7384 ptr_type_node);
7385 f_sav = build_decl (BUILTINS_LOCATION,
7386 FIELD_DECL, get_identifier ("reg_save_area"),
7387 ptr_type_node);
7388
7389 va_list_gpr_counter_field = f_gpr;
7390 va_list_fpr_counter_field = f_fpr;
7391
7392 DECL_FIELD_CONTEXT (f_gpr) = record;
7393 DECL_FIELD_CONTEXT (f_fpr) = record;
7394 DECL_FIELD_CONTEXT (f_ovf) = record;
7395 DECL_FIELD_CONTEXT (f_sav) = record;
7396
7397 TYPE_STUB_DECL (record) = type_decl;
7398 TYPE_NAME (record) = type_decl;
7399 TYPE_FIELDS (record) = f_gpr;
7400 DECL_CHAIN (f_gpr) = f_fpr;
7401 DECL_CHAIN (f_fpr) = f_ovf;
7402 DECL_CHAIN (f_ovf) = f_sav;
7403
7404 layout_type (record);
7405
7406 /* The correct type is an array type of one element. */
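/* For reference (a sketch only, not something the compiler consumes), the
   record built above corresponds to the familiar SysV x86-64 va_list:

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag;
     typedef __va_list_tag __builtin_va_list[1];  */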
7407 return build_array_type (record, build_index_type (size_zero_node));
7408 }
7409
7410 /* Set up the builtin va_list data type and, for 64-bit, the additional
7411 calling-convention-specific va_list data types. */
7412
7413 static tree
7414 ix86_build_builtin_va_list (void)
7415 {
7416 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7417
7418 /* Initialize abi specific va_list builtin types. */
7419 if (TARGET_64BIT)
7420 {
7421 tree t;
7422 if (ix86_abi == MS_ABI)
7423 {
7424 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7425 if (TREE_CODE (t) != RECORD_TYPE)
7426 t = build_variant_type_copy (t);
7427 sysv_va_list_type_node = t;
7428 }
7429 else
7430 {
7431 t = ret;
7432 if (TREE_CODE (t) != RECORD_TYPE)
7433 t = build_variant_type_copy (t);
7434 sysv_va_list_type_node = t;
7435 }
7436 if (ix86_abi != MS_ABI)
7437 {
7438 t = ix86_build_builtin_va_list_abi (MS_ABI);
7439 if (TREE_CODE (t) != RECORD_TYPE)
7440 t = build_variant_type_copy (t);
7441 ms_va_list_type_node = t;
7442 }
7443 else
7444 {
7445 t = ret;
7446 if (TREE_CODE (t) != RECORD_TYPE)
7447 t = build_variant_type_copy (t);
7448 ms_va_list_type_node = t;
7449 }
7450 }
7451
7452 return ret;
7453 }
7454
7455 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7456
7457 static void
7458 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7459 {
7460 rtx save_area, mem;
7461 alias_set_type set;
7462 int i, max;
7463
7464 /* GPR size of varargs save area. */
7465 if (cfun->va_list_gpr_size)
7466 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7467 else
7468 ix86_varargs_gpr_size = 0;
7469
7470 /* FPR size of varargs save area. We don't need it if we don't pass
7471 anything in SSE registers. */
7472 if (TARGET_SSE && cfun->va_list_fpr_size)
7473 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7474 else
7475 ix86_varargs_fpr_size = 0;
7476
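/* Illustration (assuming the usual values X86_64_REGPARM_MAX == 6 and
   X86_64_SSE_REGPARM_MAX == 8): when both parts are needed, the register
   save area built below looks like

     bytes   0 ..  47   the six GP argument registers, 8 bytes each
     bytes  48 .. 175   the eight SSE argument registers, 16 bytes each

   and gp_offset/fp_offset in the va_list index into this block. */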
7477 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7478 return;
7479
7480 save_area = frame_pointer_rtx;
7481 set = get_varargs_alias_set ();
7482
7483 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7484 if (max > X86_64_REGPARM_MAX)
7485 max = X86_64_REGPARM_MAX;
7486
7487 for (i = cum->regno; i < max; i++)
7488 {
7489 mem = gen_rtx_MEM (Pmode,
7490 plus_constant (save_area, i * UNITS_PER_WORD));
7491 MEM_NOTRAP_P (mem) = 1;
7492 set_mem_alias_set (mem, set);
7493 emit_move_insn (mem, gen_rtx_REG (Pmode,
7494 x86_64_int_parameter_registers[i]));
7495 }
7496
7497 if (ix86_varargs_fpr_size)
7498 {
7499 enum machine_mode smode;
7500 rtx label, test;
7501
7502 /* Now emit code to save SSE registers. The AX parameter contains the
7503 number of SSE parameter registers used to call this function, though all
7504 we actually check here is the zero/non-zero status. */
7505
7506 label = gen_label_rtx ();
7507 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7508 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7509 label));
7510
7511 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7512 we used movdqa (i.e. TImode) instead? Perhaps even better would
7513 be if we could determine the real mode of the data, via a hook
7514 into pass_stdarg. Ignore all that for now. */
7515 smode = V4SFmode;
7516 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7517 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7518
7519 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7520 if (max > X86_64_SSE_REGPARM_MAX)
7521 max = X86_64_SSE_REGPARM_MAX;
7522
7523 for (i = cum->sse_regno; i < max; ++i)
7524 {
7525 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7526 mem = gen_rtx_MEM (smode, mem);
7527 MEM_NOTRAP_P (mem) = 1;
7528 set_mem_alias_set (mem, set);
7529 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7530
7531 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
7532 }
7533
7534 emit_label (label);
7535 }
7536 }
7537
7538 static void
7539 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7540 {
7541 alias_set_type set = get_varargs_alias_set ();
7542 int i;
7543
7544 /* Reset to zero, as a SysV va_arg might have been used
7545 before. */
7546 ix86_varargs_gpr_size = 0;
7547 ix86_varargs_fpr_size = 0;
7548
7549 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7550 {
7551 rtx reg, mem;
7552
7553 mem = gen_rtx_MEM (Pmode,
7554 plus_constant (virtual_incoming_args_rtx,
7555 i * UNITS_PER_WORD));
7556 MEM_NOTRAP_P (mem) = 1;
7557 set_mem_alias_set (mem, set);
7558
7559 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7560 emit_move_insn (mem, reg);
7561 }
7562 }
7563
7564 static void
7565 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
7566 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7567 int no_rtl)
7568 {
7569 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7570 CUMULATIVE_ARGS next_cum;
7571 tree fntype;
7572
7573 /* This argument doesn't appear to be used anymore, which is good,
7574 because the old code here didn't suppress rtl generation. */
7575 gcc_assert (!no_rtl);
7576
7577 if (!TARGET_64BIT)
7578 return;
7579
7580 fntype = TREE_TYPE (current_function_decl);
7581
7582 /* For varargs, we do not want to skip the dummy va_dcl argument.
7583 For stdargs, we do want to skip the last named argument. */
7584 next_cum = *cum;
7585 if (stdarg_p (fntype))
7586 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
7587 true);
7588
7589 if (cum->call_abi == MS_ABI)
7590 setup_incoming_varargs_ms_64 (&next_cum);
7591 else
7592 setup_incoming_varargs_64 (&next_cum);
7593 }
7594
7595 /* Check whether TYPE is a va_list of the plain char * kind. */
7596
7597 static bool
7598 is_va_list_char_pointer (tree type)
7599 {
7600 tree canonic;
7601
7602 /* For 32-bit it is always true. */
7603 if (!TARGET_64BIT)
7604 return true;
7605 canonic = ix86_canonical_va_list_type (type);
7606 return (canonic == ms_va_list_type_node
7607 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7608 }
7609
7610 /* Implement va_start. */
7611
7612 static void
7613 ix86_va_start (tree valist, rtx nextarg)
7614 {
7615 HOST_WIDE_INT words, n_gpr, n_fpr;
7616 tree f_gpr, f_fpr, f_ovf, f_sav;
7617 tree gpr, fpr, ovf, sav, t;
7618 tree type;
7619 rtx ovf_rtx;
7620
7621 if (flag_split_stack
7622 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7623 {
7624 unsigned int scratch_regno;
7625
7626 /* When we are splitting the stack, we can't refer to the stack
7627 arguments using internal_arg_pointer, because they may be on
7628 the old stack. The split stack prologue will arrange to
7629 leave a pointer to the old stack arguments in a scratch
7630 register, which we here copy to a pseudo-register. The split
7631 stack prologue can't set the pseudo-register directly because
7632 it (the prologue) runs before any registers have been saved. */
7633
7634 scratch_regno = split_stack_prologue_scratch_regno ();
7635 if (scratch_regno != INVALID_REGNUM)
7636 {
7637 rtx reg, seq;
7638
7639 reg = gen_reg_rtx (Pmode);
7640 cfun->machine->split_stack_varargs_pointer = reg;
7641
7642 start_sequence ();
7643 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
7644 seq = get_insns ();
7645 end_sequence ();
7646
7647 push_topmost_sequence ();
7648 emit_insn_after (seq, entry_of_function ());
7649 pop_topmost_sequence ();
7650 }
7651 }
7652
7653 /* Only the 64-bit target needs something special. */
7654 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7655 {
7656 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7657 std_expand_builtin_va_start (valist, nextarg);
7658 else
7659 {
7660 rtx va_r, next;
7661
7662 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
7663 next = expand_binop (ptr_mode, add_optab,
7664 cfun->machine->split_stack_varargs_pointer,
7665 crtl->args.arg_offset_rtx,
7666 NULL_RTX, 0, OPTAB_LIB_WIDEN);
7667 convert_move (va_r, next, 0);
7668 }
7669 return;
7670 }
7671
7672 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7673 f_fpr = DECL_CHAIN (f_gpr);
7674 f_ovf = DECL_CHAIN (f_fpr);
7675 f_sav = DECL_CHAIN (f_ovf);
7676
7677 valist = build_simple_mem_ref (valist);
7678 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7679 /* The following should be folded into the MEM_REF offset. */
7680 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7681 f_gpr, NULL_TREE);
7682 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7683 f_fpr, NULL_TREE);
7684 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7685 f_ovf, NULL_TREE);
7686 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7687 f_sav, NULL_TREE);
7688
7689 /* Count number of gp and fp argument registers used. */
7690 words = crtl->args.info.words;
7691 n_gpr = crtl->args.info.regno;
7692 n_fpr = crtl->args.info.sse_regno;
7693
7694 if (cfun->va_list_gpr_size)
7695 {
7696 type = TREE_TYPE (gpr);
7697 t = build2 (MODIFY_EXPR, type,
7698 gpr, build_int_cst (type, n_gpr * 8));
7699 TREE_SIDE_EFFECTS (t) = 1;
7700 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7701 }
7702
7703 if (TARGET_SSE && cfun->va_list_fpr_size)
7704 {
7705 type = TREE_TYPE (fpr);
7706 t = build2 (MODIFY_EXPR, type, fpr,
7707 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
7708 TREE_SIDE_EFFECTS (t) = 1;
7709 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7710 }
7711
7712 /* Find the overflow area. */
7713 type = TREE_TYPE (ovf);
7714 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7715 ovf_rtx = crtl->args.internal_arg_pointer;
7716 else
7717 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
7718 t = make_tree (type, ovf_rtx);
7719 if (words != 0)
7720 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
7721 t = build2 (MODIFY_EXPR, type, ovf, t);
7722 TREE_SIDE_EFFECTS (t) = 1;
7723 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7724
7725 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
7726 {
7727 /* Find the register save area.
7728 The function prologue saves it right above the stack frame. */
7729 type = TREE_TYPE (sav);
7730 t = make_tree (type, frame_pointer_rtx);
7731 if (!ix86_varargs_gpr_size)
7732 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
7733 t = build2 (MODIFY_EXPR, type, sav, t);
7734 TREE_SIDE_EFFECTS (t) = 1;
7735 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7736 }
7737 }
7738
7739 /* Implement va_arg. */
7740
7741 static tree
7742 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7743 gimple_seq *post_p)
7744 {
7745 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
7746 tree f_gpr, f_fpr, f_ovf, f_sav;
7747 tree gpr, fpr, ovf, sav, t;
7748 int size, rsize;
7749 tree lab_false, lab_over = NULL_TREE;
7750 tree addr, t2;
7751 rtx container;
7752 int indirect_p = 0;
7753 tree ptrtype;
7754 enum machine_mode nat_mode;
7755 unsigned int arg_boundary;
7756
7757 /* Only the 64-bit target needs something special. */
7758 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7759 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7760
7761 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7762 f_fpr = DECL_CHAIN (f_gpr);
7763 f_ovf = DECL_CHAIN (f_fpr);
7764 f_sav = DECL_CHAIN (f_ovf);
7765
7766 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7767 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7768 valist = build_va_arg_indirect_ref (valist);
7769 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7770 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7771 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7772
7773 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7774 if (indirect_p)
7775 type = build_pointer_type (type);
7776 size = int_size_in_bytes (type);
7777 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7778
7779 nat_mode = type_natural_mode (type, NULL);
7780 switch (nat_mode)
7781 {
7782 case V8SFmode:
7783 case V8SImode:
7784 case V32QImode:
7785 case V16HImode:
7786 case V4DFmode:
7787 case V4DImode:
7788 /* Unnamed 256bit vector mode parameters are passed on the stack. */
7789 if (!TARGET_64BIT_MS_ABI)
7790 {
7791 container = NULL;
7792 break;
7793 }
7794
7795 default:
7796 container = construct_container (nat_mode, TYPE_MODE (type),
7797 type, 0, X86_64_REGPARM_MAX,
7798 X86_64_SSE_REGPARM_MAX, intreg,
7799 0);
7800 break;
7801 }
7802
7803 /* Pull the value out of the saved registers. */
7804
7805 addr = create_tmp_var (ptr_type_node, "addr");
7806
7807 if (container)
7808 {
7809 int needed_intregs, needed_sseregs;
7810 bool need_temp;
7811 tree int_addr, sse_addr;
7812
7813 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7814 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7815
7816 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7817
7818 need_temp = (!REG_P (container)
7819 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7820 || TYPE_ALIGN (type) > 128));
7821
7822 /* In case we are passing a structure, verify that it is a consecutive
7823 block in the register save area. If not, we need to do moves. */
7824 if (!need_temp && !REG_P (container))
7825 {
7826 /* Verify that all registers are strictly consecutive. */
7827 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7828 {
7829 int i;
7830
7831 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7832 {
7833 rtx slot = XVECEXP (container, 0, i);
7834 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7835 || INTVAL (XEXP (slot, 1)) != i * 16)
7836 need_temp = 1;
7837 }
7838 }
7839 else
7840 {
7841 int i;
7842
7843 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7844 {
7845 rtx slot = XVECEXP (container, 0, i);
7846 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7847 || INTVAL (XEXP (slot, 1)) != i * 8)
7848 need_temp = 1;
7849 }
7850 }
7851 }
7852 if (!need_temp)
7853 {
7854 int_addr = addr;
7855 sse_addr = addr;
7856 }
7857 else
7858 {
7859 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7860 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7861 }
7862
7863 /* First ensure that we fit completely in registers. */
7864 if (needed_intregs)
7865 {
7866 t = build_int_cst (TREE_TYPE (gpr),
7867 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7868 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7869 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7870 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7871 gimplify_and_add (t, pre_p);
7872 }
7873 if (needed_sseregs)
7874 {
7875 t = build_int_cst (TREE_TYPE (fpr),
7876 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7877 + X86_64_REGPARM_MAX * 8);
7878 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7879 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7880 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7881 gimplify_and_add (t, pre_p);
7882 }
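/* For example (assuming X86_64_REGPARM_MAX == 6), with needed_intregs == 2
   the GP check above branches to lab_false once gp_offset reaches
   (6 - 2 + 1) * 8 == 40, i.e. when fewer than two GP argument registers
   remain unread in the save area. */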
7883
7884 /* Compute index to start of area used for integer regs. */
7885 if (needed_intregs)
7886 {
7887 /* int_addr = gpr + sav; */
7888 t = fold_build_pointer_plus (sav, gpr);
7889 gimplify_assign (int_addr, t, pre_p);
7890 }
7891 if (needed_sseregs)
7892 {
7893 /* sse_addr = fpr + sav; */
7894 t = fold_build_pointer_plus (sav, fpr);
7895 gimplify_assign (sse_addr, t, pre_p);
7896 }
7897 if (need_temp)
7898 {
7899 int i, prev_size = 0;
7900 tree temp = create_tmp_var (type, "va_arg_tmp");
7901
7902 /* addr = &temp; */
7903 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7904 gimplify_assign (addr, t, pre_p);
7905
7906 for (i = 0; i < XVECLEN (container, 0); i++)
7907 {
7908 rtx slot = XVECEXP (container, 0, i);
7909 rtx reg = XEXP (slot, 0);
7910 enum machine_mode mode = GET_MODE (reg);
7911 tree piece_type;
7912 tree addr_type;
7913 tree daddr_type;
7914 tree src_addr, src;
7915 int src_offset;
7916 tree dest_addr, dest;
7917 int cur_size = GET_MODE_SIZE (mode);
7918
7919 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
7920 prev_size = INTVAL (XEXP (slot, 1));
7921 if (prev_size + cur_size > size)
7922 {
7923 cur_size = size - prev_size;
7924 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
7925 if (mode == BLKmode)
7926 mode = QImode;
7927 }
7928 piece_type = lang_hooks.types.type_for_mode (mode, 1);
7929 if (mode == GET_MODE (reg))
7930 addr_type = build_pointer_type (piece_type);
7931 else
7932 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7933 true);
7934 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7935 true);
7936
7937 if (SSE_REGNO_P (REGNO (reg)))
7938 {
7939 src_addr = sse_addr;
7940 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7941 }
7942 else
7943 {
7944 src_addr = int_addr;
7945 src_offset = REGNO (reg) * 8;
7946 }
7947 src_addr = fold_convert (addr_type, src_addr);
7948 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
7949
7950 dest_addr = fold_convert (daddr_type, addr);
7951 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
7952 if (cur_size == GET_MODE_SIZE (mode))
7953 {
7954 src = build_va_arg_indirect_ref (src_addr);
7955 dest = build_va_arg_indirect_ref (dest_addr);
7956
7957 gimplify_assign (dest, src, pre_p);
7958 }
7959 else
7960 {
7961 tree copy
7962 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
7963 3, dest_addr, src_addr,
7964 size_int (cur_size));
7965 gimplify_and_add (copy, pre_p);
7966 }
7967 prev_size += cur_size;
7968 }
7969 }
7970
7971 if (needed_intregs)
7972 {
7973 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7974 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7975 gimplify_assign (gpr, t, pre_p);
7976 }
7977
7978 if (needed_sseregs)
7979 {
7980 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7981 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7982 gimplify_assign (fpr, t, pre_p);
7983 }
7984
7985 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7986
7987 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7988 }
7989
7990 /* ... otherwise out of the overflow area. */
7991
7992 /* When the caller aligns a parameter on the stack, an alignment beyond
7993 MAX_SUPPORTED_STACK_ALIGNMENT is capped at
7994 MAX_SUPPORTED_STACK_ALIGNMENT. We match the callee with the
7995 caller here. */
7996 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
7997 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7998 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7999
8000 /* Care for on-stack alignment if needed. */
8001 if (arg_boundary <= 64 || size == 0)
8002 t = ovf;
8003 else
8004 {
8005 HOST_WIDE_INT align = arg_boundary / 8;
8006 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8007 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8008 build_int_cst (TREE_TYPE (t), -align));
8009 }
8010
8011 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8012 gimplify_assign (addr, t, pre_p);
8013
8014 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8015 gimplify_assign (unshare_expr (ovf), t, pre_p);
8016
8017 if (container)
8018 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8019
8020 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8021 addr = fold_convert (ptrtype, addr);
8022
8023 if (indirect_p)
8024 addr = build_va_arg_indirect_ref (addr);
8025 return build_va_arg_indirect_ref (addr);
8026 }
8027 \f
8028 /* Return true if OPNUM's MEM should be matched
8029 in movabs* patterns. */
8030
8031 bool
8032 ix86_check_movabs (rtx insn, int opnum)
8033 {
8034 rtx set, mem;
8035
8036 set = PATTERN (insn);
8037 if (GET_CODE (set) == PARALLEL)
8038 set = XVECEXP (set, 0, 0);
8039 gcc_assert (GET_CODE (set) == SET);
8040 mem = XEXP (set, opnum);
8041 while (GET_CODE (mem) == SUBREG)
8042 mem = SUBREG_REG (mem);
8043 gcc_assert (MEM_P (mem));
8044 return volatile_ok || !MEM_VOLATILE_P (mem);
8045 }
8046 \f
8047 /* Initialize the table of extra 80387 mathematical constants. */
8048
8049 static void
8050 init_ext_80387_constants (void)
8051 {
8052 static const char * cst[5] =
8053 {
8054 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8055 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8056 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8057 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8058 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8059 };
8060 int i;
8061
8062 for (i = 0; i < 5; i++)
8063 {
8064 real_from_string (&ext_80387_constants_table[i], cst[i]);
8065 /* Ensure each constant is rounded to XFmode precision. */
8066 real_convert (&ext_80387_constants_table[i],
8067 XFmode, &ext_80387_constants_table[i]);
8068 }
8069
8070 ext_80387_constants_init = 1;
8071 }
8072
8073 /* Return non-zero if the constant is something that
8074 can be loaded with a special instruction. */
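/* Return values (a summary of the cases below): -1 when X is not an x87
   CONST_DOUBLE at all, 0 when no special instruction exists, 1 for fldz,
   2 for fld1, 3..7 for the extended constants loaded via
   standard_80387_constant_rtx, and 8/9 for -0.0/-1.0, which are later
   split into fldz;fchs and fld1;fchs. */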
8075
8076 int
8077 standard_80387_constant_p (rtx x)
8078 {
8079 enum machine_mode mode = GET_MODE (x);
8080
8081 REAL_VALUE_TYPE r;
8082
8083 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8084 return -1;
8085
8086 if (x == CONST0_RTX (mode))
8087 return 1;
8088 if (x == CONST1_RTX (mode))
8089 return 2;
8090
8091 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8092
8093 /* For XFmode constants, try to find a special 80387 instruction when
8094 optimizing for size or on those CPUs that benefit from them. */
8095 if (mode == XFmode
8096 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8097 {
8098 int i;
8099
8100 if (! ext_80387_constants_init)
8101 init_ext_80387_constants ();
8102
8103 for (i = 0; i < 5; i++)
8104 if (real_identical (&r, &ext_80387_constants_table[i]))
8105 return i + 3;
8106 }
8107
8108 /* A load of the constant -0.0 or -1.0 will be split into a
8109 fldz;fchs or fld1;fchs sequence. */
8110 if (real_isnegzero (&r))
8111 return 8;
8112 if (real_identical (&r, &dconstm1))
8113 return 9;
8114
8115 return 0;
8116 }
8117
8118 /* Return the opcode of the special instruction to be used to load
8119 the constant X. */
8120
8121 const char *
8122 standard_80387_constant_opcode (rtx x)
8123 {
8124 switch (standard_80387_constant_p (x))
8125 {
8126 case 1:
8127 return "fldz";
8128 case 2:
8129 return "fld1";
8130 case 3:
8131 return "fldlg2";
8132 case 4:
8133 return "fldln2";
8134 case 5:
8135 return "fldl2e";
8136 case 6:
8137 return "fldl2t";
8138 case 7:
8139 return "fldpi";
8140 case 8:
8141 case 9:
8142 return "#";
8143 default:
8144 gcc_unreachable ();
8145 }
8146 }
8147
8148 /* Return the CONST_DOUBLE representing the 80387 constant that is
8149 loaded by the specified special instruction. The argument IDX
8150 matches the return value from standard_80387_constant_p. */
8151
8152 rtx
8153 standard_80387_constant_rtx (int idx)
8154 {
8155 int i;
8156
8157 if (! ext_80387_constants_init)
8158 init_ext_80387_constants ();
8159
8160 switch (idx)
8161 {
8162 case 3:
8163 case 4:
8164 case 5:
8165 case 6:
8166 case 7:
8167 i = idx - 3;
8168 break;
8169
8170 default:
8171 gcc_unreachable ();
8172 }
8173
8174 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8175 XFmode);
8176 }
8177
8178 /* Return 1 if X is all 0s and 2 if X is all 1s
8179 in a supported SSE/AVX vector mode. */
8180
8181 int
8182 standard_sse_constant_p (rtx x)
8183 {
8184 enum machine_mode mode = GET_MODE (x);
8185
8186 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8187 return 1;
8188 if (vector_all_ones_operand (x, mode))
8189 switch (mode)
8190 {
8191 case V16QImode:
8192 case V8HImode:
8193 case V4SImode:
8194 case V2DImode:
8195 if (TARGET_SSE2)
8196 return 2;
8197 case V32QImode:
8198 case V16HImode:
8199 case V8SImode:
8200 case V4DImode:
8201 if (TARGET_AVX2)
8202 return 2;
8203 default:
8204 break;
8205 }
8206
8207 return 0;
8208 }
8209
8210 /* Return the opcode of the special instruction to be used to load
8211 the constant X. */
8212
8213 const char *
8214 standard_sse_constant_opcode (rtx insn, rtx x)
8215 {
8216 switch (standard_sse_constant_p (x))
8217 {
8218 case 1:
8219 switch (get_attr_mode (insn))
8220 {
8221 case MODE_TI:
8222 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8223 return "%vpxor\t%0, %d0";
8224 case MODE_V2DF:
8225 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8226 return "%vxorpd\t%0, %d0";
8227 case MODE_V4SF:
8228 return "%vxorps\t%0, %d0";
8229
8230 case MODE_OI:
8231 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8232 return "vpxor\t%x0, %x0, %x0";
8233 case MODE_V4DF:
8234 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8235 return "vxorpd\t%x0, %x0, %x0";
8236 case MODE_V8SF:
8237 return "vxorps\t%x0, %x0, %x0";
8238
8239 default:
8240 break;
8241 }
8242
8243 case 2:
8244 if (TARGET_AVX)
8245 return "vpcmpeqd\t%0, %0, %0";
8246 else
8247 return "pcmpeqd\t%0, %0";
8248
8249 default:
8250 break;
8251 }
8252 gcc_unreachable ();
8253 }
8254
8255 /* Returns true if OP contains a symbol reference. */
8256
8257 bool
8258 symbolic_reference_mentioned_p (rtx op)
8259 {
8260 const char *fmt;
8261 int i;
8262
8263 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
8264 return true;
8265
8266 fmt = GET_RTX_FORMAT (GET_CODE (op));
8267 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
8268 {
8269 if (fmt[i] == 'E')
8270 {
8271 int j;
8272
8273 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
8274 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
8275 return true;
8276 }
8277
8278 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
8279 return true;
8280 }
8281
8282 return false;
8283 }
8284
8285 /* Return true if it is appropriate to emit `ret' instructions in the
8286 body of a function. Do this only if the epilogue is simple, needing a
8287 couple of insns. Prior to reloading, we can't tell how many registers
8288 must be saved, so return false then. Return false if there is no frame
8289 marker to de-allocate. */
8290
8291 bool
8292 ix86_can_use_return_insn_p (void)
8293 {
8294 struct ix86_frame frame;
8295
8296 if (! reload_completed || frame_pointer_needed)
8297 return 0;
8298
8299 /* Don't allow more than 32k pop, since that's all we can do
8300 with one instruction. */
8301 if (crtl->args.pops_args && crtl->args.size >= 32768)
8302 return 0;
8303
8304 ix86_compute_frame_layout (&frame);
8305 return (frame.stack_pointer_offset == UNITS_PER_WORD
8306 && (frame.nregs + frame.nsseregs) == 0);
8307 }
8308 \f
8309 /* Value should be nonzero if functions must have frame pointers.
8310 Zero means the frame pointer need not be set up (and parms may
8311 be accessed via the stack pointer) in functions that seem suitable. */
8312
8313 static bool
8314 ix86_frame_pointer_required (void)
8315 {
8316 /* If we accessed previous frames, then the generated code expects
8317 to be able to access the saved ebp value in our frame. */
8318 if (cfun->machine->accesses_prev_frame)
8319 return true;
8320
8321 /* Several x86 OSes need a frame pointer for other reasons,
8322 usually pertaining to setjmp. */
8323 if (SUBTARGET_FRAME_POINTER_REQUIRED)
8324 return true;
8325
8326 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8327 turns off the frame pointer by default. Turn it back on now if
8328 we've not got a leaf function. */
8329 if (TARGET_OMIT_LEAF_FRAME_POINTER
8330 && (!current_function_is_leaf
8331 || ix86_current_function_calls_tls_descriptor))
8332 return true;
8333
8334 if (crtl->profile && !flag_fentry)
8335 return true;
8336
8337 return false;
8338 }
8339
8340 /* Record that the current function accesses previous call frames. */
8341
8342 void
8343 ix86_setup_frame_addresses (void)
8344 {
8345 cfun->machine->accesses_prev_frame = 1;
8346 }
8347 \f
8348 #ifndef USE_HIDDEN_LINKONCE
8349 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
8350 # define USE_HIDDEN_LINKONCE 1
8351 # else
8352 # define USE_HIDDEN_LINKONCE 0
8353 # endif
8354 #endif
8355
8356 static int pic_labels_used;
8357
8358 /* Fills in the label name that should be used for a pc thunk for
8359 the given register. */
8360
8361 static void
8362 get_pc_thunk_name (char name[32], unsigned int regno)
8363 {
8364 gcc_assert (!TARGET_64BIT);
8365
8366 if (USE_HIDDEN_LINKONCE)
8367 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
8368 else
8369 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
8370 }
8371
8372
8373 /* This function generates the -fpic pc-thunk code that loads a register
8374 (%ebx or another) with the return address of the caller and then returns. */
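/* For %ebx, the emitted thunk is essentially

	__x86.get_pc_thunk.bx:
		movl	(%esp), %ebx
		ret
*/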
8375
8376 static void
8377 ix86_code_end (void)
8378 {
8379 rtx xops[2];
8380 int regno;
8381
8382 for (regno = AX_REG; regno <= SP_REG; regno++)
8383 {
8384 char name[32];
8385 tree decl;
8386
8387 if (!(pic_labels_used & (1 << regno)))
8388 continue;
8389
8390 get_pc_thunk_name (name, regno);
8391
8392 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8393 get_identifier (name),
8394 build_function_type_list (void_type_node, NULL_TREE));
8395 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8396 NULL_TREE, void_type_node);
8397 TREE_PUBLIC (decl) = 1;
8398 TREE_STATIC (decl) = 1;
8399
8400 #if TARGET_MACHO
8401 if (TARGET_MACHO)
8402 {
8403 switch_to_section (darwin_sections[text_coal_section]);
8404 fputs ("\t.weak_definition\t", asm_out_file);
8405 assemble_name (asm_out_file, name);
8406 fputs ("\n\t.private_extern\t", asm_out_file);
8407 assemble_name (asm_out_file, name);
8408 putc ('\n', asm_out_file);
8409 ASM_OUTPUT_LABEL (asm_out_file, name);
8410 DECL_WEAK (decl) = 1;
8411 }
8412 else
8413 #endif
8414 if (USE_HIDDEN_LINKONCE)
8415 {
8416 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8417
8418 targetm.asm_out.unique_section (decl, 0);
8419 switch_to_section (get_named_section (decl, NULL, 0));
8420
8421 targetm.asm_out.globalize_label (asm_out_file, name);
8422 fputs ("\t.hidden\t", asm_out_file);
8423 assemble_name (asm_out_file, name);
8424 putc ('\n', asm_out_file);
8425 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8426 }
8427 else
8428 {
8429 switch_to_section (text_section);
8430 ASM_OUTPUT_LABEL (asm_out_file, name);
8431 }
8432
8433 DECL_INITIAL (decl) = make_node (BLOCK);
8434 current_function_decl = decl;
8435 init_function_start (decl);
8436 first_function_block_is_cold = false;
8437 /* Make sure unwind info is emitted for the thunk if needed. */
8438 final_start_function (emit_barrier (), asm_out_file, 1);
8439
8440 /* Pad stack IP move with 4 instructions (two NOPs count
8441 as one instruction). */
8442 if (TARGET_PAD_SHORT_FUNCTION)
8443 {
8444 int i = 8;
8445
8446 while (i--)
8447 fputs ("\tnop\n", asm_out_file);
8448 }
8449
8450 xops[0] = gen_rtx_REG (Pmode, regno);
8451 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8452 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8453 fputs ("\tret\n", asm_out_file);
8454 final_end_function ();
8455 init_insn_lengths ();
8456 free_after_compilation (cfun);
8457 set_cfun (NULL);
8458 current_function_decl = NULL;
8459 }
8460
8461 if (flag_split_stack)
8462 file_end_indicate_split_stack ();
8463 }
8464
8465 /* Emit code for the SET_GOT patterns. */
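/* In the common -fpic case the sequence produced below boils down to
   something like

	call	__x86.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   (illustrative; the exact register and syntax depend on DEST and the
   assembler dialect). */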
8466
8467 const char *
8468 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8469 {
8470 rtx xops[3];
8471
8472 xops[0] = dest;
8473
8474 if (TARGET_VXWORKS_RTP && flag_pic)
8475 {
8476 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8477 xops[2] = gen_rtx_MEM (Pmode,
8478 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8479 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8480
8481 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8482 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8483 an unadorned address. */
8484 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8485 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8486 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8487 return "";
8488 }
8489
8490 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8491
8492 if (!flag_pic)
8493 {
8494 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8495
8496 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8497
8498 #if TARGET_MACHO
8499 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8500 is what will be referenced by the Mach-O PIC subsystem. */
8501 if (!label)
8502 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8503 #endif
8504
8505 targetm.asm_out.internal_label (asm_out_file, "L",
8506 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8507 }
8508 else
8509 {
8510 char name[32];
8511 get_pc_thunk_name (name, REGNO (dest));
8512 pic_labels_used |= 1 << REGNO (dest);
8513
8514 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8515 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8516 output_asm_insn ("call\t%X2", xops);
8517 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8518 is what will be referenced by the Mach-O PIC subsystem. */
8519 #if TARGET_MACHO
8520 if (!label)
8521 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8522 else
8523 targetm.asm_out.internal_label (asm_out_file, "L",
8524 CODE_LABEL_NUMBER (label));
8525 #endif
8526 }
8527
8528 if (!TARGET_MACHO)
8529 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
8530
8531 return "";
8532 }
8533
8534 /* Generate a "push" pattern for input ARG. */
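/* Roughly, the RTL produced is (a sketch; the actual mode follows Pmode)

     (set (mem (pre_dec (reg sp))) ARG)

   with the frame-state offsets below adjusted to match. */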
8535
8536 static rtx
8537 gen_push (rtx arg)
8538 {
8539 struct machine_function *m = cfun->machine;
8540
8541 if (m->fs.cfa_reg == stack_pointer_rtx)
8542 m->fs.cfa_offset += UNITS_PER_WORD;
8543 m->fs.sp_offset += UNITS_PER_WORD;
8544
8545 return gen_rtx_SET (VOIDmode,
8546 gen_rtx_MEM (Pmode,
8547 gen_rtx_PRE_DEC (Pmode,
8548 stack_pointer_rtx)),
8549 arg);
8550 }
8551
8552 /* Generate a "pop" pattern for input ARG. */
8553
8554 static rtx
8555 gen_pop (rtx arg)
8556 {
8557 return gen_rtx_SET (VOIDmode,
8558 arg,
8559 gen_rtx_MEM (Pmode,
8560 gen_rtx_POST_INC (Pmode,
8561 stack_pointer_rtx)));
8562 }
8563
8564 /* Return >= 0 if there is an unused call-clobbered register available
8565 for the entire function. */
8566
8567 static unsigned int
8568 ix86_select_alt_pic_regnum (void)
8569 {
8570 if (current_function_is_leaf
8571 && !crtl->profile
8572 && !ix86_current_function_calls_tls_descriptor)
8573 {
8574 int i, drap;
8575 /* Can't use the same register for both PIC and DRAP. */
8576 if (crtl->drap_reg)
8577 drap = REGNO (crtl->drap_reg);
8578 else
8579 drap = -1;
8580 for (i = 2; i >= 0; --i)
8581 if (i != drap && !df_regs_ever_live_p (i))
8582 return i;
8583 }
8584
8585 return INVALID_REGNUM;
8586 }
8587
8588 /* Return TRUE if we need to save REGNO. */
8589
8590 static bool
8591 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
8592 {
8593 if (pic_offset_table_rtx
8594 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8595 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8596 || crtl->profile
8597 || crtl->calls_eh_return
8598 || crtl->uses_const_pool))
8599 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
8600
8601 if (crtl->calls_eh_return && maybe_eh_return)
8602 {
8603 unsigned i;
8604 for (i = 0; ; i++)
8605 {
8606 unsigned test = EH_RETURN_DATA_REGNO (i);
8607 if (test == INVALID_REGNUM)
8608 break;
8609 if (test == regno)
8610 return true;
8611 }
8612 }
8613
8614 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8615 return true;
8616
8617 return (df_regs_ever_live_p (regno)
8618 && !call_used_regs[regno]
8619 && !fixed_regs[regno]
8620 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
8621 }
8622
8623 /* Return number of saved general purpose registers. */
8624
8625 static int
8626 ix86_nsaved_regs (void)
8627 {
8628 int nregs = 0;
8629 int regno;
8630
8631 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8632 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8633 nregs ++;
8634 return nregs;
8635 }
8636
8637 /* Return number of saved SSE registers. */
8638
8639 static int
8640 ix86_nsaved_sseregs (void)
8641 {
8642 int nregs = 0;
8643 int regno;
8644
8645 if (!TARGET_64BIT_MS_ABI)
8646 return 0;
8647 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8648 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8649 nregs ++;
8650 return nregs;
8651 }
8652
8653 /* Given FROM and TO register numbers, say whether this elimination is
8654 allowed. If stack alignment is needed, we can only replace argument
8655 pointer with hard frame pointer, or replace frame pointer with stack
8656 pointer. Otherwise, frame pointer elimination is automatically
8657 handled and all other eliminations are valid. */
8658
8659 static bool
8660 ix86_can_eliminate (const int from, const int to)
8661 {
8662 if (stack_realign_fp)
8663 return ((from == ARG_POINTER_REGNUM
8664 && to == HARD_FRAME_POINTER_REGNUM)
8665 || (from == FRAME_POINTER_REGNUM
8666 && to == STACK_POINTER_REGNUM));
8667 else
8668 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8669 }
8670
8671 /* Return the offset between two registers, one to be eliminated, and the other
8672 its replacement, at the start of a routine. */
8673
8674 HOST_WIDE_INT
8675 ix86_initial_elimination_offset (int from, int to)
8676 {
8677 struct ix86_frame frame;
8678 ix86_compute_frame_layout (&frame);
8679
8680 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8681 return frame.hard_frame_pointer_offset;
8682 else if (from == FRAME_POINTER_REGNUM
8683 && to == HARD_FRAME_POINTER_REGNUM)
8684 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
8685 else
8686 {
8687 gcc_assert (to == STACK_POINTER_REGNUM);
8688
8689 if (from == ARG_POINTER_REGNUM)
8690 return frame.stack_pointer_offset;
8691
8692 gcc_assert (from == FRAME_POINTER_REGNUM);
8693 return frame.stack_pointer_offset - frame.frame_pointer_offset;
8694 }
8695 }
8696
8697 /* In a dynamically-aligned function, we can't know the offset from
8698 stack pointer to frame pointer, so we must ensure that setjmp
8699 eliminates fp against the hard fp (%ebp) rather than trying to
8700 index from %esp up to the top of the frame across a gap that is
8701 of unknown (at compile-time) size. */
8702 static rtx
8703 ix86_builtin_setjmp_frame_value (void)
8704 {
8705 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
8706 }
8707
8708 /* When using -fsplit-stack, the allocation routines set a field in
8709 the TCB to the bottom of the stack plus this much space, measured
8710 in bytes. */
8711
8712 #define SPLIT_STACK_AVAILABLE 256
8713
8714 /* Fill the ix86_frame structure describing the frame of the currently compiled function. */
8715
8716 static void
8717 ix86_compute_frame_layout (struct ix86_frame *frame)
8718 {
8719 unsigned int stack_alignment_needed;
8720 HOST_WIDE_INT offset;
8721 unsigned int preferred_alignment;
8722 HOST_WIDE_INT size = get_frame_size ();
8723 HOST_WIDE_INT to_allocate;
8724
8725 frame->nregs = ix86_nsaved_regs ();
8726 frame->nsseregs = ix86_nsaved_sseregs ();
8727
8728 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
8729 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
8730
8731 /* The 64-bit MS ABI seems to require stack alignment to always be 16, except for
8732 function prologues and leaf functions. */
8733 if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
8734 && (!current_function_is_leaf || cfun->calls_alloca != 0
8735 || ix86_current_function_calls_tls_descriptor))
8736 {
8737 preferred_alignment = 16;
8738 stack_alignment_needed = 16;
8739 crtl->preferred_stack_boundary = 128;
8740 crtl->stack_alignment_needed = 128;
8741 }
8742
8743 gcc_assert (!size || stack_alignment_needed);
8744 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
8745 gcc_assert (preferred_alignment <= stack_alignment_needed);
8746
8747 /* For SEH we have to limit the amount of code movement into the prologue.
8748 At present we do this via a BLOCKAGE, at which point there's very little
8749 scheduling that can be done, which means that there's very little point
8750 in doing anything except PUSHs. */
8751 if (TARGET_SEH)
8752 cfun->machine->use_fast_prologue_epilogue = false;
8753
8754 /* During reload iteration the number of registers saved can change.
8755 Recompute the value as needed. Do not recompute when the number of
8756 registers didn't change, as reload makes multiple calls to this function
8757 and does not expect the decision to change within a single iteration. */
8758 else if (!optimize_function_for_size_p (cfun)
8759 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
8760 {
8761 int count = frame->nregs;
8762 struct cgraph_node *node = cgraph_get_node (current_function_decl);
8763
8764 cfun->machine->use_fast_prologue_epilogue_nregs = count;
8765
8766 /* The fast prologue uses move instead of push to save registers. This
8767 is significantly longer, but also executes faster as modern hardware
8768 can execute the moves in parallel, but can't do that for push/pop.
8769
8770 Be careful about choosing which prologue to emit: when the function
8771 takes many instructions to execute we may use the slow version, as well
8772 as when the function is known to be outside a hot spot (this is known
8773 only with feedback). Weight the size of the function by the number of
8774 registers to save, as it is cheap to use one or two push instructions
8775 but very slow to use many of them. */
8776 if (count)
8777 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
8778 if (node->frequency < NODE_FREQUENCY_NORMAL
8779 || (flag_branch_probabilities
8780 && node->frequency < NODE_FREQUENCY_HOT))
8781 cfun->machine->use_fast_prologue_epilogue = false;
8782 else
8783 cfun->machine->use_fast_prologue_epilogue
8784 = !expensive_function_p (count);
8785 }
8786
8787 frame->save_regs_using_mov
8788 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
8789 /* If static stack checking is enabled and done with probes,
8790 the registers need to be saved before allocating the frame. */
8791 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
8792
8793 /* Skip return address. */
8794 offset = UNITS_PER_WORD;
8795
8796 /* Skip pushed static chain. */
8797 if (ix86_static_chain_on_stack)
8798 offset += UNITS_PER_WORD;
8799
8800 /* Skip saved base pointer. */
8801 if (frame_pointer_needed)
8802 offset += UNITS_PER_WORD;
8803 frame->hfp_save_offset = offset;
8804
8805 /* The traditional frame pointer location is at the top of the frame. */
8806 frame->hard_frame_pointer_offset = offset;
8807
8808 /* Register save area */
8809 offset += frame->nregs * UNITS_PER_WORD;
8810 frame->reg_save_offset = offset;
8811
8812 /* Align and set SSE register save area. */
8813 if (frame->nsseregs)
8814 {
8815 /* The only ABI that has saved SSE registers (Win64) also has a
8816 16-byte aligned default stack, and thus we don't need to be
8817 within the re-aligned local stack frame to save them. */
8818 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
8819 offset = (offset + 16 - 1) & -16;
8820 offset += frame->nsseregs * 16;
8821 }
8822 frame->sse_reg_save_offset = offset;
8823
8824 /* The re-aligned stack starts here. Values before this point are not
8825 directly comparable with values below this point. In order to make
8826 sure that no value happens to be the same before and after, force
8827 the alignment computation below to add a non-zero value. */
8828 if (stack_realign_fp)
8829 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
8830
8831 /* Va-arg area */
8832 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
8833 offset += frame->va_arg_size;
8834
8835 /* Align start of frame for local function. */
8836 if (stack_realign_fp
8837 || offset != frame->sse_reg_save_offset
8838 || size != 0
8839 || !current_function_is_leaf
8840 || cfun->calls_alloca
8841 || ix86_current_function_calls_tls_descriptor)
8842 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
8843
8844 /* Frame pointer points here. */
8845 frame->frame_pointer_offset = offset;
8846
8847 offset += size;
8848
8849 /* Add outgoing arguments area. Can be skipped if we eliminated
8850 all the function calls as dead code.
8851 Skipping is however impossible when the function calls alloca, as the
8852 alloca expander assumes that the last crtl->outgoing_args_size bytes
8853 of the stack frame are unused. */
8854 if (ACCUMULATE_OUTGOING_ARGS
8855 && (!current_function_is_leaf || cfun->calls_alloca
8856 || ix86_current_function_calls_tls_descriptor))
8857 {
8858 offset += crtl->outgoing_args_size;
8859 frame->outgoing_arguments_size = crtl->outgoing_args_size;
8860 }
8861 else
8862 frame->outgoing_arguments_size = 0;
8863
8864 /* Align stack boundary. Only needed if we're calling another function
8865 or using alloca. */
8866 if (!current_function_is_leaf || cfun->calls_alloca
8867 || ix86_current_function_calls_tls_descriptor)
8868 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
8869
8870 /* We've reached end of stack frame. */
8871 frame->stack_pointer_offset = offset;
8872
8873 /* Size prologue needs to allocate. */
8874 to_allocate = offset - frame->sse_reg_save_offset;
8875
8876 if ((!to_allocate && frame->nregs <= 1)
8877 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
8878 frame->save_regs_using_mov = false;
8879
8880 if (ix86_using_red_zone ()
8881 && current_function_sp_is_unchanging
8882 && current_function_is_leaf
8883 && !ix86_current_function_calls_tls_descriptor)
8884 {
8885 frame->red_zone_size = to_allocate;
8886 if (frame->save_regs_using_mov)
8887 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8888 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
8889 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
8890 }
8891 else
8892 frame->red_zone_size = 0;
8893 frame->stack_pointer_offset -= frame->red_zone_size;
8894
8895 /* The SEH frame pointer location is near the bottom of the frame.
8896 This is enforced by the fact that the difference between the
8897 stack pointer and the frame pointer is limited to 240 bytes in
8898 the unwind data structure. */
8899 if (TARGET_SEH)
8900 {
8901 HOST_WIDE_INT diff;
8902
8903 /* If we can leave the frame pointer where it is, do so. */
8904 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
8905 if (diff > 240 || (diff & 15) != 0)
8906 {
8907 /* Ideally we'd determine what portion of the local stack frame
8908 (within the constraint of the lowest 240) is most heavily used.
8909 But without that complication, simply bias the frame pointer
8910 by 128 bytes so as to maximize the amount of the local stack
8911 frame that is addressable with 8-bit offsets. */
8912 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
8913 }
8914 }
8915 }
8916
8917 /* This is semi-inlined memory_address_length, but simplified
8918 since we know that we're always dealing with reg+offset, and
8919 to avoid having to create and discard all that rtl. */
8920
8921 static inline int
8922 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
8923 {
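/* By default assume a 32-bit displacement is needed. */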
8924 int len = 4;
8925
8926 if (offset == 0)
8927 {
8928 /* EBP and R13 cannot be encoded without an offset. */
8929 len = (regno == BP_REG || regno == R13_REG);
8930 }
8931 else if (IN_RANGE (offset, -128, 127))
8932 len = 1;
8933
8934 /* ESP and R12 must be encoded with a SIB byte. */
8935 if (regno == SP_REG || regno == R12_REG)
8936 len++;
8937
8938 return len;
8939 }
8940
8941 /* Return an RTX that points to CFA_OFFSET within the stack frame.
8942 The valid base registers are taken from CFUN->MACHINE->FS. */
8943
8944 static rtx
8945 choose_baseaddr (HOST_WIDE_INT cfa_offset)
8946 {
8947 const struct machine_function *m = cfun->machine;
8948 rtx base_reg = NULL;
8949 HOST_WIDE_INT base_offset = 0;
8950
8951 if (m->use_fast_prologue_epilogue)
8952 {
8953 /* Choose the base register most likely to allow the most scheduling
8954 opportunities. Generally FP is valid throughout the function,
8955 while DRAP must be reloaded within the epilogue. But choose either
8956 over the SP due to its increased encoding size. */
8957
8958 if (m->fs.fp_valid)
8959 {
8960 base_reg = hard_frame_pointer_rtx;
8961 base_offset = m->fs.fp_offset - cfa_offset;
8962 }
8963 else if (m->fs.drap_valid)
8964 {
8965 base_reg = crtl->drap_reg;
8966 base_offset = 0 - cfa_offset;
8967 }
8968 else if (m->fs.sp_valid)
8969 {
8970 base_reg = stack_pointer_rtx;
8971 base_offset = m->fs.sp_offset - cfa_offset;
8972 }
8973 }
8974 else
8975 {
8976 HOST_WIDE_INT toffset;
8977 int len = 16, tlen;
8978
8979 /* Choose the base register with the smallest address encoding.
8980 With a tie, choose FP > DRAP > SP. */
8981 if (m->fs.sp_valid)
8982 {
8983 base_reg = stack_pointer_rtx;
8984 base_offset = m->fs.sp_offset - cfa_offset;
8985 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
8986 }
8987 if (m->fs.drap_valid)
8988 {
8989 toffset = 0 - cfa_offset;
8990 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
8991 if (tlen <= len)
8992 {
8993 base_reg = crtl->drap_reg;
8994 base_offset = toffset;
8995 len = tlen;
8996 }
8997 }
8998 if (m->fs.fp_valid)
8999 {
9000 toffset = m->fs.fp_offset - cfa_offset;
9001 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9002 if (tlen <= len)
9003 {
9004 base_reg = hard_frame_pointer_rtx;
9005 base_offset = toffset;
9006 len = tlen;
9007 }
9008 }
9009 }
9010 gcc_assert (base_reg != NULL);
9011
9012 return plus_constant (base_reg, base_offset);
9013 }
9014
9015 /* Emit code to save registers in the prologue. */
9016
9017 static void
9018 ix86_emit_save_regs (void)
9019 {
9020 unsigned int regno;
9021 rtx insn;
9022
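/* Push in decreasing regno order; the epilogue pops the registers back in increasing order. */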
9023 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9024 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9025 {
9026 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
9027 RTX_FRAME_RELATED_P (insn) = 1;
9028 }
9029 }
9030
9031 /* Emit a single register save at CFA - CFA_OFFSET. */
9032
9033 static void
9034 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9035 HOST_WIDE_INT cfa_offset)
9036 {
9037 struct machine_function *m = cfun->machine;
9038 rtx reg = gen_rtx_REG (mode, regno);
9039 rtx mem, addr, base, insn;
9040
9041 addr = choose_baseaddr (cfa_offset);
9042 mem = gen_frame_mem (mode, addr);
9043
9044 /* For SSE saves, we need to indicate the 128-bit alignment. */
9045 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9046
9047 insn = emit_move_insn (mem, reg);
9048 RTX_FRAME_RELATED_P (insn) = 1;
9049
9050 base = addr;
9051 if (GET_CODE (base) == PLUS)
9052 base = XEXP (base, 0);
9053 gcc_checking_assert (REG_P (base));
9054
9055 /* When saving registers into a re-aligned local stack frame, avoid
9056 any tricky guessing by dwarf2out. */
9057 if (m->fs.realigned)
9058 {
9059 gcc_checking_assert (stack_realign_drap);
9060
9061 if (regno == REGNO (crtl->drap_reg))
9062 {
9063 /* A bit of a hack. We force the DRAP register to be saved in
9064 the re-aligned stack frame, which provides us with a copy
9065 of the CFA that will last past the prologue. Install it. */
9066 gcc_checking_assert (cfun->machine->fs.fp_valid);
9067 addr = plus_constant (hard_frame_pointer_rtx,
9068 cfun->machine->fs.fp_offset - cfa_offset);
9069 mem = gen_rtx_MEM (mode, addr);
9070 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9071 }
9072 else
9073 {
9074 /* The frame pointer is a stable reference within the
9075 aligned frame. Use it. */
9076 gcc_checking_assert (cfun->machine->fs.fp_valid);
9077 addr = plus_constant (hard_frame_pointer_rtx,
9078 cfun->machine->fs.fp_offset - cfa_offset);
9079 mem = gen_rtx_MEM (mode, addr);
9080 add_reg_note (insn, REG_CFA_EXPRESSION,
9081 gen_rtx_SET (VOIDmode, mem, reg));
9082 }
9083 }
9084
9085 /* The memory may not be relative to the current CFA register,
9086 which means that we may need to generate a new pattern for
9087 use by the unwind info. */
9088 else if (base != m->fs.cfa_reg)
9089 {
9090 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
9091 mem = gen_rtx_MEM (mode, addr);
9092 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9093 }
9094 }
9095
9096 /* Emit code to save registers using MOV insns.
9097 First register is stored at CFA - CFA_OFFSET. */
9098 static void
9099 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9100 {
9101 unsigned int regno;
9102
9103 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9104 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9105 {
9106 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
9107 cfa_offset -= UNITS_PER_WORD;
9108 }
9109 }
9110
9111 /* Emit code to save SSE registers using MOV insns.
9112 First register is stored at CFA - CFA_OFFSET. */
9113 static void
9114 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9115 {
9116 unsigned int regno;
9117
9118 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9119 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9120 {
9121 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9122 cfa_offset -= 16;
9123 }
9124 }
9125
9126 static GTY(()) rtx queued_cfa_restores;
9127
9128 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
9129 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9130 Don't add the note if the previously saved value will be left untouched
9131 within the stack red zone until return, as unwinders can find the same
9132 value in the register and on the stack. */
9133
9134 static void
9135 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9136 {
9137 if (cfa_offset <= cfun->machine->fs.red_zone_offset)
9138 return;
9139
9140 if (insn)
9141 {
9142 add_reg_note (insn, REG_CFA_RESTORE, reg);
9143 RTX_FRAME_RELATED_P (insn) = 1;
9144 }
9145 else
9146 queued_cfa_restores
9147 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9148 }
9149
9150 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9151
9152 static void
9153 ix86_add_queued_cfa_restore_notes (rtx insn)
9154 {
9155 rtx last;
9156 if (!queued_cfa_restores)
9157 return;
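/* Find the tail of the queued list and splice the whole list in front of INSN's existing notes. */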
9158 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9159 ;
9160 XEXP (last, 1) = REG_NOTES (insn);
9161 REG_NOTES (insn) = queued_cfa_restores;
9162 queued_cfa_restores = NULL_RTX;
9163 RTX_FRAME_RELATED_P (insn) = 1;
9164 }
9165
9166 /* Expand prologue or epilogue stack adjustment.
9167 The pattern exists to put a dependency on all ebp-based memory accesses.
9168 STYLE should be negative if instructions should be marked as frame related,
9169 zero if the %r11 register is live and cannot be freely used, and positive
9170 otherwise. */
9171
9172 static void
9173 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9174 int style, bool set_cfa)
9175 {
9176 struct machine_function *m = cfun->machine;
9177 rtx insn;
9178 bool add_frame_related_expr = false;
9179
9180 if (! TARGET_64BIT)
9181 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9182 else if (x86_64_immediate_operand (offset, DImode))
9183 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9184 else
9185 {
9186 rtx tmp;
9187 /* r11 is used by indirect sibcall return as well, set before the
9188 epilogue and used after the epilogue. */
9189 if (style)
9190 tmp = gen_rtx_REG (DImode, R11_REG);
9191 else
9192 {
9193 gcc_assert (src != hard_frame_pointer_rtx
9194 && dest != hard_frame_pointer_rtx);
9195 tmp = hard_frame_pointer_rtx;
9196 }
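/* The offset does not fit a 32-bit immediate; load it into the temporary register first and add that instead. */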
9197 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9198 if (style < 0)
9199 add_frame_related_expr = true;
9200
9201 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9202 }
9203
9204 insn = emit_insn (insn);
9205 if (style >= 0)
9206 ix86_add_queued_cfa_restore_notes (insn);
9207
9208 if (set_cfa)
9209 {
9210 rtx r;
9211
9212 gcc_assert (m->fs.cfa_reg == src);
9213 m->fs.cfa_offset += INTVAL (offset);
9214 m->fs.cfa_reg = dest;
9215
9216 r = gen_rtx_PLUS (Pmode, src, offset);
9217 r = gen_rtx_SET (VOIDmode, dest, r);
9218 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9219 RTX_FRAME_RELATED_P (insn) = 1;
9220 }
9221 else if (style < 0)
9222 {
9223 RTX_FRAME_RELATED_P (insn) = 1;
9224 if (add_frame_related_expr)
9225 {
9226 rtx r = gen_rtx_PLUS (Pmode, src, offset);
9227 r = gen_rtx_SET (VOIDmode, dest, r);
9228 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
9229 }
9230 }
9231
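/* When the destination is the stack pointer, update the tracked SP offset and validity from whichever register supplied the new value. */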
9232 if (dest == stack_pointer_rtx)
9233 {
9234 HOST_WIDE_INT ooffset = m->fs.sp_offset;
9235 bool valid = m->fs.sp_valid;
9236
9237 if (src == hard_frame_pointer_rtx)
9238 {
9239 valid = m->fs.fp_valid;
9240 ooffset = m->fs.fp_offset;
9241 }
9242 else if (src == crtl->drap_reg)
9243 {
9244 valid = m->fs.drap_valid;
9245 ooffset = 0;
9246 }
9247 else
9248 {
9249 /* Else there are two possibilities: SP itself, which we set
9250 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
9251 taken care of by hand along the eh_return path. */
9252 gcc_checking_assert (src == stack_pointer_rtx
9253 || offset == const0_rtx);
9254 }
9255
9256 m->fs.sp_offset = ooffset - INTVAL (offset);
9257 m->fs.sp_valid = valid;
9258 }
9259 }
9260
9261 /* Find an available register to be used as the dynamic realign argument
9262 pointer register. Such a register will be written in the prologue and
9263 used at the beginning of the body, so it must not be
9264 1. a parameter passing register.
9265 2. the GOT pointer.
9266 We reuse the static-chain register if it is available. Otherwise, we
9267 use DI for i386 and R13 for x86-64. We chose R13 since it has a
9268 shorter encoding.
9269
9270 Return: the regno of the chosen register. */
9271
9272 static unsigned int
9273 find_drap_reg (void)
9274 {
9275 tree decl = cfun->decl;
9276
9277 if (TARGET_64BIT)
9278 {
9279 /* Use R13 for a nested function or a function that needs a static chain.
9280 Since a function with a tail call may use any caller-saved
9281 registers in the epilogue, DRAP must not use a caller-saved
9282 register in that case. */
9283 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9284 return R13_REG;
9285
9286 return R10_REG;
9287 }
9288 else
9289 {
9290 /* Use DI for a nested function or a function that needs a static chain.
9291 Since a function with a tail call may use any caller-saved
9292 registers in the epilogue, DRAP must not use a caller-saved
9293 register in that case. */
9294 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9295 return DI_REG;
9296
9297 /* Reuse static chain register if it isn't used for parameter
9298 passing. */
9299 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
9300 {
9301 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
9302 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
9303 return CX_REG;
9304 }
9305 return DI_REG;
9306 }
9307 }
9308
9309 /* Return minimum incoming stack alignment. */
9310
9311 static unsigned int
9312 ix86_minimum_incoming_stack_boundary (bool sibcall)
9313 {
9314 unsigned int incoming_stack_boundary;
9315
9316 /* Prefer the one specified at command line. */
9317 if (ix86_user_incoming_stack_boundary)
9318 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
9319 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
9320 if -mstackrealign is used, this isn't a sibcall check, and the
9321 estimated stack alignment is 128 bits. */
9322 else if (!sibcall
9323 && !TARGET_64BIT
9324 && ix86_force_align_arg_pointer
9325 && crtl->stack_alignment_estimated == 128)
9326 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9327 else
9328 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
9329
9330 /* Incoming stack alignment can be changed on individual functions
9331 via force_align_arg_pointer attribute. We use the smallest
9332 incoming stack boundary. */
9333 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
9334 && lookup_attribute (ix86_force_align_arg_pointer_string,
9335 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
9336 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9337
9338 /* The incoming stack frame has to be aligned at least at
9339 parm_stack_boundary. */
9340 if (incoming_stack_boundary < crtl->parm_stack_boundary)
9341 incoming_stack_boundary = crtl->parm_stack_boundary;
9342
9343 /* The stack at the entry of main is aligned by the runtime. We use the
9344 smallest incoming stack boundary. */
9345 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
9346 && DECL_NAME (current_function_decl)
9347 && MAIN_NAME_P (DECL_NAME (current_function_decl))
9348 && DECL_FILE_SCOPE_P (current_function_decl))
9349 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
9350
9351 return incoming_stack_boundary;
9352 }
9353
9354 /* Update incoming stack boundary and estimated stack alignment. */
9355
9356 static void
9357 ix86_update_stack_boundary (void)
9358 {
9359 ix86_incoming_stack_boundary
9360 = ix86_minimum_incoming_stack_boundary (false);
9361
9362 /* x86_64 varargs need 16-byte stack alignment for the register save
9363 area. */
9364 if (TARGET_64BIT
9365 && cfun->stdarg
9366 && crtl->stack_alignment_estimated < 128)
9367 crtl->stack_alignment_estimated = 128;
9368 }
9369
9370 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9371 needed or an rtx for DRAP otherwise. */
9372
9373 static rtx
9374 ix86_get_drap_rtx (void)
9375 {
9376 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9377 crtl->need_drap = true;
9378
9379 if (stack_realign_drap)
9380 {
9381 /* Assign DRAP to vDRAP and return vDRAP. */
9382 unsigned int regno = find_drap_reg ();
9383 rtx drap_vreg;
9384 rtx arg_ptr;
9385 rtx seq, insn;
9386
9387 arg_ptr = gen_rtx_REG (Pmode, regno);
9388 crtl->drap_reg = arg_ptr;
9389
9390 start_sequence ();
9391 drap_vreg = copy_to_reg (arg_ptr);
9392 seq = get_insns ();
9393 end_sequence ();
9394
9395 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
9396 if (!optimize)
9397 {
9398 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9399 RTX_FRAME_RELATED_P (insn) = 1;
9400 }
9401 return drap_vreg;
9402 }
9403 else
9404 return NULL;
9405 }
9406
9407 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9408
9409 static rtx
9410 ix86_internal_arg_pointer (void)
9411 {
9412 return virtual_incoming_args_rtx;
9413 }
9414
9415 struct scratch_reg {
9416 rtx reg;
9417 bool saved;
9418 };
9419
9420 /* Return a short-lived scratch register for use on function entry.
9421 In 32-bit mode, it is valid only after the registers are saved
9422 in the prologue. This register must be released by means of
9423 release_scratch_register_on_entry once it is dead. */
9424
9425 static void
9426 get_scratch_register_on_entry (struct scratch_reg *sr)
9427 {
9428 int regno;
9429
9430 sr->saved = false;
9431
9432 if (TARGET_64BIT)
9433 {
9434 /* We always use R11 in 64-bit mode. */
9435 regno = R11_REG;
9436 }
9437 else
9438 {
9439 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9440 bool fastcall_p
9441 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9442 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9443 int regparm = ix86_function_regparm (fntype, decl);
9444 int drap_regno
9445 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9446
9447 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9448 for the static chain register. */
9449 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9450 && drap_regno != AX_REG)
9451 regno = AX_REG;
9452 else if (regparm < 2 && drap_regno != DX_REG)
9453 regno = DX_REG;
9454 /* ecx is the static chain register. */
9455 else if (regparm < 3 && !fastcall_p && !static_chain_p
9456 && drap_regno != CX_REG)
9457 regno = CX_REG;
9458 else if (ix86_save_reg (BX_REG, true))
9459 regno = BX_REG;
9460 /* esi is the static chain register. */
9461 else if (!(regparm == 3 && static_chain_p)
9462 && ix86_save_reg (SI_REG, true))
9463 regno = SI_REG;
9464 else if (ix86_save_reg (DI_REG, true))
9465 regno = DI_REG;
9466 else
9467 {
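/* No suitable register is known to be free; fall back to EAX or EDX and preserve it with a push/pop around its use. */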
9468 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9469 sr->saved = true;
9470 }
9471 }
9472
9473 sr->reg = gen_rtx_REG (Pmode, regno);
9474 if (sr->saved)
9475 {
9476 rtx insn = emit_insn (gen_push (sr->reg));
9477 RTX_FRAME_RELATED_P (insn) = 1;
9478 }
9479 }
9480
9481 /* Release a scratch register obtained from the preceding function. */
9482
9483 static void
9484 release_scratch_register_on_entry (struct scratch_reg *sr)
9485 {
9486 if (sr->saved)
9487 {
9488 rtx x, insn = emit_insn (gen_pop (sr->reg));
9489
9490 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9491 RTX_FRAME_RELATED_P (insn) = 1;
9492 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9493 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9494 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9495 }
9496 }
9497
9498 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
9499
9500 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9501
9502 static void
9503 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9504 {
9505 /* We skip the probe for the first interval + a small dope of 4 words and
9506 probe that many bytes past the specified size to maintain a protection
9507 area at the bottom of the stack. */
9508 const int dope = 4 * UNITS_PER_WORD;
9509 rtx size_rtx = GEN_INT (size), last;
9510
9511 /* See if we have a constant small number of probes to generate. If so,
9512 that's the easy case. The run-time loop is made up of 11 insns in the
9513 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9514 for n # of intervals. */
9515 if (size <= 5 * PROBE_INTERVAL)
9516 {
9517 HOST_WIDE_INT i, adjust;
9518 bool first_probe = true;
9519
9520 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9521 values of N from 1 until it exceeds SIZE. If only one probe is
9522 needed, this will not generate any code. Then adjust and probe
9523 to PROBE_INTERVAL + SIZE. */
9524 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9525 {
9526 if (first_probe)
9527 {
9528 adjust = 2 * PROBE_INTERVAL + dope;
9529 first_probe = false;
9530 }
9531 else
9532 adjust = PROBE_INTERVAL;
9533
9534 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9535 plus_constant (stack_pointer_rtx, -adjust)));
9536 emit_stack_probe (stack_pointer_rtx);
9537 }
9538
9539 if (first_probe)
9540 adjust = size + PROBE_INTERVAL + dope;
9541 else
9542 adjust = size + PROBE_INTERVAL - i;
9543
9544 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9545 plus_constant (stack_pointer_rtx, -adjust)));
9546 emit_stack_probe (stack_pointer_rtx);
9547
9548 /* Adjust back to account for the additional first interval. */
9549 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9550 plus_constant (stack_pointer_rtx,
9551 PROBE_INTERVAL + dope)));
9552 }
9553
9554 /* Otherwise, do the same as above, but in a loop. Note that we must be
9555 extra careful with variables wrapping around because we might be at
9556 the very top (or the very bottom) of the address space and we have
9557 to be able to handle this case properly; in particular, we use an
9558 equality test for the loop condition. */
9559 else
9560 {
9561 HOST_WIDE_INT rounded_size;
9562 struct scratch_reg sr;
9563
9564 get_scratch_register_on_entry (&sr);
9565
9566
9567 /* Step 1: round SIZE to the previous multiple of the interval. */
9568
9569 rounded_size = size & -PROBE_INTERVAL;
9570
9571
9572 /* Step 2: compute initial and final value of the loop counter. */
9573
9574 /* SP = SP_0 + PROBE_INTERVAL. */
9575 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9576 plus_constant (stack_pointer_rtx,
9577 - (PROBE_INTERVAL + dope))));
9578
9579 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9580 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
9581 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
9582 gen_rtx_PLUS (Pmode, sr.reg,
9583 stack_pointer_rtx)));
9584
9585
9586 /* Step 3: the loop
9587
9588 while (SP != LAST_ADDR)
9589 {
9590 SP = SP + PROBE_INTERVAL
9591 probe at SP
9592 }
9593
9594 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9595 values of N from 1 until it is equal to ROUNDED_SIZE. */
9596
9597 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
9598
9599
9600 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9601 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9602
9603 if (size != rounded_size)
9604 {
9605 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9606 plus_constant (stack_pointer_rtx,
9607 rounded_size - size)));
9608 emit_stack_probe (stack_pointer_rtx);
9609 }
9610
9611 /* Adjust back to account for the additional first interval. */
9612 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9613 plus_constant (stack_pointer_rtx,
9614 PROBE_INTERVAL + dope)));
9615
9616 release_scratch_register_on_entry (&sr);
9617 }
9618
9619 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
9620
9621 /* Even if the stack pointer isn't the CFA register, we need to correctly
9622 describe the adjustments made to it, in particular differentiate the
9623 frame-related ones from the frame-unrelated ones. */
9624 if (size > 0)
9625 {
9626 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
9627 XVECEXP (expr, 0, 0)
9628 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9629 plus_constant (stack_pointer_rtx, -size));
9630 XVECEXP (expr, 0, 1)
9631 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9632 plus_constant (stack_pointer_rtx,
9633 PROBE_INTERVAL + dope + size));
9634 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
9635 RTX_FRAME_RELATED_P (last) = 1;
9636
9637 cfun->machine->fs.sp_offset += size;
9638 }
9639
9640 /* Make sure nothing is scheduled before we are done. */
9641 emit_insn (gen_blockage ());
9642 }
9643
9644 /* Adjust the stack pointer up to REG while probing it. */
9645
9646 const char *
9647 output_adjust_stack_and_probe (rtx reg)
9648 {
9649 static int labelno = 0;
9650 char loop_lab[32], end_lab[32];
9651 rtx xops[2];
9652
9653 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9654 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9655
9656 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9657
9658 /* Jump to END_LAB if SP == LAST_ADDR. */
9659 xops[0] = stack_pointer_rtx;
9660 xops[1] = reg;
9661 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9662 fputs ("\tje\t", asm_out_file);
9663 assemble_name_raw (asm_out_file, end_lab);
9664 fputc ('\n', asm_out_file);
9665
9666 /* SP = SP + PROBE_INTERVAL. */
9667 xops[1] = GEN_INT (PROBE_INTERVAL);
9668 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9669
9670 /* Probe at SP. */
9671 xops[1] = const0_rtx;
9672 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
9673
9674 fprintf (asm_out_file, "\tjmp\t");
9675 assemble_name_raw (asm_out_file, loop_lab);
9676 fputc ('\n', asm_out_file);
9677
9678 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9679
9680 return "";
9681 }
9682
9683 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9684 inclusive. These are offsets from the current stack pointer. */
9685
9686 static void
9687 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
9688 {
9689 /* See if we have a constant small number of probes to generate. If so,
9690 that's the easy case. The run-time loop is made up of 7 insns in the
9691 generic case while the compile-time loop is made up of n insns for n #
9692 of intervals. */
9693 if (size <= 7 * PROBE_INTERVAL)
9694 {
9695 HOST_WIDE_INT i;
9696
9697 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9698 it exceeds SIZE. If only one probe is needed, this will not
9699 generate any code. Then probe at FIRST + SIZE. */
9700 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9701 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
9702
9703 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
9704 }
9705
9706 /* Otherwise, do the same as above, but in a loop. Note that we must be
9707 extra careful with variables wrapping around because we might be at
9708 the very top (or the very bottom) of the address space and we have
9709 to be able to handle this case properly; in particular, we use an
9710 equality test for the loop condition. */
9711 else
9712 {
9713 HOST_WIDE_INT rounded_size, last;
9714 struct scratch_reg sr;
9715
9716 get_scratch_register_on_entry (&sr);
9717
9718
9719 /* Step 1: round SIZE to the previous multiple of the interval. */
9720
9721 rounded_size = size & -PROBE_INTERVAL;
9722
9723
9724 /* Step 2: compute initial and final value of the loop counter. */
9725
9726 /* TEST_OFFSET = FIRST. */
9727 emit_move_insn (sr.reg, GEN_INT (-first));
9728
9729 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9730 last = first + rounded_size;
9731
9732
9733 /* Step 3: the loop
9734
9735 while (TEST_ADDR != LAST_ADDR)
9736 {
9737 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9738 probe at TEST_ADDR
9739 }
9740
9741 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9742 until it is equal to ROUNDED_SIZE. */
9743
9744 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
9745
9746
9747 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9748 that SIZE is equal to ROUNDED_SIZE. */
9749
9750 if (size != rounded_size)
9751 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
9752 stack_pointer_rtx,
9753 sr.reg),
9754 rounded_size - size));
9755
9756 release_scratch_register_on_entry (&sr);
9757 }
9758
9759 /* Make sure nothing is scheduled before we are done. */
9760 emit_insn (gen_blockage ());
9761 }
9762
9763 /* Probe a range of stack addresses from REG to END, inclusive. These are
9764 offsets from the current stack pointer. */
9765
9766 const char *
9767 output_probe_stack_range (rtx reg, rtx end)
9768 {
9769 static int labelno = 0;
9770 char loop_lab[32], end_lab[32];
9771 rtx xops[3];
9772
9773 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9774 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9775
9776 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9777
9778 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
9779 xops[0] = reg;
9780 xops[1] = end;
9781 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9782 fputs ("\tje\t", asm_out_file);
9783 assemble_name_raw (asm_out_file, end_lab);
9784 fputc ('\n', asm_out_file);
9785
9786 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
9787 xops[1] = GEN_INT (PROBE_INTERVAL);
9788 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9789
9790 /* Probe at TEST_ADDR. */
9791 xops[0] = stack_pointer_rtx;
9792 xops[1] = reg;
9793 xops[2] = const0_rtx;
9794 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
9795
9796 fprintf (asm_out_file, "\tjmp\t");
9797 assemble_name_raw (asm_out_file, loop_lab);
9798 fputc ('\n', asm_out_file);
9799
9800 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9801
9802 return "";
9803 }
9804
9805 /* Finalize the stack_realign_needed flag, which guides generation of the
9806 prologue/epilogue in the correct form. */
9807 static void
9808 ix86_finalize_stack_realign_flags (void)
9809 {
9810 /* Check if stack realignment is really needed after reload, and
9811 store the result in cfun. */
9812 unsigned int incoming_stack_boundary
9813 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
9814 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
9815 unsigned int stack_realign = (incoming_stack_boundary
9816 < (current_function_is_leaf
9817 ? crtl->max_used_stack_slot_alignment
9818 : crtl->stack_alignment_needed));
9819
9820 if (crtl->stack_realign_finalized)
9821 {
9822 /* After stack_realign_needed is finalized, we can no longer
9823 change it. */
9824 gcc_assert (crtl->stack_realign_needed == stack_realign);
9825 }
9826 else
9827 {
9828 crtl->stack_realign_needed = stack_realign;
9829 crtl->stack_realign_finalized = true;
9830 }
9831 }
9832
9833 /* Expand the prologue into a bunch of separate insns. */
9834
9835 void
9836 ix86_expand_prologue (void)
9837 {
9838 struct machine_function *m = cfun->machine;
9839 rtx insn, t;
9840 bool pic_reg_used;
9841 struct ix86_frame frame;
9842 HOST_WIDE_INT allocate;
9843 bool int_registers_saved;
9844
9845 ix86_finalize_stack_realign_flags ();
9846
9847 /* DRAP should not coexist with stack_realign_fp */
9848 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
9849
9850 memset (&m->fs, 0, sizeof (m->fs));
9851
9852 /* Initialize CFA state for before the prologue. */
9853 m->fs.cfa_reg = stack_pointer_rtx;
9854 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
9855
9856 /* Track SP offset to the CFA. We continue tracking this after we've
9857 swapped the CFA register away from SP. In the case of re-alignment
9858 this is fudged; we're interested in offsets within the local frame. */
9859 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9860 m->fs.sp_valid = true;
9861
9862 ix86_compute_frame_layout (&frame);
9863
9864 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
9865 {
9866 /* We should have already generated an error for any use of
9867 ms_hook on a nested function. */
9868 gcc_checking_assert (!ix86_static_chain_on_stack);
9869
9870 /* Check if profiling is active and we shall use the profiling-before-prologue
9871 variant. If so, issue a sorry. */
9872 if (crtl->profile && flag_fentry != 0)
9873 sorry ("ms_hook_prologue attribute isn%'t compatible "
9874 "with -mfentry for 32-bit");
9875
9876 /* In ix86_asm_output_function_label we emitted:
9877 8b ff movl.s %edi,%edi
9878 55 push %ebp
9879 8b ec movl.s %esp,%ebp
9880
9881 This matches the hookable function prologue in Win32 API
9882 functions in Microsoft Windows XP Service Pack 2 and newer.
9883 Wine uses this to enable Windows apps to hook the Win32 API
9884 functions provided by Wine.
9885
9886 What that means is that we've already set up the frame pointer. */
9887
9888 if (frame_pointer_needed
9889 && !(crtl->drap_reg && crtl->stack_realign_needed))
9890 {
9891 rtx push, mov;
9892
9893 /* We've decided to use the frame pointer already set up.
9894 Describe this to the unwinder by pretending that both
9895 push and mov insns happen right here.
9896
9897 Putting the unwind info here at the end of the ms_hook
9898 is done so that we can make absolutely certain we get
9899 the required byte sequence at the start of the function,
9900 rather than relying on an assembler that can produce
9901 the exact encoding required.
9902
9903 However it does mean (in the unpatched case) that we have
9904 a 1 insn window where the asynchronous unwind info is
9905 incorrect. However, if we placed the unwind info at
9906 its correct location we would have incorrect unwind info
9907 in the patched case. Which is probably all moot since
9908 I don't expect Wine generates dwarf2 unwind info for the
9909 system libraries that use this feature. */
9910
9911 insn = emit_insn (gen_blockage ());
9912
9913 push = gen_push (hard_frame_pointer_rtx);
9914 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
9915 stack_pointer_rtx);
9916 RTX_FRAME_RELATED_P (push) = 1;
9917 RTX_FRAME_RELATED_P (mov) = 1;
9918
9919 RTX_FRAME_RELATED_P (insn) = 1;
9920 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9921 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
9922
9923 /* Note that gen_push incremented m->fs.cfa_offset, even
9924 though we didn't emit the push insn here. */
9925 m->fs.cfa_reg = hard_frame_pointer_rtx;
9926 m->fs.fp_offset = m->fs.cfa_offset;
9927 m->fs.fp_valid = true;
9928 }
9929 else
9930 {
9931 /* The frame pointer is not needed so pop %ebp again.
9932 This leaves us with a pristine state. */
9933 emit_insn (gen_pop (hard_frame_pointer_rtx));
9934 }
9935 }
9936
9937 /* The first insn of a function that accepts its static chain on the
9938 stack is to push the register that would be filled in by a direct
9939 call. This insn will be skipped by the trampoline. */
9940 else if (ix86_static_chain_on_stack)
9941 {
9942 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
9943 emit_insn (gen_blockage ());
9944
9945 /* We don't want to interpret this push insn as a register save,
9946 only as a stack adjustment. The real copy of the register as
9947 a save will be done later, if needed. */
9948 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
9949 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
9950 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9951 RTX_FRAME_RELATED_P (insn) = 1;
9952 }
9953
9954 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
9955 DRAP is needed and stack realignment is really needed after reload. */
9956 if (stack_realign_drap)
9957 {
9958 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9959
9960 /* Only need to push the parameter pointer reg if it is not call-clobbered. */
9961 if (!call_used_regs[REGNO (crtl->drap_reg)])
9962 {
9963 /* Push arg pointer reg */
9964 insn = emit_insn (gen_push (crtl->drap_reg));
9965 RTX_FRAME_RELATED_P (insn) = 1;
9966 }
9967
9968 /* Grab the argument pointer. */
9969 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
9970 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
9971 RTX_FRAME_RELATED_P (insn) = 1;
9972 m->fs.cfa_reg = crtl->drap_reg;
9973 m->fs.cfa_offset = 0;
9974
9975 /* Align the stack. */
9976 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9977 stack_pointer_rtx,
9978 GEN_INT (-align_bytes)));
9979 RTX_FRAME_RELATED_P (insn) = 1;
9980
9981 /* Replicate the return address on the stack so that the return
9982 address can be reached via the (argp - 1) slot. This is needed
9983 to implement the macro RETURN_ADDR_RTX and the intrinsic function
9984 expand_builtin_return_addr etc. */
9985 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
9986 t = gen_frame_mem (Pmode, t);
9987 insn = emit_insn (gen_push (t));
9988 RTX_FRAME_RELATED_P (insn) = 1;
9989
9990 /* For the purposes of frame and register save area addressing,
9991 we've started over with a new frame. */
9992 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9993 m->fs.realigned = true;
9994 }
9995
9996 if (frame_pointer_needed && !m->fs.fp_valid)
9997 {
9998 /* Note: AT&T enter does NOT have reversed args. Enter is probably
9999 slower on all targets. Also sdb doesn't like it. */
10000 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10001 RTX_FRAME_RELATED_P (insn) = 1;
10002
10003 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10004 {
10005 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10006 RTX_FRAME_RELATED_P (insn) = 1;
10007
10008 if (m->fs.cfa_reg == stack_pointer_rtx)
10009 m->fs.cfa_reg = hard_frame_pointer_rtx;
10010 m->fs.fp_offset = m->fs.sp_offset;
10011 m->fs.fp_valid = true;
10012 }
10013 }
10014
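/* If there are no integer registers to save, treat them as already saved. */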
10015 int_registers_saved = (frame.nregs == 0);
10016
10017 if (!int_registers_saved)
10018 {
10019 /* If saving registers via PUSH, do so now. */
10020 if (!frame.save_regs_using_mov)
10021 {
10022 ix86_emit_save_regs ();
10023 int_registers_saved = true;
10024 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10025 }
10026
10027 /* When using the red zone we may start register saving before allocating
10028 the stack frame, saving one cycle of the prologue. However, avoid
10029 doing this if we have to probe the stack; at least on x86_64 the
10030 stack probe can turn into a call that clobbers a red zone location. */
10031 else if (ix86_using_red_zone ()
10032 && (! TARGET_STACK_PROBE
10033 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10034 {
10035 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10036 int_registers_saved = true;
10037 }
10038 }
10039
10040 if (stack_realign_fp)
10041 {
10042 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10043 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10044
10045 /* The computation of the size of the re-aligned stack frame means
10046 that we must allocate the size of the register save area before
10047 performing the actual alignment. Otherwise we cannot guarantee
10048 that there's enough storage above the realignment point. */
10049 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10050 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10051 GEN_INT (m->fs.sp_offset
10052 - frame.sse_reg_save_offset),
10053 -1, false);
10054
10055 /* Align the stack. */
10056 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10057 stack_pointer_rtx,
10058 GEN_INT (-align_bytes)));
10059
10060 /* For the purposes of register save area addressing, the stack
10061 pointer is no longer valid. As for the value of sp_offset,
10062 see ix86_compute_frame_layout, which we need to match in order
10063 to pass verification of stack_pointer_offset at the end. */
10064 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10065 m->fs.sp_valid = false;
10066 }
10067
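/* The remaining amount of stack the prologue still has to allocate. */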
10068 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10069
10070 if (flag_stack_usage_info)
10071 {
10072 /* We start to count from ARG_POINTER. */
10073 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10074
10075 /* If it was realigned, take into account the fake frame. */
10076 if (stack_realign_drap)
10077 {
10078 if (ix86_static_chain_on_stack)
10079 stack_size += UNITS_PER_WORD;
10080
10081 if (!call_used_regs[REGNO (crtl->drap_reg)])
10082 stack_size += UNITS_PER_WORD;
10083
10084 /* This over-estimates by 1 minimal-stack-alignment-unit but
10085 mitigates that by counting in the new return address slot. */
10086 current_function_dynamic_stack_size
10087 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10088 }
10089
10090 current_function_static_stack_size = stack_size;
10091 }
10092
10093 /* The stack has already been decremented by the instruction calling us
10094 so probe if the size is non-negative to preserve the protection area. */
10095 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
10096 {
10097 /* We expect the registers to be saved when probes are used. */
10098 gcc_assert (int_registers_saved);
10099
10100 if (STACK_CHECK_MOVING_SP)
10101 {
10102 ix86_adjust_stack_and_probe (allocate);
10103 allocate = 0;
10104 }
10105 else
10106 {
10107 HOST_WIDE_INT size = allocate;
10108
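/* Cap the probed size just below 2GB (presumably so the probe offsets stay within 32-bit displacements). */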
10109 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
10110 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
10111
10112 if (TARGET_STACK_PROBE)
10113 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
10114 else
10115 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
10116 }
10117 }
10118
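/* If nothing is left to allocate, fall through. Otherwise either adjust SP directly, or go through the stack allocation worker below when probing is required. */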
10119 if (allocate == 0)
10120 ;
10121 else if (!ix86_target_stack_probe ()
10122 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
10123 {
10124 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10125 GEN_INT (-allocate), -1,
10126 m->fs.cfa_reg == stack_pointer_rtx);
10127 }
10128 else
10129 {
10130 rtx eax = gen_rtx_REG (Pmode, AX_REG);
10131 rtx r10 = NULL;
10132 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
10133
10134 bool eax_live = false;
10135 bool r10_live = false;
10136
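/* EAX and R10 may be live at function entry (R10 carries the static chain on 64-bit targets); if so, preserve them around the allocation call. */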
10137 if (TARGET_64BIT)
10138 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
10139 if (!TARGET_64BIT_MS_ABI)
10140 eax_live = ix86_eax_live_at_start_p ();
10141
10142 if (eax_live)
10143 {
10144 emit_insn (gen_push (eax));
10145 allocate -= UNITS_PER_WORD;
10146 }
10147 if (r10_live)
10148 {
10149 r10 = gen_rtx_REG (Pmode, R10_REG);
10150 emit_insn (gen_push (r10));
10151 allocate -= UNITS_PER_WORD;
10152 }
10153
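/* Pass the allocation size to the stack allocation worker in AX; the explicit stack adjustment is emitted below. */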
10154 emit_move_insn (eax, GEN_INT (allocate));
10155 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
10156
10157 /* Use the fact that AX still contains ALLOCATE. */
10158 adjust_stack_insn = (TARGET_64BIT
10159 ? gen_pro_epilogue_adjust_stack_di_sub
10160 : gen_pro_epilogue_adjust_stack_si_sub);
10161
10162 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
10163 stack_pointer_rtx, eax));
10164
10165 /* Note that SEH directives need to continue tracking the stack
10166 pointer even after the frame pointer has been set up. */
10167 if (m->fs.cfa_reg == stack_pointer_rtx || TARGET_SEH)
10168 {
10169 if (m->fs.cfa_reg == stack_pointer_rtx)
10170 m->fs.cfa_offset += allocate;
10171
10172 RTX_FRAME_RELATED_P (insn) = 1;
10173 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10174 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10175 plus_constant (stack_pointer_rtx,
10176 -allocate)));
10177 }
10178 m->fs.sp_offset += allocate;
10179
10180 if (r10_live && eax_live)
10181 {
10182 t = choose_baseaddr (m->fs.sp_offset - allocate);
10183 emit_move_insn (r10, gen_frame_mem (Pmode, t));
10184 t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
10185 emit_move_insn (eax, gen_frame_mem (Pmode, t));
10186 }
10187 else if (eax_live || r10_live)
10188 {
10189 t = choose_baseaddr (m->fs.sp_offset - allocate);
10190 emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
10191 }
10192 }
10193 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
10194
10195 /* If we haven't already set up the frame pointer, do so now. */
10196 if (frame_pointer_needed && !m->fs.fp_valid)
10197 {
10198 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
10199 GEN_INT (frame.stack_pointer_offset
10200 - frame.hard_frame_pointer_offset));
10201 insn = emit_insn (insn);
10202 RTX_FRAME_RELATED_P (insn) = 1;
10203 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
10204
10205 if (m->fs.cfa_reg == stack_pointer_rtx)
10206 m->fs.cfa_reg = hard_frame_pointer_rtx;
10207 m->fs.fp_offset = frame.hard_frame_pointer_offset;
10208 m->fs.fp_valid = true;
10209 }
10210
10211 if (!int_registers_saved)
10212 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10213 if (frame.nsseregs)
10214 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10215
10216 pic_reg_used = false;
10217 if (pic_offset_table_rtx
10218 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10219 || crtl->profile))
10220 {
10221 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
10222
10223 if (alt_pic_reg_used != INVALID_REGNUM)
10224 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
10225
10226 pic_reg_used = true;
10227 }
10228
10229 if (pic_reg_used)
10230 {
10231 if (TARGET_64BIT)
10232 {
10233 if (ix86_cmodel == CM_LARGE_PIC)
10234 {
10235 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
10236 rtx label = gen_label_rtx ();
10237 emit_label (label);
10238 LABEL_PRESERVE_P (label) = 1;
10239 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
10240 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
10241 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
10242 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
10243 pic_offset_table_rtx, tmp_reg));
10244 }
10245 else
10246 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
10247 }
10248 else
10249 {
10250 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
10251 RTX_FRAME_RELATED_P (insn) = 1;
10252 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
10253 }
10254 }
10255
10256 /* In the pic_reg_used case, make sure that the got load isn't deleted
10257 when mcount needs it. Blockage to avoid call movement across mcount
10258 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10259 note. */
10260 if (crtl->profile && !flag_fentry && pic_reg_used)
10261 emit_insn (gen_prologue_use (pic_offset_table_rtx));
10262
10263 if (crtl->drap_reg && !crtl->stack_realign_needed)
10264 {
10265 /* vDRAP is set up, but after reload it turns out stack realignment
10266 isn't necessary; here we emit prologue code to set up DRAP
10267 without the stack realignment adjustment. */
10268 t = choose_baseaddr (0);
10269 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10270 }
10271
10272 /* Prevent instructions from being scheduled into the register save push
10273 sequence when the red zone area is accessed through the frame pointer.
10274 The offset between the frame pointer and the stack pointer is calculated
10275 relative to the value of the stack pointer at the end of the function
10276 prologue, and moving instructions that access the red zone via the frame
10277 pointer into the push sequence violates this assumption. */
10278 if (frame_pointer_needed && frame.red_zone_size)
10279 emit_insn (gen_memory_blockage ());
10280
10281 /* Emit cld instruction if stringops are used in the function. */
10282 if (TARGET_CLD && ix86_current_function_needs_cld)
10283 emit_insn (gen_cld ());
10284
10285 /* SEH requires that the prologue end within 256 bytes of the start of
10286 the function. Prevent instruction schedules that would extend that.
10287 Further, prevent alloca modifications to the stack pointer from being
10288 combined with prologue modifications. */
10289 if (TARGET_SEH)
10290 emit_insn (gen_prologue_use (stack_pointer_rtx));
10291 }
10292
10293 /* Emit code to restore REG using a POP insn. */
10294
10295 static void
10296 ix86_emit_restore_reg_using_pop (rtx reg)
10297 {
10298 struct machine_function *m = cfun->machine;
10299 rtx insn = emit_insn (gen_pop (reg));
10300
10301 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
10302 m->fs.sp_offset -= UNITS_PER_WORD;
10303
10304 if (m->fs.cfa_reg == crtl->drap_reg
10305 && REGNO (reg) == REGNO (crtl->drap_reg))
10306 {
10307 /* Previously we'd represented the CFA as an expression
10308 like *(%ebp - 8). We've just popped that value from
10309 the stack, which means we need to reset the CFA to
10310 the drap register. This will remain until we restore
10311 the stack pointer. */
10312 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10313 RTX_FRAME_RELATED_P (insn) = 1;
10314
10315 /* This means that the DRAP register is valid for addressing too. */
10316 m->fs.drap_valid = true;
10317 return;
10318 }
10319
10320 if (m->fs.cfa_reg == stack_pointer_rtx)
10321 {
10322 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
10323 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10324 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10325 RTX_FRAME_RELATED_P (insn) = 1;
10326
10327 m->fs.cfa_offset -= UNITS_PER_WORD;
10328 }
10329
10330 /* When the frame pointer is the CFA, and we pop it, we are
10331 swapping back to the stack pointer as the CFA. This happens
10332 for stack frames that don't allocate other data, so we assume
10333 the stack pointer is now pointing at the return address, i.e.
10334 the function entry state, which makes the offset be 1 word. */
10335 if (reg == hard_frame_pointer_rtx)
10336 {
10337 m->fs.fp_valid = false;
10338 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10339 {
10340 m->fs.cfa_reg = stack_pointer_rtx;
10341 m->fs.cfa_offset -= UNITS_PER_WORD;
10342
10343 add_reg_note (insn, REG_CFA_DEF_CFA,
10344 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10345 GEN_INT (m->fs.cfa_offset)));
10346 RTX_FRAME_RELATED_P (insn) = 1;
10347 }
10348 }
10349 }
10350
10351 /* Emit code to restore saved registers using POP insns. */
10352
10353 static void
10354 ix86_emit_restore_regs_using_pop (void)
10355 {
10356 unsigned int regno;
10357
10358 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10359 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
10360 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
10361 }
10362
10363 /* Emit code and notes for the LEAVE instruction. */
10364
10365 static void
10366 ix86_emit_leave (void)
10367 {
10368 struct machine_function *m = cfun->machine;
10369 rtx insn = emit_insn (ix86_gen_leave ());
10370
10371 ix86_add_queued_cfa_restore_notes (insn);
10372
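/* LEAVE restores the stack pointer from the frame pointer and pops the saved frame pointer, so afterwards SP is valid again and FP is not. */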
10373 gcc_assert (m->fs.fp_valid);
10374 m->fs.sp_valid = true;
10375 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
10376 m->fs.fp_valid = false;
10377
10378 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10379 {
10380 m->fs.cfa_reg = stack_pointer_rtx;
10381 m->fs.cfa_offset = m->fs.sp_offset;
10382
10383 add_reg_note (insn, REG_CFA_DEF_CFA,
10384 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
10385 RTX_FRAME_RELATED_P (insn) = 1;
10386 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
10387 m->fs.fp_offset);
10388 }
10389 }
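/* For reference, an informal sketch (not emitted by this function): in its
   32-bit form "leave" behaves like

     movl  %ebp, %esp
     popl  %ebp

   which is why, after the insn, the stack pointer is known to sit one word
   above the frame pointer save slot (sp_offset = fp_offset - UNITS_PER_WORD)
   and the frame pointer is no longer valid.  */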
10390
10391 /* Emit code to restore saved registers using MOV insns.
10392 First register is restored from CFA - CFA_OFFSET. */
10393 static void
10394 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
10395 bool maybe_eh_return)
10396 {
10397 struct machine_function *m = cfun->machine;
10398 unsigned int regno;
10399
10400 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10401 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10402 {
10403 rtx reg = gen_rtx_REG (Pmode, regno);
10404 rtx insn, mem;
10405
10406 mem = choose_baseaddr (cfa_offset);
10407 mem = gen_frame_mem (Pmode, mem);
10408 insn = emit_move_insn (reg, mem);
10409
10410 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
10411 {
10412 /* Previously we'd represented the CFA as an expression
10413 like *(%ebp - 8). We've just loaded that value from
10414 the stack, which means we need to reset the CFA to
10415 the drap register. This will remain until we restore
10416 the stack pointer. */
10417 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10418 RTX_FRAME_RELATED_P (insn) = 1;
10419
10420 /* This means that the DRAP register is valid for addressing. */
10421 m->fs.drap_valid = true;
10422 }
10423 else
10424 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10425
10426 cfa_offset -= UNITS_PER_WORD;
10427 }
10428 }
10429
10430 /* Emit code to restore saved SSE registers using MOV insns.
10431 First register is restored from CFA - CFA_OFFSET. */
10432 static void
10433 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10434 bool maybe_eh_return)
10435 {
10436 unsigned int regno;
10437
10438 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10439 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10440 {
10441 rtx reg = gen_rtx_REG (V4SFmode, regno);
10442 rtx mem;
10443
10444 mem = choose_baseaddr (cfa_offset);
10445 mem = gen_rtx_MEM (V4SFmode, mem);
10446 set_mem_align (mem, 128);
10447 emit_move_insn (reg, mem);
10448
10449 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10450
10451 cfa_offset -= 16;
10452 }
10453 }
10454
10455 /* Restore function stack, frame, and registers. */
10456
10457 void
10458 ix86_expand_epilogue (int style)
10459 {
10460 struct machine_function *m = cfun->machine;
10461 struct machine_frame_state frame_state_save = m->fs;
10462 struct ix86_frame frame;
10463 bool restore_regs_via_mov;
10464 bool using_drap;
10465
10466 ix86_finalize_stack_realign_flags ();
10467 ix86_compute_frame_layout (&frame);
10468
10469 m->fs.sp_valid = (!frame_pointer_needed
10470 || (current_function_sp_is_unchanging
10471 && !stack_realign_fp));
10472 gcc_assert (!m->fs.sp_valid
10473 || m->fs.sp_offset == frame.stack_pointer_offset);
10474
10475 /* The FP must be valid if the frame pointer is present. */
10476 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10477 gcc_assert (!m->fs.fp_valid
10478 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10479
10480 /* We must have *some* valid pointer to the stack frame. */
10481 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10482
10483 /* The DRAP is never valid at this point. */
10484 gcc_assert (!m->fs.drap_valid);
10485
10486 /* See the comment about red zone and frame
10487 pointer usage in ix86_expand_prologue. */
10488 if (frame_pointer_needed && frame.red_zone_size)
10489 emit_insn (gen_memory_blockage ());
10490
10491 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10492 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10493
10494 /* Determine the CFA offset of the end of the red-zone. */
10495 m->fs.red_zone_offset = 0;
10496 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10497 {
10498 /* The red-zone begins below the return address. */
10499 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
10500
10501 /* When the register save area is in the aligned portion of
10502 the stack, determine the maximum runtime displacement that
10503 matches up with the aligned frame. */
10504 if (stack_realign_drap)
10505 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10506 + UNITS_PER_WORD);
10507 }
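/* As a worked example (assuming the usual x86-64 values of RED_ZONE_SIZE == 128
   and UNITS_PER_WORD == 8), without DRAP realignment the red zone extends to
   128 + 8 == 136 bytes below the CFA.  */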
10508
10509 /* Special care must be taken for the normal return case of a function
10510 using eh_return: the eax and edx registers are marked as saved, but
10511 not restored along this path. Adjust the save location to match. */
10512 if (crtl->calls_eh_return && style != 2)
10513 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
10514
10515 /* EH_RETURN requires the use of moves to function properly. */
10516 if (crtl->calls_eh_return)
10517 restore_regs_via_mov = true;
10518 /* SEH requires the use of pops to identify the epilogue. */
10519 else if (TARGET_SEH)
10520 restore_regs_via_mov = false;
10521 /* If we're only restoring one register and sp is not valid then
10522 use a move instruction to restore the register, since it's
10523 less work than reloading sp and popping the register. */
10524 else if (!m->fs.sp_valid && frame.nregs <= 1)
10525 restore_regs_via_mov = true;
10526 else if (TARGET_EPILOGUE_USING_MOVE
10527 && cfun->machine->use_fast_prologue_epilogue
10528 && (frame.nregs > 1
10529 || m->fs.sp_offset != frame.reg_save_offset))
10530 restore_regs_via_mov = true;
10531 else if (frame_pointer_needed
10532 && !frame.nregs
10533 && m->fs.sp_offset != frame.reg_save_offset)
10534 restore_regs_via_mov = true;
10535 else if (frame_pointer_needed
10536 && TARGET_USE_LEAVE
10537 && cfun->machine->use_fast_prologue_epilogue
10538 && frame.nregs == 1)
10539 restore_regs_via_mov = true;
10540 else
10541 restore_regs_via_mov = false;
10542
10543 if (restore_regs_via_mov || frame.nsseregs)
10544 {
10545 /* Ensure that the entire register save area is addressable via
10546 the stack pointer, if we will restore via sp. */
10547 if (TARGET_64BIT
10548 && m->fs.sp_offset > 0x7fffffff
10549 && !(m->fs.fp_valid || m->fs.drap_valid)
10550 && (frame.nsseregs + frame.nregs) != 0)
10551 {
10552 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10553 GEN_INT (m->fs.sp_offset
10554 - frame.sse_reg_save_offset),
10555 style,
10556 m->fs.cfa_reg == stack_pointer_rtx);
10557 }
10558 }
10559
10560 /* If there are any SSE registers to restore, then we have to do it
10561 via moves, since there's obviously no pop for SSE regs. */
10562 if (frame.nsseregs)
10563 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10564 style == 2);
10565
10566 if (restore_regs_via_mov)
10567 {
10568 rtx t;
10569
10570 if (frame.nregs)
10571 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
10572
10573 /* eh_return epilogues need %ecx added to the stack pointer. */
10574 if (style == 2)
10575 {
10576 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
10577
10578 /* Stack align doesn't work with eh_return. */
10579 gcc_assert (!stack_realign_drap);
10580 /* Neither do regparm nested functions. */
10581 gcc_assert (!ix86_static_chain_on_stack);
10582
10583 if (frame_pointer_needed)
10584 {
10585 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10586 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
10587 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
10588
10589 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
10590 insn = emit_move_insn (hard_frame_pointer_rtx, t);
10591
10592 /* Note that we use SA as a temporary CFA, as the return
10593 address is at the proper place relative to it. We
10594 pretend this happens at the FP restore insn because
10595 prior to this insn the FP would be stored at the wrong
10596 offset relative to SA, and after this insn we have no
10597 other reasonable register to use for the CFA. We don't
10598 bother resetting the CFA to the SP for the duration of
10599 the return insn. */
10600 add_reg_note (insn, REG_CFA_DEF_CFA,
10601 plus_constant (sa, UNITS_PER_WORD));
10602 ix86_add_queued_cfa_restore_notes (insn);
10603 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
10604 RTX_FRAME_RELATED_P (insn) = 1;
10605
10606 m->fs.cfa_reg = sa;
10607 m->fs.cfa_offset = UNITS_PER_WORD;
10608 m->fs.fp_valid = false;
10609
10610 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10611 const0_rtx, style, false);
10612 }
10613 else
10614 {
10615 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10616 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
10617 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
10618 ix86_add_queued_cfa_restore_notes (insn);
10619
10620 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10621 if (m->fs.cfa_offset != UNITS_PER_WORD)
10622 {
10623 m->fs.cfa_offset = UNITS_PER_WORD;
10624 add_reg_note (insn, REG_CFA_DEF_CFA,
10625 plus_constant (stack_pointer_rtx,
10626 UNITS_PER_WORD));
10627 RTX_FRAME_RELATED_P (insn) = 1;
10628 }
10629 }
10630 m->fs.sp_offset = UNITS_PER_WORD;
10631 m->fs.sp_valid = true;
10632 }
10633 }
10634 else
10635 {
10636 /* SEH requires that the function end with (1) a stack adjustment
10637 if necessary, (2) a sequence of pops, and (3) a return or
10638 jump instruction. Prevent insns from the function body from
10639 being scheduled into this sequence. */
10640 if (TARGET_SEH)
10641 {
10642 /* Prevent a catch region from being adjacent to the standard
10643 epilogue sequence. Unfortunately neither crtl->uses_eh_lsda nor
10644 several other flags that would be interesting to test are
10645 set up yet. */
10646 if (flag_non_call_exceptions)
10647 emit_insn (gen_nops (const1_rtx));
10648 else
10649 emit_insn (gen_blockage ());
10650 }
10651
10652 /* First step is to deallocate the stack frame so that we can
10653 pop the registers. */
10654 if (!m->fs.sp_valid)
10655 {
10656 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10657 GEN_INT (m->fs.fp_offset
10658 - frame.reg_save_offset),
10659 style, false);
10660 }
10661 else if (m->fs.sp_offset != frame.reg_save_offset)
10662 {
10663 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10664 GEN_INT (m->fs.sp_offset
10665 - frame.reg_save_offset),
10666 style,
10667 m->fs.cfa_reg == stack_pointer_rtx);
10668 }
10669
10670 ix86_emit_restore_regs_using_pop ();
10671 }
10672
10673 /* If we used a frame pointer and haven't already got rid of it,
10674 then do so now. */
10675 if (m->fs.fp_valid)
10676 {
10677 /* If the stack pointer is valid and pointing at the frame
10678 pointer store address, then we only need a pop. */
10679 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
10680 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10681 /* The "leave" instruction results in shorter dependency chains
10682 on CPUs that are able to grok it fast. */
10683 else if (TARGET_USE_LEAVE
10684 || optimize_function_for_size_p (cfun)
10685 || !cfun->machine->use_fast_prologue_epilogue)
10686 ix86_emit_leave ();
10687 else
10688 {
10689 pro_epilogue_adjust_stack (stack_pointer_rtx,
10690 hard_frame_pointer_rtx,
10691 const0_rtx, style, !using_drap);
10692 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10693 }
10694 }
10695
10696 if (using_drap)
10697 {
10698 int param_ptr_offset = UNITS_PER_WORD;
10699 rtx insn;
10700
10701 gcc_assert (stack_realign_drap);
10702
10703 if (ix86_static_chain_on_stack)
10704 param_ptr_offset += UNITS_PER_WORD;
10705 if (!call_used_regs[REGNO (crtl->drap_reg)])
10706 param_ptr_offset += UNITS_PER_WORD;
10707
10708 insn = emit_insn (gen_rtx_SET
10709 (VOIDmode, stack_pointer_rtx,
10710 gen_rtx_PLUS (Pmode,
10711 crtl->drap_reg,
10712 GEN_INT (-param_ptr_offset))));
10713 m->fs.cfa_reg = stack_pointer_rtx;
10714 m->fs.cfa_offset = param_ptr_offset;
10715 m->fs.sp_offset = param_ptr_offset;
10716 m->fs.realigned = false;
10717
10718 add_reg_note (insn, REG_CFA_DEF_CFA,
10719 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10720 GEN_INT (param_ptr_offset)));
10721 RTX_FRAME_RELATED_P (insn) = 1;
10722
10723 if (!call_used_regs[REGNO (crtl->drap_reg)])
10724 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10725 }
10726
10727 /* At this point the stack pointer must be valid, and we must have
10728 restored all of the registers. We may not have deallocated the
10729 entire stack frame. We've delayed this until now because it may
10730 be possible to merge the local stack deallocation with the
10731 deallocation forced by ix86_static_chain_on_stack. */
10732 gcc_assert (m->fs.sp_valid);
10733 gcc_assert (!m->fs.fp_valid);
10734 gcc_assert (!m->fs.realigned);
10735 if (m->fs.sp_offset != UNITS_PER_WORD)
10736 {
10737 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10738 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
10739 style, true);
10740 }
10741
10742 /* Sibcall epilogues don't want a return instruction. */
10743 if (style == 0)
10744 {
10745 m->fs = frame_state_save;
10746 return;
10747 }
10748
10749 /* Emit vzeroupper if needed. */
10750 if (TARGET_VZEROUPPER
10751 && !TREE_THIS_VOLATILE (cfun->decl)
10752 && !cfun->machine->caller_return_avx256_p)
10753 emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
10754
10755 if (crtl->args.pops_args && crtl->args.size)
10756 {
10757 rtx popc = GEN_INT (crtl->args.pops_args);
10758
10759 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
10760 address, do an explicit add, and jump indirectly to the caller. */
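/* A rough sketch of the sequence emitted by the >= 64K case below
   (illustrative only):

     popl  %ecx              # pop the return address
     addl  $pops_args, %esp  # pop the arguments explicitly
     jmp   *%ecx             # return to the caller
 */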
10761
10762 if (crtl->args.pops_args >= 65536)
10763 {
10764 rtx ecx = gen_rtx_REG (SImode, CX_REG);
10765 rtx insn;
10766
10767 /* There is no "pascal" calling convention in any 64bit ABI. */
10768 gcc_assert (!TARGET_64BIT);
10769
10770 insn = emit_insn (gen_pop (ecx));
10771 m->fs.cfa_offset -= UNITS_PER_WORD;
10772 m->fs.sp_offset -= UNITS_PER_WORD;
10773
10774 add_reg_note (insn, REG_CFA_ADJUST_CFA,
10775 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
10776 add_reg_note (insn, REG_CFA_REGISTER,
10777 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
10778 RTX_FRAME_RELATED_P (insn) = 1;
10779
10780 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10781 popc, -1, true);
10782 emit_jump_insn (gen_return_indirect_internal (ecx));
10783 }
10784 else
10785 emit_jump_insn (gen_return_pop_internal (popc));
10786 }
10787 else
10788 emit_jump_insn (gen_return_internal ());
10789
10790 /* Restore the state back to the state from the prologue,
10791 so that it's correct for the next epilogue. */
10792 m->fs = frame_state_save;
10793 }
10794
10795 /* Reset state that the function's code may have modified, such as the PIC register's hard register number. */
10796
10797 static void
10798 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
10799 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
10800 {
10801 if (pic_offset_table_rtx)
10802 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
10803 #if TARGET_MACHO
10804 /* Mach-O doesn't support labels at the end of objects, so if
10805 it looks like we might want one, insert a NOP. */
10806 {
10807 rtx insn = get_last_insn ();
10808 while (insn
10809 && NOTE_P (insn)
10810 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
10811 insn = PREV_INSN (insn);
10812 if (insn
10813 && (LABEL_P (insn)
10814 || (NOTE_P (insn)
10815 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
10816 fputs ("\tnop\n", file);
10817 }
10818 #endif
10819
10820 }
10821
10822 /* Return a scratch register to use in the split stack prologue. The
10823 split stack prologue is used for -fsplit-stack. It consists of the
10824 first instructions in the function, even before the regular prologue.
10825 The scratch register can be any caller-saved register which is not
10826 used for parameters or for the static chain. */
10827
10828 static unsigned int
10829 split_stack_prologue_scratch_regno (void)
10830 {
10831 if (TARGET_64BIT)
10832 return R11_REG;
10833 else
10834 {
10835 bool is_fastcall;
10836 int regparm;
10837
10838 is_fastcall = (lookup_attribute ("fastcall",
10839 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
10840 != NULL);
10841 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
10842
10843 if (is_fastcall)
10844 {
10845 if (DECL_STATIC_CHAIN (cfun->decl))
10846 {
10847 sorry ("-fsplit-stack does not support fastcall with "
10848 "nested function");
10849 return INVALID_REGNUM;
10850 }
10851 return AX_REG;
10852 }
10853 else if (regparm < 3)
10854 {
10855 if (!DECL_STATIC_CHAIN (cfun->decl))
10856 return CX_REG;
10857 else
10858 {
10859 if (regparm >= 2)
10860 {
10861 sorry ("-fsplit-stack does not support 2 register "
10862 "parameters for a nested function");
10863 return INVALID_REGNUM;
10864 }
10865 return DX_REG;
10866 }
10867 }
10868 else
10869 {
10870 /* FIXME: We could make this work by pushing a register
10871 around the addition and comparison. */
10872 sorry ("-fsplit-stack does not support 3 register parameters");
10873 return INVALID_REGNUM;
10874 }
10875 }
10876 }
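/* A summary of the choices made above (illustrative, derived from the code):

     64-bit                                   -> %r11
     32-bit, fastcall, no static chain        -> %eax
     32-bit, regparm <= 1, static chain       -> %edx
     32-bit, regparm <  3, no static chain    -> %ecx
     anything else                            -> unsupported (sorry ())  */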
10877
10878 /* A SYMBOL_REF for the function which allocates new stack space for
10879 -fsplit-stack. */
10880
10881 static GTY(()) rtx split_stack_fn;
10882
10883 /* A SYMBOL_REF for the variant of __morestack used with the large
10884 code model. */
10885
10886 static GTY(()) rtx split_stack_fn_large;
10887
10888 /* Handle -fsplit-stack. These are the first instructions in the
10889 function, even before the regular prologue. */
10890
10891 void
10892 ix86_expand_split_stack_prologue (void)
10893 {
10894 struct ix86_frame frame;
10895 HOST_WIDE_INT allocate;
10896 unsigned HOST_WIDE_INT args_size;
10897 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
10898 rtx scratch_reg = NULL_RTX;
10899 rtx varargs_label = NULL_RTX;
10900 rtx fn;
10901
10902 gcc_assert (flag_split_stack && reload_completed);
10903
10904 ix86_finalize_stack_realign_flags ();
10905 ix86_compute_frame_layout (&frame);
10906 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
10907
10908 /* This is the label we will branch to if we have enough stack
10909 space. We expect the basic block reordering pass to reverse this
10910 branch if optimizing, so that we branch in the unlikely case. */
10911 label = gen_label_rtx ();
10912
10913 /* We need to compare the stack pointer minus the frame size with
10914 the stack boundary in the TCB. The stack boundary always gives
10915 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
10916 can compare directly. Otherwise we need to do an addition. */
10917
10918 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
10919 UNSPEC_STACK_CHECK);
10920 limit = gen_rtx_CONST (Pmode, limit);
10921 limit = gen_rtx_MEM (Pmode, limit);
10922 if (allocate < SPLIT_STACK_AVAILABLE)
10923 current = stack_pointer_rtx;
10924 else
10925 {
10926 unsigned int scratch_regno;
10927 rtx offset;
10928
10929 /* We need a scratch register to hold the stack pointer minus
10930 the required frame size. Since this is the very start of the
10931 function, the scratch register can be any caller-saved
10932 register which is not used for parameters. */
10933 offset = GEN_INT (- allocate);
10934 scratch_regno = split_stack_prologue_scratch_regno ();
10935 if (scratch_regno == INVALID_REGNUM)
10936 return;
10937 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10938 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
10939 {
10940 /* We don't use ix86_gen_add3 in this case because it will
10941 want to split to lea, but when not optimizing the insn
10942 will not be split after this point. */
10943 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
10944 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10945 offset)));
10946 }
10947 else
10948 {
10949 emit_move_insn (scratch_reg, offset);
10950 emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
10951 stack_pointer_rtx));
10952 }
10953 current = scratch_reg;
10954 }
10955
10956 ix86_expand_branch (GEU, current, limit, label);
10957 jump_insn = get_last_insn ();
10958 JUMP_LABEL (jump_insn) = label;
10959
10960 /* Mark the jump as very likely to be taken. */
10961 add_reg_note (jump_insn, REG_BR_PROB,
10962 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
10963
10964 if (split_stack_fn == NULL_RTX)
10965 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
10966 fn = split_stack_fn;
10967
10968 /* Get more stack space. We pass in the desired stack space and the
10969 size of the arguments to copy to the new stack. In 32-bit mode
10970 we push the parameters; __morestack will return on a new stack
10971 anyhow. In 64-bit mode we pass the parameters in r10 and
10972 r11. */
10973 allocate_rtx = GEN_INT (allocate);
10974 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
10975 call_fusage = NULL_RTX;
10976 if (TARGET_64BIT)
10977 {
10978 rtx reg10, reg11;
10979
10980 reg10 = gen_rtx_REG (Pmode, R10_REG);
10981 reg11 = gen_rtx_REG (Pmode, R11_REG);
10982
10983 /* If this function uses a static chain, it will be in %r10.
10984 Preserve it across the call to __morestack. */
10985 if (DECL_STATIC_CHAIN (cfun->decl))
10986 {
10987 rtx rax;
10988
10989 rax = gen_rtx_REG (Pmode, AX_REG);
10990 emit_move_insn (rax, reg10);
10991 use_reg (&call_fusage, rax);
10992 }
10993
10994 if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
10995 {
10996 HOST_WIDE_INT argval;
10997
10998 /* When using the large model we need to load the address
10999 into a register, and we've run out of registers. So we
11000 switch to a different calling convention, and we call a
11001 different function: __morestack_large_model. We pass the
11002 argument size in the upper 32 bits of r10 and pass the
11003 frame size in the lower 32 bits. */
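/* A minimal sketch of that packing (not part of GCC; assumes a 64-bit
   HOST_WIDE_INT), equivalent to the "((args_size << 16) << 16) + allocate"
   computed below:

     static unsigned HOST_WIDE_INT
     pack_morestack_operand (unsigned HOST_WIDE_INT args_size,
                             unsigned HOST_WIDE_INT allocate)
     {
       /* Upper 32 bits: argument size; lower 32 bits: frame size.  */
       return (args_size << 32) + allocate;
     }

   The double shift in the real code presumably keeps the shift defined on
   hosts where HOST_WIDE_INT is only 32 bits wide.  */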
11004 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
11005 gcc_assert ((args_size & 0xffffffff) == args_size);
11006
11007 if (split_stack_fn_large == NULL_RTX)
11008 split_stack_fn_large =
11009 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
11010
11011 if (ix86_cmodel == CM_LARGE_PIC)
11012 {
11013 rtx label, x;
11014
11015 label = gen_label_rtx ();
11016 emit_label (label);
11017 LABEL_PRESERVE_P (label) = 1;
11018 emit_insn (gen_set_rip_rex64 (reg10, label));
11019 emit_insn (gen_set_got_offset_rex64 (reg11, label));
11020 emit_insn (gen_adddi3 (reg10, reg10, reg11));
11021 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
11022 UNSPEC_GOT);
11023 x = gen_rtx_CONST (Pmode, x);
11024 emit_move_insn (reg11, x);
11025 x = gen_rtx_PLUS (Pmode, reg10, reg11);
11026 x = gen_const_mem (Pmode, x);
11027 emit_move_insn (reg11, x);
11028 }
11029 else
11030 emit_move_insn (reg11, split_stack_fn_large);
11031
11032 fn = reg11;
11033
11034 argval = ((args_size << 16) << 16) + allocate;
11035 emit_move_insn (reg10, GEN_INT (argval));
11036 }
11037 else
11038 {
11039 emit_move_insn (reg10, allocate_rtx);
11040 emit_move_insn (reg11, GEN_INT (args_size));
11041 use_reg (&call_fusage, reg11);
11042 }
11043
11044 use_reg (&call_fusage, reg10);
11045 }
11046 else
11047 {
11048 emit_insn (gen_push (GEN_INT (args_size)));
11049 emit_insn (gen_push (allocate_rtx));
11050 }
11051 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
11052 GEN_INT (UNITS_PER_WORD), constm1_rtx,
11053 NULL_RTX, false);
11054 add_function_usage_to (call_insn, call_fusage);
11055
11056 /* In order to make call/return prediction work right, we now need
11057 to execute a return instruction. See
11058 libgcc/config/i386/morestack.S for the details on how this works.
11059
11060 For flow purposes gcc must not see this as a return
11061 instruction--we need control flow to continue at the subsequent
11062 label. Therefore, we use an unspec. */
11063 gcc_assert (crtl->args.pops_args < 65536);
11064 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
11065
11066 /* If we are in 64-bit mode and this function uses a static chain,
11067 we saved %r10 in %rax before calling __morestack. */
11068 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
11069 emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
11070 gen_rtx_REG (Pmode, AX_REG));
11071
11072 /* If this function calls va_start, we need to store a pointer to
11073 the arguments on the old stack, because they may not all have
11074 been copied to the new stack. At this point the old stack can be
11075 found at the frame pointer value used by __morestack, because
11076 __morestack has set that up before calling back to us. Here we
11077 store that pointer in a scratch register, and in
11078 ix86_expand_prologue we store the scratch register in a stack
11079 slot. */
11080 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11081 {
11082 unsigned int scratch_regno;
11083 rtx frame_reg;
11084 int words;
11085
11086 scratch_regno = split_stack_prologue_scratch_regno ();
11087 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11088 frame_reg = gen_rtx_REG (Pmode, BP_REG);
11089
11090 /* 64-bit:
11091 fp -> old fp value
11092 return address within this function
11093 return address of caller of this function
11094 stack arguments
11095 So we add three words to get to the stack arguments.
11096
11097 32-bit:
11098 fp -> old fp value
11099 return address within this function
11100 first argument to __morestack
11101 second argument to __morestack
11102 return address of caller of this function
11103 stack arguments
11104 So we add five words to get to the stack arguments.
11105 */
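/* So, assuming the usual UNITS_PER_WORD values of 8 and 4 respectively, the
   stack arguments start 3 * 8 == 24 bytes above the frame pointer in 64-bit
   mode and 5 * 4 == 20 bytes above it in 32-bit mode.  */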
11106 words = TARGET_64BIT ? 3 : 5;
11107 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11108 gen_rtx_PLUS (Pmode, frame_reg,
11109 GEN_INT (words * UNITS_PER_WORD))));
11110
11111 varargs_label = gen_label_rtx ();
11112 emit_jump_insn (gen_jump (varargs_label));
11113 JUMP_LABEL (get_last_insn ()) = varargs_label;
11114
11115 emit_barrier ();
11116 }
11117
11118 emit_label (label);
11119 LABEL_NUSES (label) = 1;
11120
11121 /* If this function calls va_start, we now have to set the scratch
11122 register for the case where we do not call __morestack. In this
11123 case we need to set it based on the stack pointer. */
11124 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11125 {
11126 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11127 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11128 GEN_INT (UNITS_PER_WORD))));
11129
11130 emit_label (varargs_label);
11131 LABEL_NUSES (varargs_label) = 1;
11132 }
11133 }
11134
11135 /* We may have to tell the dataflow pass that the split stack prologue
11136 is initializing a scratch register. */
11137
11138 static void
11139 ix86_live_on_entry (bitmap regs)
11140 {
11141 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11142 {
11143 gcc_assert (flag_split_stack);
11144 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
11145 }
11146 }
11147 \f
11148 /* Determine if OP is a suitable SUBREG RTX for use in an address. */
11149
11150 static bool
11151 ix86_address_subreg_operand (rtx op)
11152 {
11153 enum machine_mode mode;
11154
11155 if (!REG_P (op))
11156 return false;
11157
11158 mode = GET_MODE (op);
11159
11160 if (GET_MODE_CLASS (mode) != MODE_INT)
11161 return false;
11162
11163 /* Don't allow SUBREGs that span more than a word. That can lead to spill
11164 failures when the register is one word out of a two-word structure. */
11165 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
11166 return false;
11167
11168 /* Allow only SUBREGs of non-eliminable hard registers. */
11169 return register_no_elim_operand (op, mode);
11170 }
11171
11172 /* Extract the parts of an RTL expression that is a valid memory address
11173 for an instruction. Return 0 if the structure of the address is
11174 grossly off. Return -1 if the address contains ASHIFT, so it is not
11175 strictly valid, but is still used for computing the length of the lea instruction. */
11176
11177 int
11178 ix86_decompose_address (rtx addr, struct ix86_address *out)
11179 {
11180 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
11181 rtx base_reg, index_reg;
11182 HOST_WIDE_INT scale = 1;
11183 rtx scale_rtx = NULL_RTX;
11184 rtx tmp;
11185 int retval = 1;
11186 enum ix86_address_seg seg = SEG_DEFAULT;
11187
11188 /* Allow zero-extended SImode addresses;
11189 they will be emitted with the addr32 prefix. */
11190 if (TARGET_64BIT && GET_MODE (addr) == DImode)
11191 {
11192 if (GET_CODE (addr) == ZERO_EXTEND
11193 && GET_MODE (XEXP (addr, 0)) == SImode)
11194 addr = XEXP (addr, 0);
11195 else if (GET_CODE (addr) == AND
11196 && const_32bit_mask (XEXP (addr, 1), DImode))
11197 {
11198 addr = XEXP (addr, 0);
11199
11200 /* Strip subreg. */
11201 if (GET_CODE (addr) == SUBREG
11202 && GET_MODE (SUBREG_REG (addr)) == SImode)
11203 addr = SUBREG_REG (addr);
11204 }
11205 }
11206
11207 if (REG_P (addr))
11208 base = addr;
11209 else if (GET_CODE (addr) == SUBREG)
11210 {
11211 if (ix86_address_subreg_operand (SUBREG_REG (addr)))
11212 base = addr;
11213 else
11214 return 0;
11215 }
11216 else if (GET_CODE (addr) == PLUS)
11217 {
11218 rtx addends[4], op;
11219 int n = 0, i;
11220
11221 op = addr;
11222 do
11223 {
11224 if (n >= 4)
11225 return 0;
11226 addends[n++] = XEXP (op, 1);
11227 op = XEXP (op, 0);
11228 }
11229 while (GET_CODE (op) == PLUS);
11230 if (n >= 4)
11231 return 0;
11232 addends[n] = op;
11233
11234 for (i = n; i >= 0; --i)
11235 {
11236 op = addends[i];
11237 switch (GET_CODE (op))
11238 {
11239 case MULT:
11240 if (index)
11241 return 0;
11242 index = XEXP (op, 0);
11243 scale_rtx = XEXP (op, 1);
11244 break;
11245
11246 case ASHIFT:
11247 if (index)
11248 return 0;
11249 index = XEXP (op, 0);
11250 tmp = XEXP (op, 1);
11251 if (!CONST_INT_P (tmp))
11252 return 0;
11253 scale = INTVAL (tmp);
11254 if ((unsigned HOST_WIDE_INT) scale > 3)
11255 return 0;
11256 scale = 1 << scale;
11257 break;
11258
11259 case UNSPEC:
11260 if (XINT (op, 1) == UNSPEC_TP
11261 && TARGET_TLS_DIRECT_SEG_REFS
11262 && seg == SEG_DEFAULT)
11263 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
11264 else
11265 return 0;
11266 break;
11267
11268 case SUBREG:
11269 if (!ix86_address_subreg_operand (SUBREG_REG (op)))
11270 return 0;
11271 /* FALLTHRU */
11272
11273 case REG:
11274 if (!base)
11275 base = op;
11276 else if (!index)
11277 index = op;
11278 else
11279 return 0;
11280 break;
11281
11282 case CONST:
11283 case CONST_INT:
11284 case SYMBOL_REF:
11285 case LABEL_REF:
11286 if (disp)
11287 return 0;
11288 disp = op;
11289 break;
11290
11291 default:
11292 return 0;
11293 }
11294 }
11295 }
11296 else if (GET_CODE (addr) == MULT)
11297 {
11298 index = XEXP (addr, 0); /* index*scale */
11299 scale_rtx = XEXP (addr, 1);
11300 }
11301 else if (GET_CODE (addr) == ASHIFT)
11302 {
11303 /* We're called for lea too, which implements ashift on occasion. */
11304 index = XEXP (addr, 0);
11305 tmp = XEXP (addr, 1);
11306 if (!CONST_INT_P (tmp))
11307 return 0;
11308 scale = INTVAL (tmp);
11309 if ((unsigned HOST_WIDE_INT) scale > 3)
11310 return 0;
11311 scale = 1 << scale;
11312 retval = -1;
11313 }
11314 else
11315 disp = addr; /* displacement */
11316
11317 if (index)
11318 {
11319 if (REG_P (index))
11320 ;
11321 else if (GET_CODE (index) == SUBREG
11322 && ix86_address_subreg_operand (SUBREG_REG (index)))
11323 ;
11324 else
11325 return 0;
11326 }
11327
11328 /* Extract the integral value of scale. */
11329 if (scale_rtx)
11330 {
11331 if (!CONST_INT_P (scale_rtx))
11332 return 0;
11333 scale = INTVAL (scale_rtx);
11334 }
11335
11336 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
11337 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
11338
11339 /* Avoid useless 0 displacement. */
11340 if (disp == const0_rtx && (base || index))
11341 disp = NULL_RTX;
11342
11343 /* Allow arg pointer and stack pointer as index if there is no scaling. */
11344 if (base_reg && index_reg && scale == 1
11345 && (index_reg == arg_pointer_rtx
11346 || index_reg == frame_pointer_rtx
11347 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
11348 {
11349 rtx tmp;
11350 tmp = base, base = index, index = tmp;
11351 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
11352 }
11353
11354 /* Special case: %ebp cannot be encoded as a base without a displacement.
11355 Neither can %r13. */
11356 if (!disp
11357 && base_reg
11358 && (base_reg == hard_frame_pointer_rtx
11359 || base_reg == frame_pointer_rtx
11360 || base_reg == arg_pointer_rtx
11361 || (REG_P (base_reg)
11362 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
11363 || REGNO (base_reg) == R13_REG))))
11364 disp = const0_rtx;
11365
11366 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
11367 Avoid this by transforming to [%esi+0].
11368 Reload calls address legitimization without cfun defined, so we need
11369 to test cfun for being non-NULL. */
11370 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
11371 && base_reg && !index_reg && !disp
11372 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
11373 disp = const0_rtx;
11374
11375 /* Special case: encode reg+reg instead of reg*2. */
11376 if (!base && index && scale == 2)
11377 base = index, base_reg = index_reg, scale = 1;
11378
11379 /* Special case: scaling cannot be encoded without base or displacement. */
11380 if (!base && !disp && index && scale != 1)
11381 disp = const0_rtx;
11382
11383 out->base = base;
11384 out->index = index;
11385 out->disp = disp;
11386 out->scale = scale;
11387 out->seg = seg;
11388
11389 return retval;
11390 }
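/* An illustrative usage sketch (not part of GCC; assumes a 32-bit target where
   Pmode == SImode): decomposing the address %ebx + %ecx*4 + 12 yields
   base = %ebx, index = %ecx, scale = 4 and disp = 12.

     struct ix86_address parts;
     rtx base = gen_rtx_REG (SImode, BX_REG);
     rtx index = gen_rtx_REG (SImode, CX_REG);
     rtx addr
       = gen_rtx_PLUS (SImode,
                       gen_rtx_PLUS (SImode, base,
                                     gen_rtx_MULT (SImode, index, GEN_INT (4))),
                       GEN_INT (12));

     if (ix86_decompose_address (addr, &parts) > 0)
       gcc_assert (parts.base == base
                   && parts.index == index
                   && parts.scale == 4
                   && INTVAL (parts.disp) == 12);
 */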
11391 \f
11392 /* Return the cost of the memory address X.
11393 For i386, it is better to use a complex address than let gcc copy
11394 the address into a reg and make a new pseudo. But not if the address
11395 requires two regs - that would mean more pseudos with longer
11396 lifetimes. */
11397 static int
11398 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
11399 {
11400 struct ix86_address parts;
11401 int cost = 1;
11402 int ok = ix86_decompose_address (x, &parts);
11403
11404 gcc_assert (ok);
11405
11406 if (parts.base && GET_CODE (parts.base) == SUBREG)
11407 parts.base = SUBREG_REG (parts.base);
11408 if (parts.index && GET_CODE (parts.index) == SUBREG)
11409 parts.index = SUBREG_REG (parts.index);
11410
11411 /* Attempt to minimize number of registers in the address. */
11412 if ((parts.base
11413 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
11414 || (parts.index
11415 && (!REG_P (parts.index)
11416 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
11417 cost++;
11418
11419 if (parts.base
11420 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
11421 && parts.index
11422 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
11423 && parts.base != parts.index)
11424 cost++;
11425
11426 /* The AMD K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
11427 since its predecode logic can't detect the length of such instructions
11428 and they degenerate to vector decoding. Increase the cost of such
11429 addresses here. The penalty is at least 2 cycles. It may be worthwhile
11430 to split such addresses or even to refuse them entirely.
11431
11432 The following addressing modes are affected:
11433 [base+scale*index]
11434 [scale*index+disp]
11435 [base+index]
11436
11437 The first and last cases may be avoidable by explicitly coding the zero in
11438 the memory address, but I don't have an AMD-K6 machine handy to check this
11439 theory. */
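/* For instance, an address such as (%eax,%ebx) (base + index, scale 1, no
   displacement) falls into the last category above and gets the penalty,
   whereas 4(%eax,%ebx) does not.  */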
11440
11441 if (TARGET_K6
11442 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
11443 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
11444 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
11445 cost += 10;
11446
11447 return cost;
11448 }
11449 \f
11450 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
11451 this is used to form addresses to local data when -fPIC is in
11452 use. */
11453
11454 static bool
11455 darwin_local_data_pic (rtx disp)
11456 {
11457 return (GET_CODE (disp) == UNSPEC
11458 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
11459 }
11460
11461 /* Determine if a given RTX is a valid constant. We already know this
11462 satisfies CONSTANT_P. */
11463
11464 static bool
11465 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
11466 {
11467 switch (GET_CODE (x))
11468 {
11469 case CONST:
11470 x = XEXP (x, 0);
11471
11472 if (GET_CODE (x) == PLUS)
11473 {
11474 if (!CONST_INT_P (XEXP (x, 1)))
11475 return false;
11476 x = XEXP (x, 0);
11477 }
11478
11479 if (TARGET_MACHO && darwin_local_data_pic (x))
11480 return true;
11481
11482 /* Only some unspecs are valid as "constants". */
11483 if (GET_CODE (x) == UNSPEC)
11484 switch (XINT (x, 1))
11485 {
11486 case UNSPEC_GOT:
11487 case UNSPEC_GOTOFF:
11488 case UNSPEC_PLTOFF:
11489 return TARGET_64BIT;
11490 case UNSPEC_TPOFF:
11491 case UNSPEC_NTPOFF:
11492 x = XVECEXP (x, 0, 0);
11493 return (GET_CODE (x) == SYMBOL_REF
11494 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11495 case UNSPEC_DTPOFF:
11496 x = XVECEXP (x, 0, 0);
11497 return (GET_CODE (x) == SYMBOL_REF
11498 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
11499 default:
11500 return false;
11501 }
11502
11503 /* We must have drilled down to a symbol. */
11504 if (GET_CODE (x) == LABEL_REF)
11505 return true;
11506 if (GET_CODE (x) != SYMBOL_REF)
11507 return false;
11508 /* FALLTHRU */
11509
11510 case SYMBOL_REF:
11511 /* TLS symbols are never valid. */
11512 if (SYMBOL_REF_TLS_MODEL (x))
11513 return false;
11514
11515 /* DLLIMPORT symbols are never valid. */
11516 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11517 && SYMBOL_REF_DLLIMPORT_P (x))
11518 return false;
11519
11520 #if TARGET_MACHO
11521 /* mdynamic-no-pic */
11522 if (MACHO_DYNAMIC_NO_PIC_P)
11523 return machopic_symbol_defined_p (x);
11524 #endif
11525 break;
11526
11527 case CONST_DOUBLE:
11528 if (GET_MODE (x) == TImode
11529 && x != CONST0_RTX (TImode)
11530 && !TARGET_64BIT)
11531 return false;
11532 break;
11533
11534 case CONST_VECTOR:
11535 if (!standard_sse_constant_p (x))
11536 return false;
11537
11538 default:
11539 break;
11540 }
11541
11542 /* Otherwise we handle everything else in the move patterns. */
11543 return true;
11544 }
11545
11546 /* Determine if it's legal to put X into the constant pool. This
11547 is not possible for the address of thread-local symbols, which
11548 is checked above. */
11549
11550 static bool
11551 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
11552 {
11553 /* We can always put integral constants and vectors in memory. */
11554 switch (GET_CODE (x))
11555 {
11556 case CONST_INT:
11557 case CONST_DOUBLE:
11558 case CONST_VECTOR:
11559 return false;
11560
11561 default:
11562 break;
11563 }
11564 return !ix86_legitimate_constant_p (mode, x);
11565 }
11566
11567
11568 /* Nonzero if the constant value X is a legitimate general operand
11569 when generating PIC code. It is given that flag_pic is on and
11570 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11571
11572 bool
11573 legitimate_pic_operand_p (rtx x)
11574 {
11575 rtx inner;
11576
11577 switch (GET_CODE (x))
11578 {
11579 case CONST:
11580 inner = XEXP (x, 0);
11581 if (GET_CODE (inner) == PLUS
11582 && CONST_INT_P (XEXP (inner, 1)))
11583 inner = XEXP (inner, 0);
11584
11585 /* Only some unspecs are valid as "constants". */
11586 if (GET_CODE (inner) == UNSPEC)
11587 switch (XINT (inner, 1))
11588 {
11589 case UNSPEC_GOT:
11590 case UNSPEC_GOTOFF:
11591 case UNSPEC_PLTOFF:
11592 return TARGET_64BIT;
11593 case UNSPEC_TPOFF:
11594 x = XVECEXP (inner, 0, 0);
11595 return (GET_CODE (x) == SYMBOL_REF
11596 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11597 case UNSPEC_MACHOPIC_OFFSET:
11598 return legitimate_pic_address_disp_p (x);
11599 default:
11600 return false;
11601 }
11602 /* FALLTHRU */
11603
11604 case SYMBOL_REF:
11605 case LABEL_REF:
11606 return legitimate_pic_address_disp_p (x);
11607
11608 default:
11609 return true;
11610 }
11611 }
11612
11613 /* Determine if a given CONST RTX is a valid memory displacement
11614 in PIC mode. */
11615
11616 bool
11617 legitimate_pic_address_disp_p (rtx disp)
11618 {
11619 bool saw_plus;
11620
11621 /* In 64bit mode we can allow direct addresses of symbols and labels
11622 when they are not dynamic symbols. */
11623 if (TARGET_64BIT)
11624 {
11625 rtx op0 = disp, op1;
11626
11627 switch (GET_CODE (disp))
11628 {
11629 case LABEL_REF:
11630 return true;
11631
11632 case CONST:
11633 if (GET_CODE (XEXP (disp, 0)) != PLUS)
11634 break;
11635 op0 = XEXP (XEXP (disp, 0), 0);
11636 op1 = XEXP (XEXP (disp, 0), 1);
11637 if (!CONST_INT_P (op1)
11638 || INTVAL (op1) >= 16*1024*1024
11639 || INTVAL (op1) < -16*1024*1024)
11640 break;
11641 if (GET_CODE (op0) == LABEL_REF)
11642 return true;
11643 if (GET_CODE (op0) != SYMBOL_REF)
11644 break;
11645 /* FALLTHRU */
11646
11647 case SYMBOL_REF:
11648 /* TLS references should always be enclosed in UNSPEC. */
11649 if (SYMBOL_REF_TLS_MODEL (op0))
11650 return false;
11651 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
11652 && ix86_cmodel != CM_LARGE_PIC)
11653 return true;
11654 break;
11655
11656 default:
11657 break;
11658 }
11659 }
11660 if (GET_CODE (disp) != CONST)
11661 return false;
11662 disp = XEXP (disp, 0);
11663
11664 if (TARGET_64BIT)
11665 {
11666 /* It is not safe to allow PLUS expressions; this limits the allowed
11667 distance of GOT references. We should not need these anyway. */
11668 if (GET_CODE (disp) != UNSPEC
11669 || (XINT (disp, 1) != UNSPEC_GOTPCREL
11670 && XINT (disp, 1) != UNSPEC_GOTOFF
11671 && XINT (disp, 1) != UNSPEC_PCREL
11672 && XINT (disp, 1) != UNSPEC_PLTOFF))
11673 return false;
11674
11675 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
11676 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
11677 return false;
11678 return true;
11679 }
11680
11681 saw_plus = false;
11682 if (GET_CODE (disp) == PLUS)
11683 {
11684 if (!CONST_INT_P (XEXP (disp, 1)))
11685 return false;
11686 disp = XEXP (disp, 0);
11687 saw_plus = true;
11688 }
11689
11690 if (TARGET_MACHO && darwin_local_data_pic (disp))
11691 return true;
11692
11693 if (GET_CODE (disp) != UNSPEC)
11694 return false;
11695
11696 switch (XINT (disp, 1))
11697 {
11698 case UNSPEC_GOT:
11699 if (saw_plus)
11700 return false;
11701 /* We need to check for both symbols and labels because VxWorks loads
11702 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
11703 details. */
11704 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11705 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
11706 case UNSPEC_GOTOFF:
11707 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
11708 While the ABI also specifies a 32bit relocation, we don't produce it in
11709 the small PIC model at all. */
11710 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11711 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
11712 && !TARGET_64BIT)
11713 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
11714 return false;
11715 case UNSPEC_GOTTPOFF:
11716 case UNSPEC_GOTNTPOFF:
11717 case UNSPEC_INDNTPOFF:
11718 if (saw_plus)
11719 return false;
11720 disp = XVECEXP (disp, 0, 0);
11721 return (GET_CODE (disp) == SYMBOL_REF
11722 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
11723 case UNSPEC_NTPOFF:
11724 disp = XVECEXP (disp, 0, 0);
11725 return (GET_CODE (disp) == SYMBOL_REF
11726 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
11727 case UNSPEC_DTPOFF:
11728 disp = XVECEXP (disp, 0, 0);
11729 return (GET_CODE (disp) == SYMBOL_REF
11730 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
11731 }
11732
11733 return false;
11734 }
11735
11736 /* Recognizes RTL expressions that are valid memory addresses for an
11737 instruction. The MODE argument is the machine mode for the MEM
11738 expression that wants to use this address.
11739
11740 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
11741 convert common non-canonical forms to canonical form so that they will
11742 be recognized. */
11743
11744 static bool
11745 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
11746 rtx addr, bool strict)
11747 {
11748 struct ix86_address parts;
11749 rtx base, index, disp;
11750 HOST_WIDE_INT scale;
11751
11752 if (ix86_decompose_address (addr, &parts) <= 0)
11753 /* Decomposition failed. */
11754 return false;
11755
11756 base = parts.base;
11757 index = parts.index;
11758 disp = parts.disp;
11759 scale = parts.scale;
11760
11761 /* Validate base register. */
11762 if (base)
11763 {
11764 rtx reg;
11765
11766 if (REG_P (base))
11767 reg = base;
11768 else if (GET_CODE (base) == SUBREG && REG_P (SUBREG_REG (base)))
11769 reg = SUBREG_REG (base);
11770 else
11771 /* Base is not a register. */
11772 return false;
11773
11774 if (GET_MODE (base) != SImode && GET_MODE (base) != DImode)
11775 return false;
11776
11777 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
11778 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
11779 /* Base is not valid. */
11780 return false;
11781 }
11782
11783 /* Validate index register. */
11784 if (index)
11785 {
11786 rtx reg;
11787
11788 if (REG_P (index))
11789 reg = index;
11790 else if (GET_CODE (index) == SUBREG && REG_P (SUBREG_REG (index)))
11791 reg = SUBREG_REG (index);
11792 else
11793 /* Index is not a register. */
11794 return false;
11795
11796 if (GET_MODE (index) != SImode && GET_MODE (index) != DImode)
11797 return false;
11798
11799 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
11800 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
11801 /* Index is not valid. */
11802 return false;
11803 }
11804
11805 /* Index and base should have the same mode. */
11806 if (base && index
11807 && GET_MODE (base) != GET_MODE (index))
11808 return false;
11809
11810 /* Validate scale factor. */
11811 if (scale != 1)
11812 {
11813 if (!index)
11814 /* Scale without index. */
11815 return false;
11816
11817 if (scale != 2 && scale != 4 && scale != 8)
11818 /* Scale is not a valid multiplier. */
11819 return false;
11820 }
11821
11822 /* Validate displacement. */
11823 if (disp)
11824 {
11825 if (GET_CODE (disp) == CONST
11826 && GET_CODE (XEXP (disp, 0)) == UNSPEC
11827 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
11828 switch (XINT (XEXP (disp, 0), 1))
11829 {
11830 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
11831 used. While the ABI also specifies 32bit relocations, we don't produce
11832 them at all and use IP-relative addressing instead. */
11833 case UNSPEC_GOT:
11834 case UNSPEC_GOTOFF:
11835 gcc_assert (flag_pic);
11836 if (!TARGET_64BIT)
11837 goto is_legitimate_pic;
11838
11839 /* 64bit address unspec. */
11840 return false;
11841
11842 case UNSPEC_GOTPCREL:
11843 case UNSPEC_PCREL:
11844 gcc_assert (flag_pic);
11845 goto is_legitimate_pic;
11846
11847 case UNSPEC_GOTTPOFF:
11848 case UNSPEC_GOTNTPOFF:
11849 case UNSPEC_INDNTPOFF:
11850 case UNSPEC_NTPOFF:
11851 case UNSPEC_DTPOFF:
11852 break;
11853
11854 case UNSPEC_STACK_CHECK:
11855 gcc_assert (flag_split_stack);
11856 break;
11857
11858 default:
11859 /* Invalid address unspec. */
11860 return false;
11861 }
11862
11863 else if (SYMBOLIC_CONST (disp)
11864 && (flag_pic
11865 || (TARGET_MACHO
11866 #if TARGET_MACHO
11867 && MACHOPIC_INDIRECT
11868 && !machopic_operand_p (disp)
11869 #endif
11870 )))
11871 {
11872
11873 is_legitimate_pic:
11874 if (TARGET_64BIT && (index || base))
11875 {
11876 /* foo@dtpoff(%rX) is ok. */
11877 if (GET_CODE (disp) != CONST
11878 || GET_CODE (XEXP (disp, 0)) != PLUS
11879 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
11880 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
11881 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
11882 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
11883 /* Non-constant pic memory reference. */
11884 return false;
11885 }
11886 else if ((!TARGET_MACHO || flag_pic)
11887 && ! legitimate_pic_address_disp_p (disp))
11888 /* Displacement is an invalid pic construct. */
11889 return false;
11890 #if TARGET_MACHO
11891 else if (MACHO_DYNAMIC_NO_PIC_P
11892 && !ix86_legitimate_constant_p (Pmode, disp))
11893 /* displacement must be referenced via non_lazy_pointer */
11894 return false;
11895 #endif
11896
11897 /* This code used to verify that a symbolic pic displacement
11898 includes the pic_offset_table_rtx register.
11899
11900 While this is a good idea, unfortunately these constructs may
11901 be created by the "adds using lea" optimization for incorrect
11902 code like:
11903
11904 int a;
11905 int foo(int i)
11906 {
11907 return *(&a+i);
11908 }
11909
11910 This code is nonsensical, but results in addressing the
11911 GOT table with pic_offset_table_rtx as the base. We can't
11912 just refuse it easily, since it gets matched by the
11913 "addsi3" pattern, which later gets split into lea when the
11914 output register differs from the input. While this
11915 could be handled by a separate addsi pattern for this case
11916 that never results in lea, disabling this test seems to be
11917 the easier and correct fix for the crash. */
11918 }
11919 else if (GET_CODE (disp) != LABEL_REF
11920 && !CONST_INT_P (disp)
11921 && (GET_CODE (disp) != CONST
11922 || !ix86_legitimate_constant_p (Pmode, disp))
11923 && (GET_CODE (disp) != SYMBOL_REF
11924 || !ix86_legitimate_constant_p (Pmode, disp)))
11925 /* Displacement is not constant. */
11926 return false;
11927 else if (TARGET_64BIT
11928 && !x86_64_immediate_operand (disp, VOIDmode))
11929 /* Displacement is out of range. */
11930 return false;
11931 }
11932
11933 /* Everything looks valid. */
11934 return true;
11935 }
11936
11937 /* Determine if a given RTX is a valid constant address. */
11938
11939 bool
11940 constant_address_p (rtx x)
11941 {
11942 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
11943 }
11944 \f
11945 /* Return a unique alias set for the GOT. */
11946
11947 static alias_set_type
11948 ix86_GOT_alias_set (void)
11949 {
11950 static alias_set_type set = -1;
11951 if (set == -1)
11952 set = new_alias_set ();
11953 return set;
11954 }
11955
11956 /* Return a legitimate reference for ORIG (an address) using the
11957 register REG. If REG is 0, a new pseudo is generated.
11958
11959 There are two types of references that must be handled:
11960
11961 1. Global data references must load the address from the GOT, via
11962 the PIC reg. An insn is emitted to do this load, and the reg is
11963 returned.
11964
11965 2. Static data references, constant pool addresses, and code labels
11966 compute the address as an offset from the GOT, whose base is in
11967 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
11968 differentiate them from global data objects. The returned
11969 address is the PIC reg + an unspec constant.
11970
11971 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
11972 reg also appears in the address. */
11973
11974 static rtx
11975 legitimize_pic_address (rtx orig, rtx reg)
11976 {
11977 rtx addr = orig;
11978 rtx new_rtx = orig;
11979 rtx base;
11980
11981 #if TARGET_MACHO
11982 if (TARGET_MACHO && !TARGET_64BIT)
11983 {
11984 if (reg == 0)
11985 reg = gen_reg_rtx (Pmode);
11986 /* Use the generic Mach-O PIC machinery. */
11987 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
11988 }
11989 #endif
11990
11991 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
11992 new_rtx = addr;
11993 else if (TARGET_64BIT
11994 && ix86_cmodel != CM_SMALL_PIC
11995 && gotoff_operand (addr, Pmode))
11996 {
11997 rtx tmpreg;
11998 /* This symbol may be referenced via a displacement from the PIC
11999 base address (@GOTOFF). */
12000
12001 if (reload_in_progress)
12002 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12003 if (GET_CODE (addr) == CONST)
12004 addr = XEXP (addr, 0);
12005 if (GET_CODE (addr) == PLUS)
12006 {
12007 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12008 UNSPEC_GOTOFF);
12009 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12010 }
12011 else
12012 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12013 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12014 if (!reg)
12015 tmpreg = gen_reg_rtx (Pmode);
12016 else
12017 tmpreg = reg;
12018 emit_move_insn (tmpreg, new_rtx);
12019
12020 if (reg != 0)
12021 {
12022 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
12023 tmpreg, 1, OPTAB_DIRECT);
12024 new_rtx = reg;
12025 }
12026 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
12027 }
12028 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
12029 {
12030 /* This symbol may be referenced via a displacement from the PIC
12031 base address (@GOTOFF). */
12032
12033 if (reload_in_progress)
12034 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12035 if (GET_CODE (addr) == CONST)
12036 addr = XEXP (addr, 0);
12037 if (GET_CODE (addr) == PLUS)
12038 {
12039 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12040 UNSPEC_GOTOFF);
12041 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12042 }
12043 else
12044 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12045 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12046 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12047
12048 if (reg != 0)
12049 {
12050 emit_move_insn (reg, new_rtx);
12051 new_rtx = reg;
12052 }
12053 }
12054 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
12055 /* We can't use @GOTOFF for text labels on VxWorks;
12056 see gotoff_operand. */
12057 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
12058 {
12059 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12060 {
12061 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
12062 return legitimize_dllimport_symbol (addr, true);
12063 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
12064 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
12065 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
12066 {
12067 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
12068 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
12069 }
12070 }
12071
12072 /* For x64 PE-COFF there is no GOT table, so we use the address
12073 directly. */
12074 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12075 {
12076 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
12077 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12078
12079 if (reg == 0)
12080 reg = gen_reg_rtx (Pmode);
12081 emit_move_insn (reg, new_rtx);
12082 new_rtx = reg;
12083 }
12084 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
12085 {
12086 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
12087 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12088 new_rtx = gen_const_mem (Pmode, new_rtx);
12089 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12090
12091 if (reg == 0)
12092 reg = gen_reg_rtx (Pmode);
12093 /* Use gen_movsi directly, otherwise the address is loaded
12094 into a register for CSE. We don't want to CSE these addresses;
12095 instead we CSE addresses from the GOT table, so skip this. */
12096 emit_insn (gen_movsi (reg, new_rtx));
12097 new_rtx = reg;
12098 }
12099 else
12100 {
12101 /* This symbol must be referenced via a load from the
12102 Global Offset Table (@GOT). */
12103
12104 if (reload_in_progress)
12105 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12106 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
12107 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12108 if (TARGET_64BIT)
12109 new_rtx = force_reg (Pmode, new_rtx);
12110 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12111 new_rtx = gen_const_mem (Pmode, new_rtx);
12112 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12113
12114 if (reg == 0)
12115 reg = gen_reg_rtx (Pmode);
12116 emit_move_insn (reg, new_rtx);
12117 new_rtx = reg;
12118 }
12119 }
12120 else
12121 {
12122 if (CONST_INT_P (addr)
12123 && !x86_64_immediate_operand (addr, VOIDmode))
12124 {
12125 if (reg)
12126 {
12127 emit_move_insn (reg, addr);
12128 new_rtx = reg;
12129 }
12130 else
12131 new_rtx = force_reg (Pmode, addr);
12132 }
12133 else if (GET_CODE (addr) == CONST)
12134 {
12135 addr = XEXP (addr, 0);
12136
12137 /* We must match stuff we generate before. Assume the only
12138 unspecs that can get here are ours. Not that we could do
12139 anything with them anyway.... */
12140 if (GET_CODE (addr) == UNSPEC
12141 || (GET_CODE (addr) == PLUS
12142 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
12143 return orig;
12144 gcc_assert (GET_CODE (addr) == PLUS);
12145 }
12146 if (GET_CODE (addr) == PLUS)
12147 {
12148 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
12149
12150 /* Check first to see if this is a constant offset from a @GOTOFF
12151 symbol reference. */
12152 if (gotoff_operand (op0, Pmode)
12153 && CONST_INT_P (op1))
12154 {
12155 if (!TARGET_64BIT)
12156 {
12157 if (reload_in_progress)
12158 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12159 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
12160 UNSPEC_GOTOFF);
12161 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
12162 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12163 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12164
12165 if (reg != 0)
12166 {
12167 emit_move_insn (reg, new_rtx);
12168 new_rtx = reg;
12169 }
12170 }
12171 else
12172 {
12173 if (INTVAL (op1) < -16*1024*1024
12174 || INTVAL (op1) >= 16*1024*1024)
12175 {
12176 if (!x86_64_immediate_operand (op1, Pmode))
12177 op1 = force_reg (Pmode, op1);
12178 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
12179 }
12180 }
12181 }
12182 else
12183 {
12184 base = legitimize_pic_address (XEXP (addr, 0), reg);
12185 new_rtx = legitimize_pic_address (XEXP (addr, 1),
12186 base == reg ? NULL_RTX : reg);
12187
12188 if (CONST_INT_P (new_rtx))
12189 new_rtx = plus_constant (base, INTVAL (new_rtx));
12190 else
12191 {
12192 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
12193 {
12194 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
12195 new_rtx = XEXP (new_rtx, 1);
12196 }
12197 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
12198 }
12199 }
12200 }
12201 }
12202 return new_rtx;
12203 }
12204 \f
12205 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12206
12207 static rtx
12208 get_thread_pointer (bool to_reg)
12209 {
12210 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
12211
12212 if (GET_MODE (tp) != Pmode)
12213 tp = convert_to_mode (Pmode, tp, 1);
12214
12215 if (to_reg)
12216 tp = copy_addr_to_reg (tp);
12217
12218 return tp;
12219 }
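/* Illustrative note, not part of the original source: on GNU/Linux targets
   the thread pointer lives in a segment register, so the UNSPEC_TP value
   built above typically ends up being read as something like

	movl	%gs:0, %eax		(32-bit)
	movq	%fs:0, %rax		(64-bit)

   i.e. a load of the TCB self-pointer stored at offset 0 from the thread
   segment base.  */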
12220
12221 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12222
12223 static GTY(()) rtx ix86_tls_symbol;
12224
12225 static rtx
12226 ix86_tls_get_addr (void)
12227 {
12228 if (!ix86_tls_symbol)
12229 {
12230 const char *sym
12231 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
12232 ? "___tls_get_addr" : "__tls_get_addr");
12233
12234 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
12235 }
12236
12237 return ix86_tls_symbol;
12238 }
12239
12240 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12241
12242 static GTY(()) rtx ix86_tls_module_base_symbol;
12243
12244 rtx
12245 ix86_tls_module_base (void)
12246 {
12247 if (!ix86_tls_module_base_symbol)
12248 {
12249 ix86_tls_module_base_symbol
12250 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
12251
12252 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
12253 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
12254 }
12255
12256 return ix86_tls_module_base_symbol;
12257 }
12258
12259 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12260 false if we expect this to be used for a memory address and true if
12261 we expect to load the address into a register. */
12262
12263 static rtx
12264 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
12265 {
12266 rtx dest, base, off;
12267 rtx pic = NULL_RTX, tp = NULL_RTX;
12268 int type;
12269
12270 switch (model)
12271 {
12272 case TLS_MODEL_GLOBAL_DYNAMIC:
12273 dest = gen_reg_rtx (Pmode);
12274
12275 if (!TARGET_64BIT)
12276 {
12277 if (flag_pic)
12278 pic = pic_offset_table_rtx;
12279 else
12280 {
12281 pic = gen_reg_rtx (Pmode);
12282 emit_insn (gen_set_got (pic));
12283 }
12284 }
12285
12286 if (TARGET_GNU2_TLS)
12287 {
12288 if (TARGET_64BIT)
12289 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
12290 else
12291 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
12292
12293 tp = get_thread_pointer (true);
12294 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
12295
12296 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
12297 }
12298 else
12299 {
12300 rtx caddr = ix86_tls_get_addr ();
12301
12302 if (TARGET_64BIT)
12303 {
12304 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
12305
12306 start_sequence ();
12307 emit_call_insn (gen_tls_global_dynamic_64 (rax, x, caddr));
12308 insns = get_insns ();
12309 end_sequence ();
12310
12311 RTL_CONST_CALL_P (insns) = 1;
12312 emit_libcall_block (insns, dest, rax, x);
12313 }
12314 else
12315 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
12316 }
12317 break;
12318
12319 case TLS_MODEL_LOCAL_DYNAMIC:
12320 base = gen_reg_rtx (Pmode);
12321
12322 if (!TARGET_64BIT)
12323 {
12324 if (flag_pic)
12325 pic = pic_offset_table_rtx;
12326 else
12327 {
12328 pic = gen_reg_rtx (Pmode);
12329 emit_insn (gen_set_got (pic));
12330 }
12331 }
12332
12333 if (TARGET_GNU2_TLS)
12334 {
12335 rtx tmp = ix86_tls_module_base ();
12336
12337 if (TARGET_64BIT)
12338 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
12339 else
12340 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
12341
12342 tp = get_thread_pointer (true);
12343 set_unique_reg_note (get_last_insn (), REG_EQUAL,
12344 gen_rtx_MINUS (Pmode, tmp, tp));
12345 }
12346 else
12347 {
12348 rtx caddr = ix86_tls_get_addr ();
12349
12350 if (TARGET_64BIT)
12351 {
12352 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, eqv;
12353
12354 start_sequence ();
12355 emit_call_insn (gen_tls_local_dynamic_base_64 (rax, caddr));
12356 insns = get_insns ();
12357 end_sequence ();
12358
12359 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
12360 share the LD_BASE result with other LD model accesses. */
12361 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12362 UNSPEC_TLS_LD_BASE);
12363
12364 RTL_CONST_CALL_P (insns) = 1;
12365 emit_libcall_block (insns, base, rax, eqv);
12366 }
12367 else
12368 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
12369 }
12370
12371 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
12372 off = gen_rtx_CONST (Pmode, off);
12373
12374 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
12375
12376 if (TARGET_GNU2_TLS)
12377 {
12378 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
12379
12380 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
12381 }
12382 break;
12383
12384 case TLS_MODEL_INITIAL_EXEC:
12385 if (TARGET_64BIT)
12386 {
12387 if (TARGET_SUN_TLS)
12388 {
12389 /* The Sun linker took the AMD64 TLS spec literally
12390 and can only handle %rax as the destination of the
12391 initial-exec TLS code sequence. */
12392
12393 dest = gen_reg_rtx (Pmode);
12394 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
12395 return dest;
12396 }
12397
12398 pic = NULL;
12399 type = UNSPEC_GOTNTPOFF;
12400 }
12401 else if (flag_pic)
12402 {
12403 if (reload_in_progress)
12404 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12405 pic = pic_offset_table_rtx;
12406 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
12407 }
12408 else if (!TARGET_ANY_GNU_TLS)
12409 {
12410 pic = gen_reg_rtx (Pmode);
12411 emit_insn (gen_set_got (pic));
12412 type = UNSPEC_GOTTPOFF;
12413 }
12414 else
12415 {
12416 pic = NULL;
12417 type = UNSPEC_INDNTPOFF;
12418 }
12419
12420 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
12421 off = gen_rtx_CONST (Pmode, off);
12422 if (pic)
12423 off = gen_rtx_PLUS (Pmode, pic, off);
12424 off = gen_const_mem (Pmode, off);
12425 set_mem_alias_set (off, ix86_GOT_alias_set ());
12426
12427 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12428 {
12429 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12430 off = force_reg (Pmode, off);
12431 return gen_rtx_PLUS (Pmode, base, off);
12432 }
12433 else
12434 {
12435 base = get_thread_pointer (true);
12436 dest = gen_reg_rtx (Pmode);
12437 emit_insn (gen_subsi3 (dest, base, off));
12438 }
12439 break;
12440
12441 case TLS_MODEL_LOCAL_EXEC:
12442 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
12443 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12444 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
12445 off = gen_rtx_CONST (Pmode, off);
12446
12447 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12448 {
12449 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12450 return gen_rtx_PLUS (Pmode, base, off);
12451 }
12452 else
12453 {
12454 base = get_thread_pointer (true);
12455 dest = gen_reg_rtx (Pmode);
12456 emit_insn (gen_subsi3 (dest, base, off));
12457 }
12458 break;
12459
12460 default:
12461 gcc_unreachable ();
12462 }
12463
12464 return dest;
12465 }
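/* Rough, illustrative picture of the access sequences the TLS models above
   correspond to, assuming a 64-bit GNU/Linux target and the traditional
   (non-GNU2) sequences, with padding prefixes omitted:

	global dynamic:  leaq  x@tlsgd(%rip), %rdi
			 call  __tls_get_addr@PLT
	local dynamic:	 leaq  x@tlsld(%rip), %rdi
			 call  __tls_get_addr@PLT
			 leaq  x@dtpoff(%rax), %rax
	initial exec:	 movq  x@gottpoff(%rip), %rax
			 movq  %fs:(%rax), %rdx
	local exec:	 movq  %fs:x@tpoff, %rax

   The RTL built here only describes the address computation; the actual
   instruction sequences come from the corresponding patterns in i386.md.  */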
12466
12467 /* Create or return the unique __imp_DECL dllimport symbol corresponding
12468 to symbol DECL. */
12469
12470 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
12471 htab_t dllimport_map;
12472
12473 static tree
12474 get_dllimport_decl (tree decl)
12475 {
12476 struct tree_map *h, in;
12477 void **loc;
12478 const char *name;
12479 const char *prefix;
12480 size_t namelen, prefixlen;
12481 char *imp_name;
12482 tree to;
12483 rtx rtl;
12484
12485 if (!dllimport_map)
12486 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
12487
12488 in.hash = htab_hash_pointer (decl);
12489 in.base.from = decl;
12490 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
12491 h = (struct tree_map *) *loc;
12492 if (h)
12493 return h->to;
12494
12495 *loc = h = ggc_alloc_tree_map ();
12496 h->hash = in.hash;
12497 h->base.from = decl;
12498 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
12499 VAR_DECL, NULL, ptr_type_node);
12500 DECL_ARTIFICIAL (to) = 1;
12501 DECL_IGNORED_P (to) = 1;
12502 DECL_EXTERNAL (to) = 1;
12503 TREE_READONLY (to) = 1;
12504
12505 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
12506 name = targetm.strip_name_encoding (name);
12507 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
12508 ? "*__imp_" : "*__imp__";
12509 namelen = strlen (name);
12510 prefixlen = strlen (prefix);
12511 imp_name = (char *) alloca (namelen + prefixlen + 1);
12512 memcpy (imp_name, prefix, prefixlen);
12513 memcpy (imp_name + prefixlen, name, namelen + 1);
12514
12515 name = ggc_alloc_string (imp_name, namelen + prefixlen);
12516 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
12517 SET_SYMBOL_REF_DECL (rtl, to);
12518 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
12519
12520 rtl = gen_const_mem (Pmode, rtl);
12521 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
12522
12523 SET_DECL_RTL (to, rtl);
12524 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
12525
12526 return to;
12527 }
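/* For illustration (not from the original source): for a dllimport'ed
   symbol "foo" this builds an artificial VAR_DECL whose DECL_RTL is a
   read-only MEM of "*__imp__foo" (or "*__imp_foo" when there is no user
   label prefix or the name already carries the fastcall '@' prefix),
   i.e. a load of the address the dynamic linker stores in the import
   table.  A reference such as

	extern int foo __attribute__ ((dllimport));
	int get (void) { return foo; }

   therefore becomes, roughly,

	movl	__imp__foo, %eax
	movl	(%eax), %eax
  */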
12528
12529 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
12530 true if we require the result be a register. */
12531
12532 static rtx
12533 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
12534 {
12535 tree imp_decl;
12536 rtx x;
12537
12538 gcc_assert (SYMBOL_REF_DECL (symbol));
12539 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
12540
12541 x = DECL_RTL (imp_decl);
12542 if (want_reg)
12543 x = force_reg (Pmode, x);
12544 return x;
12545 }
12546
12547 /* Try machine-dependent ways of modifying an illegitimate address
12548 to be legitimate. If we find one, return the new, valid address.
12549 This macro is used in only one place: `memory_address' in explow.c.
12550
12551 OLDX is the address as it was before break_out_memory_refs was called.
12552 In some cases it is useful to look at this to decide what needs to be done.
12553
12554 It is always safe for this macro to do nothing. It exists to recognize
12555 opportunities to optimize the output.
12556
12557 For the 80386, we handle X+REG by loading X into a register R and
12558 using R+REG. R will go in a general reg and indexing will be used.
12559 However, if REG is a broken-out memory address or multiplication,
12560 nothing needs to be done because REG can certainly go in a general reg.
12561
12562 When -fpic is used, special handling is needed for symbolic references.
12563 See comments by legitimize_pic_address in i386.c for details. */
12564
12565 static rtx
12566 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
12567 enum machine_mode mode)
12568 {
12569 int changed = 0;
12570 unsigned log;
12571
12572 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
12573 if (log)
12574 return legitimize_tls_address (x, (enum tls_model) log, false);
12575 if (GET_CODE (x) == CONST
12576 && GET_CODE (XEXP (x, 0)) == PLUS
12577 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12578 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
12579 {
12580 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
12581 (enum tls_model) log, false);
12582 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12583 }
12584
12585 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12586 {
12587 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
12588 return legitimize_dllimport_symbol (x, true);
12589 if (GET_CODE (x) == CONST
12590 && GET_CODE (XEXP (x, 0)) == PLUS
12591 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12592 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
12593 {
12594 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
12595 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12596 }
12597 }
12598
12599 if (flag_pic && SYMBOLIC_CONST (x))
12600 return legitimize_pic_address (x, 0);
12601
12602 #if TARGET_MACHO
12603 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
12604 return machopic_indirect_data_reference (x, 0);
12605 #endif
12606
12607 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12608 if (GET_CODE (x) == ASHIFT
12609 && CONST_INT_P (XEXP (x, 1))
12610 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
12611 {
12612 changed = 1;
12613 log = INTVAL (XEXP (x, 1));
12614 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
12615 GEN_INT (1 << log));
12616 }
12617
12618 if (GET_CODE (x) == PLUS)
12619 {
12620 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12621
12622 if (GET_CODE (XEXP (x, 0)) == ASHIFT
12623 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
12624 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
12625 {
12626 changed = 1;
12627 log = INTVAL (XEXP (XEXP (x, 0), 1));
12628 XEXP (x, 0) = gen_rtx_MULT (Pmode,
12629 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
12630 GEN_INT (1 << log));
12631 }
12632
12633 if (GET_CODE (XEXP (x, 1)) == ASHIFT
12634 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
12635 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
12636 {
12637 changed = 1;
12638 log = INTVAL (XEXP (XEXP (x, 1), 1));
12639 XEXP (x, 1) = gen_rtx_MULT (Pmode,
12640 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
12641 GEN_INT (1 << log));
12642 }
12643
12644 /* Put multiply first if it isn't already. */
12645 if (GET_CODE (XEXP (x, 1)) == MULT)
12646 {
12647 rtx tmp = XEXP (x, 0);
12648 XEXP (x, 0) = XEXP (x, 1);
12649 XEXP (x, 1) = tmp;
12650 changed = 1;
12651 }
12652
12653 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
12654 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12655 created by virtual register instantiation, register elimination, and
12656 similar optimizations. */
12657 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
12658 {
12659 changed = 1;
12660 x = gen_rtx_PLUS (Pmode,
12661 gen_rtx_PLUS (Pmode, XEXP (x, 0),
12662 XEXP (XEXP (x, 1), 0)),
12663 XEXP (XEXP (x, 1), 1));
12664 }
12665
12666 /* Canonicalize
12667 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
12668 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
12669 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
12670 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12671 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
12672 && CONSTANT_P (XEXP (x, 1)))
12673 {
12674 rtx constant;
12675 rtx other = NULL_RTX;
12676
12677 if (CONST_INT_P (XEXP (x, 1)))
12678 {
12679 constant = XEXP (x, 1);
12680 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
12681 }
12682 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
12683 {
12684 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
12685 other = XEXP (x, 1);
12686 }
12687 else
12688 constant = 0;
12689
12690 if (constant)
12691 {
12692 changed = 1;
12693 x = gen_rtx_PLUS (Pmode,
12694 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
12695 XEXP (XEXP (XEXP (x, 0), 1), 0)),
12696 plus_constant (other, INTVAL (constant)));
12697 }
12698 }
12699
12700 if (changed && ix86_legitimate_address_p (mode, x, false))
12701 return x;
12702
12703 if (GET_CODE (XEXP (x, 0)) == MULT)
12704 {
12705 changed = 1;
12706 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
12707 }
12708
12709 if (GET_CODE (XEXP (x, 1)) == MULT)
12710 {
12711 changed = 1;
12712 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
12713 }
12714
12715 if (changed
12716 && REG_P (XEXP (x, 1))
12717 && REG_P (XEXP (x, 0)))
12718 return x;
12719
12720 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
12721 {
12722 changed = 1;
12723 x = legitimize_pic_address (x, 0);
12724 }
12725
12726 if (changed && ix86_legitimate_address_p (mode, x, false))
12727 return x;
12728
12729 if (REG_P (XEXP (x, 0)))
12730 {
12731 rtx temp = gen_reg_rtx (Pmode);
12732 rtx val = force_operand (XEXP (x, 1), temp);
12733 if (val != temp)
12734 {
12735 if (GET_MODE (val) != Pmode)
12736 val = convert_to_mode (Pmode, val, 1);
12737 emit_move_insn (temp, val);
12738 }
12739
12740 XEXP (x, 1) = temp;
12741 return x;
12742 }
12743
12744 else if (REG_P (XEXP (x, 1)))
12745 {
12746 rtx temp = gen_reg_rtx (Pmode);
12747 rtx val = force_operand (XEXP (x, 0), temp);
12748 if (val != temp)
12749 {
12750 if (GET_MODE (val) != Pmode)
12751 val = convert_to_mode (Pmode, val, 1);
12752 emit_move_insn (temp, val);
12753 }
12754
12755 XEXP (x, 0) = temp;
12756 return x;
12757 }
12758 }
12759
12760 return x;
12761 }
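/* A couple of illustrative transformations performed above (a sketch,
   not an exhaustive list):

     (plus (ashift (reg A) (const_int 2)) (reg B))
       -> (plus (mult (reg A) (const_int 4)) (reg B))

   which matches the base + index*scale form of the SIB byte, and

     (plus (mult (reg A) (const_int 4)) (plus (reg B) (const_int 16)))
       -> (plus (plus (mult (reg A) (const_int 4)) (reg B)) (const_int 16))

   which re-associates the expression so the constant becomes the
   displacement.  */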
12762 \f
12763 /* Print an integer constant expression in assembler syntax. Addition
12764 and subtraction are the only arithmetic that may appear in these
12765 expressions. FILE is the stdio stream to write to, X is the rtx, and
12766 CODE is the operand print code from the output string. */
12767
12768 static void
12769 output_pic_addr_const (FILE *file, rtx x, int code)
12770 {
12771 char buf[256];
12772
12773 switch (GET_CODE (x))
12774 {
12775 case PC:
12776 gcc_assert (flag_pic);
12777 putc ('.', file);
12778 break;
12779
12780 case SYMBOL_REF:
12781 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
12782 output_addr_const (file, x);
12783 else
12784 {
12785 const char *name = XSTR (x, 0);
12786
12787 /* Mark the decl as referenced so that cgraph will
12788 output the function. */
12789 if (SYMBOL_REF_DECL (x))
12790 mark_decl_referenced (SYMBOL_REF_DECL (x));
12791
12792 #if TARGET_MACHO
12793 if (MACHOPIC_INDIRECT
12794 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
12795 name = machopic_indirection_name (x, /*stub_p=*/true);
12796 #endif
12797 assemble_name (file, name);
12798 }
12799 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12800 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
12801 fputs ("@PLT", file);
12802 break;
12803
12804 case LABEL_REF:
12805 x = XEXP (x, 0);
12806 /* FALLTHRU */
12807 case CODE_LABEL:
12808 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
12809 assemble_name (asm_out_file, buf);
12810 break;
12811
12812 case CONST_INT:
12813 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12814 break;
12815
12816 case CONST:
12817 /* This used to output parentheses around the expression,
12818 but that does not work on the 386 (either ATT or BSD assembler). */
12819 output_pic_addr_const (file, XEXP (x, 0), code);
12820 break;
12821
12822 case CONST_DOUBLE:
12823 if (GET_MODE (x) == VOIDmode)
12824 {
12825 /* We can use %d if the number is <32 bits and positive. */
12826 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
12827 fprintf (file, "0x%lx%08lx",
12828 (unsigned long) CONST_DOUBLE_HIGH (x),
12829 (unsigned long) CONST_DOUBLE_LOW (x));
12830 else
12831 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
12832 }
12833 else
12834 /* We can't handle floating point constants;
12835 TARGET_PRINT_OPERAND must handle them. */
12836 output_operand_lossage ("floating constant misused");
12837 break;
12838
12839 case PLUS:
12840 /* Some assemblers need integer constants to appear first. */
12841 if (CONST_INT_P (XEXP (x, 0)))
12842 {
12843 output_pic_addr_const (file, XEXP (x, 0), code);
12844 putc ('+', file);
12845 output_pic_addr_const (file, XEXP (x, 1), code);
12846 }
12847 else
12848 {
12849 gcc_assert (CONST_INT_P (XEXP (x, 1)));
12850 output_pic_addr_const (file, XEXP (x, 1), code);
12851 putc ('+', file);
12852 output_pic_addr_const (file, XEXP (x, 0), code);
12853 }
12854 break;
12855
12856 case MINUS:
12857 if (!TARGET_MACHO)
12858 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
12859 output_pic_addr_const (file, XEXP (x, 0), code);
12860 putc ('-', file);
12861 output_pic_addr_const (file, XEXP (x, 1), code);
12862 if (!TARGET_MACHO)
12863 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
12864 break;
12865
12866 case UNSPEC:
12867 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
12868 {
12869 bool f = i386_asm_output_addr_const_extra (file, x);
12870 gcc_assert (f);
12871 break;
12872 }
12873
12874 gcc_assert (XVECLEN (x, 0) == 1);
12875 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
12876 switch (XINT (x, 1))
12877 {
12878 case UNSPEC_GOT:
12879 fputs ("@GOT", file);
12880 break;
12881 case UNSPEC_GOTOFF:
12882 fputs ("@GOTOFF", file);
12883 break;
12884 case UNSPEC_PLTOFF:
12885 fputs ("@PLTOFF", file);
12886 break;
12887 case UNSPEC_PCREL:
12888 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12889 "(%rip)" : "[rip]", file);
12890 break;
12891 case UNSPEC_GOTPCREL:
12892 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12893 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
12894 break;
12895 case UNSPEC_GOTTPOFF:
12896 /* FIXME: This might be @TPOFF in Sun ld too. */
12897 fputs ("@gottpoff", file);
12898 break;
12899 case UNSPEC_TPOFF:
12900 fputs ("@tpoff", file);
12901 break;
12902 case UNSPEC_NTPOFF:
12903 if (TARGET_64BIT)
12904 fputs ("@tpoff", file);
12905 else
12906 fputs ("@ntpoff", file);
12907 break;
12908 case UNSPEC_DTPOFF:
12909 fputs ("@dtpoff", file);
12910 break;
12911 case UNSPEC_GOTNTPOFF:
12912 if (TARGET_64BIT)
12913 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12914 "@gottpoff(%rip)": "@gottpoff[rip]", file);
12915 else
12916 fputs ("@gotntpoff", file);
12917 break;
12918 case UNSPEC_INDNTPOFF:
12919 fputs ("@indntpoff", file);
12920 break;
12921 #if TARGET_MACHO
12922 case UNSPEC_MACHOPIC_OFFSET:
12923 putc ('-', file);
12924 machopic_output_function_base_name (file);
12925 break;
12926 #endif
12927 default:
12928 output_operand_lossage ("invalid UNSPEC as operand");
12929 break;
12930 }
12931 break;
12932
12933 default:
12934 output_operand_lossage ("invalid expression as operand");
12935 }
12936 }
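/* Example of the output produced above (illustrative): an RTX such as
   (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)) is printed as
   "foo@GOTOFF", while on 64-bit targets
   (const (unspec [(symbol_ref "foo")] UNSPEC_GOTPCREL)) is printed as
   "foo@GOTPCREL(%rip)" in AT&T syntax or "foo@GOTPCREL[rip]" in Intel
   syntax.  */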
12937
12938 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12939 We need to emit DTP-relative relocations. */
12940
12941 static void ATTRIBUTE_UNUSED
12942 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
12943 {
12944 fputs (ASM_LONG, file);
12945 output_addr_const (file, x);
12946 fputs ("@dtpoff", file);
12947 switch (size)
12948 {
12949 case 4:
12950 break;
12951 case 8:
12952 fputs (", 0", file);
12953 break;
12954 default:
12955 gcc_unreachable ();
12956 }
12957 }
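/* For example (illustrative): for a 4-byte DTP-relative relocation
   against "foo" this emits

	.long	foo@dtpoff

   and for an 8-byte one

	.long	foo@dtpoff, 0

   i.e. the upper half is simply padded with zero.  */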
12958
12959 /* Return true if X is a representation of the PIC register. This copes
12960 with calls from ix86_find_base_term, where the register might have
12961 been replaced by a cselib value. */
12962
12963 static bool
12964 ix86_pic_register_p (rtx x)
12965 {
12966 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
12967 return (pic_offset_table_rtx
12968 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
12969 else
12970 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
12971 }
12972
12973 /* Helper function for ix86_delegitimize_address.
12974 Attempt to delegitimize TLS local-exec accesses. */
12975
12976 static rtx
12977 ix86_delegitimize_tls_address (rtx orig_x)
12978 {
12979 rtx x = orig_x, unspec;
12980 struct ix86_address addr;
12981
12982 if (!TARGET_TLS_DIRECT_SEG_REFS)
12983 return orig_x;
12984 if (MEM_P (x))
12985 x = XEXP (x, 0);
12986 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
12987 return orig_x;
12988 if (ix86_decompose_address (x, &addr) == 0
12989 || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
12990 || addr.disp == NULL_RTX
12991 || GET_CODE (addr.disp) != CONST)
12992 return orig_x;
12993 unspec = XEXP (addr.disp, 0);
12994 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
12995 unspec = XEXP (unspec, 0);
12996 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
12997 return orig_x;
12998 x = XVECEXP (unspec, 0, 0);
12999 gcc_assert (GET_CODE (x) == SYMBOL_REF);
13000 if (unspec != XEXP (addr.disp, 0))
13001 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
13002 if (addr.index)
13003 {
13004 rtx idx = addr.index;
13005 if (addr.scale != 1)
13006 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
13007 x = gen_rtx_PLUS (Pmode, idx, x);
13008 }
13009 if (addr.base)
13010 x = gen_rtx_PLUS (Pmode, addr.base, x);
13011 if (MEM_P (orig_x))
13012 x = replace_equiv_address_nv (orig_x, x);
13013 return x;
13014 }
13015
13016 /* In the name of slightly smaller debug output, and to cater to
13017 general assembler lossage, recognize PIC+GOTOFF and turn it back
13018 into a direct symbol reference.
13019
13020 On Darwin, this is necessary to avoid a crash, because Darwin
13021 has a different PIC label for each routine but the DWARF debugging
13022 information is not associated with any particular routine, so it's
13023 necessary to remove references to the PIC label from RTL stored by
13024 the DWARF output code. */
13025
13026 static rtx
13027 ix86_delegitimize_address (rtx x)
13028 {
13029 rtx orig_x = delegitimize_mem_from_attrs (x);
13030 /* addend is NULL or some rtx if x is something+GOTOFF where
13031 something doesn't include the PIC register. */
13032 rtx addend = NULL_RTX;
13033 /* reg_addend is NULL or a multiple of some register. */
13034 rtx reg_addend = NULL_RTX;
13035 /* const_addend is NULL or a const_int. */
13036 rtx const_addend = NULL_RTX;
13037 /* This is the result, or NULL. */
13038 rtx result = NULL_RTX;
13039
13040 x = orig_x;
13041
13042 if (MEM_P (x))
13043 x = XEXP (x, 0);
13044
13045 if (TARGET_64BIT)
13046 {
13047 if (GET_CODE (x) != CONST
13048 || GET_CODE (XEXP (x, 0)) != UNSPEC
13049 || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
13050 && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
13051 || !MEM_P (orig_x))
13052 return ix86_delegitimize_tls_address (orig_x);
13053 x = XVECEXP (XEXP (x, 0), 0, 0);
13054 if (GET_MODE (orig_x) != GET_MODE (x))
13055 {
13056 x = simplify_gen_subreg (GET_MODE (orig_x), x,
13057 GET_MODE (x), 0);
13058 if (x == NULL_RTX)
13059 return orig_x;
13060 }
13061 return x;
13062 }
13063
13064 if (GET_CODE (x) != PLUS
13065 || GET_CODE (XEXP (x, 1)) != CONST)
13066 return ix86_delegitimize_tls_address (orig_x);
13067
13068 if (ix86_pic_register_p (XEXP (x, 0)))
13069 /* %ebx + GOT/GOTOFF */
13070 ;
13071 else if (GET_CODE (XEXP (x, 0)) == PLUS)
13072 {
13073 /* %ebx + %reg * scale + GOT/GOTOFF */
13074 reg_addend = XEXP (x, 0);
13075 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
13076 reg_addend = XEXP (reg_addend, 1);
13077 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
13078 reg_addend = XEXP (reg_addend, 0);
13079 else
13080 {
13081 reg_addend = NULL_RTX;
13082 addend = XEXP (x, 0);
13083 }
13084 }
13085 else
13086 addend = XEXP (x, 0);
13087
13088 x = XEXP (XEXP (x, 1), 0);
13089 if (GET_CODE (x) == PLUS
13090 && CONST_INT_P (XEXP (x, 1)))
13091 {
13092 const_addend = XEXP (x, 1);
13093 x = XEXP (x, 0);
13094 }
13095
13096 if (GET_CODE (x) == UNSPEC
13097 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
13098 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
13099 result = XVECEXP (x, 0, 0);
13100
13101 if (TARGET_MACHO && darwin_local_data_pic (x)
13102 && !MEM_P (orig_x))
13103 result = XVECEXP (x, 0, 0);
13104
13105 if (! result)
13106 return ix86_delegitimize_tls_address (orig_x);
13107
13108 if (const_addend)
13109 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
13110 if (reg_addend)
13111 result = gen_rtx_PLUS (Pmode, reg_addend, result);
13112 if (addend)
13113 {
13114 /* If the rest of original X doesn't involve the PIC register, add
13115 addend and subtract pic_offset_table_rtx. This can happen e.g.
13116 for code like:
13117 leal (%ebx, %ecx, 4), %ecx
13118 ...
13119 movl foo@GOTOFF(%ecx), %edx
13120 in which case we return (%ecx - %ebx) + foo. */
13121 if (pic_offset_table_rtx)
13122 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
13123 pic_offset_table_rtx),
13124 result);
13125 else
13126 return orig_x;
13127 }
13128 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
13129 {
13130 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
13131 if (result == NULL_RTX)
13132 return orig_x;
13133 }
13134 return result;
13135 }
13136
13137 /* If X is a machine specific address (i.e. a symbol or label being
13138 referenced as a displacement from the GOT implemented using an
13139 UNSPEC), then return the base term. Otherwise return X. */
13140
13141 rtx
13142 ix86_find_base_term (rtx x)
13143 {
13144 rtx term;
13145
13146 if (TARGET_64BIT)
13147 {
13148 if (GET_CODE (x) != CONST)
13149 return x;
13150 term = XEXP (x, 0);
13151 if (GET_CODE (term) == PLUS
13152 && (CONST_INT_P (XEXP (term, 1))
13153 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
13154 term = XEXP (term, 0);
13155 if (GET_CODE (term) != UNSPEC
13156 || (XINT (term, 1) != UNSPEC_GOTPCREL
13157 && XINT (term, 1) != UNSPEC_PCREL))
13158 return x;
13159
13160 return XVECEXP (term, 0, 0);
13161 }
13162
13163 return ix86_delegitimize_address (x);
13164 }
13165 \f
13166 static void
13167 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
13168 int fp, FILE *file)
13169 {
13170 const char *suffix;
13171
13172 if (mode == CCFPmode || mode == CCFPUmode)
13173 {
13174 code = ix86_fp_compare_code_to_integer (code);
13175 mode = CCmode;
13176 }
13177 if (reverse)
13178 code = reverse_condition (code);
13179
13180 switch (code)
13181 {
13182 case EQ:
13183 switch (mode)
13184 {
13185 case CCAmode:
13186 suffix = "a";
13187 break;
13188
13189 case CCCmode:
13190 suffix = "c";
13191 break;
13192
13193 case CCOmode:
13194 suffix = "o";
13195 break;
13196
13197 case CCSmode:
13198 suffix = "s";
13199 break;
13200
13201 default:
13202 suffix = "e";
13203 }
13204 break;
13205 case NE:
13206 switch (mode)
13207 {
13208 case CCAmode:
13209 suffix = "na";
13210 break;
13211
13212 case CCCmode:
13213 suffix = "nc";
13214 break;
13215
13216 case CCOmode:
13217 suffix = "no";
13218 break;
13219
13220 case CCSmode:
13221 suffix = "ns";
13222 break;
13223
13224 default:
13225 suffix = "ne";
13226 }
13227 break;
13228 case GT:
13229 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
13230 suffix = "g";
13231 break;
13232 case GTU:
13233 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13234 Those same assemblers have the same but opposite lossage on cmov. */
13235 if (mode == CCmode)
13236 suffix = fp ? "nbe" : "a";
13237 else if (mode == CCCmode)
13238 suffix = "b";
13239 else
13240 gcc_unreachable ();
13241 break;
13242 case LT:
13243 switch (mode)
13244 {
13245 case CCNOmode:
13246 case CCGOCmode:
13247 suffix = "s";
13248 break;
13249
13250 case CCmode:
13251 case CCGCmode:
13252 suffix = "l";
13253 break;
13254
13255 default:
13256 gcc_unreachable ();
13257 }
13258 break;
13259 case LTU:
13260 gcc_assert (mode == CCmode || mode == CCCmode);
13261 suffix = "b";
13262 break;
13263 case GE:
13264 switch (mode)
13265 {
13266 case CCNOmode:
13267 case CCGOCmode:
13268 suffix = "ns";
13269 break;
13270
13271 case CCmode:
13272 case CCGCmode:
13273 suffix = "ge";
13274 break;
13275
13276 default:
13277 gcc_unreachable ();
13278 }
13279 break;
13280 case GEU:
13281 /* ??? As above. */
13282 gcc_assert (mode == CCmode || mode == CCCmode);
13283 suffix = fp ? "nb" : "ae";
13284 break;
13285 case LE:
13286 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
13287 suffix = "le";
13288 break;
13289 case LEU:
13290 /* ??? As above. */
13291 if (mode == CCmode)
13292 suffix = "be";
13293 else if (mode == CCCmode)
13294 suffix = fp ? "nb" : "ae";
13295 else
13296 gcc_unreachable ();
13297 break;
13298 case UNORDERED:
13299 suffix = fp ? "u" : "p";
13300 break;
13301 case ORDERED:
13302 suffix = fp ? "nu" : "np";
13303 break;
13304 default:
13305 gcc_unreachable ();
13306 }
13307 fputs (suffix, file);
13308 }
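/* Illustrative examples of the mapping above: (gt ...) in CCGCmode prints
   "g"; (gtu ...) in CCmode prints "a" (or "nbe" when FP is set, to work
   around the fcmov assembler lossage noted above); and with REVERSE
   nonzero the condition is inverted first, so (gt ...) would print "le".
   The result is consumed by the '%C', '%c', '%F' and '%f' operand codes
   in insn templates.  */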
13309
13310 /* Print the name of register X to FILE based on its machine mode and number.
13311 If CODE is 'w', pretend the mode is HImode.
13312 If CODE is 'b', pretend the mode is QImode.
13313 If CODE is 'k', pretend the mode is SImode.
13314 If CODE is 'q', pretend the mode is DImode.
13315 If CODE is 'x', pretend the mode is V4SFmode.
13316 If CODE is 't', pretend the mode is V8SFmode.
13317 If CODE is 'h', pretend the reg is the 'high' byte register.
13318 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
13319 If CODE is 'd', duplicate the operand for AVX instruction.
13320 */
13321
13322 void
13323 print_reg (rtx x, int code, FILE *file)
13324 {
13325 const char *reg;
13326 bool duplicated = code == 'd' && TARGET_AVX;
13327
13328 gcc_assert (x == pc_rtx
13329 || (REGNO (x) != ARG_POINTER_REGNUM
13330 && REGNO (x) != FRAME_POINTER_REGNUM
13331 && REGNO (x) != FLAGS_REG
13332 && REGNO (x) != FPSR_REG
13333 && REGNO (x) != FPCR_REG));
13334
13335 if (ASSEMBLER_DIALECT == ASM_ATT)
13336 putc ('%', file);
13337
13338 if (x == pc_rtx)
13339 {
13340 gcc_assert (TARGET_64BIT);
13341 fputs ("rip", file);
13342 return;
13343 }
13344
13345 if (code == 'w' || MMX_REG_P (x))
13346 code = 2;
13347 else if (code == 'b')
13348 code = 1;
13349 else if (code == 'k')
13350 code = 4;
13351 else if (code == 'q')
13352 code = 8;
13353 else if (code == 'y')
13354 code = 3;
13355 else if (code == 'h')
13356 code = 0;
13357 else if (code == 'x')
13358 code = 16;
13359 else if (code == 't')
13360 code = 32;
13361 else
13362 code = GET_MODE_SIZE (GET_MODE (x));
13363
13364 /* Irritatingly, the AMD extended registers use a different naming
13365 convention from the normal registers. */
13366 if (REX_INT_REG_P (x))
13367 {
13368 gcc_assert (TARGET_64BIT);
13369 switch (code)
13370 {
13371 case 0:
13372 error ("extended registers have no high halves");
13373 break;
13374 case 1:
13375 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
13376 break;
13377 case 2:
13378 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
13379 break;
13380 case 4:
13381 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
13382 break;
13383 case 8:
13384 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
13385 break;
13386 default:
13387 error ("unsupported operand size for extended register");
13388 break;
13389 }
13390 return;
13391 }
13392
13393 reg = NULL;
13394 switch (code)
13395 {
13396 case 3:
13397 if (STACK_TOP_P (x))
13398 {
13399 reg = "st(0)";
13400 break;
13401 }
13402 /* FALLTHRU */
13403 case 8:
13404 case 4:
13405 case 12:
13406 if (! ANY_FP_REG_P (x))
13407 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
13408 /* FALLTHRU */
13409 case 16:
13410 case 2:
13411 normal:
13412 reg = hi_reg_name[REGNO (x)];
13413 break;
13414 case 1:
13415 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
13416 goto normal;
13417 reg = qi_reg_name[REGNO (x)];
13418 break;
13419 case 0:
13420 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
13421 goto normal;
13422 reg = qi_high_reg_name[REGNO (x)];
13423 break;
13424 case 32:
13425 if (SSE_REG_P (x))
13426 {
13427 gcc_assert (!duplicated);
13428 putc ('y', file);
13429 fputs (hi_reg_name[REGNO (x)] + 1, file);
13430 return;
13431 }
13432 break;
13433 default:
13434 gcc_unreachable ();
13435 }
13436
13437 fputs (reg, file);
13438 if (duplicated)
13439 {
13440 if (ASSEMBLER_DIALECT == ASM_ATT)
13441 fprintf (file, ", %%%s", reg);
13442 else
13443 fprintf (file, ", %s", reg);
13444 }
13445 }
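/* A few illustrative examples (assuming AT&T syntax): for the register
   eax, code 'b' prints "%al", 'w' prints "%ax", 'k' prints "%eax" and
   'q' prints "%rax"; for the extended register r10, codes 'b', 'w', 'k'
   and 'q' print "%r10b", "%r10w", "%r10d" and "%r10" respectively; and
   for an SSE register with code 't' the "%xmm" name is rewritten to the
   corresponding "%ymm" name.  */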
13446
13447 /* Locate some local-dynamic symbol still in use by this function
13448 so that we can print its name in some tls_local_dynamic_base
13449 pattern. */
13450
13451 static int
13452 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
13453 {
13454 rtx x = *px;
13455
13456 if (GET_CODE (x) == SYMBOL_REF
13457 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
13458 {
13459 cfun->machine->some_ld_name = XSTR (x, 0);
13460 return 1;
13461 }
13462
13463 return 0;
13464 }
13465
13466 static const char *
13467 get_some_local_dynamic_name (void)
13468 {
13469 rtx insn;
13470
13471 if (cfun->machine->some_ld_name)
13472 return cfun->machine->some_ld_name;
13473
13474 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
13475 if (NONDEBUG_INSN_P (insn)
13476 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
13477 return cfun->machine->some_ld_name;
13478
13479 return NULL;
13480 }
13481
13482 /* Meaning of CODE:
13483 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13484 C -- print opcode suffix for set/cmov insn.
13485 c -- like C, but print reversed condition
13486 F,f -- likewise, but for floating-point.
13487 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13488 otherwise nothing
13489 R -- print the prefix for register names.
13490 z -- print the opcode suffix for the size of the current operand.
13491 Z -- likewise, with special suffixes for x87 instructions.
13492 * -- print a star (in certain assembler syntax)
13493 A -- print an absolute memory reference.
13494 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13495 s -- print a shift double count, followed by the assembler's argument
13496 delimiter.
13497 b -- print the QImode name of the register for the indicated operand.
13498 %b0 would print %al if operands[0] is reg 0.
13499 w -- likewise, print the HImode name of the register.
13500 k -- likewise, print the SImode name of the register.
13501 q -- likewise, print the DImode name of the register.
13502 x -- likewise, print the V4SFmode name of the register.
13503 t -- likewise, print the V8SFmode name of the register.
13504 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13505 y -- print "st(0)" instead of "st" as a register.
13506 d -- print duplicated register operand for AVX instruction.
13507 D -- print condition for SSE cmp instruction.
13508 P -- if PIC, print an @PLT suffix.
13509 p -- print raw symbol name.
13510 X -- don't print any sort of PIC '@' suffix for a symbol.
13511 & -- print some in-use local-dynamic symbol name.
13512 H -- print a memory address offset by 8; used for sse high-parts
13513 Y -- print condition for XOP pcom* instruction.
13514 + -- print a branch hint as 'cs' or 'ds' prefix
13515 ; -- print a semicolon (after prefixes due to bug in older gas).
13516 @ -- print a segment register of thread base pointer load
13517 */
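/* For illustration only (hypothetical template fragments, not taken from
   i386.md): "%z0" appends the integer size suffix of operand 0, so a
   fragment such as "add%z0 %1, %0" would come out as "addl ..." for an
   SImode operand; "%b1" forces the QImode register name, printing "%al"
   when operand 1 is eax; and "%C2" prints the setcc/cmov condition
   suffix, e.g. "g" for a signed greater-than comparison.  */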
13518
13519 void
13520 ix86_print_operand (FILE *file, rtx x, int code)
13521 {
13522 if (code)
13523 {
13524 switch (code)
13525 {
13526 case '*':
13527 if (ASSEMBLER_DIALECT == ASM_ATT)
13528 putc ('*', file);
13529 return;
13530
13531 case '&':
13532 {
13533 const char *name = get_some_local_dynamic_name ();
13534 if (name == NULL)
13535 output_operand_lossage ("'%%&' used without any "
13536 "local dynamic TLS references");
13537 else
13538 assemble_name (file, name);
13539 return;
13540 }
13541
13542 case 'A':
13543 switch (ASSEMBLER_DIALECT)
13544 {
13545 case ASM_ATT:
13546 putc ('*', file);
13547 break;
13548
13549 case ASM_INTEL:
13550 /* Intel syntax. For absolute addresses, registers should not
13551 be surrounded by brackets. */
13552 if (!REG_P (x))
13553 {
13554 putc ('[', file);
13555 ix86_print_operand (file, x, 0);
13556 putc (']', file);
13557 return;
13558 }
13559 break;
13560
13561 default:
13562 gcc_unreachable ();
13563 }
13564
13565 ix86_print_operand (file, x, 0);
13566 return;
13567
13568
13569 case 'L':
13570 if (ASSEMBLER_DIALECT == ASM_ATT)
13571 putc ('l', file);
13572 return;
13573
13574 case 'W':
13575 if (ASSEMBLER_DIALECT == ASM_ATT)
13576 putc ('w', file);
13577 return;
13578
13579 case 'B':
13580 if (ASSEMBLER_DIALECT == ASM_ATT)
13581 putc ('b', file);
13582 return;
13583
13584 case 'Q':
13585 if (ASSEMBLER_DIALECT == ASM_ATT)
13586 putc ('l', file);
13587 return;
13588
13589 case 'S':
13590 if (ASSEMBLER_DIALECT == ASM_ATT)
13591 putc ('s', file);
13592 return;
13593
13594 case 'T':
13595 if (ASSEMBLER_DIALECT == ASM_ATT)
13596 putc ('t', file);
13597 return;
13598
13599 case 'z':
13600 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13601 {
13602 /* Opcodes don't get size suffixes in Intel syntax. */
13603 if (ASSEMBLER_DIALECT == ASM_INTEL)
13604 return;
13605
13606 switch (GET_MODE_SIZE (GET_MODE (x)))
13607 {
13608 case 1:
13609 putc ('b', file);
13610 return;
13611
13612 case 2:
13613 putc ('w', file);
13614 return;
13615
13616 case 4:
13617 putc ('l', file);
13618 return;
13619
13620 case 8:
13621 putc ('q', file);
13622 return;
13623
13624 default:
13625 output_operand_lossage
13626 ("invalid operand size for operand code '%c'", code);
13627 return;
13628 }
13629 }
13630
13631 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13632 warning
13633 (0, "non-integer operand used with operand code '%c'", code);
13634 /* FALLTHRU */
13635
13636 case 'Z':
13637 /* 387 opcodes don't get size suffixes in Intel syntax. */
13638 if (ASSEMBLER_DIALECT == ASM_INTEL)
13639 return;
13640
13641 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13642 {
13643 switch (GET_MODE_SIZE (GET_MODE (x)))
13644 {
13645 case 2:
13646 #ifdef HAVE_AS_IX86_FILDS
13647 putc ('s', file);
13648 #endif
13649 return;
13650
13651 case 4:
13652 putc ('l', file);
13653 return;
13654
13655 case 8:
13656 #ifdef HAVE_AS_IX86_FILDQ
13657 putc ('q', file);
13658 #else
13659 fputs ("ll", file);
13660 #endif
13661 return;
13662
13663 default:
13664 break;
13665 }
13666 }
13667 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13668 {
13669 /* 387 opcodes don't get size suffixes
13670 if the operands are registers. */
13671 if (STACK_REG_P (x))
13672 return;
13673
13674 switch (GET_MODE_SIZE (GET_MODE (x)))
13675 {
13676 case 4:
13677 putc ('s', file);
13678 return;
13679
13680 case 8:
13681 putc ('l', file);
13682 return;
13683
13684 case 12:
13685 case 16:
13686 putc ('t', file);
13687 return;
13688
13689 default:
13690 break;
13691 }
13692 }
13693 else
13694 {
13695 output_operand_lossage
13696 ("invalid operand type used with operand code '%c'", code);
13697 return;
13698 }
13699
13700 output_operand_lossage
13701 ("invalid operand size for operand code '%c'", code);
13702 return;
13703
13704 case 'd':
13705 case 'b':
13706 case 'w':
13707 case 'k':
13708 case 'q':
13709 case 'h':
13710 case 't':
13711 case 'y':
13712 case 'x':
13713 case 'X':
13714 case 'P':
13715 case 'p':
13716 break;
13717
13718 case 's':
13719 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
13720 {
13721 ix86_print_operand (file, x, 0);
13722 fputs (", ", file);
13723 }
13724 return;
13725
13726 case 'D':
13727 /* Little bit of braindamage here. The SSE compare instructions
13728 use completely different names for the comparisons than the
13729 fp conditional moves do. */
13730 if (TARGET_AVX)
13731 {
13732 switch (GET_CODE (x))
13733 {
13734 case EQ:
13735 fputs ("eq", file);
13736 break;
13737 case UNEQ:
13738 fputs ("eq_us", file);
13739 break;
13740 case LT:
13741 fputs ("lt", file);
13742 break;
13743 case UNLT:
13744 fputs ("nge", file);
13745 break;
13746 case LE:
13747 fputs ("le", file);
13748 break;
13749 case UNLE:
13750 fputs ("ngt", file);
13751 break;
13752 case UNORDERED:
13753 fputs ("unord", file);
13754 break;
13755 case NE:
13756 fputs ("neq", file);
13757 break;
13758 case LTGT:
13759 fputs ("neq_oq", file);
13760 break;
13761 case GE:
13762 fputs ("ge", file);
13763 break;
13764 case UNGE:
13765 fputs ("nlt", file);
13766 break;
13767 case GT:
13768 fputs ("gt", file);
13769 break;
13770 case UNGT:
13771 fputs ("nle", file);
13772 break;
13773 case ORDERED:
13774 fputs ("ord", file);
13775 break;
13776 default:
13777 output_operand_lossage ("operand is not a condition code, "
13778 "invalid operand code 'D'");
13779 return;
13780 }
13781 }
13782 else
13783 {
13784 switch (GET_CODE (x))
13785 {
13786 case EQ:
13787 case UNEQ:
13788 fputs ("eq", file);
13789 break;
13790 case LT:
13791 case UNLT:
13792 fputs ("lt", file);
13793 break;
13794 case LE:
13795 case UNLE:
13796 fputs ("le", file);
13797 break;
13798 case UNORDERED:
13799 fputs ("unord", file);
13800 break;
13801 case NE:
13802 case LTGT:
13803 fputs ("neq", file);
13804 break;
13805 case UNGE:
13806 case GE:
13807 fputs ("nlt", file);
13808 break;
13809 case UNGT:
13810 case GT:
13811 fputs ("nle", file);
13812 break;
13813 case ORDERED:
13814 fputs ("ord", file);
13815 break;
13816 default:
13817 output_operand_lossage ("operand is not a condition code, "
13818 "invalid operand code 'D'");
13819 return;
13820 }
13821 }
13822 return;
13823 case 'O':
13824 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13825 if (ASSEMBLER_DIALECT == ASM_ATT)
13826 {
13827 switch (GET_MODE (x))
13828 {
13829 case HImode: putc ('w', file); break;
13830 case SImode:
13831 case SFmode: putc ('l', file); break;
13832 case DImode:
13833 case DFmode: putc ('q', file); break;
13834 default: gcc_unreachable ();
13835 }
13836 putc ('.', file);
13837 }
13838 #endif
13839 return;
13840 case 'C':
13841 if (!COMPARISON_P (x))
13842 {
13843 output_operand_lossage ("operand is neither a constant nor a "
13844 "condition code, invalid operand code "
13845 "'C'");
13846 return;
13847 }
13848 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
13849 return;
13850 case 'F':
13851 if (!COMPARISON_P (x))
13852 {
13853 output_operand_lossage ("operand is neither a constant nor a "
13854 "condition code, invalid operand code "
13855 "'F'");
13856 return;
13857 }
13858 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13859 if (ASSEMBLER_DIALECT == ASM_ATT)
13860 putc ('.', file);
13861 #endif
13862 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
13863 return;
13864
13865 /* Like above, but reverse condition */
13866 case 'c':
13867 /* Check to see if argument to %c is really a constant
13868 and not a condition code which needs to be reversed. */
13869 if (!COMPARISON_P (x))
13870 {
13871 output_operand_lossage ("operand is neither a constant nor a "
13872 "condition code, invalid operand "
13873 "code 'c'");
13874 return;
13875 }
13876 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
13877 return;
13878 case 'f':
13879 if (!COMPARISON_P (x))
13880 {
13881 output_operand_lossage ("operand is neither a constant nor a "
13882 "condition code, invalid operand "
13883 "code 'f'");
13884 return;
13885 }
13886 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13887 if (ASSEMBLER_DIALECT == ASM_ATT)
13888 putc ('.', file);
13889 #endif
13890 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
13891 return;
13892
13893 case 'H':
13894 /* It doesn't actually matter what mode we use here, as we're
13895 only going to use this for printing. */
13896 x = adjust_address_nv (x, DImode, 8);
13897 break;
13898
13899 case '+':
13900 {
13901 rtx x;
13902
13903 if (!optimize
13904 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
13905 return;
13906
13907 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
13908 if (x)
13909 {
13910 int pred_val = INTVAL (XEXP (x, 0));
13911
13912 if (pred_val < REG_BR_PROB_BASE * 45 / 100
13913 || pred_val > REG_BR_PROB_BASE * 55 / 100)
13914 {
13915 int taken = pred_val > REG_BR_PROB_BASE / 2;
13916 int cputaken = final_forward_branch_p (current_output_insn) == 0;
13917
13918 /* Emit hints only in the cases where the default branch
13919 prediction heuristics would fail. */
13920 if (taken != cputaken)
13921 {
13922 /* We use 3e (DS) prefix for taken branches and
13923 2e (CS) prefix for not taken branches. */
13924 if (taken)
13925 fputs ("ds ; ", file);
13926 else
13927 fputs ("cs ; ", file);
13928 }
13929 }
13930 }
13931 return;
13932 }
13933
13934 case 'Y':
13935 switch (GET_CODE (x))
13936 {
13937 case NE:
13938 fputs ("neq", file);
13939 break;
13940 case EQ:
13941 fputs ("eq", file);
13942 break;
13943 case GE:
13944 case GEU:
13945 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
13946 break;
13947 case GT:
13948 case GTU:
13949 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
13950 break;
13951 case LE:
13952 case LEU:
13953 fputs ("le", file);
13954 break;
13955 case LT:
13956 case LTU:
13957 fputs ("lt", file);
13958 break;
13959 case UNORDERED:
13960 fputs ("unord", file);
13961 break;
13962 case ORDERED:
13963 fputs ("ord", file);
13964 break;
13965 case UNEQ:
13966 fputs ("ueq", file);
13967 break;
13968 case UNGE:
13969 fputs ("nlt", file);
13970 break;
13971 case UNGT:
13972 fputs ("nle", file);
13973 break;
13974 case UNLE:
13975 fputs ("ule", file);
13976 break;
13977 case UNLT:
13978 fputs ("ult", file);
13979 break;
13980 case LTGT:
13981 fputs ("une", file);
13982 break;
13983 default:
13984 output_operand_lossage ("operand is not a condition code, "
13985 "invalid operand code 'Y'");
13986 return;
13987 }
13988 return;
13989
13990 case ';':
13991 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
13992 putc (';', file);
13993 #endif
13994 return;
13995
13996 case '@':
13997 if (ASSEMBLER_DIALECT == ASM_ATT)
13998 putc ('%', file);
13999
14000 /* The kernel uses a different segment register for performance
14001 reasons; this way a system call does not have to trash the
14002 userspace segment register, which would be expensive. */
14003 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
14004 fputs ("fs", file);
14005 else
14006 fputs ("gs", file);
14007 return;
14008
14009 default:
14010 output_operand_lossage ("invalid operand code '%c'", code);
14011 }
14012 }
14013
14014 if (REG_P (x))
14015 print_reg (x, code, file);
14016
14017 else if (MEM_P (x))
14018 {
14019 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
14020 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
14021 && GET_MODE (x) != BLKmode)
14022 {
14023 const char * size;
14024 switch (GET_MODE_SIZE (GET_MODE (x)))
14025 {
14026 case 1: size = "BYTE"; break;
14027 case 2: size = "WORD"; break;
14028 case 4: size = "DWORD"; break;
14029 case 8: size = "QWORD"; break;
14030 case 12: size = "TBYTE"; break;
14031 case 16:
14032 if (GET_MODE (x) == XFmode)
14033 size = "TBYTE";
14034 else
14035 size = "XMMWORD";
14036 break;
14037 case 32: size = "YMMWORD"; break;
14038 default:
14039 gcc_unreachable ();
14040 }
14041
14042 /* Check for explicit size override (codes 'b', 'w' and 'k') */
14043 if (code == 'b')
14044 size = "BYTE";
14045 else if (code == 'w')
14046 size = "WORD";
14047 else if (code == 'k')
14048 size = "DWORD";
14049
14050 fputs (size, file);
14051 fputs (" PTR ", file);
14052 }
14053
14054 x = XEXP (x, 0);
14055 /* Avoid (%rip) for call operands. */
14056 if (CONSTANT_ADDRESS_P (x) && code == 'P'
14057 && !CONST_INT_P (x))
14058 output_addr_const (file, x);
14059 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
14060 output_operand_lossage ("invalid constraints for operand");
14061 else
14062 output_address (x);
14063 }
14064
14065 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
14066 {
14067 REAL_VALUE_TYPE r;
14068 long l;
14069
14070 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14071 REAL_VALUE_TO_TARGET_SINGLE (r, l);
14072
14073 if (ASSEMBLER_DIALECT == ASM_ATT)
14074 putc ('$', file);
14075 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14076 if (code == 'q')
14077 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
14078 else
14079 fprintf (file, "0x%08x", (unsigned int) l);
14080 }
14081
14082 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
14083 {
14084 REAL_VALUE_TYPE r;
14085 long l[2];
14086
14087 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14088 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
14089
14090 if (ASSEMBLER_DIALECT == ASM_ATT)
14091 putc ('$', file);
14092 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
14093 }
14094
14095 /* These float cases don't actually occur as immediate operands. */
14096 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
14097 {
14098 char dstr[30];
14099
14100 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14101 fputs (dstr, file);
14102 }
14103
14104 else
14105 {
14106 /* We have patterns that allow zero sets of memory, for instance.
14107 In 64-bit mode, we should probably support all 8-byte vectors,
14108 since we can in fact encode that into an immediate. */
14109 if (GET_CODE (x) == CONST_VECTOR)
14110 {
14111 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
14112 x = const0_rtx;
14113 }
14114
14115 if (code != 'P' && code != 'p')
14116 {
14117 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
14118 {
14119 if (ASSEMBLER_DIALECT == ASM_ATT)
14120 putc ('$', file);
14121 }
14122 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
14123 || GET_CODE (x) == LABEL_REF)
14124 {
14125 if (ASSEMBLER_DIALECT == ASM_ATT)
14126 putc ('$', file);
14127 else
14128 fputs ("OFFSET FLAT:", file);
14129 }
14130 }
14131 if (CONST_INT_P (x))
14132 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14133 else if (flag_pic || MACHOPIC_INDIRECT)
14134 output_pic_addr_const (file, x, code);
14135 else
14136 output_addr_const (file, x);
14137 }
14138 }
14139
14140 static bool
14141 ix86_print_operand_punct_valid_p (unsigned char code)
14142 {
14143 return (code == '@' || code == '*' || code == '+'
14144 || code == '&' || code == ';');
14145 }
14146 \f
14147 /* Print a memory operand whose address is ADDR. */
14148
14149 static void
14150 ix86_print_operand_address (FILE *file, rtx addr)
14151 {
14152 struct ix86_address parts;
14153 rtx base, index, disp;
14154 int scale;
14155 int ok = ix86_decompose_address (addr, &parts);
14156
14157 gcc_assert (ok);
14158
14159 if (parts.base && GET_CODE (parts.base) == SUBREG)
14160 {
14161 rtx tmp = SUBREG_REG (parts.base);
14162 parts.base = simplify_subreg (GET_MODE (parts.base),
14163 tmp, GET_MODE (tmp), 0);
14164 }
14165
14166 if (parts.index && GET_CODE (parts.index) == SUBREG)
14167 {
14168 rtx tmp = SUBREG_REG (parts.index);
14169 parts.index = simplify_subreg (GET_MODE (parts.index),
14170 tmp, GET_MODE (tmp), 0);
14171 }
14172
14173 base = parts.base;
14174 index = parts.index;
14175 disp = parts.disp;
14176 scale = parts.scale;
14177
14178 switch (parts.seg)
14179 {
14180 case SEG_DEFAULT:
14181 break;
14182 case SEG_FS:
14183 case SEG_GS:
14184 if (ASSEMBLER_DIALECT == ASM_ATT)
14185 putc ('%', file);
14186 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
14187 break;
14188 default:
14189 gcc_unreachable ();
14190 }
14191
14192 /* Use one byte shorter RIP relative addressing for 64bit mode. */
14193 if (TARGET_64BIT && !base && !index)
14194 {
14195 rtx symbol = disp;
14196
14197 if (GET_CODE (disp) == CONST
14198 && GET_CODE (XEXP (disp, 0)) == PLUS
14199 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14200 symbol = XEXP (XEXP (disp, 0), 0);
14201
14202 if (GET_CODE (symbol) == LABEL_REF
14203 || (GET_CODE (symbol) == SYMBOL_REF
14204 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
14205 base = pc_rtx;
14206 }
14207 if (!base && !index)
14208 {
14209 /* A displacement-only address requires special attention. */
14210
14211 if (CONST_INT_P (disp))
14212 {
14213 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
14214 fputs ("ds:", file);
14215 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
14216 }
14217 else if (flag_pic)
14218 output_pic_addr_const (file, disp, 0);
14219 else
14220 output_addr_const (file, disp);
14221 }
14222 else
14223 {
14224 int code = 0;
14225
14226 /* Print SImode registers for zero-extended addresses to force
14227 addr32 prefix. Otherwise print DImode registers to avoid it. */
14228 if (TARGET_64BIT)
14229 code = ((GET_CODE (addr) == ZERO_EXTEND
14230 || GET_CODE (addr) == AND)
14231 ? 'l'
14232 : 'q');
14233
14234 if (ASSEMBLER_DIALECT == ASM_ATT)
14235 {
14236 if (disp)
14237 {
14238 if (flag_pic)
14239 output_pic_addr_const (file, disp, 0);
14240 else if (GET_CODE (disp) == LABEL_REF)
14241 output_asm_label (disp);
14242 else
14243 output_addr_const (file, disp);
14244 }
14245
14246 putc ('(', file);
14247 if (base)
14248 print_reg (base, code, file);
14249 if (index)
14250 {
14251 putc (',', file);
14252 print_reg (index, code, file);
14253 if (scale != 1)
14254 fprintf (file, ",%d", scale);
14255 }
14256 putc (')', file);
14257 }
14258 else
14259 {
14260 rtx offset = NULL_RTX;
14261
14262 if (disp)
14263 {
14264 /* Pull out the offset of a symbol; print any symbol itself. */
14265 if (GET_CODE (disp) == CONST
14266 && GET_CODE (XEXP (disp, 0)) == PLUS
14267 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14268 {
14269 offset = XEXP (XEXP (disp, 0), 1);
14270 disp = gen_rtx_CONST (VOIDmode,
14271 XEXP (XEXP (disp, 0), 0));
14272 }
14273
14274 if (flag_pic)
14275 output_pic_addr_const (file, disp, 0);
14276 else if (GET_CODE (disp) == LABEL_REF)
14277 output_asm_label (disp);
14278 else if (CONST_INT_P (disp))
14279 offset = disp;
14280 else
14281 output_addr_const (file, disp);
14282 }
14283
14284 putc ('[', file);
14285 if (base)
14286 {
14287 print_reg (base, code, file);
14288 if (offset)
14289 {
14290 if (INTVAL (offset) >= 0)
14291 putc ('+', file);
14292 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14293 }
14294 }
14295 else if (offset)
14296 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14297 else
14298 putc ('0', file);
14299
14300 if (index)
14301 {
14302 putc ('+', file);
14303 print_reg (index, code, file);
14304 if (scale != 1)
14305 fprintf (file, "*%d", scale);
14306 }
14307 putc (']', file);
14308 }
14309 }
14310 }
14311
14312 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14313
14314 static bool
14315 i386_asm_output_addr_const_extra (FILE *file, rtx x)
14316 {
14317 rtx op;
14318
14319 if (GET_CODE (x) != UNSPEC)
14320 return false;
14321
14322 op = XVECEXP (x, 0, 0);
14323 switch (XINT (x, 1))
14324 {
14325 case UNSPEC_GOTTPOFF:
14326 output_addr_const (file, op);
14327 /* FIXME: This might be @TPOFF in Sun ld. */
14328 fputs ("@gottpoff", file);
14329 break;
14330 case UNSPEC_TPOFF:
14331 output_addr_const (file, op);
14332 fputs ("@tpoff", file);
14333 break;
14334 case UNSPEC_NTPOFF:
14335 output_addr_const (file, op);
14336 if (TARGET_64BIT)
14337 fputs ("@tpoff", file);
14338 else
14339 fputs ("@ntpoff", file);
14340 break;
14341 case UNSPEC_DTPOFF:
14342 output_addr_const (file, op);
14343 fputs ("@dtpoff", file);
14344 break;
14345 case UNSPEC_GOTNTPOFF:
14346 output_addr_const (file, op);
14347 if (TARGET_64BIT)
14348 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14349 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
14350 else
14351 fputs ("@gotntpoff", file);
14352 break;
14353 case UNSPEC_INDNTPOFF:
14354 output_addr_const (file, op);
14355 fputs ("@indntpoff", file);
14356 break;
14357 #if TARGET_MACHO
14358 case UNSPEC_MACHOPIC_OFFSET:
14359 output_addr_const (file, op);
14360 putc ('-', file);
14361 machopic_output_function_base_name (file);
14362 break;
14363 #endif
14364
14365 case UNSPEC_STACK_CHECK:
14366 {
14367 int offset;
14368
14369 gcc_assert (flag_split_stack);
14370
14371 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14372 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
14373 #else
14374 gcc_unreachable ();
14375 #endif
14376
14377 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
14378 }
14379 break;
14380
14381 default:
14382 return false;
14383 }
14384
14385 return true;
14386 }
14387 \f
14388 /* Split one or more double-mode RTL references into pairs of half-mode
14389 references. The RTL can be REG, offsettable MEM, integer constant, or
14390 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
14391 split and "num" is its length. lo_half and hi_half are output arrays
14392 that parallel "operands". */
14393
14394 void
14395 split_double_mode (enum machine_mode mode, rtx operands[],
14396 int num, rtx lo_half[], rtx hi_half[])
14397 {
14398 enum machine_mode half_mode;
14399 unsigned int byte;
14400
14401 switch (mode)
14402 {
14403 case TImode:
14404 half_mode = DImode;
14405 break;
14406 case DImode:
14407 half_mode = SImode;
14408 break;
14409 default:
14410 gcc_unreachable ();
14411 }
14412
14413 byte = GET_MODE_SIZE (half_mode);
14414
14415 while (num--)
14416 {
14417 rtx op = operands[num];
14418
14419 /* simplify_subreg refuses to split volatile memory addresses,
14420 but we still have to handle them. */
14421 if (MEM_P (op))
14422 {
14423 lo_half[num] = adjust_address (op, half_mode, 0);
14424 hi_half[num] = adjust_address (op, half_mode, byte);
14425 }
14426 else
14427 {
14428 lo_half[num] = simplify_gen_subreg (half_mode, op,
14429 GET_MODE (op) == VOIDmode
14430 ? mode : GET_MODE (op), 0);
14431 hi_half[num] = simplify_gen_subreg (half_mode, op,
14432 GET_MODE (op) == VOIDmode
14433 ? mode : GET_MODE (op), byte);
14434 }
14435 }
14436 }
14437 \f
14438 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14439 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14440 is the expression of the binary operation. The output may either be
14441 emitted here, or returned to the caller, like all output_* functions.
14442
14443 There is no guarantee that the operands are the same mode, as they
14444 might be within FLOAT or FLOAT_EXTEND expressions. */
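/* As a concrete illustration of the templates built below: for a PLUS
   whose second source operand is a MEM, the non-SSE path produces
   "fadd%Z2\t%2", while with AVX enabled and SFmode operands it
   produces "vaddss\t{%2, %1, %0|%0, %1, %2}".  */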
14445
14446 #ifndef SYSV386_COMPAT
14447 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14448 wants to fix the assemblers because that causes incompatibility
14449 with gcc. No-one wants to fix gcc because that causes
14450 incompatibility with assemblers... You can use the option of
14451 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14452 #define SYSV386_COMPAT 1
14453 #endif
14454
14455 const char *
14456 output_387_binary_op (rtx insn, rtx *operands)
14457 {
14458 static char buf[40];
14459 const char *p;
14460 const char *ssep;
14461 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
14462
14463 #ifdef ENABLE_CHECKING
14464 /* Even if we do not want to check the inputs, this documents input
14465 constraints, which helps in understanding the following code. */
14466 if (STACK_REG_P (operands[0])
14467 && ((REG_P (operands[1])
14468 && REGNO (operands[0]) == REGNO (operands[1])
14469 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
14470 || (REG_P (operands[2])
14471 && REGNO (operands[0]) == REGNO (operands[2])
14472 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
14473 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
14474 ; /* ok */
14475 else
14476 gcc_assert (is_sse);
14477 #endif
14478
14479 switch (GET_CODE (operands[3]))
14480 {
14481 case PLUS:
14482 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14483 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14484 p = "fiadd";
14485 else
14486 p = "fadd";
14487 ssep = "vadd";
14488 break;
14489
14490 case MINUS:
14491 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14492 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14493 p = "fisub";
14494 else
14495 p = "fsub";
14496 ssep = "vsub";
14497 break;
14498
14499 case MULT:
14500 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14501 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14502 p = "fimul";
14503 else
14504 p = "fmul";
14505 ssep = "vmul";
14506 break;
14507
14508 case DIV:
14509 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14510 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14511 p = "fidiv";
14512 else
14513 p = "fdiv";
14514 ssep = "vdiv";
14515 break;
14516
14517 default:
14518 gcc_unreachable ();
14519 }
14520
14521 if (is_sse)
14522 {
14523 if (TARGET_AVX)
14524 {
14525 strcpy (buf, ssep);
14526 if (GET_MODE (operands[0]) == SFmode)
14527 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
14528 else
14529 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
14530 }
14531 else
14532 {
14533 strcpy (buf, ssep + 1);
14534 if (GET_MODE (operands[0]) == SFmode)
14535 strcat (buf, "ss\t{%2, %0|%0, %2}");
14536 else
14537 strcat (buf, "sd\t{%2, %0|%0, %2}");
14538 }
14539 return buf;
14540 }
14541 strcpy (buf, p);
14542
14543 switch (GET_CODE (operands[3]))
14544 {
14545 case MULT:
14546 case PLUS:
14547 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
14548 {
14549 rtx temp = operands[2];
14550 operands[2] = operands[1];
14551 operands[1] = temp;
14552 }
14553
14554 /* We now know operands[0] == operands[1]. */
14555
14556 if (MEM_P (operands[2]))
14557 {
14558 p = "%Z2\t%2";
14559 break;
14560 }
14561
14562 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14563 {
14564 if (STACK_TOP_P (operands[0]))
14565 /* How is it that we are storing to a dead operand[2]?
14566 Well, presumably operands[1] is dead too. We can't
14567 store the result to st(0) as st(0) gets popped on this
14568 instruction. Instead store to operands[2] (which I
14569 think has to be st(1)). st(1) will be popped later.
14570 gcc <= 2.8.1 didn't have this check and generated
14571 assembly code that the Unixware assembler rejected. */
14572 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14573 else
14574 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14575 break;
14576 }
14577
14578 if (STACK_TOP_P (operands[0]))
14579 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14580 else
14581 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14582 break;
14583
14584 case MINUS:
14585 case DIV:
14586 if (MEM_P (operands[1]))
14587 {
14588 p = "r%Z1\t%1";
14589 break;
14590 }
14591
14592 if (MEM_P (operands[2]))
14593 {
14594 p = "%Z2\t%2";
14595 break;
14596 }
14597
14598 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14599 {
14600 #if SYSV386_COMPAT
14601 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
14602 derived assemblers, confusingly reverse the direction of
14603 the operation for fsub{r} and fdiv{r} when the
14604 destination register is not st(0). The Intel assembler
14605 doesn't have this brain damage. Read !SYSV386_COMPAT to
14606 figure out what the hardware really does. */
14607 if (STACK_TOP_P (operands[0]))
14608 p = "{p\t%0, %2|rp\t%2, %0}";
14609 else
14610 p = "{rp\t%2, %0|p\t%0, %2}";
14611 #else
14612 if (STACK_TOP_P (operands[0]))
14613 /* As above for fmul/fadd, we can't store to st(0). */
14614 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14615 else
14616 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14617 #endif
14618 break;
14619 }
14620
14621 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
14622 {
14623 #if SYSV386_COMPAT
14624 if (STACK_TOP_P (operands[0]))
14625 p = "{rp\t%0, %1|p\t%1, %0}";
14626 else
14627 p = "{p\t%1, %0|rp\t%0, %1}";
14628 #else
14629 if (STACK_TOP_P (operands[0]))
14630 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
14631 else
14632 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
14633 #endif
14634 break;
14635 }
14636
14637 if (STACK_TOP_P (operands[0]))
14638 {
14639 if (STACK_TOP_P (operands[1]))
14640 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14641 else
14642 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
14643 break;
14644 }
14645 else if (STACK_TOP_P (operands[1]))
14646 {
14647 #if SYSV386_COMPAT
14648 p = "{\t%1, %0|r\t%0, %1}";
14649 #else
14650 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
14651 #endif
14652 }
14653 else
14654 {
14655 #if SYSV386_COMPAT
14656 p = "{r\t%2, %0|\t%0, %2}";
14657 #else
14658 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14659 #endif
14660 }
14661 break;
14662
14663 default:
14664 gcc_unreachable ();
14665 }
14666
14667 strcat (buf, p);
14668 return buf;
14669 }
14670
14671 /* Return needed mode for entity in optimize_mode_switching pass. */
14672
14673 int
14674 ix86_mode_needed (int entity, rtx insn)
14675 {
14676 enum attr_i387_cw mode;
14677
14678 /* The mode UNINITIALIZED is used to store the control word after a
14679 function call or ASM pattern. The mode ANY specifies that the
14680 function has no requirements on the control word and makes no
14681 changes in the bits we are interested in. */
14682
14683 if (CALL_P (insn)
14684 || (NONJUMP_INSN_P (insn)
14685 && (asm_noperands (PATTERN (insn)) >= 0
14686 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
14687 return I387_CW_UNINITIALIZED;
14688
14689 if (recog_memoized (insn) < 0)
14690 return I387_CW_ANY;
14691
14692 mode = get_attr_i387_cw (insn);
14693
14694 switch (entity)
14695 {
14696 case I387_TRUNC:
14697 if (mode == I387_CW_TRUNC)
14698 return mode;
14699 break;
14700
14701 case I387_FLOOR:
14702 if (mode == I387_CW_FLOOR)
14703 return mode;
14704 break;
14705
14706 case I387_CEIL:
14707 if (mode == I387_CW_CEIL)
14708 return mode;
14709 break;
14710
14711 case I387_MASK_PM:
14712 if (mode == I387_CW_MASK_PM)
14713 return mode;
14714 break;
14715
14716 default:
14717 gcc_unreachable ();
14718 }
14719
14720 return I387_CW_ANY;
14721 }
14722
14723 /* Output code to initialize control word copies used by trunc?f?i and
14724 rounding patterns. CURRENT_MODE is set to current control word,
14725 while NEW_MODE is set to new control word. */
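/* The bits manipulated below are the standard x87 control word fields:
   bits 10-11 form the rounding control (00 = to nearest, 01 = down,
   10 = up, 11 = toward zero), so ORing in 0x0c00 selects truncation,
   and 0x0400 or 0x0800 (after clearing the field) select floor or
   ceiling; bit 5 (0x0020) masks the precision exception for
   nearbyint.  */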
14726
14727 void
14728 emit_i387_cw_initialization (int mode)
14729 {
14730 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
14731 rtx new_mode;
14732
14733 enum ix86_stack_slot slot;
14734
14735 rtx reg = gen_reg_rtx (HImode);
14736
14737 emit_insn (gen_x86_fnstcw_1 (stored_mode));
14738 emit_move_insn (reg, copy_rtx (stored_mode));
14739
14740 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
14741 || optimize_function_for_size_p (cfun))
14742 {
14743 switch (mode)
14744 {
14745 case I387_CW_TRUNC:
14746 /* round toward zero (truncate) */
14747 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
14748 slot = SLOT_CW_TRUNC;
14749 break;
14750
14751 case I387_CW_FLOOR:
14752 /* round down toward -oo */
14753 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14754 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
14755 slot = SLOT_CW_FLOOR;
14756 break;
14757
14758 case I387_CW_CEIL:
14759 /* round up toward +oo */
14760 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14761 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
14762 slot = SLOT_CW_CEIL;
14763 break;
14764
14765 case I387_CW_MASK_PM:
14766 /* mask precision exception for nearbyint() */
14767 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14768 slot = SLOT_CW_MASK_PM;
14769 break;
14770
14771 default:
14772 gcc_unreachable ();
14773 }
14774 }
14775 else
14776 {
14777 switch (mode)
14778 {
14779 case I387_CW_TRUNC:
14780 /* round toward zero (truncate) */
14781 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
14782 slot = SLOT_CW_TRUNC;
14783 break;
14784
14785 case I387_CW_FLOOR:
14786 /* round down toward -oo */
14787 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
14788 slot = SLOT_CW_FLOOR;
14789 break;
14790
14791 case I387_CW_CEIL:
14792 /* round up toward +oo */
14793 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
14794 slot = SLOT_CW_CEIL;
14795 break;
14796
14797 case I387_CW_MASK_PM:
14798 /* mask precision exception for nearbyint() */
14799 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14800 slot = SLOT_CW_MASK_PM;
14801 break;
14802
14803 default:
14804 gcc_unreachable ();
14805 }
14806 }
14807
14808 gcc_assert (slot < MAX_386_STACK_LOCALS);
14809
14810 new_mode = assign_386_stack_local (HImode, slot);
14811 emit_move_insn (new_mode, reg);
14812 }
14813
14814 /* Output code for INSN to convert a float to a signed int. OPERANDS
14815 are the insn operands. The output may be [HSD]Imode and the input
14816 operand may be [SDX]Fmode. */
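/* Sketch of the emitted sequences: with fisttp available a single
   "fisttp%Z0\t%0" store suffices; otherwise the control word is
   switched to the truncating mode with "fldcw\t%3", a popping or
   non-popping "fistp"/"fist" store is emitted, and the original
   control word is restored with "fldcw\t%2".  */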
14817
14818 const char *
14819 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
14820 {
14821 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14822 int dimode_p = GET_MODE (operands[0]) == DImode;
14823 int round_mode = get_attr_i387_cw (insn);
14824
14825 /* Jump through a hoop or two for DImode, since the hardware has no
14826 non-popping instruction. We used to do this a different way, but
14827 that was somewhat fragile and broke with post-reload splitters. */
14828 if ((dimode_p || fisttp) && !stack_top_dies)
14829 output_asm_insn ("fld\t%y1", operands);
14830
14831 gcc_assert (STACK_TOP_P (operands[1]));
14832 gcc_assert (MEM_P (operands[0]));
14833 gcc_assert (GET_MODE (operands[1]) != TFmode);
14834
14835 if (fisttp)
14836 output_asm_insn ("fisttp%Z0\t%0", operands);
14837 else
14838 {
14839 if (round_mode != I387_CW_ANY)
14840 output_asm_insn ("fldcw\t%3", operands);
14841 if (stack_top_dies || dimode_p)
14842 output_asm_insn ("fistp%Z0\t%0", operands);
14843 else
14844 output_asm_insn ("fist%Z0\t%0", operands);
14845 if (round_mode != I387_CW_ANY)
14846 output_asm_insn ("fldcw\t%2", operands);
14847 }
14848
14849 return "";
14850 }
14851
14852 /* Output code for x87 ffreep insn. The OPNO argument, which may only
14853 have the values zero or one, indicates the ffreep insn's operand
14854 from the OPERANDS array. */
14855
14856 static const char *
14857 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
14858 {
14859 if (TARGET_USE_FFREEP)
14860 #ifdef HAVE_AS_IX86_FFREEP
14861 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
14862 #else
14863 {
14864 static char retval[32];
14865 int regno = REGNO (operands[opno]);
14866
14867 gcc_assert (FP_REGNO_P (regno));
14868
14869 regno -= FIRST_STACK_REG;
14870
14871 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
14872 return retval;
14873 }
14874 #endif
14875
14876 return opno ? "fstp\t%y1" : "fstp\t%y0";
14877 }
14878
14879
14880 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
14881 should be used. UNORDERED_P is true when fucom should be used. */
14882
14883 const char *
14884 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
14885 {
14886 int stack_top_dies;
14887 rtx cmp_op0, cmp_op1;
14888 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
14889
14890 if (eflags_p)
14891 {
14892 cmp_op0 = operands[0];
14893 cmp_op1 = operands[1];
14894 }
14895 else
14896 {
14897 cmp_op0 = operands[1];
14898 cmp_op1 = operands[2];
14899 }
14900
14901 if (is_sse)
14902 {
14903 if (GET_MODE (operands[0]) == SFmode)
14904 if (unordered_p)
14905 return "%vucomiss\t{%1, %0|%0, %1}";
14906 else
14907 return "%vcomiss\t{%1, %0|%0, %1}";
14908 else
14909 if (unordered_p)
14910 return "%vucomisd\t{%1, %0|%0, %1}";
14911 else
14912 return "%vcomisd\t{%1, %0|%0, %1}";
14913 }
14914
14915 gcc_assert (STACK_TOP_P (cmp_op0));
14916
14917 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14918
14919 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
14920 {
14921 if (stack_top_dies)
14922 {
14923 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
14924 return output_387_ffreep (operands, 1);
14925 }
14926 else
14927 return "ftst\n\tfnstsw\t%0";
14928 }
14929
14930 if (STACK_REG_P (cmp_op1)
14931 && stack_top_dies
14932 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
14933 && REGNO (cmp_op1) != FIRST_STACK_REG)
14934 {
14935 /* If both the top of the 387 stack and the other operand (also
14936 a stack register) die, then this must be a
14937 `fcompp' float compare. */
14938
14939 if (eflags_p)
14940 {
14941 /* There is no double popping fcomi variant. Fortunately,
14942 eflags is immune from the fstp's cc clobbering. */
14943 if (unordered_p)
14944 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
14945 else
14946 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
14947 return output_387_ffreep (operands, 0);
14948 }
14949 else
14950 {
14951 if (unordered_p)
14952 return "fucompp\n\tfnstsw\t%0";
14953 else
14954 return "fcompp\n\tfnstsw\t%0";
14955 }
14956 }
14957 else
14958 {
14959 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
14960
14961 static const char * const alt[16] =
14962 {
14963 "fcom%Z2\t%y2\n\tfnstsw\t%0",
14964 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
14965 "fucom%Z2\t%y2\n\tfnstsw\t%0",
14966 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
14967
14968 "ficom%Z2\t%y2\n\tfnstsw\t%0",
14969 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
14970 NULL,
14971 NULL,
14972
14973 "fcomi\t{%y1, %0|%0, %y1}",
14974 "fcomip\t{%y1, %0|%0, %y1}",
14975 "fucomi\t{%y1, %0|%0, %y1}",
14976 "fucomip\t{%y1, %0|%0, %y1}",
14977
14978 NULL,
14979 NULL,
14980 NULL,
14981 NULL
14982 };
14983
14984 int mask;
14985 const char *ret;
14986
14987 mask = eflags_p << 3;
14988 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
14989 mask |= unordered_p << 1;
14990 mask |= stack_top_dies;
14991
14992 gcc_assert (mask < 16);
14993 ret = alt[mask];
14994 gcc_assert (ret);
14995
14996 return ret;
14997 }
14998 }
14999
15000 void
15001 ix86_output_addr_vec_elt (FILE *file, int value)
15002 {
15003 const char *directive = ASM_LONG;
15004
15005 #ifdef ASM_QUAD
15006 if (TARGET_LP64)
15007 directive = ASM_QUAD;
15008 #else
15009 gcc_assert (!TARGET_64BIT);
15010 #endif
15011
15012 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
15013 }
15014
15015 void
15016 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
15017 {
15018 const char *directive = ASM_LONG;
15019
15020 #ifdef ASM_QUAD
15021 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
15022 directive = ASM_QUAD;
15023 #else
15024 gcc_assert (!TARGET_64BIT);
15025 #endif
15026 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15027 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
15028 fprintf (file, "%s%s%d-%s%d\n",
15029 directive, LPREFIX, value, LPREFIX, rel);
15030 else if (HAVE_AS_GOTOFF_IN_DATA)
15031 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
15032 #if TARGET_MACHO
15033 else if (TARGET_MACHO)
15034 {
15035 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
15036 machopic_output_function_base_name (file);
15037 putc ('\n', file);
15038 }
15039 #endif
15040 else
15041 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
15042 GOT_SYMBOL_NAME, LPREFIX, value);
15043 }
15044 \f
15045 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
15046 for the target. */
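/* Illustratively, clearing a SImode register comes out as either
   "xor %reg, %reg" (emitted together with an explicit flags clobber)
   or "mov $0, %reg", depending on TARGET_USE_MOV0 and whether the
   insn is being optimized for speed.  */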
15047
15048 void
15049 ix86_expand_clear (rtx dest)
15050 {
15051 rtx tmp;
15052
15053 /* We play register width games, which are only valid after reload. */
15054 gcc_assert (reload_completed);
15055
15056 /* Avoid HImode and its attendant prefix byte. */
15057 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
15058 dest = gen_rtx_REG (SImode, REGNO (dest));
15059 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
15060
15061 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
15062 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
15063 {
15064 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15065 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
15066 }
15067
15068 emit_insn (tmp);
15069 }
15070
15071 /* X is an unchanging MEM. If it is a constant pool reference, return
15072 the constant pool rtx, else NULL. */
15073
15074 rtx
15075 maybe_get_pool_constant (rtx x)
15076 {
15077 x = ix86_delegitimize_address (XEXP (x, 0));
15078
15079 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
15080 return get_pool_constant (x);
15081
15082 return NULL_RTX;
15083 }
15084
15085 void
15086 ix86_expand_move (enum machine_mode mode, rtx operands[])
15087 {
15088 rtx op0, op1;
15089 enum tls_model model;
15090
15091 op0 = operands[0];
15092 op1 = operands[1];
15093
15094 if (GET_CODE (op1) == SYMBOL_REF)
15095 {
15096 model = SYMBOL_REF_TLS_MODEL (op1);
15097 if (model)
15098 {
15099 op1 = legitimize_tls_address (op1, model, true);
15100 op1 = force_operand (op1, op0);
15101 if (op1 == op0)
15102 return;
15103 if (GET_MODE (op1) != mode)
15104 op1 = convert_to_mode (mode, op1, 1);
15105 }
15106 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15107 && SYMBOL_REF_DLLIMPORT_P (op1))
15108 op1 = legitimize_dllimport_symbol (op1, false);
15109 }
15110 else if (GET_CODE (op1) == CONST
15111 && GET_CODE (XEXP (op1, 0)) == PLUS
15112 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
15113 {
15114 rtx addend = XEXP (XEXP (op1, 0), 1);
15115 rtx symbol = XEXP (XEXP (op1, 0), 0);
15116 rtx tmp = NULL;
15117
15118 model = SYMBOL_REF_TLS_MODEL (symbol);
15119 if (model)
15120 tmp = legitimize_tls_address (symbol, model, true);
15121 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15122 && SYMBOL_REF_DLLIMPORT_P (symbol))
15123 tmp = legitimize_dllimport_symbol (symbol, true);
15124
15125 if (tmp)
15126 {
15127 tmp = force_operand (tmp, NULL);
15128 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
15129 op0, 1, OPTAB_DIRECT);
15130 if (tmp == op0)
15131 return;
15132 if (GET_MODE (tmp) != mode)
15133 op1 = convert_to_mode (mode, tmp, 1);
15134 }
15135 }
15136
15137 if ((flag_pic || MACHOPIC_INDIRECT)
15138 && symbolic_operand (op1, mode))
15139 {
15140 if (TARGET_MACHO && !TARGET_64BIT)
15141 {
15142 #if TARGET_MACHO
15143 /* dynamic-no-pic */
15144 if (MACHOPIC_INDIRECT)
15145 {
15146 rtx temp = ((reload_in_progress
15147 || ((op0 && REG_P (op0))
15148 && mode == Pmode))
15149 ? op0 : gen_reg_rtx (Pmode));
15150 op1 = machopic_indirect_data_reference (op1, temp);
15151 if (MACHOPIC_PURE)
15152 op1 = machopic_legitimize_pic_address (op1, mode,
15153 temp == op1 ? 0 : temp);
15154 }
15155 if (op0 != op1 && GET_CODE (op0) != MEM)
15156 {
15157 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
15158 emit_insn (insn);
15159 return;
15160 }
15161 if (GET_CODE (op0) == MEM)
15162 op1 = force_reg (Pmode, op1);
15163 else
15164 {
15165 rtx temp = op0;
15166 if (GET_CODE (temp) != REG)
15167 temp = gen_reg_rtx (Pmode);
15168 temp = legitimize_pic_address (op1, temp);
15169 if (temp == op0)
15170 return;
15171 op1 = temp;
15172 }
15173 /* dynamic-no-pic */
15174 #endif
15175 }
15176 else
15177 {
15178 if (MEM_P (op0))
15179 op1 = force_reg (mode, op1);
15180 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
15181 {
15182 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
15183 op1 = legitimize_pic_address (op1, reg);
15184 if (op0 == op1)
15185 return;
15186 if (GET_MODE (op1) != mode)
15187 op1 = convert_to_mode (mode, op1, 1);
15188 }
15189 }
15190 }
15191 else
15192 {
15193 if (MEM_P (op0)
15194 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
15195 || !push_operand (op0, mode))
15196 && MEM_P (op1))
15197 op1 = force_reg (mode, op1);
15198
15199 if (push_operand (op0, mode)
15200 && ! general_no_elim_operand (op1, mode))
15201 op1 = copy_to_mode_reg (mode, op1);
15202
15203 /* Force large constants in 64bit compilation into a register
15204 to get them CSEed. */
15205 if (can_create_pseudo_p ()
15206 && (mode == DImode) && TARGET_64BIT
15207 && immediate_operand (op1, mode)
15208 && !x86_64_zext_immediate_operand (op1, VOIDmode)
15209 && !register_operand (op0, mode)
15210 && optimize)
15211 op1 = copy_to_mode_reg (mode, op1);
15212
15213 if (can_create_pseudo_p ()
15214 && FLOAT_MODE_P (mode)
15215 && GET_CODE (op1) == CONST_DOUBLE)
15216 {
15217 /* If we are loading a floating point constant to a register,
15218 force the value to memory now, since we'll get better code
15219 out of the back end. */
15220
15221 op1 = validize_mem (force_const_mem (mode, op1));
15222 if (!register_operand (op0, mode))
15223 {
15224 rtx temp = gen_reg_rtx (mode);
15225 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
15226 emit_move_insn (op0, temp);
15227 return;
15228 }
15229 }
15230 }
15231
15232 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15233 }
15234
15235 void
15236 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
15237 {
15238 rtx op0 = operands[0], op1 = operands[1];
15239 unsigned int align = GET_MODE_ALIGNMENT (mode);
15240
15241 /* Force constants other than zero into memory. We do not know how
15242 the instructions used to build constants modify the upper 64 bits
15243 of the register; once we have that information we may be able
15244 to handle some of them more efficiently. */
15245 if (can_create_pseudo_p ()
15246 && register_operand (op0, mode)
15247 && (CONSTANT_P (op1)
15248 || (GET_CODE (op1) == SUBREG
15249 && CONSTANT_P (SUBREG_REG (op1))))
15250 && !standard_sse_constant_p (op1))
15251 op1 = validize_mem (force_const_mem (mode, op1));
15252
15253 /* We need to check memory alignment for SSE modes since attributes
15254 can make operands unaligned. */
15255 if (can_create_pseudo_p ()
15256 && SSE_REG_MODE_P (mode)
15257 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
15258 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
15259 {
15260 rtx tmp[2];
15261
15262 /* ix86_expand_vector_move_misalign() does not like constants ... */
15263 if (CONSTANT_P (op1)
15264 || (GET_CODE (op1) == SUBREG
15265 && CONSTANT_P (SUBREG_REG (op1))))
15266 op1 = validize_mem (force_const_mem (mode, op1));
15267
15268 /* ... nor both arguments in memory. */
15269 if (!register_operand (op0, mode)
15270 && !register_operand (op1, mode))
15271 op1 = force_reg (mode, op1);
15272
15273 tmp[0] = op0; tmp[1] = op1;
15274 ix86_expand_vector_move_misalign (mode, tmp);
15275 return;
15276 }
15277
15278 /* Make operand1 a register if it isn't already. */
15279 if (can_create_pseudo_p ()
15280 && !register_operand (op0, mode)
15281 && !register_operand (op1, mode))
15282 {
15283 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
15284 return;
15285 }
15286
15287 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15288 }
15289
15290 /* Split 32-byte AVX unaligned load and store if needed. */
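/* Sketch: with TARGET_AVX256_SPLIT_UNALIGNED_LOAD a 32-byte unaligned
   load becomes two 16-byte loads combined via VEC_CONCAT (typically a
   vinsertf128); with TARGET_AVX256_SPLIT_UNALIGNED_STORE a 32-byte
   unaligned store becomes two vextractf128 stores of the low and high
   halves; otherwise a single 32-byte unaligned move is emitted.  */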
15291
15292 static void
15293 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
15294 {
15295 rtx m;
15296 rtx (*extract) (rtx, rtx, rtx);
15297 rtx (*move_unaligned) (rtx, rtx);
15298 enum machine_mode mode;
15299
15300 switch (GET_MODE (op0))
15301 {
15302 default:
15303 gcc_unreachable ();
15304 case V32QImode:
15305 extract = gen_avx_vextractf128v32qi;
15306 move_unaligned = gen_avx_movdqu256;
15307 mode = V16QImode;
15308 break;
15309 case V8SFmode:
15310 extract = gen_avx_vextractf128v8sf;
15311 move_unaligned = gen_avx_movups256;
15312 mode = V4SFmode;
15313 break;
15314 case V4DFmode:
15315 extract = gen_avx_vextractf128v4df;
15316 move_unaligned = gen_avx_movupd256;
15317 mode = V2DFmode;
15318 break;
15319 }
15320
15321 if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
15322 {
15323 rtx r = gen_reg_rtx (mode);
15324 m = adjust_address (op1, mode, 0);
15325 emit_move_insn (r, m);
15326 m = adjust_address (op1, mode, 16);
15327 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
15328 emit_move_insn (op0, r);
15329 }
15330 else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE)
15331 {
15332 m = adjust_address (op0, mode, 0);
15333 emit_insn (extract (m, op1, const0_rtx));
15334 m = adjust_address (op0, mode, 16);
15335 emit_insn (extract (m, op1, const1_rtx));
15336 }
15337 else
15338 emit_insn (move_unaligned (op0, op1));
15339 }
15340
15341 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
15342 straight to ix86_expand_vector_move. */
15343 /* Code generation for scalar reg-reg moves of single and double precision data:
15344 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
15345 movaps reg, reg
15346 else
15347 movss reg, reg
15348 if (x86_sse_partial_reg_dependency == true)
15349 movapd reg, reg
15350 else
15351 movsd reg, reg
15352
15353 Code generation for scalar loads of double precision data:
15354 if (x86_sse_split_regs == true)
15355 movlpd mem, reg (gas syntax)
15356 else
15357 movsd mem, reg
15358
15359 Code generation for unaligned packed loads of single precision data
15360 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
15361 if (x86_sse_unaligned_move_optimal)
15362 movups mem, reg
15363
15364 if (x86_sse_partial_reg_dependency == true)
15365 {
15366 xorps reg, reg
15367 movlps mem, reg
15368 movhps mem+8, reg
15369 }
15370 else
15371 {
15372 movlps mem, reg
15373 movhps mem+8, reg
15374 }
15375
15376 Code generation for unaligned packed loads of double precision data
15377 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
15378 if (x86_sse_unaligned_move_optimal)
15379 movupd mem, reg
15380
15381 if (x86_sse_split_regs == true)
15382 {
15383 movlpd mem, reg
15384 movhpd mem+8, reg
15385 }
15386 else
15387 {
15388 movsd mem, reg
15389 movhpd mem+8, reg
15390 }
15391 */
15392
15393 void
15394 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
15395 {
15396 rtx op0, op1, m;
15397
15398 op0 = operands[0];
15399 op1 = operands[1];
15400
15401 if (TARGET_AVX)
15402 {
15403 switch (GET_MODE_CLASS (mode))
15404 {
15405 case MODE_VECTOR_INT:
15406 case MODE_INT:
15407 switch (GET_MODE_SIZE (mode))
15408 {
15409 case 16:
15410 /* If we're optimizing for size, movups is the smallest. */
15411 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15412 {
15413 op0 = gen_lowpart (V4SFmode, op0);
15414 op1 = gen_lowpart (V4SFmode, op1);
15415 emit_insn (gen_sse_movups (op0, op1));
15416 return;
15417 }
15418 op0 = gen_lowpart (V16QImode, op0);
15419 op1 = gen_lowpart (V16QImode, op1);
15420 emit_insn (gen_sse2_movdqu (op0, op1));
15421 break;
15422 case 32:
15423 op0 = gen_lowpart (V32QImode, op0);
15424 op1 = gen_lowpart (V32QImode, op1);
15425 ix86_avx256_split_vector_move_misalign (op0, op1);
15426 break;
15427 default:
15428 gcc_unreachable ();
15429 }
15430 break;
15431 case MODE_VECTOR_FLOAT:
15432 op0 = gen_lowpart (mode, op0);
15433 op1 = gen_lowpart (mode, op1);
15434
15435 switch (mode)
15436 {
15437 case V4SFmode:
15438 emit_insn (gen_sse_movups (op0, op1));
15439 break;
15440 case V8SFmode:
15441 ix86_avx256_split_vector_move_misalign (op0, op1);
15442 break;
15443 case V2DFmode:
15444 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15445 {
15446 op0 = gen_lowpart (V4SFmode, op0);
15447 op1 = gen_lowpart (V4SFmode, op1);
15448 emit_insn (gen_sse_movups (op0, op1));
15449 return;
15450 }
15451 emit_insn (gen_sse2_movupd (op0, op1));
15452 break;
15453 case V4DFmode:
15454 ix86_avx256_split_vector_move_misalign (op0, op1);
15455 break;
15456 default:
15457 gcc_unreachable ();
15458 }
15459 break;
15460
15461 default:
15462 gcc_unreachable ();
15463 }
15464
15465 return;
15466 }
15467
15468 if (MEM_P (op1))
15469 {
15470 /* If we're optimizing for size, movups is the smallest. */
15471 if (optimize_insn_for_size_p ()
15472 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15473 {
15474 op0 = gen_lowpart (V4SFmode, op0);
15475 op1 = gen_lowpart (V4SFmode, op1);
15476 emit_insn (gen_sse_movups (op0, op1));
15477 return;
15478 }
15479
15480 /* ??? If we have typed data, then it would appear that using
15481 movdqu is the only way to get unaligned data loaded with
15482 integer type. */
15483 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15484 {
15485 op0 = gen_lowpart (V16QImode, op0);
15486 op1 = gen_lowpart (V16QImode, op1);
15487 emit_insn (gen_sse2_movdqu (op0, op1));
15488 return;
15489 }
15490
15491 if (TARGET_SSE2 && mode == V2DFmode)
15492 {
15493 rtx zero;
15494
15495 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15496 {
15497 op0 = gen_lowpart (V2DFmode, op0);
15498 op1 = gen_lowpart (V2DFmode, op1);
15499 emit_insn (gen_sse2_movupd (op0, op1));
15500 return;
15501 }
15502
15503 /* When SSE registers are split into halves, we can avoid
15504 writing to the top half twice. */
15505 if (TARGET_SSE_SPLIT_REGS)
15506 {
15507 emit_clobber (op0);
15508 zero = op0;
15509 }
15510 else
15511 {
15512 /* ??? Not sure about the best option for the Intel chips.
15513 The following would seem to satisfy; the register is
15514 entirely cleared, breaking the dependency chain. We
15515 then store to the upper half, with a dependency depth
15516 of one. A rumor has it that Intel recommends two movsd
15517 followed by an unpacklpd, but this is unconfirmed. And
15518 given that the dependency depth of the unpacklpd would
15519 still be one, I'm not sure why this would be better. */
15520 zero = CONST0_RTX (V2DFmode);
15521 }
15522
15523 m = adjust_address (op1, DFmode, 0);
15524 emit_insn (gen_sse2_loadlpd (op0, zero, m));
15525 m = adjust_address (op1, DFmode, 8);
15526 emit_insn (gen_sse2_loadhpd (op0, op0, m));
15527 }
15528 else
15529 {
15530 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15531 {
15532 op0 = gen_lowpart (V4SFmode, op0);
15533 op1 = gen_lowpart (V4SFmode, op1);
15534 emit_insn (gen_sse_movups (op0, op1));
15535 return;
15536 }
15537
15538 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
15539 emit_move_insn (op0, CONST0_RTX (mode));
15540 else
15541 emit_clobber (op0);
15542
15543 if (mode != V4SFmode)
15544 op0 = gen_lowpart (V4SFmode, op0);
15545 m = adjust_address (op1, V2SFmode, 0);
15546 emit_insn (gen_sse_loadlps (op0, op0, m));
15547 m = adjust_address (op1, V2SFmode, 8);
15548 emit_insn (gen_sse_loadhps (op0, op0, m));
15549 }
15550 }
15551 else if (MEM_P (op0))
15552 {
15553 /* If we're optimizing for size, movups is the smallest. */
15554 if (optimize_insn_for_size_p ()
15555 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15556 {
15557 op0 = gen_lowpart (V4SFmode, op0);
15558 op1 = gen_lowpart (V4SFmode, op1);
15559 emit_insn (gen_sse_movups (op0, op1));
15560 return;
15561 }
15562
15563 /* ??? Similar to above, only less clear because of quote
15564 typeless stores unquote. */
15565 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
15566 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15567 {
15568 op0 = gen_lowpart (V16QImode, op0);
15569 op1 = gen_lowpart (V16QImode, op1);
15570 emit_insn (gen_sse2_movdqu (op0, op1));
15571 return;
15572 }
15573
15574 if (TARGET_SSE2 && mode == V2DFmode)
15575 {
15576 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15577 {
15578 op0 = gen_lowpart (V2DFmode, op0);
15579 op1 = gen_lowpart (V2DFmode, op1);
15580 emit_insn (gen_sse2_movupd (op0, op1));
15581 }
15582 else
15583 {
15584 m = adjust_address (op0, DFmode, 0);
15585 emit_insn (gen_sse2_storelpd (m, op1));
15586 m = adjust_address (op0, DFmode, 8);
15587 emit_insn (gen_sse2_storehpd (m, op1));
15588 }
15589 }
15590 else
15591 {
15592 if (mode != V4SFmode)
15593 op1 = gen_lowpart (V4SFmode, op1);
15594
15595 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15596 {
15597 op0 = gen_lowpart (V4SFmode, op0);
15598 emit_insn (gen_sse_movups (op0, op1));
15599 }
15600 else
15601 {
15602 m = adjust_address (op0, V2SFmode, 0);
15603 emit_insn (gen_sse_storelps (m, op1));
15604 m = adjust_address (op0, V2SFmode, 8);
15605 emit_insn (gen_sse_storehps (m, op1));
15606 }
15607 }
15608 }
15609 else
15610 gcc_unreachable ();
15611 }
15612
15613 /* Expand a push in MODE. This is some mode for which we do not support
15614 proper push instructions, at least from the registers that we expect
15615 the value to live in. */
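/* Sketch of the expansion: decrement the stack pointer by
   GET_MODE_SIZE (mode) and store X through a MEM at the new stack top,
   i.e. roughly "sub $size, %esp" followed by an ordinary move to
   "(%esp)" (or the %rsp equivalents in 64-bit mode).  */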
15616
15617 void
15618 ix86_expand_push (enum machine_mode mode, rtx x)
15619 {
15620 rtx tmp;
15621
15622 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
15623 GEN_INT (-GET_MODE_SIZE (mode)),
15624 stack_pointer_rtx, 1, OPTAB_DIRECT);
15625 if (tmp != stack_pointer_rtx)
15626 emit_move_insn (stack_pointer_rtx, tmp);
15627
15628 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
15629
15630 /* When we push an operand onto the stack, it has to be aligned at
15631 least at the function argument boundary. However, since we don't
15632 have the argument type, we can't determine the actual argument
15633 boundary. */
15634 emit_move_insn (tmp, x);
15635 }
15636
15637 /* Helper function of ix86_fixup_binary_operands to canonicalize
15638 operand order. Returns true if the operands should be swapped. */
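/* For example, if the operation is commutative and the destination
   already matches the second source but not the first, swapping lets
   the two-address machine instruction overwrite its first input.  */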
15639
15640 static bool
15641 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
15642 rtx operands[])
15643 {
15644 rtx dst = operands[0];
15645 rtx src1 = operands[1];
15646 rtx src2 = operands[2];
15647
15648 /* If the operation is not commutative, we can't do anything. */
15649 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
15650 return false;
15651
15652 /* Highest priority is that src1 should match dst. */
15653 if (rtx_equal_p (dst, src1))
15654 return false;
15655 if (rtx_equal_p (dst, src2))
15656 return true;
15657
15658 /* Next highest priority is that immediate constants come second. */
15659 if (immediate_operand (src2, mode))
15660 return false;
15661 if (immediate_operand (src1, mode))
15662 return true;
15663
15664 /* Lowest priority is that memory references should come second. */
15665 if (MEM_P (src2))
15666 return false;
15667 if (MEM_P (src1))
15668 return true;
15669
15670 return false;
15671 }
15672
15673
15674 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
15675 destination to use for the operation. If different from the true
15676 destination in operands[0], a copy operation will be required. */
15677
15678 rtx
15679 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
15680 rtx operands[])
15681 {
15682 rtx dst = operands[0];
15683 rtx src1 = operands[1];
15684 rtx src2 = operands[2];
15685
15686 /* Canonicalize operand order. */
15687 if (ix86_swap_binary_operands_p (code, mode, operands))
15688 {
15689 rtx temp;
15690
15691 /* It is invalid to swap operands of different modes. */
15692 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
15693
15694 temp = src1;
15695 src1 = src2;
15696 src2 = temp;
15697 }
15698
15699 /* Both source operands cannot be in memory. */
15700 if (MEM_P (src1) && MEM_P (src2))
15701 {
15702 /* Optimization: Only read from memory once. */
15703 if (rtx_equal_p (src1, src2))
15704 {
15705 src2 = force_reg (mode, src2);
15706 src1 = src2;
15707 }
15708 else
15709 src2 = force_reg (mode, src2);
15710 }
15711
15712 /* If the destination is memory, and we do not have matching source
15713 operands, do things in registers. */
15714 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15715 dst = gen_reg_rtx (mode);
15716
15717 /* Source 1 cannot be a constant. */
15718 if (CONSTANT_P (src1))
15719 src1 = force_reg (mode, src1);
15720
15721 /* Source 1 cannot be a non-matching memory. */
15722 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15723 src1 = force_reg (mode, src1);
15724
15725 operands[1] = src1;
15726 operands[2] = src2;
15727 return dst;
15728 }
15729
15730 /* Similarly, but assume that the destination has already been
15731 set up properly. */
15732
15733 void
15734 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
15735 enum machine_mode mode, rtx operands[])
15736 {
15737 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
15738 gcc_assert (dst == operands[0]);
15739 }
15740
15741 /* Attempt to expand a binary operator. Make the expansion closer to the
15742 actual machine than just general_operand, which would allow 3 separate
15743 memory references (one output, two inputs) in a single insn. */
15744
15745 void
15746 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
15747 rtx operands[])
15748 {
15749 rtx src1, src2, dst, op, clob;
15750
15751 dst = ix86_fixup_binary_operands (code, mode, operands);
15752 src1 = operands[1];
15753 src2 = operands[2];
15754
15755 /* Emit the instruction. */
15756
15757 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
15758 if (reload_in_progress)
15759 {
15760 /* Reload doesn't know about the flags register, and doesn't know that
15761 it doesn't want to clobber it. We can only do this with PLUS. */
15762 gcc_assert (code == PLUS);
15763 emit_insn (op);
15764 }
15765 else if (reload_completed
15766 && code == PLUS
15767 && !rtx_equal_p (dst, src1))
15768 {
15769 /* This is going to be an LEA; avoid splitting it later. */
15770 emit_insn (op);
15771 }
15772 else
15773 {
15774 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15775 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15776 }
15777
15778 /* Fix up the destination if needed. */
15779 if (dst != operands[0])
15780 emit_move_insn (operands[0], dst);
15781 }
15782
15783 /* Return TRUE or FALSE depending on whether the binary operator meets the
15784 appropriate constraints. */
15785
15786 bool
15787 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
15788 rtx operands[3])
15789 {
15790 rtx dst = operands[0];
15791 rtx src1 = operands[1];
15792 rtx src2 = operands[2];
15793
15794 /* Both source operands cannot be in memory. */
15795 if (MEM_P (src1) && MEM_P (src2))
15796 return false;
15797
15798 /* Canonicalize operand order for commutative operators. */
15799 if (ix86_swap_binary_operands_p (code, mode, operands))
15800 {
15801 rtx temp = src1;
15802 src1 = src2;
15803 src2 = temp;
15804 }
15805
15806 /* If the destination is memory, we must have a matching source operand. */
15807 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15808 return false;
15809
15810 /* Source 1 cannot be a constant. */
15811 if (CONSTANT_P (src1))
15812 return false;
15813
15814 /* Source 1 cannot be a non-matching memory. */
15815 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15816 /* Support "andhi/andsi/anddi" as a zero-extending move. */
15817 return (code == AND
15818 && (mode == HImode
15819 || mode == SImode
15820 || (TARGET_64BIT && mode == DImode))
15821 && satisfies_constraint_L (src2));
15822
15823 return true;
15824 }
15825
15826 /* Attempt to expand a unary operator. Make the expansion closer to the
15827 actual machine than just general_operand, which would allow 2 separate
15828 memory references (one output, one input) in a single insn. */
15829
15830 void
15831 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
15832 rtx operands[])
15833 {
15834 int matching_memory;
15835 rtx src, dst, op, clob;
15836
15837 dst = operands[0];
15838 src = operands[1];
15839
15840 /* If the destination is memory, and we do not have matching source
15841 operands, do things in registers. */
15842 matching_memory = 0;
15843 if (MEM_P (dst))
15844 {
15845 if (rtx_equal_p (dst, src))
15846 matching_memory = 1;
15847 else
15848 dst = gen_reg_rtx (mode);
15849 }
15850
15851 /* When source operand is memory, destination must match. */
15852 if (MEM_P (src) && !matching_memory)
15853 src = force_reg (mode, src);
15854
15855 /* Emit the instruction. */
15856
15857 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
15858 if (reload_in_progress || code == NOT)
15859 {
15860 /* Reload doesn't know about the flags register, and doesn't know that
15861 it doesn't want to clobber it. */
15862 gcc_assert (code == NOT);
15863 emit_insn (op);
15864 }
15865 else
15866 {
15867 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15868 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15869 }
15870
15871 /* Fix up the destination if needed. */
15872 if (dst != operands[0])
15873 emit_move_insn (operands[0], dst);
15874 }
15875
15876 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
15877 divisor are within the range [0-255]. */
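/* Sketch of the generated code: the dividend and divisor are ORed
   into a scratch register and tested against -0x100; if no bits above
   the low 8 are set, control jumps to a label performing a cheap 8-bit
   unsigned divide (udivmodhiqi3) whose quotient is taken from AL and
   remainder from AH; otherwise the full-width signed or unsigned
   divmod is emitted.  */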
15878
15879 void
15880 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
15881 bool signed_p)
15882 {
15883 rtx end_label, qimode_label;
15884 rtx insn, div, mod;
15885 rtx scratch, tmp0, tmp1, tmp2;
15886 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
15887 rtx (*gen_zero_extend) (rtx, rtx);
15888 rtx (*gen_test_ccno_1) (rtx, rtx);
15889
15890 switch (mode)
15891 {
15892 case SImode:
15893 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
15894 gen_test_ccno_1 = gen_testsi_ccno_1;
15895 gen_zero_extend = gen_zero_extendqisi2;
15896 break;
15897 case DImode:
15898 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
15899 gen_test_ccno_1 = gen_testdi_ccno_1;
15900 gen_zero_extend = gen_zero_extendqidi2;
15901 break;
15902 default:
15903 gcc_unreachable ();
15904 }
15905
15906 end_label = gen_label_rtx ();
15907 qimode_label = gen_label_rtx ();
15908
15909 scratch = gen_reg_rtx (mode);
15910
15911 /* Use 8bit unsigned divmod if dividend and divisor are within
15912 the range [0-255]. */
15913 emit_move_insn (scratch, operands[2]);
15914 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
15915 scratch, 1, OPTAB_DIRECT);
15916 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
15917 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
15918 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
15919 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
15920 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
15921 pc_rtx);
15922 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
15923 predict_jump (REG_BR_PROB_BASE * 50 / 100);
15924 JUMP_LABEL (insn) = qimode_label;
15925
15926 /* Generate original signed/unsigned divmod. */
15927 div = gen_divmod4_1 (operands[0], operands[1],
15928 operands[2], operands[3]);
15929 emit_insn (div);
15930
15931 /* Branch to the end. */
15932 emit_jump_insn (gen_jump (end_label));
15933 emit_barrier ();
15934
15935 /* Generate 8bit unsigned divide. */
15936 emit_label (qimode_label);
15937 /* Don't use operands[0] for result of 8bit divide since not all
15938 registers support QImode ZERO_EXTRACT. */
15939 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
15940 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
15941 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
15942 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
15943
15944 if (signed_p)
15945 {
15946 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
15947 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
15948 }
15949 else
15950 {
15951 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
15952 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
15953 }
15954
15955 /* Extract remainder from AH. */
15956 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
15957 if (REG_P (operands[1]))
15958 insn = emit_move_insn (operands[1], tmp1);
15959 else
15960 {
15961 /* Need a new scratch register since the old one has the result
15962 of the 8bit divide. */
15963 scratch = gen_reg_rtx (mode);
15964 emit_move_insn (scratch, tmp1);
15965 insn = emit_move_insn (operands[1], scratch);
15966 }
15967 set_unique_reg_note (insn, REG_EQUAL, mod);
15968
15969 /* Zero extend quotient from AL. */
15970 tmp1 = gen_lowpart (QImode, tmp0);
15971 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
15972 set_unique_reg_note (insn, REG_EQUAL, div);
15973
15974 emit_label (end_label);
15975 }
15976
15977 #define LEA_MAX_STALL (3)
15978 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
15979
15980 /* Increase given DISTANCE in half-cycles according to
15981 dependencies between PREV and NEXT instructions.
15982 Add 1 half-cycle if there is no dependency and
15983 go to the next cycle if there is some dependency. */
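/* Concretely, when NEXT uses a register that PREV defines, the
   distance is rounded up to an even (full-cycle) boundary and a full
   cycle (two half-cycles) is added; otherwise a single half-cycle is
   added.  */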
15984
15985 static unsigned int
15986 increase_distance (rtx prev, rtx next, unsigned int distance)
15987 {
15988 df_ref *use_rec;
15989 df_ref *def_rec;
15990
15991 if (!prev || !next)
15992 return distance + (distance & 1) + 2;
15993
15994 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
15995 return distance + 1;
15996
15997 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
15998 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
15999 if (!DF_REF_IS_ARTIFICIAL (*def_rec)
16000 && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
16001 return distance + (distance & 1) + 2;
16002
16003 return distance + 1;
16004 }
16005
16006 /* Function checks if instruction INSN defines register number
16007 REGNO1 or REGNO2. */
16008
16009 static bool
16010 insn_defines_reg (unsigned int regno1, unsigned int regno2,
16011 rtx insn)
16012 {
16013 df_ref *def_rec;
16014
16015 for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
16016 if (DF_REF_REG_DEF_P (*def_rec)
16017 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16018 && (regno1 == DF_REF_REGNO (*def_rec)
16019 || regno2 == DF_REF_REGNO (*def_rec)))
16020 {
16021 return true;
16022 }
16023
16024 return false;
16025 }
16026
16027 /* Function checks if instruction INSN uses register number
16028 REGNO as a part of address expression. */
16029
16030 static bool
16031 insn_uses_reg_mem (unsigned int regno, rtx insn)
16032 {
16033 df_ref *use_rec;
16034
16035 for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
16036 if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec))
16037 return true;
16038
16039 return false;
16040 }
16041
16042 /* Search backward for non-agu definition of register number REGNO1
16043 or register number REGNO2 in basic block starting from instruction
16044 START up to head of basic block or instruction INSN.
16045
16046 Function puts true value into *FOUND var if definition was found
16047 and false otherwise.
16048
16049 Distance in half-cycles between START and found instruction or head
16050 of BB is added to DISTANCE and returned. */
16051
16052 static int
16053 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
16054 rtx insn, int distance,
16055 rtx start, bool *found)
16056 {
16057 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
16058 rtx prev = start;
16059 rtx next = NULL;
16060 enum attr_type insn_type;
16061
16062 *found = false;
16063
16064 while (prev
16065 && prev != insn
16066 && distance < LEA_SEARCH_THRESHOLD)
16067 {
16068 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
16069 {
16070 distance = increase_distance (prev, next, distance);
16071 if (insn_defines_reg (regno1, regno2, prev))
16072 {
16073 insn_type = get_attr_type (prev);
16074 if (insn_type != TYPE_LEA)
16075 {
16076 *found = true;
16077 return distance;
16078 }
16079 }
16080
16081 next = prev;
16082 }
16083 if (prev == BB_HEAD (bb))
16084 break;
16085
16086 prev = PREV_INSN (prev);
16087 }
16088
16089 return distance;
16090 }
16091
16092 /* Search backward for non-agu definition of register number REGNO1
16093 or register number REGNO2 in INSN's basic block until
16094 1. Pass LEA_SEARCH_THRESHOLD instructions, or
16095 2. Reach a neighbouring BB boundary, or
16096 3. Reach an agu definition.
16097 Returns the distance between the non-agu definition point and INSN.
16098 If no definition point, returns -1. */
16099
16100 static int
16101 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
16102 rtx insn)
16103 {
16104 basic_block bb = BLOCK_FOR_INSN (insn);
16105 int distance = 0;
16106 bool found = false;
16107
16108 if (insn != BB_HEAD (bb))
16109 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
16110 distance, PREV_INSN (insn),
16111 &found);
16112
16113 if (!found && distance < LEA_SEARCH_THRESHOLD)
16114 {
16115 edge e;
16116 edge_iterator ei;
16117 bool simple_loop = false;
16118
16119 FOR_EACH_EDGE (e, ei, bb->preds)
16120 if (e->src == bb)
16121 {
16122 simple_loop = true;
16123 break;
16124 }
16125
16126 if (simple_loop)
16127 distance = distance_non_agu_define_in_bb (regno1, regno2,
16128 insn, distance,
16129 BB_END (bb), &found);
16130 else
16131 {
16132 int shortest_dist = -1;
16133 bool found_in_bb = false;
16134
16135 FOR_EACH_EDGE (e, ei, bb->preds)
16136 {
16137 int bb_dist = distance_non_agu_define_in_bb (regno1, regno2,
16138 insn, distance,
16139 BB_END (e->src),
16140 &found_in_bb);
16141 if (found_in_bb)
16142 {
16143 if (shortest_dist < 0)
16144 shortest_dist = bb_dist;
16145 else if (bb_dist > 0)
16146 shortest_dist = MIN (bb_dist, shortest_dist);
16147 }
16148
16149 found = found || found_in_bb;
16150 }
16151
16152 distance = shortest_dist;
16153 }
16154 }
16155
16156 /* get_attr_type may modify recog data. We want to make sure
16157 that recog data is valid for instruction INSN, on which
16158 distance_non_agu_define is called. INSN is unchanged here. */
16159 extract_insn_cached (insn);
16160
16161 if (!found)
16162 distance = -1;
16163 else
16164 distance = distance >> 1;
16165
16166 return distance;
16167 }
16168
16169 /* Return the distance in half-cycles between INSN and the next
16170 insn that uses register number REGNO in a memory address, added
16171 to DISTANCE. Return -1 if REGNO is set.
16172
16173 Put true value into *FOUND if register usage was found and
16174 false otherwise.
16175 Put true value into *REDEFINED if register redefinition was
16176 found and false otherwise. */
16177
16178 static int
16179 distance_agu_use_in_bb(unsigned int regno,
16180 rtx insn, int distance, rtx start,
16181 bool *found, bool *redefined)
16182 {
16183 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
16184 rtx next = start;
16185 rtx prev = NULL;
16186
16187 *found = false;
16188 *redefined = false;
16189
16190 while (next
16191 && next != insn
16192 && distance < LEA_SEARCH_THRESHOLD)
16193 {
16194 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
16195 {
16196 distance = increase_distance(prev, next, distance);
16197 if (insn_uses_reg_mem (regno, next))
16198 {
16199 /* Return DISTANCE if REGNO is used in a memory
16200 address in NEXT. */
16201 *found = true;
16202 return distance;
16203 }
16204
16205 if (insn_defines_reg (regno, INVALID_REGNUM, next))
16206 {
16207 /* Return -1 if REGNO is set in NEXT. */
16208 *redefined = true;
16209 return -1;
16210 }
16211
16212 prev = next;
16213 }
16214
16215 if (next == BB_END (bb))
16216 break;
16217
16218 next = NEXT_INSN (next);
16219 }
16220
16221 return distance;
16222 }
16223
16224 /* Return the distance between INSN and the next insn that uses
16225 register number REGNO0 in a memory address. Return -1 if no such
16226 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
16227
16228 static int
16229 distance_agu_use (unsigned int regno0, rtx insn)
16230 {
16231 basic_block bb = BLOCK_FOR_INSN (insn);
16232 int distance = 0;
16233 bool found = false;
16234 bool redefined = false;
16235
16236 if (insn != BB_END (bb))
16237 distance = distance_agu_use_in_bb (regno0, insn, distance,
16238 NEXT_INSN (insn),
16239 &found, &redefined);
16240
16241 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
16242 {
16243 edge e;
16244 edge_iterator ei;
16245 bool simple_loop = false;
16246
16247 FOR_EACH_EDGE (e, ei, bb->succs)
16248 if (e->dest == bb)
16249 {
16250 simple_loop = true;
16251 break;
16252 }
16253
16254 if (simple_loop)
16255 distance = distance_agu_use_in_bb (regno0, insn,
16256 distance, BB_HEAD (bb),
16257 &found, &redefined);
16258 else
16259 {
16260 int shortest_dist = -1;
16261 bool found_in_bb = false;
16262 bool redefined_in_bb = false;
16263
16264 FOR_EACH_EDGE (e, ei, bb->succs)
16265 {
16266 int bb_dist = distance_agu_use_in_bb (regno0, insn,
16267 distance, BB_HEAD (e->dest),
16268 &found_in_bb, &redefined_in_bb);
16269 if (found_in_bb)
16270 {
16271 if (shortest_dist < 0)
16272 shortest_dist = bb_dist;
16273 else if (bb_dist > 0)
16274 shortest_dist = MIN (bb_dist, shortest_dist);
16275 }
16276
16277 found = found || found_in_bb;
16278 }
16279
16280 distance = shortest_dist;
16281 }
16282 }
16283
16284 if (!found || redefined)
16285 distance = -1;
16286 else
16287 distance = distance >> 1;
16288
16289 return distance;
16290 }
16291
16292 /* Define this macro to tune LEA priority vs ADD; it takes effect when
16293 there is a dilemma of choosing LEA or ADD.
16294 Negative value: ADD is preferred over LEA
16295 Zero: Neutral
16296 Positive value: LEA is preferred over ADD.  */
16297 #define IX86_LEA_PRIORITY 0
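/* The priority is added, together with the split cost, to the distance of
the non-AGU operand definition in ix86_lea_outperforms, so a positive
value biases the decision toward lea and a negative value toward the
mov/add sequence.  */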
16298
16299 /* Return true if using lea INSN has a performance advantage
16300 over a sequence of instructions.  The instruction sequence has
16301 SPLIT_COST cycles higher latency than the lea latency.  */
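/* Informally, lea is judged profitable when its sources were defined long
enough ago (so the lea does not stall on the AGU) relative to how soon its
result is needed in a memory address; the distances computed below
implement that trade-off.  */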
16302
16303 bool
16304 ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
16305 unsigned int regno2, unsigned int split_cost)
16306 {
16307 int dist_define, dist_use;
16308
16309 dist_define = distance_non_agu_define (regno1, regno2, insn);
16310 dist_use = distance_agu_use (regno0, insn);
16311
16312 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
16313 {
16314 /* If there is no non-AGU operand definition, no AGU
16315 operand usage and the split cost is 0, then both the lea
16316 and non-lea variants have the same priority.  Currently
16317 we prefer lea for 64-bit code and non-lea for 32-bit
16318 code.  */
16319 if (dist_use < 0 && split_cost == 0)
16320 return TARGET_64BIT || IX86_LEA_PRIORITY;
16321 else
16322 return true;
16323 }
16324
16325 /* With a longer definition distance, lea is preferable.
16326 Here we adjust the distance to take into account the splitting
16327 cost and lea priority.  */
16328 dist_define += split_cost + IX86_LEA_PRIORITY;
16329
16330 /* If there is no use in a memory address then we just check
16331 that the split cost does not exceed the AGU stall.  */
16332 if (dist_use < 0)
16333 return dist_define >= LEA_MAX_STALL;
16334
16335 /* If this insn has both backward non-agu dependence and forward
16336 agu dependence, the one with short distance takes effect. */
16337 return dist_define >= dist_use;
16338 }
16339
16340 /* Return true if it is legal to clobber flags by INSN and
16341 false otherwise. */
16342
16343 static bool
16344 ix86_ok_to_clobber_flags (rtx insn)
16345 {
16346 basic_block bb = BLOCK_FOR_INSN (insn);
16347 df_ref *use;
16348 bitmap live;
16349
16350 while (insn)
16351 {
16352 if (NONDEBUG_INSN_P (insn))
16353 {
16354 for (use = DF_INSN_USES (insn); *use; use++)
16355 if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG)
16356 return false;
16357
16358 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
16359 return true;
16360 }
16361
16362 if (insn == BB_END (bb))
16363 break;
16364
16365 insn = NEXT_INSN (insn);
16366 }
16367
16368 live = df_get_live_out (bb);
16369 return !REGNO_REG_SET_P (live, FLAGS_REG);
16370 }
16371
16372 /* Return true if we need to split op0 = op1 + op2 into a sequence of
16373 move and add to avoid AGU stalls. */
16374
16375 bool
16376 ix86_avoid_lea_for_add (rtx insn, rtx operands[])
16377 {
16378 unsigned int regno0 = true_regnum (operands[0]);
16379 unsigned int regno1 = true_regnum (operands[1]);
16380 unsigned int regno2 = true_regnum (operands[2]);
16381
16382 /* Check if we need to optimize. */
16383 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16384 return false;
16385
16386 /* Check it is correct to split here. */
16387 if (!ix86_ok_to_clobber_flags (insn))
16388 return false;
16389
16390 /* We need to split only adds with a non-destructive
16391 destination operand.  */
16392 if (regno0 == regno1 || regno0 == regno2)
16393 return false;
16394 else
16395 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1);
16396 }
16397
16398 /* Return true if we need to split lea into a sequence of
16399 instructions to avoid AGU stalls. */
16400
16401 bool
16402 ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
16403 {
16404 unsigned int regno0 = true_regnum (operands[0]);
16405 unsigned int regno1 = -1;
16406 unsigned int regno2 = -1;
16407 unsigned int split_cost = 0;
16408 struct ix86_address parts;
16409 int ok;
16410
16411 /* Check we need to optimize. */
16412 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16413 return false;
16414
16415 /* Check it is correct to split here. */
16416 if (!ix86_ok_to_clobber_flags (insn))
16417 return false;
16418
16419 ok = ix86_decompose_address (operands[1], &parts);
16420 gcc_assert (ok);
16421
16422 /* We should not split into add if a non-legitimate PIC
16423 operand is used as the displacement.  */
16424 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
16425 return false;
16426
16427 if (parts.base)
16428 regno1 = true_regnum (parts.base);
16429 if (parts.index)
16430 regno2 = true_regnum (parts.index);
16431
16432 /* Compute how many cycles we will add to the execution time
16433 if we split the lea into a sequence of instructions.  */
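/* For example, splitting lea r0, [r1 + r2*4 + 8] with r0 distinct from r1
and r2 accumulates a cost of 3 under this model: a mov of the index, a
shift for the scale, an add of the base and an add of the displacement,
minus the one instruction credited to the lea itself.  */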
16434 if (parts.base || parts.index)
16435 {
16436 /* Have to use a mov instruction if the non-destructive
16437 destination form is used.  */
16438 if (regno1 != regno0 && regno2 != regno0)
16439 split_cost += 1;
16440
16441 /* Have to add index to base if both exist. */
16442 if (parts.base && parts.index)
16443 split_cost += 1;
16444
16445 /* Have to use shift and adds if scale is 2 or greater. */
16446 if (parts.scale > 1)
16447 {
16448 if (regno0 != regno1)
16449 split_cost += 1;
16450 else if (regno2 == regno0)
16451 split_cost += 4;
16452 else
16453 split_cost += parts.scale;
16454 }
16455
16456 /* Have to use an add instruction with an immediate if
16457 disp is nonzero.  */
16458 if (parts.disp && parts.disp != const0_rtx)
16459 split_cost += 1;
16460
16461 /* Subtract the price of lea. */
16462 split_cost -= 1;
16463 }
16464
16465 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost);
16466 }
16467
16468 /* Split lea instructions into a sequence of instructions
16469 which are executed on ALU to avoid AGU stalls.
16470 It is assumed that it is allowed to clobber flags register
16471 at lea position. */
16472
16473 extern void
16474 ix86_split_lea_for_addr (rtx operands[], enum machine_mode mode)
16475 {
16476 unsigned int regno0 = true_regnum (operands[0]);
16477 unsigned int regno1 = INVALID_REGNUM;
16478 unsigned int regno2 = INVALID_REGNUM;
16479 struct ix86_address parts;
16480 rtx tmp, clob;
16481 rtvec par;
16482 int ok, adds;
16483
16484 ok = ix86_decompose_address (operands[1], &parts);
16485 gcc_assert (ok);
16486
16487 if (parts.base)
16488 {
16489 if (GET_MODE (parts.base) != mode)
16490 parts.base = gen_rtx_SUBREG (mode, parts.base, 0);
16491 regno1 = true_regnum (parts.base);
16492 }
16493
16494 if (parts.index)
16495 {
16496 if (GET_MODE (parts.index) != mode)
16497 parts.index = gen_rtx_SUBREG (mode, parts.index, 0);
16498 regno2 = true_regnum (parts.index);
16499 }
16500
16501 if (parts.scale > 1)
16502 {
16503 /* Case r1 = r1 + ... */
16504 if (regno1 == regno0)
16505 {
16506 /* If we have the case r1 = r1 + C * r1 then we
16507 would have to use multiplication, which is very
16508 expensive.  Assume the cost model is wrong if we
16509 get such a case here.  */
16510 gcc_assert (regno2 != regno0);
16511
16512 for (adds = parts.scale; adds > 0; adds--)
16513 {
16514 tmp = gen_rtx_PLUS (mode, operands[0], parts.index);
16515 tmp = gen_rtx_SET (VOIDmode, operands[0], tmp);
16516 clob = gen_rtx_CLOBBER (VOIDmode,
16517 gen_rtx_REG (CCmode, FLAGS_REG));
16518 par = gen_rtvec (2, tmp, clob);
16519 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
16520 }
16521 }
16522 else
16523 {
16524 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
16525 if (regno0 != regno2)
16526 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));
16527
16528 /* Use shift for scaling. */
16529 tmp = gen_rtx_ASHIFT (mode, operands[0],
16530 GEN_INT (exact_log2 (parts.scale)));
16531 tmp = gen_rtx_SET (VOIDmode, operands[0], tmp);
16532 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16533 par = gen_rtvec (2, tmp, clob);
16534 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
16535
16536 if (parts.base)
16537 {
16538 tmp = gen_rtx_PLUS (mode, operands[0], parts.base);
16539 tmp = gen_rtx_SET (VOIDmode, operands[0], tmp);
16540 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16541 par = gen_rtvec (2, tmp, clob);
16542 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
16543 }
16544
16545 if (parts.disp && parts.disp != const0_rtx)
16546 {
16547 tmp = gen_rtx_PLUS (mode, operands[0], parts.disp);
16548 tmp = gen_rtx_SET (VOIDmode, operands[0], tmp);
16549 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16550 par = gen_rtvec (2, tmp, clob);
16551 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
16552 }
16553 }
16554 }
16555 else if (!parts.base && !parts.index)
16556 {
16557 gcc_assert (parts.disp);
16558 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.disp));
16559 }
16560 else
16561 {
16562 if (!parts.base)
16563 {
16564 if (regno0 != regno2)
16565 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));
16566 }
16567 else if (!parts.index)
16568 {
16569 if (regno0 != regno1)
16570 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
16571 }
16572 else
16573 {
16574 if (regno0 == regno1)
16575 tmp = gen_rtx_PLUS (mode, operands[0], parts.index);
16576 else if (regno0 == regno2)
16577 tmp = gen_rtx_PLUS (mode, operands[0], parts.base);
16578 else
16579 {
16580 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
16581 tmp = gen_rtx_PLUS (mode, operands[0], parts.index);
16582 }
16583
16584 tmp = gen_rtx_SET (VOIDmode, operands[0], tmp);
16585 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16586 par = gen_rtvec (2, tmp, clob);
16587 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
16588 }
16589
16590 if (parts.disp && parts.disp != const0_rtx)
16591 {
16592 tmp = gen_rtx_PLUS (mode, operands[0], parts.disp);
16593 tmp = gen_rtx_SET (VOIDmode, operands[0], tmp);
16594 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16595 par = gen_rtvec (2, tmp, clob);
16596 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
16597 }
16598 }
16599 }
16600
16601 /* Return true if it is ok to optimize an ADD operation to an LEA
16602 operation to avoid flag register consumption.  For most processors,
16603 ADD is faster than LEA.  For processors like ATOM, if the
16604 destination register of the LEA holds an actual address which will be
16605 used soon, LEA is better, and otherwise ADD is better.  */
16606
16607 bool
16608 ix86_lea_for_add_ok (rtx insn, rtx operands[])
16609 {
16610 unsigned int regno0 = true_regnum (operands[0]);
16611 unsigned int regno1 = true_regnum (operands[1]);
16612 unsigned int regno2 = true_regnum (operands[2]);
16613
16614 /* If a = b + c, (a!=b && a!=c), must use lea form. */
16615 if (regno0 != regno1 && regno0 != regno2)
16616 return true;
16617
16618 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16619 return false;
16620
16621 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0);
16622 }
16623
16624 /* Return true if destination reg of SET_BODY is shift count of
16625 USE_BODY. */
16626
16627 static bool
16628 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
16629 {
16630 rtx set_dest;
16631 rtx shift_rtx;
16632 int i;
16633
16634 /* Retrieve destination of SET_BODY. */
16635 switch (GET_CODE (set_body))
16636 {
16637 case SET:
16638 set_dest = SET_DEST (set_body);
16639 if (!set_dest || !REG_P (set_dest))
16640 return false;
16641 break;
16642 case PARALLEL:
16643 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
16644 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
16645 use_body))
16646 return true;
16647 default:
16648 return false;
16649 break;
16650 }
16651
16652 /* Retrieve shift count of USE_BODY. */
16653 switch (GET_CODE (use_body))
16654 {
16655 case SET:
16656 shift_rtx = XEXP (use_body, 1);
16657 break;
16658 case PARALLEL:
16659 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
16660 if (ix86_dep_by_shift_count_body (set_body,
16661 XVECEXP (use_body, 0, i)))
16662 return true;
16663 default:
16664 return false;
16665 break;
16666 }
16667
16668 if (shift_rtx
16669 && (GET_CODE (shift_rtx) == ASHIFT
16670 || GET_CODE (shift_rtx) == LSHIFTRT
16671 || GET_CODE (shift_rtx) == ASHIFTRT
16672 || GET_CODE (shift_rtx) == ROTATE
16673 || GET_CODE (shift_rtx) == ROTATERT))
16674 {
16675 rtx shift_count = XEXP (shift_rtx, 1);
16676
16677 /* Return true if shift count is dest of SET_BODY. */
16678 if (REG_P (shift_count)
16679 && true_regnum (set_dest) == true_regnum (shift_count))
16680 return true;
16681 }
16682
16683 return false;
16684 }
16685
16686 /* Return true if destination reg of SET_INSN is shift count of
16687 USE_INSN. */
16688
16689 bool
16690 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
16691 {
16692 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
16693 PATTERN (use_insn));
16694 }
16695
16696 /* Return TRUE or FALSE depending on whether the unary operator meets the
16697 appropriate constraints. */
16698
16699 bool
16700 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
16701 enum machine_mode mode ATTRIBUTE_UNUSED,
16702 rtx operands[2] ATTRIBUTE_UNUSED)
16703 {
16704 /* If one of operands is memory, source and destination must match. */
16705 if ((MEM_P (operands[0])
16706 || MEM_P (operands[1]))
16707 && ! rtx_equal_p (operands[0], operands[1]))
16708 return false;
16709 return true;
16710 }
16711
16712 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
16713 are ok, keeping in mind the possible movddup alternative. */
16714
16715 bool
16716 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
16717 {
16718 if (MEM_P (operands[0]))
16719 return rtx_equal_p (operands[0], operands[1 + high]);
16720 if (MEM_P (operands[1]) && MEM_P (operands[2]))
16721 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
16722 return true;
16723 }
16724
16725 /* Post-reload splitter for converting an SF or DFmode value in an
16726 SSE register into an unsigned SImode. */
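/* The emitted sequence compares the input against 2^31: lanes that are at
least 2^31 have 2^31 subtracted before the signed cvttps2dq/cvttpd2dq
conversion, and the corresponding sign bit is xor'ed back into the
integer result afterwards.  */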
16727
16728 void
16729 ix86_split_convert_uns_si_sse (rtx operands[])
16730 {
16731 enum machine_mode vecmode;
16732 rtx value, large, zero_or_two31, input, two31, x;
16733
16734 large = operands[1];
16735 zero_or_two31 = operands[2];
16736 input = operands[3];
16737 two31 = operands[4];
16738 vecmode = GET_MODE (large);
16739 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
16740
16741 /* Load up the value into the low element. We must ensure that the other
16742 elements are valid floats -- zero is the easiest such value. */
16743 if (MEM_P (input))
16744 {
16745 if (vecmode == V4SFmode)
16746 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
16747 else
16748 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
16749 }
16750 else
16751 {
16752 input = gen_rtx_REG (vecmode, REGNO (input));
16753 emit_move_insn (value, CONST0_RTX (vecmode));
16754 if (vecmode == V4SFmode)
16755 emit_insn (gen_sse_movss (value, value, input));
16756 else
16757 emit_insn (gen_sse2_movsd (value, value, input));
16758 }
16759
16760 emit_move_insn (large, two31);
16761 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
16762
16763 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
16764 emit_insn (gen_rtx_SET (VOIDmode, large, x));
16765
16766 x = gen_rtx_AND (vecmode, zero_or_two31, large);
16767 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
16768
16769 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
16770 emit_insn (gen_rtx_SET (VOIDmode, value, x));
16771
16772 large = gen_rtx_REG (V4SImode, REGNO (large));
16773 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
16774
16775 x = gen_rtx_REG (V4SImode, REGNO (value));
16776 if (vecmode == V4SFmode)
16777 emit_insn (gen_sse2_cvttps2dq (x, value));
16778 else
16779 emit_insn (gen_sse2_cvttpd2dq (x, value));
16780 value = x;
16781
16782 emit_insn (gen_xorv4si3 (value, value, large));
16783 }
16784
16785 /* Convert an unsigned DImode value into a DFmode, using only SSE.
16786 Expects the 64-bit DImode to be supplied in a pair of integral
16787 registers. Requires SSE2; will use SSE3 if available. For x86_32,
16788 -mfpmath=sse, !optimize_size only. */
16789
16790 void
16791 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
16792 {
16793 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
16794 rtx int_xmm, fp_xmm;
16795 rtx biases, exponents;
16796 rtx x;
16797
16798 int_xmm = gen_reg_rtx (V4SImode);
16799 if (TARGET_INTER_UNIT_MOVES)
16800 emit_insn (gen_movdi_to_sse (int_xmm, input));
16801 else if (TARGET_SSE_SPLIT_REGS)
16802 {
16803 emit_clobber (int_xmm);
16804 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
16805 }
16806 else
16807 {
16808 x = gen_reg_rtx (V2DImode);
16809 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
16810 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
16811 }
16812
16813 x = gen_rtx_CONST_VECTOR (V4SImode,
16814 gen_rtvec (4, GEN_INT (0x43300000UL),
16815 GEN_INT (0x45300000UL),
16816 const0_rtx, const0_rtx));
16817 exponents = validize_mem (force_const_mem (V4SImode, x));
16818
16819 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
16820 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
16821
16822 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
16823 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
16824 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
16825 (0x1.0p84 + double(fp_value_hi_xmm)).
16826 Note these exponents differ by 32. */
16827
16828 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
16829
16830 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
16831 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
16832 real_ldexp (&bias_lo_rvt, &dconst1, 52);
16833 real_ldexp (&bias_hi_rvt, &dconst1, 84);
16834 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
16835 x = const_double_from_real_value (bias_hi_rvt, DFmode);
16836 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
16837 biases = validize_mem (force_const_mem (V2DFmode, biases));
16838 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
16839
16840 /* Add the upper and lower DFmode values together. */
16841 if (TARGET_SSE3)
16842 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
16843 else
16844 {
16845 x = copy_to_mode_reg (V2DFmode, fp_xmm);
16846 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
16847 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
16848 }
16849
16850 ix86_expand_vector_extract (false, target, fp_xmm, 0);
16851 }
16852
16853 /* Not used, but eases macroization of patterns. */
16854 void
16855 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
16856 rtx input ATTRIBUTE_UNUSED)
16857 {
16858 gcc_unreachable ();
16859 }
16860
16861 /* Convert an unsigned SImode value into a DFmode. Only currently used
16862 for SSE, but applicable anywhere. */
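/* The input is biased by -2^31 so that it is representable as a signed
SImode value, converted with the signed floatsidf pattern, and 2^31 is
then added back as a DFmode constant.  */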
16863
16864 void
16865 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
16866 {
16867 REAL_VALUE_TYPE TWO31r;
16868 rtx x, fp;
16869
16870 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
16871 NULL, 1, OPTAB_DIRECT);
16872
16873 fp = gen_reg_rtx (DFmode);
16874 emit_insn (gen_floatsidf2 (fp, x));
16875
16876 real_ldexp (&TWO31r, &dconst1, 31);
16877 x = const_double_from_real_value (TWO31r, DFmode);
16878
16879 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
16880 if (x != target)
16881 emit_move_insn (target, x);
16882 }
16883
16884 /* Convert a signed DImode value into a DFmode. Only used for SSE in
16885 32-bit mode; otherwise we have a direct convert instruction. */
16886
16887 void
16888 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
16889 {
16890 REAL_VALUE_TYPE TWO32r;
16891 rtx fp_lo, fp_hi, x;
16892
16893 fp_lo = gen_reg_rtx (DFmode);
16894 fp_hi = gen_reg_rtx (DFmode);
16895
16896 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
16897
16898 real_ldexp (&TWO32r, &dconst1, 32);
16899 x = const_double_from_real_value (TWO32r, DFmode);
16900 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
16901
16902 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
16903
16904 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
16905 0, OPTAB_DIRECT);
16906 if (x != target)
16907 emit_move_insn (target, x);
16908 }
16909
16910 /* Convert an unsigned SImode value into a SFmode, using only SSE.
16911 For x86_32, -mfpmath=sse, !optimize_size only. */
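/* The input is split into 16-bit halves, each half is converted exactly
with the signed floatsisf pattern, and the result is recombined as
hi * 2^16 + lo in SFmode.  */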
16912 void
16913 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
16914 {
16915 REAL_VALUE_TYPE ONE16r;
16916 rtx fp_hi, fp_lo, int_hi, int_lo, x;
16917
16918 real_ldexp (&ONE16r, &dconst1, 16);
16919 x = const_double_from_real_value (ONE16r, SFmode);
16920 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
16921 NULL, 0, OPTAB_DIRECT);
16922 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
16923 NULL, 0, OPTAB_DIRECT);
16924 fp_hi = gen_reg_rtx (SFmode);
16925 fp_lo = gen_reg_rtx (SFmode);
16926 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
16927 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
16928 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
16929 0, OPTAB_DIRECT);
16930 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
16931 0, OPTAB_DIRECT);
16932 if (!rtx_equal_p (target, fp_hi))
16933 emit_move_insn (target, fp_hi);
16934 }
16935
16936 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
16937 then replicate the value for all elements of the vector
16938 register. */
16939
16940 rtx
16941 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
16942 {
16943 int i, n_elt;
16944 rtvec v;
16945 enum machine_mode scalar_mode;
16946
16947 switch (mode)
16948 {
16949 case V8SImode:
16950 case V4SImode:
16951 case V4DImode:
16952 case V2DImode:
16953 gcc_assert (vect);
16954 case V8SFmode:
16955 case V4SFmode:
16956 case V4DFmode:
16957 case V2DFmode:
16958 n_elt = GET_MODE_NUNITS (mode);
16959 v = rtvec_alloc (n_elt);
16960 scalar_mode = GET_MODE_INNER (mode);
16961
16962 RTVEC_ELT (v, 0) = value;
16963
16964 for (i = 1; i < n_elt; ++i)
16965 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
16966
16967 return gen_rtx_CONST_VECTOR (mode, v);
16968
16969 default:
16970 gcc_unreachable ();
16971 }
16972 }
16973
16974 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
16975 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
16976 for an SSE register. If VECT is true, then replicate the mask for
16977 all elements of the vector register. If INVERT is true, then create
16978 a mask excluding the sign bit. */
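/* For example, the non-inverted V4SFmode mask is four copies of 0x80000000,
while the inverted mask is four copies of 0x7fffffff and is what ABS uses
to clear the sign bit.  */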
16979
16980 rtx
16981 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
16982 {
16983 enum machine_mode vec_mode, imode;
16984 HOST_WIDE_INT hi, lo;
16985 int shift = 63;
16986 rtx v;
16987 rtx mask;
16988
16989 /* Find the sign bit, sign extended to 2*HWI. */
16990 switch (mode)
16991 {
16992 case V8SImode:
16993 case V4SImode:
16994 case V8SFmode:
16995 case V4SFmode:
16996 vec_mode = mode;
16997 mode = GET_MODE_INNER (mode);
16998 imode = SImode;
16999 lo = 0x80000000, hi = lo < 0;
17000 break;
17001
17002 case V4DImode:
17003 case V2DImode:
17004 case V4DFmode:
17005 case V2DFmode:
17006 vec_mode = mode;
17007 mode = GET_MODE_INNER (mode);
17008 imode = DImode;
17009 if (HOST_BITS_PER_WIDE_INT >= 64)
17010 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
17011 else
17012 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
17013 break;
17014
17015 case TImode:
17016 case TFmode:
17017 vec_mode = VOIDmode;
17018 if (HOST_BITS_PER_WIDE_INT >= 64)
17019 {
17020 imode = TImode;
17021 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
17022 }
17023 else
17024 {
17025 rtvec vec;
17026
17027 imode = DImode;
17028 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
17029
17030 if (invert)
17031 {
17032 lo = ~lo, hi = ~hi;
17033 v = constm1_rtx;
17034 }
17035 else
17036 v = const0_rtx;
17037
17038 mask = immed_double_const (lo, hi, imode);
17039
17040 vec = gen_rtvec (2, v, mask);
17041 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
17042 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
17043
17044 return v;
17045 }
17046 break;
17047
17048 default:
17049 gcc_unreachable ();
17050 }
17051
17052 if (invert)
17053 lo = ~lo, hi = ~hi;
17054
17055 /* Force this value into the low part of a fp vector constant. */
17056 mask = immed_double_const (lo, hi, imode);
17057 mask = gen_lowpart (mode, mask);
17058
17059 if (vec_mode == VOIDmode)
17060 return force_reg (mode, mask);
17061
17062 v = ix86_build_const_vector (vec_mode, vect, mask);
17063 return force_reg (vec_mode, v);
17064 }
17065
17066 /* Generate code for floating point ABS or NEG. */
17067
17068 void
17069 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
17070 rtx operands[])
17071 {
17072 rtx mask, set, dst, src;
17073 bool use_sse = false;
17074 bool vector_mode = VECTOR_MODE_P (mode);
17075 enum machine_mode vmode = mode;
17076
17077 if (vector_mode)
17078 use_sse = true;
17079 else if (mode == TFmode)
17080 use_sse = true;
17081 else if (TARGET_SSE_MATH)
17082 {
17083 use_sse = SSE_FLOAT_MODE_P (mode);
17084 if (mode == SFmode)
17085 vmode = V4SFmode;
17086 else if (mode == DFmode)
17087 vmode = V2DFmode;
17088 }
17089
17090 /* NEG and ABS performed with SSE use bitwise mask operations.
17091 Create the appropriate mask now. */
17092 if (use_sse)
17093 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
17094 else
17095 mask = NULL_RTX;
17096
17097 dst = operands[0];
17098 src = operands[1];
17099
17100 set = gen_rtx_fmt_e (code, mode, src);
17101 set = gen_rtx_SET (VOIDmode, dst, set);
17102
17103 if (mask)
17104 {
17105 rtx use, clob;
17106 rtvec par;
17107
17108 use = gen_rtx_USE (VOIDmode, mask);
17109 if (vector_mode)
17110 par = gen_rtvec (2, set, use);
17111 else
17112 {
17113 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17114 par = gen_rtvec (3, set, use, clob);
17115 }
17116 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
17117 }
17118 else
17119 emit_insn (set);
17120 }
17121
17122 /* Expand a copysign operation. Special case operand 0 being a constant. */
17123
17124 void
17125 ix86_expand_copysign (rtx operands[])
17126 {
17127 enum machine_mode mode, vmode;
17128 rtx dest, op0, op1, mask, nmask;
17129
17130 dest = operands[0];
17131 op0 = operands[1];
17132 op1 = operands[2];
17133
17134 mode = GET_MODE (dest);
17135
17136 if (mode == SFmode)
17137 vmode = V4SFmode;
17138 else if (mode == DFmode)
17139 vmode = V2DFmode;
17140 else
17141 vmode = mode;
17142
17143 if (GET_CODE (op0) == CONST_DOUBLE)
17144 {
17145 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
17146
17147 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
17148 op0 = simplify_unary_operation (ABS, mode, op0, mode);
17149
17150 if (mode == SFmode || mode == DFmode)
17151 {
17152 if (op0 == CONST0_RTX (mode))
17153 op0 = CONST0_RTX (vmode);
17154 else
17155 {
17156 rtx v = ix86_build_const_vector (vmode, false, op0);
17157
17158 op0 = force_reg (vmode, v);
17159 }
17160 }
17161 else if (op0 != CONST0_RTX (mode))
17162 op0 = force_reg (mode, op0);
17163
17164 mask = ix86_build_signbit_mask (vmode, 0, 0);
17165
17166 if (mode == SFmode)
17167 copysign_insn = gen_copysignsf3_const;
17168 else if (mode == DFmode)
17169 copysign_insn = gen_copysigndf3_const;
17170 else
17171 copysign_insn = gen_copysigntf3_const;
17172
17173 emit_insn (copysign_insn (dest, op0, op1, mask));
17174 }
17175 else
17176 {
17177 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
17178
17179 nmask = ix86_build_signbit_mask (vmode, 0, 1);
17180 mask = ix86_build_signbit_mask (vmode, 0, 0);
17181
17182 if (mode == SFmode)
17183 copysign_insn = gen_copysignsf3_var;
17184 else if (mode == DFmode)
17185 copysign_insn = gen_copysigndf3_var;
17186 else
17187 copysign_insn = gen_copysigntf3_var;
17188
17189 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
17190 }
17191 }
17192
17193 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
17194 be a constant, and so has already been expanded into a vector constant. */
17195
17196 void
17197 ix86_split_copysign_const (rtx operands[])
17198 {
17199 enum machine_mode mode, vmode;
17200 rtx dest, op0, mask, x;
17201
17202 dest = operands[0];
17203 op0 = operands[1];
17204 mask = operands[3];
17205
17206 mode = GET_MODE (dest);
17207 vmode = GET_MODE (mask);
17208
17209 dest = simplify_gen_subreg (vmode, dest, mode, 0);
17210 x = gen_rtx_AND (vmode, dest, mask);
17211 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17212
17213 if (op0 != CONST0_RTX (vmode))
17214 {
17215 x = gen_rtx_IOR (vmode, dest, op0);
17216 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17217 }
17218 }
17219
17220 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
17221 so we have to do two masks. */
17222
17223 void
17224 ix86_split_copysign_var (rtx operands[])
17225 {
17226 enum machine_mode mode, vmode;
17227 rtx dest, scratch, op0, op1, mask, nmask, x;
17228
17229 dest = operands[0];
17230 scratch = operands[1];
17231 op0 = operands[2];
17232 op1 = operands[3];
17233 nmask = operands[4];
17234 mask = operands[5];
17235
17236 mode = GET_MODE (dest);
17237 vmode = GET_MODE (mask);
17238
17239 if (rtx_equal_p (op0, op1))
17240 {
17241 /* Shouldn't happen often (it's useless, obviously), but when it does
17242 we'd generate incorrect code if we continue below. */
17243 emit_move_insn (dest, op0);
17244 return;
17245 }
17246
17247 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
17248 {
17249 gcc_assert (REGNO (op1) == REGNO (scratch));
17250
17251 x = gen_rtx_AND (vmode, scratch, mask);
17252 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
17253
17254 dest = mask;
17255 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
17256 x = gen_rtx_NOT (vmode, dest);
17257 x = gen_rtx_AND (vmode, x, op0);
17258 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17259 }
17260 else
17261 {
17262 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
17263 {
17264 x = gen_rtx_AND (vmode, scratch, mask);
17265 }
17266 else /* alternative 2,4 */
17267 {
17268 gcc_assert (REGNO (mask) == REGNO (scratch));
17269 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
17270 x = gen_rtx_AND (vmode, scratch, op1);
17271 }
17272 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
17273
17274 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
17275 {
17276 dest = simplify_gen_subreg (vmode, op0, mode, 0);
17277 x = gen_rtx_AND (vmode, dest, nmask);
17278 }
17279 else /* alternative 3,4 */
17280 {
17281 gcc_assert (REGNO (nmask) == REGNO (dest));
17282 dest = nmask;
17283 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
17284 x = gen_rtx_AND (vmode, dest, op0);
17285 }
17286 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17287 }
17288
17289 x = gen_rtx_IOR (vmode, dest, scratch);
17290 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17291 }
17292
17293 /* Return TRUE or FALSE depending on whether the first SET in INSN
17294 has source and destination with matching CC modes, and that the
17295 CC mode is at least as constrained as REQ_MODE. */
17296
17297 bool
17298 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
17299 {
17300 rtx set;
17301 enum machine_mode set_mode;
17302
17303 set = PATTERN (insn);
17304 if (GET_CODE (set) == PARALLEL)
17305 set = XVECEXP (set, 0, 0);
17306 gcc_assert (GET_CODE (set) == SET);
17307 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
17308
17309 set_mode = GET_MODE (SET_DEST (set));
17310 switch (set_mode)
17311 {
17312 case CCNOmode:
17313 if (req_mode != CCNOmode
17314 && (req_mode != CCmode
17315 || XEXP (SET_SRC (set), 1) != const0_rtx))
17316 return false;
17317 break;
17318 case CCmode:
17319 if (req_mode == CCGCmode)
17320 return false;
17321 /* FALLTHRU */
17322 case CCGCmode:
17323 if (req_mode == CCGOCmode || req_mode == CCNOmode)
17324 return false;
17325 /* FALLTHRU */
17326 case CCGOCmode:
17327 if (req_mode == CCZmode)
17328 return false;
17329 /* FALLTHRU */
17330 case CCZmode:
17331 break;
17332
17333 case CCAmode:
17334 case CCCmode:
17335 case CCOmode:
17336 case CCSmode:
17337 if (set_mode != req_mode)
17338 return false;
17339 break;
17340
17341 default:
17342 gcc_unreachable ();
17343 }
17344
17345 return GET_MODE (SET_SRC (set)) == set_mode;
17346 }
17347
17348 /* Generate insn patterns to do an integer compare of OPERANDS. */
17349
17350 static rtx
17351 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
17352 {
17353 enum machine_mode cmpmode;
17354 rtx tmp, flags;
17355
17356 cmpmode = SELECT_CC_MODE (code, op0, op1);
17357 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
17358
17359 /* This is very simple, but making the interface the same as in the
17360 FP case makes the rest of the code easier. */
17361 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
17362 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
17363
17364 /* Return the test that should be put into the flags user, i.e.
17365 the bcc, scc, or cmov instruction. */
17366 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
17367 }
17368
17369 /* Figure out whether to use ordered or unordered fp comparisons.
17370 Return the appropriate mode to use. */
17371
17372 enum machine_mode
17373 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
17374 {
17375 /* ??? In order to make all comparisons reversible, we do all comparisons
17376 non-trapping when compiling for IEEE.  Once gcc is able to distinguish
17377 all forms of trapping and nontrapping comparisons, we can make inequality
17378 comparisons trapping again, since that results in better code when using
17379 FCOM based compares.  */
17380 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
17381 }
17382
17383 enum machine_mode
17384 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
17385 {
17386 enum machine_mode mode = GET_MODE (op0);
17387
17388 if (SCALAR_FLOAT_MODE_P (mode))
17389 {
17390 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
17391 return ix86_fp_compare_mode (code);
17392 }
17393
17394 switch (code)
17395 {
17396 /* Only zero flag is needed. */
17397 case EQ: /* ZF=0 */
17398 case NE: /* ZF!=0 */
17399 return CCZmode;
17400 /* Codes needing carry flag. */
17401 case GEU: /* CF=0 */
17402 case LTU: /* CF=1 */
17403 /* Detect overflow checks. They need just the carry flag. */
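/* E.g. an LTU comparison of (plus (a) (b)) against (a) is true exactly when
a + b wrapped around, so only the carry flag matters.  */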
17404 if (GET_CODE (op0) == PLUS
17405 && rtx_equal_p (op1, XEXP (op0, 0)))
17406 return CCCmode;
17407 else
17408 return CCmode;
17409 case GTU: /* CF=0 & ZF=0 */
17410 case LEU: /* CF=1 | ZF=1 */
17411 /* Detect overflow checks. They need just the carry flag. */
17412 if (GET_CODE (op0) == MINUS
17413 && rtx_equal_p (op1, XEXP (op0, 0)))
17414 return CCCmode;
17415 else
17416 return CCmode;
17417 /* Codes possibly doable only with the sign flag when
17418 comparing against zero.  */
17419 case GE: /* SF=OF or SF=0 */
17420 case LT: /* SF<>OF or SF=1 */
17421 if (op1 == const0_rtx)
17422 return CCGOCmode;
17423 else
17424 /* For other cases the carry flag is not required.  */
17425 return CCGCmode;
17426 /* Codes doable only with the sign flag when comparing
17427 against zero, but we miss the jump instruction for it,
17428 so we need to use relational tests against overflow,
17429 which thus needs to be zero.  */
17430 case GT: /* ZF=0 & SF=OF */
17431 case LE: /* ZF=1 | SF<>OF */
17432 if (op1 == const0_rtx)
17433 return CCNOmode;
17434 else
17435 return CCGCmode;
17436 /* The strcmp pattern does (use flags) and combine may ask us for the
17437 proper mode.  */
17438 case USE:
17439 return CCmode;
17440 default:
17441 gcc_unreachable ();
17442 }
17443 }
17444
17445 /* Return the fixed registers used for condition codes. */
17446
17447 static bool
17448 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
17449 {
17450 *p1 = FLAGS_REG;
17451 *p2 = FPSR_REG;
17452 return true;
17453 }
17454
17455 /* If two condition code modes are compatible, return a condition code
17456 mode which is compatible with both. Otherwise, return
17457 VOIDmode. */
17458
17459 static enum machine_mode
17460 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
17461 {
17462 if (m1 == m2)
17463 return m1;
17464
17465 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
17466 return VOIDmode;
17467
17468 if ((m1 == CCGCmode && m2 == CCGOCmode)
17469 || (m1 == CCGOCmode && m2 == CCGCmode))
17470 return CCGCmode;
17471
17472 switch (m1)
17473 {
17474 default:
17475 gcc_unreachable ();
17476
17477 case CCmode:
17478 case CCGCmode:
17479 case CCGOCmode:
17480 case CCNOmode:
17481 case CCAmode:
17482 case CCCmode:
17483 case CCOmode:
17484 case CCSmode:
17485 case CCZmode:
17486 switch (m2)
17487 {
17488 default:
17489 return VOIDmode;
17490
17491 case CCmode:
17492 case CCGCmode:
17493 case CCGOCmode:
17494 case CCNOmode:
17495 case CCAmode:
17496 case CCCmode:
17497 case CCOmode:
17498 case CCSmode:
17499 case CCZmode:
17500 return CCmode;
17501 }
17502
17503 case CCFPmode:
17504 case CCFPUmode:
17505 /* These are only compatible with themselves, which we already
17506 checked above. */
17507 return VOIDmode;
17508 }
17509 }
17510
17511
17512 /* Return a comparison we can do that is equivalent to
17513 swap_condition (code), apart possibly from orderedness.
17514 Never change orderedness if TARGET_IEEE_FP, returning
17515 UNKNOWN in that case if necessary.  */
17516
17517 static enum rtx_code
17518 ix86_fp_swap_condition (enum rtx_code code)
17519 {
17520 switch (code)
17521 {
17522 case GT: /* GTU - CF=0 & ZF=0 */
17523 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
17524 case GE: /* GEU - CF=0 */
17525 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
17526 case UNLT: /* LTU - CF=1 */
17527 return TARGET_IEEE_FP ? UNKNOWN : GT;
17528 case UNLE: /* LEU - CF=1 | ZF=1 */
17529 return TARGET_IEEE_FP ? UNKNOWN : GE;
17530 default:
17531 return swap_condition (code);
17532 }
17533 }
17534
17535 /* Return the cost of comparison CODE using the best strategy for performance.
17536 All following functions use the number of instructions as the cost metric.
17537 In the future this should be tweaked to compute bytes for optimize_size and
17538 to take into account the performance of various instructions on various CPUs.  */
17539
17540 static int
17541 ix86_fp_comparison_cost (enum rtx_code code)
17542 {
17543 int arith_cost;
17544
17545 /* The cost of code using bit-twiddling on %ah. */
17546 switch (code)
17547 {
17548 case UNLE:
17549 case UNLT:
17550 case LTGT:
17551 case GT:
17552 case GE:
17553 case UNORDERED:
17554 case ORDERED:
17555 case UNEQ:
17556 arith_cost = 4;
17557 break;
17558 case LT:
17559 case NE:
17560 case EQ:
17561 case UNGE:
17562 arith_cost = TARGET_IEEE_FP ? 5 : 4;
17563 break;
17564 case LE:
17565 case UNGT:
17566 arith_cost = TARGET_IEEE_FP ? 6 : 4;
17567 break;
17568 default:
17569 gcc_unreachable ();
17570 }
17571
17572 switch (ix86_fp_comparison_strategy (code))
17573 {
17574 case IX86_FPCMP_COMI:
17575 return arith_cost > 4 ? 3 : 2;
17576 case IX86_FPCMP_SAHF:
17577 return arith_cost > 4 ? 4 : 3;
17578 default:
17579 return arith_cost;
17580 }
17581 }
17582
17583 /* Return the strategy to use for floating-point comparisons.  We assume that
17584 fcomi is always preferable where available, since that is also true when
17585 looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test).  */
17586
17587 enum ix86_fpcmp_strategy
17588 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
17589 {
17590 /* Do fcomi/sahf based test when profitable. */
17591
17592 if (TARGET_CMOVE)
17593 return IX86_FPCMP_COMI;
17594
17595 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
17596 return IX86_FPCMP_SAHF;
17597
17598 return IX86_FPCMP_ARITH;
17599 }
17600
17601 /* Swap, force into registers, or otherwise massage the two operands
17602 to a fp comparison. The operands are updated in place; the new
17603 comparison code is returned. */
17604
17605 static enum rtx_code
17606 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
17607 {
17608 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
17609 rtx op0 = *pop0, op1 = *pop1;
17610 enum machine_mode op_mode = GET_MODE (op0);
17611 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
17612
17613 /* All of the unordered compare instructions only work on registers.
17614 The same is true of the fcomi compare instructions. The XFmode
17615 compare instructions require registers except when comparing
17616 against zero or when converting operand 1 from fixed point to
17617 floating point. */
17618
17619 if (!is_sse
17620 && (fpcmp_mode == CCFPUmode
17621 || (op_mode == XFmode
17622 && ! (standard_80387_constant_p (op0) == 1
17623 || standard_80387_constant_p (op1) == 1)
17624 && GET_CODE (op1) != FLOAT)
17625 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
17626 {
17627 op0 = force_reg (op_mode, op0);
17628 op1 = force_reg (op_mode, op1);
17629 }
17630 else
17631 {
17632 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
17633 things around if they appear profitable, otherwise force op0
17634 into a register. */
17635
17636 if (standard_80387_constant_p (op0) == 0
17637 || (MEM_P (op0)
17638 && ! (standard_80387_constant_p (op1) == 0
17639 || MEM_P (op1))))
17640 {
17641 enum rtx_code new_code = ix86_fp_swap_condition (code);
17642 if (new_code != UNKNOWN)
17643 {
17644 rtx tmp;
17645 tmp = op0, op0 = op1, op1 = tmp;
17646 code = new_code;
17647 }
17648 }
17649
17650 if (!REG_P (op0))
17651 op0 = force_reg (op_mode, op0);
17652
17653 if (CONSTANT_P (op1))
17654 {
17655 int tmp = standard_80387_constant_p (op1);
17656 if (tmp == 0)
17657 op1 = validize_mem (force_const_mem (op_mode, op1));
17658 else if (tmp == 1)
17659 {
17660 if (TARGET_CMOVE)
17661 op1 = force_reg (op_mode, op1);
17662 }
17663 else
17664 op1 = force_reg (op_mode, op1);
17665 }
17666 }
17667
17668 /* Try to rearrange the comparison to make it cheaper. */
17669 if (ix86_fp_comparison_cost (code)
17670 > ix86_fp_comparison_cost (swap_condition (code))
17671 && (REG_P (op1) || can_create_pseudo_p ()))
17672 {
17673 rtx tmp;
17674 tmp = op0, op0 = op1, op1 = tmp;
17675 code = swap_condition (code);
17676 if (!REG_P (op0))
17677 op0 = force_reg (op_mode, op0);
17678 }
17679
17680 *pop0 = op0;
17681 *pop1 = op1;
17682 return code;
17683 }
17684
17685 /* Convert comparison codes we use to represent FP comparison to integer
17686 code that will result in proper branch. Return UNKNOWN if no such code
17687 is available. */
17688
17689 enum rtx_code
17690 ix86_fp_compare_code_to_integer (enum rtx_code code)
17691 {
17692 switch (code)
17693 {
17694 case GT:
17695 return GTU;
17696 case GE:
17697 return GEU;
17698 case ORDERED:
17699 case UNORDERED:
17700 return code;
17701 break;
17702 case UNEQ:
17703 return EQ;
17704 break;
17705 case UNLT:
17706 return LTU;
17707 break;
17708 case UNLE:
17709 return LEU;
17710 break;
17711 case LTGT:
17712 return NE;
17713 break;
17714 default:
17715 return UNKNOWN;
17716 }
17717 }
17718
17719 /* Generate insn patterns to do a floating point compare of OPERANDS. */
17720
17721 static rtx
17722 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
17723 {
17724 enum machine_mode fpcmp_mode, intcmp_mode;
17725 rtx tmp, tmp2;
17726
17727 fpcmp_mode = ix86_fp_compare_mode (code);
17728 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
17729
17730 /* Do fcomi/sahf based test when profitable. */
17731 switch (ix86_fp_comparison_strategy (code))
17732 {
17733 case IX86_FPCMP_COMI:
17734 intcmp_mode = fpcmp_mode;
17735 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17736 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17737 tmp);
17738 emit_insn (tmp);
17739 break;
17740
17741 case IX86_FPCMP_SAHF:
17742 intcmp_mode = fpcmp_mode;
17743 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17744 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17745 tmp);
17746
17747 if (!scratch)
17748 scratch = gen_reg_rtx (HImode);
17749 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
17750 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
17751 break;
17752
17753 case IX86_FPCMP_ARITH:
17754 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
17755 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17756 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
17757 if (!scratch)
17758 scratch = gen_reg_rtx (HImode);
17759 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
17760
17761 /* In the unordered case, we have to check C2 for NaN's, which
17762 doesn't happen to work out to anything nice combination-wise.
17763 So do some bit twiddling on the value we've got in AH to come
17764 up with an appropriate set of condition codes. */
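/* After fnstsw, AH holds the FPU condition bits: C0 in bit 0x01, C2 in bit
0x04 and C3 in bit 0x40; the masks below (0x45, 0x44, 0x40, 0x05, 0x04,
0x01) select combinations of those bits.  */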
17765
17766 intcmp_mode = CCNOmode;
17767 switch (code)
17768 {
17769 case GT:
17770 case UNGT:
17771 if (code == GT || !TARGET_IEEE_FP)
17772 {
17773 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17774 code = EQ;
17775 }
17776 else
17777 {
17778 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17779 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17780 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
17781 intcmp_mode = CCmode;
17782 code = GEU;
17783 }
17784 break;
17785 case LT:
17786 case UNLT:
17787 if (code == LT && TARGET_IEEE_FP)
17788 {
17789 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17790 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
17791 intcmp_mode = CCmode;
17792 code = EQ;
17793 }
17794 else
17795 {
17796 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
17797 code = NE;
17798 }
17799 break;
17800 case GE:
17801 case UNGE:
17802 if (code == GE || !TARGET_IEEE_FP)
17803 {
17804 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
17805 code = EQ;
17806 }
17807 else
17808 {
17809 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17810 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
17811 code = NE;
17812 }
17813 break;
17814 case LE:
17815 case UNLE:
17816 if (code == LE && TARGET_IEEE_FP)
17817 {
17818 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17819 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17820 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17821 intcmp_mode = CCmode;
17822 code = LTU;
17823 }
17824 else
17825 {
17826 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17827 code = NE;
17828 }
17829 break;
17830 case EQ:
17831 case UNEQ:
17832 if (code == EQ && TARGET_IEEE_FP)
17833 {
17834 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17835 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17836 intcmp_mode = CCmode;
17837 code = EQ;
17838 }
17839 else
17840 {
17841 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17842 code = NE;
17843 }
17844 break;
17845 case NE:
17846 case LTGT:
17847 if (code == NE && TARGET_IEEE_FP)
17848 {
17849 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17850 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
17851 GEN_INT (0x40)));
17852 code = NE;
17853 }
17854 else
17855 {
17856 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17857 code = EQ;
17858 }
17859 break;
17860
17861 case UNORDERED:
17862 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17863 code = NE;
17864 break;
17865 case ORDERED:
17866 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17867 code = EQ;
17868 break;
17869
17870 default:
17871 gcc_unreachable ();
17872 }
17873 break;
17874
17875 default:
17876 gcc_unreachable ();
17877 }
17878
17879 /* Return the test that should be put into the flags user, i.e.
17880 the bcc, scc, or cmov instruction. */
17881 return gen_rtx_fmt_ee (code, VOIDmode,
17882 gen_rtx_REG (intcmp_mode, FLAGS_REG),
17883 const0_rtx);
17884 }
17885
17886 static rtx
17887 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
17888 {
17889 rtx ret;
17890
17891 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
17892 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
17893
17894 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
17895 {
17896 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
17897 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17898 }
17899 else
17900 ret = ix86_expand_int_compare (code, op0, op1);
17901
17902 return ret;
17903 }
17904
17905 void
17906 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
17907 {
17908 enum machine_mode mode = GET_MODE (op0);
17909 rtx tmp;
17910
17911 switch (mode)
17912 {
17913 case SFmode:
17914 case DFmode:
17915 case XFmode:
17916 case QImode:
17917 case HImode:
17918 case SImode:
17919 simple:
17920 tmp = ix86_expand_compare (code, op0, op1);
17921 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
17922 gen_rtx_LABEL_REF (VOIDmode, label),
17923 pc_rtx);
17924 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
17925 return;
17926
17927 case DImode:
17928 if (TARGET_64BIT)
17929 goto simple;
17930 case TImode:
17931 /* Expand DImode branch into multiple compare+branch. */
17932 {
17933 rtx lo[2], hi[2], label2;
17934 enum rtx_code code1, code2, code3;
17935 enum machine_mode submode;
17936
17937 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
17938 {
17939 tmp = op0, op0 = op1, op1 = tmp;
17940 code = swap_condition (code);
17941 }
17942
17943 split_double_mode (mode, &op0, 1, lo+0, hi+0);
17944 split_double_mode (mode, &op1, 1, lo+1, hi+1);
17945
17946 submode = mode == DImode ? SImode : DImode;
17947
17948 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
17949 avoid two branches. This costs one extra insn, so disable when
17950 optimizing for size. */
17951
17952 if ((code == EQ || code == NE)
17953 && (!optimize_insn_for_size_p ()
17954 || hi[1] == const0_rtx || lo[1] == const0_rtx))
17955 {
17956 rtx xor0, xor1;
17957
17958 xor1 = hi[0];
17959 if (hi[1] != const0_rtx)
17960 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
17961 NULL_RTX, 0, OPTAB_WIDEN);
17962
17963 xor0 = lo[0];
17964 if (lo[1] != const0_rtx)
17965 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
17966 NULL_RTX, 0, OPTAB_WIDEN);
17967
17968 tmp = expand_binop (submode, ior_optab, xor1, xor0,
17969 NULL_RTX, 0, OPTAB_WIDEN);
17970
17971 ix86_expand_branch (code, tmp, const0_rtx, label);
17972 return;
17973 }
17974
17975 /* Otherwise, if we are doing less-than or greater-or-equal-than,
17976 op1 is a constant and the low word is zero, then we can just
17977 examine the high word. Similarly for low word -1 and
17978 less-or-equal-than or greater-than. */
17979
17980 if (CONST_INT_P (hi[1]))
17981 switch (code)
17982 {
17983 case LT: case LTU: case GE: case GEU:
17984 if (lo[1] == const0_rtx)
17985 {
17986 ix86_expand_branch (code, hi[0], hi[1], label);
17987 return;
17988 }
17989 break;
17990 case LE: case LEU: case GT: case GTU:
17991 if (lo[1] == constm1_rtx)
17992 {
17993 ix86_expand_branch (code, hi[0], hi[1], label);
17994 return;
17995 }
17996 break;
17997 default:
17998 break;
17999 }
18000
18001 /* Otherwise, we need two or three jumps. */
18002
18003 label2 = gen_label_rtx ();
18004
18005 code1 = code;
18006 code2 = swap_condition (code);
18007 code3 = unsigned_condition (code);
18008
18009 switch (code)
18010 {
18011 case LT: case GT: case LTU: case GTU:
18012 break;
18013
18014 case LE: code1 = LT; code2 = GT; break;
18015 case GE: code1 = GT; code2 = LT; break;
18016 case LEU: code1 = LTU; code2 = GTU; break;
18017 case GEU: code1 = GTU; code2 = LTU; break;
18018
18019 case EQ: code1 = UNKNOWN; code2 = NE; break;
18020 case NE: code2 = UNKNOWN; break;
18021
18022 default:
18023 gcc_unreachable ();
18024 }
18025
18026 /*
18027 * a < b =>
18028 * if (hi(a) < hi(b)) goto true;
18029 * if (hi(a) > hi(b)) goto false;
18030 * if (lo(a) < lo(b)) goto true;
18031 * false:
18032 */
18033
18034 if (code1 != UNKNOWN)
18035 ix86_expand_branch (code1, hi[0], hi[1], label);
18036 if (code2 != UNKNOWN)
18037 ix86_expand_branch (code2, hi[0], hi[1], label2);
18038
18039 ix86_expand_branch (code3, lo[0], lo[1], label);
18040
18041 if (code2 != UNKNOWN)
18042 emit_label (label2);
18043 return;
18044 }
18045
18046 default:
18047 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
18048 goto simple;
18049 }
18050 }
18051
18052 /* Split branch based on floating point condition. */
18053 void
18054 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
18055 rtx target1, rtx target2, rtx tmp, rtx pushed)
18056 {
18057 rtx condition;
18058 rtx i;
18059
18060 if (target2 != pc_rtx)
18061 {
18062 rtx tmp = target2;
18063 code = reverse_condition_maybe_unordered (code);
18064 target2 = target1;
18065 target1 = tmp;
18066 }
18067
18068 condition = ix86_expand_fp_compare (code, op1, op2,
18069 tmp);
18070
18071 /* Remove pushed operand from stack. */
18072 if (pushed)
18073 ix86_free_from_memory (GET_MODE (pushed));
18074
18075 i = emit_jump_insn (gen_rtx_SET
18076 (VOIDmode, pc_rtx,
18077 gen_rtx_IF_THEN_ELSE (VOIDmode,
18078 condition, target1, target2)));
18079 if (split_branch_probability >= 0)
18080 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
18081 }
18082
18083 void
18084 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
18085 {
18086 rtx ret;
18087
18088 gcc_assert (GET_MODE (dest) == QImode);
18089
18090 ret = ix86_expand_compare (code, op0, op1);
18091 PUT_MODE (ret, QImode);
18092 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
18093 }
18094
18095 /* Expand comparison setting or clearing carry flag. Return true when
18096 successful and set pop for the operation. */
18097 static bool
18098 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
18099 {
18100 enum machine_mode mode =
18101 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
18102
18103 /* Do not handle double-mode compares that go through the special path.  */
18104 if (mode == (TARGET_64BIT ? TImode : DImode))
18105 return false;
18106
18107 if (SCALAR_FLOAT_MODE_P (mode))
18108 {
18109 rtx compare_op, compare_seq;
18110
18111 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
18112
18113 /* Shortcut: the following common codes never translate
18114 into carry flag compares.  */
18115 if (code == EQ || code == NE || code == UNEQ || code == LTGT
18116 || code == ORDERED || code == UNORDERED)
18117 return false;
18118
18119 /* These comparisons require the zero flag; swap operands so they won't.  */
18120 if ((code == GT || code == UNLE || code == LE || code == UNGT)
18121 && !TARGET_IEEE_FP)
18122 {
18123 rtx tmp = op0;
18124 op0 = op1;
18125 op1 = tmp;
18126 code = swap_condition (code);
18127 }
18128
18129 /* Try to expand the comparison and verify that we end up with
18130 a carry-flag-based comparison.  This fails to be true only when
18131 we decide to expand the comparison using arithmetic, which is
18132 not a common scenario.  */
18133 start_sequence ();
18134 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
18135 compare_seq = get_insns ();
18136 end_sequence ();
18137
18138 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
18139 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
18140 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
18141 else
18142 code = GET_CODE (compare_op);
18143
18144 if (code != LTU && code != GEU)
18145 return false;
18146
18147 emit_insn (compare_seq);
18148 *pop = compare_op;
18149 return true;
18150 }
18151
18152 if (!INTEGRAL_MODE_P (mode))
18153 return false;
18154
18155 switch (code)
18156 {
18157 case LTU:
18158 case GEU:
18159 break;
18160
18161 /* Convert a==0 into (unsigned)a<1. */
18162 case EQ:
18163 case NE:
18164 if (op1 != const0_rtx)
18165 return false;
18166 op1 = const1_rtx;
18167 code = (code == EQ ? LTU : GEU);
18168 break;
18169
18170 /* Convert a>b into b<a or a>=b-1. */
18171 case GTU:
18172 case LEU:
18173 if (CONST_INT_P (op1))
18174 {
18175 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
18176 /* Bail out on overflow.  We could still swap the operands, but
18177 that would force loading the constant into a register.  */
18178 if (op1 == const0_rtx
18179 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
18180 return false;
18181 code = (code == GTU ? GEU : LTU);
18182 }
18183 else
18184 {
18185 rtx tmp = op1;
18186 op1 = op0;
18187 op0 = tmp;
18188 code = (code == GTU ? LTU : GEU);
18189 }
18190 break;
18191
18192 /* Convert a>=0 into (unsigned)a<0x80000000. */
18193 case LT:
18194 case GE:
18195 if (mode == DImode || op1 != const0_rtx)
18196 return false;
18197 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
18198 code = (code == LT ? GEU : LTU);
18199 break;
18200 case LE:
18201 case GT:
18202 if (mode == DImode || op1 != constm1_rtx)
18203 return false;
18204 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
18205 code = (code == LE ? GEU : LTU);
18206 break;
18207
18208 default:
18209 return false;
18210 }
18211 /* Swapping operands may cause a constant to appear as the first operand. */
18212 if (!nonimmediate_operand (op0, VOIDmode))
18213 {
18214 if (!can_create_pseudo_p ())
18215 return false;
18216 op0 = force_reg (mode, op0);
18217 }
18218 *pop = ix86_expand_compare (code, op0, op1);
18219 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
18220 return true;
18221 }
18222
18223 bool
18224 ix86_expand_int_movcc (rtx operands[])
18225 {
18226 enum rtx_code code = GET_CODE (operands[1]), compare_code;
18227 rtx compare_seq, compare_op;
18228 enum machine_mode mode = GET_MODE (operands[0]);
18229 bool sign_bit_compare_p = false;
18230 rtx op0 = XEXP (operands[1], 0);
18231 rtx op1 = XEXP (operands[1], 1);
18232
18233 start_sequence ();
18234 compare_op = ix86_expand_compare (code, op0, op1);
18235 compare_seq = get_insns ();
18236 end_sequence ();
18237
18238 compare_code = GET_CODE (compare_op);
18239
18240 if ((op1 == const0_rtx && (code == GE || code == LT))
18241 || (op1 == constm1_rtx && (code == GT || code == LE)))
18242 sign_bit_compare_p = true;
18243
18244 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
18245 HImode insns, we'd be swallowed in word prefix ops. */
18246
18247 if ((mode != HImode || TARGET_FAST_PREFIX)
18248 && (mode != (TARGET_64BIT ? TImode : DImode))
18249 && CONST_INT_P (operands[2])
18250 && CONST_INT_P (operands[3]))
18251 {
18252 rtx out = operands[0];
18253 HOST_WIDE_INT ct = INTVAL (operands[2]);
18254 HOST_WIDE_INT cf = INTVAL (operands[3]);
18255 HOST_WIDE_INT diff;
18256
18257 diff = ct - cf;
18258 /* Sign bit compares are better done using shifts than by using
18259 sbb. */
18260 if (sign_bit_compare_p
18261 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
18262 {
18263 /* Detect overlap between destination and compare sources. */
18264 rtx tmp = out;
18265
18266 if (!sign_bit_compare_p)
18267 {
18268 rtx flags;
18269 bool fpcmp = false;
18270
18271 compare_code = GET_CODE (compare_op);
18272
18273 flags = XEXP (compare_op, 0);
18274
18275 if (GET_MODE (flags) == CCFPmode
18276 || GET_MODE (flags) == CCFPUmode)
18277 {
18278 fpcmp = true;
18279 compare_code
18280 = ix86_fp_compare_code_to_integer (compare_code);
18281 }
18282
18283 /* To simplify the rest of the code, restrict to the GEU case. */
18284 if (compare_code == LTU)
18285 {
18286 HOST_WIDE_INT tmp = ct;
18287 ct = cf;
18288 cf = tmp;
18289 compare_code = reverse_condition (compare_code);
18290 code = reverse_condition (code);
18291 }
18292 else
18293 {
18294 if (fpcmp)
18295 PUT_CODE (compare_op,
18296 reverse_condition_maybe_unordered
18297 (GET_CODE (compare_op)));
18298 else
18299 PUT_CODE (compare_op,
18300 reverse_condition (GET_CODE (compare_op)));
18301 }
18302 diff = ct - cf;
18303
18304 if (reg_overlap_mentioned_p (out, op0)
18305 || reg_overlap_mentioned_p (out, op1))
18306 tmp = gen_reg_rtx (mode);
18307
18308 if (mode == DImode)
18309 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
18310 else
18311 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
18312 flags, compare_op));
18313 }
18314 else
18315 {
18316 if (code == GT || code == GE)
18317 code = reverse_condition (code);
18318 else
18319 {
18320 HOST_WIDE_INT tmp = ct;
18321 ct = cf;
18322 cf = tmp;
18323 diff = ct - cf;
18324 }
18325 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
18326 }
18327
18328 if (diff == 1)
18329 {
18330 /*
18331 * cmpl op0,op1
18332 * sbbl dest,dest
18333 * [addl dest, ct]
18334 *
18335 * Size 5 - 8.
18336 */
18337 if (ct)
18338 tmp = expand_simple_binop (mode, PLUS,
18339 tmp, GEN_INT (ct),
18340 copy_rtx (tmp), 1, OPTAB_DIRECT);
18341 }
18342 else if (cf == -1)
18343 {
18344 /*
18345 * cmpl op0,op1
18346 * sbbl dest,dest
18347 * orl $ct, dest
18348 *
18349 * Size 8.
18350 */
18351 tmp = expand_simple_binop (mode, IOR,
18352 tmp, GEN_INT (ct),
18353 copy_rtx (tmp), 1, OPTAB_DIRECT);
18354 }
18355 else if (diff == -1 && ct)
18356 {
18357 /*
18358 * cmpl op0,op1
18359 * sbbl dest,dest
18360 * notl dest
18361 * [addl dest, cf]
18362 *
18363 * Size 8 - 11.
18364 */
18365 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
18366 if (cf)
18367 tmp = expand_simple_binop (mode, PLUS,
18368 copy_rtx (tmp), GEN_INT (cf),
18369 copy_rtx (tmp), 1, OPTAB_DIRECT);
18370 }
18371 else
18372 {
18373 /*
18374 * cmpl op0,op1
18375 * sbbl dest,dest
18376 * [notl dest]
18377 * andl cf - ct, dest
18378 * [addl dest, ct]
18379 *
18380 * Size 8 - 11.
18381 */
18382
18383 if (cf == 0)
18384 {
18385 cf = ct;
18386 ct = 0;
18387 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
18388 }
18389
18390 tmp = expand_simple_binop (mode, AND,
18391 copy_rtx (tmp),
18392 gen_int_mode (cf - ct, mode),
18393 copy_rtx (tmp), 1, OPTAB_DIRECT);
18394 if (ct)
18395 tmp = expand_simple_binop (mode, PLUS,
18396 copy_rtx (tmp), GEN_INT (ct),
18397 copy_rtx (tmp), 1, OPTAB_DIRECT);
18398 }
18399
18400 if (!rtx_equal_p (tmp, out))
18401 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
18402
18403 return true;
18404 }
18405
18406 if (diff < 0)
18407 {
18408 enum machine_mode cmp_mode = GET_MODE (op0);
18409
18410 HOST_WIDE_INT tmp;
18411 tmp = ct, ct = cf, cf = tmp;
18412 diff = -diff;
18413
18414 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18415 {
18416 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18417
18418 /* We may be reversing an unordered compare to a normal compare, which
18419 is not valid in general (we may convert a non-trapping condition
18420 into a trapping one); however, on i386 we currently emit all
18421 comparisons unordered. */
18422 compare_code = reverse_condition_maybe_unordered (compare_code);
18423 code = reverse_condition_maybe_unordered (code);
18424 }
18425 else
18426 {
18427 compare_code = reverse_condition (compare_code);
18428 code = reverse_condition (code);
18429 }
18430 }
18431
18432 compare_code = UNKNOWN;
18433 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
18434 && CONST_INT_P (op1))
18435 {
18436 if (op1 == const0_rtx
18437 && (code == LT || code == GE))
18438 compare_code = code;
18439 else if (op1 == constm1_rtx)
18440 {
18441 if (code == LE)
18442 compare_code = LT;
18443 else if (code == GT)
18444 compare_code = GE;
18445 }
18446 }
18447
18448 /* Optimize dest = (op0 < 0) ? -1 : cf. */
18449 if (compare_code != UNKNOWN
18450 && GET_MODE (op0) == GET_MODE (out)
18451 && (cf == -1 || ct == -1))
18452 {
18453 /* If lea code below could be used, only optimize
18454 if it results in a 2 insn sequence. */
18455
18456 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
18457 || diff == 3 || diff == 5 || diff == 9)
18458 || (compare_code == LT && ct == -1)
18459 || (compare_code == GE && cf == -1))
18460 {
18461 /*
18462 * notl op1 (if necessary)
18463 * sarl $31, op1
18464 * orl cf, op1
18465 */
18466 if (ct != -1)
18467 {
18468 cf = ct;
18469 ct = -1;
18470 code = reverse_condition (code);
18471 }
18472
18473 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18474
18475 out = expand_simple_binop (mode, IOR,
18476 out, GEN_INT (cf),
18477 out, 1, OPTAB_DIRECT);
18478 if (out != operands[0])
18479 emit_move_insn (operands[0], out);
18480
18481 return true;
18482 }
18483 }
18484
18485
18486 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
18487 || diff == 3 || diff == 5 || diff == 9)
18488 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
18489 && (mode != DImode
18490 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
18491 {
18492 /*
18493 * xorl dest,dest
18494 * cmpl op1,op2
18495 * setcc dest
18496 * lea cf(dest*(ct-cf)),dest
18497 *
18498 * Size 14.
18499 *
18500 * This also catches the degenerate setcc-only case.
18501 */
18502
18503 rtx tmp;
18504 int nops;
18505
18506 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18507
18508 nops = 0;
18509 /* On x86_64 the lea instruction operates on Pmode, so we need
18510 to get the arithmetic done in the proper mode to match. */
18511 if (diff == 1)
18512 tmp = copy_rtx (out);
18513 else
18514 {
18515 rtx out1;
18516 out1 = copy_rtx (out);
18517 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
18518 nops++;
18519 if (diff & 1)
18520 {
18521 tmp = gen_rtx_PLUS (mode, tmp, out1);
18522 nops++;
18523 }
18524 }
18525 if (cf != 0)
18526 {
18527 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
18528 nops++;
18529 }
18530 if (!rtx_equal_p (tmp, out))
18531 {
18532 if (nops == 1)
18533 out = force_operand (tmp, copy_rtx (out));
18534 else
18535 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
18536 }
18537 if (!rtx_equal_p (out, operands[0]))
18538 emit_move_insn (operands[0], copy_rtx (out));
18539
18540 return true;
18541 }
18542
18543 /*
18544 * General case: Jumpful:
18545 * xorl dest,dest cmpl op1, op2
18546 * cmpl op1, op2 movl ct, dest
18547 * setcc dest jcc 1f
18548 * decl dest movl cf, dest
18549 * andl (cf-ct),dest 1:
18550 * addl ct,dest
18551 *
18552 * Size 20. Size 14.
18553 *
18554 * This is reasonably steep, but branch mispredict costs are
18555 * high on modern cpus, so consider failing only if optimizing
18556 * for space.
18557 */
18558
18559 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18560 && BRANCH_COST (optimize_insn_for_speed_p (),
18561 false) >= 2)
18562 {
18563 if (cf == 0)
18564 {
18565 enum machine_mode cmp_mode = GET_MODE (op0);
18566
18567 cf = ct;
18568 ct = 0;
18569
18570 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18571 {
18572 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18573
18574 /* We may be reversing an unordered compare to a normal compare,
18575 which is not valid in general (we may convert a non-trapping
18576 condition into a trapping one); however, on i386 we currently
18577 emit all comparisons unordered. */
18578 code = reverse_condition_maybe_unordered (code);
18579 }
18580 else
18581 {
18582 code = reverse_condition (code);
18583 if (compare_code != UNKNOWN)
18584 compare_code = reverse_condition (compare_code);
18585 }
18586 }
18587
18588 if (compare_code != UNKNOWN)
18589 {
18590 /* notl op1 (if needed)
18591 sarl $31, op1
18592 andl (cf-ct), op1
18593 addl ct, op1
18594
18595 For x < 0 (resp. x <= -1) there will be no notl,
18596 so if possible swap the constants to get rid of the
18597 complement.
18598 True/false will be -1/0 while code below (store flag
18599 followed by decrement) is 0/-1, so the constants need
18600 to be exchanged once more. */
18601
18602 if (compare_code == GE || !cf)
18603 {
18604 code = reverse_condition (code);
18605 compare_code = LT;
18606 }
18607 else
18608 {
18609 HOST_WIDE_INT tmp = cf;
18610 cf = ct;
18611 ct = tmp;
18612 }
18613
18614 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18615 }
18616 else
18617 {
18618 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18619
18620 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
18621 constm1_rtx,
18622 copy_rtx (out), 1, OPTAB_DIRECT);
18623 }
18624
18625 out = expand_simple_binop (mode, AND, copy_rtx (out),
18626 gen_int_mode (cf - ct, mode),
18627 copy_rtx (out), 1, OPTAB_DIRECT);
18628 if (ct)
18629 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
18630 copy_rtx (out), 1, OPTAB_DIRECT);
18631 if (!rtx_equal_p (out, operands[0]))
18632 emit_move_insn (operands[0], copy_rtx (out));
18633
18634 return true;
18635 }
18636 }
18637
18638 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18639 {
18640 /* Try a few things more with specific constants and a variable. */
18641
18642 optab op;
18643 rtx var, orig_out, out, tmp;
18644
18645 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
18646 return false;
18647
18648 /* If one of the two operands is an interesting constant (0 or -1), load
18649 a 0/-1 mask using the constant code above and mask the variable in with a logical operation. */
18650
18651 if (CONST_INT_P (operands[2]))
18652 {
18653 var = operands[3];
18654 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
18655 operands[3] = constm1_rtx, op = and_optab;
18656 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
18657 operands[3] = const0_rtx, op = ior_optab;
18658 else
18659 return false;
18660 }
18661 else if (CONST_INT_P (operands[3]))
18662 {
18663 var = operands[2];
18664 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
18665 operands[2] = constm1_rtx, op = and_optab;
18666 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
18667 operands[2] = const0_rtx, op = ior_optab;
18668 else
18669 return false;
18670 }
18671 else
18672 return false;
18673
18674 orig_out = operands[0];
18675 tmp = gen_reg_rtx (mode);
18676 operands[0] = tmp;
18677
18678 /* Recurse to get the constant loaded. */
18679 if (ix86_expand_int_movcc (operands) == 0)
18680 return false;
18681
18682 /* Mask in the interesting variable. */
18683 out = expand_binop (mode, op, var, tmp, orig_out, 0,
18684 OPTAB_WIDEN);
18685 if (!rtx_equal_p (out, orig_out))
18686 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
18687
18688 return true;
18689 }
18690
18691 /*
18692 * For comparison with above,
18693 *
18694 * movl cf,dest
18695 * movl ct,tmp
18696 * cmpl op1,op2
18697 * cmovcc tmp,dest
18698 *
18699 * Size 15.
18700 */
18701
18702 if (! nonimmediate_operand (operands[2], mode))
18703 operands[2] = force_reg (mode, operands[2]);
18704 if (! nonimmediate_operand (operands[3], mode))
18705 operands[3] = force_reg (mode, operands[3]);
18706
18707 if (! register_operand (operands[2], VOIDmode)
18708 && (mode == QImode
18709 || ! register_operand (operands[3], VOIDmode)))
18710 operands[2] = force_reg (mode, operands[2]);
18711
18712 if (mode == QImode
18713 && ! register_operand (operands[3], VOIDmode))
18714 operands[3] = force_reg (mode, operands[3]);
18715
18716 emit_insn (compare_seq);
18717 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18718 gen_rtx_IF_THEN_ELSE (mode,
18719 compare_op, operands[2],
18720 operands[3])));
18721 return true;
18722 }
18723
18724 /* Swap, force into registers, or otherwise massage the two operands
18725 to an sse comparison with a mask result. Thus we differ a bit from
18726 ix86_prepare_fp_compare_args which expects to produce a flags result.
18727
18728 The DEST operand exists to help determine whether to commute commutative
18729 operators. The POP0/POP1 operands are updated in place. The new
18730 comparison code is returned, or UNKNOWN if not implementable. */
18731
18732 static enum rtx_code
18733 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
18734 rtx *pop0, rtx *pop1)
18735 {
18736 rtx tmp;
18737
18738 /* AVX supports all the needed comparisons, no need to swap arguments
18739 nor help reload. */
18740 if (TARGET_AVX)
18741 return code;
18742
18743 switch (code)
18744 {
18745 case LTGT:
18746 case UNEQ:
18747 /* We have no LTGT as an operator. We could implement it with
18748 NE & ORDERED, but this requires an extra temporary. It's
18749 not clear that it's worth it. */
18750 return UNKNOWN;
18751
18752 case LT:
18753 case LE:
18754 case UNGT:
18755 case UNGE:
18756 /* These are supported directly. */
18757 break;
18758
18759 case EQ:
18760 case NE:
18761 case UNORDERED:
18762 case ORDERED:
18763 /* For commutative operators, try to canonicalize the destination
18764 operand to be first in the comparison - this helps reload to
18765 avoid extra moves. */
18766 if (!dest || !rtx_equal_p (dest, *pop1))
18767 break;
18768 /* FALLTHRU */
18769
18770 case GE:
18771 case GT:
18772 case UNLE:
18773 case UNLT:
18774 /* These are not supported directly. Swap the comparison operands
18775 to transform into something that is supported. */
18776 tmp = *pop0;
18777 *pop0 = *pop1;
18778 *pop1 = tmp;
18779 code = swap_condition (code);
18780 break;
18781
18782 default:
18783 gcc_unreachable ();
18784 }
18785
18786 return code;
18787 }
18788
18789 /* Detect conditional moves that exactly match min/max operational
18790 semantics. Note that this is IEEE safe, as long as we don't
18791 interchange the operands.
18792
18793 Returns FALSE if this conditional move doesn't match a MIN/MAX,
18794 and TRUE if the operation is successful and instructions are emitted. */
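/* The underlying minss/minsd (maxss/maxsd) instructions are not
   commutative: when the operands are unordered, or compare equal as
   with -0.0 and +0.0, the second source operand is returned.  That is
   why only the LT and UNGE forms are matched below and the operands
   are never interchanged.  */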
18795
18796 static bool
18797 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
18798 rtx cmp_op1, rtx if_true, rtx if_false)
18799 {
18800 enum machine_mode mode;
18801 bool is_min;
18802 rtx tmp;
18803
18804 if (code == LT)
18805 ;
18806 else if (code == UNGE)
18807 {
18808 tmp = if_true;
18809 if_true = if_false;
18810 if_false = tmp;
18811 }
18812 else
18813 return false;
18814
18815 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
18816 is_min = true;
18817 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
18818 is_min = false;
18819 else
18820 return false;
18821
18822 mode = GET_MODE (dest);
18823
18824 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
18825 but MODE may be a vector mode and thus not appropriate. */
18826 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
18827 {
18828 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
18829 rtvec v;
18830
18831 if_true = force_reg (mode, if_true);
18832 v = gen_rtvec (2, if_true, if_false);
18833 tmp = gen_rtx_UNSPEC (mode, v, u);
18834 }
18835 else
18836 {
18837 code = is_min ? SMIN : SMAX;
18838 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
18839 }
18840
18841 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
18842 return true;
18843 }
18844
18845 /* Expand an sse vector comparison. Return the register with the result. */
18846
18847 static rtx
18848 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
18849 rtx op_true, rtx op_false)
18850 {
18851 enum machine_mode mode = GET_MODE (dest);
18852 enum machine_mode cmp_mode = GET_MODE (cmp_op0);
18853 rtx x;
18854
18855 cmp_op0 = force_reg (cmp_mode, cmp_op0);
18856 if (!nonimmediate_operand (cmp_op1, cmp_mode))
18857 cmp_op1 = force_reg (cmp_mode, cmp_op1);
18858
18859 if (optimize
18860 || reg_overlap_mentioned_p (dest, op_true)
18861 || reg_overlap_mentioned_p (dest, op_false))
18862 dest = gen_reg_rtx (mode);
18863
18864 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
18865 if (cmp_mode != mode)
18866 {
18867 x = force_reg (cmp_mode, x);
18868 convert_move (dest, x, false);
18869 }
18870 else
18871 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18872
18873 return dest;
18874 }
18875
18876 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
18877 operations. This is used for both scalar and vector conditional moves. */
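/* With a full per-element mask in CMP (each element all-ones or
   all-zeros), the fallback sequence below computes

	dest = (cmp & op_true) | (~cmp & op_false)

   On SSE4.1/AVX targets the same selection is done with a single
   blendv instruction, which looks only at the most significant bit
   of each element of the mask.  */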
18878
18879 static void
18880 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
18881 {
18882 enum machine_mode mode = GET_MODE (dest);
18883 rtx t2, t3, x;
18884
18885 if (op_false == CONST0_RTX (mode))
18886 {
18887 op_true = force_reg (mode, op_true);
18888 x = gen_rtx_AND (mode, cmp, op_true);
18889 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18890 }
18891 else if (op_true == CONST0_RTX (mode))
18892 {
18893 op_false = force_reg (mode, op_false);
18894 x = gen_rtx_NOT (mode, cmp);
18895 x = gen_rtx_AND (mode, x, op_false);
18896 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18897 }
18898 else if (TARGET_XOP)
18899 {
18900 op_true = force_reg (mode, op_true);
18901
18902 if (!nonimmediate_operand (op_false, mode))
18903 op_false = force_reg (mode, op_false);
18904
18905 emit_insn (gen_rtx_SET (mode, dest,
18906 gen_rtx_IF_THEN_ELSE (mode, cmp,
18907 op_true,
18908 op_false)));
18909 }
18910 else
18911 {
18912 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
18913
18914 op_false = force_reg (mode, op_false);
18915
18916 switch (mode)
18917 {
18918 case V4SFmode:
18919 if (TARGET_SSE4_1)
18920 gen = gen_sse4_1_blendvps;
18921 break;
18922 case V2DFmode:
18923 if (TARGET_SSE4_1)
18924 gen = gen_sse4_1_blendvpd;
18925 break;
18926 case V16QImode:
18927 case V8HImode:
18928 case V4SImode:
18929 case V2DImode:
18930 if (TARGET_SSE4_1)
18931 {
18932 gen = gen_sse4_1_pblendvb;
18933 dest = gen_lowpart (V16QImode, dest);
18934 op_false = gen_lowpart (V16QImode, op_false);
18935 op_true = gen_lowpart (V16QImode, op_true);
18936 cmp = gen_lowpart (V16QImode, cmp);
18937 }
18938 break;
18939 case V8SFmode:
18940 if (TARGET_AVX)
18941 gen = gen_avx_blendvps256;
18942 break;
18943 case V4DFmode:
18944 if (TARGET_AVX)
18945 gen = gen_avx_blendvpd256;
18946 break;
18947 case V32QImode:
18948 case V16HImode:
18949 case V8SImode:
18950 case V4DImode:
18951 if (TARGET_AVX2)
18952 {
18953 gen = gen_avx2_pblendvb;
18954 dest = gen_lowpart (V32QImode, dest);
18955 op_false = gen_lowpart (V32QImode, op_false);
18956 op_true = gen_lowpart (V32QImode, op_true);
18957 cmp = gen_lowpart (V32QImode, cmp);
18958 }
18959 break;
18960 default:
18961 break;
18962 }
18963
18964 if (gen != NULL)
18965 emit_insn (gen (dest, op_false, op_true, cmp));
18966 else
18967 {
18968 op_true = force_reg (mode, op_true);
18969
18970 t2 = gen_reg_rtx (mode);
18971 if (optimize)
18972 t3 = gen_reg_rtx (mode);
18973 else
18974 t3 = dest;
18975
18976 x = gen_rtx_AND (mode, op_true, cmp);
18977 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
18978
18979 x = gen_rtx_NOT (mode, cmp);
18980 x = gen_rtx_AND (mode, x, op_false);
18981 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
18982
18983 x = gen_rtx_IOR (mode, t3, t2);
18984 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18985 }
18986 }
18987 }
18988
18989 /* Expand a floating-point conditional move. Return true if successful. */
18990
18991 bool
18992 ix86_expand_fp_movcc (rtx operands[])
18993 {
18994 enum machine_mode mode = GET_MODE (operands[0]);
18995 enum rtx_code code = GET_CODE (operands[1]);
18996 rtx tmp, compare_op;
18997 rtx op0 = XEXP (operands[1], 0);
18998 rtx op1 = XEXP (operands[1], 1);
18999
19000 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
19001 {
19002 enum machine_mode cmode;
19003
19004 /* Since we've no cmove for sse registers, don't force bad register
19005 allocation just to gain access to it. Deny movcc when the
19006 comparison mode doesn't match the move mode. */
19007 cmode = GET_MODE (op0);
19008 if (cmode == VOIDmode)
19009 cmode = GET_MODE (op1);
19010 if (cmode != mode)
19011 return false;
19012
19013 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
19014 if (code == UNKNOWN)
19015 return false;
19016
19017 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
19018 operands[2], operands[3]))
19019 return true;
19020
19021 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
19022 operands[2], operands[3]);
19023 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
19024 return true;
19025 }
19026
19027 /* The floating point conditional move instructions don't directly
19028 support conditions resulting from a signed integer comparison. */
19029
19030 compare_op = ix86_expand_compare (code, op0, op1);
19031 if (!fcmov_comparison_operator (compare_op, VOIDmode))
19032 {
19033 tmp = gen_reg_rtx (QImode);
19034 ix86_expand_setcc (tmp, code, op0, op1);
19035
19036 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
19037 }
19038
19039 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
19040 gen_rtx_IF_THEN_ELSE (mode, compare_op,
19041 operands[2], operands[3])));
19042
19043 return true;
19044 }
19045
19046 /* Expand a floating-point vector conditional move; a vcond operation
19047 rather than a movcc operation. */
19048
19049 bool
19050 ix86_expand_fp_vcond (rtx operands[])
19051 {
19052 enum rtx_code code = GET_CODE (operands[3]);
19053 rtx cmp;
19054
19055 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
19056 &operands[4], &operands[5]);
19057 if (code == UNKNOWN)
19058 {
19059 rtx temp;
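	  /* Neither LTGT nor UNEQ is available as a single SSE compare
	     without AVX, so build the mask from two compares:
	     LTGT = ORDERED && NE, UNEQ = UNORDERED || EQ.  */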
19060 switch (GET_CODE (operands[3]))
19061 {
19062 case LTGT:
19063 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
19064 operands[5], operands[0], operands[0]);
19065 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
19066 operands[5], operands[1], operands[2]);
19067 code = AND;
19068 break;
19069 case UNEQ:
19070 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
19071 operands[5], operands[0], operands[0]);
19072 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
19073 operands[5], operands[1], operands[2]);
19074 code = IOR;
19075 break;
19076 default:
19077 gcc_unreachable ();
19078 }
19079 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
19080 OPTAB_DIRECT);
19081 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
19082 return true;
19083 }
19084
19085 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
19086 operands[5], operands[1], operands[2]))
19087 return true;
19088
19089 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
19090 operands[1], operands[2]);
19091 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
19092 return true;
19093 }
19094
19095 /* Expand a signed/unsigned integral vector conditional move. */
19096
19097 bool
19098 ix86_expand_int_vcond (rtx operands[])
19099 {
19100 enum machine_mode mode = GET_MODE (operands[0]);
19101 enum rtx_code code = GET_CODE (operands[3]);
19102 bool negate = false;
19103 rtx x, cop0, cop1;
19104
19105 cop0 = operands[4];
19106 cop1 = operands[5];
19107
19108 /* XOP supports all of the comparisons on all vector int types. */
19109 if (!TARGET_XOP)
19110 {
19111 /* Canonicalize the comparison to EQ, GT, GTU. */
19112 switch (code)
19113 {
19114 case EQ:
19115 case GT:
19116 case GTU:
19117 break;
19118
19119 case NE:
19120 case LE:
19121 case LEU:
19122 code = reverse_condition (code);
19123 negate = true;
19124 break;
19125
19126 case GE:
19127 case GEU:
19128 code = reverse_condition (code);
19129 negate = true;
19130 /* FALLTHRU */
19131
19132 case LT:
19133 case LTU:
19134 code = swap_condition (code);
19135 x = cop0, cop0 = cop1, cop1 = x;
19136 break;
19137
19138 default:
19139 gcc_unreachable ();
19140 }
19141
19142 /* Only SSE4.1/SSE4.2 supports V2DImode. */
19143 if (mode == V2DImode)
19144 {
19145 switch (code)
19146 {
19147 case EQ:
19148 /* SSE4.1 supports EQ. */
19149 if (!TARGET_SSE4_1)
19150 return false;
19151 break;
19152
19153 case GT:
19154 case GTU:
19155 /* SSE4.2 supports GT/GTU. */
19156 if (!TARGET_SSE4_2)
19157 return false;
19158 break;
19159
19160 default:
19161 gcc_unreachable ();
19162 }
19163 }
19164
19165 /* Unsigned parallel compare is not supported by the hardware.
19166 Play some tricks to turn this into a signed comparison or a
19167 comparison against zero. */
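	  /* Two different tricks are used below, depending on the element
	     width: for 32/64-bit elements both operands are biased by
	     subtracting the per-element sign-bit constant, which maps
	     unsigned order onto signed order so the plain GT compare can
	     be used; for 8/16-bit elements a saturating unsigned
	     subtraction is used instead, since a >u b exactly when
	     (a -us b) != 0 (the code compares for equality with zero and
	     flips the sense).  */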
19168 if (code == GTU)
19169 {
19170 cop0 = force_reg (mode, cop0);
19171
19172 switch (mode)
19173 {
19174 case V8SImode:
19175 case V4DImode:
19176 case V4SImode:
19177 case V2DImode:
19178 {
19179 rtx t1, t2, mask;
19180 rtx (*gen_sub3) (rtx, rtx, rtx);
19181
19182 switch (mode)
19183 {
19184 case V8SImode: gen_sub3 = gen_subv8si3; break;
19185 case V4DImode: gen_sub3 = gen_subv4di3; break;
19186 case V4SImode: gen_sub3 = gen_subv4si3; break;
19187 case V2DImode: gen_sub3 = gen_subv2di3; break;
19188 default:
19189 gcc_unreachable ();
19190 }
19191 /* Subtract (-(INT MAX) - 1) from both operands to make
19192 them signed. */
19193 mask = ix86_build_signbit_mask (mode, true, false);
19194 t1 = gen_reg_rtx (mode);
19195 emit_insn (gen_sub3 (t1, cop0, mask));
19196
19197 t2 = gen_reg_rtx (mode);
19198 emit_insn (gen_sub3 (t2, cop1, mask));
19199
19200 cop0 = t1;
19201 cop1 = t2;
19202 code = GT;
19203 }
19204 break;
19205
19206 case V32QImode:
19207 case V16HImode:
19208 case V16QImode:
19209 case V8HImode:
19210 /* Perform a parallel unsigned saturating subtraction. */
19211 x = gen_reg_rtx (mode);
19212 emit_insn (gen_rtx_SET (VOIDmode, x,
19213 gen_rtx_US_MINUS (mode, cop0, cop1)));
19214
19215 cop0 = x;
19216 cop1 = CONST0_RTX (mode);
19217 code = EQ;
19218 negate = !negate;
19219 break;
19220
19221 default:
19222 gcc_unreachable ();
19223 }
19224 }
19225 }
19226
19227 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
19228 operands[1+negate], operands[2-negate]);
19229
19230 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
19231 operands[2-negate]);
19232 return true;
19233 }
19234
19235 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
19236 true if we should do zero extension, else sign extension. HIGH_P is
19237 true if we want the N/2 high elements, else the low elements. */
19238
19239 void
19240 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
19241 {
19242 enum machine_mode imode = GET_MODE (operands[1]);
19243 rtx tmp, dest;
19244
19245 if (TARGET_SSE4_1)
19246 {
19247 rtx (*unpack)(rtx, rtx);
19248
19249 switch (imode)
19250 {
19251 case V16QImode:
19252 if (unsigned_p)
19253 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
19254 else
19255 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
19256 break;
19257 case V8HImode:
19258 if (unsigned_p)
19259 unpack = gen_sse4_1_zero_extendv4hiv4si2;
19260 else
19261 unpack = gen_sse4_1_sign_extendv4hiv4si2;
19262 break;
19263 case V4SImode:
19264 if (unsigned_p)
19265 unpack = gen_sse4_1_zero_extendv2siv2di2;
19266 else
19267 unpack = gen_sse4_1_sign_extendv2siv2di2;
19268 break;
19269 default:
19270 gcc_unreachable ();
19271 }
19272
19273 if (high_p)
19274 {
19275 /* Shift higher 8 bytes to lower 8 bytes. */
19276 tmp = gen_reg_rtx (imode);
19277 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
19278 gen_lowpart (V1TImode, operands[1]),
19279 GEN_INT (64)));
19280 }
19281 else
19282 tmp = operands[1];
19283
19284 emit_insn (unpack (operands[0], tmp));
19285 }
19286 else
19287 {
19288 rtx (*unpack)(rtx, rtx, rtx);
19289
19290 switch (imode)
19291 {
19292 case V16QImode:
19293 if (high_p)
19294 unpack = gen_vec_interleave_highv16qi;
19295 else
19296 unpack = gen_vec_interleave_lowv16qi;
19297 break;
19298 case V8HImode:
19299 if (high_p)
19300 unpack = gen_vec_interleave_highv8hi;
19301 else
19302 unpack = gen_vec_interleave_lowv8hi;
19303 break;
19304 case V4SImode:
19305 if (high_p)
19306 unpack = gen_vec_interleave_highv4si;
19307 else
19308 unpack = gen_vec_interleave_lowv4si;
19309 break;
19310 default:
19311 gcc_unreachable ();
19312 }
19313
19314 dest = gen_lowpart (imode, operands[0]);
19315
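      /* In the interleave the extension happens implicitly: each element
	 is paired either with zero (zero extension) or with a copy of its
	 sign bits, produced by the "0 > x" compare just below (sign
	 extension).  */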
19316 if (unsigned_p)
19317 tmp = force_reg (imode, CONST0_RTX (imode));
19318 else
19319 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
19320 operands[1], pc_rtx, pc_rtx);
19321
19322 emit_insn (unpack (dest, operands[1], tmp));
19323 }
19324 }
19325
19326 /* Expand conditional increment or decrement using adc/sbb instructions.
19327 The default case using setcc followed by the conditional move can be
19328 done by generic code. */
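/* For example, with unsigned operands "x += (a < b)" can be emitted as

	cmpl	b, a		; sets CF when a < b
	adcl	$0, x		; x += CF

   and the decrement form uses sbb in the same way (operand names are
   symbolic).  */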
19329 bool
19330 ix86_expand_int_addcc (rtx operands[])
19331 {
19332 enum rtx_code code = GET_CODE (operands[1]);
19333 rtx flags;
19334 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
19335 rtx compare_op;
19336 rtx val = const0_rtx;
19337 bool fpcmp = false;
19338 enum machine_mode mode;
19339 rtx op0 = XEXP (operands[1], 0);
19340 rtx op1 = XEXP (operands[1], 1);
19341
19342 if (operands[3] != const1_rtx
19343 && operands[3] != constm1_rtx)
19344 return false;
19345 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
19346 return false;
19347 code = GET_CODE (compare_op);
19348
19349 flags = XEXP (compare_op, 0);
19350
19351 if (GET_MODE (flags) == CCFPmode
19352 || GET_MODE (flags) == CCFPUmode)
19353 {
19354 fpcmp = true;
19355 code = ix86_fp_compare_code_to_integer (code);
19356 }
19357
19358 if (code != LTU)
19359 {
19360 val = constm1_rtx;
19361 if (fpcmp)
19362 PUT_CODE (compare_op,
19363 reverse_condition_maybe_unordered
19364 (GET_CODE (compare_op)));
19365 else
19366 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
19367 }
19368
19369 mode = GET_MODE (operands[0]);
19370
19371 /* Construct either adc or sbb insn. */
19372 if ((code == LTU) == (operands[3] == constm1_rtx))
19373 {
19374 switch (mode)
19375 {
19376 case QImode:
19377 insn = gen_subqi3_carry;
19378 break;
19379 case HImode:
19380 insn = gen_subhi3_carry;
19381 break;
19382 case SImode:
19383 insn = gen_subsi3_carry;
19384 break;
19385 case DImode:
19386 insn = gen_subdi3_carry;
19387 break;
19388 default:
19389 gcc_unreachable ();
19390 }
19391 }
19392 else
19393 {
19394 switch (mode)
19395 {
19396 case QImode:
19397 insn = gen_addqi3_carry;
19398 break;
19399 case HImode:
19400 insn = gen_addhi3_carry;
19401 break;
19402 case SImode:
19403 insn = gen_addsi3_carry;
19404 break;
19405 case DImode:
19406 insn = gen_adddi3_carry;
19407 break;
19408 default:
19409 gcc_unreachable ();
19410 }
19411 }
19412 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
19413
19414 return true;
19415 }
19416
19417
19418 /* Split OPERAND into half-mode parts stored in PARTS. Similar to
19419 split_double_mode, but works for floating point parameters and
19420 non-offsettable memories. For pushes, it returns just stack offsets;
19421 the values will be saved in the right order. At most four parts are generated. */
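/* For instance, on a 32-bit target a DImode value splits into two SImode
   parts and an XFmode value into three, while on a 64-bit target XFmode
   and TFmode split into a DImode part plus an SImode or DImode part.  */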
19422
19423 static int
19424 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
19425 {
19426 int size;
19427
19428 if (!TARGET_64BIT)
19429 size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
19430 else
19431 size = (GET_MODE_SIZE (mode) + 4) / 8;
19432
19433 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
19434 gcc_assert (size >= 2 && size <= 4);
19435
19436 /* Optimize constant pool reference to immediates. This is used by fp
19437 moves, that force all constants to memory to allow combining. */
19438 if (MEM_P (operand) && MEM_READONLY_P (operand))
19439 {
19440 rtx tmp = maybe_get_pool_constant (operand);
19441 if (tmp)
19442 operand = tmp;
19443 }
19444
19445 if (MEM_P (operand) && !offsettable_memref_p (operand))
19446 {
19447 /* The only non-offsetable memories we handle are pushes. */
19448 int ok = push_operand (operand, VOIDmode);
19449
19450 gcc_assert (ok);
19451
19452 operand = copy_rtx (operand);
19453 PUT_MODE (operand, Pmode);
19454 parts[0] = parts[1] = parts[2] = parts[3] = operand;
19455 return size;
19456 }
19457
19458 if (GET_CODE (operand) == CONST_VECTOR)
19459 {
19460 enum machine_mode imode = int_mode_for_mode (mode);
19461 /* Caution: if we looked through a constant pool memory above,
19462 the operand may actually have a different mode now. That's
19463 ok, since we want to pun this all the way back to an integer. */
19464 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
19465 gcc_assert (operand != NULL);
19466 mode = imode;
19467 }
19468
19469 if (!TARGET_64BIT)
19470 {
19471 if (mode == DImode)
19472 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
19473 else
19474 {
19475 int i;
19476
19477 if (REG_P (operand))
19478 {
19479 gcc_assert (reload_completed);
19480 for (i = 0; i < size; i++)
19481 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
19482 }
19483 else if (offsettable_memref_p (operand))
19484 {
19485 operand = adjust_address (operand, SImode, 0);
19486 parts[0] = operand;
19487 for (i = 1; i < size; i++)
19488 parts[i] = adjust_address (operand, SImode, 4 * i);
19489 }
19490 else if (GET_CODE (operand) == CONST_DOUBLE)
19491 {
19492 REAL_VALUE_TYPE r;
19493 long l[4];
19494
19495 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
19496 switch (mode)
19497 {
19498 case TFmode:
19499 real_to_target (l, &r, mode);
19500 parts[3] = gen_int_mode (l[3], SImode);
19501 parts[2] = gen_int_mode (l[2], SImode);
19502 break;
19503 case XFmode:
19504 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
19505 parts[2] = gen_int_mode (l[2], SImode);
19506 break;
19507 case DFmode:
19508 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
19509 break;
19510 default:
19511 gcc_unreachable ();
19512 }
19513 parts[1] = gen_int_mode (l[1], SImode);
19514 parts[0] = gen_int_mode (l[0], SImode);
19515 }
19516 else
19517 gcc_unreachable ();
19518 }
19519 }
19520 else
19521 {
19522 if (mode == TImode)
19523 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
19524 if (mode == XFmode || mode == TFmode)
19525 {
19526 enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
19527 if (REG_P (operand))
19528 {
19529 gcc_assert (reload_completed);
19530 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
19531 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
19532 }
19533 else if (offsettable_memref_p (operand))
19534 {
19535 operand = adjust_address (operand, DImode, 0);
19536 parts[0] = operand;
19537 parts[1] = adjust_address (operand, upper_mode, 8);
19538 }
19539 else if (GET_CODE (operand) == CONST_DOUBLE)
19540 {
19541 REAL_VALUE_TYPE r;
19542 long l[4];
19543
19544 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
19545 real_to_target (l, &r, mode);
19546
19547 /* Do not use shift by 32 to avoid warning on 32bit systems. */
19548 if (HOST_BITS_PER_WIDE_INT >= 64)
19549 parts[0]
19550 = gen_int_mode
19551 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
19552 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
19553 DImode);
19554 else
19555 parts[0] = immed_double_const (l[0], l[1], DImode);
19556
19557 if (upper_mode == SImode)
19558 parts[1] = gen_int_mode (l[2], SImode);
19559 else if (HOST_BITS_PER_WIDE_INT >= 64)
19560 parts[1]
19561 = gen_int_mode
19562 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
19563 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
19564 DImode);
19565 else
19566 parts[1] = immed_double_const (l[2], l[3], DImode);
19567 }
19568 else
19569 gcc_unreachable ();
19570 }
19571 }
19572
19573 return size;
19574 }
19575
19576 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
19577 All required insns are emitted directly. Operands 2-5 receive the
19578 destination parts in the order the moves are emitted; operands 6-9
19579 receive the corresponding source parts. */
19580
19581 void
19582 ix86_split_long_move (rtx operands[])
19583 {
19584 rtx part[2][4];
19585 int nparts, i, j;
19586 int push = 0;
19587 int collisions = 0;
19588 enum machine_mode mode = GET_MODE (operands[0]);
19589 bool collisionparts[4];
19590
19591 /* The DFmode expanders may ask us to move a double.
19592 For a 64-bit target this is a single move. By hiding the fact
19593 here we simplify the i386.md splitters. */
19594 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
19595 {
19596 /* Optimize constant pool reference to immediates. This is used by
19597 fp moves, that force all constants to memory to allow combining. */
19598
19599 if (MEM_P (operands[1])
19600 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
19601 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
19602 operands[1] = get_pool_constant (XEXP (operands[1], 0));
19603 if (push_operand (operands[0], VOIDmode))
19604 {
19605 operands[0] = copy_rtx (operands[0]);
19606 PUT_MODE (operands[0], Pmode);
19607 }
19608 else
19609 operands[0] = gen_lowpart (DImode, operands[0]);
19610 operands[1] = gen_lowpart (DImode, operands[1]);
19611 emit_move_insn (operands[0], operands[1]);
19612 return;
19613 }
19614
19615 /* The only non-offsettable memory we handle is push. */
19616 if (push_operand (operands[0], VOIDmode))
19617 push = 1;
19618 else
19619 gcc_assert (!MEM_P (operands[0])
19620 || offsettable_memref_p (operands[0]));
19621
19622 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
19623 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
19624
19625 /* When emitting a push, take care of source operands on the stack. */
19626 if (push && MEM_P (operands[1])
19627 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
19628 {
19629 rtx src_base = XEXP (part[1][nparts - 1], 0);
19630
19631 /* Compensate for the stack decrement by 4. */
19632 if (!TARGET_64BIT && nparts == 3
19633 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
19634 src_base = plus_constant (src_base, 4);
19635
19636 /* src_base refers to the stack pointer and is
19637 automatically decreased by emitted push. */
19638 for (i = 0; i < nparts; i++)
19639 part[1][i] = change_address (part[1][i],
19640 GET_MODE (part[1][i]), src_base);
19641 }
19642
19643 /* We need to do the copy in the right order in case an address register
19644 of the source overlaps the destination. */
19645 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
19646 {
19647 rtx tmp;
19648
19649 for (i = 0; i < nparts; i++)
19650 {
19651 collisionparts[i]
19652 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
19653 if (collisionparts[i])
19654 collisions++;
19655 }
19656
19657 /* Collision in the middle part can be handled by reordering. */
19658 if (collisions == 1 && nparts == 3 && collisionparts [1])
19659 {
19660 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19661 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19662 }
19663 else if (collisions == 1
19664 && nparts == 4
19665 && (collisionparts [1] || collisionparts [2]))
19666 {
19667 if (collisionparts [1])
19668 {
19669 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19670 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19671 }
19672 else
19673 {
19674 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
19675 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
19676 }
19677 }
19678
19679 /* If there are more collisions, we can't handle it by reordering.
19680 Do an lea to the last part and use only one colliding move. */
19681 else if (collisions > 1)
19682 {
19683 rtx base;
19684
19685 collisions = 1;
19686
19687 base = part[0][nparts - 1];
19688
19689 /* Handle the case when the last part isn't valid for lea.
19690 Happens in 64-bit mode storing the 12-byte XFmode. */
19691 if (GET_MODE (base) != Pmode)
19692 base = gen_rtx_REG (Pmode, REGNO (base));
19693
19694 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
19695 part[1][0] = replace_equiv_address (part[1][0], base);
19696 for (i = 1; i < nparts; i++)
19697 {
19698 tmp = plus_constant (base, UNITS_PER_WORD * i);
19699 part[1][i] = replace_equiv_address (part[1][i], tmp);
19700 }
19701 }
19702 }
19703
19704 if (push)
19705 {
19706 if (!TARGET_64BIT)
19707 {
19708 if (nparts == 3)
19709 {
19710 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
19711 emit_insn (gen_addsi3 (stack_pointer_rtx,
19712 stack_pointer_rtx, GEN_INT (-4)));
19713 emit_move_insn (part[0][2], part[1][2]);
19714 }
19715 else if (nparts == 4)
19716 {
19717 emit_move_insn (part[0][3], part[1][3]);
19718 emit_move_insn (part[0][2], part[1][2]);
19719 }
19720 }
19721 else
19722 {
19723 /* In 64-bit mode we don't have a 32-bit push available. If this is a
19724 register, that is OK - we will just use the larger counterpart. We also
19725 retype memory - this comes from an attempt to avoid the REX prefix when
19726 moving the second half of a TFmode value. */
19727 if (GET_MODE (part[1][1]) == SImode)
19728 {
19729 switch (GET_CODE (part[1][1]))
19730 {
19731 case MEM:
19732 part[1][1] = adjust_address (part[1][1], DImode, 0);
19733 break;
19734
19735 case REG:
19736 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
19737 break;
19738
19739 default:
19740 gcc_unreachable ();
19741 }
19742
19743 if (GET_MODE (part[1][0]) == SImode)
19744 part[1][0] = part[1][1];
19745 }
19746 }
19747 emit_move_insn (part[0][1], part[1][1]);
19748 emit_move_insn (part[0][0], part[1][0]);
19749 return;
19750 }
19751
19752 /* Choose the correct order so as not to overwrite the source before it is copied. */
19753 if ((REG_P (part[0][0])
19754 && REG_P (part[1][1])
19755 && (REGNO (part[0][0]) == REGNO (part[1][1])
19756 || (nparts == 3
19757 && REGNO (part[0][0]) == REGNO (part[1][2]))
19758 || (nparts == 4
19759 && REGNO (part[0][0]) == REGNO (part[1][3]))))
19760 || (collisions > 0
19761 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
19762 {
19763 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
19764 {
19765 operands[2 + i] = part[0][j];
19766 operands[6 + i] = part[1][j];
19767 }
19768 }
19769 else
19770 {
19771 for (i = 0; i < nparts; i++)
19772 {
19773 operands[2 + i] = part[0][i];
19774 operands[6 + i] = part[1][i];
19775 }
19776 }
19777
19778 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
19779 if (optimize_insn_for_size_p ())
19780 {
19781 for (j = 0; j < nparts - 1; j++)
19782 if (CONST_INT_P (operands[6 + j])
19783 && operands[6 + j] != const0_rtx
19784 && REG_P (operands[2 + j]))
19785 for (i = j; i < nparts - 1; i++)
19786 if (CONST_INT_P (operands[7 + i])
19787 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
19788 operands[7 + i] = operands[2 + j];
19789 }
19790
19791 for (i = 0; i < nparts; i++)
19792 emit_move_insn (operands[2 + i], operands[6 + i]);
19793
19794 return;
19795 }
19796
19797 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
19798 left shift by a constant, either using a single shift or
19799 a sequence of add instructions. */
19800
19801 static void
19802 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
19803 {
19804 rtx (*insn)(rtx, rtx, rtx);
19805
19806 if (count == 1
19807 || (count * ix86_cost->add <= ix86_cost->shift_const
19808 && !optimize_insn_for_size_p ()))
19809 {
19810 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
19811 while (count-- > 0)
19812 emit_insn (insn (operand, operand, operand));
19813 }
19814 else
19815 {
19816 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19817 emit_insn (insn (operand, operand, GEN_INT (count)));
19818 }
19819 }
19820
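/* Split a double-word left shift into operations on the two half-width
   registers; MODE is the full DImode or TImode.  Constant counts are
   handled directly (shld plus a shift, or a move plus clear when the
   count is at least half the width).  For variable counts a shld/shl
   pair is emitted and, because the hardware shifts look only at the
   low bits of the count, the result is then fixed up for counts of
   half-width or more, using cmov when a scratch register is available
   and a short branch otherwise.  */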
19821 void
19822 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
19823 {
19824 rtx (*gen_ashl3)(rtx, rtx, rtx);
19825 rtx (*gen_shld)(rtx, rtx, rtx);
19826 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19827
19828 rtx low[2], high[2];
19829 int count;
19830
19831 if (CONST_INT_P (operands[2]))
19832 {
19833 split_double_mode (mode, operands, 2, low, high);
19834 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19835
19836 if (count >= half_width)
19837 {
19838 emit_move_insn (high[0], low[1]);
19839 emit_move_insn (low[0], const0_rtx);
19840
19841 if (count > half_width)
19842 ix86_expand_ashl_const (high[0], count - half_width, mode);
19843 }
19844 else
19845 {
19846 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19847
19848 if (!rtx_equal_p (operands[0], operands[1]))
19849 emit_move_insn (operands[0], operands[1]);
19850
19851 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
19852 ix86_expand_ashl_const (low[0], count, mode);
19853 }
19854 return;
19855 }
19856
19857 split_double_mode (mode, operands, 1, low, high);
19858
19859 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19860
19861 if (operands[1] == const1_rtx)
19862 {
19863 /* Assuming we've chosen QImode-capable registers, 1 << N
19864 can be done with two 32/64-bit shifts, no branches, no cmoves. */
19865 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
19866 {
19867 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
19868
19869 ix86_expand_clear (low[0]);
19870 ix86_expand_clear (high[0]);
19871 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
19872
19873 d = gen_lowpart (QImode, low[0]);
19874 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19875 s = gen_rtx_EQ (QImode, flags, const0_rtx);
19876 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19877
19878 d = gen_lowpart (QImode, high[0]);
19879 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19880 s = gen_rtx_NE (QImode, flags, const0_rtx);
19881 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19882 }
19883
19884 /* Otherwise, we can get the same results by manually performing
19885 a bit extract operation on bit 5/6, and then performing the two
19886 shifts. The two methods of getting 0/1 into low/high are exactly
19887 the same size. Avoiding the shift in the bit extract case helps
19888 pentium4 a bit; no one else seems to care much either way. */
19889 else
19890 {
19891 enum machine_mode half_mode;
19892 rtx (*gen_lshr3)(rtx, rtx, rtx);
19893 rtx (*gen_and3)(rtx, rtx, rtx);
19894 rtx (*gen_xor3)(rtx, rtx, rtx);
19895 HOST_WIDE_INT bits;
19896 rtx x;
19897
19898 if (mode == DImode)
19899 {
19900 half_mode = SImode;
19901 gen_lshr3 = gen_lshrsi3;
19902 gen_and3 = gen_andsi3;
19903 gen_xor3 = gen_xorsi3;
19904 bits = 5;
19905 }
19906 else
19907 {
19908 half_mode = DImode;
19909 gen_lshr3 = gen_lshrdi3;
19910 gen_and3 = gen_anddi3;
19911 gen_xor3 = gen_xordi3;
19912 bits = 6;
19913 }
19914
19915 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
19916 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
19917 else
19918 x = gen_lowpart (half_mode, operands[2]);
19919 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
19920
19921 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
19922 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
19923 emit_move_insn (low[0], high[0]);
19924 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
19925 }
19926
19927 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19928 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
19929 return;
19930 }
19931
19932 if (operands[1] == constm1_rtx)
19933 {
19934 /* For -1 << N, we can avoid the shld instruction, because we
19935 know that we're shifting 0...31/63 ones into a -1. */
19936 emit_move_insn (low[0], constm1_rtx);
19937 if (optimize_insn_for_size_p ())
19938 emit_move_insn (high[0], low[0]);
19939 else
19940 emit_move_insn (high[0], constm1_rtx);
19941 }
19942 else
19943 {
19944 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19945
19946 if (!rtx_equal_p (operands[0], operands[1]))
19947 emit_move_insn (operands[0], operands[1]);
19948
19949 split_double_mode (mode, operands, 1, low, high);
19950 emit_insn (gen_shld (high[0], low[0], operands[2]));
19951 }
19952
19953 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19954
19955 if (TARGET_CMOVE && scratch)
19956 {
19957 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19958 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19959
19960 ix86_expand_clear (scratch);
19961 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
19962 }
19963 else
19964 {
19965 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19966 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19967
19968 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
19969 }
19970 }
19971
19972 void
19973 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
19974 {
19975 rtx (*gen_ashr3)(rtx, rtx, rtx)
19976 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
19977 rtx (*gen_shrd)(rtx, rtx, rtx);
19978 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19979
19980 rtx low[2], high[2];
19981 int count;
19982
19983 if (CONST_INT_P (operands[2]))
19984 {
19985 split_double_mode (mode, operands, 2, low, high);
19986 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19987
19988 if (count == GET_MODE_BITSIZE (mode) - 1)
19989 {
19990 emit_move_insn (high[0], high[1]);
19991 emit_insn (gen_ashr3 (high[0], high[0],
19992 GEN_INT (half_width - 1)));
19993 emit_move_insn (low[0], high[0]);
19994
19995 }
19996 else if (count >= half_width)
19997 {
19998 emit_move_insn (low[0], high[1]);
19999 emit_move_insn (high[0], low[0]);
20000 emit_insn (gen_ashr3 (high[0], high[0],
20001 GEN_INT (half_width - 1)));
20002
20003 if (count > half_width)
20004 emit_insn (gen_ashr3 (low[0], low[0],
20005 GEN_INT (count - half_width)));
20006 }
20007 else
20008 {
20009 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
20010
20011 if (!rtx_equal_p (operands[0], operands[1]))
20012 emit_move_insn (operands[0], operands[1]);
20013
20014 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
20015 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
20016 }
20017 }
20018 else
20019 {
20020 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
20021
20022 if (!rtx_equal_p (operands[0], operands[1]))
20023 emit_move_insn (operands[0], operands[1]);
20024
20025 split_double_mode (mode, operands, 1, low, high);
20026
20027 emit_insn (gen_shrd (low[0], high[0], operands[2]));
20028 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
20029
20030 if (TARGET_CMOVE && scratch)
20031 {
20032 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
20033 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
20034
20035 emit_move_insn (scratch, high[0]);
20036 emit_insn (gen_ashr3 (scratch, scratch,
20037 GEN_INT (half_width - 1)));
20038 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
20039 scratch));
20040 }
20041 else
20042 {
20043 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
20044 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
20045
20046 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
20047 }
20048 }
20049 }
20050
20051 void
20052 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
20053 {
20054 rtx (*gen_lshr3)(rtx, rtx, rtx)
20055 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
20056 rtx (*gen_shrd)(rtx, rtx, rtx);
20057 int half_width = GET_MODE_BITSIZE (mode) >> 1;
20058
20059 rtx low[2], high[2];
20060 int count;
20061
20062 if (CONST_INT_P (operands[2]))
20063 {
20064 split_double_mode (mode, operands, 2, low, high);
20065 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
20066
20067 if (count >= half_width)
20068 {
20069 emit_move_insn (low[0], high[1]);
20070 ix86_expand_clear (high[0]);
20071
20072 if (count > half_width)
20073 emit_insn (gen_lshr3 (low[0], low[0],
20074 GEN_INT (count - half_width)));
20075 }
20076 else
20077 {
20078 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
20079
20080 if (!rtx_equal_p (operands[0], operands[1]))
20081 emit_move_insn (operands[0], operands[1]);
20082
20083 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
20084 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
20085 }
20086 }
20087 else
20088 {
20089 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
20090
20091 if (!rtx_equal_p (operands[0], operands[1]))
20092 emit_move_insn (operands[0], operands[1]);
20093
20094 split_double_mode (mode, operands, 1, low, high);
20095
20096 emit_insn (gen_shrd (low[0], high[0], operands[2]));
20097 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
20098
20099 if (TARGET_CMOVE && scratch)
20100 {
20101 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
20102 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
20103
20104 ix86_expand_clear (scratch);
20105 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
20106 scratch));
20107 }
20108 else
20109 {
20110 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
20111 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
20112
20113 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
20114 }
20115 }
20116 }
20117
20118 /* Predict just emitted jump instruction to be taken with probability PROB. */
20119 static void
20120 predict_jump (int prob)
20121 {
20122 rtx insn = get_last_insn ();
20123 gcc_assert (JUMP_P (insn));
20124 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
20125 }
20126
20127 /* Helper function for the string operations below. Test whether VARIABLE
20128 is aligned to VALUE bytes; if it is, jump to the label that is returned. */
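/* E.g. with VALUE == 4 this emits roughly "testl $4, ptr; je label", so
   the code falling through after the call handles the case where that
   address bit is still set.  */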
20129 static rtx
20130 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
20131 {
20132 rtx label = gen_label_rtx ();
20133 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
20134 if (GET_MODE (variable) == DImode)
20135 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
20136 else
20137 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
20138 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
20139 1, label);
20140 if (epilogue)
20141 predict_jump (REG_BR_PROB_BASE * 50 / 100);
20142 else
20143 predict_jump (REG_BR_PROB_BASE * 90 / 100);
20144 return label;
20145 }
20146
20147 /* Decrease COUNTREG by VALUE. */
20148 static void
20149 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
20150 {
20151 rtx (*gen_add)(rtx, rtx, rtx)
20152 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
20153
20154 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
20155 }
20156
20157 /* Zero extend possibly SImode EXP to Pmode register. */
20158 rtx
20159 ix86_zero_extend_to_Pmode (rtx exp)
20160 {
20161 rtx r;
20162 if (GET_MODE (exp) == VOIDmode)
20163 return force_reg (Pmode, exp);
20164 if (GET_MODE (exp) == Pmode)
20165 return copy_to_mode_reg (Pmode, exp);
20166 r = gen_reg_rtx (Pmode);
20167 emit_insn (gen_zero_extendsidi2 (r, exp));
20168 return r;
20169 }
20170
20171 /* Divide COUNTREG by SCALE. */
20172 static rtx
20173 scale_counter (rtx countreg, int scale)
20174 {
20175 rtx sc;
20176
20177 if (scale == 1)
20178 return countreg;
20179 if (CONST_INT_P (countreg))
20180 return GEN_INT (INTVAL (countreg) / scale);
20181 gcc_assert (REG_P (countreg));
20182
20183 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
20184 GEN_INT (exact_log2 (scale)),
20185 NULL, 1, OPTAB_DIRECT);
20186 return sc;
20187 }
20188
20189 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
20190 DImode for constant loop counts. */
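/* For example, a count already held in a DImode register keeps DImode,
   a constant count of 200 uses SImode even on 64-bit targets, and a
   constant that does not fit in 32 bits uses DImode there.  */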
20191
20192 static enum machine_mode
20193 counter_mode (rtx count_exp)
20194 {
20195 if (GET_MODE (count_exp) != VOIDmode)
20196 return GET_MODE (count_exp);
20197 if (!CONST_INT_P (count_exp))
20198 return Pmode;
20199 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
20200 return DImode;
20201 return SImode;
20202 }
20203
20204 /* When SRCPTR is non-NULL, output a simple loop that copies memory
20205 pointed to by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
20206 the overall size is COUNT, specified in bytes. When SRCPTR is NULL, output
20207 an equivalent loop that sets memory to VALUE (assumed to be in MODE).
20208
20209 The size is rounded down to a whole number of chunks moved at once.
20210 SRCMEM and DESTMEM provide MEM rtxes to feed proper aliasing info. */
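/* As an illustrative sketch, for MODE == SImode, UNROLL == 2 and a copy
   (SRCPTR non-NULL) the emitted RTL behaves roughly like

	size = count & ~7;
	iter = 0;
      top:
	copy 4 bytes from srcptr + iter to destptr + iter;
	copy 4 bytes from srcptr + iter + 4 to destptr + iter + 4;
	iter += 8;
	if (iter < size)
	  goto top;
	destptr += iter;
	srcptr += iter;
      out:

   and the caller's epilogue then handles the remaining count & 7 bytes.  */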
20211
20212
20213 static void
20214 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
20215 rtx destptr, rtx srcptr, rtx value,
20216 rtx count, enum machine_mode mode, int unroll,
20217 int expected_size)
20218 {
20219 rtx out_label, top_label, iter, tmp;
20220 enum machine_mode iter_mode = counter_mode (count);
20221 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
20222 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
20223 rtx size;
20224 rtx x_addr;
20225 rtx y_addr;
20226 int i;
20227
20228 top_label = gen_label_rtx ();
20229 out_label = gen_label_rtx ();
20230 iter = gen_reg_rtx (iter_mode);
20231
20232 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
20233 NULL, 1, OPTAB_DIRECT);
20234 /* The AND just emitted and the compare against zero below should combine. */
20235 if (piece_size == const1_rtx)
20236 {
20237 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
20238 true, out_label);
20239 predict_jump (REG_BR_PROB_BASE * 10 / 100);
20240 }
20241 emit_move_insn (iter, const0_rtx);
20242
20243 emit_label (top_label);
20244
20245 tmp = convert_modes (Pmode, iter_mode, iter, true);
20246 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
20247 destmem = change_address (destmem, mode, x_addr);
20248
20249 if (srcmem)
20250 {
20251 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
20252 srcmem = change_address (srcmem, mode, y_addr);
20253
20254 /* When unrolling for chips that reorder memory reads and writes,
20255 we can save registers by using a single temporary.
20256 Also, using 4 temporaries is overkill in 32bit mode. */
20257 if (!TARGET_64BIT && 0)
20258 {
20259 for (i = 0; i < unroll; i++)
20260 {
20261 if (i)
20262 {
20263 destmem =
20264 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
20265 srcmem =
20266 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
20267 }
20268 emit_move_insn (destmem, srcmem);
20269 }
20270 }
20271 else
20272 {
20273 rtx tmpreg[4];
20274 gcc_assert (unroll <= 4);
20275 for (i = 0; i < unroll; i++)
20276 {
20277 tmpreg[i] = gen_reg_rtx (mode);
20278 if (i)
20279 {
20280 srcmem =
20281 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
20282 }
20283 emit_move_insn (tmpreg[i], srcmem);
20284 }
20285 for (i = 0; i < unroll; i++)
20286 {
20287 if (i)
20288 {
20289 destmem =
20290 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
20291 }
20292 emit_move_insn (destmem, tmpreg[i]);
20293 }
20294 }
20295 }
20296 else
20297 for (i = 0; i < unroll; i++)
20298 {
20299 if (i)
20300 destmem =
20301 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
20302 emit_move_insn (destmem, value);
20303 }
20304
20305 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
20306 true, OPTAB_LIB_WIDEN);
20307 if (tmp != iter)
20308 emit_move_insn (iter, tmp);
20309
20310 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
20311 true, top_label);
20312 if (expected_size != -1)
20313 {
20314 expected_size /= GET_MODE_SIZE (mode) * unroll;
20315 if (expected_size == 0)
20316 predict_jump (0);
20317 else if (expected_size > REG_BR_PROB_BASE)
20318 predict_jump (REG_BR_PROB_BASE - 1);
20319 else
20320 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
20321 }
20322 else
20323 predict_jump (REG_BR_PROB_BASE * 80 / 100);
20324 iter = ix86_zero_extend_to_Pmode (iter);
20325 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
20326 true, OPTAB_LIB_WIDEN);
20327 if (tmp != destptr)
20328 emit_move_insn (destptr, tmp);
20329 if (srcptr)
20330 {
20331 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
20332 true, OPTAB_LIB_WIDEN);
20333 if (tmp != srcptr)
20334 emit_move_insn (srcptr, tmp);
20335 }
20336 emit_label (out_label);
20337 }
20338
20339 /* Output "rep; mov" instruction.
20340 Arguments have same meaning as for previous function */
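/* For instance, with MODE == SImode this amounts to

	countreg = count >> 2;
	rep movsl;

   which advances both DESTPTR and SRCPTR by 4 * countreg bytes; the
   DESTEXP and SRCEXP expressions built below describe those final
   pointer values for the rep_mov pattern.  */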
20341 static void
20342 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
20343 rtx destptr, rtx srcptr,
20344 rtx count,
20345 enum machine_mode mode)
20346 {
20347 rtx destexp;
20348 rtx srcexp;
20349 rtx countreg;
20350 HOST_WIDE_INT rounded_count;
20351
20352 /* If the size is known and a multiple of 4, it is shorter to use rep movsl. */
20353 if (mode == QImode && CONST_INT_P (count)
20354 && !(INTVAL (count) & 3))
20355 mode = SImode;
20356
20357 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
20358 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
20359 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
20360 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
20361 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
20362 if (mode != QImode)
20363 {
20364 destexp = gen_rtx_ASHIFT (Pmode, countreg,
20365 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
20366 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
20367 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
20368 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
20369 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
20370 }
20371 else
20372 {
20373 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
20374 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
20375 }
20376 if (CONST_INT_P (count))
20377 {
20378 rounded_count = (INTVAL (count)
20379 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
20380 destmem = shallow_copy_rtx (destmem);
20381 srcmem = shallow_copy_rtx (srcmem);
20382 set_mem_size (destmem, rounded_count);
20383 set_mem_size (srcmem, rounded_count);
20384 }
20385 else
20386 {
20387 if (MEM_SIZE_KNOWN_P (destmem))
20388 clear_mem_size (destmem);
20389 if (MEM_SIZE_KNOWN_P (srcmem))
20390 clear_mem_size (srcmem);
20391 }
20392 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
20393 destexp, srcexp));
20394 }
20395
20396 /* Output "rep; stos" instruction.
20397 Arguments have same meaning as for previous function */
20398 static void
20399 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
20400 rtx count, enum machine_mode mode,
20401 rtx orig_value)
20402 {
20403 rtx destexp;
20404 rtx countreg;
20405 HOST_WIDE_INT rounded_count;
20406
20407 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
20408 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
20409 value = force_reg (mode, gen_lowpart (mode, value));
20410 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
20411 if (mode != QImode)
20412 {
20413 destexp = gen_rtx_ASHIFT (Pmode, countreg,
20414 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
20415 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
20416 }
20417 else
20418 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
20419 if (orig_value == const0_rtx && CONST_INT_P (count))
20420 {
20421 rounded_count = (INTVAL (count)
20422 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
20423 destmem = shallow_copy_rtx (destmem);
20424 set_mem_size (destmem, rounded_count);
20425 }
20426 else if (MEM_SIZE_KNOWN_P (destmem))
20427 clear_mem_size (destmem);
20428 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
20429 }
20430
20431 static void
20432 emit_strmov (rtx destmem, rtx srcmem,
20433 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
20434 {
20435 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
20436 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
20437 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20438 }
20439
20440 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
20441 static void
20442 expand_movmem_epilogue (rtx destmem, rtx srcmem,
20443 rtx destptr, rtx srcptr, rtx count, int max_size)
20444 {
20445 rtx src, dest;
20446 if (CONST_INT_P (count))
20447 {
20448 HOST_WIDE_INT countval = INTVAL (count);
20449 int offset = 0;
20450
20451 if ((countval & 0x10) && max_size > 16)
20452 {
20453 if (TARGET_64BIT)
20454 {
20455 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
20456 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
20457 }
20458 else
20459 gcc_unreachable ();
20460 offset += 16;
20461 }
20462 if ((countval & 0x08) && max_size > 8)
20463 {
20464 if (TARGET_64BIT)
20465 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
20466 else
20467 {
20468 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
20469 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
20470 }
20471 offset += 8;
20472 }
20473 if ((countval & 0x04) && max_size > 4)
20474 {
20475 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
20476 offset += 4;
20477 }
20478 if ((countval & 0x02) && max_size > 2)
20479 {
20480 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
20481 offset += 2;
20482 }
20483 if ((countval & 0x01) && max_size > 1)
20484 {
20485 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
20486 offset += 1;
20487 }
20488 return;
20489 }
20490 if (max_size > 8)
20491 {
20492 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
20493 count, 1, OPTAB_DIRECT);
20494 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
20495 count, QImode, 1, 4);
20496 return;
20497 }
20498
20499 /* When single-instruction stringops are available, we can cheaply increase
20500 the dest and src pointers. Otherwise we save code size by maintaining an
20501 offset (zero is readily available from the preceding rep operation) and
20502 using x86 addressing modes. */
20503 if (TARGET_SINGLE_STRINGOP)
20504 {
20505 if (max_size > 4)
20506 {
20507 rtx label = ix86_expand_aligntest (count, 4, true);
20508 src = change_address (srcmem, SImode, srcptr);
20509 dest = change_address (destmem, SImode, destptr);
20510 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20511 emit_label (label);
20512 LABEL_NUSES (label) = 1;
20513 }
20514 if (max_size > 2)
20515 {
20516 rtx label = ix86_expand_aligntest (count, 2, true);
20517 src = change_address (srcmem, HImode, srcptr);
20518 dest = change_address (destmem, HImode, destptr);
20519 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20520 emit_label (label);
20521 LABEL_NUSES (label) = 1;
20522 }
20523 if (max_size > 1)
20524 {
20525 rtx label = ix86_expand_aligntest (count, 1, true);
20526 src = change_address (srcmem, QImode, srcptr);
20527 dest = change_address (destmem, QImode, destptr);
20528 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20529 emit_label (label);
20530 LABEL_NUSES (label) = 1;
20531 }
20532 }
20533 else
20534 {
20535 rtx offset = force_reg (Pmode, const0_rtx);
20536 rtx tmp;
20537
20538 if (max_size > 4)
20539 {
20540 rtx label = ix86_expand_aligntest (count, 4, true);
20541 src = change_address (srcmem, SImode, srcptr);
20542 dest = change_address (destmem, SImode, destptr);
20543 emit_move_insn (dest, src);
20544 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
20545 true, OPTAB_LIB_WIDEN);
20546 if (tmp != offset)
20547 emit_move_insn (offset, tmp);
20548 emit_label (label);
20549 LABEL_NUSES (label) = 1;
20550 }
20551 if (max_size > 2)
20552 {
20553 rtx label = ix86_expand_aligntest (count, 2, true);
20554 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
20555 src = change_address (srcmem, HImode, tmp);
20556 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
20557 dest = change_address (destmem, HImode, tmp);
20558 emit_move_insn (dest, src);
20559 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
20560 true, OPTAB_LIB_WIDEN);
20561 if (tmp != offset)
20562 emit_move_insn (offset, tmp);
20563 emit_label (label);
20564 LABEL_NUSES (label) = 1;
20565 }
20566 if (max_size > 1)
20567 {
20568 rtx label = ix86_expand_aligntest (count, 1, true);
20569 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
20570 src = change_address (srcmem, QImode, tmp);
20571 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
20572 dest = change_address (destmem, QImode, tmp);
20573 emit_move_insn (dest, src);
20574 emit_label (label);
20575 LABEL_NUSES (label) = 1;
20576 }
20577 }
20578 }
20579
20580 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
20581 static void
20582 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
20583 rtx count, int max_size)
20584 {
20585 count =
20586 expand_simple_binop (counter_mode (count), AND, count,
20587 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
20588 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
20589 gen_lowpart (QImode, value), count, QImode,
20590 1, max_size / 2);
20591 }
20592
20593 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
20594 static void
20595 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
20596 {
20597 rtx dest;
20598
20599 if (CONST_INT_P (count))
20600 {
20601 HOST_WIDE_INT countval = INTVAL (count);
20602 int offset = 0;
20603
20604 if ((countval & 0x10) && max_size > 16)
20605 {
20606 if (TARGET_64BIT)
20607 {
20608 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
20609 emit_insn (gen_strset (destptr, dest, value));
20610 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
20611 emit_insn (gen_strset (destptr, dest, value));
20612 }
20613 else
20614 gcc_unreachable ();
20615 offset += 16;
20616 }
20617 if ((countval & 0x08) && max_size > 8)
20618 {
20619 if (TARGET_64BIT)
20620 {
20621 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
20622 emit_insn (gen_strset (destptr, dest, value));
20623 }
20624 else
20625 {
20626 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
20627 emit_insn (gen_strset (destptr, dest, value));
20628 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
20629 emit_insn (gen_strset (destptr, dest, value));
20630 }
20631 offset += 8;
20632 }
20633 if ((countval & 0x04) && max_size > 4)
20634 {
20635 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
20636 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
20637 offset += 4;
20638 }
20639 if ((countval & 0x02) && max_size > 2)
20640 {
20641 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
20642 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20643 offset += 2;
20644 }
20645 if ((countval & 0x01) && max_size > 1)
20646 {
20647 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
20648 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20649 offset += 1;
20650 }
20651 return;
20652 }
20653 if (max_size > 32)
20654 {
20655 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
20656 return;
20657 }
20658 if (max_size > 16)
20659 {
20660 rtx label = ix86_expand_aligntest (count, 16, true);
20661 if (TARGET_64BIT)
20662 {
20663 dest = change_address (destmem, DImode, destptr);
20664 emit_insn (gen_strset (destptr, dest, value));
20665 emit_insn (gen_strset (destptr, dest, value));
20666 }
20667 else
20668 {
20669 dest = change_address (destmem, SImode, destptr);
20670 emit_insn (gen_strset (destptr, dest, value));
20671 emit_insn (gen_strset (destptr, dest, value));
20672 emit_insn (gen_strset (destptr, dest, value));
20673 emit_insn (gen_strset (destptr, dest, value));
20674 }
20675 emit_label (label);
20676 LABEL_NUSES (label) = 1;
20677 }
20678 if (max_size > 8)
20679 {
20680 rtx label = ix86_expand_aligntest (count, 8, true);
20681 if (TARGET_64BIT)
20682 {
20683 dest = change_address (destmem, DImode, destptr);
20684 emit_insn (gen_strset (destptr, dest, value));
20685 }
20686 else
20687 {
20688 dest = change_address (destmem, SImode, destptr);
20689 emit_insn (gen_strset (destptr, dest, value));
20690 emit_insn (gen_strset (destptr, dest, value));
20691 }
20692 emit_label (label);
20693 LABEL_NUSES (label) = 1;
20694 }
20695 if (max_size > 4)
20696 {
20697 rtx label = ix86_expand_aligntest (count, 4, true);
20698 dest = change_address (destmem, SImode, destptr);
20699 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
20700 emit_label (label);
20701 LABEL_NUSES (label) = 1;
20702 }
20703 if (max_size > 2)
20704 {
20705 rtx label = ix86_expand_aligntest (count, 2, true);
20706 dest = change_address (destmem, HImode, destptr);
20707 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20708 emit_label (label);
20709 LABEL_NUSES (label) = 1;
20710 }
20711 if (max_size > 1)
20712 {
20713 rtx label = ix86_expand_aligntest (count, 1, true);
20714 dest = change_address (destmem, QImode, destptr);
20715 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20716 emit_label (label);
20717 LABEL_NUSES (label) = 1;
20718 }
20719 }
20720
20721 /* Copy enough bytes from SRC to DEST to align DEST, known to be aligned to
20722 ALIGN, to DESIRED_ALIGNMENT. */
20723 static void
20724 expand_movmem_prologue (rtx destmem, rtx srcmem,
20725 rtx destptr, rtx srcptr, rtx count,
20726 int align, int desired_alignment)
20727 {
20728 if (align <= 1 && desired_alignment > 1)
20729 {
20730 rtx label = ix86_expand_aligntest (destptr, 1, false);
20731 srcmem = change_address (srcmem, QImode, srcptr);
20732 destmem = change_address (destmem, QImode, destptr);
20733 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20734 ix86_adjust_counter (count, 1);
20735 emit_label (label);
20736 LABEL_NUSES (label) = 1;
20737 }
20738 if (align <= 2 && desired_alignment > 2)
20739 {
20740 rtx label = ix86_expand_aligntest (destptr, 2, false);
20741 srcmem = change_address (srcmem, HImode, srcptr);
20742 destmem = change_address (destmem, HImode, destptr);
20743 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20744 ix86_adjust_counter (count, 2);
20745 emit_label (label);
20746 LABEL_NUSES (label) = 1;
20747 }
20748 if (align <= 4 && desired_alignment > 4)
20749 {
20750 rtx label = ix86_expand_aligntest (destptr, 4, false);
20751 srcmem = change_address (srcmem, SImode, srcptr);
20752 destmem = change_address (destmem, SImode, destptr);
20753 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20754 ix86_adjust_counter (count, 4);
20755 emit_label (label);
20756 LABEL_NUSES (label) = 1;
20757 }
20758 gcc_assert (desired_alignment <= 8);
20759 }
20760
20761 /* Copy enough bytes from SRC to DST to align DST to DESIRED_ALIGN.
20762 ALIGN_BYTES is how many bytes need to be copied. */
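/* For example, with DESIRED_ALIGN == 8 and ALIGN_BYTES == 3 this emits one
   QImode and one HImode strmov (copying 1 + 2 bytes and advancing both
   pointer registers), and then returns DST with its recorded alignment,
   size and offset adjusted to account for the 3 bytes already copied.  */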
20763 static rtx
20764 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
20765 int desired_align, int align_bytes)
20766 {
20767 rtx src = *srcp;
20768 rtx orig_dst = dst;
20769 rtx orig_src = src;
20770 int off = 0;
20771 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
20772 if (src_align_bytes >= 0)
20773 src_align_bytes = desired_align - src_align_bytes;
20774 if (align_bytes & 1)
20775 {
20776 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20777 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
20778 off = 1;
20779 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20780 }
20781 if (align_bytes & 2)
20782 {
20783 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20784 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
20785 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20786 set_mem_align (dst, 2 * BITS_PER_UNIT);
20787 if (src_align_bytes >= 0
20788 && (src_align_bytes & 1) == (align_bytes & 1)
20789 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
20790 set_mem_align (src, 2 * BITS_PER_UNIT);
20791 off = 2;
20792 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20793 }
20794 if (align_bytes & 4)
20795 {
20796 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20797 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
20798 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20799 set_mem_align (dst, 4 * BITS_PER_UNIT);
20800 if (src_align_bytes >= 0)
20801 {
20802 unsigned int src_align = 0;
20803 if ((src_align_bytes & 3) == (align_bytes & 3))
20804 src_align = 4;
20805 else if ((src_align_bytes & 1) == (align_bytes & 1))
20806 src_align = 2;
20807 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20808 set_mem_align (src, src_align * BITS_PER_UNIT);
20809 }
20810 off = 4;
20811 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20812 }
20813 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20814 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
20815 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20816 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20817 if (src_align_bytes >= 0)
20818 {
20819 unsigned int src_align = 0;
20820 if ((src_align_bytes & 7) == (align_bytes & 7))
20821 src_align = 8;
20822 else if ((src_align_bytes & 3) == (align_bytes & 3))
20823 src_align = 4;
20824 else if ((src_align_bytes & 1) == (align_bytes & 1))
20825 src_align = 2;
20826 if (src_align > (unsigned int) desired_align)
20827 src_align = desired_align;
20828 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20829 set_mem_align (src, src_align * BITS_PER_UNIT);
20830 }
20831 if (MEM_SIZE_KNOWN_P (orig_dst))
20832 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
20833 if (MEM_SIZE_KNOWN_P (orig_src))
20834 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
20835 *srcp = src;
20836 return dst;
20837 }
20838
20839 /* Store enough bytes at DEST to align DEST, known to be aligned to ALIGN,
20840 to DESIRED_ALIGNMENT. */
20841 static void
20842 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
20843 int align, int desired_alignment)
20844 {
20845 if (align <= 1 && desired_alignment > 1)
20846 {
20847 rtx label = ix86_expand_aligntest (destptr, 1, false);
20848 destmem = change_address (destmem, QImode, destptr);
20849 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
20850 ix86_adjust_counter (count, 1);
20851 emit_label (label);
20852 LABEL_NUSES (label) = 1;
20853 }
20854 if (align <= 2 && desired_alignment > 2)
20855 {
20856 rtx label = ix86_expand_aligntest (destptr, 2, false);
20857 destmem = change_address (destmem, HImode, destptr);
20858 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
20859 ix86_adjust_counter (count, 2);
20860 emit_label (label);
20861 LABEL_NUSES (label) = 1;
20862 }
20863 if (align <= 4 && desired_alignment > 4)
20864 {
20865 rtx label = ix86_expand_aligntest (destptr, 4, false);
20866 destmem = change_address (destmem, SImode, destptr);
20867 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
20868 ix86_adjust_counter (count, 4);
20869 emit_label (label);
20870 LABEL_NUSES (label) = 1;
20871 }
20872 gcc_assert (desired_alignment <= 8);
20873 }
20874
20875 /* Store enough bytes at DST to align DST, known to be aligned to ALIGN, to
20876 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
20877 static rtx
20878 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
20879 int desired_align, int align_bytes)
20880 {
20881 int off = 0;
20882 rtx orig_dst = dst;
20883 if (align_bytes & 1)
20884 {
20885 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20886 off = 1;
20887 emit_insn (gen_strset (destreg, dst,
20888 gen_lowpart (QImode, value)));
20889 }
20890 if (align_bytes & 2)
20891 {
20892 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20893 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20894 set_mem_align (dst, 2 * BITS_PER_UNIT);
20895 off = 2;
20896 emit_insn (gen_strset (destreg, dst,
20897 gen_lowpart (HImode, value)));
20898 }
20899 if (align_bytes & 4)
20900 {
20901 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20902 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20903 set_mem_align (dst, 4 * BITS_PER_UNIT);
20904 off = 4;
20905 emit_insn (gen_strset (destreg, dst,
20906 gen_lowpart (SImode, value)));
20907 }
20908 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20909 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20910 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20911 if (MEM_SIZE_KNOWN_P (orig_dst))
20912 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
20913 return dst;
20914 }
20915
20916 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
20917 static enum stringop_alg
20918 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
20919 int *dynamic_check)
20920 {
20921 const struct stringop_algs * algs;
20922 bool optimize_for_speed;
20923 /* Algorithms using the rep prefix want at least edi and ecx;
20924 additionally, memset wants eax and memcpy wants esi. Don't
20925 consider such algorithms if the user has appropriated those
20926 registers for their own purposes. */
20927 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
20928 || (memset
20929 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
20930
20931 #define ALG_USABLE_P(alg) (rep_prefix_usable \
20932 || (alg != rep_prefix_1_byte \
20933 && alg != rep_prefix_4_byte \
20934 && alg != rep_prefix_8_byte))
20935 const struct processor_costs *cost;
20936
20937 /* Even if the string operation call is cold, we still might spend a lot
20938 of time processing large blocks. */
20939 if (optimize_function_for_size_p (cfun)
20940 || (optimize_insn_for_size_p ()
20941 && expected_size != -1 && expected_size < 256))
20942 optimize_for_speed = false;
20943 else
20944 optimize_for_speed = true;
20945
20946 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
20947
20948 *dynamic_check = -1;
20949 if (memset)
20950 algs = &cost->memset[TARGET_64BIT != 0];
20951 else
20952 algs = &cost->memcpy[TARGET_64BIT != 0];
20953 if (ix86_stringop_alg != no_stringop && ALG_USABLE_P (ix86_stringop_alg))
20954 return ix86_stringop_alg;
20955 /* rep; movq or rep; movl is the smallest variant. */
20956 else if (!optimize_for_speed)
20957 {
20958 if (!count || (count & 3))
20959 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
20960 else
20961 return rep_prefix_usable ? rep_prefix_4_byte : loop;
20962 }
20963 /* Very tiny blocks are best handled via the loop; REP is expensive to set
20964 up. */
20965 else if (expected_size != -1 && expected_size < 4)
20966 return loop_1_byte;
20967 else if (expected_size != -1)
20968 {
20969 unsigned int i;
20970 enum stringop_alg alg = libcall;
20971 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20972 {
20973 /* We get here if the algorithms that were not libcall-based
20974 were rep-prefix based and we are unable to use rep prefixes
20975 based on global register usage. Break out of the loop and
20976 use the heuristic below. */
20977 if (algs->size[i].max == 0)
20978 break;
20979 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
20980 {
20981 enum stringop_alg candidate = algs->size[i].alg;
20982
20983 if (candidate != libcall && ALG_USABLE_P (candidate))
20984 alg = candidate;
20985 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
20986 last non-libcall inline algorithm. */
20987 if (TARGET_INLINE_ALL_STRINGOPS)
20988 {
20989 /* When the current size is best to be copied by a libcall,
20990 but we are still forced to inline, run the heuristic below
20991 that will pick code for medium sized blocks. */
20992 if (alg != libcall)
20993 return alg;
20994 break;
20995 }
20996 else if (ALG_USABLE_P (candidate))
20997 return candidate;
20998 }
20999 }
21000 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
21001 }
21002 /* When asked to inline the call anyway, try to pick a meaningful choice.
21003 We look for the maximal size of block that is faster to copy by hand and
21004 take blocks of at most that size, guessing that the average size will
21005 be roughly half of the block.
21006
21007 If this turns out to be bad, we might simply specify the preferred
21008 choice in ix86_costs. */
21009 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
21010 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
21011 {
21012 int max = -1;
21013 enum stringop_alg alg;
21014 int i;
21015 bool any_alg_usable_p = true;
21016
21017 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
21018 {
21019 enum stringop_alg candidate = algs->size[i].alg;
21020 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
21021
21022 if (candidate != libcall && candidate
21023 && ALG_USABLE_P (candidate))
21024 max = algs->size[i].max;
21025 }
21026 /* If there aren't any usable algorithms, then recursing on
21027 smaller sizes isn't going to find anything. Just return the
21028 simple byte-at-a-time copy loop. */
21029 if (!any_alg_usable_p)
21030 {
21031 /* Pick something reasonable. */
21032 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
21033 *dynamic_check = 128;
21034 return loop_1_byte;
21035 }
21036 if (max == -1)
21037 max = 4096;
21038 alg = decide_alg (count, max / 2, memset, dynamic_check);
21039 gcc_assert (*dynamic_check == -1);
21040 gcc_assert (alg != libcall);
21041 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
21042 *dynamic_check = max;
21043 return alg;
21044 }
21045 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
21046 #undef ALG_USABLE_P
21047 }
21048
21049 /* Decide on alignment. We know that the operand is already aligned to ALIGN
21050 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
21051 static int
21052 decide_alignment (int align,
21053 enum stringop_alg alg,
21054 int expected_size)
21055 {
21056 int desired_align = 0;
21057 switch (alg)
21058 {
21059 case no_stringop:
21060 gcc_unreachable ();
21061 case loop:
21062 case unrolled_loop:
21063 desired_align = GET_MODE_SIZE (Pmode);
21064 break;
21065 case rep_prefix_8_byte:
21066 desired_align = 8;
21067 break;
21068 case rep_prefix_4_byte:
21069 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
21070 copying whole cachelines at once. */
21071 if (TARGET_PENTIUMPRO)
21072 desired_align = 8;
21073 else
21074 desired_align = 4;
21075 break;
21076 case rep_prefix_1_byte:
21077 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
21078 copying whole cachelines at once. */
21079 if (TARGET_PENTIUMPRO)
21080 desired_align = 8;
21081 else
21082 desired_align = 1;
21083 break;
21084 case loop_1_byte:
21085 desired_align = 1;
21086 break;
21087 case libcall:
21088 return 0;
21089 }
21090
21091 if (optimize_size)
21092 desired_align = 1;
21093 if (desired_align < align)
21094 desired_align = align;
21095 if (expected_size != -1 && expected_size < 4)
21096 desired_align = align;
21097 return desired_align;
21098 }
21099
21100 /* Return the smallest power of 2 greater than VAL. */
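/* For example, smallest_pow2_greater_than (4) and
   smallest_pow2_greater_than (7) both return 8; the result is always
   strictly greater than VAL.  */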
21101 static int
21102 smallest_pow2_greater_than (int val)
21103 {
21104 int ret = 1;
21105 while (ret <= val)
21106 ret <<= 1;
21107 return ret;
21108 }
21109
21110 /* Expand string move (memcpy) operation. Use i386 string operations
21111 when profitable. expand_setmem contains similar code. The code
21112 depends upon architecture, block size and alignment, but always has
21113 the same overall structure:
21114
21115 1) Prologue guard: Conditional that jumps to the epilogue for small
21116 blocks that can be handled by the epilogue alone. This is faster
21117 but also needed for correctness, since the prologue assumes the block
21118 is larger than the desired alignment.
21119
21120 Optional dynamic check for size and libcall for large
21121 blocks is emitted here too, with -minline-stringops-dynamically.
21122
21123 2) Prologue: copy first few bytes in order to get destination
21124 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
21125 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
21126 copied. We emit either a jump tree on power of two sized
21127 blocks, or a byte loop.
21128
21129 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
21130 with specified algorithm.
21131
21132 4) Epilogue: code copying tail of the block that is too small to be
21133 handled by main body (or up to size guarded by prologue guard). */
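
/* As an illustrative sketch, for a variable-sized copy using the
   unrolled_loop algorithm the emitted code is roughly

	if (count < epilogue_size_needed)
	  goto epilogue;
	copy 1/2/4-byte pieces until dest reaches desired_align;
	main loop: copy size_needed bytes per iteration;
      epilogue:
	copy the remaining count & (epilogue_size_needed - 1) bytes;

   where the four parts correspond to steps 1) through 4) above.  */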
21134
21135 bool
21136 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
21137 rtx expected_align_exp, rtx expected_size_exp)
21138 {
21139 rtx destreg;
21140 rtx srcreg;
21141 rtx label = NULL;
21142 rtx tmp;
21143 rtx jump_around_label = NULL;
21144 HOST_WIDE_INT align = 1;
21145 unsigned HOST_WIDE_INT count = 0;
21146 HOST_WIDE_INT expected_size = -1;
21147 int size_needed = 0, epilogue_size_needed;
21148 int desired_align = 0, align_bytes = 0;
21149 enum stringop_alg alg;
21150 int dynamic_check;
21151 bool need_zero_guard = false;
21152
21153 if (CONST_INT_P (align_exp))
21154 align = INTVAL (align_exp);
21155 /* i386 can do misaligned access at a reasonably small extra cost. */
21156 if (CONST_INT_P (expected_align_exp)
21157 && INTVAL (expected_align_exp) > align)
21158 align = INTVAL (expected_align_exp);
21159 /* ALIGN is the minimum of destination and source alignment, but we care here
21160 just about destination alignment. */
21161 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
21162 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
21163
21164 if (CONST_INT_P (count_exp))
21165 count = expected_size = INTVAL (count_exp);
21166 if (CONST_INT_P (expected_size_exp) && count == 0)
21167 expected_size = INTVAL (expected_size_exp);
21168
21169 /* Make sure we don't need to care about overflow later on. */
21170 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
21171 return false;
21172
21173 /* Step 0: Decide on preferred algorithm, desired alignment and
21174 size of chunks to be copied by main loop. */
21175
21176 alg = decide_alg (count, expected_size, false, &dynamic_check);
21177 desired_align = decide_alignment (align, alg, expected_size);
21178
21179 if (!TARGET_ALIGN_STRINGOPS)
21180 align = desired_align;
21181
21182 if (alg == libcall)
21183 return false;
21184 gcc_assert (alg != no_stringop);
21185 if (!count)
21186 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
21187 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
21188 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
21189 switch (alg)
21190 {
21191 case libcall:
21192 case no_stringop:
21193 gcc_unreachable ();
21194 case loop:
21195 need_zero_guard = true;
21196 size_needed = GET_MODE_SIZE (Pmode);
21197 break;
21198 case unrolled_loop:
21199 need_zero_guard = true;
21200 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
21201 break;
21202 case rep_prefix_8_byte:
21203 size_needed = 8;
21204 break;
21205 case rep_prefix_4_byte:
21206 size_needed = 4;
21207 break;
21208 case rep_prefix_1_byte:
21209 size_needed = 1;
21210 break;
21211 case loop_1_byte:
21212 need_zero_guard = true;
21213 size_needed = 1;
21214 break;
21215 }
21216
21217 epilogue_size_needed = size_needed;
21218
21219 /* Step 1: Prologue guard. */
21220
21221 /* Alignment code needs count to be in register. */
21222 if (CONST_INT_P (count_exp) && desired_align > align)
21223 {
21224 if (INTVAL (count_exp) > desired_align
21225 && INTVAL (count_exp) > size_needed)
21226 {
21227 align_bytes
21228 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
21229 if (align_bytes <= 0)
21230 align_bytes = 0;
21231 else
21232 align_bytes = desired_align - align_bytes;
21233 }
21234 if (align_bytes == 0)
21235 count_exp = force_reg (counter_mode (count_exp), count_exp);
21236 }
21237 gcc_assert (desired_align >= 1 && align >= 1);
21238
21239 /* Ensure that alignment prologue won't copy past end of block. */
21240 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
21241 {
21242 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
21243 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
21244 Make sure it is a power of 2. */
21245 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
21246
21247 if (count)
21248 {
21249 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
21250 {
21251 /* If main algorithm works on QImode, no epilogue is needed.
21252 For small sizes just don't align anything. */
21253 if (size_needed == 1)
21254 desired_align = align;
21255 else
21256 goto epilogue;
21257 }
21258 }
21259 else
21260 {
21261 label = gen_label_rtx ();
21262 emit_cmp_and_jump_insns (count_exp,
21263 GEN_INT (epilogue_size_needed),
21264 LTU, 0, counter_mode (count_exp), 1, label);
21265 if (expected_size == -1 || expected_size < epilogue_size_needed)
21266 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21267 else
21268 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21269 }
21270 }
21271
21272 /* Emit code to decide on runtime whether library call or inline should be
21273 used. */
21274 if (dynamic_check != -1)
21275 {
21276 if (CONST_INT_P (count_exp))
21277 {
21278 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
21279 {
21280 emit_block_move_via_libcall (dst, src, count_exp, false);
21281 count_exp = const0_rtx;
21282 goto epilogue;
21283 }
21284 }
21285 else
21286 {
21287 rtx hot_label = gen_label_rtx ();
21288 jump_around_label = gen_label_rtx ();
21289 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
21290 LEU, 0, GET_MODE (count_exp), 1, hot_label);
21291 predict_jump (REG_BR_PROB_BASE * 90 / 100);
21292 emit_block_move_via_libcall (dst, src, count_exp, false);
21293 emit_jump (jump_around_label);
21294 emit_label (hot_label);
21295 }
21296 }
21297
21298 /* Step 2: Alignment prologue. */
21299
21300 if (desired_align > align)
21301 {
21302 if (align_bytes == 0)
21303 {
21304 /* Except for the first move in the epilogue, we no longer know
21305 the constant offset in aliasing info. It doesn't seem worth
21306 the pain to maintain it for the first move, so throw away
21307 the info early. */
21308 src = change_address (src, BLKmode, srcreg);
21309 dst = change_address (dst, BLKmode, destreg);
21310 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
21311 desired_align);
21312 }
21313 else
21314 {
21315 /* If we know how many bytes need to be stored before dst is
21316 sufficiently aligned, maintain aliasing info accurately. */
21317 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
21318 desired_align, align_bytes);
21319 count_exp = plus_constant (count_exp, -align_bytes);
21320 count -= align_bytes;
21321 }
21322 if (need_zero_guard
21323 && (count < (unsigned HOST_WIDE_INT) size_needed
21324 || (align_bytes == 0
21325 && count < ((unsigned HOST_WIDE_INT) size_needed
21326 + desired_align - align))))
21327 {
21328 /* It is possible that we copied enough so the main loop will not
21329 execute. */
21330 gcc_assert (size_needed > 1);
21331 if (label == NULL_RTX)
21332 label = gen_label_rtx ();
21333 emit_cmp_and_jump_insns (count_exp,
21334 GEN_INT (size_needed),
21335 LTU, 0, counter_mode (count_exp), 1, label);
21336 if (expected_size == -1
21337 || expected_size < (desired_align - align) / 2 + size_needed)
21338 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21339 else
21340 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21341 }
21342 }
21343 if (label && size_needed == 1)
21344 {
21345 emit_label (label);
21346 LABEL_NUSES (label) = 1;
21347 label = NULL;
21348 epilogue_size_needed = 1;
21349 }
21350 else if (label == NULL_RTX)
21351 epilogue_size_needed = size_needed;
21352
21353 /* Step 3: Main loop. */
21354
21355 switch (alg)
21356 {
21357 case libcall:
21358 case no_stringop:
21359 gcc_unreachable ();
21360 case loop_1_byte:
21361 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
21362 count_exp, QImode, 1, expected_size);
21363 break;
21364 case loop:
21365 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
21366 count_exp, Pmode, 1, expected_size);
21367 break;
21368 case unrolled_loop:
21369 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
21370 registers for 4 temporaries anyway. */
21371 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
21372 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
21373 expected_size);
21374 break;
21375 case rep_prefix_8_byte:
21376 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
21377 DImode);
21378 break;
21379 case rep_prefix_4_byte:
21380 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
21381 SImode);
21382 break;
21383 case rep_prefix_1_byte:
21384 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
21385 QImode);
21386 break;
21387 }
21388 /* Properly adjust the offsets of the src and dest memory for aliasing. */
21389 if (CONST_INT_P (count_exp))
21390 {
21391 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
21392 (count / size_needed) * size_needed);
21393 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
21394 (count / size_needed) * size_needed);
21395 }
21396 else
21397 {
21398 src = change_address (src, BLKmode, srcreg);
21399 dst = change_address (dst, BLKmode, destreg);
21400 }
21401
21402 /* Step 4: Epilogue to copy the remaining bytes. */
21403 epilogue:
21404 if (label)
21405 {
21406 /* When the main loop is done, COUNT_EXP might hold the original count,
21407 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
21408 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
21409 bytes. Compensate if needed. */
21410
21411 if (size_needed < epilogue_size_needed)
21412 {
21413 tmp =
21414 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
21415 GEN_INT (size_needed - 1), count_exp, 1,
21416 OPTAB_DIRECT);
21417 if (tmp != count_exp)
21418 emit_move_insn (count_exp, tmp);
21419 }
21420 emit_label (label);
21421 LABEL_NUSES (label) = 1;
21422 }
21423
21424 if (count_exp != const0_rtx && epilogue_size_needed > 1)
21425 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
21426 epilogue_size_needed);
21427 if (jump_around_label)
21428 emit_label (jump_around_label);
21429 return true;
21430 }
21431
21432 /* Helper function for memset. For a QImode value 0xXY produce
21433 0xXYXYXYXY of the width specified by MODE. This is essentially
21434 a multiplication by 0x01010101, but we can do slightly better than
21435 synth_mult by unwinding the sequence by hand on CPUs with a
21436 slow multiply. */
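/* For example, for VAL == 0x2A and MODE == SImode the result is
   0x2A2A2A2A. When the multiply is cheap this is computed below as
   reg * 0x01010101; otherwise it is built up by hand, roughly

	reg |= reg << 8;
	reg |= reg << 16;
	reg |= reg << 32;	(DImode only)

   which is what the insv/shift/IOR sequence below expands to.  */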
21437 static rtx
21438 promote_duplicated_reg (enum machine_mode mode, rtx val)
21439 {
21440 enum machine_mode valmode = GET_MODE (val);
21441 rtx tmp;
21442 int nops = mode == DImode ? 3 : 2;
21443
21444 gcc_assert (mode == SImode || mode == DImode);
21445 if (val == const0_rtx)
21446 return copy_to_mode_reg (mode, const0_rtx);
21447 if (CONST_INT_P (val))
21448 {
21449 HOST_WIDE_INT v = INTVAL (val) & 255;
21450
21451 v |= v << 8;
21452 v |= v << 16;
21453 if (mode == DImode)
21454 v |= (v << 16) << 16;
21455 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
21456 }
21457
21458 if (valmode == VOIDmode)
21459 valmode = QImode;
21460 if (valmode != QImode)
21461 val = gen_lowpart (QImode, val);
21462 if (mode == QImode)
21463 return val;
21464 if (!TARGET_PARTIAL_REG_STALL)
21465 nops--;
21466 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
21467 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
21468 <= (ix86_cost->shift_const + ix86_cost->add) * nops
21469 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
21470 {
21471 rtx reg = convert_modes (mode, QImode, val, true);
21472 tmp = promote_duplicated_reg (mode, const1_rtx);
21473 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
21474 OPTAB_DIRECT);
21475 }
21476 else
21477 {
21478 rtx reg = convert_modes (mode, QImode, val, true);
21479
21480 if (!TARGET_PARTIAL_REG_STALL)
21481 if (mode == SImode)
21482 emit_insn (gen_movsi_insv_1 (reg, reg));
21483 else
21484 emit_insn (gen_movdi_insv_1 (reg, reg));
21485 else
21486 {
21487 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
21488 NULL, 1, OPTAB_DIRECT);
21489 reg =
21490 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
21491 }
21492 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
21493 NULL, 1, OPTAB_DIRECT);
21494 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
21495 if (mode == SImode)
21496 return reg;
21497 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
21498 NULL, 1, OPTAB_DIRECT);
21499 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
21500 return reg;
21501 }
21502 }
21503
21504 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
21505 will be needed by the main loop copying SIZE_NEEDED chunks and by the prologue
21506 getting the alignment from ALIGN to DESIRED_ALIGN. */
21507 static rtx
21508 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
21509 {
21510 rtx promoted_val;
21511
21512 if (TARGET_64BIT
21513 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
21514 promoted_val = promote_duplicated_reg (DImode, val);
21515 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
21516 promoted_val = promote_duplicated_reg (SImode, val);
21517 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
21518 promoted_val = promote_duplicated_reg (HImode, val);
21519 else
21520 promoted_val = val;
21521
21522 return promoted_val;
21523 }
21524
21525 /* Expand string set operation (memset). Use i386 string operations when
21526 profitable. See the expand_movmem comment for an explanation of the
21527 individual steps performed. */
21528 bool
21529 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
21530 rtx expected_align_exp, rtx expected_size_exp)
21531 {
21532 rtx destreg;
21533 rtx label = NULL;
21534 rtx tmp;
21535 rtx jump_around_label = NULL;
21536 HOST_WIDE_INT align = 1;
21537 unsigned HOST_WIDE_INT count = 0;
21538 HOST_WIDE_INT expected_size = -1;
21539 int size_needed = 0, epilogue_size_needed;
21540 int desired_align = 0, align_bytes = 0;
21541 enum stringop_alg alg;
21542 rtx promoted_val = NULL;
21543 bool force_loopy_epilogue = false;
21544 int dynamic_check;
21545 bool need_zero_guard = false;
21546
21547 if (CONST_INT_P (align_exp))
21548 align = INTVAL (align_exp);
21549 /* i386 can do misaligned access at a reasonably small extra cost. */
21550 if (CONST_INT_P (expected_align_exp)
21551 && INTVAL (expected_align_exp) > align)
21552 align = INTVAL (expected_align_exp);
21553 if (CONST_INT_P (count_exp))
21554 count = expected_size = INTVAL (count_exp);
21555 if (CONST_INT_P (expected_size_exp) && count == 0)
21556 expected_size = INTVAL (expected_size_exp);
21557
21558 /* Make sure we don't need to care about overflow later on. */
21559 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
21560 return false;
21561
21562 /* Step 0: Decide on preferred algorithm, desired alignment and
21563 size of chunks to be copied by main loop. */
21564
21565 alg = decide_alg (count, expected_size, true, &dynamic_check);
21566 desired_align = decide_alignment (align, alg, expected_size);
21567
21568 if (!TARGET_ALIGN_STRINGOPS)
21569 align = desired_align;
21570
21571 if (alg == libcall)
21572 return false;
21573 gcc_assert (alg != no_stringop);
21574 if (!count)
21575 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
21576 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
21577 switch (alg)
21578 {
21579 case libcall:
21580 case no_stringop:
21581 gcc_unreachable ();
21582 case loop:
21583 need_zero_guard = true;
21584 size_needed = GET_MODE_SIZE (Pmode);
21585 break;
21586 case unrolled_loop:
21587 need_zero_guard = true;
21588 size_needed = GET_MODE_SIZE (Pmode) * 4;
21589 break;
21590 case rep_prefix_8_byte:
21591 size_needed = 8;
21592 break;
21593 case rep_prefix_4_byte:
21594 size_needed = 4;
21595 break;
21596 case rep_prefix_1_byte:
21597 size_needed = 1;
21598 break;
21599 case loop_1_byte:
21600 need_zero_guard = true;
21601 size_needed = 1;
21602 break;
21603 }
21604 epilogue_size_needed = size_needed;
21605
21606 /* Step 1: Prologue guard. */
21607
21608 /* Alignment code needs count to be in register. */
21609 if (CONST_INT_P (count_exp) && desired_align > align)
21610 {
21611 if (INTVAL (count_exp) > desired_align
21612 && INTVAL (count_exp) > size_needed)
21613 {
21614 align_bytes
21615 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
21616 if (align_bytes <= 0)
21617 align_bytes = 0;
21618 else
21619 align_bytes = desired_align - align_bytes;
21620 }
21621 if (align_bytes == 0)
21622 {
21623 enum machine_mode mode = SImode;
21624 if (TARGET_64BIT && (count & ~0xffffffff))
21625 mode = DImode;
21626 count_exp = force_reg (mode, count_exp);
21627 }
21628 }
21629 /* Do the cheap promotion to allow better CSE across the
21630 main loop and epilogue (i.e. one load of the big constant in
21631 front of all the code). */
21632 if (CONST_INT_P (val_exp))
21633 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
21634 desired_align, align);
21635 /* Ensure that alignment prologue won't copy past end of block. */
21636 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
21637 {
21638 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
21639 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
21640 Make sure it is power of 2. */
21641 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
21642
21643 /* To improve performance on small blocks, we jump around the VAL
21644 promoting code. This means that if the promoted VAL is not constant,
21645 we might not use it in the epilogue and have to use the byte
21646 loop variant. */
21647 if (epilogue_size_needed > 2 && !promoted_val)
21648 force_loopy_epilogue = true;
21649 if (count)
21650 {
21651 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
21652 {
21653 /* If main algorithm works on QImode, no epilogue is needed.
21654 For small sizes just don't align anything. */
21655 if (size_needed == 1)
21656 desired_align = align;
21657 else
21658 goto epilogue;
21659 }
21660 }
21661 else
21662 {
21663 label = gen_label_rtx ();
21664 emit_cmp_and_jump_insns (count_exp,
21665 GEN_INT (epilogue_size_needed),
21666 LTU, 0, counter_mode (count_exp), 1, label);
21667 if (expected_size == -1 || expected_size <= epilogue_size_needed)
21668 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21669 else
21670 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21671 }
21672 }
21673 if (dynamic_check != -1)
21674 {
21675 rtx hot_label = gen_label_rtx ();
21676 jump_around_label = gen_label_rtx ();
21677 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
21678 LEU, 0, counter_mode (count_exp), 1, hot_label);
21679 predict_jump (REG_BR_PROB_BASE * 90 / 100);
21680 set_storage_via_libcall (dst, count_exp, val_exp, false);
21681 emit_jump (jump_around_label);
21682 emit_label (hot_label);
21683 }
21684
21685 /* Step 2: Alignment prologue. */
21686
21687 /* Do the expensive promotion once we branched off the small blocks. */
21688 if (!promoted_val)
21689 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
21690 desired_align, align);
21691 gcc_assert (desired_align >= 1 && align >= 1);
21692
21693 if (desired_align > align)
21694 {
21695 if (align_bytes == 0)
21696 {
21697 /* Except for the first move in the epilogue, we no longer know
21698 the constant offset in aliasing info. It doesn't seem worth
21699 the pain to maintain it for the first move, so throw away
21700 the info early. */
21701 dst = change_address (dst, BLKmode, destreg);
21702 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
21703 desired_align);
21704 }
21705 else
21706 {
21707 /* If we know how many bytes need to be stored before dst is
21708 sufficiently aligned, maintain aliasing info accurately. */
21709 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
21710 desired_align, align_bytes);
21711 count_exp = plus_constant (count_exp, -align_bytes);
21712 count -= align_bytes;
21713 }
21714 if (need_zero_guard
21715 && (count < (unsigned HOST_WIDE_INT) size_needed
21716 || (align_bytes == 0
21717 && count < ((unsigned HOST_WIDE_INT) size_needed
21718 + desired_align - align))))
21719 {
21720 /* It is possible that we copied enough so the main loop will not
21721 execute. */
21722 gcc_assert (size_needed > 1);
21723 if (label == NULL_RTX)
21724 label = gen_label_rtx ();
21725 emit_cmp_and_jump_insns (count_exp,
21726 GEN_INT (size_needed),
21727 LTU, 0, counter_mode (count_exp), 1, label);
21728 if (expected_size == -1
21729 || expected_size < (desired_align - align) / 2 + size_needed)
21730 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21731 else
21732 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21733 }
21734 }
21735 if (label && size_needed == 1)
21736 {
21737 emit_label (label);
21738 LABEL_NUSES (label) = 1;
21739 label = NULL;
21740 promoted_val = val_exp;
21741 epilogue_size_needed = 1;
21742 }
21743 else if (label == NULL_RTX)
21744 epilogue_size_needed = size_needed;
21745
21746 /* Step 3: Main loop. */
21747
21748 switch (alg)
21749 {
21750 case libcall:
21751 case no_stringop:
21752 gcc_unreachable ();
21753 case loop_1_byte:
21754 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21755 count_exp, QImode, 1, expected_size);
21756 break;
21757 case loop:
21758 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21759 count_exp, Pmode, 1, expected_size);
21760 break;
21761 case unrolled_loop:
21762 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21763 count_exp, Pmode, 4, expected_size);
21764 break;
21765 case rep_prefix_8_byte:
21766 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21767 DImode, val_exp);
21768 break;
21769 case rep_prefix_4_byte:
21770 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21771 SImode, val_exp);
21772 break;
21773 case rep_prefix_1_byte:
21774 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21775 QImode, val_exp);
21776 break;
21777 }
21778 /* Properly adjust the offset of the dest memory for aliasing. */
21779 if (CONST_INT_P (count_exp))
21780 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
21781 (count / size_needed) * size_needed);
21782 else
21783 dst = change_address (dst, BLKmode, destreg);
21784
21785 /* Step 4: Epilogue to copy the remaining bytes. */
21786
21787 if (label)
21788 {
21789 /* When the main loop is done, COUNT_EXP might hold the original count,
21790 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
21791 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
21792 bytes. Compensate if needed. */
21793
21794 if (size_needed < epilogue_size_needed)
21795 {
21796 tmp =
21797 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
21798 GEN_INT (size_needed - 1), count_exp, 1,
21799 OPTAB_DIRECT);
21800 if (tmp != count_exp)
21801 emit_move_insn (count_exp, tmp);
21802 }
21803 emit_label (label);
21804 LABEL_NUSES (label) = 1;
21805 }
21806 epilogue:
21807 if (count_exp != const0_rtx && epilogue_size_needed > 1)
21808 {
21809 if (force_loopy_epilogue)
21810 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
21811 epilogue_size_needed);
21812 else
21813 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
21814 epilogue_size_needed);
21815 }
21816 if (jump_around_label)
21817 emit_label (jump_around_label);
21818 return true;
21819 }
21820
21821 /* Expand the appropriate insns for doing strlen if not just doing
21822 repnz; scasb
21823
21824 out = result, initialized with the start address
21825 align_rtx = alignment of the address.
21826 scratch = scratch register, initialized with the start address when
21827 not aligned, otherwise undefined
21828
21829 This is just the body. It needs the initializations mentioned above and
21830 some address computation at the end. These things are done in i386.md. */
21831
21832 static void
21833 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
21834 {
21835 int align;
21836 rtx tmp;
21837 rtx align_2_label = NULL_RTX;
21838 rtx align_3_label = NULL_RTX;
21839 rtx align_4_label = gen_label_rtx ();
21840 rtx end_0_label = gen_label_rtx ();
21841 rtx mem;
21842 rtx tmpreg = gen_reg_rtx (SImode);
21843 rtx scratch = gen_reg_rtx (SImode);
21844 rtx cmp;
21845
21846 align = 0;
21847 if (CONST_INT_P (align_rtx))
21848 align = INTVAL (align_rtx);
21849
21850 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
21851
21852 /* Is there a known alignment and is it less than 4? */
21853 if (align < 4)
21854 {
21855 rtx scratch1 = gen_reg_rtx (Pmode);
21856 emit_move_insn (scratch1, out);
21857 /* Is there a known alignment and is it not 2? */
21858 if (align != 2)
21859 {
21860 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
21861 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
21862
21863 /* Leave just the 3 lower bits. */
21864 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
21865 NULL_RTX, 0, OPTAB_WIDEN);
21866
21867 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21868 Pmode, 1, align_4_label);
21869 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
21870 Pmode, 1, align_2_label);
21871 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
21872 Pmode, 1, align_3_label);
21873 }
21874 else
21875 {
21876 /* Since the alignment is 2, we have to check 2 or 0 bytes;
21877 check whether it is aligned to a 4-byte boundary. */
21878
21879 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
21880 NULL_RTX, 0, OPTAB_WIDEN);
21881
21882 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21883 Pmode, 1, align_4_label);
21884 }
21885
21886 mem = change_address (src, QImode, out);
21887
21888 /* Now compare the bytes. */
21889
21890 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
21891 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
21892 QImode, 1, end_0_label);
21893
21894 /* Increment the address. */
21895 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21896
21897 /* Not needed with an alignment of 2 */
21898 if (align != 2)
21899 {
21900 emit_label (align_2_label);
21901
21902 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21903 end_0_label);
21904
21905 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21906
21907 emit_label (align_3_label);
21908 }
21909
21910 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21911 end_0_label);
21912
21913 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21914 }
21915
21916 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
21917 align this loop: it only makes programs larger and does not help
21918 speed. */
21919 emit_label (align_4_label);
21920
21921 mem = change_address (src, SImode, out);
21922 emit_move_insn (scratch, mem);
21923 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
21924
21925 /* This formula yields a nonzero result iff one of the bytes is zero.
21926 This saves three branches inside the loop and many cycles. */
21927
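   /* Worked example of the trick below (illustrative values): loading the
      four bytes "ab\0d" gives X = 0x64006261 on little-endian x86.  Then
	 X - 0x01010101 = 0x62FF6160,
	 ~X             = 0x9BFF9D9E,
      and (X - 0x01010101) & ~X & 0x80808080 = 0x00800000, i.e. nonzero,
      with the 0x80 bit set exactly in the byte that was zero.  For the
      bytes "abcd" (X = 0x64636261, no zero byte) the expression is 0.  */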
21928 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
21929 emit_insn (gen_one_cmplsi2 (scratch, scratch));
21930 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
21931 emit_insn (gen_andsi3 (tmpreg, tmpreg,
21932 gen_int_mode (0x80808080, SImode)));
21933 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
21934 align_4_label);
21935
21936 if (TARGET_CMOVE)
21937 {
21938 rtx reg = gen_reg_rtx (SImode);
21939 rtx reg2 = gen_reg_rtx (Pmode);
21940 emit_move_insn (reg, tmpreg);
21941 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
21942
21943 /* If zero is not in the first two bytes, move two bytes forward. */
21944 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21945 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21946 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21947 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
21948 gen_rtx_IF_THEN_ELSE (SImode, tmp,
21949 reg,
21950 tmpreg)));
21951 /* Emit lea manually to avoid clobbering of flags. */
21952 emit_insn (gen_rtx_SET (SImode, reg2,
21953 gen_rtx_PLUS (Pmode, out, const2_rtx)));
21954
21955 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21956 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21957 emit_insn (gen_rtx_SET (VOIDmode, out,
21958 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
21959 reg2,
21960 out)));
21961 }
21962 else
21963 {
21964 rtx end_2_label = gen_label_rtx ();
21965 /* Is zero in the first two bytes? */
21966
21967 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21968 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21969 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
21970 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21971 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
21972 pc_rtx);
21973 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
21974 JUMP_LABEL (tmp) = end_2_label;
21975
21976 /* Not in the first two. Move two bytes forward. */
21977 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
21978 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
21979
21980 emit_label (end_2_label);
21981
21982 }
21983
21984 /* Avoid branch in fixing the byte. */
21985 tmpreg = gen_lowpart (QImode, tmpreg);
21986 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
21987 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
21988 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
21989 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
21990
21991 emit_label (end_0_label);
21992 }
21993
21994 /* Expand strlen. */
21995
21996 bool
21997 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
21998 {
21999 rtx addr, scratch1, scratch2, scratch3, scratch4;
22000
22001 /* The generic case of the strlen expander is long. Avoid expanding
22002 it unless TARGET_INLINE_ALL_STRINGOPS. */
22003
22004 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
22005 && !TARGET_INLINE_ALL_STRINGOPS
22006 && !optimize_insn_for_size_p ()
22007 && (!CONST_INT_P (align) || INTVAL (align) < 4))
22008 return false;
22009
22010 addr = force_reg (Pmode, XEXP (src, 0));
22011 scratch1 = gen_reg_rtx (Pmode);
22012
22013 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
22014 && !optimize_insn_for_size_p ())
22015 {
22016 /* Well it seems that some optimizer does not combine a call like
22017 foo(strlen(bar), strlen(bar));
22018 when the move and the subtraction are done here. It does calculate
22019 the length just once when these instructions are done inside of
22020 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
22021 often used and I use one fewer register for the lifetime of
22022 output_strlen_unroll() this is better. */
22023
22024 emit_move_insn (out, addr);
22025
22026 ix86_expand_strlensi_unroll_1 (out, src, align);
22027
22028 /* strlensi_unroll_1 returns the address of the zero at the end of
22029 the string, like memchr(), so compute the length by subtracting
22030 the start address. */
22031 emit_insn (ix86_gen_sub3 (out, out, addr));
22032 }
22033 else
22034 {
22035 rtx unspec;
22036
22037 /* Can't use this if the user has appropriated eax, ecx, or edi. */
22038 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
22039 return false;
22040
22041 scratch2 = gen_reg_rtx (Pmode);
22042 scratch3 = gen_reg_rtx (Pmode);
22043 scratch4 = force_reg (Pmode, constm1_rtx);
22044
22045 emit_move_insn (scratch3, addr);
22046 eoschar = force_reg (QImode, eoschar);
22047
22048 src = replace_equiv_address_nv (src, scratch3);
22049
22050 /* If .md starts supporting :P, this can be done in .md. */
22051 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
22052 scratch4), UNSPEC_SCAS);
22053 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
22054 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
22055 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
22056 }
22057 return true;
22058 }
22059
22060 /* For a given symbol (function), construct code to compute the address of its
22061 PLT entry in the large x86-64 PIC model. */
22062 rtx
22063 construct_plt_address (rtx symbol)
22064 {
22065 rtx tmp = gen_reg_rtx (Pmode);
22066 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
22067
22068 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
22069 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
22070
22071 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
22072 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
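  /* A rough sketch of what the emitted RTL assembles to (register names are
     only illustrative; the allocator picks the actual registers):
	 movabs	$symbol@PLTOFF, %reg
	 add	<GOT base register>, %reg
     i.e. the PLT entry address is the GOT base plus the symbol's PLTOFF
     displacement.  */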
22073 return tmp;
22074 }
22075
22076 rtx
22077 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
22078 rtx callarg2,
22079 rtx pop, bool sibcall)
22080 {
22081 /* We need to represent that the SI, DI and XMM6-XMM15 registers are
22082 clobbered by SYSV calls. */
22083 static int clobbered_registers[] = {
22084 XMM6_REG, XMM7_REG, XMM8_REG,
22085 XMM9_REG, XMM10_REG, XMM11_REG,
22086 XMM12_REG, XMM13_REG, XMM14_REG,
22087 XMM15_REG, SI_REG, DI_REG
22088 };
22089 rtx vec[ARRAY_SIZE (clobbered_registers) + 3];
22090 rtx use = NULL, call;
22091 unsigned int vec_len;
22092
22093 if (pop == const0_rtx)
22094 pop = NULL;
22095 gcc_assert (!TARGET_64BIT || !pop);
22096
22097 if (TARGET_MACHO && !TARGET_64BIT)
22098 {
22099 #if TARGET_MACHO
22100 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
22101 fnaddr = machopic_indirect_call_target (fnaddr);
22102 #endif
22103 }
22104 else
22105 {
22106 /* Static functions and indirect calls don't need the pic register. */
22107 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
22108 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
22109 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
22110 use_reg (&use, pic_offset_table_rtx);
22111 }
22112
22113 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
22114 {
22115 rtx al = gen_rtx_REG (QImode, AX_REG);
22116 emit_move_insn (al, callarg2);
22117 use_reg (&use, al);
22118 }
22119
22120 if (ix86_cmodel == CM_LARGE_PIC
22121 && MEM_P (fnaddr)
22122 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
22123 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
22124 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
22125 else if (sibcall
22126 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
22127 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
22128 {
22129 fnaddr = XEXP (fnaddr, 0);
22130 if (GET_MODE (fnaddr) != Pmode)
22131 fnaddr = convert_to_mode (Pmode, fnaddr, 1);
22132 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (Pmode, fnaddr));
22133 }
22134
22135 vec_len = 0;
22136 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
22137 if (retval)
22138 call = gen_rtx_SET (VOIDmode, retval, call);
22139 vec[vec_len++] = call;
22140
22141 if (pop)
22142 {
22143 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
22144 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
22145 vec[vec_len++] = pop;
22146 }
22147
22148 if (TARGET_64BIT_MS_ABI
22149 && (!callarg2 || INTVAL (callarg2) != -2))
22150 {
22151 unsigned i;
22152
22153 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
22154 UNSPEC_MS_TO_SYSV_CALL);
22155
22156 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
22157 vec[vec_len++]
22158 = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
22159 ? TImode : DImode,
22160 gen_rtx_REG (SSE_REGNO_P (clobbered_registers[i])
22161 ? TImode : DImode,
22162 clobbered_registers[i]));
22163 }
22164
22165 /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
22166 if (TARGET_VZEROUPPER)
22167 {
22168 int avx256;
22169 if (cfun->machine->callee_pass_avx256_p)
22170 {
22171 if (cfun->machine->callee_return_avx256_p)
22172 avx256 = callee_return_pass_avx256;
22173 else
22174 avx256 = callee_pass_avx256;
22175 }
22176 else if (cfun->machine->callee_return_avx256_p)
22177 avx256 = callee_return_avx256;
22178 else
22179 avx256 = call_no_avx256;
22180
22181 if (reload_completed)
22182 emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
22183 else
22184 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode,
22185 gen_rtvec (1, GEN_INT (avx256)),
22186 UNSPEC_CALL_NEEDS_VZEROUPPER);
22187 }
22188
22189 if (vec_len > 1)
22190 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
22191 call = emit_call_insn (call);
22192 if (use)
22193 CALL_INSN_FUNCTION_USAGE (call) = use;
22194
22195 return call;
22196 }
22197
22198 void
22199 ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
22200 {
22201 rtx pat = PATTERN (insn);
22202 rtvec vec = XVEC (pat, 0);
22203 int len = GET_NUM_ELEM (vec) - 1;
22204
22205 /* Strip off the last entry of the parallel. */
22206 gcc_assert (GET_CODE (RTVEC_ELT (vec, len)) == UNSPEC);
22207 gcc_assert (XINT (RTVEC_ELT (vec, len), 1) == UNSPEC_CALL_NEEDS_VZEROUPPER);
22208 if (len == 1)
22209 pat = RTVEC_ELT (vec, 0);
22210 else
22211 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (len, &RTVEC_ELT (vec, 0)));
22212
22213 emit_insn (gen_avx_vzeroupper (vzeroupper));
22214 emit_call_insn (pat);
22215 }
22216
22217 /* Output the assembly for a call instruction. */
22218
22219 const char *
22220 ix86_output_call_insn (rtx insn, rtx call_op)
22221 {
22222 bool direct_p = constant_call_address_operand (call_op, Pmode);
22223 bool seh_nop_p = false;
22224 const char *xasm;
22225
22226 if (SIBLING_CALL_P (insn))
22227 {
22228 if (direct_p)
22229 xasm = "jmp\t%P0";
22230 /* SEH epilogue detection requires the indirect branch case
22231 to include REX.W. */
22232 else if (TARGET_SEH)
22233 xasm = "rex.W jmp %A0";
22234 else
22235 xasm = "jmp\t%A0";
22236
22237 output_asm_insn (xasm, &call_op);
22238 return "";
22239 }
22240
22241 /* SEH unwinding can require an extra nop to be emitted in several
22242 circumstances. Determine if we have one of those. */
22243 if (TARGET_SEH)
22244 {
22245 rtx i;
22246
22247 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
22248 {
22249 /* If we get to another real insn, we don't need the nop. */
22250 if (INSN_P (i))
22251 break;
22252
22253 /* If we get to the epilogue note, prevent a catch region from
22254 being adjacent to the standard epilogue sequence. If non-call
22255 exceptions are enabled, we'll have done this during epilogue emission. */
22256 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
22257 && !flag_non_call_exceptions
22258 && !can_throw_internal (insn))
22259 {
22260 seh_nop_p = true;
22261 break;
22262 }
22263 }
22264
22265 /* If we didn't find a real insn following the call, prevent the
22266 unwinder from looking into the next function. */
22267 if (i == NULL)
22268 seh_nop_p = true;
22269 }
22270
22271 if (direct_p)
22272 xasm = "call\t%P0";
22273 else
22274 xasm = "call\t%A0";
22275
22276 output_asm_insn (xasm, &call_op);
22277
22278 if (seh_nop_p)
22279 return "nop";
22280
22281 return "";
22282 }
22283 \f
22284 /* Clear stack slot assignments remembered from previous functions.
22285 This is called from INIT_EXPANDERS once before RTL is emitted for each
22286 function. */
22287
22288 static struct machine_function *
22289 ix86_init_machine_status (void)
22290 {
22291 struct machine_function *f;
22292
22293 f = ggc_alloc_cleared_machine_function ();
22294 f->use_fast_prologue_epilogue_nregs = -1;
22295 f->tls_descriptor_call_expanded_p = 0;
22296 f->call_abi = ix86_abi;
22297
22298 return f;
22299 }
22300
22301 /* Return a MEM corresponding to a stack slot with mode MODE.
22302 Allocate a new slot if necessary.
22303
22304 The RTL for a function can have several slots available: N is
22305 which slot to use. */
22306
22307 rtx
22308 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
22309 {
22310 struct stack_local_entry *s;
22311
22312 gcc_assert (n < MAX_386_STACK_LOCALS);
22313
22314 /* Virtual slot is valid only before vregs are instantiated. */
22315 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
22316
22317 for (s = ix86_stack_locals; s; s = s->next)
22318 if (s->mode == mode && s->n == n)
22319 return validize_mem (copy_rtx (s->rtl));
22320
22321 s = ggc_alloc_stack_local_entry ();
22322 s->n = n;
22323 s->mode = mode;
22324 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
22325
22326 s->next = ix86_stack_locals;
22327 ix86_stack_locals = s;
22328 return validize_mem (s->rtl);
22329 }
22330 \f
22331 /* Calculate the length of the memory address in the instruction encoding.
22332 Includes addr32 prefix, does not include the one-byte modrm, opcode,
22333 or other prefixes. */
22334
22335 int
22336 memory_address_length (rtx addr)
22337 {
22338 struct ix86_address parts;
22339 rtx base, index, disp;
22340 int len;
22341 int ok;
22342
22343 if (GET_CODE (addr) == PRE_DEC
22344 || GET_CODE (addr) == POST_INC
22345 || GET_CODE (addr) == PRE_MODIFY
22346 || GET_CODE (addr) == POST_MODIFY)
22347 return 0;
22348
22349 ok = ix86_decompose_address (addr, &parts);
22350 gcc_assert (ok);
22351
22352 if (parts.base && GET_CODE (parts.base) == SUBREG)
22353 parts.base = SUBREG_REG (parts.base);
22354 if (parts.index && GET_CODE (parts.index) == SUBREG)
22355 parts.index = SUBREG_REG (parts.index);
22356
22357 base = parts.base;
22358 index = parts.index;
22359 disp = parts.disp;
22360
22361 /* Add length of addr32 prefix. */
22362 len = (GET_CODE (addr) == ZERO_EXTEND
22363 || GET_CODE (addr) == AND);
22364
22365 /* Rule of thumb:
22366 - esp as the base always wants an index,
22367 - ebp as the base always wants a displacement,
22368 - r12 as the base always wants an index,
22369 - r13 as the base always wants a displacement. */
22370
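  /* As an illustration of the rule of thumb above (encodings are only
     examples): "movl (%eax), %ecx" needs just a modrm byte (8B 08), while
     "movl (%esp), %ecx" needs modrm + SIB (8B 0C 24) and "movl (%ebp), %ecx"
     needs modrm + disp8 (8B 4D 00), which is why those bases cost an extra
     byte in the cases below.  */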
22371 /* Register Indirect. */
22372 if (base && !index && !disp)
22373 {
22374 /* esp (for its index) and ebp (for its displacement) need
22375 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
22376 code. */
22377 if (REG_P (addr)
22378 && (addr == arg_pointer_rtx
22379 || addr == frame_pointer_rtx
22380 || REGNO (addr) == SP_REG
22381 || REGNO (addr) == BP_REG
22382 || REGNO (addr) == R12_REG
22383 || REGNO (addr) == R13_REG))
22384 len = 1;
22385 }
22386
22387 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
22388 is not disp32, but disp32(%rip), so for disp32
22389 a SIB byte is needed, unless print_operand_address
22390 optimizes it into disp32(%rip) or (%rip) is implied
22391 by UNSPEC. */
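  /* Illustrative encodings (64-bit mode): a RIP-relative "movl foo(%rip), %eax"
     is 8B 05 <disp32>, while an absolute "movl foo, %eax" needs a SIB byte,
     8B 04 25 <disp32>, hence the extra byte added below for displacements
     that cannot use %rip.  */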
22392 else if (disp && !base && !index)
22393 {
22394 len = 4;
22395 if (TARGET_64BIT)
22396 {
22397 rtx symbol = disp;
22398
22399 if (GET_CODE (disp) == CONST)
22400 symbol = XEXP (disp, 0);
22401 if (GET_CODE (symbol) == PLUS
22402 && CONST_INT_P (XEXP (symbol, 1)))
22403 symbol = XEXP (symbol, 0);
22404
22405 if (GET_CODE (symbol) != LABEL_REF
22406 && (GET_CODE (symbol) != SYMBOL_REF
22407 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
22408 && (GET_CODE (symbol) != UNSPEC
22409 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
22410 && XINT (symbol, 1) != UNSPEC_PCREL
22411 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
22412 len += 1;
22413 }
22414 }
22415
22416 else
22417 {
22418 /* Find the length of the displacement constant. */
22419 if (disp)
22420 {
22421 if (base && satisfies_constraint_K (disp))
22422 len = 1;
22423 else
22424 len = 4;
22425 }
22426 /* ebp always wants a displacement. Similarly r13. */
22427 else if (base && REG_P (base)
22428 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
22429 len = 1;
22430
22431 /* An index requires the two-byte modrm form.... */
22432 if (index
22433 /* ...like esp (or r12), which always wants an index. */
22434 || base == arg_pointer_rtx
22435 || base == frame_pointer_rtx
22436 || (base && REG_P (base)
22437 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
22438 len += 1;
22439 }
22440
22441 switch (parts.seg)
22442 {
22443 case SEG_FS:
22444 case SEG_GS:
22445 len += 1;
22446 break;
22447 default:
22448 break;
22449 }
22450
22451 return len;
22452 }
22453
22454 /* Compute default value for "length_immediate" attribute. When SHORTFORM
22455 is set, expect that the insn has an 8-bit immediate alternative. */
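/* For example (illustrative encodings): "addl $5, %eax" has an imm8
   alternative (83 /0 ib), so its immediate contributes 1 byte, while
   "addl $1000, %eax" must use an imm32 form (05 id or 81 /0 id) and
   contributes 4 bytes.  Only the immediate bytes are counted here.  */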
22456 int
22457 ix86_attr_length_immediate_default (rtx insn, bool shortform)
22458 {
22459 int len = 0;
22460 int i;
22461 extract_insn_cached (insn);
22462 for (i = recog_data.n_operands - 1; i >= 0; --i)
22463 if (CONSTANT_P (recog_data.operand[i]))
22464 {
22465 enum attr_mode mode = get_attr_mode (insn);
22466
22467 gcc_assert (!len);
22468 if (shortform && CONST_INT_P (recog_data.operand[i]))
22469 {
22470 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
22471 switch (mode)
22472 {
22473 case MODE_QI:
22474 len = 1;
22475 continue;
22476 case MODE_HI:
22477 ival = trunc_int_for_mode (ival, HImode);
22478 break;
22479 case MODE_SI:
22480 ival = trunc_int_for_mode (ival, SImode);
22481 break;
22482 default:
22483 break;
22484 }
22485 if (IN_RANGE (ival, -128, 127))
22486 {
22487 len = 1;
22488 continue;
22489 }
22490 }
22491 switch (mode)
22492 {
22493 case MODE_QI:
22494 len = 1;
22495 break;
22496 case MODE_HI:
22497 len = 2;
22498 break;
22499 case MODE_SI:
22500 len = 4;
22501 break;
22502 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
22503 case MODE_DI:
22504 len = 4;
22505 break;
22506 default:
22507 fatal_insn ("unknown insn mode", insn);
22508 }
22509 }
22510 return len;
22511 }
22512 /* Compute default value for "length_address" attribute. */
22513 int
22514 ix86_attr_length_address_default (rtx insn)
22515 {
22516 int i;
22517
22518 if (get_attr_type (insn) == TYPE_LEA)
22519 {
22520 rtx set = PATTERN (insn), addr;
22521
22522 if (GET_CODE (set) == PARALLEL)
22523 set = XVECEXP (set, 0, 0);
22524
22525 gcc_assert (GET_CODE (set) == SET);
22526
22527 addr = SET_SRC (set);
22528 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
22529 {
22530 if (GET_CODE (addr) == ZERO_EXTEND)
22531 addr = XEXP (addr, 0);
22532 if (GET_CODE (addr) == SUBREG)
22533 addr = SUBREG_REG (addr);
22534 }
22535
22536 return memory_address_length (addr);
22537 }
22538
22539 extract_insn_cached (insn);
22540 for (i = recog_data.n_operands - 1; i >= 0; --i)
22541 if (MEM_P (recog_data.operand[i]))
22542 {
22543 constrain_operands_cached (reload_completed);
22544 if (which_alternative != -1)
22545 {
22546 const char *constraints = recog_data.constraints[i];
22547 int alt = which_alternative;
22548
22549 while (*constraints == '=' || *constraints == '+')
22550 constraints++;
22551 while (alt-- > 0)
22552 while (*constraints++ != ',')
22553 ;
22554 /* Skip ignored operands. */
22555 if (*constraints == 'X')
22556 continue;
22557 }
22558 return memory_address_length (XEXP (recog_data.operand[i], 0));
22559 }
22560 return 0;
22561 }
22562
22563 /* Compute default value for "length_vex" attribute. It includes
22564 the 2- or 3-byte VEX prefix and 1 opcode byte. */
22565
22566 int
22567 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
22568 {
22569 int i;
22570
22571 /* Only the 0f opcode map can use the 2-byte VEX prefix; setting the
22572 VEX W bit requires the 3-byte VEX prefix. */
22573 if (!has_0f_opcode || has_vex_w)
22574 return 3 + 1;
22575
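  /* Illustrative encodings: "vaddps %xmm2, %xmm1, %xmm0" fits the 2-byte VEX
     form, C5 F0 58 C2, giving length 2 + 1; "vpshufb %xmm2, %xmm1, %xmm0"
     lives in the 0F38 opcode map and needs the 3-byte form, C4 E2 71 00 C2,
     giving length 3 + 1.  */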
22576 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
22577 if (!TARGET_64BIT)
22578 return 2 + 1;
22579
22580 extract_insn_cached (insn);
22581
22582 for (i = recog_data.n_operands - 1; i >= 0; --i)
22583 if (REG_P (recog_data.operand[i]))
22584 {
22585 /* REX.W bit uses 3 byte VEX prefix. */
22586 if (GET_MODE (recog_data.operand[i]) == DImode
22587 && GENERAL_REG_P (recog_data.operand[i]))
22588 return 3 + 1;
22589 }
22590 else
22591 {
22592 /* REX.X or REX.B bits use 3 byte VEX prefix. */
22593 if (MEM_P (recog_data.operand[i])
22594 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
22595 return 3 + 1;
22596 }
22597
22598 return 2 + 1;
22599 }
22600 \f
22601 /* Return the maximum number of instructions a cpu can issue. */
22602
22603 static int
22604 ix86_issue_rate (void)
22605 {
22606 switch (ix86_tune)
22607 {
22608 case PROCESSOR_PENTIUM:
22609 case PROCESSOR_ATOM:
22610 case PROCESSOR_K6:
22611 return 2;
22612
22613 case PROCESSOR_PENTIUMPRO:
22614 case PROCESSOR_PENTIUM4:
22615 case PROCESSOR_CORE2_32:
22616 case PROCESSOR_CORE2_64:
22617 case PROCESSOR_COREI7_32:
22618 case PROCESSOR_COREI7_64:
22619 case PROCESSOR_ATHLON:
22620 case PROCESSOR_K8:
22621 case PROCESSOR_AMDFAM10:
22622 case PROCESSOR_NOCONA:
22623 case PROCESSOR_GENERIC32:
22624 case PROCESSOR_GENERIC64:
22625 case PROCESSOR_BDVER1:
22626 case PROCESSOR_BDVER2:
22627 case PROCESSOR_BTVER1:
22628 return 3;
22629
22630 default:
22631 return 1;
22632 }
22633 }
22634
22635 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
22636 by DEP_INSN and nothing set by DEP_INSN. */
22637
22638 static bool
22639 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
22640 {
22641 rtx set, set2;
22642
22643 /* Simplify the test for uninteresting insns. */
22644 if (insn_type != TYPE_SETCC
22645 && insn_type != TYPE_ICMOV
22646 && insn_type != TYPE_FCMOV
22647 && insn_type != TYPE_IBR)
22648 return false;
22649
22650 if ((set = single_set (dep_insn)) != 0)
22651 {
22652 set = SET_DEST (set);
22653 set2 = NULL_RTX;
22654 }
22655 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
22656 && XVECLEN (PATTERN (dep_insn), 0) == 2
22657 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
22658 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
22659 {
22660 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
22661 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
22662 }
22663 else
22664 return false;
22665
22666 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
22667 return false;
22668
22669 /* This test is true if the dependent insn reads the flags but
22670 not any other potentially set register. */
22671 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
22672 return false;
22673
22674 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
22675 return false;
22676
22677 return true;
22678 }
22679
22680 /* Return true iff USE_INSN has a memory address with operands set by
22681 SET_INSN. */
22682
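/* For example, in the sequence "addl $4, %ebx" followed by
   "movl (%ebx), %eax", the load's address register is set by the previous
   insn, so this predicate returns true and the Pentium tuning in
   ix86_adjust_cost charges an extra cycle for the address generation
   interlock.  */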
22683 bool
22684 ix86_agi_dependent (rtx set_insn, rtx use_insn)
22685 {
22686 int i;
22687 extract_insn_cached (use_insn);
22688 for (i = recog_data.n_operands - 1; i >= 0; --i)
22689 if (MEM_P (recog_data.operand[i]))
22690 {
22691 rtx addr = XEXP (recog_data.operand[i], 0);
22692 return modified_in_p (addr, set_insn) != 0;
22693 }
22694 return false;
22695 }
22696
22697 static int
22698 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
22699 {
22700 enum attr_type insn_type, dep_insn_type;
22701 enum attr_memory memory;
22702 rtx set, set2;
22703 int dep_insn_code_number;
22704
22705 /* Anti and output dependencies have zero cost on all CPUs. */
22706 if (REG_NOTE_KIND (link) != 0)
22707 return 0;
22708
22709 dep_insn_code_number = recog_memoized (dep_insn);
22710
22711 /* If we can't recognize the insns, we can't really do anything. */
22712 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
22713 return cost;
22714
22715 insn_type = get_attr_type (insn);
22716 dep_insn_type = get_attr_type (dep_insn);
22717
22718 switch (ix86_tune)
22719 {
22720 case PROCESSOR_PENTIUM:
22721 /* Address Generation Interlock adds a cycle of latency. */
22722 if (insn_type == TYPE_LEA)
22723 {
22724 rtx addr = PATTERN (insn);
22725
22726 if (GET_CODE (addr) == PARALLEL)
22727 addr = XVECEXP (addr, 0, 0);
22728
22729 gcc_assert (GET_CODE (addr) == SET);
22730
22731 addr = SET_SRC (addr);
22732 if (modified_in_p (addr, dep_insn))
22733 cost += 1;
22734 }
22735 else if (ix86_agi_dependent (dep_insn, insn))
22736 cost += 1;
22737
22738 /* ??? Compares pair with jump/setcc. */
22739 if (ix86_flags_dependent (insn, dep_insn, insn_type))
22740 cost = 0;
22741
22742 /* Floating point stores require value to be ready one cycle earlier. */
22743 if (insn_type == TYPE_FMOV
22744 && get_attr_memory (insn) == MEMORY_STORE
22745 && !ix86_agi_dependent (dep_insn, insn))
22746 cost += 1;
22747 break;
22748
22749 case PROCESSOR_PENTIUMPRO:
22750 memory = get_attr_memory (insn);
22751
22752 /* INT->FP conversion is expensive. */
22753 if (get_attr_fp_int_src (dep_insn))
22754 cost += 5;
22755
22756 /* There is one cycle extra latency between an FP op and a store. */
22757 if (insn_type == TYPE_FMOV
22758 && (set = single_set (dep_insn)) != NULL_RTX
22759 && (set2 = single_set (insn)) != NULL_RTX
22760 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
22761 && MEM_P (SET_DEST (set2)))
22762 cost += 1;
22763
22764 /* Show the ability of the reorder buffer to hide the latency of a load
22765 by executing it in parallel with the previous instruction when the
22766 previous instruction is not needed to compute the address. */
22767 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22768 && !ix86_agi_dependent (dep_insn, insn))
22769 {
22770 /* Claim moves to take one cycle, as the core can issue one load
22771 at a time and the next load can start a cycle later. */
22772 if (dep_insn_type == TYPE_IMOV
22773 || dep_insn_type == TYPE_FMOV)
22774 cost = 1;
22775 else if (cost > 1)
22776 cost--;
22777 }
22778 break;
22779
22780 case PROCESSOR_K6:
22781 memory = get_attr_memory (insn);
22782
22783 /* The esp dependency is resolved before the instruction is really
22784 finished. */
22785 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
22786 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
22787 return 1;
22788
22789 /* INT->FP conversion is expensive. */
22790 if (get_attr_fp_int_src (dep_insn))
22791 cost += 5;
22792
22793 /* Show the ability of the reorder buffer to hide the latency of a load
22794 by executing it in parallel with the previous instruction when the
22795 previous instruction is not needed to compute the address. */
22796 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22797 && !ix86_agi_dependent (dep_insn, insn))
22798 {
22799 /* Claim moves to take one cycle, as the core can issue one load
22800 at a time and the next load can start a cycle later. */
22801 if (dep_insn_type == TYPE_IMOV
22802 || dep_insn_type == TYPE_FMOV)
22803 cost = 1;
22804 else if (cost > 2)
22805 cost -= 2;
22806 else
22807 cost = 1;
22808 }
22809 break;
22810
22811 case PROCESSOR_ATHLON:
22812 case PROCESSOR_K8:
22813 case PROCESSOR_AMDFAM10:
22814 case PROCESSOR_BDVER1:
22815 case PROCESSOR_BDVER2:
22816 case PROCESSOR_BTVER1:
22817 case PROCESSOR_ATOM:
22818 case PROCESSOR_GENERIC32:
22819 case PROCESSOR_GENERIC64:
22820 memory = get_attr_memory (insn);
22821
22822 /* Show the ability of the reorder buffer to hide the latency of a load
22823 by executing it in parallel with the previous instruction when the
22824 previous instruction is not needed to compute the address. */
22825 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22826 && !ix86_agi_dependent (dep_insn, insn))
22827 {
22828 enum attr_unit unit = get_attr_unit (insn);
22829 int loadcost = 3;
22830
22831 /* Because of the difference between the length of integer and
22832 floating unit pipeline preparation stages, the memory operands
22833 for floating point are cheaper.
22834
22835 ??? For Athlon the difference is most probably 2. */
22836 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
22837 loadcost = 3;
22838 else
22839 loadcost = TARGET_ATHLON ? 2 : 0;
22840
22841 if (cost >= loadcost)
22842 cost -= loadcost;
22843 else
22844 cost = 0;
22845 }
22846
22847 default:
22848 break;
22849 }
22850
22851 return cost;
22852 }
22853
22854 /* How many alternative schedules to try. This should be as wide as the
22855 scheduling freedom in the DFA, but no wider. Making this value too
22856 large results in extra work for the scheduler. */
22857
22858 static int
22859 ia32_multipass_dfa_lookahead (void)
22860 {
22861 switch (ix86_tune)
22862 {
22863 case PROCESSOR_PENTIUM:
22864 return 2;
22865
22866 case PROCESSOR_PENTIUMPRO:
22867 case PROCESSOR_K6:
22868 return 1;
22869
22870 case PROCESSOR_CORE2_32:
22871 case PROCESSOR_CORE2_64:
22872 case PROCESSOR_COREI7_32:
22873 case PROCESSOR_COREI7_64:
22874 /* Generally, we want haifa-sched:max_issue() to look ahead as far
22875 as the number of instructions that can be executed in one cycle, i.e.,
22876 issue_rate. I wonder why tuning for many CPUs does not do this. */
22877 return ix86_issue_rate ();
22878
22879 default:
22880 return 0;
22881 }
22882 }
22883
22884 \f
22885
22886 /* Model decoder of Core 2/i7.
22887 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
22888 track the instruction fetch block boundaries and make sure that long
22889 (9+ bytes) instructions are assigned to D0. */
22890
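/* Illustrative example of the model below: with a 16-byte ifetch block,
   insns whose min_insn_size is 7, 6 and 5 bytes cannot all be issued in
   one cycle, since 7 + 6 + 5 = 18 > 16, so the third one is filtered out
   of the ready list for this cycle; likewise an insn longer than 8 bytes
   is only considered as the first insn of a cycle (decoder D0).  */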
22891 /* Maximum length of an insn that can be handled by
22892 a secondary decoder unit. '8' for Core 2/i7. */
22893 static int core2i7_secondary_decoder_max_insn_size;
22894
22895 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
22896 '16' for Core 2/i7. */
22897 static int core2i7_ifetch_block_size;
22898
22899 /* Maximum number of instructions decoder can handle per cycle.
22900 '6' for Core 2/i7. */
22901 static int core2i7_ifetch_block_max_insns;
22902
22903 typedef struct ix86_first_cycle_multipass_data_ *
22904 ix86_first_cycle_multipass_data_t;
22905 typedef const struct ix86_first_cycle_multipass_data_ *
22906 const_ix86_first_cycle_multipass_data_t;
22907
22908 /* A variable to store target state across calls to max_issue within
22909 one cycle. */
22910 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
22911 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
22912
22913 /* Initialize DATA. */
22914 static void
22915 core2i7_first_cycle_multipass_init (void *_data)
22916 {
22917 ix86_first_cycle_multipass_data_t data
22918 = (ix86_first_cycle_multipass_data_t) _data;
22919
22920 data->ifetch_block_len = 0;
22921 data->ifetch_block_n_insns = 0;
22922 data->ready_try_change = NULL;
22923 data->ready_try_change_size = 0;
22924 }
22925
22926 /* Advancing the cycle; reset ifetch block counts. */
22927 static void
22928 core2i7_dfa_post_advance_cycle (void)
22929 {
22930 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
22931
22932 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22933
22934 data->ifetch_block_len = 0;
22935 data->ifetch_block_n_insns = 0;
22936 }
22937
22938 static int min_insn_size (rtx);
22939
22940 /* Filter out insns from ready_try that the core will not be able to issue
22941 on current cycle due to decoder. */
22942 static void
22943 core2i7_first_cycle_multipass_filter_ready_try
22944 (const_ix86_first_cycle_multipass_data_t data,
22945 char *ready_try, int n_ready, bool first_cycle_insn_p)
22946 {
22947 while (n_ready--)
22948 {
22949 rtx insn;
22950 int insn_size;
22951
22952 if (ready_try[n_ready])
22953 continue;
22954
22955 insn = get_ready_element (n_ready);
22956 insn_size = min_insn_size (insn);
22957
22958 if (/* If the insn is too long for a secondary decoder ... */
22959 (!first_cycle_insn_p
22960 && insn_size > core2i7_secondary_decoder_max_insn_size)
22961 /* ... or it would not fit into the ifetch block ... */
22962 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
22963 /* ... or the decoder is full already ... */
22964 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
22965 /* ... mask the insn out. */
22966 {
22967 ready_try[n_ready] = 1;
22968
22969 if (data->ready_try_change)
22970 SET_BIT (data->ready_try_change, n_ready);
22971 }
22972 }
22973 }
22974
22975 /* Prepare for a new round of multipass lookahead scheduling. */
22976 static void
22977 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
22978 bool first_cycle_insn_p)
22979 {
22980 ix86_first_cycle_multipass_data_t data
22981 = (ix86_first_cycle_multipass_data_t) _data;
22982 const_ix86_first_cycle_multipass_data_t prev_data
22983 = ix86_first_cycle_multipass_data;
22984
22985 /* Restore the state from the end of the previous round. */
22986 data->ifetch_block_len = prev_data->ifetch_block_len;
22987 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
22988
22989 /* Filter instructions that cannot be issued on current cycle due to
22990 decoder restrictions. */
22991 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22992 first_cycle_insn_p);
22993 }
22994
22995 /* INSN is being issued in current solution. Account for its impact on
22996 the decoder model. */
22997 static void
22998 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
22999 rtx insn, const void *_prev_data)
23000 {
23001 ix86_first_cycle_multipass_data_t data
23002 = (ix86_first_cycle_multipass_data_t) _data;
23003 const_ix86_first_cycle_multipass_data_t prev_data
23004 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
23005
23006 int insn_size = min_insn_size (insn);
23007
23008 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
23009 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
23010 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
23011 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
23012
23013 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
23014 if (!data->ready_try_change)
23015 {
23016 data->ready_try_change = sbitmap_alloc (n_ready);
23017 data->ready_try_change_size = n_ready;
23018 }
23019 else if (data->ready_try_change_size < n_ready)
23020 {
23021 data->ready_try_change = sbitmap_resize (data->ready_try_change,
23022 n_ready, 0);
23023 data->ready_try_change_size = n_ready;
23024 }
23025 sbitmap_zero (data->ready_try_change);
23026
23027 /* Filter out insns from ready_try that the core will not be able to issue
23028 on current cycle due to decoder. */
23029 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
23030 false);
23031 }
23032
23033 /* Revert the effect on ready_try. */
23034 static void
23035 core2i7_first_cycle_multipass_backtrack (const void *_data,
23036 char *ready_try,
23037 int n_ready ATTRIBUTE_UNUSED)
23038 {
23039 const_ix86_first_cycle_multipass_data_t data
23040 = (const_ix86_first_cycle_multipass_data_t) _data;
23041 unsigned int i = 0;
23042 sbitmap_iterator sbi;
23043
23044 gcc_assert (sbitmap_last_set_bit (data->ready_try_change) < n_ready);
23045 EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
23046 {
23047 ready_try[i] = 0;
23048 }
23049 }
23050
23051 /* Save the result of multipass lookahead scheduling for the next round. */
23052 static void
23053 core2i7_first_cycle_multipass_end (const void *_data)
23054 {
23055 const_ix86_first_cycle_multipass_data_t data
23056 = (const_ix86_first_cycle_multipass_data_t) _data;
23057 ix86_first_cycle_multipass_data_t next_data
23058 = ix86_first_cycle_multipass_data;
23059
23060 if (data != NULL)
23061 {
23062 next_data->ifetch_block_len = data->ifetch_block_len;
23063 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
23064 }
23065 }
23066
23067 /* Deallocate target data. */
23068 static void
23069 core2i7_first_cycle_multipass_fini (void *_data)
23070 {
23071 ix86_first_cycle_multipass_data_t data
23072 = (ix86_first_cycle_multipass_data_t) _data;
23073
23074 if (data->ready_try_change)
23075 {
23076 sbitmap_free (data->ready_try_change);
23077 data->ready_try_change = NULL;
23078 data->ready_try_change_size = 0;
23079 }
23080 }
23081
23082 /* Prepare for scheduling pass. */
23083 static void
23084 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
23085 int verbose ATTRIBUTE_UNUSED,
23086 int max_uid ATTRIBUTE_UNUSED)
23087 {
23088 /* Install scheduling hooks for current CPU. Some of these hooks are used
23089 in time-critical parts of the scheduler, so we only set them up when
23090 they are actually used. */
23091 switch (ix86_tune)
23092 {
23093 case PROCESSOR_CORE2_32:
23094 case PROCESSOR_CORE2_64:
23095 case PROCESSOR_COREI7_32:
23096 case PROCESSOR_COREI7_64:
23097 targetm.sched.dfa_post_advance_cycle
23098 = core2i7_dfa_post_advance_cycle;
23099 targetm.sched.first_cycle_multipass_init
23100 = core2i7_first_cycle_multipass_init;
23101 targetm.sched.first_cycle_multipass_begin
23102 = core2i7_first_cycle_multipass_begin;
23103 targetm.sched.first_cycle_multipass_issue
23104 = core2i7_first_cycle_multipass_issue;
23105 targetm.sched.first_cycle_multipass_backtrack
23106 = core2i7_first_cycle_multipass_backtrack;
23107 targetm.sched.first_cycle_multipass_end
23108 = core2i7_first_cycle_multipass_end;
23109 targetm.sched.first_cycle_multipass_fini
23110 = core2i7_first_cycle_multipass_fini;
23111
23112 /* Set decoder parameters. */
23113 core2i7_secondary_decoder_max_insn_size = 8;
23114 core2i7_ifetch_block_size = 16;
23115 core2i7_ifetch_block_max_insns = 6;
23116 break;
23117
23118 default:
23119 targetm.sched.dfa_post_advance_cycle = NULL;
23120 targetm.sched.first_cycle_multipass_init = NULL;
23121 targetm.sched.first_cycle_multipass_begin = NULL;
23122 targetm.sched.first_cycle_multipass_issue = NULL;
23123 targetm.sched.first_cycle_multipass_backtrack = NULL;
23124 targetm.sched.first_cycle_multipass_end = NULL;
23125 targetm.sched.first_cycle_multipass_fini = NULL;
23126 break;
23127 }
23128 }
23129
23130 \f
23131 /* Compute the alignment given to a constant that is being placed in memory.
23132 EXP is the constant and ALIGN is the alignment that the object would
23133 ordinarily have.
23134 The value of this function is used instead of that alignment to align
23135 the object. */
23136
23137 int
23138 ix86_constant_alignment (tree exp, int align)
23139 {
23140 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
23141 || TREE_CODE (exp) == INTEGER_CST)
23142 {
23143 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
23144 return 64;
23145 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
23146 return 128;
23147 }
23148 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
23149 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
23150 return BITS_PER_WORD;
23151
23152 return align;
23153 }
23154
23155 /* Compute the alignment for a static variable.
23156 TYPE is the data type, and ALIGN is the alignment that
23157 the object would ordinarily have. The value of this function is used
23158 instead of that alignment to align the object. */
23159
23160 int
23161 ix86_data_alignment (tree type, int align)
23162 {
23163 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
23164
23165 if (AGGREGATE_TYPE_P (type)
23166 && TYPE_SIZE (type)
23167 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
23168 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
23169 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
23170 && align < max_align)
23171 align = max_align;
23172
23173 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
23174 to a 16-byte boundary. */
23175 if (TARGET_64BIT)
23176 {
23177 if (AGGREGATE_TYPE_P (type)
23178 && TYPE_SIZE (type)
23179 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
23180 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
23181 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
23182 return 128;
23183 }
23184
23185 if (TREE_CODE (type) == ARRAY_TYPE)
23186 {
23187 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
23188 return 64;
23189 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
23190 return 128;
23191 }
23192 else if (TREE_CODE (type) == COMPLEX_TYPE)
23193 {
23194
23195 if (TYPE_MODE (type) == DCmode && align < 64)
23196 return 64;
23197 if ((TYPE_MODE (type) == XCmode
23198 || TYPE_MODE (type) == TCmode) && align < 128)
23199 return 128;
23200 }
23201 else if ((TREE_CODE (type) == RECORD_TYPE
23202 || TREE_CODE (type) == UNION_TYPE
23203 || TREE_CODE (type) == QUAL_UNION_TYPE)
23204 && TYPE_FIELDS (type))
23205 {
23206 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
23207 return 64;
23208 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
23209 return 128;
23210 }
23211 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
23212 || TREE_CODE (type) == INTEGER_TYPE)
23213 {
23214 if (TYPE_MODE (type) == DFmode && align < 64)
23215 return 64;
23216 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
23217 return 128;
23218 }
23219
23220 return align;
23221 }
23222
23223 /* Compute the alignment for a local variable or a stack slot. EXP is
23224 the data type or decl itself, MODE is the widest mode available and
23225 ALIGN is the alignment that the object would ordinarily have. The
23226 value of this macro is used instead of that alignment to align the
23227 object. */
23228
23229 unsigned int
23230 ix86_local_alignment (tree exp, enum machine_mode mode,
23231 unsigned int align)
23232 {
23233 tree type, decl;
23234
23235 if (exp && DECL_P (exp))
23236 {
23237 type = TREE_TYPE (exp);
23238 decl = exp;
23239 }
23240 else
23241 {
23242 type = exp;
23243 decl = NULL;
23244 }
23245
23246 /* Don't do dynamic stack realignment for long long objects with
23247 -mpreferred-stack-boundary=2. */
23248 if (!TARGET_64BIT
23249 && align == 64
23250 && ix86_preferred_stack_boundary < 64
23251 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
23252 && (!type || !TYPE_USER_ALIGN (type))
23253 && (!decl || !DECL_USER_ALIGN (decl)))
23254 align = 32;
23255
23256 /* If TYPE is NULL, we are allocating a stack slot for caller-save
23257 register in MODE. We will return the largest alignment of XF
23258 and DF. */
23259 if (!type)
23260 {
23261 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
23262 align = GET_MODE_ALIGNMENT (DFmode);
23263 return align;
23264 }
23265
23266 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
23267 to a 16-byte boundary. The exact wording is:
23268
23269 An array uses the same alignment as its elements, except that a local or
23270 global array variable of length at least 16 bytes or
23271 a C99 variable-length array variable always has alignment of at least 16 bytes.
23272
23273 This was added to allow use of aligned SSE instructions on arrays. This
23274 rule is meant for static storage (where the compiler cannot do the analysis
23275 by itself). We follow it for automatic variables only when convenient:
23276 we fully control everything in the function being compiled, and functions
23277 from other units cannot rely on the alignment.
23278
23279 Exclude the va_list type. It is the common case of a local array where
23280 we cannot benefit from the alignment. */
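  /* For example, a local "char buf[64]" in a 64-bit, speed-optimized
     function with SSE enabled is given 128-bit alignment by the check
     below, so aligned SSE moves can be used on it (illustrative example).  */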
23281 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
23282 && TARGET_SSE)
23283 {
23284 if (AGGREGATE_TYPE_P (type)
23285 && (va_list_type_node == NULL_TREE
23286 || (TYPE_MAIN_VARIANT (type)
23287 != TYPE_MAIN_VARIANT (va_list_type_node)))
23288 && TYPE_SIZE (type)
23289 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
23290 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
23291 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
23292 return 128;
23293 }
23294 if (TREE_CODE (type) == ARRAY_TYPE)
23295 {
23296 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
23297 return 64;
23298 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
23299 return 128;
23300 }
23301 else if (TREE_CODE (type) == COMPLEX_TYPE)
23302 {
23303 if (TYPE_MODE (type) == DCmode && align < 64)
23304 return 64;
23305 if ((TYPE_MODE (type) == XCmode
23306 || TYPE_MODE (type) == TCmode) && align < 128)
23307 return 128;
23308 }
23309 else if ((TREE_CODE (type) == RECORD_TYPE
23310 || TREE_CODE (type) == UNION_TYPE
23311 || TREE_CODE (type) == QUAL_UNION_TYPE)
23312 && TYPE_FIELDS (type))
23313 {
23314 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
23315 return 64;
23316 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
23317 return 128;
23318 }
23319 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
23320 || TREE_CODE (type) == INTEGER_TYPE)
23321 {
23322
23323 if (TYPE_MODE (type) == DFmode && align < 64)
23324 return 64;
23325 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
23326 return 128;
23327 }
23328 return align;
23329 }
23330
23331 /* Compute the minimum required alignment for dynamic stack realignment
23332 purposes for a local variable, parameter or a stack slot. EXP is
23333 the data type or decl itself, MODE is its mode and ALIGN is the
23334 alignment that the object would ordinarily have. */
23335
23336 unsigned int
23337 ix86_minimum_alignment (tree exp, enum machine_mode mode,
23338 unsigned int align)
23339 {
23340 tree type, decl;
23341
23342 if (exp && DECL_P (exp))
23343 {
23344 type = TREE_TYPE (exp);
23345 decl = exp;
23346 }
23347 else
23348 {
23349 type = exp;
23350 decl = NULL;
23351 }
23352
23353 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
23354 return align;
23355
23356 /* Don't do dynamic stack realignment for long long objects with
23357 -mpreferred-stack-boundary=2. */
23358 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
23359 && (!type || !TYPE_USER_ALIGN (type))
23360 && (!decl || !DECL_USER_ALIGN (decl)))
23361 return 32;
23362
23363 return align;
23364 }
23365 \f
23366 /* Find a location for the static chain incoming to a nested function.
23367 This is a register, unless all free registers are used by arguments. */
23368
23369 static rtx
23370 ix86_static_chain (const_tree fndecl, bool incoming_p)
23371 {
23372 unsigned regno;
23373
23374 if (!DECL_STATIC_CHAIN (fndecl))
23375 return NULL;
23376
23377 if (TARGET_64BIT)
23378 {
23379 /* We always use R10 in 64-bit mode. */
23380 regno = R10_REG;
23381 }
23382 else
23383 {
23384 tree fntype;
23385 unsigned int ccvt;
23386
23387 /* By default in 32-bit mode we use ECX to pass the static chain. */
23388 regno = CX_REG;
23389
23390 fntype = TREE_TYPE (fndecl);
23391 ccvt = ix86_get_callcvt (fntype);
23392 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
23393 {
23394 /* Fastcall functions use ecx/edx for arguments, which leaves
23395 us with EAX for the static chain.
23396 Thiscall functions use ecx for arguments, which also
23397 leaves us with EAX for the static chain. */
23398 regno = AX_REG;
23399 }
23400 else if (ix86_function_regparm (fntype, fndecl) == 3)
23401 {
23402 /* For regparm 3, we have no free call-clobbered registers in
23403 which to store the static chain. In order to implement this,
23404 we have the trampoline push the static chain to the stack.
23405 However, we can't push a value below the return address when
23406 we call the nested function directly, so we have to use an
23407 alternate entry point. For this we use ESI, and have the
23408 alternate entry point push ESI, so that things appear the
23409 same once we're executing the nested function. */
23410 if (incoming_p)
23411 {
23412 if (fndecl == current_function_decl)
23413 ix86_static_chain_on_stack = true;
23414 return gen_frame_mem (SImode,
23415 plus_constant (arg_pointer_rtx, -8));
23416 }
23417 regno = SI_REG;
23418 }
23419 }
23420
23421 return gen_rtx_REG (Pmode, regno);
23422 }
23423
23424 /* Emit RTL insns to initialize the variable parts of a trampoline.
23425 FNDECL is the decl of the target address; M_TRAMP is a MEM for
23426 the trampoline, and CHAIN_VALUE is an RTX for the static chain
23427 to be passed to the target function. */
23428
23429 static void
23430 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
23431 {
23432 rtx mem, fnaddr;
23433 int opcode;
23434 int offset = 0;
23435
23436 fnaddr = XEXP (DECL_RTL (fndecl), 0);
23437
23438 if (TARGET_64BIT)
23439 {
23440 int size;
23441
23442 /* Load the function address into r11. Try to load the address using
23443 the shorter movl instead of movabs. We may want to support
23444 movq for kernel mode, but the kernel does not use trampolines at
23445 the moment. */
23446 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
23447 {
23448 fnaddr = copy_to_mode_reg (DImode, fnaddr);
23449
23450 mem = adjust_address (m_tramp, HImode, offset);
23451 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
23452
23453 mem = adjust_address (m_tramp, SImode, offset + 2);
23454 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
23455 offset += 6;
23456 }
23457 else
23458 {
23459 mem = adjust_address (m_tramp, HImode, offset);
23460 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
23461
23462 mem = adjust_address (m_tramp, DImode, offset + 2);
23463 emit_move_insn (mem, fnaddr);
23464 offset += 10;
23465 }
23466
23467 /* Load static chain using movabs to r10. Use the
23468 shorter movl instead of movabs for x32. */
23469 if (TARGET_X32)
23470 {
23471 opcode = 0xba41;
23472 size = 6;
23473 }
23474 else
23475 {
23476 opcode = 0xba49;
23477 size = 10;
23478 }
23479
23480 mem = adjust_address (m_tramp, HImode, offset);
23481 emit_move_insn (mem, gen_int_mode (opcode, HImode));
23482
23483 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
23484 emit_move_insn (mem, chain_value);
23485 offset += size;
23486
23487 /* Jump to r11; the last (unused) byte is a nop, only there to
23488 pad the write out to a single 32-bit store. */
23489 mem = adjust_address (m_tramp, SImode, offset);
23490 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
23491 offset += 4;
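      /* Illustratively, the 64-bit trampoline written above decodes as:
	     49 bb <imm64>   movabs $<function>, %r11
	     49 ba <imm64>   movabs $<chain>, %r10   (41 bb / 41 ba when the
						       shorter movl forms fit)
	     49 ff e3        rex.W jmp *%r11
	     90              nop (padding)  */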
23492 }
23493 else
23494 {
23495 rtx disp, chain;
23496
23497 /* Depending on the static chain location, either load a register
23498 with a constant, or push the constant to the stack. All of the
23499 instructions are the same size. */
23500 chain = ix86_static_chain (fndecl, true);
23501 if (REG_P (chain))
23502 {
23503 switch (REGNO (chain))
23504 {
23505 case AX_REG:
23506 opcode = 0xb8; break;
23507 case CX_REG:
23508 opcode = 0xb9; break;
23509 default:
23510 gcc_unreachable ();
23511 }
23512 }
23513 else
23514 opcode = 0x68;
23515
23516 mem = adjust_address (m_tramp, QImode, offset);
23517 emit_move_insn (mem, gen_int_mode (opcode, QImode));
23518
23519 mem = adjust_address (m_tramp, SImode, offset + 1);
23520 emit_move_insn (mem, chain_value);
23521 offset += 5;
23522
23523 mem = adjust_address (m_tramp, QImode, offset);
23524 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
23525
23526 mem = adjust_address (m_tramp, SImode, offset + 1);
23527
23528 /* Compute offset from the end of the jmp to the target function.
23529 In the case in which the trampoline stores the static chain on
23530 the stack, we need to skip the first insn which pushes the
23531 (call-saved) register static chain; this push is 1 byte. */
23532 offset += 5;
23533 disp = expand_binop (SImode, sub_optab, fnaddr,
23534 plus_constant (XEXP (m_tramp, 0),
23535 offset - (MEM_P (chain) ? 1 : 0)),
23536 NULL_RTX, 1, OPTAB_DIRECT);
23537 emit_move_insn (mem, disp);
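      /* Illustratively, the 32-bit trampoline written above decodes as:
	     b8/b9 <imm32>   movl $<chain>, %eax/%ecx   (or 68 <imm32>, pushl)
	     e9 <rel32>      jmp <function>  */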
23538 }
23539
23540 gcc_assert (offset <= TRAMPOLINE_SIZE);
23541
23542 #ifdef HAVE_ENABLE_EXECUTE_STACK
23543 #ifdef CHECK_EXECUTE_STACK_ENABLED
23544 if (CHECK_EXECUTE_STACK_ENABLED)
23545 #endif
23546 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
23547 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
23548 #endif
23549 }
23550 \f
23551 /* The following file contains several enumerations and data structures
23552 built from the definitions in i386-builtin-types.def. */
23553
23554 #include "i386-builtin-types.inc"
23555
23556 /* Table for the ix86 builtin non-function types. */
23557 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
23558
23559 /* Retrieve an element from the above table, building some of
23560 the types lazily. */
23561
23562 static tree
23563 ix86_get_builtin_type (enum ix86_builtin_type tcode)
23564 {
23565 unsigned int index;
23566 tree type, itype;
23567
23568 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
23569
23570 type = ix86_builtin_type_tab[(int) tcode];
23571 if (type != NULL)
23572 return type;
23573
23574 gcc_assert (tcode > IX86_BT_LAST_PRIM);
23575 if (tcode <= IX86_BT_LAST_VECT)
23576 {
23577 enum machine_mode mode;
23578
23579 index = tcode - IX86_BT_LAST_PRIM - 1;
23580 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
23581 mode = ix86_builtin_type_vect_mode[index];
23582
23583 type = build_vector_type_for_mode (itype, mode);
23584 }
23585 else
23586 {
23587 int quals;
23588
23589 index = tcode - IX86_BT_LAST_VECT - 1;
23590 if (tcode <= IX86_BT_LAST_PTR)
23591 quals = TYPE_UNQUALIFIED;
23592 else
23593 quals = TYPE_QUAL_CONST;
23594
23595 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
23596 if (quals != TYPE_UNQUALIFIED)
23597 itype = build_qualified_type (itype, quals);
23598
23599 type = build_pointer_type (itype);
23600 }
23601
23602 ix86_builtin_type_tab[(int) tcode] = type;
23603 return type;
23604 }
23605
23606 /* Table for the ix86 builtin function types. */
23607 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
23608
23609 /* Retrieve an element from the above table, building some of
23610 the types lazily. */
23611
23612 static tree
23613 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
23614 {
23615 tree type;
23616
23617 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
23618
23619 type = ix86_builtin_func_type_tab[(int) tcode];
23620 if (type != NULL)
23621 return type;
23622
23623 if (tcode <= IX86_BT_LAST_FUNC)
23624 {
23625 unsigned start = ix86_builtin_func_start[(int) tcode];
23626 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
23627 tree rtype, atype, args = void_list_node;
23628 unsigned i;
23629
23630 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
23631 for (i = after - 1; i > start; --i)
23632 {
23633 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
23634 args = tree_cons (NULL, atype, args);
23635 }
23636
23637 type = build_function_type (rtype, args);
23638 }
23639 else
23640 {
23641 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
23642 enum ix86_builtin_func_type icode;
23643
23644 icode = ix86_builtin_func_alias_base[index];
23645 type = ix86_get_builtin_func_type (icode);
23646 }
23647
23648 ix86_builtin_func_type_tab[(int) tcode] = type;
23649 return type;
23650 }
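/* Illustrative sketch (placeholder names, added for exposition): if the
   ix86_builtin_func_args slice for some function code were
   { V2DF, V2DF, V2DF } -- the return type first, then the arguments --
   the loop above would build the argument list in reverse order:

     args = tree_cons (NULL, v2df_type, void_list_node);
     args = tree_cons (NULL, v2df_type, args);
     type = build_function_type (v2df_type, args);

   yielding the tree for a function of type "V2DF (V2DF, V2DF)".  */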
23651
23652
23653 /* Codes for all the SSE/MMX builtins. */
23654 enum ix86_builtins
23655 {
23656 IX86_BUILTIN_ADDPS,
23657 IX86_BUILTIN_ADDSS,
23658 IX86_BUILTIN_DIVPS,
23659 IX86_BUILTIN_DIVSS,
23660 IX86_BUILTIN_MULPS,
23661 IX86_BUILTIN_MULSS,
23662 IX86_BUILTIN_SUBPS,
23663 IX86_BUILTIN_SUBSS,
23664
23665 IX86_BUILTIN_CMPEQPS,
23666 IX86_BUILTIN_CMPLTPS,
23667 IX86_BUILTIN_CMPLEPS,
23668 IX86_BUILTIN_CMPGTPS,
23669 IX86_BUILTIN_CMPGEPS,
23670 IX86_BUILTIN_CMPNEQPS,
23671 IX86_BUILTIN_CMPNLTPS,
23672 IX86_BUILTIN_CMPNLEPS,
23673 IX86_BUILTIN_CMPNGTPS,
23674 IX86_BUILTIN_CMPNGEPS,
23675 IX86_BUILTIN_CMPORDPS,
23676 IX86_BUILTIN_CMPUNORDPS,
23677 IX86_BUILTIN_CMPEQSS,
23678 IX86_BUILTIN_CMPLTSS,
23679 IX86_BUILTIN_CMPLESS,
23680 IX86_BUILTIN_CMPNEQSS,
23681 IX86_BUILTIN_CMPNLTSS,
23682 IX86_BUILTIN_CMPNLESS,
23683 IX86_BUILTIN_CMPNGTSS,
23684 IX86_BUILTIN_CMPNGESS,
23685 IX86_BUILTIN_CMPORDSS,
23686 IX86_BUILTIN_CMPUNORDSS,
23687
23688 IX86_BUILTIN_COMIEQSS,
23689 IX86_BUILTIN_COMILTSS,
23690 IX86_BUILTIN_COMILESS,
23691 IX86_BUILTIN_COMIGTSS,
23692 IX86_BUILTIN_COMIGESS,
23693 IX86_BUILTIN_COMINEQSS,
23694 IX86_BUILTIN_UCOMIEQSS,
23695 IX86_BUILTIN_UCOMILTSS,
23696 IX86_BUILTIN_UCOMILESS,
23697 IX86_BUILTIN_UCOMIGTSS,
23698 IX86_BUILTIN_UCOMIGESS,
23699 IX86_BUILTIN_UCOMINEQSS,
23700
23701 IX86_BUILTIN_CVTPI2PS,
23702 IX86_BUILTIN_CVTPS2PI,
23703 IX86_BUILTIN_CVTSI2SS,
23704 IX86_BUILTIN_CVTSI642SS,
23705 IX86_BUILTIN_CVTSS2SI,
23706 IX86_BUILTIN_CVTSS2SI64,
23707 IX86_BUILTIN_CVTTPS2PI,
23708 IX86_BUILTIN_CVTTSS2SI,
23709 IX86_BUILTIN_CVTTSS2SI64,
23710
23711 IX86_BUILTIN_MAXPS,
23712 IX86_BUILTIN_MAXSS,
23713 IX86_BUILTIN_MINPS,
23714 IX86_BUILTIN_MINSS,
23715
23716 IX86_BUILTIN_LOADUPS,
23717 IX86_BUILTIN_STOREUPS,
23718 IX86_BUILTIN_MOVSS,
23719
23720 IX86_BUILTIN_MOVHLPS,
23721 IX86_BUILTIN_MOVLHPS,
23722 IX86_BUILTIN_LOADHPS,
23723 IX86_BUILTIN_LOADLPS,
23724 IX86_BUILTIN_STOREHPS,
23725 IX86_BUILTIN_STORELPS,
23726
23727 IX86_BUILTIN_MASKMOVQ,
23728 IX86_BUILTIN_MOVMSKPS,
23729 IX86_BUILTIN_PMOVMSKB,
23730
23731 IX86_BUILTIN_MOVNTPS,
23732 IX86_BUILTIN_MOVNTQ,
23733
23734 IX86_BUILTIN_LOADDQU,
23735 IX86_BUILTIN_STOREDQU,
23736
23737 IX86_BUILTIN_PACKSSWB,
23738 IX86_BUILTIN_PACKSSDW,
23739 IX86_BUILTIN_PACKUSWB,
23740
23741 IX86_BUILTIN_PADDB,
23742 IX86_BUILTIN_PADDW,
23743 IX86_BUILTIN_PADDD,
23744 IX86_BUILTIN_PADDQ,
23745 IX86_BUILTIN_PADDSB,
23746 IX86_BUILTIN_PADDSW,
23747 IX86_BUILTIN_PADDUSB,
23748 IX86_BUILTIN_PADDUSW,
23749 IX86_BUILTIN_PSUBB,
23750 IX86_BUILTIN_PSUBW,
23751 IX86_BUILTIN_PSUBD,
23752 IX86_BUILTIN_PSUBQ,
23753 IX86_BUILTIN_PSUBSB,
23754 IX86_BUILTIN_PSUBSW,
23755 IX86_BUILTIN_PSUBUSB,
23756 IX86_BUILTIN_PSUBUSW,
23757
23758 IX86_BUILTIN_PAND,
23759 IX86_BUILTIN_PANDN,
23760 IX86_BUILTIN_POR,
23761 IX86_BUILTIN_PXOR,
23762
23763 IX86_BUILTIN_PAVGB,
23764 IX86_BUILTIN_PAVGW,
23765
23766 IX86_BUILTIN_PCMPEQB,
23767 IX86_BUILTIN_PCMPEQW,
23768 IX86_BUILTIN_PCMPEQD,
23769 IX86_BUILTIN_PCMPGTB,
23770 IX86_BUILTIN_PCMPGTW,
23771 IX86_BUILTIN_PCMPGTD,
23772
23773 IX86_BUILTIN_PMADDWD,
23774
23775 IX86_BUILTIN_PMAXSW,
23776 IX86_BUILTIN_PMAXUB,
23777 IX86_BUILTIN_PMINSW,
23778 IX86_BUILTIN_PMINUB,
23779
23780 IX86_BUILTIN_PMULHUW,
23781 IX86_BUILTIN_PMULHW,
23782 IX86_BUILTIN_PMULLW,
23783
23784 IX86_BUILTIN_PSADBW,
23785 IX86_BUILTIN_PSHUFW,
23786
23787 IX86_BUILTIN_PSLLW,
23788 IX86_BUILTIN_PSLLD,
23789 IX86_BUILTIN_PSLLQ,
23790 IX86_BUILTIN_PSRAW,
23791 IX86_BUILTIN_PSRAD,
23792 IX86_BUILTIN_PSRLW,
23793 IX86_BUILTIN_PSRLD,
23794 IX86_BUILTIN_PSRLQ,
23795 IX86_BUILTIN_PSLLWI,
23796 IX86_BUILTIN_PSLLDI,
23797 IX86_BUILTIN_PSLLQI,
23798 IX86_BUILTIN_PSRAWI,
23799 IX86_BUILTIN_PSRADI,
23800 IX86_BUILTIN_PSRLWI,
23801 IX86_BUILTIN_PSRLDI,
23802 IX86_BUILTIN_PSRLQI,
23803
23804 IX86_BUILTIN_PUNPCKHBW,
23805 IX86_BUILTIN_PUNPCKHWD,
23806 IX86_BUILTIN_PUNPCKHDQ,
23807 IX86_BUILTIN_PUNPCKLBW,
23808 IX86_BUILTIN_PUNPCKLWD,
23809 IX86_BUILTIN_PUNPCKLDQ,
23810
23811 IX86_BUILTIN_SHUFPS,
23812
23813 IX86_BUILTIN_RCPPS,
23814 IX86_BUILTIN_RCPSS,
23815 IX86_BUILTIN_RSQRTPS,
23816 IX86_BUILTIN_RSQRTPS_NR,
23817 IX86_BUILTIN_RSQRTSS,
23818 IX86_BUILTIN_RSQRTF,
23819 IX86_BUILTIN_SQRTPS,
23820 IX86_BUILTIN_SQRTPS_NR,
23821 IX86_BUILTIN_SQRTSS,
23822
23823 IX86_BUILTIN_UNPCKHPS,
23824 IX86_BUILTIN_UNPCKLPS,
23825
23826 IX86_BUILTIN_ANDPS,
23827 IX86_BUILTIN_ANDNPS,
23828 IX86_BUILTIN_ORPS,
23829 IX86_BUILTIN_XORPS,
23830
23831 IX86_BUILTIN_EMMS,
23832 IX86_BUILTIN_LDMXCSR,
23833 IX86_BUILTIN_STMXCSR,
23834 IX86_BUILTIN_SFENCE,
23835
23836 /* 3DNow! Original */
23837 IX86_BUILTIN_FEMMS,
23838 IX86_BUILTIN_PAVGUSB,
23839 IX86_BUILTIN_PF2ID,
23840 IX86_BUILTIN_PFACC,
23841 IX86_BUILTIN_PFADD,
23842 IX86_BUILTIN_PFCMPEQ,
23843 IX86_BUILTIN_PFCMPGE,
23844 IX86_BUILTIN_PFCMPGT,
23845 IX86_BUILTIN_PFMAX,
23846 IX86_BUILTIN_PFMIN,
23847 IX86_BUILTIN_PFMUL,
23848 IX86_BUILTIN_PFRCP,
23849 IX86_BUILTIN_PFRCPIT1,
23850 IX86_BUILTIN_PFRCPIT2,
23851 IX86_BUILTIN_PFRSQIT1,
23852 IX86_BUILTIN_PFRSQRT,
23853 IX86_BUILTIN_PFSUB,
23854 IX86_BUILTIN_PFSUBR,
23855 IX86_BUILTIN_PI2FD,
23856 IX86_BUILTIN_PMULHRW,
23857
23858 /* 3DNow! Athlon Extensions */
23859 IX86_BUILTIN_PF2IW,
23860 IX86_BUILTIN_PFNACC,
23861 IX86_BUILTIN_PFPNACC,
23862 IX86_BUILTIN_PI2FW,
23863 IX86_BUILTIN_PSWAPDSI,
23864 IX86_BUILTIN_PSWAPDSF,
23865
23866 /* SSE2 */
23867 IX86_BUILTIN_ADDPD,
23868 IX86_BUILTIN_ADDSD,
23869 IX86_BUILTIN_DIVPD,
23870 IX86_BUILTIN_DIVSD,
23871 IX86_BUILTIN_MULPD,
23872 IX86_BUILTIN_MULSD,
23873 IX86_BUILTIN_SUBPD,
23874 IX86_BUILTIN_SUBSD,
23875
23876 IX86_BUILTIN_CMPEQPD,
23877 IX86_BUILTIN_CMPLTPD,
23878 IX86_BUILTIN_CMPLEPD,
23879 IX86_BUILTIN_CMPGTPD,
23880 IX86_BUILTIN_CMPGEPD,
23881 IX86_BUILTIN_CMPNEQPD,
23882 IX86_BUILTIN_CMPNLTPD,
23883 IX86_BUILTIN_CMPNLEPD,
23884 IX86_BUILTIN_CMPNGTPD,
23885 IX86_BUILTIN_CMPNGEPD,
23886 IX86_BUILTIN_CMPORDPD,
23887 IX86_BUILTIN_CMPUNORDPD,
23888 IX86_BUILTIN_CMPEQSD,
23889 IX86_BUILTIN_CMPLTSD,
23890 IX86_BUILTIN_CMPLESD,
23891 IX86_BUILTIN_CMPNEQSD,
23892 IX86_BUILTIN_CMPNLTSD,
23893 IX86_BUILTIN_CMPNLESD,
23894 IX86_BUILTIN_CMPORDSD,
23895 IX86_BUILTIN_CMPUNORDSD,
23896
23897 IX86_BUILTIN_COMIEQSD,
23898 IX86_BUILTIN_COMILTSD,
23899 IX86_BUILTIN_COMILESD,
23900 IX86_BUILTIN_COMIGTSD,
23901 IX86_BUILTIN_COMIGESD,
23902 IX86_BUILTIN_COMINEQSD,
23903 IX86_BUILTIN_UCOMIEQSD,
23904 IX86_BUILTIN_UCOMILTSD,
23905 IX86_BUILTIN_UCOMILESD,
23906 IX86_BUILTIN_UCOMIGTSD,
23907 IX86_BUILTIN_UCOMIGESD,
23908 IX86_BUILTIN_UCOMINEQSD,
23909
23910 IX86_BUILTIN_MAXPD,
23911 IX86_BUILTIN_MAXSD,
23912 IX86_BUILTIN_MINPD,
23913 IX86_BUILTIN_MINSD,
23914
23915 IX86_BUILTIN_ANDPD,
23916 IX86_BUILTIN_ANDNPD,
23917 IX86_BUILTIN_ORPD,
23918 IX86_BUILTIN_XORPD,
23919
23920 IX86_BUILTIN_SQRTPD,
23921 IX86_BUILTIN_SQRTSD,
23922
23923 IX86_BUILTIN_UNPCKHPD,
23924 IX86_BUILTIN_UNPCKLPD,
23925
23926 IX86_BUILTIN_SHUFPD,
23927
23928 IX86_BUILTIN_LOADUPD,
23929 IX86_BUILTIN_STOREUPD,
23930 IX86_BUILTIN_MOVSD,
23931
23932 IX86_BUILTIN_LOADHPD,
23933 IX86_BUILTIN_LOADLPD,
23934
23935 IX86_BUILTIN_CVTDQ2PD,
23936 IX86_BUILTIN_CVTDQ2PS,
23937
23938 IX86_BUILTIN_CVTPD2DQ,
23939 IX86_BUILTIN_CVTPD2PI,
23940 IX86_BUILTIN_CVTPD2PS,
23941 IX86_BUILTIN_CVTTPD2DQ,
23942 IX86_BUILTIN_CVTTPD2PI,
23943
23944 IX86_BUILTIN_CVTPI2PD,
23945 IX86_BUILTIN_CVTSI2SD,
23946 IX86_BUILTIN_CVTSI642SD,
23947
23948 IX86_BUILTIN_CVTSD2SI,
23949 IX86_BUILTIN_CVTSD2SI64,
23950 IX86_BUILTIN_CVTSD2SS,
23951 IX86_BUILTIN_CVTSS2SD,
23952 IX86_BUILTIN_CVTTSD2SI,
23953 IX86_BUILTIN_CVTTSD2SI64,
23954
23955 IX86_BUILTIN_CVTPS2DQ,
23956 IX86_BUILTIN_CVTPS2PD,
23957 IX86_BUILTIN_CVTTPS2DQ,
23958
23959 IX86_BUILTIN_MOVNTI,
23960 IX86_BUILTIN_MOVNTPD,
23961 IX86_BUILTIN_MOVNTDQ,
23962
23963 IX86_BUILTIN_MOVQ128,
23964
23965 /* SSE2 MMX */
23966 IX86_BUILTIN_MASKMOVDQU,
23967 IX86_BUILTIN_MOVMSKPD,
23968 IX86_BUILTIN_PMOVMSKB128,
23969
23970 IX86_BUILTIN_PACKSSWB128,
23971 IX86_BUILTIN_PACKSSDW128,
23972 IX86_BUILTIN_PACKUSWB128,
23973
23974 IX86_BUILTIN_PADDB128,
23975 IX86_BUILTIN_PADDW128,
23976 IX86_BUILTIN_PADDD128,
23977 IX86_BUILTIN_PADDQ128,
23978 IX86_BUILTIN_PADDSB128,
23979 IX86_BUILTIN_PADDSW128,
23980 IX86_BUILTIN_PADDUSB128,
23981 IX86_BUILTIN_PADDUSW128,
23982 IX86_BUILTIN_PSUBB128,
23983 IX86_BUILTIN_PSUBW128,
23984 IX86_BUILTIN_PSUBD128,
23985 IX86_BUILTIN_PSUBQ128,
23986 IX86_BUILTIN_PSUBSB128,
23987 IX86_BUILTIN_PSUBSW128,
23988 IX86_BUILTIN_PSUBUSB128,
23989 IX86_BUILTIN_PSUBUSW128,
23990
23991 IX86_BUILTIN_PAND128,
23992 IX86_BUILTIN_PANDN128,
23993 IX86_BUILTIN_POR128,
23994 IX86_BUILTIN_PXOR128,
23995
23996 IX86_BUILTIN_PAVGB128,
23997 IX86_BUILTIN_PAVGW128,
23998
23999 IX86_BUILTIN_PCMPEQB128,
24000 IX86_BUILTIN_PCMPEQW128,
24001 IX86_BUILTIN_PCMPEQD128,
24002 IX86_BUILTIN_PCMPGTB128,
24003 IX86_BUILTIN_PCMPGTW128,
24004 IX86_BUILTIN_PCMPGTD128,
24005
24006 IX86_BUILTIN_PMADDWD128,
24007
24008 IX86_BUILTIN_PMAXSW128,
24009 IX86_BUILTIN_PMAXUB128,
24010 IX86_BUILTIN_PMINSW128,
24011 IX86_BUILTIN_PMINUB128,
24012
24013 IX86_BUILTIN_PMULUDQ,
24014 IX86_BUILTIN_PMULUDQ128,
24015 IX86_BUILTIN_PMULHUW128,
24016 IX86_BUILTIN_PMULHW128,
24017 IX86_BUILTIN_PMULLW128,
24018
24019 IX86_BUILTIN_PSADBW128,
24020 IX86_BUILTIN_PSHUFHW,
24021 IX86_BUILTIN_PSHUFLW,
24022 IX86_BUILTIN_PSHUFD,
24023
24024 IX86_BUILTIN_PSLLDQI128,
24025 IX86_BUILTIN_PSLLWI128,
24026 IX86_BUILTIN_PSLLDI128,
24027 IX86_BUILTIN_PSLLQI128,
24028 IX86_BUILTIN_PSRAWI128,
24029 IX86_BUILTIN_PSRADI128,
24030 IX86_BUILTIN_PSRLDQI128,
24031 IX86_BUILTIN_PSRLWI128,
24032 IX86_BUILTIN_PSRLDI128,
24033 IX86_BUILTIN_PSRLQI128,
24034
24035 IX86_BUILTIN_PSLLDQ128,
24036 IX86_BUILTIN_PSLLW128,
24037 IX86_BUILTIN_PSLLD128,
24038 IX86_BUILTIN_PSLLQ128,
24039 IX86_BUILTIN_PSRAW128,
24040 IX86_BUILTIN_PSRAD128,
24041 IX86_BUILTIN_PSRLW128,
24042 IX86_BUILTIN_PSRLD128,
24043 IX86_BUILTIN_PSRLQ128,
24044
24045 IX86_BUILTIN_PUNPCKHBW128,
24046 IX86_BUILTIN_PUNPCKHWD128,
24047 IX86_BUILTIN_PUNPCKHDQ128,
24048 IX86_BUILTIN_PUNPCKHQDQ128,
24049 IX86_BUILTIN_PUNPCKLBW128,
24050 IX86_BUILTIN_PUNPCKLWD128,
24051 IX86_BUILTIN_PUNPCKLDQ128,
24052 IX86_BUILTIN_PUNPCKLQDQ128,
24053
24054 IX86_BUILTIN_CLFLUSH,
24055 IX86_BUILTIN_MFENCE,
24056 IX86_BUILTIN_LFENCE,
24057 IX86_BUILTIN_PAUSE,
24058
24059 IX86_BUILTIN_BSRSI,
24060 IX86_BUILTIN_BSRDI,
24061 IX86_BUILTIN_RDPMC,
24062 IX86_BUILTIN_RDTSC,
24063 IX86_BUILTIN_RDTSCP,
24064 IX86_BUILTIN_ROLQI,
24065 IX86_BUILTIN_ROLHI,
24066 IX86_BUILTIN_RORQI,
24067 IX86_BUILTIN_RORHI,
24068
24069 /* SSE3. */
24070 IX86_BUILTIN_ADDSUBPS,
24071 IX86_BUILTIN_HADDPS,
24072 IX86_BUILTIN_HSUBPS,
24073 IX86_BUILTIN_MOVSHDUP,
24074 IX86_BUILTIN_MOVSLDUP,
24075 IX86_BUILTIN_ADDSUBPD,
24076 IX86_BUILTIN_HADDPD,
24077 IX86_BUILTIN_HSUBPD,
24078 IX86_BUILTIN_LDDQU,
24079
24080 IX86_BUILTIN_MONITOR,
24081 IX86_BUILTIN_MWAIT,
24082
24083 /* SSSE3. */
24084 IX86_BUILTIN_PHADDW,
24085 IX86_BUILTIN_PHADDD,
24086 IX86_BUILTIN_PHADDSW,
24087 IX86_BUILTIN_PHSUBW,
24088 IX86_BUILTIN_PHSUBD,
24089 IX86_BUILTIN_PHSUBSW,
24090 IX86_BUILTIN_PMADDUBSW,
24091 IX86_BUILTIN_PMULHRSW,
24092 IX86_BUILTIN_PSHUFB,
24093 IX86_BUILTIN_PSIGNB,
24094 IX86_BUILTIN_PSIGNW,
24095 IX86_BUILTIN_PSIGND,
24096 IX86_BUILTIN_PALIGNR,
24097 IX86_BUILTIN_PABSB,
24098 IX86_BUILTIN_PABSW,
24099 IX86_BUILTIN_PABSD,
24100
24101 IX86_BUILTIN_PHADDW128,
24102 IX86_BUILTIN_PHADDD128,
24103 IX86_BUILTIN_PHADDSW128,
24104 IX86_BUILTIN_PHSUBW128,
24105 IX86_BUILTIN_PHSUBD128,
24106 IX86_BUILTIN_PHSUBSW128,
24107 IX86_BUILTIN_PMADDUBSW128,
24108 IX86_BUILTIN_PMULHRSW128,
24109 IX86_BUILTIN_PSHUFB128,
24110 IX86_BUILTIN_PSIGNB128,
24111 IX86_BUILTIN_PSIGNW128,
24112 IX86_BUILTIN_PSIGND128,
24113 IX86_BUILTIN_PALIGNR128,
24114 IX86_BUILTIN_PABSB128,
24115 IX86_BUILTIN_PABSW128,
24116 IX86_BUILTIN_PABSD128,
24117
24118 /* AMDFAM10 - SSE4A New Instructions. */
24119 IX86_BUILTIN_MOVNTSD,
24120 IX86_BUILTIN_MOVNTSS,
24121 IX86_BUILTIN_EXTRQI,
24122 IX86_BUILTIN_EXTRQ,
24123 IX86_BUILTIN_INSERTQI,
24124 IX86_BUILTIN_INSERTQ,
24125
24126 /* SSE4.1. */
24127 IX86_BUILTIN_BLENDPD,
24128 IX86_BUILTIN_BLENDPS,
24129 IX86_BUILTIN_BLENDVPD,
24130 IX86_BUILTIN_BLENDVPS,
24131 IX86_BUILTIN_PBLENDVB128,
24132 IX86_BUILTIN_PBLENDW128,
24133
24134 IX86_BUILTIN_DPPD,
24135 IX86_BUILTIN_DPPS,
24136
24137 IX86_BUILTIN_INSERTPS128,
24138
24139 IX86_BUILTIN_MOVNTDQA,
24140 IX86_BUILTIN_MPSADBW128,
24141 IX86_BUILTIN_PACKUSDW128,
24142 IX86_BUILTIN_PCMPEQQ,
24143 IX86_BUILTIN_PHMINPOSUW128,
24144
24145 IX86_BUILTIN_PMAXSB128,
24146 IX86_BUILTIN_PMAXSD128,
24147 IX86_BUILTIN_PMAXUD128,
24148 IX86_BUILTIN_PMAXUW128,
24149
24150 IX86_BUILTIN_PMINSB128,
24151 IX86_BUILTIN_PMINSD128,
24152 IX86_BUILTIN_PMINUD128,
24153 IX86_BUILTIN_PMINUW128,
24154
24155 IX86_BUILTIN_PMOVSXBW128,
24156 IX86_BUILTIN_PMOVSXBD128,
24157 IX86_BUILTIN_PMOVSXBQ128,
24158 IX86_BUILTIN_PMOVSXWD128,
24159 IX86_BUILTIN_PMOVSXWQ128,
24160 IX86_BUILTIN_PMOVSXDQ128,
24161
24162 IX86_BUILTIN_PMOVZXBW128,
24163 IX86_BUILTIN_PMOVZXBD128,
24164 IX86_BUILTIN_PMOVZXBQ128,
24165 IX86_BUILTIN_PMOVZXWD128,
24166 IX86_BUILTIN_PMOVZXWQ128,
24167 IX86_BUILTIN_PMOVZXDQ128,
24168
24169 IX86_BUILTIN_PMULDQ128,
24170 IX86_BUILTIN_PMULLD128,
24171
24172 IX86_BUILTIN_ROUNDPD,
24173 IX86_BUILTIN_ROUNDPS,
24174 IX86_BUILTIN_ROUNDSD,
24175 IX86_BUILTIN_ROUNDSS,
24176
24177 IX86_BUILTIN_FLOORPD,
24178 IX86_BUILTIN_CEILPD,
24179 IX86_BUILTIN_TRUNCPD,
24180 IX86_BUILTIN_RINTPD,
24181 IX86_BUILTIN_ROUNDPD_AZ,
24182 IX86_BUILTIN_FLOORPS,
24183 IX86_BUILTIN_CEILPS,
24184 IX86_BUILTIN_TRUNCPS,
24185 IX86_BUILTIN_RINTPS,
24186 IX86_BUILTIN_ROUNDPS_AZ,
24187
24188 IX86_BUILTIN_PTESTZ,
24189 IX86_BUILTIN_PTESTC,
24190 IX86_BUILTIN_PTESTNZC,
24191
24192 IX86_BUILTIN_VEC_INIT_V2SI,
24193 IX86_BUILTIN_VEC_INIT_V4HI,
24194 IX86_BUILTIN_VEC_INIT_V8QI,
24195 IX86_BUILTIN_VEC_EXT_V2DF,
24196 IX86_BUILTIN_VEC_EXT_V2DI,
24197 IX86_BUILTIN_VEC_EXT_V4SF,
24198 IX86_BUILTIN_VEC_EXT_V4SI,
24199 IX86_BUILTIN_VEC_EXT_V8HI,
24200 IX86_BUILTIN_VEC_EXT_V2SI,
24201 IX86_BUILTIN_VEC_EXT_V4HI,
24202 IX86_BUILTIN_VEC_EXT_V16QI,
24203 IX86_BUILTIN_VEC_SET_V2DI,
24204 IX86_BUILTIN_VEC_SET_V4SF,
24205 IX86_BUILTIN_VEC_SET_V4SI,
24206 IX86_BUILTIN_VEC_SET_V8HI,
24207 IX86_BUILTIN_VEC_SET_V4HI,
24208 IX86_BUILTIN_VEC_SET_V16QI,
24209
24210 IX86_BUILTIN_VEC_PACK_SFIX,
24211
24212 /* SSE4.2. */
24213 IX86_BUILTIN_CRC32QI,
24214 IX86_BUILTIN_CRC32HI,
24215 IX86_BUILTIN_CRC32SI,
24216 IX86_BUILTIN_CRC32DI,
24217
24218 IX86_BUILTIN_PCMPESTRI128,
24219 IX86_BUILTIN_PCMPESTRM128,
24220 IX86_BUILTIN_PCMPESTRA128,
24221 IX86_BUILTIN_PCMPESTRC128,
24222 IX86_BUILTIN_PCMPESTRO128,
24223 IX86_BUILTIN_PCMPESTRS128,
24224 IX86_BUILTIN_PCMPESTRZ128,
24225 IX86_BUILTIN_PCMPISTRI128,
24226 IX86_BUILTIN_PCMPISTRM128,
24227 IX86_BUILTIN_PCMPISTRA128,
24228 IX86_BUILTIN_PCMPISTRC128,
24229 IX86_BUILTIN_PCMPISTRO128,
24230 IX86_BUILTIN_PCMPISTRS128,
24231 IX86_BUILTIN_PCMPISTRZ128,
24232
24233 IX86_BUILTIN_PCMPGTQ,
24234
24235 /* AES instructions */
24236 IX86_BUILTIN_AESENC128,
24237 IX86_BUILTIN_AESENCLAST128,
24238 IX86_BUILTIN_AESDEC128,
24239 IX86_BUILTIN_AESDECLAST128,
24240 IX86_BUILTIN_AESIMC128,
24241 IX86_BUILTIN_AESKEYGENASSIST128,
24242
24243 /* PCLMUL instruction */
24244 IX86_BUILTIN_PCLMULQDQ128,
24245
24246 /* AVX */
24247 IX86_BUILTIN_ADDPD256,
24248 IX86_BUILTIN_ADDPS256,
24249 IX86_BUILTIN_ADDSUBPD256,
24250 IX86_BUILTIN_ADDSUBPS256,
24251 IX86_BUILTIN_ANDPD256,
24252 IX86_BUILTIN_ANDPS256,
24253 IX86_BUILTIN_ANDNPD256,
24254 IX86_BUILTIN_ANDNPS256,
24255 IX86_BUILTIN_BLENDPD256,
24256 IX86_BUILTIN_BLENDPS256,
24257 IX86_BUILTIN_BLENDVPD256,
24258 IX86_BUILTIN_BLENDVPS256,
24259 IX86_BUILTIN_DIVPD256,
24260 IX86_BUILTIN_DIVPS256,
24261 IX86_BUILTIN_DPPS256,
24262 IX86_BUILTIN_HADDPD256,
24263 IX86_BUILTIN_HADDPS256,
24264 IX86_BUILTIN_HSUBPD256,
24265 IX86_BUILTIN_HSUBPS256,
24266 IX86_BUILTIN_MAXPD256,
24267 IX86_BUILTIN_MAXPS256,
24268 IX86_BUILTIN_MINPD256,
24269 IX86_BUILTIN_MINPS256,
24270 IX86_BUILTIN_MULPD256,
24271 IX86_BUILTIN_MULPS256,
24272 IX86_BUILTIN_ORPD256,
24273 IX86_BUILTIN_ORPS256,
24274 IX86_BUILTIN_SHUFPD256,
24275 IX86_BUILTIN_SHUFPS256,
24276 IX86_BUILTIN_SUBPD256,
24277 IX86_BUILTIN_SUBPS256,
24278 IX86_BUILTIN_XORPD256,
24279 IX86_BUILTIN_XORPS256,
24280 IX86_BUILTIN_CMPSD,
24281 IX86_BUILTIN_CMPSS,
24282 IX86_BUILTIN_CMPPD,
24283 IX86_BUILTIN_CMPPS,
24284 IX86_BUILTIN_CMPPD256,
24285 IX86_BUILTIN_CMPPS256,
24286 IX86_BUILTIN_CVTDQ2PD256,
24287 IX86_BUILTIN_CVTDQ2PS256,
24288 IX86_BUILTIN_CVTPD2PS256,
24289 IX86_BUILTIN_CVTPS2DQ256,
24290 IX86_BUILTIN_CVTPS2PD256,
24291 IX86_BUILTIN_CVTTPD2DQ256,
24292 IX86_BUILTIN_CVTPD2DQ256,
24293 IX86_BUILTIN_CVTTPS2DQ256,
24294 IX86_BUILTIN_EXTRACTF128PD256,
24295 IX86_BUILTIN_EXTRACTF128PS256,
24296 IX86_BUILTIN_EXTRACTF128SI256,
24297 IX86_BUILTIN_VZEROALL,
24298 IX86_BUILTIN_VZEROUPPER,
24299 IX86_BUILTIN_VPERMILVARPD,
24300 IX86_BUILTIN_VPERMILVARPS,
24301 IX86_BUILTIN_VPERMILVARPD256,
24302 IX86_BUILTIN_VPERMILVARPS256,
24303 IX86_BUILTIN_VPERMILPD,
24304 IX86_BUILTIN_VPERMILPS,
24305 IX86_BUILTIN_VPERMILPD256,
24306 IX86_BUILTIN_VPERMILPS256,
24307 IX86_BUILTIN_VPERMIL2PD,
24308 IX86_BUILTIN_VPERMIL2PS,
24309 IX86_BUILTIN_VPERMIL2PD256,
24310 IX86_BUILTIN_VPERMIL2PS256,
24311 IX86_BUILTIN_VPERM2F128PD256,
24312 IX86_BUILTIN_VPERM2F128PS256,
24313 IX86_BUILTIN_VPERM2F128SI256,
24314 IX86_BUILTIN_VBROADCASTSS,
24315 IX86_BUILTIN_VBROADCASTSD256,
24316 IX86_BUILTIN_VBROADCASTSS256,
24317 IX86_BUILTIN_VBROADCASTPD256,
24318 IX86_BUILTIN_VBROADCASTPS256,
24319 IX86_BUILTIN_VINSERTF128PD256,
24320 IX86_BUILTIN_VINSERTF128PS256,
24321 IX86_BUILTIN_VINSERTF128SI256,
24322 IX86_BUILTIN_LOADUPD256,
24323 IX86_BUILTIN_LOADUPS256,
24324 IX86_BUILTIN_STOREUPD256,
24325 IX86_BUILTIN_STOREUPS256,
24326 IX86_BUILTIN_LDDQU256,
24327 IX86_BUILTIN_MOVNTDQ256,
24328 IX86_BUILTIN_MOVNTPD256,
24329 IX86_BUILTIN_MOVNTPS256,
24330 IX86_BUILTIN_LOADDQU256,
24331 IX86_BUILTIN_STOREDQU256,
24332 IX86_BUILTIN_MASKLOADPD,
24333 IX86_BUILTIN_MASKLOADPS,
24334 IX86_BUILTIN_MASKSTOREPD,
24335 IX86_BUILTIN_MASKSTOREPS,
24336 IX86_BUILTIN_MASKLOADPD256,
24337 IX86_BUILTIN_MASKLOADPS256,
24338 IX86_BUILTIN_MASKSTOREPD256,
24339 IX86_BUILTIN_MASKSTOREPS256,
24340 IX86_BUILTIN_MOVSHDUP256,
24341 IX86_BUILTIN_MOVSLDUP256,
24342 IX86_BUILTIN_MOVDDUP256,
24343
24344 IX86_BUILTIN_SQRTPD256,
24345 IX86_BUILTIN_SQRTPS256,
24346 IX86_BUILTIN_SQRTPS_NR256,
24347 IX86_BUILTIN_RSQRTPS256,
24348 IX86_BUILTIN_RSQRTPS_NR256,
24349
24350 IX86_BUILTIN_RCPPS256,
24351
24352 IX86_BUILTIN_ROUNDPD256,
24353 IX86_BUILTIN_ROUNDPS256,
24354
24355 IX86_BUILTIN_FLOORPD256,
24356 IX86_BUILTIN_CEILPD256,
24357 IX86_BUILTIN_TRUNCPD256,
24358 IX86_BUILTIN_RINTPD256,
24359 IX86_BUILTIN_ROUNDPD_AZ256,
24360 IX86_BUILTIN_FLOORPS256,
24361 IX86_BUILTIN_CEILPS256,
24362 IX86_BUILTIN_TRUNCPS256,
24363 IX86_BUILTIN_RINTPS256,
24364 IX86_BUILTIN_ROUNDPS_AZ256,
24365
24366 IX86_BUILTIN_UNPCKHPD256,
24367 IX86_BUILTIN_UNPCKLPD256,
24368 IX86_BUILTIN_UNPCKHPS256,
24369 IX86_BUILTIN_UNPCKLPS256,
24370
24371 IX86_BUILTIN_SI256_SI,
24372 IX86_BUILTIN_PS256_PS,
24373 IX86_BUILTIN_PD256_PD,
24374 IX86_BUILTIN_SI_SI256,
24375 IX86_BUILTIN_PS_PS256,
24376 IX86_BUILTIN_PD_PD256,
24377
24378 IX86_BUILTIN_VTESTZPD,
24379 IX86_BUILTIN_VTESTCPD,
24380 IX86_BUILTIN_VTESTNZCPD,
24381 IX86_BUILTIN_VTESTZPS,
24382 IX86_BUILTIN_VTESTCPS,
24383 IX86_BUILTIN_VTESTNZCPS,
24384 IX86_BUILTIN_VTESTZPD256,
24385 IX86_BUILTIN_VTESTCPD256,
24386 IX86_BUILTIN_VTESTNZCPD256,
24387 IX86_BUILTIN_VTESTZPS256,
24388 IX86_BUILTIN_VTESTCPS256,
24389 IX86_BUILTIN_VTESTNZCPS256,
24390 IX86_BUILTIN_PTESTZ256,
24391 IX86_BUILTIN_PTESTC256,
24392 IX86_BUILTIN_PTESTNZC256,
24393
24394 IX86_BUILTIN_MOVMSKPD256,
24395 IX86_BUILTIN_MOVMSKPS256,
24396
24397 /* AVX2 */
24398 IX86_BUILTIN_MPSADBW256,
24399 IX86_BUILTIN_PABSB256,
24400 IX86_BUILTIN_PABSW256,
24401 IX86_BUILTIN_PABSD256,
24402 IX86_BUILTIN_PACKSSDW256,
24403 IX86_BUILTIN_PACKSSWB256,
24404 IX86_BUILTIN_PACKUSDW256,
24405 IX86_BUILTIN_PACKUSWB256,
24406 IX86_BUILTIN_PADDB256,
24407 IX86_BUILTIN_PADDW256,
24408 IX86_BUILTIN_PADDD256,
24409 IX86_BUILTIN_PADDQ256,
24410 IX86_BUILTIN_PADDSB256,
24411 IX86_BUILTIN_PADDSW256,
24412 IX86_BUILTIN_PADDUSB256,
24413 IX86_BUILTIN_PADDUSW256,
24414 IX86_BUILTIN_PALIGNR256,
24415 IX86_BUILTIN_AND256I,
24416 IX86_BUILTIN_ANDNOT256I,
24417 IX86_BUILTIN_PAVGB256,
24418 IX86_BUILTIN_PAVGW256,
24419 IX86_BUILTIN_PBLENDVB256,
24420 IX86_BUILTIN_PBLENDVW256,
24421 IX86_BUILTIN_PCMPEQB256,
24422 IX86_BUILTIN_PCMPEQW256,
24423 IX86_BUILTIN_PCMPEQD256,
24424 IX86_BUILTIN_PCMPEQQ256,
24425 IX86_BUILTIN_PCMPGTB256,
24426 IX86_BUILTIN_PCMPGTW256,
24427 IX86_BUILTIN_PCMPGTD256,
24428 IX86_BUILTIN_PCMPGTQ256,
24429 IX86_BUILTIN_PHADDW256,
24430 IX86_BUILTIN_PHADDD256,
24431 IX86_BUILTIN_PHADDSW256,
24432 IX86_BUILTIN_PHSUBW256,
24433 IX86_BUILTIN_PHSUBD256,
24434 IX86_BUILTIN_PHSUBSW256,
24435 IX86_BUILTIN_PMADDUBSW256,
24436 IX86_BUILTIN_PMADDWD256,
24437 IX86_BUILTIN_PMAXSB256,
24438 IX86_BUILTIN_PMAXSW256,
24439 IX86_BUILTIN_PMAXSD256,
24440 IX86_BUILTIN_PMAXUB256,
24441 IX86_BUILTIN_PMAXUW256,
24442 IX86_BUILTIN_PMAXUD256,
24443 IX86_BUILTIN_PMINSB256,
24444 IX86_BUILTIN_PMINSW256,
24445 IX86_BUILTIN_PMINSD256,
24446 IX86_BUILTIN_PMINUB256,
24447 IX86_BUILTIN_PMINUW256,
24448 IX86_BUILTIN_PMINUD256,
24449 IX86_BUILTIN_PMOVMSKB256,
24450 IX86_BUILTIN_PMOVSXBW256,
24451 IX86_BUILTIN_PMOVSXBD256,
24452 IX86_BUILTIN_PMOVSXBQ256,
24453 IX86_BUILTIN_PMOVSXWD256,
24454 IX86_BUILTIN_PMOVSXWQ256,
24455 IX86_BUILTIN_PMOVSXDQ256,
24456 IX86_BUILTIN_PMOVZXBW256,
24457 IX86_BUILTIN_PMOVZXBD256,
24458 IX86_BUILTIN_PMOVZXBQ256,
24459 IX86_BUILTIN_PMOVZXWD256,
24460 IX86_BUILTIN_PMOVZXWQ256,
24461 IX86_BUILTIN_PMOVZXDQ256,
24462 IX86_BUILTIN_PMULDQ256,
24463 IX86_BUILTIN_PMULHRSW256,
24464 IX86_BUILTIN_PMULHUW256,
24465 IX86_BUILTIN_PMULHW256,
24466 IX86_BUILTIN_PMULLW256,
24467 IX86_BUILTIN_PMULLD256,
24468 IX86_BUILTIN_PMULUDQ256,
24469 IX86_BUILTIN_POR256,
24470 IX86_BUILTIN_PSADBW256,
24471 IX86_BUILTIN_PSHUFB256,
24472 IX86_BUILTIN_PSHUFD256,
24473 IX86_BUILTIN_PSHUFHW256,
24474 IX86_BUILTIN_PSHUFLW256,
24475 IX86_BUILTIN_PSIGNB256,
24476 IX86_BUILTIN_PSIGNW256,
24477 IX86_BUILTIN_PSIGND256,
24478 IX86_BUILTIN_PSLLDQI256,
24479 IX86_BUILTIN_PSLLWI256,
24480 IX86_BUILTIN_PSLLW256,
24481 IX86_BUILTIN_PSLLDI256,
24482 IX86_BUILTIN_PSLLD256,
24483 IX86_BUILTIN_PSLLQI256,
24484 IX86_BUILTIN_PSLLQ256,
24485 IX86_BUILTIN_PSRAWI256,
24486 IX86_BUILTIN_PSRAW256,
24487 IX86_BUILTIN_PSRADI256,
24488 IX86_BUILTIN_PSRAD256,
24489 IX86_BUILTIN_PSRLDQI256,
24490 IX86_BUILTIN_PSRLWI256,
24491 IX86_BUILTIN_PSRLW256,
24492 IX86_BUILTIN_PSRLDI256,
24493 IX86_BUILTIN_PSRLD256,
24494 IX86_BUILTIN_PSRLQI256,
24495 IX86_BUILTIN_PSRLQ256,
24496 IX86_BUILTIN_PSUBB256,
24497 IX86_BUILTIN_PSUBW256,
24498 IX86_BUILTIN_PSUBD256,
24499 IX86_BUILTIN_PSUBQ256,
24500 IX86_BUILTIN_PSUBSB256,
24501 IX86_BUILTIN_PSUBSW256,
24502 IX86_BUILTIN_PSUBUSB256,
24503 IX86_BUILTIN_PSUBUSW256,
24504 IX86_BUILTIN_PUNPCKHBW256,
24505 IX86_BUILTIN_PUNPCKHWD256,
24506 IX86_BUILTIN_PUNPCKHDQ256,
24507 IX86_BUILTIN_PUNPCKHQDQ256,
24508 IX86_BUILTIN_PUNPCKLBW256,
24509 IX86_BUILTIN_PUNPCKLWD256,
24510 IX86_BUILTIN_PUNPCKLDQ256,
24511 IX86_BUILTIN_PUNPCKLQDQ256,
24512 IX86_BUILTIN_PXOR256,
24513 IX86_BUILTIN_MOVNTDQA256,
24514 IX86_BUILTIN_VBROADCASTSS_PS,
24515 IX86_BUILTIN_VBROADCASTSS_PS256,
24516 IX86_BUILTIN_VBROADCASTSD_PD256,
24517 IX86_BUILTIN_VBROADCASTSI256,
24518 IX86_BUILTIN_PBLENDD256,
24519 IX86_BUILTIN_PBLENDD128,
24520 IX86_BUILTIN_PBROADCASTB256,
24521 IX86_BUILTIN_PBROADCASTW256,
24522 IX86_BUILTIN_PBROADCASTD256,
24523 IX86_BUILTIN_PBROADCASTQ256,
24524 IX86_BUILTIN_PBROADCASTB128,
24525 IX86_BUILTIN_PBROADCASTW128,
24526 IX86_BUILTIN_PBROADCASTD128,
24527 IX86_BUILTIN_PBROADCASTQ128,
24528 IX86_BUILTIN_VPERMVARSI256,
24529 IX86_BUILTIN_VPERMDF256,
24530 IX86_BUILTIN_VPERMVARSF256,
24531 IX86_BUILTIN_VPERMDI256,
24532 IX86_BUILTIN_VPERMTI256,
24533 IX86_BUILTIN_VEXTRACT128I256,
24534 IX86_BUILTIN_VINSERT128I256,
24535 IX86_BUILTIN_MASKLOADD,
24536 IX86_BUILTIN_MASKLOADQ,
24537 IX86_BUILTIN_MASKLOADD256,
24538 IX86_BUILTIN_MASKLOADQ256,
24539 IX86_BUILTIN_MASKSTORED,
24540 IX86_BUILTIN_MASKSTOREQ,
24541 IX86_BUILTIN_MASKSTORED256,
24542 IX86_BUILTIN_MASKSTOREQ256,
24543 IX86_BUILTIN_PSLLVV4DI,
24544 IX86_BUILTIN_PSLLVV2DI,
24545 IX86_BUILTIN_PSLLVV8SI,
24546 IX86_BUILTIN_PSLLVV4SI,
24547 IX86_BUILTIN_PSRAVV8SI,
24548 IX86_BUILTIN_PSRAVV4SI,
24549 IX86_BUILTIN_PSRLVV4DI,
24550 IX86_BUILTIN_PSRLVV2DI,
24551 IX86_BUILTIN_PSRLVV8SI,
24552 IX86_BUILTIN_PSRLVV4SI,
24553
24554 IX86_BUILTIN_GATHERSIV2DF,
24555 IX86_BUILTIN_GATHERSIV4DF,
24556 IX86_BUILTIN_GATHERDIV2DF,
24557 IX86_BUILTIN_GATHERDIV4DF,
24558 IX86_BUILTIN_GATHERSIV4SF,
24559 IX86_BUILTIN_GATHERSIV8SF,
24560 IX86_BUILTIN_GATHERDIV4SF,
24561 IX86_BUILTIN_GATHERDIV8SF,
24562 IX86_BUILTIN_GATHERSIV2DI,
24563 IX86_BUILTIN_GATHERSIV4DI,
24564 IX86_BUILTIN_GATHERDIV2DI,
24565 IX86_BUILTIN_GATHERDIV4DI,
24566 IX86_BUILTIN_GATHERSIV4SI,
24567 IX86_BUILTIN_GATHERSIV8SI,
24568 IX86_BUILTIN_GATHERDIV4SI,
24569 IX86_BUILTIN_GATHERDIV8SI,
24570
24571 /* TFmode support builtins. */
24572 IX86_BUILTIN_INFQ,
24573 IX86_BUILTIN_HUGE_VALQ,
24574 IX86_BUILTIN_FABSQ,
24575 IX86_BUILTIN_COPYSIGNQ,
24576
24577 /* Vectorizer support builtins. */
24578 IX86_BUILTIN_CPYSGNPS,
24579 IX86_BUILTIN_CPYSGNPD,
24580 IX86_BUILTIN_CPYSGNPS256,
24581 IX86_BUILTIN_CPYSGNPD256,
24582
24583 IX86_BUILTIN_CVTUDQ2PS,
24584
24585 IX86_BUILTIN_VEC_PERM_V2DF,
24586 IX86_BUILTIN_VEC_PERM_V4SF,
24587 IX86_BUILTIN_VEC_PERM_V2DI,
24588 IX86_BUILTIN_VEC_PERM_V4SI,
24589 IX86_BUILTIN_VEC_PERM_V8HI,
24590 IX86_BUILTIN_VEC_PERM_V16QI,
24591 IX86_BUILTIN_VEC_PERM_V2DI_U,
24592 IX86_BUILTIN_VEC_PERM_V4SI_U,
24593 IX86_BUILTIN_VEC_PERM_V8HI_U,
24594 IX86_BUILTIN_VEC_PERM_V16QI_U,
24595 IX86_BUILTIN_VEC_PERM_V4DF,
24596 IX86_BUILTIN_VEC_PERM_V8SF,
24597
24598 /* FMA4 instructions. */
24599 IX86_BUILTIN_VFMADDSS,
24600 IX86_BUILTIN_VFMADDSD,
24601 IX86_BUILTIN_VFMADDPS,
24602 IX86_BUILTIN_VFMADDPD,
24603 IX86_BUILTIN_VFMADDPS256,
24604 IX86_BUILTIN_VFMADDPD256,
24605 IX86_BUILTIN_VFMADDSUBPS,
24606 IX86_BUILTIN_VFMADDSUBPD,
24607 IX86_BUILTIN_VFMADDSUBPS256,
24608 IX86_BUILTIN_VFMADDSUBPD256,
24609
24610 /* FMA3 instructions. */
24611 IX86_BUILTIN_VFMADDSS3,
24612 IX86_BUILTIN_VFMADDSD3,
24613
24614 /* XOP instructions. */
24615 IX86_BUILTIN_VPCMOV,
24616 IX86_BUILTIN_VPCMOV_V2DI,
24617 IX86_BUILTIN_VPCMOV_V4SI,
24618 IX86_BUILTIN_VPCMOV_V8HI,
24619 IX86_BUILTIN_VPCMOV_V16QI,
24620 IX86_BUILTIN_VPCMOV_V4SF,
24621 IX86_BUILTIN_VPCMOV_V2DF,
24622 IX86_BUILTIN_VPCMOV256,
24623 IX86_BUILTIN_VPCMOV_V4DI256,
24624 IX86_BUILTIN_VPCMOV_V8SI256,
24625 IX86_BUILTIN_VPCMOV_V16HI256,
24626 IX86_BUILTIN_VPCMOV_V32QI256,
24627 IX86_BUILTIN_VPCMOV_V8SF256,
24628 IX86_BUILTIN_VPCMOV_V4DF256,
24629
24630 IX86_BUILTIN_VPPERM,
24631
24632 IX86_BUILTIN_VPMACSSWW,
24633 IX86_BUILTIN_VPMACSWW,
24634 IX86_BUILTIN_VPMACSSWD,
24635 IX86_BUILTIN_VPMACSWD,
24636 IX86_BUILTIN_VPMACSSDD,
24637 IX86_BUILTIN_VPMACSDD,
24638 IX86_BUILTIN_VPMACSSDQL,
24639 IX86_BUILTIN_VPMACSSDQH,
24640 IX86_BUILTIN_VPMACSDQL,
24641 IX86_BUILTIN_VPMACSDQH,
24642 IX86_BUILTIN_VPMADCSSWD,
24643 IX86_BUILTIN_VPMADCSWD,
24644
24645 IX86_BUILTIN_VPHADDBW,
24646 IX86_BUILTIN_VPHADDBD,
24647 IX86_BUILTIN_VPHADDBQ,
24648 IX86_BUILTIN_VPHADDWD,
24649 IX86_BUILTIN_VPHADDWQ,
24650 IX86_BUILTIN_VPHADDDQ,
24651 IX86_BUILTIN_VPHADDUBW,
24652 IX86_BUILTIN_VPHADDUBD,
24653 IX86_BUILTIN_VPHADDUBQ,
24654 IX86_BUILTIN_VPHADDUWD,
24655 IX86_BUILTIN_VPHADDUWQ,
24656 IX86_BUILTIN_VPHADDUDQ,
24657 IX86_BUILTIN_VPHSUBBW,
24658 IX86_BUILTIN_VPHSUBWD,
24659 IX86_BUILTIN_VPHSUBDQ,
24660
24661 IX86_BUILTIN_VPROTB,
24662 IX86_BUILTIN_VPROTW,
24663 IX86_BUILTIN_VPROTD,
24664 IX86_BUILTIN_VPROTQ,
24665 IX86_BUILTIN_VPROTB_IMM,
24666 IX86_BUILTIN_VPROTW_IMM,
24667 IX86_BUILTIN_VPROTD_IMM,
24668 IX86_BUILTIN_VPROTQ_IMM,
24669
24670 IX86_BUILTIN_VPSHLB,
24671 IX86_BUILTIN_VPSHLW,
24672 IX86_BUILTIN_VPSHLD,
24673 IX86_BUILTIN_VPSHLQ,
24674 IX86_BUILTIN_VPSHAB,
24675 IX86_BUILTIN_VPSHAW,
24676 IX86_BUILTIN_VPSHAD,
24677 IX86_BUILTIN_VPSHAQ,
24678
24679 IX86_BUILTIN_VFRCZSS,
24680 IX86_BUILTIN_VFRCZSD,
24681 IX86_BUILTIN_VFRCZPS,
24682 IX86_BUILTIN_VFRCZPD,
24683 IX86_BUILTIN_VFRCZPS256,
24684 IX86_BUILTIN_VFRCZPD256,
24685
24686 IX86_BUILTIN_VPCOMEQUB,
24687 IX86_BUILTIN_VPCOMNEUB,
24688 IX86_BUILTIN_VPCOMLTUB,
24689 IX86_BUILTIN_VPCOMLEUB,
24690 IX86_BUILTIN_VPCOMGTUB,
24691 IX86_BUILTIN_VPCOMGEUB,
24692 IX86_BUILTIN_VPCOMFALSEUB,
24693 IX86_BUILTIN_VPCOMTRUEUB,
24694
24695 IX86_BUILTIN_VPCOMEQUW,
24696 IX86_BUILTIN_VPCOMNEUW,
24697 IX86_BUILTIN_VPCOMLTUW,
24698 IX86_BUILTIN_VPCOMLEUW,
24699 IX86_BUILTIN_VPCOMGTUW,
24700 IX86_BUILTIN_VPCOMGEUW,
24701 IX86_BUILTIN_VPCOMFALSEUW,
24702 IX86_BUILTIN_VPCOMTRUEUW,
24703
24704 IX86_BUILTIN_VPCOMEQUD,
24705 IX86_BUILTIN_VPCOMNEUD,
24706 IX86_BUILTIN_VPCOMLTUD,
24707 IX86_BUILTIN_VPCOMLEUD,
24708 IX86_BUILTIN_VPCOMGTUD,
24709 IX86_BUILTIN_VPCOMGEUD,
24710 IX86_BUILTIN_VPCOMFALSEUD,
24711 IX86_BUILTIN_VPCOMTRUEUD,
24712
24713 IX86_BUILTIN_VPCOMEQUQ,
24714 IX86_BUILTIN_VPCOMNEUQ,
24715 IX86_BUILTIN_VPCOMLTUQ,
24716 IX86_BUILTIN_VPCOMLEUQ,
24717 IX86_BUILTIN_VPCOMGTUQ,
24718 IX86_BUILTIN_VPCOMGEUQ,
24719 IX86_BUILTIN_VPCOMFALSEUQ,
24720 IX86_BUILTIN_VPCOMTRUEUQ,
24721
24722 IX86_BUILTIN_VPCOMEQB,
24723 IX86_BUILTIN_VPCOMNEB,
24724 IX86_BUILTIN_VPCOMLTB,
24725 IX86_BUILTIN_VPCOMLEB,
24726 IX86_BUILTIN_VPCOMGTB,
24727 IX86_BUILTIN_VPCOMGEB,
24728 IX86_BUILTIN_VPCOMFALSEB,
24729 IX86_BUILTIN_VPCOMTRUEB,
24730
24731 IX86_BUILTIN_VPCOMEQW,
24732 IX86_BUILTIN_VPCOMNEW,
24733 IX86_BUILTIN_VPCOMLTW,
24734 IX86_BUILTIN_VPCOMLEW,
24735 IX86_BUILTIN_VPCOMGTW,
24736 IX86_BUILTIN_VPCOMGEW,
24737 IX86_BUILTIN_VPCOMFALSEW,
24738 IX86_BUILTIN_VPCOMTRUEW,
24739
24740 IX86_BUILTIN_VPCOMEQD,
24741 IX86_BUILTIN_VPCOMNED,
24742 IX86_BUILTIN_VPCOMLTD,
24743 IX86_BUILTIN_VPCOMLED,
24744 IX86_BUILTIN_VPCOMGTD,
24745 IX86_BUILTIN_VPCOMGED,
24746 IX86_BUILTIN_VPCOMFALSED,
24747 IX86_BUILTIN_VPCOMTRUED,
24748
24749 IX86_BUILTIN_VPCOMEQQ,
24750 IX86_BUILTIN_VPCOMNEQ,
24751 IX86_BUILTIN_VPCOMLTQ,
24752 IX86_BUILTIN_VPCOMLEQ,
24753 IX86_BUILTIN_VPCOMGTQ,
24754 IX86_BUILTIN_VPCOMGEQ,
24755 IX86_BUILTIN_VPCOMFALSEQ,
24756 IX86_BUILTIN_VPCOMTRUEQ,
24757
24758 /* LWP instructions. */
24759 IX86_BUILTIN_LLWPCB,
24760 IX86_BUILTIN_SLWPCB,
24761 IX86_BUILTIN_LWPVAL32,
24762 IX86_BUILTIN_LWPVAL64,
24763 IX86_BUILTIN_LWPINS32,
24764 IX86_BUILTIN_LWPINS64,
24765
24766 IX86_BUILTIN_CLZS,
24767
24768 /* BMI instructions. */
24769 IX86_BUILTIN_BEXTR32,
24770 IX86_BUILTIN_BEXTR64,
24771 IX86_BUILTIN_CTZS,
24772
24773 /* TBM instructions. */
24774 IX86_BUILTIN_BEXTRI32,
24775 IX86_BUILTIN_BEXTRI64,
24776
24777 /* BMI2 instructions. */
24778 IX86_BUILTIN_BZHI32,
24779 IX86_BUILTIN_BZHI64,
24780 IX86_BUILTIN_PDEP32,
24781 IX86_BUILTIN_PDEP64,
24782 IX86_BUILTIN_PEXT32,
24783 IX86_BUILTIN_PEXT64,
24784
24785 /* FSGSBASE instructions. */
24786 IX86_BUILTIN_RDFSBASE32,
24787 IX86_BUILTIN_RDFSBASE64,
24788 IX86_BUILTIN_RDGSBASE32,
24789 IX86_BUILTIN_RDGSBASE64,
24790 IX86_BUILTIN_WRFSBASE32,
24791 IX86_BUILTIN_WRFSBASE64,
24792 IX86_BUILTIN_WRGSBASE32,
24793 IX86_BUILTIN_WRGSBASE64,
24794
24795 /* RDRND instructions. */
24796 IX86_BUILTIN_RDRAND16_STEP,
24797 IX86_BUILTIN_RDRAND32_STEP,
24798 IX86_BUILTIN_RDRAND64_STEP,
24799
24800 /* F16C instructions. */
24801 IX86_BUILTIN_CVTPH2PS,
24802 IX86_BUILTIN_CVTPH2PS256,
24803 IX86_BUILTIN_CVTPS2PH,
24804 IX86_BUILTIN_CVTPS2PH256,
24805
24806   /* CFString built-in for Darwin */
24807 IX86_BUILTIN_CFSTRING,
24808
24809 IX86_BUILTIN_MAX
24810 };
24811
24812 /* Table for the ix86 builtin decls. */
24813 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
24814
24815 /* Table of all of the builtin functions that are possible with different ISAs,
24816    but are waiting to be built until a function is declared to use that
24817    ISA. */
24818 struct builtin_isa {
24819 const char *name; /* function name */
24820 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
24821 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
24822 bool const_p; /* true if the declaration is constant */
24823   bool set_and_not_built_p;	/* true if the builtin was deferred and
					   its decl has not been built yet */
24824 };
24825
24826 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
24827
24828
24829 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
24830    of which isa_flags to use in the ix86_builtins_isa array. Store the
24831    function decl in the ix86_builtins array. Return the function decl, or
24832    NULL_TREE if the builtin was not added.
24833
24834 If the front end has a special hook for builtin functions, delay adding
24835 builtin functions that aren't in the current ISA until the ISA is changed
24836 with function specific optimization. Doing so, can save about 300K for the
24837 default compiler. When the builtin is expanded, check at that time whether
24838 it is valid.
24839
24840 If the front end doesn't have a special hook, record all builtins, even if
24841 it isn't an instruction set in the current ISA in case the user uses
24842 function specific options for a different ISA, so that we don't get scope
24843 errors if a builtin is added in the middle of a function scope. */
24844
24845 static inline tree
24846 def_builtin (HOST_WIDE_INT mask, const char *name,
24847 enum ix86_builtin_func_type tcode,
24848 enum ix86_builtins code)
24849 {
24850 tree decl = NULL_TREE;
24851
24852 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
24853 {
24854 ix86_builtins_isa[(int) code].isa = mask;
24855
24856 mask &= ~OPTION_MASK_ISA_64BIT;
24857 if (mask == 0
24858 || (mask & ix86_isa_flags) != 0
24859 || (lang_hooks.builtin_function
24860 == lang_hooks.builtin_function_ext_scope))
24861
24862 {
24863 tree type = ix86_get_builtin_func_type (tcode);
24864 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
24865 NULL, NULL_TREE);
24866 ix86_builtins[(int) code] = decl;
24867 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
24868 }
24869 else
24870 {
24871 ix86_builtins[(int) code] = NULL_TREE;
24872 ix86_builtins_isa[(int) code].tcode = tcode;
24873 ix86_builtins_isa[(int) code].name = name;
24874 ix86_builtins_isa[(int) code].const_p = false;
24875 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
24876 }
24877 }
24878
24879 return decl;
24880 }
24881
24882 /* Like def_builtin, but also marks the function decl "const". */
24883
24884 static inline tree
24885 def_builtin_const (HOST_WIDE_INT mask, const char *name,
24886 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
24887 {
24888 tree decl = def_builtin (mask, name, tcode, code);
24889 if (decl)
24890 TREE_READONLY (decl) = 1;
24891 else
24892 ix86_builtins_isa[(int) code].const_p = true;
24893
24894 return decl;
24895 }
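/* Usage sketch (illustrative; the real registrations are driven from the
   bdesc_* tables below and the ix86_init_* routines):

     def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_addpd",
			V2DF_FTYPE_V2DF_V2DF, IX86_BUILTIN_ADDPD);

   If the requested ISA is not currently enabled and the front end defers
   out-of-ISA builtins, only the name, type code and mask are recorded in
   ix86_builtins_isa; the decl itself is built later by
   ix86_add_new_builtins once the ISA becomes available.  */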
24896
24897 /* Add any new builtin functions for a given ISA that may not have been
24898 declared. This saves a bit of space compared to adding all of the
24899 declarations to the tree, even if we didn't use them. */
24900
24901 static void
24902 ix86_add_new_builtins (HOST_WIDE_INT isa)
24903 {
24904 int i;
24905
24906 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
24907 {
24908 if ((ix86_builtins_isa[i].isa & isa) != 0
24909 && ix86_builtins_isa[i].set_and_not_built_p)
24910 {
24911 tree decl, type;
24912
24913 /* Don't define the builtin again. */
24914 ix86_builtins_isa[i].set_and_not_built_p = false;
24915
24916 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
24917 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
24918 type, i, BUILT_IN_MD, NULL,
24919 NULL_TREE);
24920
24921 ix86_builtins[i] = decl;
24922 if (ix86_builtins_isa[i].const_p)
24923 TREE_READONLY (decl) = 1;
24924 }
24925 }
24926 }
24927
24928 /* Bits for builtin_description.flag. */
24929
24930 /* Set when we don't support the comparison natively, and should
24931 swap_comparison in order to support it. */
24932 #define BUILTIN_DESC_SWAP_OPERANDS 1
24933
24934 struct builtin_description
24935 {
24936 const HOST_WIDE_INT mask;
24937 const enum insn_code icode;
24938 const char *const name;
24939 const enum ix86_builtins code;
24940 const enum rtx_code comparison;
24941 const int flag;
24942 };
24943
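/* Note (added for exposition; the exact registration loops live elsewhere
   in this file): the bdesc_* tables below are walked at builtin-init time
   with loops of roughly this form:

     for (i = 0, d = bdesc_args; i < ARRAY_SIZE (bdesc_args); i++, d++)
       def_builtin_const (d->mask, d->name,
			  (enum ix86_builtin_func_type) d->flag, d->code);

   For bdesc_args and bdesc_special_args the FLAG field carries the
   function type code; bdesc_comi leaves FLAG zero and uses COMPARISON for
   the rtx code, while bdesc_pcmpestr and bdesc_pcmpistr store a CC mode
   in FLAG.  */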
24944 static const struct builtin_description bdesc_comi[] =
24945 {
24946 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
24947 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
24948 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
24949 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
24950 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
24951 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
24952 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
24953 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
24954 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
24955 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
24956 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
24957 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
24958 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
24959 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
24960 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
24961 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
24962 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
24963 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
24964 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
24965 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
24966 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
24967 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
24968 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
24969 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
24970 };
24971
24972 static const struct builtin_description bdesc_pcmpestr[] =
24973 {
24974 /* SSE4.2 */
24975 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
24976 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
24977 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
24978 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
24979 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
24980 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
24981 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
24982 };
24983
24984 static const struct builtin_description bdesc_pcmpistr[] =
24985 {
24986 /* SSE4.2 */
24987 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
24988 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
24989 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
24990 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
24991 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
24992 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
24993 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
24994 };
24995
24996 /* Special builtins with variable number of arguments. */
24997 static const struct builtin_description bdesc_special_args[] =
24998 {
24999 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
25000 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
25001 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
25002
25003 /* MMX */
25004 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
25005
25006 /* 3DNow! */
25007 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
25008
25009 /* SSE */
25010 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
25011 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
25012 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
25013
25014 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
25015 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
25016 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
25017 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
25018
25019 /* SSE or 3DNow!A */
25020 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
25021 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
25022
25023 /* SSE2 */
25024 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
25025 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
25026 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
25027 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
25028 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
25029 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
25030 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
25031 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
25032 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
25033
25034 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
25035 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
25036
25037 /* SSE3 */
25038 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
25039
25040 /* SSE4.1 */
25041 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
25042
25043 /* SSE4A */
25044 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
25045 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
25046
25047 /* AVX */
25048 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
25049 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
25050
25051 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
25052 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
25053 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
25054 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
25055 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
25056
25057 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
25058 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
25059 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
25060 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
25061 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
25062 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
25063 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
25064
25065 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
25066 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
25067 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
25068
25069 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
25070 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
25071 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
25072 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
25073 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
25074 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
25075 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
25076 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
25077
25078 /* AVX2 */
25079 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
25080 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
25081 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
25082 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
25083 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
25084 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
25085 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
25086 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
25087 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
25088
25089 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
25090 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
25091 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
25092 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
25093 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
25094 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
25095
25096 /* FSGSBASE */
25097 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
25098 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
25099 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
25100 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
25101 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
25102 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
25103 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
25104 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
25105 };
25106
25107 /* Builtins with variable number of arguments. */
25108 static const struct builtin_description bdesc_args[] =
25109 {
25110 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
25111 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
25112 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
25113 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
25114 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
25115 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
25116 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
25117
25118 /* MMX */
25119 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25120 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25121 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25122 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25123 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25124 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25125
25126 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25127 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25128 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25129 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25130 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25131 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25132 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25133 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25134
25135 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25136 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25137
25138 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25139 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25140 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25141 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25142
25143 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25144 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25145 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25146 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25147 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25148 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25149
25150 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25151 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25152 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25153 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25154 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25155 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25156
25157 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
25158 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
25159 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
25160
25161 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
25162
25163 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
25164 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
25165 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
25166 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
25167 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
25168 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
25169
25170 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
25171 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
25172 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
25173 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
25174 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
25175 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
25176
25177 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
25178 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
25179 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
25180 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
25181
25182 /* 3DNow! */
25183 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
25184 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
25185 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
25186 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
25187
25188 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25189 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25190 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25191 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
25192 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
25193 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
25194 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25195 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25196 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25197 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25198 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25199 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25200 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25201 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25202 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25203
25204 /* 3DNow!A */
25205 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
25206 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
25207 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
25208 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
25209 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25210 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25211
25212 /* SSE */
25213 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
25214 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
25215 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
25216 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
25217 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
25218 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
25219 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
25220 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
25221 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
25222 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
25223 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
25224 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
25225
25226 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
25227
25228 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25229 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25230 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25231 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25232 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25233 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25234 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25235 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25236
25237 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
25238 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
25239 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
25240 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
25241 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
25242 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
25243 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
25244 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
25245 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
25246 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
25247 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
25248 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
25249 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
25250 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
25251 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
25252 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
25253 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
25254 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
25255 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
25256 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
25257 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
25258 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
25259
25260 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25261 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25262 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25263 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25264
25265 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25266 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25267 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25268 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25269
25270 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25271
25272 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25273 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25274 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25275 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25276 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25277
25278 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
25279 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
25280 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
25281
25282 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
25283
25284 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
25285 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
25286 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
25287
25288 /* SSE MMX or 3DNow!A */
25289 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25290 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25291 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25292
25293 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25294 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25295 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25296 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25297
25298 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
25299 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
25300
25301 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
25302
25303 /* SSE2 */
25304 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
25305
25306 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
25307 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
25308 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
25309 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
25310 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
25311 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
25312 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
25313 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
25314 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
25315 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
25316 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
25317 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },
25318
25319 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
25320 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
25321 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
25322 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
25323 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
25324 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
25325
25326 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
25327 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
25328 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
25329 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
25330 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
25331
25332 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
25333
25334 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
25335 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
25336 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
25337 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
25338
25339 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
25340 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
25341 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
25342
25343 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25344 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25345 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25346 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25347 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25348 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25349 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25350 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25351
25352 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
25353 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
25354 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
25355 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
25356 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
25357 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
25358 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
25359 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
25360 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
25361 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
25362 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
25363 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
25364 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
25365 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
25366 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
25367 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
25368 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
25369 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
25370 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
25371 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
25372
25373 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25374 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25375 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25376 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25377
25378 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25379 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25380 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25381 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25382
25383 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25384
25385 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25386 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25387 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25388
25389 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
25390
25391 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25392 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25393 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25394 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25395 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25396 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25397 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25398 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25399
25400 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25401 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25402 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25403 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25404 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25405 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25406 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25407 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25408
25409 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25410 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25411
25412 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25413 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25414 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25415 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25416
25417 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25418 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25419
25420 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25421 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25422 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25423 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25424 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25425 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25426
25427 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25428 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25429 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25430 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25431
25432 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25433 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25434 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25435 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25436 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25437 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25438 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25439 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25440
25441 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
25442 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
25443 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
25444
25445 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25446 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
25447
25448 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
25449 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
25450
25451 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
25452
25453 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
25454 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
25455 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
25456 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
25457
25458 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
25459 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
25460 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
25461 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
25462 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
25463 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
25464 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
25465
25466 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
25467 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
25468 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
25469 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
25470 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
25471 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
25472 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
25473
25474 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
25475 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
25476 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
25477 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
25478
25479 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
25480 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
25481 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
25482
25483 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
25484
25485 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
25486 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
25487
25488 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
25489
25490 /* SSE2 MMX */
25491 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
25492 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
25493
25494 /* SSE3 */
25495 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
25496 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
25497
25498 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25499 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25500 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25501 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25502 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25503 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25504
25505 /* SSSE3 */
25506 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
25507 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
25508 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
25509 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
25510 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
25511 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
25512
25513 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25514 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25515 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25516 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25517 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25518 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25519 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25520 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25521 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25522 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25523 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25524 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25525 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
25526 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
25527 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25528 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25529 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25530 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25531 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25532 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25533 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25534 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25535 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25536 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25537
25538 /* SSSE3. */
25539 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
25540 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
25541
25542 /* SSE4.1 */
25543 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
25544 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
25545 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
25546 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
25547 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
25548 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
25549 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
25550 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
25551 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
25552 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
25553
25554 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
25555 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
25556 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
25557 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
25558 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
25559 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
25560 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
25561 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
25562 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
25563 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
25564 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
25565 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
25566 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
25567
25568 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
25569 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25570 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25571 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25572 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25573 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25574 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25575 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25576 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25577 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25578 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
25579 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25580
25581 /* SSE4.1 round and ptest */
25582 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
25583 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
25584 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
25585 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
25586
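/* The floor/ceil/trunc/rint variants below reuse the round patterns;
   the ROUND_* value placed in the comparison-code slot becomes the
   rounding-mode immediate when the builtin is expanded.  */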
25587 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
25588 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
25589 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
25590 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
25591
25592 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
25593
25594 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
25595 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
25596 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
25597 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
25598
25599 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
25600
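/* The three ptest builtins share a single pattern; the EQ/LTU/GTU
   code selects which condition of the PTEST result (ZF set, CF set,
   or both clear) the builtin returns.  */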
25601 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
25602 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
25603 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
25604
25605 /* SSE4.2 */
25606 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25607 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
25608 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
25609 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
25610 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
25611
25612 /* SSE4A */
25613 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
25614 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
25615 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
25616 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25617
25618 /* AES */
25619 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
25620 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
25621
25622 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25623 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25624 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25625 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25626
25627 /* PCLMUL */
25628 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
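/* The AES and PCLMUL entries above carry a null name field; presumably
   their user-visible __builtin_ia32_* names are registered elsewhere,
   since those builtins also depend on -maes / -mpclmul rather than on
   the SSE2 mask recorded here alone.  */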
25629
25630 /* AVX */
25631 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25632 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25633 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25634 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25635 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25636 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25637 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25638 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25639 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25640 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25641 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25642 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25643 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25644 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25645 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25646 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25647 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25648 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25649 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25650 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25651 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25652 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25653 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25654 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25655 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25656 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25657
25658 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
25659 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
25660 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
25661 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
25662
25663 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
25664 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
25665 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
25666 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
25667 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
25668 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
25669 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
25670 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
25671 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
25672 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
25673 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
25674 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
25675 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
25676 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
25677 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
25678 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
25679 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
25680 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
25681 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
25682 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
25683 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
25684 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
25685 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
25686 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
25687 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
25688 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
25689 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
25690 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
25691 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
25692 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
25693 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
25694 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
25695 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
25696 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
25697
25698 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
25699 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
25700 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
25701
25702 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
25703 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
25704 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
25705 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
25706 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
25707
25708 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
25709
25710 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
25711 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
25712
25713 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
25714 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
25715 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
25716 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
25717
25718 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
25719
25720 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
25721 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
25722 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
25723 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
25724
25725 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
25726
25727 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25728 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25729 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25730 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25731
25732 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
25733 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
25734 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
25735 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
25736 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
25737 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
25738
25739 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
25740 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
25741 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
25742 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
25743 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
25744 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
25745 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
25746 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
25747 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
25748 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
25749 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
25750 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
25751 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
25752 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
25753 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
25754
25755 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
25756 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
25757
25758 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25759 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
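/* For illustration only: each AVX entry ties a user-level builtin name
   to an insn pattern and a prototype.  With -mavx, for example,

     __v4df
     add256 (__v4df a, __v4df b)
     {
       return __builtin_ia32_addpd256 (a, b);
     }

   resolves through IX86_BUILTIN_ADDPD256 above and expands via
   CODE_FOR_addv4df3 with the V4DF_FTYPE_V4DF_V4DF prototype.  __v4df is
   assumed to be the usual 256-bit double vector typedef from the AVX
   intrinsic headers.  */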
25760
25761 /* AVX2 */
25762 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
25763 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
25764 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
25765 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
25766 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
25767 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
25768 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
25769 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
25770 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
25771 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25772 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
25773 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
25774 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
25775 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25776 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
25777 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25778 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv4di, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
25779 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
25780 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
25781 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
25782 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25783 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
25784 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
25785 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
25786 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25787 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
25788 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
25789 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
25790 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25791 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
25792 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
25793 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25794 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
25795 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25796 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25797 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
25798 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25799 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
25800 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
25801 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
25802 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25803 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
25804 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
25805 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25806 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
25807 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
25808 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25809 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
25810 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
25811 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25812 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
25813 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
25814 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
25815 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
25816 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
25817 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
25818 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
25819 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
25820 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
25821 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
25822 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
25823 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
25824 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
25825 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
25826 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mulv4siv4di3 , "__builtin_ia32_pmuldq256" , IX86_BUILTIN_PMULDQ256 , UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
25827 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25828 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25829 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25830 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25831 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
25832 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulv4siv4di3 , "__builtin_ia32_pmuludq256" , IX86_BUILTIN_PMULUDQ256 , UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
25833 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
25834 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
25835 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
25836 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
25837 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
25838 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
25839 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
25840 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25841 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
25842 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlqv4di3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
25843 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
25844 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
25845 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
25846 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
25847 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
25848 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
25849 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
25850 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
25851 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
25852 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
25853 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrqv4di3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
25854 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
25855 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
25856 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
25857 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
25858 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
25859 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
25860 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
25861 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25862 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
25863 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
25864 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
25865 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25866 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
25867 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25868 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
25869 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25870 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
25871 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
25872 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
25873 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25874 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
25875 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
25876 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
25877 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
25878 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
25879 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
25880 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
25881 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
25882 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
25883 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
25884 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
25885 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
25886 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
25887 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
25888 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
25889 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
25890 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
25891 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
25892 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
25893 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25894 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
25895 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
25896 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
25897 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
25898 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
25899 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25900 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
25901 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25902 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
25903 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25904 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
25905 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25906 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
25907 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
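/* A note on the AVX2 shift entries above: the *_SI_COUNT and
   *_INT_COUNT prototypes are the immediate-count forms (psllwi256,
   pslldi256, psllqi256, ...), while the *_V8HI_COUNT, *_V4SI_COUNT and
   *_V2DI_COUNT forms take the shift count in an XMM operand (psllw256,
   pslld256, psllq256, ...).  The _INT_CONVERT suffix on palignr256
   appears to mark an entry whose vector operands are reinterpreted in
   the V4DI mode of the insn pattern.  */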
25908
25909 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
25910
25911 /* BMI */
25912 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
25913 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
25914 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
25915
25916 /* TBM */
25917 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
25918 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
25919
25920 /* F16C */
25921 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
25922 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
25923 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
25924 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
25925
25926 /* BMI2 */
25927 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
25928 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
25929 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
25930 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
25931 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
25932 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
25933 };
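/* For illustration only: the scalar BMI, TBM, F16C and BMI2 entries at
   the end of the table follow the same scheme as the vector ones.  With
   -mbmi2, for instance,

     unsigned int
     scatter_bits (unsigned int src, unsigned int mask)
     {
       return __builtin_ia32_pdep_si (src, mask);
     }

   maps to IX86_BUILTIN_PDEP32 / CODE_FOR_bmi2_pdep_si3 above.  */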
25934
25935 /* FMA4 and XOP. */
25936 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
25937 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
25938 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
25939 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
25940 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
25941 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
25942 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
25943 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
25944 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
25945 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
25946 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
25947 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
25948 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
25949 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
25950 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
25951 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
25952 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
25953 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
25954 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
25955 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
25956 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
25957 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
25958 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
25959 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
25960 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
25961 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
25962 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
25963 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
25964 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
25965 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
25966 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
25967 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
25968 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
25969 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
25970 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
25971 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
25972 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
25973 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
25974 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
25975 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
25976 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
25977 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
25978 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
25979 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
25980 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
25981 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
25982 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
25983 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
25984 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
25985 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
25986 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
25987 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
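/* Reading the MULTI_ARG_* shorthands above: the digit gives the number
   of vector arguments, the element tag (SF, DF, DI, SI, HI, QI) gives
   the element type, and a trailing "2" marks the 256-bit variant.  The
   _IMM forms take an immediate in place of a second vector, while the
   _CMP and _TF forms appear to mark entries whose condition is taken
   from the comparison field of the table entry.  */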
25988
25989 static const struct builtin_description bdesc_multi_arg[] =
25990 {
25991 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
25992 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
25993 UNKNOWN, (int)MULTI_ARG_3_SF },
25994 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
25995 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
25996 UNKNOWN, (int)MULTI_ARG_3_DF },
25997
25998 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
25999 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
26000 UNKNOWN, (int)MULTI_ARG_3_SF },
26001 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
26002 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
26003 UNKNOWN, (int)MULTI_ARG_3_DF },
26004
26005 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
26006 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
26007 UNKNOWN, (int)MULTI_ARG_3_SF },
26008 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
26009 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
26010 UNKNOWN, (int)MULTI_ARG_3_DF },
26011 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
26012 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
26013 UNKNOWN, (int)MULTI_ARG_3_SF2 },
26014 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
26015 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
26016 UNKNOWN, (int)MULTI_ARG_3_DF2 },
26017
26018 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
26019 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
26020 UNKNOWN, (int)MULTI_ARG_3_SF },
26021 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
26022 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
26023 UNKNOWN, (int)MULTI_ARG_3_DF },
26024 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
26025 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
26026 UNKNOWN, (int)MULTI_ARG_3_SF2 },
26027 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
26028 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
26029 UNKNOWN, (int)MULTI_ARG_3_DF2 },
26030
26031 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
26032 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
26033 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
26034 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
26035 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
26036 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
26037 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
26038
26039 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
26040 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
26041 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
26042 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
26043 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
26044 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
26045 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
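/* For illustration only: the vpcmov entries describe XOP's three-operand
   bitwise conditional move.  Assuming -mxop and the __v2di vector
   typedef from the intrinsic headers,

     __v2di
     blend_bits (__v2di a, __v2di b, __v2di sel)
     {
       return __builtin_ia32_vpcmov (a, b, sel);
     }

   goes through IX86_BUILTIN_VPCMOV / CODE_FOR_xop_pcmov_v2di above with
   the MULTI_ARG_3_DI (V2DI_FTYPE_V2DI_V2DI_V2DI) prototype.  */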
26046
26047 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
26048
26049 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
26050 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
26051 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
26052 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
26053 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
26054 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
26055 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
26056 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
26057 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
26058 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
26059 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
26060 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
26061
26062 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
26063 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
26064 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
26065 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
26066 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
26067 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
26068 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
26069 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
26070 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
26071 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
26072 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
26073 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
26074 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
26075 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
26076 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
26077 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
26078
26079 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
26080 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
26081 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
26082 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
26083 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
26084 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
26085
26086 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
26087 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
26088 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
26089 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
26090 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
26091 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
26092 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
26093 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
26094 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
26095 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
26096 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
26097 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
26098 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
26099 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
26100 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
26101
26102 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
26103 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
26104 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
26105 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
26106 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
26107 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
26108 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
26109
26110 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
26111 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
26112 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
26113 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
26114 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
26115 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
26116 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
26117
26118 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
26119 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
26120 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
26121 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
26122 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
26123 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
26124 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
26125
26126 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
26127 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
26128 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
26129 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
26130 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
26131 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
26132 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
26133
26134 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
26135 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
26136 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
26137 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
26138 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
26139 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
26140 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
26141
26142 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
26143 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
26144 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
26145 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
26146 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
26147 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
26148 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
26149
26150 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
26151 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
26152 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
26153 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
26154 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
26155 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
26156 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
26157
26158 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
26159 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
26160 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
26161 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
26162 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
26163 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
26164 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
26165
26166 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
26167 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
26168 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
26169 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
26170 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
26171 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
26172 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
26173 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
26174
26175 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
26176 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
26177 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
26178 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
26179 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
26180 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
26181 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
26182 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
26183
26184 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
26185 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
26186 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
26187 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
26188
26189 };
26190
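/* Illustrative sketch (not part of this file): each entry in the table
   above supplies the builtin name, insn code, rtx comparison code and
   function-type flag that def_builtin_const and
   ix86_expand_multi_arg_builtin consume below.  For example, with -mxop
   the LT entry for "__builtin_ia32_vpcomltb" can be used from user code as

     typedef char __v16qi __attribute__ ((__vector_size__ (16)));

     __v16qi
     bytes_less_than (__v16qi a, __v16qi b)
     {
       return __builtin_ia32_vpcomltb (a, b);
     }

   where each true byte element is expected to come back as all-ones; the
   typedef is an assumption standing in for the <x86intrin.h> one.  */
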
26191 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
26192 in the current target ISA, to allow the user to compile particular modules
26193 with target-specific options that differ from the command-line
26194 options. */
26195 static void
26196 ix86_init_mmx_sse_builtins (void)
26197 {
26198 const struct builtin_description * d;
26199 enum ix86_builtin_func_type ftype;
26200 size_t i;
26201
26202 /* Add all special builtins with variable number of operands. */
26203 for (i = 0, d = bdesc_special_args;
26204 i < ARRAY_SIZE (bdesc_special_args);
26205 i++, d++)
26206 {
26207 if (d->name == 0)
26208 continue;
26209
26210 ftype = (enum ix86_builtin_func_type) d->flag;
26211 def_builtin (d->mask, d->name, ftype, d->code);
26212 }
26213
26214 /* Add all builtins with variable number of operands. */
26215 for (i = 0, d = bdesc_args;
26216 i < ARRAY_SIZE (bdesc_args);
26217 i++, d++)
26218 {
26219 if (d->name == 0)
26220 continue;
26221
26222 ftype = (enum ix86_builtin_func_type) d->flag;
26223 def_builtin_const (d->mask, d->name, ftype, d->code);
26224 }
26225
26226 /* pcmpestr[im] insns. */
26227 for (i = 0, d = bdesc_pcmpestr;
26228 i < ARRAY_SIZE (bdesc_pcmpestr);
26229 i++, d++)
26230 {
26231 if (d->code == IX86_BUILTIN_PCMPESTRM128)
26232 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
26233 else
26234 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
26235 def_builtin_const (d->mask, d->name, ftype, d->code);
26236 }
26237
26238 /* pcmpistr[im] insns. */
26239 for (i = 0, d = bdesc_pcmpistr;
26240 i < ARRAY_SIZE (bdesc_pcmpistr);
26241 i++, d++)
26242 {
26243 if (d->code == IX86_BUILTIN_PCMPISTRM128)
26244 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
26245 else
26246 ftype = INT_FTYPE_V16QI_V16QI_INT;
26247 def_builtin_const (d->mask, d->name, ftype, d->code);
26248 }
26249
26250 /* comi/ucomi insns. */
26251 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
26252 {
26253 if (d->mask == OPTION_MASK_ISA_SSE2)
26254 ftype = INT_FTYPE_V2DF_V2DF;
26255 else
26256 ftype = INT_FTYPE_V4SF_V4SF;
26257 def_builtin_const (d->mask, d->name, ftype, d->code);
26258 }
26259
26260 /* SSE */
26261 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
26262 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
26263 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
26264 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
26265
26266 /* SSE or 3DNow!A */
26267 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
26268 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
26269 IX86_BUILTIN_MASKMOVQ);
26270
26271 /* SSE2 */
26272 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
26273 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
26274
26275 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
26276 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
26277 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
26278 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
26279
26280 /* SSE3. */
26281 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
26282 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
26283 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
26284 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
26285
26286 /* AES */
26287 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
26288 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
26289 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
26290 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
26291 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
26292 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
26293 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
26294 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
26295 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
26296 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
26297 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
26298 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
26299
26300 /* PCLMUL */
26301 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
26302 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
26303
26304 /* RDRND */
26305 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
26306 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
26307 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
26308 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
26309 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
26310 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
26311 IX86_BUILTIN_RDRAND64_STEP);
26312
26313 /* AVX2 */
26314 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
26315 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
26316 IX86_BUILTIN_GATHERSIV2DF);
26317
26318 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
26319 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
26320 IX86_BUILTIN_GATHERSIV4DF);
26321
26322 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
26323 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
26324 IX86_BUILTIN_GATHERDIV2DF);
26325
26326 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
26327 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
26328 IX86_BUILTIN_GATHERDIV4DF);
26329
26330 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
26331 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
26332 IX86_BUILTIN_GATHERSIV4SF);
26333
26334 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
26335 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
26336 IX86_BUILTIN_GATHERSIV8SF);
26337
26338 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
26339 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
26340 IX86_BUILTIN_GATHERDIV4SF);
26341
26342 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
26343 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
26344 IX86_BUILTIN_GATHERDIV8SF);
26345
26346 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
26347 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
26348 IX86_BUILTIN_GATHERSIV2DI);
26349
26350 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
26351 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
26352 IX86_BUILTIN_GATHERSIV4DI);
26353
26354 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
26355 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
26356 IX86_BUILTIN_GATHERDIV2DI);
26357
26358 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
26359 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
26360 IX86_BUILTIN_GATHERDIV4DI);
26361
26362 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
26363 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
26364 IX86_BUILTIN_GATHERSIV4SI);
26365
26366 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
26367 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
26368 IX86_BUILTIN_GATHERSIV8SI);
26369
26370 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
26371 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
26372 IX86_BUILTIN_GATHERDIV4SI);
26373
26374 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
26375 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
26376 IX86_BUILTIN_GATHERDIV8SI);
26377
26378 /* MMX access to the vec_init patterns. */
26379 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
26380 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
26381
26382 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
26383 V4HI_FTYPE_HI_HI_HI_HI,
26384 IX86_BUILTIN_VEC_INIT_V4HI);
26385
26386 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
26387 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
26388 IX86_BUILTIN_VEC_INIT_V8QI);
26389
26390 /* Access to the vec_extract patterns. */
26391 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
26392 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
26393 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
26394 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
26395 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
26396 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
26397 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
26398 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
26399 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
26400 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
26401
26402 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
26403 "__builtin_ia32_vec_ext_v4hi",
26404 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
26405
26406 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
26407 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
26408
26409 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
26410 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
26411
26412 /* Access to the vec_set patterns. */
26413 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
26414 "__builtin_ia32_vec_set_v2di",
26415 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
26416
26417 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
26418 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
26419
26420 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
26421 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
26422
26423 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
26424 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
26425
26426 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
26427 "__builtin_ia32_vec_set_v4hi",
26428 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
26429
26430 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
26431 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
26432
26433 /* Add FMA4 and XOP multi-argument instructions. */
26434 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
26435 {
26436 if (d->name == 0)
26437 continue;
26438
26439 ftype = (enum ix86_builtin_func_type) d->flag;
26440 def_builtin_const (d->mask, d->name, ftype, d->code);
26441 }
26442 }
26443
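/* Illustrative sketch (not part of this file): once registered by
   def_builtin above, these builtins are directly callable from user code.
   For instance, the MXCSR accessors defined in this function:

     unsigned int
     enable_flush_to_zero (void)
     {
       unsigned int old = __builtin_ia32_stmxcsr ();
       __builtin_ia32_ldmxcsr (old | 0x8000);
       return old;
     }

   The 0x8000 flush-to-zero bit is taken from the Intel manuals and is an
   assumption as far as this file is concerned.  */
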
26444 /* Internal helper for ix86_init_builtins; defines the ms_abi and sysv_abi va_* builtins. */
26445
26446 static void
26447 ix86_init_builtins_va_builtins_abi (void)
26448 {
26449 tree ms_va_ref, sysv_va_ref;
26450 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
26451 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
26452 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
26453 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
26454
26455 if (!TARGET_64BIT)
26456 return;
26457 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
26458 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
26459 ms_va_ref = build_reference_type (ms_va_list_type_node);
26460 sysv_va_ref =
26461 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
26462
26463 fnvoid_va_end_ms =
26464 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
26465 fnvoid_va_start_ms =
26466 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
26467 fnvoid_va_end_sysv =
26468 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
26469 fnvoid_va_start_sysv =
26470 build_varargs_function_type_list (void_type_node, sysv_va_ref,
26471 NULL_TREE);
26472 fnvoid_va_copy_ms =
26473 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
26474 NULL_TREE);
26475 fnvoid_va_copy_sysv =
26476 build_function_type_list (void_type_node, sysv_va_ref,
26477 sysv_va_ref, NULL_TREE);
26478
26479 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
26480 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
26481 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
26482 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
26483 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
26484 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
26485 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
26486 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
26487 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
26488 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
26489 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
26490 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
26491 }
26492
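/* Illustrative sketch (not part of this file): the builtins registered
   above back variadic argument handling for ms_abi functions on 64-bit
   targets, e.g.

     int __attribute__ ((ms_abi))
     sum_ints (int count, ...)
     {
       __builtin_ms_va_list ap;
       int i, total = 0;

       __builtin_ms_va_start (ap, count);
       for (i = 0; i < count; i++)
         total += __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return total;
     }

   The __builtin_ms_va_list type name is an assumption here; the start,
   end and copy builtins are the ones added just above.  */
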
26493 static void
26494 ix86_init_builtin_types (void)
26495 {
26496 tree float128_type_node, float80_type_node;
26497
26498 /* The __float80 type. */
26499 float80_type_node = long_double_type_node;
26500 if (TYPE_MODE (float80_type_node) != XFmode)
26501 {
26502 /* The __float80 type. */
26503 float80_type_node = make_node (REAL_TYPE);
26504
26505 TYPE_PRECISION (float80_type_node) = 80;
26506 layout_type (float80_type_node);
26507 }
26508 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
26509
26510 /* The __float128 type. */
26511 float128_type_node = make_node (REAL_TYPE);
26512 TYPE_PRECISION (float128_type_node) = 128;
26513 layout_type (float128_type_node);
26514 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
26515
26516 /* This macro is built by i386-builtin-types.awk. */
26517 DEFINE_BUILTIN_PRIMITIVE_TYPES;
26518 }
26519
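/* Illustrative sketch (not part of this file): the two types registered
   above are directly usable from C on x86 targets, for example

     __float80  e = 1.0w;
     __float128 q = 1.0q;

   where the 'w' and 'q' literal suffixes are assumed to be accepted by
   the front end being used.  */
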
26520 static void
26521 ix86_init_builtins (void)
26522 {
26523 tree t;
26524
26525 ix86_init_builtin_types ();
26526
26527 /* TFmode support builtins. */
26528 def_builtin_const (0, "__builtin_infq",
26529 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
26530 def_builtin_const (0, "__builtin_huge_valq",
26531 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
26532
26533 /* We will expand them to a normal call if SSE2 isn't available,
26534 since they are used by libgcc. */
26535 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
26536 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
26537 BUILT_IN_MD, "__fabstf2", NULL_TREE);
26538 TREE_READONLY (t) = 1;
26539 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
26540
26541 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
26542 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
26543 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
26544 TREE_READONLY (t) = 1;
26545 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
26546
26547 ix86_init_mmx_sse_builtins ();
26548
26549 if (TARGET_LP64)
26550 ix86_init_builtins_va_builtins_abi ();
26551
26552 #ifdef SUBTARGET_INIT_BUILTINS
26553 SUBTARGET_INIT_BUILTINS;
26554 #endif
26555 }
26556
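/* Illustrative sketch (not part of this file): the TFmode builtins defined
   above compose in the obvious way, e.g.

     __float128
     negative_magnitude (__float128 x)
     {
       return __builtin_copysignq (__builtin_fabsq (x), -1.0);
     }

   and, per the comment above, expand to calls to __fabstf2 and
   __copysigntf3 when SSE2 is not available.  */
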
26557 /* Return the ix86 builtin for CODE. */
26558
26559 static tree
26560 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
26561 {
26562 if (code >= IX86_BUILTIN_MAX)
26563 return error_mark_node;
26564
26565 return ix86_builtins[code];
26566 }
26567
26568 /* Errors in the source file can cause expand_expr to return const0_rtx
26569 where we expect a vector. To avoid crashing, use one of the vector
26570 clear instructions. */
26571 static rtx
26572 safe_vector_operand (rtx x, enum machine_mode mode)
26573 {
26574 if (x == const0_rtx)
26575 x = CONST0_RTX (mode);
26576 return x;
26577 }
26578
26579 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
26580
26581 static rtx
26582 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
26583 {
26584 rtx pat;
26585 tree arg0 = CALL_EXPR_ARG (exp, 0);
26586 tree arg1 = CALL_EXPR_ARG (exp, 1);
26587 rtx op0 = expand_normal (arg0);
26588 rtx op1 = expand_normal (arg1);
26589 enum machine_mode tmode = insn_data[icode].operand[0].mode;
26590 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
26591 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
26592
26593 if (VECTOR_MODE_P (mode0))
26594 op0 = safe_vector_operand (op0, mode0);
26595 if (VECTOR_MODE_P (mode1))
26596 op1 = safe_vector_operand (op1, mode1);
26597
26598 if (optimize || !target
26599 || GET_MODE (target) != tmode
26600 || !insn_data[icode].operand[0].predicate (target, tmode))
26601 target = gen_reg_rtx (tmode);
26602
26603 if (GET_MODE (op1) == SImode && mode1 == TImode)
26604 {
26605 rtx x = gen_reg_rtx (V4SImode);
26606 emit_insn (gen_sse2_loadd (x, op1));
26607 op1 = gen_lowpart (TImode, x);
26608 }
26609
26610 if (!insn_data[icode].operand[1].predicate (op0, mode0))
26611 op0 = copy_to_mode_reg (mode0, op0);
26612 if (!insn_data[icode].operand[2].predicate (op1, mode1))
26613 op1 = copy_to_mode_reg (mode1, op1);
26614
26615 pat = GEN_FCN (icode) (target, op0, op1);
26616 if (! pat)
26617 return 0;
26618
26619 emit_insn (pat);
26620
26621 return target;
26622 }
26623
26624 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
26625
26626 static rtx
26627 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
26628 enum ix86_builtin_func_type m_type,
26629 enum rtx_code sub_code)
26630 {
26631 rtx pat;
26632 int i;
26633 int nargs;
26634 bool comparison_p = false;
26635 bool tf_p = false;
26636 bool last_arg_constant = false;
26637 int num_memory = 0;
26638 struct {
26639 rtx op;
26640 enum machine_mode mode;
26641 } args[4];
26642
26643 enum machine_mode tmode = insn_data[icode].operand[0].mode;
26644
26645 switch (m_type)
26646 {
26647 case MULTI_ARG_4_DF2_DI_I:
26648 case MULTI_ARG_4_DF2_DI_I1:
26649 case MULTI_ARG_4_SF2_SI_I:
26650 case MULTI_ARG_4_SF2_SI_I1:
26651 nargs = 4;
26652 last_arg_constant = true;
26653 break;
26654
26655 case MULTI_ARG_3_SF:
26656 case MULTI_ARG_3_DF:
26657 case MULTI_ARG_3_SF2:
26658 case MULTI_ARG_3_DF2:
26659 case MULTI_ARG_3_DI:
26660 case MULTI_ARG_3_SI:
26661 case MULTI_ARG_3_SI_DI:
26662 case MULTI_ARG_3_HI:
26663 case MULTI_ARG_3_HI_SI:
26664 case MULTI_ARG_3_QI:
26665 case MULTI_ARG_3_DI2:
26666 case MULTI_ARG_3_SI2:
26667 case MULTI_ARG_3_HI2:
26668 case MULTI_ARG_3_QI2:
26669 nargs = 3;
26670 break;
26671
26672 case MULTI_ARG_2_SF:
26673 case MULTI_ARG_2_DF:
26674 case MULTI_ARG_2_DI:
26675 case MULTI_ARG_2_SI:
26676 case MULTI_ARG_2_HI:
26677 case MULTI_ARG_2_QI:
26678 nargs = 2;
26679 break;
26680
26681 case MULTI_ARG_2_DI_IMM:
26682 case MULTI_ARG_2_SI_IMM:
26683 case MULTI_ARG_2_HI_IMM:
26684 case MULTI_ARG_2_QI_IMM:
26685 nargs = 2;
26686 last_arg_constant = true;
26687 break;
26688
26689 case MULTI_ARG_1_SF:
26690 case MULTI_ARG_1_DF:
26691 case MULTI_ARG_1_SF2:
26692 case MULTI_ARG_1_DF2:
26693 case MULTI_ARG_1_DI:
26694 case MULTI_ARG_1_SI:
26695 case MULTI_ARG_1_HI:
26696 case MULTI_ARG_1_QI:
26697 case MULTI_ARG_1_SI_DI:
26698 case MULTI_ARG_1_HI_DI:
26699 case MULTI_ARG_1_HI_SI:
26700 case MULTI_ARG_1_QI_DI:
26701 case MULTI_ARG_1_QI_SI:
26702 case MULTI_ARG_1_QI_HI:
26703 nargs = 1;
26704 break;
26705
26706 case MULTI_ARG_2_DI_CMP:
26707 case MULTI_ARG_2_SI_CMP:
26708 case MULTI_ARG_2_HI_CMP:
26709 case MULTI_ARG_2_QI_CMP:
26710 nargs = 2;
26711 comparison_p = true;
26712 break;
26713
26714 case MULTI_ARG_2_SF_TF:
26715 case MULTI_ARG_2_DF_TF:
26716 case MULTI_ARG_2_DI_TF:
26717 case MULTI_ARG_2_SI_TF:
26718 case MULTI_ARG_2_HI_TF:
26719 case MULTI_ARG_2_QI_TF:
26720 nargs = 2;
26721 tf_p = true;
26722 break;
26723
26724 default:
26725 gcc_unreachable ();
26726 }
26727
26728 if (optimize || !target
26729 || GET_MODE (target) != tmode
26730 || !insn_data[icode].operand[0].predicate (target, tmode))
26731 target = gen_reg_rtx (tmode);
26732
26733 gcc_assert (nargs <= 4);
26734
26735 for (i = 0; i < nargs; i++)
26736 {
26737 tree arg = CALL_EXPR_ARG (exp, i);
26738 rtx op = expand_normal (arg);
26739 int adjust = (comparison_p) ? 1 : 0;
26740 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
26741
26742 if (last_arg_constant && i == nargs - 1)
26743 {
26744 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
26745 {
26746 enum insn_code new_icode = icode;
26747 switch (icode)
26748 {
26749 case CODE_FOR_xop_vpermil2v2df3:
26750 case CODE_FOR_xop_vpermil2v4sf3:
26751 case CODE_FOR_xop_vpermil2v4df3:
26752 case CODE_FOR_xop_vpermil2v8sf3:
26753 error ("the last argument must be a 2-bit immediate");
26754 return gen_reg_rtx (tmode);
26755 case CODE_FOR_xop_rotlv2di3:
26756 new_icode = CODE_FOR_rotlv2di3;
26757 goto xop_rotl;
26758 case CODE_FOR_xop_rotlv4si3:
26759 new_icode = CODE_FOR_rotlv4si3;
26760 goto xop_rotl;
26761 case CODE_FOR_xop_rotlv8hi3:
26762 new_icode = CODE_FOR_rotlv8hi3;
26763 goto xop_rotl;
26764 case CODE_FOR_xop_rotlv16qi3:
26765 new_icode = CODE_FOR_rotlv16qi3;
26766 xop_rotl:
26767 if (CONST_INT_P (op))
26768 {
26769 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
26770 op = GEN_INT (INTVAL (op) & mask);
26771 gcc_checking_assert
26772 (insn_data[icode].operand[i + 1].predicate (op, mode));
26773 }
26774 else
26775 {
26776 gcc_checking_assert
26777 (nargs == 2
26778 && insn_data[new_icode].operand[0].mode == tmode
26779 && insn_data[new_icode].operand[1].mode == tmode
26780 && insn_data[new_icode].operand[2].mode == mode
26781 && insn_data[new_icode].operand[0].predicate
26782 == insn_data[icode].operand[0].predicate
26783 && insn_data[new_icode].operand[1].predicate
26784 == insn_data[icode].operand[1].predicate);
26785 icode = new_icode;
26786 goto non_constant;
26787 }
26788 break;
26789 default:
26790 gcc_unreachable ();
26791 }
26792 }
26793 }
26794 else
26795 {
26796 non_constant:
26797 if (VECTOR_MODE_P (mode))
26798 op = safe_vector_operand (op, mode);
26799
26800 /* If we aren't optimizing, only allow one memory operand to be
26801 generated. */
26802 if (memory_operand (op, mode))
26803 num_memory++;
26804
26805 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
26806
26807 if (optimize
26808 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
26809 || num_memory > 1)
26810 op = force_reg (mode, op);
26811 }
26812
26813 args[i].op = op;
26814 args[i].mode = mode;
26815 }
26816
26817 switch (nargs)
26818 {
26819 case 1:
26820 pat = GEN_FCN (icode) (target, args[0].op);
26821 break;
26822
26823 case 2:
26824 if (tf_p)
26825 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
26826 GEN_INT ((int)sub_code));
26827 else if (! comparison_p)
26828 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
26829 else
26830 {
26831 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
26832 args[0].op,
26833 args[1].op);
26834
26835 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
26836 }
26837 break;
26838
26839 case 3:
26840 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
26841 break;
26842
26843 case 4:
26844 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
26845 break;
26846
26847 default:
26848 gcc_unreachable ();
26849 }
26850
26851 if (! pat)
26852 return 0;
26853
26854 emit_insn (pat);
26855 return target;
26856 }
26857
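/* Illustrative sketch (not part of this file): for the MULTI_ARG_4_*
   cases handled above the final operand must be a compile-time constant,
   so a call such as

     typedef double    __v2df __attribute__ ((__vector_size__ (16)));
     typedef long long __v2di __attribute__ ((__vector_size__ (16)));

     __v2df
     permute_pair (__v2df a, __v2df b, __v2di sel)
     {
       return __builtin_ia32_vpermil2pd (a, b, sel, 0);
     }

   is accepted, while a non-constant (or out-of-range) last argument
   reaches the "last argument must be a 2-bit immediate" error above.
   The typedefs are assumptions standing in for the <x86intrin.h> ones.  */
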
26858 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
26859 insns with vec_merge. */
26860
26861 static rtx
26862 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
26863 rtx target)
26864 {
26865 rtx pat;
26866 tree arg0 = CALL_EXPR_ARG (exp, 0);
26867 rtx op1, op0 = expand_normal (arg0);
26868 enum machine_mode tmode = insn_data[icode].operand[0].mode;
26869 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
26870
26871 if (optimize || !target
26872 || GET_MODE (target) != tmode
26873 || !insn_data[icode].operand[0].predicate (target, tmode))
26874 target = gen_reg_rtx (tmode);
26875
26876 if (VECTOR_MODE_P (mode0))
26877 op0 = safe_vector_operand (op0, mode0);
26878
26879 if ((optimize && !register_operand (op0, mode0))
26880 || !insn_data[icode].operand[1].predicate (op0, mode0))
26881 op0 = copy_to_mode_reg (mode0, op0);
26882
26883 op1 = op0;
26884 if (!insn_data[icode].operand[2].predicate (op1, mode0))
26885 op1 = copy_to_mode_reg (mode0, op1);
26886
26887 pat = GEN_FCN (icode) (target, op0, op1);
26888 if (! pat)
26889 return 0;
26890 emit_insn (pat);
26891 return target;
26892 }
26893
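/* Illustrative sketch (not part of this file): the VEC_MERGE expansion
   above duplicates the single source operand so that a scalar unop keeps
   the upper elements of its destination.  Assuming a scalar SSE builtin
   such as __builtin_ia32_sqrtss takes this path, user code looks like

     typedef float __v4sf __attribute__ ((__vector_size__ (16)));

     __v4sf
     sqrt_low_element (__v4sf x)
     {
       return __builtin_ia32_sqrtss (x);
     }

   where only element 0 is replaced by its square root.  */
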
26894 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
26895
26896 static rtx
26897 ix86_expand_sse_compare (const struct builtin_description *d,
26898 tree exp, rtx target, bool swap)
26899 {
26900 rtx pat;
26901 tree arg0 = CALL_EXPR_ARG (exp, 0);
26902 tree arg1 = CALL_EXPR_ARG (exp, 1);
26903 rtx op0 = expand_normal (arg0);
26904 rtx op1 = expand_normal (arg1);
26905 rtx op2;
26906 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
26907 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
26908 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
26909 enum rtx_code comparison = d->comparison;
26910
26911 if (VECTOR_MODE_P (mode0))
26912 op0 = safe_vector_operand (op0, mode0);
26913 if (VECTOR_MODE_P (mode1))
26914 op1 = safe_vector_operand (op1, mode1);
26915
26916 /* Swap operands if we have a comparison that isn't available in
26917 hardware. */
26918 if (swap)
26919 {
26920 rtx tmp = gen_reg_rtx (mode1);
26921 emit_move_insn (tmp, op1);
26922 op1 = op0;
26923 op0 = tmp;
26924 }
26925
26926 if (optimize || !target
26927 || GET_MODE (target) != tmode
26928 || !insn_data[d->icode].operand[0].predicate (target, tmode))
26929 target = gen_reg_rtx (tmode);
26930
26931 if ((optimize && !register_operand (op0, mode0))
26932 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
26933 op0 = copy_to_mode_reg (mode0, op0);
26934 if ((optimize && !register_operand (op1, mode1))
26935 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
26936 op1 = copy_to_mode_reg (mode1, op1);
26937
26938 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
26939 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
26940 if (! pat)
26941 return 0;
26942 emit_insn (pat);
26943 return target;
26944 }
26945
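/* Illustrative sketch (not part of this file): the swap path above lets a
   comparison that hardware only provides in one direction be exposed in
   the other as well; assuming the usual builtin name, a "greater than"
   packed-float compare

     typedef float __v4sf __attribute__ ((__vector_size__ (16)));

     __v4sf
     greater_mask (__v4sf a, __v4sf b)
     {
       return __builtin_ia32_cmpgtps (a, b);
     }

   is expanded as a "less than" with the operands exchanged, yielding the
   usual all-ones / all-zeros per-element mask.  */
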
26946 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
26947
26948 static rtx
26949 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
26950 rtx target)
26951 {
26952 rtx pat;
26953 tree arg0 = CALL_EXPR_ARG (exp, 0);
26954 tree arg1 = CALL_EXPR_ARG (exp, 1);
26955 rtx op0 = expand_normal (arg0);
26956 rtx op1 = expand_normal (arg1);
26957 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
26958 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
26959 enum rtx_code comparison = d->comparison;
26960
26961 if (VECTOR_MODE_P (mode0))
26962 op0 = safe_vector_operand (op0, mode0);
26963 if (VECTOR_MODE_P (mode1))
26964 op1 = safe_vector_operand (op1, mode1);
26965
26966 /* Swap operands if we have a comparison that isn't available in
26967 hardware. */
26968 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
26969 {
26970 rtx tmp = op1;
26971 op1 = op0;
26972 op0 = tmp;
26973 }
26974
26975 target = gen_reg_rtx (SImode);
26976 emit_move_insn (target, const0_rtx);
26977 target = gen_rtx_SUBREG (QImode, target, 0);
26978
26979 if ((optimize && !register_operand (op0, mode0))
26980 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
26981 op0 = copy_to_mode_reg (mode0, op0);
26982 if ((optimize && !register_operand (op1, mode1))
26983 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
26984 op1 = copy_to_mode_reg (mode1, op1);
26985
26986 pat = GEN_FCN (d->icode) (op0, op1);
26987 if (! pat)
26988 return 0;
26989 emit_insn (pat);
26990 emit_insn (gen_rtx_SET (VOIDmode,
26991 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26992 gen_rtx_fmt_ee (comparison, QImode,
26993 SET_DEST (pat),
26994 const0_rtx)));
26995
26996 return SUBREG_REG (target);
26997 }
26998
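/* Illustrative sketch (not part of this file): unlike the packed compares,
   the COMI expansion above materializes a scalar 0/1 result from the
   flags register, comparing only element 0 of each operand:

     typedef float __v4sf __attribute__ ((__vector_size__ (16)));

     int
     first_elements_equal (__v4sf a, __v4sf b)
     {
       return __builtin_ia32_comieq (a, b);
     }

   The __builtin_ia32_comieq name is an assumption here.  */
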
26999 /* Subroutine of ix86_expand_args_builtin to take care of round insns. */
27000
27001 static rtx
27002 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
27003 rtx target)
27004 {
27005 rtx pat;
27006 tree arg0 = CALL_EXPR_ARG (exp, 0);
27007 rtx op1, op0 = expand_normal (arg0);
27008 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
27009 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
27010
27011 if (optimize || target == 0
27012 || GET_MODE (target) != tmode
27013 || !insn_data[d->icode].operand[0].predicate (target, tmode))
27014 target = gen_reg_rtx (tmode);
27015
27016 if (VECTOR_MODE_P (mode0))
27017 op0 = safe_vector_operand (op0, mode0);
27018
27019 if ((optimize && !register_operand (op0, mode0))
27020 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
27021 op0 = copy_to_mode_reg (mode0, op0);
27022
27023 op1 = GEN_INT (d->comparison);
27024
27025 pat = GEN_FCN (d->icode) (target, op0, op1);
27026 if (! pat)
27027 return 0;
27028 emit_insn (pat);
27029 return target;
27030 }
27031
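/* Illustrative sketch (not part of this file): for the *_ROUND cases the
   rounding-mode immediate is not a user argument; it is taken from
   d->comparison and appended as op1 above, so such a builtin takes only
   the vector operand:

     typedef double __v2df __attribute__ ((__vector_size__ (16)));

     __v2df
     floor_pair (__v2df x)
     {
       return __builtin_ia32_floorpd (x);
     }

   The __builtin_ia32_floorpd name is an assumption here.  */
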
27032 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
27033
27034 static rtx
27035 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
27036 rtx target)
27037 {
27038 rtx pat;
27039 tree arg0 = CALL_EXPR_ARG (exp, 0);
27040 tree arg1 = CALL_EXPR_ARG (exp, 1);
27041 rtx op0 = expand_normal (arg0);
27042 rtx op1 = expand_normal (arg1);
27043 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
27044 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
27045 enum rtx_code comparison = d->comparison;
27046
27047 if (VECTOR_MODE_P (mode0))
27048 op0 = safe_vector_operand (op0, mode0);
27049 if (VECTOR_MODE_P (mode1))
27050 op1 = safe_vector_operand (op1, mode1);
27051
27052 target = gen_reg_rtx (SImode);
27053 emit_move_insn (target, const0_rtx);
27054 target = gen_rtx_SUBREG (QImode, target, 0);
27055
27056 if ((optimize && !register_operand (op0, mode0))
27057 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
27058 op0 = copy_to_mode_reg (mode0, op0);
27059 if ((optimize && !register_operand (op1, mode1))
27060 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
27061 op1 = copy_to_mode_reg (mode1, op1);
27062
27063 pat = GEN_FCN (d->icode) (op0, op1);
27064 if (! pat)
27065 return 0;
27066 emit_insn (pat);
27067 emit_insn (gen_rtx_SET (VOIDmode,
27068 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
27069 gen_rtx_fmt_ee (comparison, QImode,
27070 SET_DEST (pat),
27071 const0_rtx)));
27072
27073 return SUBREG_REG (target);
27074 }
27075
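/* Illustrative sketch (not part of this file): the PTEST expansion above
   likewise produces a scalar 0/1 value from the flags register; a
   "vector is all zero" test reads

     typedef long long __v2di __attribute__ ((__vector_size__ (16)));

     int
     is_all_zero (__v2di x)
     {
       return __builtin_ia32_ptestz128 (x, x);
     }

   The __builtin_ia32_ptestz128 name is an assumption here.  */
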
27076 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
27077
27078 static rtx
27079 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
27080 tree exp, rtx target)
27081 {
27082 rtx pat;
27083 tree arg0 = CALL_EXPR_ARG (exp, 0);
27084 tree arg1 = CALL_EXPR_ARG (exp, 1);
27085 tree arg2 = CALL_EXPR_ARG (exp, 2);
27086 tree arg3 = CALL_EXPR_ARG (exp, 3);
27087 tree arg4 = CALL_EXPR_ARG (exp, 4);
27088 rtx scratch0, scratch1;
27089 rtx op0 = expand_normal (arg0);
27090 rtx op1 = expand_normal (arg1);
27091 rtx op2 = expand_normal (arg2);
27092 rtx op3 = expand_normal (arg3);
27093 rtx op4 = expand_normal (arg4);
27094 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
27095
27096 tmode0 = insn_data[d->icode].operand[0].mode;
27097 tmode1 = insn_data[d->icode].operand[1].mode;
27098 modev2 = insn_data[d->icode].operand[2].mode;
27099 modei3 = insn_data[d->icode].operand[3].mode;
27100 modev4 = insn_data[d->icode].operand[4].mode;
27101 modei5 = insn_data[d->icode].operand[5].mode;
27102 modeimm = insn_data[d->icode].operand[6].mode;
27103
27104 if (VECTOR_MODE_P (modev2))
27105 op0 = safe_vector_operand (op0, modev2);
27106 if (VECTOR_MODE_P (modev4))
27107 op2 = safe_vector_operand (op2, modev4);
27108
27109 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
27110 op0 = copy_to_mode_reg (modev2, op0);
27111 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
27112 op1 = copy_to_mode_reg (modei3, op1);
27113 if ((optimize && !register_operand (op2, modev4))
27114 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
27115 op2 = copy_to_mode_reg (modev4, op2);
27116 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
27117 op3 = copy_to_mode_reg (modei5, op3);
27118
27119 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
27120 {
27121 error ("the fifth argument must be an 8-bit immediate");
27122 return const0_rtx;
27123 }
27124
27125 if (d->code == IX86_BUILTIN_PCMPESTRI128)
27126 {
27127 if (optimize || !target
27128 || GET_MODE (target) != tmode0
27129 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
27130 target = gen_reg_rtx (tmode0);
27131
27132 scratch1 = gen_reg_rtx (tmode1);
27133
27134 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
27135 }
27136 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
27137 {
27138 if (optimize || !target
27139 || GET_MODE (target) != tmode1
27140 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
27141 target = gen_reg_rtx (tmode1);
27142
27143 scratch0 = gen_reg_rtx (tmode0);
27144
27145 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
27146 }
27147 else
27148 {
27149 gcc_assert (d->flag);
27150
27151 scratch0 = gen_reg_rtx (tmode0);
27152 scratch1 = gen_reg_rtx (tmode1);
27153
27154 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
27155 }
27156
27157 if (! pat)
27158 return 0;
27159
27160 emit_insn (pat);
27161
27162 if (d->flag)
27163 {
27164 target = gen_reg_rtx (SImode);
27165 emit_move_insn (target, const0_rtx);
27166 target = gen_rtx_SUBREG (QImode, target, 0);
27167
27168 emit_insn
27169 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
27170 gen_rtx_fmt_ee (EQ, QImode,
27171 gen_rtx_REG ((enum machine_mode) d->flag,
27172 FLAGS_REG),
27173 const0_rtx)));
27174 return SUBREG_REG (target);
27175 }
27176 else
27177 return target;
27178 }
27179
27180
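/* Illustrative sketch (not part of this file): PCMPESTRI takes two
   vectors with explicit lengths plus the 8-bit control immediate checked
   above, matching the INT_FTYPE_V16QI_INT_V16QI_INT_INT signature used
   when the builtins are registered:

     typedef char __v16qi __attribute__ ((__vector_size__ (16)));

     int
     first_match_index (__v16qi a, int la, __v16qi b, int lb)
     {
       return __builtin_ia32_pcmpestri128 (a, la, b, lb, 0x00);
     }

   The builtin name and the 0x00 control byte (unsigned bytes, "equal
   any") are assumptions; a non-constant control argument reaches the
   "fifth argument must be an 8-bit immediate" error above.  */
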
27181 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
27182
27183 static rtx
27184 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
27185 tree exp, rtx target)
27186 {
27187 rtx pat;
27188 tree arg0 = CALL_EXPR_ARG (exp, 0);
27189 tree arg1 = CALL_EXPR_ARG (exp, 1);
27190 tree arg2 = CALL_EXPR_ARG (exp, 2);
27191 rtx scratch0, scratch1;
27192 rtx op0 = expand_normal (arg0);
27193 rtx op1 = expand_normal (arg1);
27194 rtx op2 = expand_normal (arg2);
27195 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
27196
27197 tmode0 = insn_data[d->icode].operand[0].mode;
27198 tmode1 = insn_data[d->icode].operand[1].mode;
27199 modev2 = insn_data[d->icode].operand[2].mode;
27200 modev3 = insn_data[d->icode].operand[3].mode;
27201 modeimm = insn_data[d->icode].operand[4].mode;
27202
27203 if (VECTOR_MODE_P (modev2))
27204 op0 = safe_vector_operand (op0, modev2);
27205 if (VECTOR_MODE_P (modev3))
27206 op1 = safe_vector_operand (op1, modev3);
27207
27208 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
27209 op0 = copy_to_mode_reg (modev2, op0);
27210 if ((optimize && !register_operand (op1, modev3))
27211 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
27212 op1 = copy_to_mode_reg (modev3, op1);
27213
27214 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
27215 {
27216 error ("the third argument must be an 8-bit immediate");
27217 return const0_rtx;
27218 }
27219
27220 if (d->code == IX86_BUILTIN_PCMPISTRI128)
27221 {
27222 if (optimize || !target
27223 || GET_MODE (target) != tmode0
27224 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
27225 target = gen_reg_rtx (tmode0);
27226
27227 scratch1 = gen_reg_rtx (tmode1);
27228
27229 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
27230 }
27231 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
27232 {
27233 if (optimize || !target
27234 || GET_MODE (target) != tmode1
27235 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
27236 target = gen_reg_rtx (tmode1);
27237
27238 scratch0 = gen_reg_rtx (tmode0);
27239
27240 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
27241 }
27242 else
27243 {
27244 gcc_assert (d->flag);
27245
27246 scratch0 = gen_reg_rtx (tmode0);
27247 scratch1 = gen_reg_rtx (tmode1);
27248
27249 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
27250 }
27251
27252 if (! pat)
27253 return 0;
27254
27255 emit_insn (pat);
27256
27257 if (d->flag)
27258 {
27259 target = gen_reg_rtx (SImode);
27260 emit_move_insn (target, const0_rtx);
27261 target = gen_rtx_SUBREG (QImode, target, 0);
27262
27263 emit_insn
27264 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
27265 gen_rtx_fmt_ee (EQ, QImode,
27266 gen_rtx_REG ((enum machine_mode) d->flag,
27267 FLAGS_REG),
27268 const0_rtx)));
27269 return SUBREG_REG (target);
27270 }
27271 else
27272 return target;
27273 }
27274
27275 /* Subroutine of ix86_expand_builtin to take care of insns with a
27276 variable number of operands. */
27277
27278 static rtx
27279 ix86_expand_args_builtin (const struct builtin_description *d,
27280 tree exp, rtx target)
27281 {
27282 rtx pat, real_target;
27283 unsigned int i, nargs;
27284 unsigned int nargs_constant = 0;
27285 int num_memory = 0;
27286 struct
27287 {
27288 rtx op;
27289 enum machine_mode mode;
27290 } args[4];
27291 bool last_arg_count = false;
27292 enum insn_code icode = d->icode;
27293 const struct insn_data_d *insn_p = &insn_data[icode];
27294 enum machine_mode tmode = insn_p->operand[0].mode;
27295 enum machine_mode rmode = VOIDmode;
27296 bool swap = false;
27297 enum rtx_code comparison = d->comparison;
27298
27299 switch ((enum ix86_builtin_func_type) d->flag)
27300 {
27301 case V2DF_FTYPE_V2DF_ROUND:
27302 case V4DF_FTYPE_V4DF_ROUND:
27303 case V4SF_FTYPE_V4SF_ROUND:
27304 case V8SF_FTYPE_V8SF_ROUND:
27305 return ix86_expand_sse_round (d, exp, target);
27306 case INT_FTYPE_V8SF_V8SF_PTEST:
27307 case INT_FTYPE_V4DI_V4DI_PTEST:
27308 case INT_FTYPE_V4DF_V4DF_PTEST:
27309 case INT_FTYPE_V4SF_V4SF_PTEST:
27310 case INT_FTYPE_V2DI_V2DI_PTEST:
27311 case INT_FTYPE_V2DF_V2DF_PTEST:
27312 return ix86_expand_sse_ptest (d, exp, target);
27313 case FLOAT128_FTYPE_FLOAT128:
27314 case FLOAT_FTYPE_FLOAT:
27315 case INT_FTYPE_INT:
27316 case UINT64_FTYPE_INT:
27317 case UINT16_FTYPE_UINT16:
27318 case INT64_FTYPE_INT64:
27319 case INT64_FTYPE_V4SF:
27320 case INT64_FTYPE_V2DF:
27321 case INT_FTYPE_V16QI:
27322 case INT_FTYPE_V8QI:
27323 case INT_FTYPE_V8SF:
27324 case INT_FTYPE_V4DF:
27325 case INT_FTYPE_V4SF:
27326 case INT_FTYPE_V2DF:
27327 case INT_FTYPE_V32QI:
27328 case V16QI_FTYPE_V16QI:
27329 case V8SI_FTYPE_V8SF:
27330 case V8SI_FTYPE_V4SI:
27331 case V8HI_FTYPE_V8HI:
27332 case V8HI_FTYPE_V16QI:
27333 case V8QI_FTYPE_V8QI:
27334 case V8SF_FTYPE_V8SF:
27335 case V8SF_FTYPE_V8SI:
27336 case V8SF_FTYPE_V4SF:
27337 case V8SF_FTYPE_V8HI:
27338 case V4SI_FTYPE_V4SI:
27339 case V4SI_FTYPE_V16QI:
27340 case V4SI_FTYPE_V4SF:
27341 case V4SI_FTYPE_V8SI:
27342 case V4SI_FTYPE_V8HI:
27343 case V4SI_FTYPE_V4DF:
27344 case V4SI_FTYPE_V2DF:
27345 case V4HI_FTYPE_V4HI:
27346 case V4DF_FTYPE_V4DF:
27347 case V4DF_FTYPE_V4SI:
27348 case V4DF_FTYPE_V4SF:
27349 case V4DF_FTYPE_V2DF:
27350 case V4SF_FTYPE_V4SF:
27351 case V4SF_FTYPE_V4SI:
27352 case V4SF_FTYPE_V8SF:
27353 case V4SF_FTYPE_V4DF:
27354 case V4SF_FTYPE_V8HI:
27355 case V4SF_FTYPE_V2DF:
27356 case V2DI_FTYPE_V2DI:
27357 case V2DI_FTYPE_V16QI:
27358 case V2DI_FTYPE_V8HI:
27359 case V2DI_FTYPE_V4SI:
27360 case V2DF_FTYPE_V2DF:
27361 case V2DF_FTYPE_V4SI:
27362 case V2DF_FTYPE_V4DF:
27363 case V2DF_FTYPE_V4SF:
27364 case V2DF_FTYPE_V2SI:
27365 case V2SI_FTYPE_V2SI:
27366 case V2SI_FTYPE_V4SF:
27367 case V2SI_FTYPE_V2SF:
27368 case V2SI_FTYPE_V2DF:
27369 case V2SF_FTYPE_V2SF:
27370 case V2SF_FTYPE_V2SI:
27371 case V32QI_FTYPE_V32QI:
27372 case V32QI_FTYPE_V16QI:
27373 case V16HI_FTYPE_V16HI:
27374 case V16HI_FTYPE_V8HI:
27375 case V8SI_FTYPE_V8SI:
27376 case V16HI_FTYPE_V16QI:
27377 case V8SI_FTYPE_V16QI:
27378 case V4DI_FTYPE_V16QI:
27379 case V8SI_FTYPE_V8HI:
27380 case V4DI_FTYPE_V8HI:
27381 case V4DI_FTYPE_V4SI:
27382 case V4DI_FTYPE_V2DI:
27383 nargs = 1;
27384 break;
27385 case V4SF_FTYPE_V4SF_VEC_MERGE:
27386 case V2DF_FTYPE_V2DF_VEC_MERGE:
27387 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
27388 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
27389 case V16QI_FTYPE_V16QI_V16QI:
27390 case V16QI_FTYPE_V8HI_V8HI:
27391 case V8QI_FTYPE_V8QI_V8QI:
27392 case V8QI_FTYPE_V4HI_V4HI:
27393 case V8HI_FTYPE_V8HI_V8HI:
27394 case V8HI_FTYPE_V16QI_V16QI:
27395 case V8HI_FTYPE_V4SI_V4SI:
27396 case V8SF_FTYPE_V8SF_V8SF:
27397 case V8SF_FTYPE_V8SF_V8SI:
27398 case V4SI_FTYPE_V4SI_V4SI:
27399 case V4SI_FTYPE_V8HI_V8HI:
27400 case V4SI_FTYPE_V4SF_V4SF:
27401 case V4SI_FTYPE_V2DF_V2DF:
27402 case V4HI_FTYPE_V4HI_V4HI:
27403 case V4HI_FTYPE_V8QI_V8QI:
27404 case V4HI_FTYPE_V2SI_V2SI:
27405 case V4DF_FTYPE_V4DF_V4DF:
27406 case V4DF_FTYPE_V4DF_V4DI:
27407 case V4SF_FTYPE_V4SF_V4SF:
27408 case V4SF_FTYPE_V4SF_V4SI:
27409 case V4SF_FTYPE_V4SF_V2SI:
27410 case V4SF_FTYPE_V4SF_V2DF:
27411 case V4SF_FTYPE_V4SF_DI:
27412 case V4SF_FTYPE_V4SF_SI:
27413 case V2DI_FTYPE_V2DI_V2DI:
27414 case V2DI_FTYPE_V16QI_V16QI:
27415 case V2DI_FTYPE_V4SI_V4SI:
27416 case V2DI_FTYPE_V2DI_V16QI:
27417 case V2DI_FTYPE_V2DF_V2DF:
27418 case V2SI_FTYPE_V2SI_V2SI:
27419 case V2SI_FTYPE_V4HI_V4HI:
27420 case V2SI_FTYPE_V2SF_V2SF:
27421 case V2DF_FTYPE_V2DF_V2DF:
27422 case V2DF_FTYPE_V2DF_V4SF:
27423 case V2DF_FTYPE_V2DF_V2DI:
27424 case V2DF_FTYPE_V2DF_DI:
27425 case V2DF_FTYPE_V2DF_SI:
27426 case V2SF_FTYPE_V2SF_V2SF:
27427 case V1DI_FTYPE_V1DI_V1DI:
27428 case V1DI_FTYPE_V8QI_V8QI:
27429 case V1DI_FTYPE_V2SI_V2SI:
27430 case V32QI_FTYPE_V16HI_V16HI:
27431 case V16HI_FTYPE_V8SI_V8SI:
27432 case V32QI_FTYPE_V32QI_V32QI:
27433 case V16HI_FTYPE_V32QI_V32QI:
27434 case V16HI_FTYPE_V16HI_V16HI:
27435 case V8SI_FTYPE_V8SI_V8SI:
27436 case V8SI_FTYPE_V16HI_V16HI:
27437 case V4DI_FTYPE_V4DI_V4DI:
27438 case V4DI_FTYPE_V8SI_V8SI:
27439 if (comparison == UNKNOWN)
27440 return ix86_expand_binop_builtin (icode, exp, target);
27441 nargs = 2;
27442 break;
27443 case V4SF_FTYPE_V4SF_V4SF_SWAP:
27444 case V2DF_FTYPE_V2DF_V2DF_SWAP:
27445 gcc_assert (comparison != UNKNOWN);
27446 nargs = 2;
27447 swap = true;
27448 break;
27449 case V16HI_FTYPE_V16HI_V8HI_COUNT:
27450 case V16HI_FTYPE_V16HI_SI_COUNT:
27451 case V8SI_FTYPE_V8SI_V4SI_COUNT:
27452 case V8SI_FTYPE_V8SI_SI_COUNT:
27453 case V4DI_FTYPE_V4DI_V2DI_COUNT:
27454 case V4DI_FTYPE_V4DI_INT_COUNT:
27455 case V8HI_FTYPE_V8HI_V8HI_COUNT:
27456 case V8HI_FTYPE_V8HI_SI_COUNT:
27457 case V4SI_FTYPE_V4SI_V4SI_COUNT:
27458 case V4SI_FTYPE_V4SI_SI_COUNT:
27459 case V4HI_FTYPE_V4HI_V4HI_COUNT:
27460 case V4HI_FTYPE_V4HI_SI_COUNT:
27461 case V2DI_FTYPE_V2DI_V2DI_COUNT:
27462 case V2DI_FTYPE_V2DI_SI_COUNT:
27463 case V2SI_FTYPE_V2SI_V2SI_COUNT:
27464 case V2SI_FTYPE_V2SI_SI_COUNT:
27465 case V1DI_FTYPE_V1DI_V1DI_COUNT:
27466 case V1DI_FTYPE_V1DI_SI_COUNT:
27467 nargs = 2;
27468 last_arg_count = true;
27469 break;
27470 case UINT64_FTYPE_UINT64_UINT64:
27471 case UINT_FTYPE_UINT_UINT:
27472 case UINT_FTYPE_UINT_USHORT:
27473 case UINT_FTYPE_UINT_UCHAR:
27474 case UINT16_FTYPE_UINT16_INT:
27475 case UINT8_FTYPE_UINT8_INT:
27476 nargs = 2;
27477 break;
27478 case V2DI_FTYPE_V2DI_INT_CONVERT:
27479 nargs = 2;
27480 rmode = V1TImode;
27481 nargs_constant = 1;
27482 break;
27483 case V8HI_FTYPE_V8HI_INT:
27484 case V8HI_FTYPE_V8SF_INT:
27485 case V8HI_FTYPE_V4SF_INT:
27486 case V8SF_FTYPE_V8SF_INT:
27487 case V4SI_FTYPE_V4SI_INT:
27488 case V4SI_FTYPE_V8SI_INT:
27489 case V4HI_FTYPE_V4HI_INT:
27490 case V4DF_FTYPE_V4DF_INT:
27491 case V4SF_FTYPE_V4SF_INT:
27492 case V4SF_FTYPE_V8SF_INT:
27493 case V2DI_FTYPE_V2DI_INT:
27494 case V2DF_FTYPE_V2DF_INT:
27495 case V2DF_FTYPE_V4DF_INT:
27496 case V16HI_FTYPE_V16HI_INT:
27497 case V8SI_FTYPE_V8SI_INT:
27498 case V4DI_FTYPE_V4DI_INT:
27499 case V2DI_FTYPE_V4DI_INT:
27500 nargs = 2;
27501 nargs_constant = 1;
27502 break;
27503 case V16QI_FTYPE_V16QI_V16QI_V16QI:
27504 case V8SF_FTYPE_V8SF_V8SF_V8SF:
27505 case V4DF_FTYPE_V4DF_V4DF_V4DF:
27506 case V4SF_FTYPE_V4SF_V4SF_V4SF:
27507 case V2DF_FTYPE_V2DF_V2DF_V2DF:
27508 case V32QI_FTYPE_V32QI_V32QI_V32QI:
27509 nargs = 3;
27510 break;
27511 case V32QI_FTYPE_V32QI_V32QI_INT:
27512 case V16HI_FTYPE_V16HI_V16HI_INT:
27513 case V16QI_FTYPE_V16QI_V16QI_INT:
27514 case V4DI_FTYPE_V4DI_V4DI_INT:
27515 case V8HI_FTYPE_V8HI_V8HI_INT:
27516 case V8SI_FTYPE_V8SI_V8SI_INT:
27517 case V8SI_FTYPE_V8SI_V4SI_INT:
27518 case V8SF_FTYPE_V8SF_V8SF_INT:
27519 case V8SF_FTYPE_V8SF_V4SF_INT:
27520 case V4SI_FTYPE_V4SI_V4SI_INT:
27521 case V4DF_FTYPE_V4DF_V4DF_INT:
27522 case V4DF_FTYPE_V4DF_V2DF_INT:
27523 case V4SF_FTYPE_V4SF_V4SF_INT:
27524 case V2DI_FTYPE_V2DI_V2DI_INT:
27525 case V4DI_FTYPE_V4DI_V2DI_INT:
27526 case V2DF_FTYPE_V2DF_V2DF_INT:
27527 nargs = 3;
27528 nargs_constant = 1;
27529 break;
27530 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
27531 nargs = 3;
27532 rmode = V4DImode;
27533 nargs_constant = 1;
27534 break;
27535 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
27536 nargs = 3;
27537 rmode = V2DImode;
27538 nargs_constant = 1;
27539 break;
27540 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
27541 nargs = 3;
27542 rmode = DImode;
27543 nargs_constant = 1;
27544 break;
27545 case V2DI_FTYPE_V2DI_UINT_UINT:
27546 nargs = 3;
27547 nargs_constant = 2;
27548 break;
27549 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
27550 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
27551 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
27552 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
27553 nargs = 4;
27554 nargs_constant = 1;
27555 break;
27556 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
27557 nargs = 4;
27558 nargs_constant = 2;
27559 break;
27560 default:
27561 gcc_unreachable ();
27562 }
27563
27564 gcc_assert (nargs <= ARRAY_SIZE (args));
27565
27566 if (comparison != UNKNOWN)
27567 {
27568 gcc_assert (nargs == 2);
27569 return ix86_expand_sse_compare (d, exp, target, swap);
27570 }
27571
27572 if (rmode == VOIDmode || rmode == tmode)
27573 {
27574 if (optimize
27575 || target == 0
27576 || GET_MODE (target) != tmode
27577 || !insn_p->operand[0].predicate (target, tmode))
27578 target = gen_reg_rtx (tmode);
27579 real_target = target;
27580 }
27581 else
27582 {
27583 target = gen_reg_rtx (rmode);
27584 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
27585 }
27586
27587 for (i = 0; i < nargs; i++)
27588 {
27589 tree arg = CALL_EXPR_ARG (exp, i);
27590 rtx op = expand_normal (arg);
27591 enum machine_mode mode = insn_p->operand[i + 1].mode;
27592 bool match = insn_p->operand[i + 1].predicate (op, mode);
27593
27594 if (last_arg_count && (i + 1) == nargs)
27595 {
27596 /* SIMD shift insns take either an 8-bit immediate or
27597 a register as the count.  But builtin functions take int as
27598 the count.  If the count doesn't match, we put it in a register. */
27599 if (!match)
27600 {
27601 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
27602 if (!insn_p->operand[i + 1].predicate (op, mode))
27603 op = copy_to_reg (op);
27604 }
27605 }
27606 else if ((nargs - i) <= nargs_constant)
27607 {
27608 if (!match)
27609 switch (icode)
27610 {
27611 case CODE_FOR_avx2_inserti128:
27612 case CODE_FOR_avx2_extracti128:
27613 error ("the last argument must be a 1-bit immediate");
27614 return const0_rtx;
27615
27616 case CODE_FOR_sse4_1_roundpd:
27617 case CODE_FOR_sse4_1_roundps:
27618 case CODE_FOR_sse4_1_roundsd:
27619 case CODE_FOR_sse4_1_roundss:
27620 case CODE_FOR_sse4_1_blendps:
27621 case CODE_FOR_avx_blendpd256:
27622 case CODE_FOR_avx_vpermilv4df:
27623 case CODE_FOR_avx_roundpd256:
27624 case CODE_FOR_avx_roundps256:
27625 error ("the last argument must be a 4-bit immediate");
27626 return const0_rtx;
27627
27628 case CODE_FOR_sse4_1_blendpd:
27629 case CODE_FOR_avx_vpermilv2df:
27630 case CODE_FOR_xop_vpermil2v2df3:
27631 case CODE_FOR_xop_vpermil2v4sf3:
27632 case CODE_FOR_xop_vpermil2v4df3:
27633 case CODE_FOR_xop_vpermil2v8sf3:
27634 error ("the last argument must be a 2-bit immediate");
27635 return const0_rtx;
27636
27637 case CODE_FOR_avx_vextractf128v4df:
27638 case CODE_FOR_avx_vextractf128v8sf:
27639 case CODE_FOR_avx_vextractf128v8si:
27640 case CODE_FOR_avx_vinsertf128v4df:
27641 case CODE_FOR_avx_vinsertf128v8sf:
27642 case CODE_FOR_avx_vinsertf128v8si:
27643 error ("the last argument must be a 1-bit immediate");
27644 return const0_rtx;
27645
27646 case CODE_FOR_avx_vmcmpv2df3:
27647 case CODE_FOR_avx_vmcmpv4sf3:
27648 case CODE_FOR_avx_cmpv2df3:
27649 case CODE_FOR_avx_cmpv4sf3:
27650 case CODE_FOR_avx_cmpv4df3:
27651 case CODE_FOR_avx_cmpv8sf3:
27652 error ("the last argument must be a 5-bit immediate");
27653 return const0_rtx;
27654
27655 default:
27656 switch (nargs_constant)
27657 {
27658 case 2:
27659 if ((nargs - i) == nargs_constant)
27660 {
27661 error ("the next to last argument must be an 8-bit immediate");
27662 break;
27663 }
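/* FALLTHRU */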
27664 case 1:
27665 error ("the last argument must be an 8-bit immediate");
27666 break;
27667 default:
27668 gcc_unreachable ();
27669 }
27670 return const0_rtx;
27671 }
27672 }
27673 else
27674 {
27675 if (VECTOR_MODE_P (mode))
27676 op = safe_vector_operand (op, mode);
27677
27678 /* If we aren't optimizing, only allow one memory operand to
27679 be generated. */
27680 if (memory_operand (op, mode))
27681 num_memory++;
27682
27683 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
27684 {
27685 if (optimize || !match || num_memory > 1)
27686 op = copy_to_mode_reg (mode, op);
27687 }
27688 else
27689 {
27690 op = copy_to_reg (op);
27691 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
27692 }
27693 }
27694
27695 args[i].op = op;
27696 args[i].mode = mode;
27697 }
27698
27699 switch (nargs)
27700 {
27701 case 1:
27702 pat = GEN_FCN (icode) (real_target, args[0].op);
27703 break;
27704 case 2:
27705 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
27706 break;
27707 case 3:
27708 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
27709 args[2].op);
27710 break;
27711 case 4:
27712 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
27713 args[2].op, args[3].op);
27714 break;
27715 default:
27716 gcc_unreachable ();
27717 }
27718
27719 if (! pat)
27720 return 0;
27721
27722 emit_insn (pat);
27723 return target;
27724 }
27725
27726 /* Subroutine of ix86_expand_builtin to take care of special insns
27727 with variable number of operands. */
27728
27729 static rtx
27730 ix86_expand_special_args_builtin (const struct builtin_description *d,
27731 tree exp, rtx target)
27732 {
27733 tree arg;
27734 rtx pat, op;
27735 unsigned int i, nargs, arg_adjust, memory;
27736 struct
27737 {
27738 rtx op;
27739 enum machine_mode mode;
27740 } args[3];
27741 enum insn_code icode = d->icode;
27742 bool last_arg_constant = false;
27743 const struct insn_data_d *insn_p = &insn_data[icode];
27744 enum machine_mode tmode = insn_p->operand[0].mode;
27745 enum { load, store } klass;
27746
27747 switch ((enum ix86_builtin_func_type) d->flag)
27748 {
27749 case VOID_FTYPE_VOID:
27750 if (icode == CODE_FOR_avx_vzeroupper)
27751 target = GEN_INT (vzeroupper_intrinsic);
27752 emit_insn (GEN_FCN (icode) (target));
27753 return 0;
27754 case VOID_FTYPE_UINT64:
27755 case VOID_FTYPE_UNSIGNED:
27756 nargs = 0;
27757 klass = store;
27758 memory = 0;
27759 break;
27761 case UINT64_FTYPE_VOID:
27762 case UNSIGNED_FTYPE_VOID:
27763 nargs = 0;
27764 klass = load;
27765 memory = 0;
27766 break;
27767 case UINT64_FTYPE_PUNSIGNED:
27768 case V2DI_FTYPE_PV2DI:
27769 case V4DI_FTYPE_PV4DI:
27770 case V32QI_FTYPE_PCCHAR:
27771 case V16QI_FTYPE_PCCHAR:
27772 case V8SF_FTYPE_PCV4SF:
27773 case V8SF_FTYPE_PCFLOAT:
27774 case V4SF_FTYPE_PCFLOAT:
27775 case V4DF_FTYPE_PCV2DF:
27776 case V4DF_FTYPE_PCDOUBLE:
27777 case V2DF_FTYPE_PCDOUBLE:
27778 case VOID_FTYPE_PVOID:
27779 nargs = 1;
27780 klass = load;
27781 memory = 0;
27782 break;
27783 case VOID_FTYPE_PV2SF_V4SF:
27784 case VOID_FTYPE_PV4DI_V4DI:
27785 case VOID_FTYPE_PV2DI_V2DI:
27786 case VOID_FTYPE_PCHAR_V32QI:
27787 case VOID_FTYPE_PCHAR_V16QI:
27788 case VOID_FTYPE_PFLOAT_V8SF:
27789 case VOID_FTYPE_PFLOAT_V4SF:
27790 case VOID_FTYPE_PDOUBLE_V4DF:
27791 case VOID_FTYPE_PDOUBLE_V2DF:
27792 case VOID_FTYPE_PULONGLONG_ULONGLONG:
27793 case VOID_FTYPE_PINT_INT:
27794 nargs = 1;
27795 klass = store;
27796 /* Reserve memory operand for target. */
27797 memory = ARRAY_SIZE (args);
27798 break;
27799 case V4SF_FTYPE_V4SF_PCV2SF:
27800 case V2DF_FTYPE_V2DF_PCDOUBLE:
27801 nargs = 2;
27802 klass = load;
27803 memory = 1;
27804 break;
27805 case V8SF_FTYPE_PCV8SF_V8SI:
27806 case V4DF_FTYPE_PCV4DF_V4DI:
27807 case V4SF_FTYPE_PCV4SF_V4SI:
27808 case V2DF_FTYPE_PCV2DF_V2DI:
27809 case V8SI_FTYPE_PCV8SI_V8SI:
27810 case V4DI_FTYPE_PCV4DI_V4DI:
27811 case V4SI_FTYPE_PCV4SI_V4SI:
27812 case V2DI_FTYPE_PCV2DI_V2DI:
27813 nargs = 2;
27814 klass = load;
27815 memory = 0;
27816 break;
27817 case VOID_FTYPE_PV8SF_V8SI_V8SF:
27818 case VOID_FTYPE_PV4DF_V4DI_V4DF:
27819 case VOID_FTYPE_PV4SF_V4SI_V4SF:
27820 case VOID_FTYPE_PV2DF_V2DI_V2DF:
27821 case VOID_FTYPE_PV8SI_V8SI_V8SI:
27822 case VOID_FTYPE_PV4DI_V4DI_V4DI:
27823 case VOID_FTYPE_PV4SI_V4SI_V4SI:
27824 case VOID_FTYPE_PV2DI_V2DI_V2DI:
27825 nargs = 2;
27826 klass = store;
27827 /* Reserve memory operand for target. */
27828 memory = ARRAY_SIZE (args);
27829 break;
27830 case VOID_FTYPE_UINT_UINT_UINT:
27831 case VOID_FTYPE_UINT64_UINT_UINT:
27832 case UCHAR_FTYPE_UINT_UINT_UINT:
27833 case UCHAR_FTYPE_UINT64_UINT_UINT:
27834 nargs = 3;
27835 klass = load;
27836 memory = ARRAY_SIZE (args);
27837 last_arg_constant = true;
27838 break;
27839 default:
27840 gcc_unreachable ();
27841 }
27842
27843 gcc_assert (nargs <= ARRAY_SIZE (args));
27844
27845 if (klass == store)
27846 {
27847 arg = CALL_EXPR_ARG (exp, 0);
27848 op = expand_normal (arg);
27849 gcc_assert (target == 0);
27850 if (memory)
27851 {
27852 if (GET_MODE (op) != Pmode)
27853 op = convert_to_mode (Pmode, op, 1);
27854 target = gen_rtx_MEM (tmode, force_reg (Pmode, op));
27855 }
27856 else
27857 target = force_reg (tmode, op);
27858 arg_adjust = 1;
27859 }
27860 else
27861 {
27862 arg_adjust = 0;
27863 if (optimize
27864 || target == 0
27865 || GET_MODE (target) != tmode
27866 || !insn_p->operand[0].predicate (target, tmode))
27867 target = gen_reg_rtx (tmode);
27868 }
27869
27870 for (i = 0; i < nargs; i++)
27871 {
27872 enum machine_mode mode = insn_p->operand[i + 1].mode;
27873 bool match;
27874
27875 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
27876 op = expand_normal (arg);
27877 match = insn_p->operand[i + 1].predicate (op, mode);
27878
27879 if (last_arg_constant && (i + 1) == nargs)
27880 {
27881 if (!match)
27882 {
27883 if (icode == CODE_FOR_lwp_lwpvalsi3
27884 || icode == CODE_FOR_lwp_lwpinssi3
27885 || icode == CODE_FOR_lwp_lwpvaldi3
27886 || icode == CODE_FOR_lwp_lwpinsdi3)
27887 error ("the last argument must be a 32-bit immediate");
27888 else
27889 error ("the last argument must be an 8-bit immediate");
27890 return const0_rtx;
27891 }
27892 }
27893 else
27894 {
27895 if (i == memory)
27896 {
27897 /* This must be the memory operand. */
27898 if (GET_MODE (op) != Pmode)
27899 op = convert_to_mode (Pmode, op, 1);
27900 op = gen_rtx_MEM (mode, force_reg (Pmode, op));
27901 gcc_assert (GET_MODE (op) == mode
27902 || GET_MODE (op) == VOIDmode);
27903 }
27904 else
27905 {
27906 /* This must be the register operand. */
27907 if (VECTOR_MODE_P (mode))
27908 op = safe_vector_operand (op, mode);
27909
27910 gcc_assert (GET_MODE (op) == mode
27911 || GET_MODE (op) == VOIDmode);
27912 op = copy_to_mode_reg (mode, op);
27913 }
27914 }
27915
27916 args[i].op = op;
27917 args[i].mode = mode;
27918 }
27919
27920 switch (nargs)
27921 {
27922 case 0:
27923 pat = GEN_FCN (icode) (target);
27924 break;
27925 case 1:
27926 pat = GEN_FCN (icode) (target, args[0].op);
27927 break;
27928 case 2:
27929 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
27930 break;
27931 case 3:
27932 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
27933 break;
27934 default:
27935 gcc_unreachable ();
27936 }
27937
27938 if (! pat)
27939 return 0;
27940 emit_insn (pat);
27941 return klass == store ? 0 : target;
27942 }
27943
27944 /* Return the integer constant in ARG. Constrain it to be in the range
27945 of the subparts of VEC_TYPE; issue an error if not. */
27946
27947 static int
27948 get_element_number (tree vec_type, tree arg)
27949 {
27950 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
27951
27952 if (!host_integerp (arg, 1)
27953 || (elt = tree_low_cst (arg, 1), elt > max))
27954 {
27955 error ("selector must be an integer constant in the range 0..%wi", max);
27956 return 0;
27957 }
27958
27959 return elt;
27960 }
27961
27962 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
27963 ix86_expand_vector_init. We DO have language-level syntax for this, in
27964 the form of (type){ init-list }. Except that since we can't place emms
27965 instructions from inside the compiler, we can't allow the use of MMX
27966 registers unless the user explicitly asks for it. So we do *not* define
27967 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
27968 we have builtins invoked by mmintrin.h that give us license to emit
27969 these sorts of instructions. */
27970
27971 static rtx
27972 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
27973 {
27974 enum machine_mode tmode = TYPE_MODE (type);
27975 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
27976 int i, n_elt = GET_MODE_NUNITS (tmode);
27977 rtvec v = rtvec_alloc (n_elt);
27978
27979 gcc_assert (VECTOR_MODE_P (tmode));
27980 gcc_assert (call_expr_nargs (exp) == n_elt);
27981
27982 for (i = 0; i < n_elt; ++i)
27983 {
27984 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
27985 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
27986 }
27987
27988 if (!target || !register_operand (target, tmode))
27989 target = gen_reg_rtx (tmode);
27990
27991 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
27992 return target;
27993 }
27994
27995 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
27996 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
27997 had a language-level syntax for referencing vector elements. */
27998
27999 static rtx
28000 ix86_expand_vec_ext_builtin (tree exp, rtx target)
28001 {
28002 enum machine_mode tmode, mode0;
28003 tree arg0, arg1;
28004 int elt;
28005 rtx op0;
28006
28007 arg0 = CALL_EXPR_ARG (exp, 0);
28008 arg1 = CALL_EXPR_ARG (exp, 1);
28009
28010 op0 = expand_normal (arg0);
28011 elt = get_element_number (TREE_TYPE (arg0), arg1);
28012
28013 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
28014 mode0 = TYPE_MODE (TREE_TYPE (arg0));
28015 gcc_assert (VECTOR_MODE_P (mode0));
28016
28017 op0 = force_reg (mode0, op0);
28018
28019 if (optimize || !target || !register_operand (target, tmode))
28020 target = gen_reg_rtx (tmode);
28021
28022 ix86_expand_vector_extract (true, target, op0, elt);
28023
28024 return target;
28025 }
28026
28027 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
28028 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
28029 a language-level syntax for referencing vector elements. */
28030
28031 static rtx
28032 ix86_expand_vec_set_builtin (tree exp)
28033 {
28034 enum machine_mode tmode, mode1;
28035 tree arg0, arg1, arg2;
28036 int elt;
28037 rtx op0, op1, target;
28038
28039 arg0 = CALL_EXPR_ARG (exp, 0);
28040 arg1 = CALL_EXPR_ARG (exp, 1);
28041 arg2 = CALL_EXPR_ARG (exp, 2);
28042
28043 tmode = TYPE_MODE (TREE_TYPE (arg0));
28044 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
28045 gcc_assert (VECTOR_MODE_P (tmode));
28046
28047 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
28048 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
28049 elt = get_element_number (TREE_TYPE (arg0), arg2);
28050
28051 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
28052 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
28053
28054 op0 = force_reg (tmode, op0);
28055 op1 = force_reg (mode1, op1);
28056
28057 /* OP0 is the source of these builtin functions and shouldn't be
28058 modified. Create a copy, use it and return it as target. */
28059 target = gen_reg_rtx (tmode);
28060 emit_move_insn (target, op0);
28061 ix86_expand_vector_set (true, target, op1, elt);
28062
28063 return target;
28064 }
28065
28066 /* Expand an expression EXP that calls a built-in function,
28067 with result going to TARGET if that's convenient
28068 (and in mode MODE if that's convenient).
28069 SUBTARGET may be used as the target for computing one of EXP's operands.
28070 IGNORE is nonzero if the value is to be ignored. */
28071
28072 static rtx
28073 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
28074 enum machine_mode mode ATTRIBUTE_UNUSED,
28075 int ignore ATTRIBUTE_UNUSED)
28076 {
28077 const struct builtin_description *d;
28078 size_t i;
28079 enum insn_code icode;
28080 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
28081 tree arg0, arg1, arg2, arg3, arg4;
28082 rtx op0, op1, op2, op3, op4, pat;
28083 enum machine_mode mode0, mode1, mode2, mode3, mode4;
28084 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
28085
28086 /* Determine whether the builtin function is available under the current ISA.
28087 Originally the builtin was not created if it wasn't applicable to the
28088 current ISA based on the command-line switches.  With function-specific
28089 options, we need to check in the context of the function making the call
28090 whether it is supported. */
28091 if (ix86_builtins_isa[fcode].isa
28092 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
28093 {
28094 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
28095 NULL, (enum fpmath_unit) 0, false);
28096
28097 if (!opts)
28098 error ("%qE needs unknown isa option", fndecl);
28099 else
28100 {
28101 gcc_assert (opts != NULL);
28102 error ("%qE needs isa option %s", fndecl, opts);
28103 free (opts);
28104 }
28105 return const0_rtx;
28106 }
28107
28108 switch (fcode)
28109 {
28110 case IX86_BUILTIN_MASKMOVQ:
28111 case IX86_BUILTIN_MASKMOVDQU:
28112 icode = (fcode == IX86_BUILTIN_MASKMOVQ
28113 ? CODE_FOR_mmx_maskmovq
28114 : CODE_FOR_sse2_maskmovdqu);
28115 /* Note the arg order is different from the operand order. */
28116 arg1 = CALL_EXPR_ARG (exp, 0);
28117 arg2 = CALL_EXPR_ARG (exp, 1);
28118 arg0 = CALL_EXPR_ARG (exp, 2);
28119 op0 = expand_normal (arg0);
28120 op1 = expand_normal (arg1);
28121 op2 = expand_normal (arg2);
28122 mode0 = insn_data[icode].operand[0].mode;
28123 mode1 = insn_data[icode].operand[1].mode;
28124 mode2 = insn_data[icode].operand[2].mode;
28125
28126 if (GET_MODE (op0) != Pmode)
28127 op0 = convert_to_mode (Pmode, op0, 1);
28128 op0 = gen_rtx_MEM (mode1, force_reg (Pmode, op0));
28129
28130 if (!insn_data[icode].operand[0].predicate (op0, mode0))
28131 op0 = copy_to_mode_reg (mode0, op0);
28132 if (!insn_data[icode].operand[1].predicate (op1, mode1))
28133 op1 = copy_to_mode_reg (mode1, op1);
28134 if (!insn_data[icode].operand[2].predicate (op2, mode2))
28135 op2 = copy_to_mode_reg (mode2, op2);
28136 pat = GEN_FCN (icode) (op0, op1, op2);
28137 if (! pat)
28138 return 0;
28139 emit_insn (pat);
28140 return 0;
28141
28142 case IX86_BUILTIN_LDMXCSR:
28143 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
28144 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
28145 emit_move_insn (target, op0);
28146 emit_insn (gen_sse_ldmxcsr (target));
28147 return 0;
28148
28149 case IX86_BUILTIN_STMXCSR:
28150 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
28151 emit_insn (gen_sse_stmxcsr (target));
28152 return copy_to_mode_reg (SImode, target);
28153
28154 case IX86_BUILTIN_CLFLUSH:
28155 arg0 = CALL_EXPR_ARG (exp, 0);
28156 op0 = expand_normal (arg0);
28157 icode = CODE_FOR_sse2_clflush;
28158 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
28159 {
28160 if (GET_MODE (op0) != Pmode)
28161 op0 = convert_to_mode (Pmode, op0, 1);
28162 op0 = force_reg (Pmode, op0);
28163 }
28164
28165 emit_insn (gen_sse2_clflush (op0));
28166 return 0;
28167
28168 case IX86_BUILTIN_MONITOR:
28169 arg0 = CALL_EXPR_ARG (exp, 0);
28170 arg1 = CALL_EXPR_ARG (exp, 1);
28171 arg2 = CALL_EXPR_ARG (exp, 2);
28172 op0 = expand_normal (arg0);
28173 op1 = expand_normal (arg1);
28174 op2 = expand_normal (arg2);
28175 if (!REG_P (op0))
28176 {
28177 if (GET_MODE (op0) != Pmode)
28178 op0 = convert_to_mode (Pmode, op0, 1);
28179 op0 = force_reg (Pmode, op0);
28180 }
28181 if (!REG_P (op1))
28182 op1 = copy_to_mode_reg (SImode, op1);
28183 if (!REG_P (op2))
28184 op2 = copy_to_mode_reg (SImode, op2);
28185 emit_insn (ix86_gen_monitor (op0, op1, op2));
28186 return 0;
28187
28188 case IX86_BUILTIN_MWAIT:
28189 arg0 = CALL_EXPR_ARG (exp, 0);
28190 arg1 = CALL_EXPR_ARG (exp, 1);
28191 op0 = expand_normal (arg0);
28192 op1 = expand_normal (arg1);
28193 if (!REG_P (op0))
28194 op0 = copy_to_mode_reg (SImode, op0);
28195 if (!REG_P (op1))
28196 op1 = copy_to_mode_reg (SImode, op1);
28197 emit_insn (gen_sse3_mwait (op0, op1));
28198 return 0;
28199
28200 case IX86_BUILTIN_VEC_INIT_V2SI:
28201 case IX86_BUILTIN_VEC_INIT_V4HI:
28202 case IX86_BUILTIN_VEC_INIT_V8QI:
28203 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
28204
28205 case IX86_BUILTIN_VEC_EXT_V2DF:
28206 case IX86_BUILTIN_VEC_EXT_V2DI:
28207 case IX86_BUILTIN_VEC_EXT_V4SF:
28208 case IX86_BUILTIN_VEC_EXT_V4SI:
28209 case IX86_BUILTIN_VEC_EXT_V8HI:
28210 case IX86_BUILTIN_VEC_EXT_V2SI:
28211 case IX86_BUILTIN_VEC_EXT_V4HI:
28212 case IX86_BUILTIN_VEC_EXT_V16QI:
28213 return ix86_expand_vec_ext_builtin (exp, target);
28214
28215 case IX86_BUILTIN_VEC_SET_V2DI:
28216 case IX86_BUILTIN_VEC_SET_V4SF:
28217 case IX86_BUILTIN_VEC_SET_V4SI:
28218 case IX86_BUILTIN_VEC_SET_V8HI:
28219 case IX86_BUILTIN_VEC_SET_V4HI:
28220 case IX86_BUILTIN_VEC_SET_V16QI:
28221 return ix86_expand_vec_set_builtin (exp);
28222
28223 case IX86_BUILTIN_VEC_PERM_V2DF:
28224 case IX86_BUILTIN_VEC_PERM_V4SF:
28225 case IX86_BUILTIN_VEC_PERM_V2DI:
28226 case IX86_BUILTIN_VEC_PERM_V4SI:
28227 case IX86_BUILTIN_VEC_PERM_V8HI:
28228 case IX86_BUILTIN_VEC_PERM_V16QI:
28229 case IX86_BUILTIN_VEC_PERM_V2DI_U:
28230 case IX86_BUILTIN_VEC_PERM_V4SI_U:
28231 case IX86_BUILTIN_VEC_PERM_V8HI_U:
28232 case IX86_BUILTIN_VEC_PERM_V16QI_U:
28233 case IX86_BUILTIN_VEC_PERM_V4DF:
28234 case IX86_BUILTIN_VEC_PERM_V8SF:
28235 return ix86_expand_vec_perm_builtin (exp);
28236
28237 case IX86_BUILTIN_INFQ:
28238 case IX86_BUILTIN_HUGE_VALQ:
28239 {
28240 REAL_VALUE_TYPE inf;
28241 rtx tmp;
28242
28243 real_inf (&inf);
28244 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
28245
28246 tmp = validize_mem (force_const_mem (mode, tmp));
28247
28248 if (target == 0)
28249 target = gen_reg_rtx (mode);
28250
28251 emit_move_insn (target, tmp);
28252 return target;
28253 }
28254
28255 case IX86_BUILTIN_LLWPCB:
28256 arg0 = CALL_EXPR_ARG (exp, 0);
28257 op0 = expand_normal (arg0);
28258 icode = CODE_FOR_lwp_llwpcb;
28259 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
28260 {
28261 if (GET_MODE (op0) != Pmode)
28262 op0 = convert_to_mode (Pmode, op0, 1);
28263 op0 = force_reg (Pmode, op0);
28264 }
28265 emit_insn (gen_lwp_llwpcb (op0));
28266 return 0;
28267
28268 case IX86_BUILTIN_SLWPCB:
28269 icode = CODE_FOR_lwp_slwpcb;
28270 if (!target
28271 || !insn_data[icode].operand[0].predicate (target, Pmode))
28272 target = gen_reg_rtx (Pmode);
28273 emit_insn (gen_lwp_slwpcb (target));
28274 return target;
28275
28276 case IX86_BUILTIN_BEXTRI32:
28277 case IX86_BUILTIN_BEXTRI64:
28278 arg0 = CALL_EXPR_ARG (exp, 0);
28279 arg1 = CALL_EXPR_ARG (exp, 1);
28280 op0 = expand_normal (arg0);
28281 op1 = expand_normal (arg1);
28282 icode = (fcode == IX86_BUILTIN_BEXTRI32
28283 ? CODE_FOR_tbm_bextri_si
28284 : CODE_FOR_tbm_bextri_di);
28285 if (!CONST_INT_P (op1))
28286 {
28287 error ("the last argument must be an immediate");
28288 return const0_rtx;
28289 }
28290 else
28291 {
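/* The control operand packs the bit-field length into bits 15:8 and
   the starting (LSB) bit position into bits 7:0.  For example, a
   control value of 0x0410 extracts a 4-bit field starting at bit 16.  */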
28292 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
28293 unsigned char lsb_index = INTVAL (op1) & 0xFF;
28294 op1 = GEN_INT (length);
28295 op2 = GEN_INT (lsb_index);
28296 pat = GEN_FCN (icode) (target, op0, op1, op2);
28297 if (pat)
28298 emit_insn (pat);
28299 return target;
28300 }
28301
28302 case IX86_BUILTIN_RDRAND16_STEP:
28303 icode = CODE_FOR_rdrandhi_1;
28304 mode0 = HImode;
28305 goto rdrand_step;
28306
28307 case IX86_BUILTIN_RDRAND32_STEP:
28308 icode = CODE_FOR_rdrandsi_1;
28309 mode0 = SImode;
28310 goto rdrand_step;
28311
28312 case IX86_BUILTIN_RDRAND64_STEP:
28313 icode = CODE_FOR_rdranddi_1;
28314 mode0 = DImode;
28315
28316 rdrand_step:
28317 op0 = gen_reg_rtx (mode0);
28318 emit_insn (GEN_FCN (icode) (op0));
28319
28320 arg0 = CALL_EXPR_ARG (exp, 0);
28321 op1 = expand_normal (arg0);
28322 if (!address_operand (op1, VOIDmode))
28323 {
28324 op1 = convert_memory_address (Pmode, op1);
28325 op1 = copy_addr_to_reg (op1);
28326 }
28327 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
28328
28329 op1 = gen_reg_rtx (SImode);
28330 emit_move_insn (op1, CONST1_RTX (SImode));
28331
28332 /* Emit SImode conditional move. */
28333 if (mode0 == HImode)
28334 {
28335 op2 = gen_reg_rtx (SImode);
28336 emit_insn (gen_zero_extendhisi2 (op2, op0));
28337 }
28338 else if (mode0 == SImode)
28339 op2 = op0;
28340 else
28341 op2 = gen_rtx_SUBREG (SImode, op0, 0);
28342
28343 if (target == 0)
28344 target = gen_reg_rtx (SImode);
28345
28346 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
28347 const0_rtx);
28348 emit_insn (gen_rtx_SET (VOIDmode, target,
28349 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
28350 return target;
28351
28352 case IX86_BUILTIN_GATHERSIV2DF:
28353 icode = CODE_FOR_avx2_gathersiv2df;
28354 goto gather_gen;
28355 case IX86_BUILTIN_GATHERSIV4DF:
28356 icode = CODE_FOR_avx2_gathersiv4df;
28357 goto gather_gen;
28358 case IX86_BUILTIN_GATHERDIV2DF:
28359 icode = CODE_FOR_avx2_gatherdiv2df;
28360 goto gather_gen;
28361 case IX86_BUILTIN_GATHERDIV4DF:
28362 icode = CODE_FOR_avx2_gatherdiv4df;
28363 goto gather_gen;
28364 case IX86_BUILTIN_GATHERSIV4SF:
28365 icode = CODE_FOR_avx2_gathersiv4sf;
28366 goto gather_gen;
28367 case IX86_BUILTIN_GATHERSIV8SF:
28368 icode = CODE_FOR_avx2_gathersiv8sf;
28369 goto gather_gen;
28370 case IX86_BUILTIN_GATHERDIV4SF:
28371 icode = CODE_FOR_avx2_gatherdiv4sf;
28372 goto gather_gen;
28373 case IX86_BUILTIN_GATHERDIV8SF:
28374 icode = CODE_FOR_avx2_gatherdiv4sf256;
28375 goto gather_gen;
28376 case IX86_BUILTIN_GATHERSIV2DI:
28377 icode = CODE_FOR_avx2_gathersiv2di;
28378 goto gather_gen;
28379 case IX86_BUILTIN_GATHERSIV4DI:
28380 icode = CODE_FOR_avx2_gathersiv4di;
28381 goto gather_gen;
28382 case IX86_BUILTIN_GATHERDIV2DI:
28383 icode = CODE_FOR_avx2_gatherdiv2di;
28384 goto gather_gen;
28385 case IX86_BUILTIN_GATHERDIV4DI:
28386 icode = CODE_FOR_avx2_gatherdiv4di;
28387 goto gather_gen;
28388 case IX86_BUILTIN_GATHERSIV4SI:
28389 icode = CODE_FOR_avx2_gathersiv4si;
28390 goto gather_gen;
28391 case IX86_BUILTIN_GATHERSIV8SI:
28392 icode = CODE_FOR_avx2_gathersiv8si;
28393 goto gather_gen;
28394 case IX86_BUILTIN_GATHERDIV4SI:
28395 icode = CODE_FOR_avx2_gatherdiv4si;
28396 goto gather_gen;
28397 case IX86_BUILTIN_GATHERDIV8SI:
28398 icode = CODE_FOR_avx2_gatherdiv4si256;
28399
28400 gather_gen:
28401 arg0 = CALL_EXPR_ARG (exp, 0);
28402 arg1 = CALL_EXPR_ARG (exp, 1);
28403 arg2 = CALL_EXPR_ARG (exp, 2);
28404 arg3 = CALL_EXPR_ARG (exp, 3);
28405 arg4 = CALL_EXPR_ARG (exp, 4);
28406 op0 = expand_normal (arg0);
28407 op1 = expand_normal (arg1);
28408 op2 = expand_normal (arg2);
28409 op3 = expand_normal (arg3);
28410 op4 = expand_normal (arg4);
28411 /* Note the arg order is different from the operand order. */
28412 mode0 = insn_data[icode].operand[1].mode;
28413 mode1 = insn_data[icode].operand[2].mode;
28414 mode2 = insn_data[icode].operand[3].mode;
28415 mode3 = insn_data[icode].operand[4].mode;
28416 mode4 = insn_data[icode].operand[5].mode;
28417
28418 if (target == NULL_RTX)
28419 target = gen_reg_rtx (insn_data[icode].operand[0].mode);
28420
28421 /* Force the memory operand to use only a base register here.  But
28422 we don't want to do this for the memory operands of other builtin
28423 functions. */
28424 if (GET_MODE (op1) != Pmode)
28425 op1 = convert_to_mode (Pmode, op1, 1);
28426 op1 = force_reg (Pmode, op1);
28427 op1 = gen_rtx_MEM (mode1, op1);
28428
28429 if (!insn_data[icode].operand[1].predicate (op0, mode0))
28430 op0 = copy_to_mode_reg (mode0, op0);
28431 if (!insn_data[icode].operand[2].predicate (op1, mode1))
28432 op1 = copy_to_mode_reg (mode1, op1);
28433 if (!insn_data[icode].operand[3].predicate (op2, mode2))
28434 op2 = copy_to_mode_reg (mode2, op2);
28435 if (!insn_data[icode].operand[4].predicate (op3, mode3))
28436 op3 = copy_to_mode_reg (mode3, op3);
28437 if (!insn_data[icode].operand[5].predicate (op4, mode4))
28438 {
28439 error ("the last argument must be scale 1, 2, 4, 8");
28440 return const0_rtx;
28441 }
28442 pat = GEN_FCN (icode) (target, op0, op1, op2, op3, op4);
28443 if (! pat)
28444 return const0_rtx;
28445 emit_insn (pat);
28446 return target;
28447
28448 default:
28449 break;
28450 }
28451
28452 for (i = 0, d = bdesc_special_args;
28453 i < ARRAY_SIZE (bdesc_special_args);
28454 i++, d++)
28455 if (d->code == fcode)
28456 return ix86_expand_special_args_builtin (d, exp, target);
28457
28458 for (i = 0, d = bdesc_args;
28459 i < ARRAY_SIZE (bdesc_args);
28460 i++, d++)
28461 if (d->code == fcode)
28462 switch (fcode)
28463 {
28464 case IX86_BUILTIN_FABSQ:
28465 case IX86_BUILTIN_COPYSIGNQ:
28466 if (!TARGET_SSE2)
28467 /* Emit a normal call if SSE2 isn't available. */
28468 return expand_call (exp, target, ignore);
28469 default:
28470 return ix86_expand_args_builtin (d, exp, target);
28471 }
28472
28473 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
28474 if (d->code == fcode)
28475 return ix86_expand_sse_comi (d, exp, target);
28476
28477 for (i = 0, d = bdesc_pcmpestr;
28478 i < ARRAY_SIZE (bdesc_pcmpestr);
28479 i++, d++)
28480 if (d->code == fcode)
28481 return ix86_expand_sse_pcmpestr (d, exp, target);
28482
28483 for (i = 0, d = bdesc_pcmpistr;
28484 i < ARRAY_SIZE (bdesc_pcmpistr);
28485 i++, d++)
28486 if (d->code == fcode)
28487 return ix86_expand_sse_pcmpistr (d, exp, target);
28488
28489 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
28490 if (d->code == fcode)
28491 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
28492 (enum ix86_builtin_func_type)
28493 d->flag, d->comparison);
28494
28495 gcc_unreachable ();
28496 }
28497
28498 /* Returns a function decl for a vectorized version of the builtin function
28499 with builtin function code FN and the result vector type TYPE, or NULL_TREE
28500 if it is not available. */
28501
28502 static tree
28503 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
28504 tree type_in)
28505 {
28506 enum machine_mode in_mode, out_mode;
28507 int in_n, out_n;
28508 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
28509
28510 if (TREE_CODE (type_out) != VECTOR_TYPE
28511 || TREE_CODE (type_in) != VECTOR_TYPE
28512 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
28513 return NULL_TREE;
28514
28515 out_mode = TYPE_MODE (TREE_TYPE (type_out));
28516 out_n = TYPE_VECTOR_SUBPARTS (type_out);
28517 in_mode = TYPE_MODE (TREE_TYPE (type_in));
28518 in_n = TYPE_VECTOR_SUBPARTS (type_in);
28519
28520 switch (fn)
28521 {
28522 case BUILT_IN_SQRT:
28523 if (out_mode == DFmode && in_mode == DFmode)
28524 {
28525 if (out_n == 2 && in_n == 2)
28526 return ix86_builtins[IX86_BUILTIN_SQRTPD];
28527 else if (out_n == 4 && in_n == 4)
28528 return ix86_builtins[IX86_BUILTIN_SQRTPD256];
28529 }
28530 break;
28531
28532 case BUILT_IN_SQRTF:
28533 if (out_mode == SFmode && in_mode == SFmode)
28534 {
28535 if (out_n == 4 && in_n == 4)
28536 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
28537 else if (out_n == 8 && in_n == 8)
28538 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
28539 }
28540 break;
28541
28542 case BUILT_IN_LRINT:
28543 if (out_mode == SImode && out_n == 4
28544 && in_mode == DFmode && in_n == 2)
28545 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
28546 break;
28547
28548 case BUILT_IN_LRINTF:
28549 if (out_mode == SImode && in_mode == SFmode)
28550 {
28551 if (out_n == 4 && in_n == 4)
28552 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
28553 else if (out_n == 8 && in_n == 8)
28554 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
28555 }
28556 break;
28557
28558 case BUILT_IN_COPYSIGN:
28559 if (out_mode == DFmode && in_mode == DFmode)
28560 {
28561 if (out_n == 2 && in_n == 2)
28562 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
28563 else if (out_n == 4 && in_n == 4)
28564 return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
28565 }
28566 break;
28567
28568 case BUILT_IN_COPYSIGNF:
28569 if (out_mode == SFmode && in_mode == SFmode)
28570 {
28571 if (out_n == 4 && in_n == 4)
28572 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
28573 else if (out_n == 8 && in_n == 8)
28574 return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
28575 }
28576 break;
28577
28578 case BUILT_IN_FLOOR:
28579 /* The round insn does not trap on denormals. */
28580 if (flag_trapping_math || !TARGET_ROUND)
28581 break;
28582
28583 if (out_mode == DFmode && in_mode == DFmode)
28584 {
28585 if (out_n == 2 && in_n == 2)
28586 return ix86_builtins[IX86_BUILTIN_FLOORPD];
28587 else if (out_n == 4 && in_n == 4)
28588 return ix86_builtins[IX86_BUILTIN_FLOORPD256];
28589 }
28590 break;
28591
28592 case BUILT_IN_FLOORF:
28593 /* The round insn does not trap on denormals. */
28594 if (flag_trapping_math || !TARGET_ROUND)
28595 break;
28596
28597 if (out_mode == SFmode && in_mode == SFmode)
28598 {
28599 if (out_n == 4 && in_n == 4)
28600 return ix86_builtins[IX86_BUILTIN_FLOORPS];
28601 else if (out_n == 8 && in_n == 8)
28602 return ix86_builtins[IX86_BUILTIN_FLOORPS256];
28603 }
28604 break;
28605
28606 case BUILT_IN_CEIL:
28607 /* The round insn does not trap on denormals. */
28608 if (flag_trapping_math || !TARGET_ROUND)
28609 break;
28610
28611 if (out_mode == DFmode && in_mode == DFmode)
28612 {
28613 if (out_n == 2 && in_n == 2)
28614 return ix86_builtins[IX86_BUILTIN_CEILPD];
28615 else if (out_n == 4 && in_n == 4)
28616 return ix86_builtins[IX86_BUILTIN_CEILPD256];
28617 }
28618 break;
28619
28620 case BUILT_IN_CEILF:
28621 /* The round insn does not trap on denormals. */
28622 if (flag_trapping_math || !TARGET_ROUND)
28623 break;
28624
28625 if (out_mode == SFmode && in_mode == SFmode)
28626 {
28627 if (out_n == 4 && in_n == 4)
28628 return ix86_builtins[IX86_BUILTIN_CEILPS];
28629 else if (out_n == 8 && in_n == 8)
28630 return ix86_builtins[IX86_BUILTIN_CEILPS256];
28631 }
28632 break;
28633
28634 case BUILT_IN_TRUNC:
28635 /* The round insn does not trap on denormals. */
28636 if (flag_trapping_math || !TARGET_ROUND)
28637 break;
28638
28639 if (out_mode == DFmode && in_mode == DFmode)
28640 {
28641 if (out_n == 2 && in_n == 2)
28642 return ix86_builtins[IX86_BUILTIN_TRUNCPD];
28643 else if (out_n == 4 && in_n == 4)
28644 return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
28645 }
28646 break;
28647
28648 case BUILT_IN_TRUNCF:
28649 /* The round insn does not trap on denormals. */
28650 if (flag_trapping_math || !TARGET_ROUND)
28651 break;
28652
28653 if (out_mode == SFmode && in_mode == SFmode)
28654 {
28655 if (out_n == 4 && in_n == 4)
28656 return ix86_builtins[IX86_BUILTIN_TRUNCPS];
28657 else if (out_n == 8 && in_n == 8)
28658 return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
28659 }
28660 break;
28661
28662 case BUILT_IN_RINT:
28663 /* The round insn does not trap on denormals. */
28664 if (flag_trapping_math || !TARGET_ROUND)
28665 break;
28666
28667 if (out_mode == DFmode && in_mode == DFmode)
28668 {
28669 if (out_n == 2 && in_n == 2)
28670 return ix86_builtins[IX86_BUILTIN_RINTPD];
28671 else if (out_n == 4 && in_n == 4)
28672 return ix86_builtins[IX86_BUILTIN_RINTPD256];
28673 }
28674 break;
28675
28676 case BUILT_IN_RINTF:
28677 /* The round insn does not trap on denormals. */
28678 if (flag_trapping_math || !TARGET_ROUND)
28679 break;
28680
28681 if (out_mode == SFmode && in_mode == SFmode)
28682 {
28683 if (out_n == 4 && in_n == 4)
28684 return ix86_builtins[IX86_BUILTIN_RINTPS];
28685 else if (out_n == 8 && in_n == 8)
28686 return ix86_builtins[IX86_BUILTIN_RINTPS256];
28687 }
28688 break;
28689
28690 case BUILT_IN_ROUND:
28691 /* The round insn does not trap on denormals. */
28692 if (flag_trapping_math || !TARGET_ROUND)
28693 break;
28694
28695 if (out_mode == DFmode && in_mode == DFmode)
28696 {
28697 if (out_n == 2 && in_n == 2)
28698 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ];
28699 else if (out_n == 4 && in_n == 4)
28700 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ256];
28701 }
28702 break;
28703
28704 case BUILT_IN_ROUNDF:
28705 /* The round insn does not trap on denormals. */
28706 if (flag_trapping_math || !TARGET_ROUND)
28707 break;
28708
28709 if (out_mode == SFmode && in_mode == SFmode)
28710 {
28711 if (out_n == 4 && in_n == 4)
28712 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ];
28713 else if (out_n == 8 && in_n == 8)
28714 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ256];
28715 }
28716 break;
28717
28718 case BUILT_IN_FMA:
28719 if (out_mode == DFmode && in_mode == DFmode)
28720 {
28721 if (out_n == 2 && in_n == 2)
28722 return ix86_builtins[IX86_BUILTIN_VFMADDPD];
28723 if (out_n == 4 && in_n == 4)
28724 return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
28725 }
28726 break;
28727
28728 case BUILT_IN_FMAF:
28729 if (out_mode == SFmode && in_mode == SFmode)
28730 {
28731 if (out_n == 4 && in_n == 4)
28732 return ix86_builtins[IX86_BUILTIN_VFMADDPS];
28733 if (out_n == 8 && in_n == 8)
28734 return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
28735 }
28736 break;
28737
28738 default:
28739 break;
28740 }
28741
28742 /* Dispatch to a handler for a vectorization library. */
28743 if (ix86_veclib_handler)
28744 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
28745 type_in);
28746
28747 return NULL_TREE;
28748 }
28749
28750 /* Handler for an SVML-style interface to
28751 a library with vectorized intrinsics. */
28752
28753 static tree
28754 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
28755 {
28756 char name[20];
28757 tree fntype, new_fndecl, args;
28758 unsigned arity;
28759 const char *bname;
28760 enum machine_mode el_mode, in_mode;
28761 int n, in_n;
28762
28763 /* The SVML library is suitable for unsafe math only. */
28764 if (!flag_unsafe_math_optimizations)
28765 return NULL_TREE;
28766
28767 el_mode = TYPE_MODE (TREE_TYPE (type_out));
28768 n = TYPE_VECTOR_SUBPARTS (type_out);
28769 in_mode = TYPE_MODE (TREE_TYPE (type_in));
28770 in_n = TYPE_VECTOR_SUBPARTS (type_in);
28771 if (el_mode != in_mode
28772 || n != in_n)
28773 return NULL_TREE;
28774
28775 switch (fn)
28776 {
28777 case BUILT_IN_EXP:
28778 case BUILT_IN_LOG:
28779 case BUILT_IN_LOG10:
28780 case BUILT_IN_POW:
28781 case BUILT_IN_TANH:
28782 case BUILT_IN_TAN:
28783 case BUILT_IN_ATAN:
28784 case BUILT_IN_ATAN2:
28785 case BUILT_IN_ATANH:
28786 case BUILT_IN_CBRT:
28787 case BUILT_IN_SINH:
28788 case BUILT_IN_SIN:
28789 case BUILT_IN_ASINH:
28790 case BUILT_IN_ASIN:
28791 case BUILT_IN_COSH:
28792 case BUILT_IN_COS:
28793 case BUILT_IN_ACOSH:
28794 case BUILT_IN_ACOS:
28795 if (el_mode != DFmode || n != 2)
28796 return NULL_TREE;
28797 break;
28798
28799 case BUILT_IN_EXPF:
28800 case BUILT_IN_LOGF:
28801 case BUILT_IN_LOG10F:
28802 case BUILT_IN_POWF:
28803 case BUILT_IN_TANHF:
28804 case BUILT_IN_TANF:
28805 case BUILT_IN_ATANF:
28806 case BUILT_IN_ATAN2F:
28807 case BUILT_IN_ATANHF:
28808 case BUILT_IN_CBRTF:
28809 case BUILT_IN_SINHF:
28810 case BUILT_IN_SINF:
28811 case BUILT_IN_ASINHF:
28812 case BUILT_IN_ASINF:
28813 case BUILT_IN_COSHF:
28814 case BUILT_IN_COSF:
28815 case BUILT_IN_ACOSHF:
28816 case BUILT_IN_ACOSF:
28817 if (el_mode != SFmode || n != 4)
28818 return NULL_TREE;
28819 break;
28820
28821 default:
28822 return NULL_TREE;
28823 }
28824
28825 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
28826
28827 if (fn == BUILT_IN_LOGF)
28828 strcpy (name, "vmlsLn4");
28829 else if (fn == BUILT_IN_LOG)
28830 strcpy (name, "vmldLn2");
28831 else if (n == 4)
28832 {
28833 sprintf (name, "vmls%s", bname+10);
28834 name[strlen (name)-1] = '4';
28835 }
28836 else
28837 sprintf (name, "vmld%s2", bname+10);
28838
28839 /* Convert to uppercase. */
28840 name[4] &= ~0x20;
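/* For example, BUILT_IN_SINF maps to "vmlsSin4" and BUILT_IN_SIN maps
   to "vmldSin2", while BUILT_IN_LOGF and BUILT_IN_LOG use the
   special-cased names "vmlsLn4" and "vmldLn2" above.  */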
28841
28842 arity = 0;
28843 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
28844 args = TREE_CHAIN (args))
28845 arity++;
28846
28847 if (arity == 1)
28848 fntype = build_function_type_list (type_out, type_in, NULL);
28849 else
28850 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
28851
28852 /* Build a function declaration for the vectorized function. */
28853 new_fndecl = build_decl (BUILTINS_LOCATION,
28854 FUNCTION_DECL, get_identifier (name), fntype);
28855 TREE_PUBLIC (new_fndecl) = 1;
28856 DECL_EXTERNAL (new_fndecl) = 1;
28857 DECL_IS_NOVOPS (new_fndecl) = 1;
28858 TREE_READONLY (new_fndecl) = 1;
28859
28860 return new_fndecl;
28861 }
28862
28863 /* Handler for an ACML-style interface to
28864 a library with vectorized intrinsics. */
28865
28866 static tree
28867 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
28868 {
28869 char name[20] = "__vr.._";
28870 tree fntype, new_fndecl, args;
28871 unsigned arity;
28872 const char *bname;
28873 enum machine_mode el_mode, in_mode;
28874 int n, in_n;
28875
28876 /* The ACML library is 64-bit only and suitable for unsafe math only,
28877 as it does not correctly support parts of IEEE arithmetic with the
28878 required precision, such as denormals. */
28879 if (!TARGET_64BIT
28880 || !flag_unsafe_math_optimizations)
28881 return NULL_TREE;
28882
28883 el_mode = TYPE_MODE (TREE_TYPE (type_out));
28884 n = TYPE_VECTOR_SUBPARTS (type_out);
28885 in_mode = TYPE_MODE (TREE_TYPE (type_in));
28886 in_n = TYPE_VECTOR_SUBPARTS (type_in);
28887 if (el_mode != in_mode
28888 || n != in_n)
28889 return NULL_TREE;
28890
28891 switch (fn)
28892 {
28893 case BUILT_IN_SIN:
28894 case BUILT_IN_COS:
28895 case BUILT_IN_EXP:
28896 case BUILT_IN_LOG:
28897 case BUILT_IN_LOG2:
28898 case BUILT_IN_LOG10:
28899 name[4] = 'd';
28900 name[5] = '2';
28901 if (el_mode != DFmode
28902 || n != 2)
28903 return NULL_TREE;
28904 break;
28905
28906 case BUILT_IN_SINF:
28907 case BUILT_IN_COSF:
28908 case BUILT_IN_EXPF:
28909 case BUILT_IN_POWF:
28910 case BUILT_IN_LOGF:
28911 case BUILT_IN_LOG2F:
28912 case BUILT_IN_LOG10F:
28913 name[4] = 's';
28914 name[5] = '4';
28915 if (el_mode != SFmode
28916 || n != 4)
28917 return NULL_TREE;
28918 break;
28919
28920 default:
28921 return NULL_TREE;
28922 }
28923
28924 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
28925 sprintf (name + 7, "%s", bname+10);
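/* For example, BUILT_IN_SIN maps to "__vrd2_sin" and BUILT_IN_LOGF
   maps to "__vrs4_logf".  */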
28926
28927 arity = 0;
28928 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
28929 args = TREE_CHAIN (args))
28930 arity++;
28931
28932 if (arity == 1)
28933 fntype = build_function_type_list (type_out, type_in, NULL);
28934 else
28935 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
28936
28937 /* Build a function declaration for the vectorized function. */
28938 new_fndecl = build_decl (BUILTINS_LOCATION,
28939 FUNCTION_DECL, get_identifier (name), fntype);
28940 TREE_PUBLIC (new_fndecl) = 1;
28941 DECL_EXTERNAL (new_fndecl) = 1;
28942 DECL_IS_NOVOPS (new_fndecl) = 1;
28943 TREE_READONLY (new_fndecl) = 1;
28944
28945 return new_fndecl;
28946 }
28947
28948
28949 /* Returns a decl of a function that implements conversion of an integer vector
28950 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
28951 are the types involved when converting according to CODE.
28952 Return NULL_TREE if it is not available. */
28953
28954 static tree
28955 ix86_vectorize_builtin_conversion (unsigned int code,
28956 tree dest_type, tree src_type)
28957 {
28958 if (! TARGET_SSE2)
28959 return NULL_TREE;
28960
28961 switch (code)
28962 {
28963 case FLOAT_EXPR:
28964 switch (TYPE_MODE (src_type))
28965 {
28966 case V4SImode:
28967 switch (TYPE_MODE (dest_type))
28968 {
28969 case V4SFmode:
28970 return (TYPE_UNSIGNED (src_type)
28971 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
28972 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
28973 case V4DFmode:
28974 return (TYPE_UNSIGNED (src_type)
28975 ? NULL_TREE
28976 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
28977 default:
28978 return NULL_TREE;
28979 }
28980 break;
28981 case V8SImode:
28982 switch (TYPE_MODE (dest_type))
28983 {
28984 case V8SFmode:
28985 return (TYPE_UNSIGNED (src_type)
28986 ? NULL_TREE
28987 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS256]);
28988 default:
28989 return NULL_TREE;
28990 }
28991 break;
28992 default:
28993 return NULL_TREE;
28994 }
28995
28996 case FIX_TRUNC_EXPR:
28997 switch (TYPE_MODE (dest_type))
28998 {
28999 case V4SImode:
29000 switch (TYPE_MODE (src_type))
29001 {
29002 case V4SFmode:
29003 return (TYPE_UNSIGNED (dest_type)
29004 ? NULL_TREE
29005 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
29006 case V4DFmode:
29007 return (TYPE_UNSIGNED (dest_type)
29008 ? NULL_TREE
29009 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
29010 default:
29011 return NULL_TREE;
29012 }
29013 break;
29014
29015 case V8SImode:
29016 switch (TYPE_MODE (src_type))
29017 {
29018 case V8SFmode:
29019 return (TYPE_UNSIGNED (dest_type)
29020 ? NULL_TREE
29021 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
29022 default:
29023 return NULL_TREE;
29024 }
29025 break;
29026
29027 default:
29028 return NULL_TREE;
29029 }
29030
29031 default:
29032 return NULL_TREE;
29033 }
29034
29035 return NULL_TREE;
29036 }
29037
29038 /* Returns a code for a target-specific builtin that implements
29039 reciprocal of the function, or NULL_TREE if not available. */
29040
29041 static tree
29042 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
29043 bool sqrt ATTRIBUTE_UNUSED)
29044 {
29045 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
29046 && flag_finite_math_only && !flag_trapping_math
29047 && flag_unsafe_math_optimizations))
29048 return NULL_TREE;
29049
29050 if (md_fn)
29051 /* Machine dependent builtins. */
29052 switch (fn)
29053 {
29054 /* Vectorized version of sqrt to rsqrt conversion. */
29055 case IX86_BUILTIN_SQRTPS_NR:
29056 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
29057
29058 case IX86_BUILTIN_SQRTPS_NR256:
29059 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
29060
29061 default:
29062 return NULL_TREE;
29063 }
29064 else
29065 /* Normal builtins. */
29066 switch (fn)
29067 {
29068 /* Sqrt to rsqrt conversion. */
29069 case BUILT_IN_SQRTF:
29070 return ix86_builtins[IX86_BUILTIN_RSQRTF];
29071
29072 default:
29073 return NULL_TREE;
29074 }
29075 }
29076 \f
29077 /* Helper for avx_vpermilps256_operand et al. This is also used by
29078 the expansion functions to turn the parallel back into a mask.
29079 The return value is 0 for no match and the imm8+1 for a match. */
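/* For example, a V4SFmode parallel (2 3 0 1) reconstructs to the
   vpermilps immediate 0x4e (2 | 3<<2 | 0<<4 | 1<<6), so the function
   returns 0x4f; a V4DFmode parallel (1 0 3 2), which swaps the elements
   within each 128-bit lane, reconstructs to the vpermilpd immediate 0x5
   and the function returns 0x6.  */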
29080
29081 int
29082 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
29083 {
29084 unsigned i, nelt = GET_MODE_NUNITS (mode);
29085 unsigned mask = 0;
29086 unsigned char ipar[8];
29087
29088 if (XVECLEN (par, 0) != (int) nelt)
29089 return 0;
29090
29091 /* Validate that all of the elements are constants, and not totally
29092 out of range. Copy the data into an integral array to make the
29093 subsequent checks easier. */
29094 for (i = 0; i < nelt; ++i)
29095 {
29096 rtx er = XVECEXP (par, 0, i);
29097 unsigned HOST_WIDE_INT ei;
29098
29099 if (!CONST_INT_P (er))
29100 return 0;
29101 ei = INTVAL (er);
29102 if (ei >= nelt)
29103 return 0;
29104 ipar[i] = ei;
29105 }
29106
29107 switch (mode)
29108 {
29109 case V4DFmode:
29110 /* In the 256-bit DFmode case, we can only move elements within
29111 a 128-bit lane. */
29112 for (i = 0; i < 2; ++i)
29113 {
29114 if (ipar[i] >= 2)
29115 return 0;
29116 mask |= ipar[i] << i;
29117 }
29118 for (i = 2; i < 4; ++i)
29119 {
29120 if (ipar[i] < 2)
29121 return 0;
29122 mask |= (ipar[i] - 2) << i;
29123 }
29124 break;
29125
29126 case V8SFmode:
29127 /* In the 256-bit SFmode case, we have full freedom of movement
29128 within the low 128-bit lane, but the high 128-bit lane must
29129 mirror the exact same pattern. */
29130 for (i = 0; i < 4; ++i)
29131 if (ipar[i] + 4 != ipar[i + 4])
29132 return 0;
29133 nelt = 4;
29134 /* FALLTHRU */
29135
29136 case V2DFmode:
29137 case V4SFmode:
29138 /* In the 128-bit case, we have full freedom in the placement of
29139 the elements from the source operand. */
29140 for (i = 0; i < nelt; ++i)
29141 mask |= ipar[i] << (i * (nelt / 2));
29142 break;
29143
29144 default:
29145 gcc_unreachable ();
29146 }
29147
29148 /* Make sure success has a non-zero value by adding one. */
29149 return mask + 1;
29150 }
29151
29152 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
29153 the expansion functions to turn the parallel back into a mask.
29154 The return value is 0 for no match and the imm8+1 for a match. */
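/* For example, a V8SFmode parallel (4 5 6 7 12 13 14 15), which selects
   the high 128-bit lane of each of the two source operands, reconstructs
   to the vperm2f128 immediate 0x31, so the function returns 0x32.  */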
29155
29156 int
29157 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
29158 {
29159 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
29160 unsigned mask = 0;
29161 unsigned char ipar[8];
29162
29163 if (XVECLEN (par, 0) != (int) nelt)
29164 return 0;
29165
29166 /* Validate that all of the elements are constants, and not totally
29167 out of range. Copy the data into an integral array to make the
29168 subsequent checks easier. */
29169 for (i = 0; i < nelt; ++i)
29170 {
29171 rtx er = XVECEXP (par, 0, i);
29172 unsigned HOST_WIDE_INT ei;
29173
29174 if (!CONST_INT_P (er))
29175 return 0;
29176 ei = INTVAL (er);
29177 if (ei >= 2 * nelt)
29178 return 0;
29179 ipar[i] = ei;
29180 }
29181
29182 /* Validate that each half of the permute consists of consecutive elements. */
29183 for (i = 0; i < nelt2 - 1; ++i)
29184 if (ipar[i] + 1 != ipar[i + 1])
29185 return 0;
29186 for (i = nelt2; i < nelt - 1; ++i)
29187 if (ipar[i] + 1 != ipar[i + 1])
29188 return 0;
29189
29190 /* Reconstruct the mask. */
29191 for (i = 0; i < 2; ++i)
29192 {
29193 unsigned e = ipar[i * nelt2];
29194 if (e % nelt2)
29195 return 0;
29196 e /= nelt2;
29197 mask |= e << (i * 4);
29198 }
29199
29200 /* Make sure success has a non-zero value by adding one. */
29201 return mask + 1;
29202 }
29203 \f
29204
29205 /* Store OPERAND to memory after reload is completed.  This means
29206 that we can't easily use assign_stack_local. */
29207 rtx
29208 ix86_force_to_memory (enum machine_mode mode, rtx operand)
29209 {
29210 rtx result;
29211
29212 gcc_assert (reload_completed);
29213 if (ix86_using_red_zone ())
29214 {
29215 result = gen_rtx_MEM (mode,
29216 gen_rtx_PLUS (Pmode,
29217 stack_pointer_rtx,
29218 GEN_INT (-RED_ZONE_SIZE)));
29219 emit_move_insn (result, operand);
29220 }
29221 else if (TARGET_64BIT)
29222 {
29223 switch (mode)
29224 {
29225 case HImode:
29226 case SImode:
29227 operand = gen_lowpart (DImode, operand);
29228 /* FALLTHRU */
29229 case DImode:
29230 emit_insn (
29231 gen_rtx_SET (VOIDmode,
29232 gen_rtx_MEM (DImode,
29233 gen_rtx_PRE_DEC (DImode,
29234 stack_pointer_rtx)),
29235 operand));
29236 break;
29237 default:
29238 gcc_unreachable ();
29239 }
29240 result = gen_rtx_MEM (mode, stack_pointer_rtx);
29241 }
29242 else
29243 {
29244 switch (mode)
29245 {
29246 case DImode:
29247 {
29248 rtx operands[2];
29249 split_double_mode (mode, &operand, 1, operands, operands + 1);
29250 emit_insn (
29251 gen_rtx_SET (VOIDmode,
29252 gen_rtx_MEM (SImode,
29253 gen_rtx_PRE_DEC (Pmode,
29254 stack_pointer_rtx)),
29255 operands[1]));
29256 emit_insn (
29257 gen_rtx_SET (VOIDmode,
29258 gen_rtx_MEM (SImode,
29259 gen_rtx_PRE_DEC (Pmode,
29260 stack_pointer_rtx)),
29261 operands[0]));
29262 }
29263 break;
29264 case HImode:
29265 /* Store HImodes as SImodes. */
29266 operand = gen_lowpart (SImode, operand);
29267 /* FALLTHRU */
29268 case SImode:
29269 emit_insn (
29270 gen_rtx_SET (VOIDmode,
29271 gen_rtx_MEM (GET_MODE (operand),
29272 gen_rtx_PRE_DEC (SImode,
29273 stack_pointer_rtx)),
29274 operand));
29275 break;
29276 default:
29277 gcc_unreachable ();
29278 }
29279 result = gen_rtx_MEM (mode, stack_pointer_rtx);
29280 }
29281 return result;
29282 }
29283
29284 /* Free the operand from memory. */
29285 void
29286 ix86_free_from_memory (enum machine_mode mode)
29287 {
29288 if (!ix86_using_red_zone ())
29289 {
29290 int size;
29291
29292 if (mode == DImode || TARGET_64BIT)
29293 size = 8;
29294 else
29295 size = 4;
29296 /* Use LEA to deallocate stack space.  In peephole2 it will be converted
29297 to a pop or add instruction if registers are available. */
29298 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
29299 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
29300 GEN_INT (size))));
29301 }
29302 }
29303
29304 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
29305
29306 Put float CONST_DOUBLE in the constant pool instead of fp regs.
29307 QImode must go into class Q_REGS.
29308 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
29309 movdf to do mem-to-mem moves through integer regs. */
29310
29311 static reg_class_t
29312 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
29313 {
29314 enum machine_mode mode = GET_MODE (x);
29315
29316 /* We're only allowed to return a subclass of CLASS. Many of the
29317 following checks fail for NO_REGS, so eliminate that early. */
29318 if (regclass == NO_REGS)
29319 return NO_REGS;
29320
29321 /* All classes can load zeros. */
29322 if (x == CONST0_RTX (mode))
29323 return regclass;
29324
29325 /* Force constants into memory if we are loading a (nonzero) constant into
29326 an MMX or SSE register. This is because there are no MMX/SSE instructions
29327 to load from a constant. */
29328 if (CONSTANT_P (x)
29329 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
29330 return NO_REGS;
29331
29332 /* Prefer SSE regs only, if we can use them for math. */
29333 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
29334 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
29335
29336 /* Floating-point constants need more complex checks. */
29337 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
29338 {
29339 /* General regs can load everything. */
29340 if (reg_class_subset_p (regclass, GENERAL_REGS))
29341 return regclass;
29342
29343 /* Floats can load 0 and 1 plus some others. Note that we eliminated
29344 zero above. We only want to wind up preferring 80387 registers if
29345 we plan on doing computation with them. */
29346 if (TARGET_80387
29347 && standard_80387_constant_p (x) > 0)
29348 {
29349 /* Limit class to non-sse. */
29350 if (regclass == FLOAT_SSE_REGS)
29351 return FLOAT_REGS;
29352 if (regclass == FP_TOP_SSE_REGS)
29353 return FP_TOP_REG;
29354 if (regclass == FP_SECOND_SSE_REGS)
29355 return FP_SECOND_REG;
29356 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
29357 return regclass;
29358 }
29359
29360 return NO_REGS;
29361 }
29362
29363 /* Generally when we see PLUS here, it's the function invariant
29364 (plus soft-fp const_int). Which can only be computed into general
29365 regs. */
29366 if (GET_CODE (x) == PLUS)
29367 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
29368
29369 /* QImode constants are easy to load, but non-constant QImode data
29370 must go into Q_REGS. */
29371 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
29372 {
29373 if (reg_class_subset_p (regclass, Q_REGS))
29374 return regclass;
29375 if (reg_class_subset_p (Q_REGS, regclass))
29376 return Q_REGS;
29377 return NO_REGS;
29378 }
29379
29380 return regclass;
29381 }
29382
29383 /* Discourage putting floating-point values in SSE registers unless
29384 SSE math is being used, and likewise for the 387 registers. */
29385 static reg_class_t
29386 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
29387 {
29388 enum machine_mode mode = GET_MODE (x);
29389
29390 /* Restrict the output reload class to the register bank that we are doing
29391 math on. If we would like not to return a subset of CLASS, reject this
29392 alternative: if reload cannot do this, it will still use its choice. */
29394 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
29395 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
29396
29397 if (X87_FLOAT_MODE_P (mode))
29398 {
29399 if (regclass == FP_TOP_SSE_REGS)
29400 return FP_TOP_REG;
29401 else if (regclass == FP_SECOND_SSE_REGS)
29402 return FP_SECOND_REG;
29403 else
29404 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
29405 }
29406
29407 return regclass;
29408 }
29409
29410 static reg_class_t
29411 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
29412 enum machine_mode mode, secondary_reload_info *sri)
29413 {
29414 /* Double-word spills from general registers to non-offsettable memory
29415 references (zero-extended addresses) require special handling. */
29416 if (TARGET_64BIT
29417 && MEM_P (x)
29418 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
29419 && rclass == GENERAL_REGS
29420 && !offsettable_memref_p (x))
29421 {
29422 sri->icode = (in_p
29423 ? CODE_FOR_reload_noff_load
29424 : CODE_FOR_reload_noff_store);
29425 /* Add the cost of moving address to a temporary. */
29426 sri->extra_cost = 1;
29427
29428 return NO_REGS;
29429 }
29430
29431 /* QImode spills from non-QI registers require an
29432 intermediate register on 32-bit targets. */
29433 if (!TARGET_64BIT
29434 && !in_p && mode == QImode
29435 && (rclass == GENERAL_REGS
29436 || rclass == LEGACY_REGS
29437 || rclass == INDEX_REGS))
29438 {
29439 int regno;
29440
29441 if (REG_P (x))
29442 regno = REGNO (x);
29443 else
29444 regno = -1;
29445
29446 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
29447 regno = true_regnum (x);
29448
29449 /* Return Q_REGS if the operand is in memory. */
29450 if (regno == -1)
29451 return Q_REGS;
29452 }
29453
29454 /* This condition handles the corner case where an expression involving
29455 pointers gets vectorized. We're trying to use the address of a
29456 stack slot as a vector initializer.
29457
29458 (set (reg:V2DI 74 [ vect_cst_.2 ])
29459 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
29460
29461 Eventually frame gets turned into sp+offset like this:
29462
29463 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
29464 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
29465 (const_int 392 [0x188]))))
29466
29467 That later gets turned into:
29468
29469 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
29470 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
29471 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
29472
29473 We'll have the following reload recorded:
29474
29475 Reload 0: reload_in (DI) =
29476 (plus:DI (reg/f:DI 7 sp)
29477 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
29478 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
29479 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
29480 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
29481 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
29482 reload_reg_rtx: (reg:V2DI 22 xmm1)
29483
29484 This isn't going to work since SSE instructions can't handle scalar
29485 additions.  Returning GENERAL_REGS forces the addition into an integer
29486 register, and reload can handle subsequent reloads without problems. */
29487
29488 if (in_p && GET_CODE (x) == PLUS
29489 && SSE_CLASS_P (rclass)
29490 && SCALAR_INT_MODE_P (mode))
29491 return GENERAL_REGS;
29492
29493 return NO_REGS;
29494 }
29495
29496 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
29497
29498 static bool
29499 ix86_class_likely_spilled_p (reg_class_t rclass)
29500 {
29501 switch (rclass)
29502 {
29503 case AREG:
29504 case DREG:
29505 case CREG:
29506 case BREG:
29507 case AD_REGS:
29508 case SIREG:
29509 case DIREG:
29510 case SSE_FIRST_REG:
29511 case FP_TOP_REG:
29512 case FP_SECOND_REG:
29513 return true;
29514
29515 default:
29516 break;
29517 }
29518
29519 return false;
29520 }
29521
29522 /* If we are copying between general and FP registers, we need a memory
29523 location. The same is true for SSE and MMX registers.
29524
29525 To optimize register_move_cost performance, provide an inline variant.
29526
29527 The macro can't work reliably when one of the CLASSES is a class containing
29528 registers from multiple units (SSE, MMX, integer). We avoid this by never
29529 combining those units in a single alternative in the machine description.
29530 Ensure that this constraint holds to avoid unexpected surprises.
29531
29532 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
29533 enforce these sanity checks. */
29534
29535 static inline bool
29536 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
29537 enum machine_mode mode, int strict)
29538 {
29539 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
29540 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
29541 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
29542 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
29543 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
29544 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
29545 {
29546 gcc_assert (!strict);
29547 return true;
29548 }
29549
29550 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
29551 return true;
29552
29553 /* ??? This is a lie. We do have moves between mmx/general, and between
29554 mmx/sse2. But by saying we need secondary memory we discourage the
29555 register allocator from using the mmx registers unless needed. */
29556 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
29557 return true;
29558
29559 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
29560 {
29561 /* SSE1 doesn't have any direct moves from other classes. */
29562 if (!TARGET_SSE2)
29563 return true;
29564
29565 /* If the target says that inter-unit moves are more expensive
29566 than moving through memory, then don't generate them. */
29567 if (!TARGET_INTER_UNIT_MOVES)
29568 return true;
29569
29570 /* Between SSE and general, we have moves no larger than word size. */
29571 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
29572 return true;
29573 }
29574
29575 return false;
29576 }
29577
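/* Example (assumed scenario, for illustration only): on a 32-bit target a
   DImode move between SSE_REGS and GENERAL_REGS trips the word-size check
   above (8 bytes > UNITS_PER_WORD == 4), so the value goes through a stack
   slot even when TARGET_INTER_UNIT_MOVES is enabled; a DImode move between
   two general-register alternatives needs no secondary memory.  */
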
29578 bool
29579 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
29580 enum machine_mode mode, int strict)
29581 {
29582 return inline_secondary_memory_needed (class1, class2, mode, strict);
29583 }
29584
29585 /* Implement the TARGET_CLASS_MAX_NREGS hook.
29586
29587 On the 80386, this is the size of MODE in words,
29588 except in the FP regs, where a single reg is always enough. */
29589
29590 static unsigned char
29591 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
29592 {
29593 if (MAYBE_INTEGER_CLASS_P (rclass))
29594 {
29595 if (mode == XFmode)
29596 return (TARGET_64BIT ? 2 : 3);
29597 else if (mode == XCmode)
29598 return (TARGET_64BIT ? 4 : 6);
29599 else
29600 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
29601 }
29602 else
29603 {
29604 if (COMPLEX_MODE_P (mode))
29605 return 2;
29606 else
29607 return 1;
29608 }
29609 }
29610
29611 /* Return true if the registers in CLASS cannot represent the change from
29612 modes FROM to TO. */
29613
29614 bool
29615 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
29616 enum reg_class regclass)
29617 {
29618 if (from == to)
29619 return false;
29620
29621 /* x87 registers can't do subreg at all, as all values are reformatted
29622 to extended precision. */
29623 if (MAYBE_FLOAT_CLASS_P (regclass))
29624 return true;
29625
29626 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
29627 {
29628 /* Vector registers do not support QI or HImode loads. If we don't
29629 disallow a change to these modes, reload will assume it's ok to
29630 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
29631 the vec_dupv4hi pattern. */
29632 if (GET_MODE_SIZE (from) < 4)
29633 return true;
29634
29635 /* Vector registers do not support subreg with nonzero offsets, which
29636 are otherwise valid for integer registers. Since we can't see
29637 whether we have a nonzero offset from here, prohibit all
29638 nonparadoxical subregs changing size. */
29639 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
29640 return true;
29641 }
29642
29643 return false;
29644 }
29645
29646 /* Return the cost of moving data of mode M between a
29647 register and memory. A value of 2 is the default; this cost is
29648 relative to those in `REGISTER_MOVE_COST'.
29649
29650 This function is used extensively by register_move_cost, which is used to
29651 build tables at startup, so keep it inline. When IN is 2, return the
29652 maximum of the in and out move costs.
29653
29654 If moving between registers and memory is more expensive than
29655 between two registers, you should define this macro to express the
29656 relative cost.
29657
29658 Also model the increased cost of moving QImode registers in
29659 non-Q_REGS classes.
29660 */
29661 static inline int
29662 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
29663 int in)
29664 {
29665 int cost;
29666 if (FLOAT_CLASS_P (regclass))
29667 {
29668 int index;
29669 switch (mode)
29670 {
29671 case SFmode:
29672 index = 0;
29673 break;
29674 case DFmode:
29675 index = 1;
29676 break;
29677 case XFmode:
29678 index = 2;
29679 break;
29680 default:
29681 return 100;
29682 }
29683 if (in == 2)
29684 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
29685 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
29686 }
29687 if (SSE_CLASS_P (regclass))
29688 {
29689 int index;
29690 switch (GET_MODE_SIZE (mode))
29691 {
29692 case 4:
29693 index = 0;
29694 break;
29695 case 8:
29696 index = 1;
29697 break;
29698 case 16:
29699 index = 2;
29700 break;
29701 default:
29702 return 100;
29703 }
29704 if (in == 2)
29705 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
29706 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
29707 }
29708 if (MMX_CLASS_P (regclass))
29709 {
29710 int index;
29711 switch (GET_MODE_SIZE (mode))
29712 {
29713 case 4:
29714 index = 0;
29715 break;
29716 case 8:
29717 index = 1;
29718 break;
29719 default:
29720 return 100;
29721 }
29722 if (in == 2)
29723 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
29724 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
29725 }
29726 switch (GET_MODE_SIZE (mode))
29727 {
29728 case 1:
29729 if (Q_CLASS_P (regclass) || TARGET_64BIT)
29730 {
29731 if (!in)
29732 return ix86_cost->int_store[0];
29733 if (TARGET_PARTIAL_REG_DEPENDENCY
29734 && optimize_function_for_speed_p (cfun))
29735 cost = ix86_cost->movzbl_load;
29736 else
29737 cost = ix86_cost->int_load[0];
29738 if (in == 2)
29739 return MAX (cost, ix86_cost->int_store[0]);
29740 return cost;
29741 }
29742 else
29743 {
29744 if (in == 2)
29745 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
29746 if (in)
29747 return ix86_cost->movzbl_load;
29748 else
29749 return ix86_cost->int_store[0] + 4;
29750 }
29751 break;
29752 case 2:
29753 if (in == 2)
29754 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
29755 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
29756 default:
29757 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
29758 if (mode == TFmode)
29759 mode = XFmode;
29760 if (in == 2)
29761 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
29762 else if (in)
29763 cost = ix86_cost->int_load[2];
29764 else
29765 cost = ix86_cost->int_store[2];
29766 return (cost * (((int) GET_MODE_SIZE (mode)
29767 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
29768 }
29769 }
29770
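/* Worked example (hypothetical cost-table values, for illustration only):
   if ix86_cost->sse_load[2] were 12 and ix86_cost->sse_store[2] were 16,
   then for a 16-byte value in an SSE class the function above returns
     in == 1  ->  12  (load cost)
     in == 0  ->  16  (store cost)
     in == 2  ->  16  (MAX of the two, as used by register_move_cost).  */
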
29771 static int
29772 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
29773 bool in)
29774 {
29775 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
29776 }
29777
29778
29779 /* Return the cost of moving data from a register in class CLASS1 to
29780 one in class CLASS2.
29781
29782 It is not required that the cost always equal 2 when FROM is the same as TO;
29783 on some machines it is expensive to move between registers if they are not
29784 general registers. */
29785
29786 static int
29787 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
29788 reg_class_t class2_i)
29789 {
29790 enum reg_class class1 = (enum reg_class) class1_i;
29791 enum reg_class class2 = (enum reg_class) class2_i;
29792
29793 /* If we require secondary memory, compute the cost of the store followed
29794 by the load. To avoid bad register allocation choices, this needs
29795 to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
29796
29797 if (inline_secondary_memory_needed (class1, class2, mode, 0))
29798 {
29799 int cost = 1;
29800
29801 cost += inline_memory_move_cost (mode, class1, 2);
29802 cost += inline_memory_move_cost (mode, class2, 2);
29803
29804 /* When copying from a general purpose register we may emit multiple
29805 stores followed by a single load, causing a memory size mismatch stall.
29806 Count this as an arbitrarily high cost of 20. */
29807 if (targetm.class_max_nregs (class1, mode)
29808 > targetm.class_max_nregs (class2, mode))
29809 cost += 20;
29810
29811 /* In the case of FP/MMX moves, the registers actually overlap, and we
29812 have to switch modes in order to treat them differently. */
29813 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
29814 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
29815 cost += 20;
29816
29817 return cost;
29818 }
29819
29820 /* Moves between SSE/MMX and integer unit are expensive. */
29821 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
29822 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
29823
29824 /* ??? By keeping returned value relatively high, we limit the number
29825 of moves between integer and MMX/SSE registers for all targets.
29826 Additionally, high value prevents problem with x86_modes_tieable_p(),
29827 where integer modes in MMX/SSE registers are not tieable
29828 because of missing QImode and HImode moves to, from or between
29829 MMX/SSE registers. */
29830 return MAX (8, ix86_cost->mmxsse_to_integer);
29831
29832 if (MAYBE_FLOAT_CLASS_P (class1))
29833 return ix86_cost->fp_move;
29834 if (MAYBE_SSE_CLASS_P (class1))
29835 return ix86_cost->sse_move;
29836 if (MAYBE_MMX_CLASS_P (class1))
29837 return ix86_cost->mmx_move;
29838 return 2;
29839 }
29840
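/* Worked example (hypothetical numbers, for illustration only): moving an
   SImode value from GENERAL_REGS to SSE_REGS when secondary memory is
   needed costs 1 + store-to-memory + load-from-memory, where each memory
   cost is the MAX of the in/out costs (inline_memory_move_cost with
   IN == 2), plus the extra penalties of 20 for the multi-store/single-load
   and FP/MMX-overlap cases when they apply.  */
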
29841 /* Return TRUE if hard register REGNO can hold a value of machine-mode
29842 MODE. */
29843
29844 bool
29845 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
29846 {
29847 /* Flags and only flags can only hold CCmode values. */
29848 if (CC_REGNO_P (regno))
29849 return GET_MODE_CLASS (mode) == MODE_CC;
29850 if (GET_MODE_CLASS (mode) == MODE_CC
29851 || GET_MODE_CLASS (mode) == MODE_RANDOM
29852 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
29853 return false;
29854 if (FP_REGNO_P (regno))
29855 return VALID_FP_MODE_P (mode);
29856 if (SSE_REGNO_P (regno))
29857 {
29858 /* We implement the move patterns for all vector modes into and
29859 out of SSE registers, even when no operation instructions
29860 are available. OImode move is available only when AVX is
29861 enabled. */
29862 return ((TARGET_AVX && mode == OImode)
29863 || VALID_AVX256_REG_MODE (mode)
29864 || VALID_SSE_REG_MODE (mode)
29865 || VALID_SSE2_REG_MODE (mode)
29866 || VALID_MMX_REG_MODE (mode)
29867 || VALID_MMX_REG_MODE_3DNOW (mode));
29868 }
29869 if (MMX_REGNO_P (regno))
29870 {
29871 /* We implement the move patterns for 3DNOW modes even in MMX mode,
29872 so if the register is available at all, then we can move data of
29873 the given mode into or out of it. */
29874 return (VALID_MMX_REG_MODE (mode)
29875 || VALID_MMX_REG_MODE_3DNOW (mode));
29876 }
29877
29878 if (mode == QImode)
29879 {
29880 /* Take care with QImode values - they can be in non-QI regs,
29881 but then they do cause partial register stalls. */
29882 if (regno <= BX_REG || TARGET_64BIT)
29883 return true;
29884 if (!TARGET_PARTIAL_REG_STALL)
29885 return true;
29886 return !can_create_pseudo_p ();
29887 }
29888 /* We handle both integers and floats in the general purpose registers. */
29889 else if (VALID_INT_MODE_P (mode))
29890 return true;
29891 else if (VALID_FP_MODE_P (mode))
29892 return true;
29893 else if (VALID_DFP_MODE_P (mode))
29894 return true;
29895 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
29896 on to use that value in smaller contexts, this can easily force a
29897 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
29898 supporting DImode, allow it. */
29899 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
29900 return true;
29901
29902 return false;
29903 }
29904
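/* Example of the QImode handling above (assumed tuning, for illustration
   only): on a 32-bit target tuned with TARGET_PARTIAL_REG_STALL, QImode is
   allowed in %esi/%edi only once pseudos can no longer be created, i.e.
   during and after reload; the four QI-capable registers (%eax, %ebx,
   %ecx, %edx) always qualify, and in 64-bit mode every general register
   does.  */
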
29905 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
29906 tieable integer mode. */
29907
29908 static bool
29909 ix86_tieable_integer_mode_p (enum machine_mode mode)
29910 {
29911 switch (mode)
29912 {
29913 case HImode:
29914 case SImode:
29915 return true;
29916
29917 case QImode:
29918 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
29919
29920 case DImode:
29921 return TARGET_64BIT;
29922
29923 default:
29924 return false;
29925 }
29926 }
29927
29928 /* Return true if MODE1 is accessible in a register that can hold MODE2
29929 without copying. That is, all register classes that can hold MODE2
29930 can also hold MODE1. */
29931
29932 bool
29933 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
29934 {
29935 if (mode1 == mode2)
29936 return true;
29937
29938 if (ix86_tieable_integer_mode_p (mode1)
29939 && ix86_tieable_integer_mode_p (mode2))
29940 return true;
29941
29942 /* MODE2 being XFmode implies fp stack or general regs, which means we
29943 can tie any smaller floating point modes to it. Note that we do not
29944 tie this with TFmode. */
29945 if (mode2 == XFmode)
29946 return mode1 == SFmode || mode1 == DFmode;
29947
29948 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
29949 that we can tie it with SFmode. */
29950 if (mode2 == DFmode)
29951 return mode1 == SFmode;
29952
29953 /* If MODE2 is only appropriate for an SSE register, then tie with
29954 any other mode acceptable to SSE registers. */
29955 if (GET_MODE_SIZE (mode2) == 16
29956 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
29957 return (GET_MODE_SIZE (mode1) == 16
29958 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
29959
29960 /* If MODE2 is appropriate for an MMX register, then tie
29961 with any other mode acceptable to MMX registers. */
29962 if (GET_MODE_SIZE (mode2) == 8
29963 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
29964 return (GET_MODE_SIZE (mode1) == 8
29965 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
29966
29967 return false;
29968 }
29969
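/* Examples of the tieability rules above (illustrative only): SImode and
   HImode always tie; DFmode ties only with SFmode; a 16-byte vector mode
   such as V4SFmode ties with other 16-byte modes that SSE registers accept
   (e.g. V2DImode), but not with DImode or SFmode.  */
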
29970 /* Compute a (partial) cost for rtx X. Return true if the complete
29971 cost has been computed, and false if subexpressions should be
29972 scanned. In either case, *TOTAL contains the cost result. */
29973
29974 static bool
29975 ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total,
29976 bool speed)
29977 {
29978 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
29979 enum machine_mode mode = GET_MODE (x);
29980 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
29981
29982 switch (code)
29983 {
29984 case CONST_INT:
29985 case CONST:
29986 case LABEL_REF:
29987 case SYMBOL_REF:
29988 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
29989 *total = 3;
29990 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
29991 *total = 2;
29992 else if (flag_pic && SYMBOLIC_CONST (x)
29993 && (!TARGET_64BIT
29994 || (GET_CODE (x) != LABEL_REF
29995 && (GET_CODE (x) != SYMBOL_REF
29996 || !SYMBOL_REF_LOCAL_P (x)))))
29997 *total = 1;
29998 else
29999 *total = 0;
30000 return true;
30001
30002 case CONST_DOUBLE:
30003 if (mode == VOIDmode)
30004 *total = 0;
30005 else
30006 switch (standard_80387_constant_p (x))
30007 {
30008 case 1: /* 0.0 */
30009 *total = 1;
30010 break;
30011 default: /* Other constants */
30012 *total = 2;
30013 break;
30014 case 0:
30015 case -1:
30016 /* Start with (MEM (SYMBOL_REF)), since that's where
30017 it'll probably end up. Add a penalty for size. */
30018 *total = (COSTS_N_INSNS (1)
30019 + (flag_pic != 0 && !TARGET_64BIT)
30020 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
30021 break;
30022 }
30023 return true;
30024
30025 case ZERO_EXTEND:
30026 /* The zero extension is often completely free on x86_64, so make
30027 it as cheap as possible. */
30028 if (TARGET_64BIT && mode == DImode
30029 && GET_MODE (XEXP (x, 0)) == SImode)
30030 *total = 1;
30031 else if (TARGET_ZERO_EXTEND_WITH_AND)
30032 *total = cost->add;
30033 else
30034 *total = cost->movzx;
30035 return false;
30036
30037 case SIGN_EXTEND:
30038 *total = cost->movsx;
30039 return false;
30040
30041 case ASHIFT:
30042 if (CONST_INT_P (XEXP (x, 1))
30043 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
30044 {
30045 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
30046 if (value == 1)
30047 {
30048 *total = cost->add;
30049 return false;
30050 }
30051 if ((value == 2 || value == 3)
30052 && cost->lea <= cost->shift_const)
30053 {
30054 *total = cost->lea;
30055 return false;
30056 }
30057 }
30058 /* FALLTHRU */
30059
30060 case ROTATE:
30061 case ASHIFTRT:
30062 case LSHIFTRT:
30063 case ROTATERT:
30064 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
30065 {
30066 if (CONST_INT_P (XEXP (x, 1)))
30067 {
30068 if (INTVAL (XEXP (x, 1)) > 32)
30069 *total = cost->shift_const + COSTS_N_INSNS (2);
30070 else
30071 *total = cost->shift_const * 2;
30072 }
30073 else
30074 {
30075 if (GET_CODE (XEXP (x, 1)) == AND)
30076 *total = cost->shift_var * 2;
30077 else
30078 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
30079 }
30080 }
30081 else
30082 {
30083 if (CONST_INT_P (XEXP (x, 1)))
30084 *total = cost->shift_const;
30085 else
30086 *total = cost->shift_var;
30087 }
30088 return false;
30089
30090 case FMA:
30091 {
30092 rtx sub;
30093
30094 gcc_assert (FLOAT_MODE_P (mode));
30095 gcc_assert (TARGET_FMA || TARGET_FMA4);
30096
30097 /* ??? SSE scalar/vector cost should be used here. */
30098 /* ??? Bald assumption that fma has the same cost as fmul. */
30099 *total = cost->fmul;
30100 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
30101
30102 /* A negate in op0 or op2 is free: FMS, FNMA, FNMS. */
30103 sub = XEXP (x, 0);
30104 if (GET_CODE (sub) == NEG)
30105 sub = XEXP (sub, 0);
30106 *total += rtx_cost (sub, FMA, 0, speed);
30107
30108 sub = XEXP (x, 2);
30109 if (GET_CODE (sub) == NEG)
30110 sub = XEXP (sub, 0);
30111 *total += rtx_cost (sub, FMA, 2, speed);
30112 return true;
30113 }
30114
30115 case MULT:
30116 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
30117 {
30118 /* ??? SSE scalar cost should be used here. */
30119 *total = cost->fmul;
30120 return false;
30121 }
30122 else if (X87_FLOAT_MODE_P (mode))
30123 {
30124 *total = cost->fmul;
30125 return false;
30126 }
30127 else if (FLOAT_MODE_P (mode))
30128 {
30129 /* ??? SSE vector cost should be used here. */
30130 *total = cost->fmul;
30131 return false;
30132 }
30133 else
30134 {
30135 rtx op0 = XEXP (x, 0);
30136 rtx op1 = XEXP (x, 1);
30137 int nbits;
30138 if (CONST_INT_P (XEXP (x, 1)))
30139 {
30140 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
30141 for (nbits = 0; value != 0; value &= value - 1)
30142 nbits++;
30143 }
30144 else
30145 /* This is arbitrary. */
30146 nbits = 7;
30147
30148 /* Compute costs correctly for widening multiplication. */
30149 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
30150 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
30151 == GET_MODE_SIZE (mode))
30152 {
30153 int is_mulwiden = 0;
30154 enum machine_mode inner_mode = GET_MODE (op0);
30155
30156 if (GET_CODE (op0) == GET_CODE (op1))
30157 is_mulwiden = 1, op1 = XEXP (op1, 0);
30158 else if (CONST_INT_P (op1))
30159 {
30160 if (GET_CODE (op0) == SIGN_EXTEND)
30161 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
30162 == INTVAL (op1);
30163 else
30164 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
30165 }
30166
30167 if (is_mulwiden)
30168 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
30169 }
30170
30171 *total = (cost->mult_init[MODE_INDEX (mode)]
30172 + nbits * cost->mult_bit
30173 + rtx_cost (op0, outer_code, opno, speed)
30174 + rtx_cost (op1, outer_code, opno, speed));
30175
30176 return true;
30177 }
30178
30179 case DIV:
30180 case UDIV:
30181 case MOD:
30182 case UMOD:
30183 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
30184 /* ??? SSE cost should be used here. */
30185 *total = cost->fdiv;
30186 else if (X87_FLOAT_MODE_P (mode))
30187 *total = cost->fdiv;
30188 else if (FLOAT_MODE_P (mode))
30189 /* ??? SSE vector cost should be used here. */
30190 *total = cost->fdiv;
30191 else
30192 *total = cost->divide[MODE_INDEX (mode)];
30193 return false;
30194
30195 case PLUS:
30196 if (GET_MODE_CLASS (mode) == MODE_INT
30197 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
30198 {
30199 if (GET_CODE (XEXP (x, 0)) == PLUS
30200 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
30201 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
30202 && CONSTANT_P (XEXP (x, 1)))
30203 {
30204 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
30205 if (val == 2 || val == 4 || val == 8)
30206 {
30207 *total = cost->lea;
30208 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
30209 outer_code, opno, speed);
30210 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
30211 outer_code, opno, speed);
30212 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
30213 return true;
30214 }
30215 }
30216 else if (GET_CODE (XEXP (x, 0)) == MULT
30217 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
30218 {
30219 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
30220 if (val == 2 || val == 4 || val == 8)
30221 {
30222 *total = cost->lea;
30223 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
30224 outer_code, opno, speed);
30225 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
30226 return true;
30227 }
30228 }
30229 else if (GET_CODE (XEXP (x, 0)) == PLUS)
30230 {
30231 *total = cost->lea;
30232 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
30233 outer_code, opno, speed);
30234 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
30235 outer_code, opno, speed);
30236 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
30237 return true;
30238 }
30239 }
30240 /* FALLTHRU */
30241
30242 case MINUS:
30243 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
30244 {
30245 /* ??? SSE cost should be used here. */
30246 *total = cost->fadd;
30247 return false;
30248 }
30249 else if (X87_FLOAT_MODE_P (mode))
30250 {
30251 *total = cost->fadd;
30252 return false;
30253 }
30254 else if (FLOAT_MODE_P (mode))
30255 {
30256 /* ??? SSE vector cost should be used here. */
30257 *total = cost->fadd;
30258 return false;
30259 }
30260 /* FALLTHRU */
30261
30262 case AND:
30263 case IOR:
30264 case XOR:
30265 if (!TARGET_64BIT && mode == DImode)
30266 {
30267 *total = (cost->add * 2
30268 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
30269 << (GET_MODE (XEXP (x, 0)) != DImode))
30270 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
30271 << (GET_MODE (XEXP (x, 1)) != DImode)));
30272 return true;
30273 }
30274 /* FALLTHRU */
30275
30276 case NEG:
30277 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
30278 {
30279 /* ??? SSE cost should be used here. */
30280 *total = cost->fchs;
30281 return false;
30282 }
30283 else if (X87_FLOAT_MODE_P (mode))
30284 {
30285 *total = cost->fchs;
30286 return false;
30287 }
30288 else if (FLOAT_MODE_P (mode))
30289 {
30290 /* ??? SSE vector cost should be used here. */
30291 *total = cost->fchs;
30292 return false;
30293 }
30294 /* FALLTHRU */
30295
30296 case NOT:
30297 if (!TARGET_64BIT && mode == DImode)
30298 *total = cost->add * 2;
30299 else
30300 *total = cost->add;
30301 return false;
30302
30303 case COMPARE:
30304 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
30305 && XEXP (XEXP (x, 0), 1) == const1_rtx
30306 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
30307 && XEXP (x, 1) == const0_rtx)
30308 {
30309 /* This kind of construct is implemented using test[bwl].
30310 Treat it as if we had an AND. */
30311 *total = (cost->add
30312 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
30313 + rtx_cost (const1_rtx, outer_code, opno, speed));
30314 return true;
30315 }
30316 return false;
30317
30318 case FLOAT_EXTEND:
30319 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
30320 *total = 0;
30321 return false;
30322
30323 case ABS:
30324 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
30325 /* ??? SSE cost should be used here. */
30326 *total = cost->fabs;
30327 else if (X87_FLOAT_MODE_P (mode))
30328 *total = cost->fabs;
30329 else if (FLOAT_MODE_P (mode))
30330 /* ??? SSE vector cost should be used here. */
30331 *total = cost->fabs;
30332 return false;
30333
30334 case SQRT:
30335 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
30336 /* ??? SSE cost should be used here. */
30337 *total = cost->fsqrt;
30338 else if (X87_FLOAT_MODE_P (mode))
30339 *total = cost->fsqrt;
30340 else if (FLOAT_MODE_P (mode))
30341 /* ??? SSE vector cost should be used here. */
30342 *total = cost->fsqrt;
30343 return false;
30344
30345 case UNSPEC:
30346 if (XINT (x, 1) == UNSPEC_TP)
30347 *total = 0;
30348 return false;
30349
30350 case VEC_SELECT:
30351 case VEC_CONCAT:
30352 case VEC_MERGE:
30353 case VEC_DUPLICATE:
30354 /* ??? Assume all of these vector manipulation patterns are
30355 recognizable, in which case they all pretty much have the
30356 same cost. */
30357 *total = COSTS_N_INSNS (1);
30358 return true;
30359
30360 default:
30361 return false;
30362 }
30363 }
30364
30365 #if TARGET_MACHO
30366
30367 static int current_machopic_label_num;
30368
30369 /* Given a symbol name and its associated stub, write out the
30370 definition of the stub. */
30371
30372 void
30373 machopic_output_stub (FILE *file, const char *symb, const char *stub)
30374 {
30375 unsigned int length;
30376 char *binder_name, *symbol_name, lazy_ptr_name[32];
30377 int label = ++current_machopic_label_num;
30378
30379 /* For 64-bit we shouldn't get here. */
30380 gcc_assert (!TARGET_64BIT);
30381
30382 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
30383 symb = targetm.strip_name_encoding (symb);
30384
30385 length = strlen (stub);
30386 binder_name = XALLOCAVEC (char, length + 32);
30387 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
30388
30389 length = strlen (symb);
30390 symbol_name = XALLOCAVEC (char, length + 32);
30391 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
30392
30393 sprintf (lazy_ptr_name, "L%d$lz", label);
30394
30395 if (MACHOPIC_ATT_STUB)
30396 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
30397 else if (MACHOPIC_PURE)
30398 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
30399 else
30400 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
30401
30402 fprintf (file, "%s:\n", stub);
30403 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
30404
30405 if (MACHOPIC_ATT_STUB)
30406 {
30407 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
30408 }
30409 else if (MACHOPIC_PURE)
30410 {
30411 /* PIC stub. */
30412 /* 25-byte PIC stub using "CALL get_pc_thunk". */
30413 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
30414 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
30415 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
30416 label, lazy_ptr_name, label);
30417 fprintf (file, "\tjmp\t*%%ecx\n");
30418 }
30419 else
30420 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
30421
30422 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
30423 it needs no stub-binding-helper. */
30424 if (MACHOPIC_ATT_STUB)
30425 return;
30426
30427 fprintf (file, "%s:\n", binder_name);
30428
30429 if (MACHOPIC_PURE)
30430 {
30431 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
30432 fprintf (file, "\tpushl\t%%ecx\n");
30433 }
30434 else
30435 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
30436
30437 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
30438
30439 /* N.B. Keep the correspondence of these
30440 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
30441 old-pic/new-pic/non-pic stubs; altering this will break
30442 compatibility with existing dylibs. */
30443 if (MACHOPIC_PURE)
30444 {
30445 /* 25-byte PIC stub using "CALL get_pc_thunk". */
30446 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
30447 }
30448 else
30449 /* 16-byte -mdynamic-no-pic stub. */
30450 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
30451
30452 fprintf (file, "%s:\n", lazy_ptr_name);
30453 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
30454 fprintf (file, ASM_LONG "%s\n", binder_name);
30455 }
30456 #endif /* TARGET_MACHO */
30457
30458 /* Order the registers for register allocator. */
30459
30460 void
30461 x86_order_regs_for_local_alloc (void)
30462 {
30463 int pos = 0;
30464 int i;
30465
30466 /* First allocate the local general purpose registers. */
30467 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
30468 if (GENERAL_REGNO_P (i) && call_used_regs[i])
30469 reg_alloc_order [pos++] = i;
30470
30471 /* Global general purpose registers. */
30472 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
30473 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
30474 reg_alloc_order [pos++] = i;
30475
30476 /* x87 registers come first in case we are doing FP math
30477 using them. */
30478 if (!TARGET_SSE_MATH)
30479 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
30480 reg_alloc_order [pos++] = i;
30481
30482 /* SSE registers. */
30483 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
30484 reg_alloc_order [pos++] = i;
30485 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
30486 reg_alloc_order [pos++] = i;
30487
30488 /* x87 registers. */
30489 if (TARGET_SSE_MATH)
30490 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
30491 reg_alloc_order [pos++] = i;
30492
30493 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
30494 reg_alloc_order [pos++] = i;
30495
30496 /* Initialize the rest of the array, as we do not allocate some registers
30497 at all. */
30498 while (pos < FIRST_PSEUDO_REGISTER)
30499 reg_alloc_order [pos++] = 0;
30500 }
30501
30502 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
30503 in struct attribute_spec.handler. */
30504 static tree
30505 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
30506 tree args,
30507 int flags ATTRIBUTE_UNUSED,
30508 bool *no_add_attrs)
30509 {
30510 if (TREE_CODE (*node) != FUNCTION_TYPE
30511 && TREE_CODE (*node) != METHOD_TYPE
30512 && TREE_CODE (*node) != FIELD_DECL
30513 && TREE_CODE (*node) != TYPE_DECL)
30514 {
30515 warning (OPT_Wattributes, "%qE attribute only applies to functions",
30516 name);
30517 *no_add_attrs = true;
30518 return NULL_TREE;
30519 }
30520 if (TARGET_64BIT)
30521 {
30522 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
30523 name);
30524 *no_add_attrs = true;
30525 return NULL_TREE;
30526 }
30527 if (is_attribute_p ("callee_pop_aggregate_return", name))
30528 {
30529 tree cst;
30530
30531 cst = TREE_VALUE (args);
30532 if (TREE_CODE (cst) != INTEGER_CST)
30533 {
30534 warning (OPT_Wattributes,
30535 "%qE attribute requires an integer constant argument",
30536 name);
30537 *no_add_attrs = true;
30538 }
30539 else if (compare_tree_int (cst, 0) != 0
30540 && compare_tree_int (cst, 1) != 0)
30541 {
30542 warning (OPT_Wattributes,
30543 "argument to %qE attribute is neither zero, nor one",
30544 name);
30545 *no_add_attrs = true;
30546 }
30547
30548 return NULL_TREE;
30549 }
30550
30551 return NULL_TREE;
30552 }
30553
30554 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
30555 struct attribute_spec.handler. */
30556 static tree
30557 ix86_handle_abi_attribute (tree *node, tree name,
30558 tree args ATTRIBUTE_UNUSED,
30559 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
30560 {
30561 if (TREE_CODE (*node) != FUNCTION_TYPE
30562 && TREE_CODE (*node) != METHOD_TYPE
30563 && TREE_CODE (*node) != FIELD_DECL
30564 && TREE_CODE (*node) != TYPE_DECL)
30565 {
30566 warning (OPT_Wattributes, "%qE attribute only applies to functions",
30567 name);
30568 *no_add_attrs = true;
30569 return NULL_TREE;
30570 }
30571
30572 /* Can combine regparm with all attributes but fastcall. */
30573 if (is_attribute_p ("ms_abi", name))
30574 {
30575 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
30576 {
30577 error ("ms_abi and sysv_abi attributes are not compatible");
30578 }
30579
30580 return NULL_TREE;
30581 }
30582 else if (is_attribute_p ("sysv_abi", name))
30583 {
30584 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
30585 {
30586 error ("ms_abi and sysv_abi attributes are not compatible");
30587 }
30588
30589 return NULL_TREE;
30590 }
30591
30592 return NULL_TREE;
30593 }
30594
30595 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
30596 struct attribute_spec.handler. */
30597 static tree
30598 ix86_handle_struct_attribute (tree *node, tree name,
30599 tree args ATTRIBUTE_UNUSED,
30600 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
30601 {
30602 tree *type = NULL;
30603 if (DECL_P (*node))
30604 {
30605 if (TREE_CODE (*node) == TYPE_DECL)
30606 type = &TREE_TYPE (*node);
30607 }
30608 else
30609 type = node;
30610
30611 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
30612 || TREE_CODE (*type) == UNION_TYPE)))
30613 {
30614 warning (OPT_Wattributes, "%qE attribute ignored",
30615 name);
30616 *no_add_attrs = true;
30617 }
30618
30619 else if ((is_attribute_p ("ms_struct", name)
30620 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
30621 || ((is_attribute_p ("gcc_struct", name)
30622 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
30623 {
30624 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
30625 name);
30626 *no_add_attrs = true;
30627 }
30628
30629 return NULL_TREE;
30630 }
30631
30632 static tree
30633 ix86_handle_fndecl_attribute (tree *node, tree name,
30634 tree args ATTRIBUTE_UNUSED,
30635 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
30636 {
30637 if (TREE_CODE (*node) != FUNCTION_DECL)
30638 {
30639 warning (OPT_Wattributes, "%qE attribute only applies to functions",
30640 name);
30641 *no_add_attrs = true;
30642 }
30643 return NULL_TREE;
30644 }
30645
30646 static bool
30647 ix86_ms_bitfield_layout_p (const_tree record_type)
30648 {
30649 return ((TARGET_MS_BITFIELD_LAYOUT
30650 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
30651 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
30652 }
30653
30654 /* Returns an expression indicating where the this parameter is
30655 located on entry to the FUNCTION. */
30656
30657 static rtx
30658 x86_this_parameter (tree function)
30659 {
30660 tree type = TREE_TYPE (function);
30661 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
30662 int nregs;
30663
30664 if (TARGET_64BIT)
30665 {
30666 const int *parm_regs;
30667
30668 if (ix86_function_type_abi (type) == MS_ABI)
30669 parm_regs = x86_64_ms_abi_int_parameter_registers;
30670 else
30671 parm_regs = x86_64_int_parameter_registers;
30672 return gen_rtx_REG (DImode, parm_regs[aggr]);
30673 }
30674
30675 nregs = ix86_function_regparm (type, function);
30676
30677 if (nregs > 0 && !stdarg_p (type))
30678 {
30679 int regno;
30680 unsigned int ccvt = ix86_get_callcvt (type);
30681
30682 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
30683 regno = aggr ? DX_REG : CX_REG;
30684 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
30685 {
30686 regno = CX_REG;
30687 if (aggr)
30688 return gen_rtx_MEM (SImode,
30689 plus_constant (stack_pointer_rtx, 4));
30690 }
30691 else
30692 {
30693 regno = AX_REG;
30694 if (aggr)
30695 {
30696 regno = DX_REG;
30697 if (nregs == 1)
30698 return gen_rtx_MEM (SImode,
30699 plus_constant (stack_pointer_rtx, 4));
30700 }
30701 }
30702 return gen_rtx_REG (SImode, regno);
30703 }
30704
30705 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
30706 }
30707
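/* Example of the 32-bit cases above (assumed calling conventions, for
   illustration only): for a fastcall method, `this' arrives in %ecx, or in
   %edx when the function also returns an aggregate in memory; for a
   thiscall method with an aggregate return, `this' is instead found on the
   stack at 4(%esp).  */
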
30708 /* Determine whether x86_output_mi_thunk can succeed. */
30709
30710 static bool
30711 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
30712 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
30713 HOST_WIDE_INT vcall_offset, const_tree function)
30714 {
30715 /* 64-bit can handle anything. */
30716 if (TARGET_64BIT)
30717 return true;
30718
30719 /* For 32-bit, everything's fine if we have one free register. */
30720 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
30721 return true;
30722
30723 /* Need a free register for vcall_offset. */
30724 if (vcall_offset)
30725 return false;
30726
30727 /* Need a free register for GOT references. */
30728 if (flag_pic && !targetm.binds_local_p (function))
30729 return false;
30730
30731 /* Otherwise ok. */
30732 return true;
30733 }
30734
30735 /* Output the assembler code for a thunk function. THUNK_DECL is the
30736 declaration for the thunk function itself, FUNCTION is the decl for
30737 the target function. DELTA is an immediate constant offset to be
30738 added to THIS. If VCALL_OFFSET is nonzero, the word at
30739 *(*this + vcall_offset) should be added to THIS. */
30740
30741 static void
30742 x86_output_mi_thunk (FILE *file,
30743 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
30744 HOST_WIDE_INT vcall_offset, tree function)
30745 {
30746 rtx this_param = x86_this_parameter (function);
30747 rtx this_reg, tmp, fnaddr;
30748
30749 emit_note (NOTE_INSN_PROLOGUE_END);
30750
30751 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
30752 pull it in now and let DELTA benefit. */
30753 if (REG_P (this_param))
30754 this_reg = this_param;
30755 else if (vcall_offset)
30756 {
30757 /* Put the this parameter into %eax. */
30758 this_reg = gen_rtx_REG (Pmode, AX_REG);
30759 emit_move_insn (this_reg, this_param);
30760 }
30761 else
30762 this_reg = NULL_RTX;
30763
30764 /* Adjust the this parameter by a fixed constant. */
30765 if (delta)
30766 {
30767 rtx delta_rtx = GEN_INT (delta);
30768 rtx delta_dst = this_reg ? this_reg : this_param;
30769
30770 if (TARGET_64BIT)
30771 {
30772 if (!x86_64_general_operand (delta_rtx, Pmode))
30773 {
30774 tmp = gen_rtx_REG (Pmode, R10_REG);
30775 emit_move_insn (tmp, delta_rtx);
30776 delta_rtx = tmp;
30777 }
30778 }
30779
30780 emit_insn (ix86_gen_add3 (delta_dst, delta_dst, delta_rtx));
30781 }
30782
30783 /* Adjust the this parameter by a value stored in the vtable. */
30784 if (vcall_offset)
30785 {
30786 rtx vcall_addr, vcall_mem, this_mem;
30787 unsigned int tmp_regno;
30788
30789 if (TARGET_64BIT)
30790 tmp_regno = R10_REG;
30791 else
30792 {
30793 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
30794 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
30795 tmp_regno = AX_REG;
30796 else
30797 tmp_regno = CX_REG;
30798 }
30799 tmp = gen_rtx_REG (Pmode, tmp_regno);
30800
30801 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
30802 if (Pmode != ptr_mode)
30803 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
30804 emit_move_insn (tmp, this_mem);
30805
30806 /* Adjust the this parameter. */
30807 vcall_addr = plus_constant (tmp, vcall_offset);
30808 if (TARGET_64BIT
30809 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
30810 {
30811 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
30812 emit_move_insn (tmp2, GEN_INT (vcall_offset));
30813 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
30814 }
30815
30816 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
30817 if (Pmode != ptr_mode)
30818 emit_insn (gen_addsi_1_zext (this_reg,
30819 gen_rtx_REG (ptr_mode,
30820 REGNO (this_reg)),
30821 vcall_mem));
30822 else
30823 emit_insn (ix86_gen_add3 (this_reg, this_reg, vcall_mem));
30824 }
30825
30826 /* If necessary, drop THIS back to its stack slot. */
30827 if (this_reg && this_reg != this_param)
30828 emit_move_insn (this_param, this_reg);
30829
30830 fnaddr = XEXP (DECL_RTL (function), 0);
30831 if (TARGET_64BIT)
30832 {
30833 if (!flag_pic || targetm.binds_local_p (function)
30834 || cfun->machine->call_abi == MS_ABI)
30835 ;
30836 else
30837 {
30838 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
30839 tmp = gen_rtx_CONST (Pmode, tmp);
30840 fnaddr = gen_rtx_MEM (Pmode, tmp);
30841 }
30842 }
30843 else
30844 {
30845 if (!flag_pic || targetm.binds_local_p (function))
30846 ;
30847 #if TARGET_MACHO
30848 else if (TARGET_MACHO)
30849 {
30850 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
30851 fnaddr = XEXP (fnaddr, 0);
30852 }
30853 #endif /* TARGET_MACHO */
30854 else
30855 {
30856 tmp = gen_rtx_REG (Pmode, CX_REG);
30857 output_set_got (tmp, NULL_RTX);
30858
30859 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
30860 fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
30861 fnaddr = gen_rtx_MEM (Pmode, fnaddr);
30862 }
30863 }
30864
30865 /* Our sibling call patterns do not allow memories, because we have no
30866 predicate that can distinguish between frame and non-frame memory.
30867 For our purposes here, we can get away with (ab)using a jump pattern,
30868 because we're going to do no optimization. */
30869 if (MEM_P (fnaddr))
30870 emit_jump_insn (gen_indirect_jump (fnaddr));
30871 else
30872 {
30873 tmp = gen_rtx_MEM (QImode, fnaddr);
30874 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
30875 tmp = emit_call_insn (tmp);
30876 SIBLING_CALL_P (tmp) = 1;
30877 }
30878 emit_barrier ();
30879
30880 /* Emit just enough of rest_of_compilation to get the insns emitted.
30881 Note that use_thunk calls assemble_start_function et al. */
30882 tmp = get_insns ();
30883 insn_locators_alloc ();
30884 shorten_branches (tmp);
30885 final_start_function (tmp, file, 1);
30886 final (tmp, file, 1);
30887 final_end_function ();
30888 }
30889
30890 static void
30891 x86_file_start (void)
30892 {
30893 default_file_start ();
30894 #if TARGET_MACHO
30895 darwin_file_start ();
30896 #endif
30897 if (X86_FILE_START_VERSION_DIRECTIVE)
30898 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
30899 if (X86_FILE_START_FLTUSED)
30900 fputs ("\t.global\t__fltused\n", asm_out_file);
30901 if (ix86_asm_dialect == ASM_INTEL)
30902 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
30903 }
30904
30905 int
30906 x86_field_alignment (tree field, int computed)
30907 {
30908 enum machine_mode mode;
30909 tree type = TREE_TYPE (field);
30910
30911 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
30912 return computed;
30913 mode = TYPE_MODE (strip_array_types (type));
30914 if (mode == DFmode || mode == DCmode
30915 || GET_MODE_CLASS (mode) == MODE_INT
30916 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
30917 return MIN (32, computed);
30918 return computed;
30919 }
30920
30921 /* Output assembler code to FILE to increment profiler label # LABELNO
30922 for profiling a function entry. */
30923 void
30924 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
30925 {
30926 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
30927 : MCOUNT_NAME);
30928
30929 if (TARGET_64BIT)
30930 {
30931 #ifndef NO_PROFILE_COUNTERS
30932 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
30933 #endif
30934
30935 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
30936 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
30937 else
30938 fprintf (file, "\tcall\t%s\n", mcount_name);
30939 }
30940 else if (flag_pic)
30941 {
30942 #ifndef NO_PROFILE_COUNTERS
30943 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
30944 LPREFIX, labelno);
30945 #endif
30946 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
30947 }
30948 else
30949 {
30950 #ifndef NO_PROFILE_COUNTERS
30951 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
30952 LPREFIX, labelno);
30953 #endif
30954 fprintf (file, "\tcall\t%s\n", mcount_name);
30955 }
30956 }
30957
30958 /* We don't have exact information about the insn sizes, but we may assume
30959 quite safely that we are informed about all 1 byte insns and memory
30960 address sizes. This is enough to eliminate unnecessary padding in
30961 99% of cases. */
30962
30963 static int
30964 min_insn_size (rtx insn)
30965 {
30966 int l = 0, len;
30967
30968 if (!INSN_P (insn) || !active_insn_p (insn))
30969 return 0;
30970
30971 /* Discard alignments we've emitted, and jump table data. */
30972 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
30973 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
30974 return 0;
30975 if (JUMP_TABLE_DATA_P (insn))
30976 return 0;
30977
30978 /* Important case - calls are always 5 bytes.
30979 It is common to have many calls in a row. */
30980 if (CALL_P (insn)
30981 && symbolic_reference_mentioned_p (PATTERN (insn))
30982 && !SIBLING_CALL_P (insn))
30983 return 5;
30984 len = get_attr_length (insn);
30985 if (len <= 1)
30986 return 1;
30987
30988 /* For normal instructions we rely on get_attr_length being exact,
30989 with a few exceptions. */
30990 if (!JUMP_P (insn))
30991 {
30992 enum attr_type type = get_attr_type (insn);
30993
30994 switch (type)
30995 {
30996 case TYPE_MULTI:
30997 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
30998 || asm_noperands (PATTERN (insn)) >= 0)
30999 return 0;
31000 break;
31001 case TYPE_OTHER:
31002 case TYPE_FCMP:
31003 break;
31004 default:
31005 /* Otherwise trust get_attr_length. */
31006 return len;
31007 }
31008
31009 l = get_attr_length_address (insn);
31010 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
31011 l = 4;
31012 }
31013 if (l)
31014 return 1+l;
31015 else
31016 return 2;
31017 }
31018
31019 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
31020
31021 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
31022 window. */
31023
31024 static void
31025 ix86_avoid_jump_mispredicts (void)
31026 {
31027 rtx insn, start = get_insns ();
31028 int nbytes = 0, njumps = 0;
31029 int isjump = 0;
31030
31031 /* Look for all minimal intervals of instructions containing 4 jumps.
31032 The intervals are bounded by START and INSN. NBYTES is the total
31033 size of instructions in the interval including INSN and not including
31034 START. When NBYTES is smaller than 16 bytes, it is possible
31035 that the end of START and INSN end up in the same 16-byte page.
31036
31037 The smallest offset in the page at which INSN can start is the case where
31038 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
31039 We add a p2align to the 16-byte window with max skip 15 - NBYTES + sizeof (INSN).
31040 */
31041 for (insn = start; insn; insn = NEXT_INSN (insn))
31042 {
31043 int min_size;
31044
31045 if (LABEL_P (insn))
31046 {
31047 int align = label_to_alignment (insn);
31048 int max_skip = label_to_max_skip (insn);
31049
31050 if (max_skip > 15)
31051 max_skip = 15;
31052 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
31053 already in the current 16 byte page, because otherwise
31054 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
31055 bytes to reach 16 byte boundary. */
31056 if (align <= 0
31057 || (align <= 3 && max_skip != (1 << align) - 1))
31058 max_skip = 0;
31059 if (dump_file)
31060 fprintf (dump_file, "Label %i with max_skip %i\n",
31061 INSN_UID (insn), max_skip);
31062 if (max_skip)
31063 {
31064 while (nbytes + max_skip >= 16)
31065 {
31066 start = NEXT_INSN (start);
31067 if ((JUMP_P (start)
31068 && GET_CODE (PATTERN (start)) != ADDR_VEC
31069 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
31070 || CALL_P (start))
31071 njumps--, isjump = 1;
31072 else
31073 isjump = 0;
31074 nbytes -= min_insn_size (start);
31075 }
31076 }
31077 continue;
31078 }
31079
31080 min_size = min_insn_size (insn);
31081 nbytes += min_size;
31082 if (dump_file)
31083 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
31084 INSN_UID (insn), min_size);
31085 if ((JUMP_P (insn)
31086 && GET_CODE (PATTERN (insn)) != ADDR_VEC
31087 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
31088 || CALL_P (insn))
31089 njumps++;
31090 else
31091 continue;
31092
31093 while (njumps > 3)
31094 {
31095 start = NEXT_INSN (start);
31096 if ((JUMP_P (start)
31097 && GET_CODE (PATTERN (start)) != ADDR_VEC
31098 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
31099 || CALL_P (start))
31100 njumps--, isjump = 1;
31101 else
31102 isjump = 0;
31103 nbytes -= min_insn_size (start);
31104 }
31105 gcc_assert (njumps >= 0);
31106 if (dump_file)
31107 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
31108 INSN_UID (start), INSN_UID (insn), nbytes);
31109
31110 if (njumps == 3 && isjump && nbytes < 16)
31111 {
31112 int padsize = 15 - nbytes + min_insn_size (insn);
31113
31114 if (dump_file)
31115 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
31116 INSN_UID (insn), padsize);
31117 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
31118 }
31119 }
31120 }
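/* Worked example of the padding computation above (hypothetical sizes, for
   illustration only): when the pass decides to pad, it inserts
   15 - NBYTES + sizeof (INSN) bytes before INSN; e.g. with NBYTES == 13 and
   a 2-byte INSN that is 15 - 13 + 2 = 4 bytes of padding, enough to push
   the new jump out of the 16-byte window shared with the previous three.  */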
31121 #endif
31122
31123 /* AMD Athlon works faster
31124 when RET is not the destination of a conditional jump or directly preceded
31125 by another jump instruction. We avoid the penalty by inserting a NOP just
31126 before the RET instruction in such cases. */
31127 static void
31128 ix86_pad_returns (void)
31129 {
31130 edge e;
31131 edge_iterator ei;
31132
31133 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
31134 {
31135 basic_block bb = e->src;
31136 rtx ret = BB_END (bb);
31137 rtx prev;
31138 bool replace = false;
31139
31140 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
31141 || optimize_bb_for_size_p (bb))
31142 continue;
31143 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
31144 if (active_insn_p (prev) || LABEL_P (prev))
31145 break;
31146 if (prev && LABEL_P (prev))
31147 {
31148 edge e;
31149 edge_iterator ei;
31150
31151 FOR_EACH_EDGE (e, ei, bb->preds)
31152 if (EDGE_FREQUENCY (e) && e->src->index >= 0
31153 && !(e->flags & EDGE_FALLTHRU))
31154 replace = true;
31155 }
31156 if (!replace)
31157 {
31158 prev = prev_active_insn (ret);
31159 if (prev
31160 && ((JUMP_P (prev) && any_condjump_p (prev))
31161 || CALL_P (prev)))
31162 replace = true;
31163 /* Empty functions get a branch mispredict even when
31164 the jump destination is not visible to us. */
31165 if (!prev && !optimize_function_for_size_p (cfun))
31166 replace = true;
31167 }
31168 if (replace)
31169 {
31170 emit_jump_insn_before (gen_return_internal_long (), ret);
31171 delete_insn (ret);
31172 }
31173 }
31174 }
31175
31176 /* Count the minimum number of instructions in BB. Return 4 if the
31177 number of instructions >= 4. */
31178
31179 static int
31180 ix86_count_insn_bb (basic_block bb)
31181 {
31182 rtx insn;
31183 int insn_count = 0;
31184
31185 /* Count number of instructions in this block. Return 4 if the number
31186 of instructions >= 4. */
31187 FOR_BB_INSNS (bb, insn)
31188 {
31189 /* Only happens in exit blocks. */
31190 if (JUMP_P (insn)
31191 && ANY_RETURN_P (PATTERN (insn)))
31192 break;
31193
31194 if (NONDEBUG_INSN_P (insn)
31195 && GET_CODE (PATTERN (insn)) != USE
31196 && GET_CODE (PATTERN (insn)) != CLOBBER)
31197 {
31198 insn_count++;
31199 if (insn_count >= 4)
31200 return insn_count;
31201 }
31202 }
31203
31204 return insn_count;
31205 }
31206
31207
31208 /* Count the minimum number of instructions in a code path ending in BB.
31209 Return 4 if the number of instructions >= 4. */
31210
31211 static int
31212 ix86_count_insn (basic_block bb)
31213 {
31214 edge e;
31215 edge_iterator ei;
31216 int min_prev_count;
31217
31218 /* Only bother counting instructions along paths with no
31219 more than 2 basic blocks between entry and exit. Given
31220 that BB has an edge to exit, determine if a predecessor
31221 of BB has an edge from entry. If so, compute the number
31222 of instructions in the predecessor block. If there
31223 happen to be multiple such blocks, compute the minimum. */
31224 min_prev_count = 4;
31225 FOR_EACH_EDGE (e, ei, bb->preds)
31226 {
31227 edge prev_e;
31228 edge_iterator prev_ei;
31229
31230 if (e->src == ENTRY_BLOCK_PTR)
31231 {
31232 min_prev_count = 0;
31233 break;
31234 }
31235 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
31236 {
31237 if (prev_e->src == ENTRY_BLOCK_PTR)
31238 {
31239 int count = ix86_count_insn_bb (e->src);
31240 if (count < min_prev_count)
31241 min_prev_count = count;
31242 break;
31243 }
31244 }
31245 }
31246
31247 if (min_prev_count < 4)
31248 min_prev_count += ix86_count_insn_bb (bb);
31249
31250 return min_prev_count;
31251 }
31252
31253 /* Pad short functions to 4 instructions. */
31254
31255 static void
31256 ix86_pad_short_function (void)
31257 {
31258 edge e;
31259 edge_iterator ei;
31260
31261 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
31262 {
31263 rtx ret = BB_END (e->src);
31264 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
31265 {
31266 int insn_count = ix86_count_insn (e->src);
31267
31268 /* Pad short function. */
31269 if (insn_count < 4)
31270 {
31271 rtx insn = ret;
31272
31273 /* Find epilogue. */
31274 while (insn
31275 && (!NOTE_P (insn)
31276 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
31277 insn = PREV_INSN (insn);
31278
31279 if (!insn)
31280 insn = ret;
31281
31282 /* Two NOPs count as one instruction. */
31283 insn_count = 2 * (4 - insn_count);
31284 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
31285 }
31286 }
31287 }
31288 }
31289
31290 /* Implement machine specific optimizations. We implement padding of returns
31291 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
31292 static void
31293 ix86_reorg (void)
31294 {
31295 /* We are freeing block_for_insn in the toplev to keep compatibility
31296 with old MDEP_REORGS that are not CFG based. Recompute it now. */
31297 compute_bb_for_insn ();
31298
31299 /* Run the vzeroupper optimization if needed. */
31300 if (TARGET_VZEROUPPER)
31301 move_or_delete_vzeroupper ();
31302
31303 if (optimize && optimize_function_for_speed_p (cfun))
31304 {
31305 if (TARGET_PAD_SHORT_FUNCTION)
31306 ix86_pad_short_function ();
31307 else if (TARGET_PAD_RETURNS)
31308 ix86_pad_returns ();
31309 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
31310 if (TARGET_FOUR_JUMP_LIMIT)
31311 ix86_avoid_jump_mispredicts ();
31312 #endif
31313 }
31314 }
31315
31316 /* Return nonzero when a QImode register that must be represented via a REX
31317 prefix is used. */
31318 bool
31319 x86_extended_QIreg_mentioned_p (rtx insn)
31320 {
31321 int i;
31322 extract_insn_cached (insn);
31323 for (i = 0; i < recog_data.n_operands; i++)
31324 if (REG_P (recog_data.operand[i])
31325 && REGNO (recog_data.operand[i]) > BX_REG)
31326 return true;
31327 return false;
31328 }
31329
31330 /* Return nonzero when P points to a register encoded via a REX prefix.
31331 Called via for_each_rtx. */
31332 static int
31333 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
31334 {
31335 unsigned int regno;
31336 if (!REG_P (*p))
31337 return 0;
31338 regno = REGNO (*p);
31339 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
31340 }
31341
31342 /* Return true when INSN mentions register that must be encoded using REX
31343 prefix. */
31344 bool
31345 x86_extended_reg_mentioned_p (rtx insn)
31346 {
31347 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
31348 extended_reg_mentioned_1, NULL);
31349 }
31350
31351 /* If profitable, negate (without causing overflow) integer constant
31352 of mode MODE at location LOC. Return true in this case. */
31353 bool
31354 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
31355 {
31356 HOST_WIDE_INT val;
31357
31358 if (!CONST_INT_P (*loc))
31359 return false;
31360
31361 switch (mode)
31362 {
31363 case DImode:
31364 /* DImode x86_64 constants must fit in 32 bits. */
31365 gcc_assert (x86_64_immediate_operand (*loc, mode));
31366
31367 mode = SImode;
31368 break;
31369
31370 case SImode:
31371 case HImode:
31372 case QImode:
31373 break;
31374
31375 default:
31376 gcc_unreachable ();
31377 }
31378
31379 /* Avoid overflows. */
31380 if (mode_signbit_p (mode, *loc))
31381 return false;
31382
31383 val = INTVAL (*loc);
31384
31385 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
31386 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
31387 if ((val < 0 && val != -128)
31388 || val == 128)
31389 {
31390 *loc = GEN_INT (-val);
31391 return true;
31392 }
31393
31394 return false;
31395 }
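
/* Illustration of the negation above: a caller that also swaps the
   add/sub opcode can emit `subl $4, %eax' instead of `addl $-4, %eax',
   and `subl $-128, %eax' instead of `addl $128, %eax', since -128 fits
   in a sign-extended 8-bit immediate while 128 does not.  */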
31396
31397 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
31398 optabs would emit if we didn't have TFmode patterns. */
31399
31400 void
31401 x86_emit_floatuns (rtx operands[2])
31402 {
31403 rtx neglab, donelab, i0, i1, f0, in, out;
31404 enum machine_mode mode, inmode;
31405
31406 inmode = GET_MODE (operands[1]);
31407 gcc_assert (inmode == SImode || inmode == DImode);
31408
31409 out = operands[0];
31410 in = force_reg (inmode, operands[1]);
31411 mode = GET_MODE (out);
31412 neglab = gen_label_rtx ();
31413 donelab = gen_label_rtx ();
31414 f0 = gen_reg_rtx (mode);
31415
31416 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
31417
31418 expand_float (out, in, 0);
31419
31420 emit_jump_insn (gen_jump (donelab));
31421 emit_barrier ();
31422
31423 emit_label (neglab);
31424
31425 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
31426 1, OPTAB_DIRECT);
31427 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
31428 1, OPTAB_DIRECT);
31429 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
31430
31431 expand_float (f0, i0, 0);
31432
31433 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
31434
31435 emit_label (donelab);
31436 }
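
/* For reference, a rough C equivalent of the sequence emitted above.
   This is only a sketch -- the real output is RTL, and the C types
   stand in for the modes of OPERANDS[0] and OPERANDS[1].  The negative
   path halves the input while keeping the low bit so rounding stays
   correct, then doubles the converted value:

     double floatuns (unsigned long x)
     {
       if ((long) x >= 0)
         return (double) (long) x;
       long half = (x >> 1) | (x & 1);
       double f = (double) half;
       return f + f;
     }
 */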
31437 \f
31438 /* AVX does not support 32-byte integer vector operations,
31439 thus the longest vector we are faced with is V16QImode. */
31440 #define MAX_VECT_LEN 16
31441
31442 struct expand_vec_perm_d
31443 {
31444 rtx target, op0, op1;
31445 unsigned char perm[MAX_VECT_LEN];
31446 enum machine_mode vmode;
31447 unsigned char nelt;
31448 bool testing_p;
31449 };
31450
31451 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
31452 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
31453
31454 /* Get a vector mode of the same size as the original but with elements
31455 twice as wide. This is only guaranteed to apply to integral vectors. */
31456
31457 static inline enum machine_mode
31458 get_mode_wider_vector (enum machine_mode o)
31459 {
31460 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
31461 enum machine_mode n = GET_MODE_WIDER_MODE (o);
31462 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
31463 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
31464 return n;
31465 }
31466
31467 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
31468 with all elements equal to VAR. Return true if successful. */
31469
31470 static bool
31471 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
31472 rtx target, rtx val)
31473 {
31474 bool ok;
31475
31476 switch (mode)
31477 {
31478 case V2SImode:
31479 case V2SFmode:
31480 if (!mmx_ok)
31481 return false;
31482 /* FALLTHRU */
31483
31484 case V4DFmode:
31485 case V4DImode:
31486 case V8SFmode:
31487 case V8SImode:
31488 case V2DFmode:
31489 case V2DImode:
31490 case V4SFmode:
31491 case V4SImode:
31492 {
31493 rtx insn, dup;
31494
31495 /* First attempt to recognize VAL as-is. */
31496 dup = gen_rtx_VEC_DUPLICATE (mode, val);
31497 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
31498 if (recog_memoized (insn) < 0)
31499 {
31500 rtx seq;
31501 /* If that fails, force VAL into a register. */
31502
31503 start_sequence ();
31504 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
31505 seq = get_insns ();
31506 end_sequence ();
31507 if (seq)
31508 emit_insn_before (seq, insn);
31509
31510 ok = recog_memoized (insn) >= 0;
31511 gcc_assert (ok);
31512 }
31513 }
31514 return true;
31515
31516 case V4HImode:
31517 if (!mmx_ok)
31518 return false;
31519 if (TARGET_SSE || TARGET_3DNOW_A)
31520 {
31521 rtx x;
31522
31523 val = gen_lowpart (SImode, val);
31524 x = gen_rtx_TRUNCATE (HImode, val);
31525 x = gen_rtx_VEC_DUPLICATE (mode, x);
31526 emit_insn (gen_rtx_SET (VOIDmode, target, x));
31527 return true;
31528 }
31529 goto widen;
31530
31531 case V8QImode:
31532 if (!mmx_ok)
31533 return false;
31534 goto widen;
31535
31536 case V8HImode:
31537 if (TARGET_SSE2)
31538 {
31539 struct expand_vec_perm_d dperm;
31540 rtx tmp1, tmp2;
31541
31542 permute:
31543 memset (&dperm, 0, sizeof (dperm));
31544 dperm.target = target;
31545 dperm.vmode = mode;
31546 dperm.nelt = GET_MODE_NUNITS (mode);
31547 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
31548
31549 /* Extend to SImode using a paradoxical SUBREG. */
31550 tmp1 = gen_reg_rtx (SImode);
31551 emit_move_insn (tmp1, gen_lowpart (SImode, val));
31552
31553 /* Insert the SImode value as low element of a V4SImode vector. */
31554 tmp2 = gen_lowpart (V4SImode, dperm.op0);
31555 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
31556
31557 ok = (expand_vec_perm_1 (&dperm)
31558 || expand_vec_perm_broadcast_1 (&dperm));
31559 gcc_assert (ok);
31560 return ok;
31561 }
31562 goto widen;
31563
31564 case V16QImode:
31565 if (TARGET_SSE2)
31566 goto permute;
31567 goto widen;
31568
31569 widen:
31570 /* Replicate the value once into the next wider mode and recurse. */
31571 {
31572 enum machine_mode smode, wsmode, wvmode;
31573 rtx x;
31574
31575 smode = GET_MODE_INNER (mode);
31576 wvmode = get_mode_wider_vector (mode);
31577 wsmode = GET_MODE_INNER (wvmode);
31578
31579 val = convert_modes (wsmode, smode, val, true);
31580 x = expand_simple_binop (wsmode, ASHIFT, val,
31581 GEN_INT (GET_MODE_BITSIZE (smode)),
31582 NULL_RTX, 1, OPTAB_LIB_WIDEN);
31583 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
31584
31585 x = gen_lowpart (wvmode, target);
31586 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
31587 gcc_assert (ok);
31588 return ok;
31589 }
31590
31591 case V16HImode:
31592 case V32QImode:
31593 {
31594 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
31595 rtx x = gen_reg_rtx (hvmode);
31596
31597 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
31598 gcc_assert (ok);
31599
31600 x = gen_rtx_VEC_CONCAT (mode, x, x);
31601 emit_insn (gen_rtx_SET (VOIDmode, target, x));
31602 }
31603 return true;
31604
31605 default:
31606 return false;
31607 }
31608 }
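
/* Example of the widen path above (illustrative only): broadcasting the
   QImode value 0x12 into V8QImode without a direct duplicate pattern
   first forms the HImode value 0x1212 with a shift and IOR, then
   recurses to broadcast that value through a V4HImode view (gen_lowpart)
   of the target.  */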
31609
31610 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
31611 whose ONE_VAR element is VAR, and other elements are zero. Return true
31612 if successful. */
31613
31614 static bool
31615 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
31616 rtx target, rtx var, int one_var)
31617 {
31618 enum machine_mode vsimode;
31619 rtx new_target;
31620 rtx x, tmp;
31621 bool use_vector_set = false;
31622
31623 switch (mode)
31624 {
31625 case V2DImode:
31626 /* For SSE4.1, we normally use vector set. But if the second
31627 element is zero and inter-unit moves are OK, we use movq
31628 instead. */
31629 use_vector_set = (TARGET_64BIT
31630 && TARGET_SSE4_1
31631 && !(TARGET_INTER_UNIT_MOVES
31632 && one_var == 0));
31633 break;
31634 case V16QImode:
31635 case V4SImode:
31636 case V4SFmode:
31637 use_vector_set = TARGET_SSE4_1;
31638 break;
31639 case V8HImode:
31640 use_vector_set = TARGET_SSE2;
31641 break;
31642 case V4HImode:
31643 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
31644 break;
31645 case V32QImode:
31646 case V16HImode:
31647 case V8SImode:
31648 case V8SFmode:
31649 case V4DFmode:
31650 use_vector_set = TARGET_AVX;
31651 break;
31652 case V4DImode:
31653 /* Use ix86_expand_vector_set in 64bit mode only. */
31654 use_vector_set = TARGET_AVX && TARGET_64BIT;
31655 break;
31656 default:
31657 break;
31658 }
31659
31660 if (use_vector_set)
31661 {
31662 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
31663 var = force_reg (GET_MODE_INNER (mode), var);
31664 ix86_expand_vector_set (mmx_ok, target, var, one_var);
31665 return true;
31666 }
31667
31668 switch (mode)
31669 {
31670 case V2SFmode:
31671 case V2SImode:
31672 if (!mmx_ok)
31673 return false;
31674 /* FALLTHRU */
31675
31676 case V2DFmode:
31677 case V2DImode:
31678 if (one_var != 0)
31679 return false;
31680 var = force_reg (GET_MODE_INNER (mode), var);
31681 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
31682 emit_insn (gen_rtx_SET (VOIDmode, target, x));
31683 return true;
31684
31685 case V4SFmode:
31686 case V4SImode:
31687 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
31688 new_target = gen_reg_rtx (mode);
31689 else
31690 new_target = target;
31691 var = force_reg (GET_MODE_INNER (mode), var);
31692 x = gen_rtx_VEC_DUPLICATE (mode, var);
31693 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
31694 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
31695 if (one_var != 0)
31696 {
31697 /* We need to shuffle the value to the correct position, so
31698 create a new pseudo to store the intermediate result. */
31699
31700 /* With SSE2, we can use the integer shuffle insns. */
31701 if (mode != V4SFmode && TARGET_SSE2)
31702 {
31703 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
31704 const1_rtx,
31705 GEN_INT (one_var == 1 ? 0 : 1),
31706 GEN_INT (one_var == 2 ? 0 : 1),
31707 GEN_INT (one_var == 3 ? 0 : 1)));
31708 if (target != new_target)
31709 emit_move_insn (target, new_target);
31710 return true;
31711 }
31712
31713 /* Otherwise convert the intermediate result to V4SFmode and
31714 use the SSE1 shuffle instructions. */
31715 if (mode != V4SFmode)
31716 {
31717 tmp = gen_reg_rtx (V4SFmode);
31718 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
31719 }
31720 else
31721 tmp = new_target;
31722
31723 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
31724 const1_rtx,
31725 GEN_INT (one_var == 1 ? 0 : 1),
31726 GEN_INT (one_var == 2 ? 0+4 : 1+4),
31727 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
31728
31729 if (mode != V4SFmode)
31730 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
31731 else if (tmp != target)
31732 emit_move_insn (target, tmp);
31733 }
31734 else if (target != new_target)
31735 emit_move_insn (target, new_target);
31736 return true;
31737
31738 case V8HImode:
31739 case V16QImode:
31740 vsimode = V4SImode;
31741 goto widen;
31742 case V4HImode:
31743 case V8QImode:
31744 if (!mmx_ok)
31745 return false;
31746 vsimode = V2SImode;
31747 goto widen;
31748 widen:
31749 if (one_var != 0)
31750 return false;
31751
31752 /* Zero extend the variable element to SImode and recurse. */
31753 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
31754
31755 x = gen_reg_rtx (vsimode);
31756 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
31757 var, one_var))
31758 gcc_unreachable ();
31759
31760 emit_move_insn (target, gen_lowpart (mode, x));
31761 return true;
31762
31763 default:
31764 return false;
31765 }
31766 }
31767
31768 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
31769 consisting of the values in VALS. It is known that all elements
31770 except ONE_VAR are constants. Return true if successful. */
31771
31772 static bool
31773 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
31774 rtx target, rtx vals, int one_var)
31775 {
31776 rtx var = XVECEXP (vals, 0, one_var);
31777 enum machine_mode wmode;
31778 rtx const_vec, x;
31779
31780 const_vec = copy_rtx (vals);
31781 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
31782 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
31783
31784 switch (mode)
31785 {
31786 case V2DFmode:
31787 case V2DImode:
31788 case V2SFmode:
31789 case V2SImode:
31790 /* For the two element vectors, it's just as easy to use
31791 the general case. */
31792 return false;
31793
31794 case V4DImode:
31795 /* Use ix86_expand_vector_set in 64bit mode only. */
31796 if (!TARGET_64BIT)
31797 return false;
31798 case V4DFmode:
31799 case V8SFmode:
31800 case V8SImode:
31801 case V16HImode:
31802 case V32QImode:
31803 case V4SFmode:
31804 case V4SImode:
31805 case V8HImode:
31806 case V4HImode:
31807 break;
31808
31809 case V16QImode:
31810 if (TARGET_SSE4_1)
31811 break;
31812 wmode = V8HImode;
31813 goto widen;
31814 case V8QImode:
31815 wmode = V4HImode;
31816 goto widen;
31817 widen:
31818 /* There's no way to set one QImode entry easily. Combine
31819 the variable value with its adjacent constant value, and
31820 promote to an HImode set. */
31821 x = XVECEXP (vals, 0, one_var ^ 1);
31822 if (one_var & 1)
31823 {
31824 var = convert_modes (HImode, QImode, var, true);
31825 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
31826 NULL_RTX, 1, OPTAB_LIB_WIDEN);
31827 x = GEN_INT (INTVAL (x) & 0xff);
31828 }
31829 else
31830 {
31831 var = convert_modes (HImode, QImode, var, true);
31832 x = gen_int_mode (INTVAL (x) << 8, HImode);
31833 }
31834 if (x != const0_rtx)
31835 var = expand_simple_binop (HImode, IOR, var, x, var,
31836 1, OPTAB_LIB_WIDEN);
31837
31838 x = gen_reg_rtx (wmode);
31839 emit_move_insn (x, gen_lowpart (wmode, const_vec));
31840 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
31841
31842 emit_move_insn (target, gen_lowpart (mode, x));
31843 return true;
31844
31845 default:
31846 return false;
31847 }
31848
31849 emit_move_insn (target, const_vec);
31850 ix86_expand_vector_set (mmx_ok, target, var, one_var);
31851 return true;
31852 }
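
/* Example of the QImode widening above (illustrative only): initializing
   a V8QImode vector whose only variable element is element 5, with
   element 4 the constant 0x20, builds the HImode value (var << 8) | 0x20
   and inserts it as element 2 of a V4HImode copy of the constant vector,
   which is then moved to the target via gen_lowpart.  */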
31853
31854 /* A subroutine of ix86_expand_vector_init_general. Use vector
31855 concatenate to handle the most general case: all values variable,
31856 and none identical. */
31857
31858 static void
31859 ix86_expand_vector_init_concat (enum machine_mode mode,
31860 rtx target, rtx *ops, int n)
31861 {
31862 enum machine_mode cmode, hmode = VOIDmode;
31863 rtx first[8], second[4];
31864 rtvec v;
31865 int i, j;
31866
31867 switch (n)
31868 {
31869 case 2:
31870 switch (mode)
31871 {
31872 case V8SImode:
31873 cmode = V4SImode;
31874 break;
31875 case V8SFmode:
31876 cmode = V4SFmode;
31877 break;
31878 case V4DImode:
31879 cmode = V2DImode;
31880 break;
31881 case V4DFmode:
31882 cmode = V2DFmode;
31883 break;
31884 case V4SImode:
31885 cmode = V2SImode;
31886 break;
31887 case V4SFmode:
31888 cmode = V2SFmode;
31889 break;
31890 case V2DImode:
31891 cmode = DImode;
31892 break;
31893 case V2SImode:
31894 cmode = SImode;
31895 break;
31896 case V2DFmode:
31897 cmode = DFmode;
31898 break;
31899 case V2SFmode:
31900 cmode = SFmode;
31901 break;
31902 default:
31903 gcc_unreachable ();
31904 }
31905
31906 if (!register_operand (ops[1], cmode))
31907 ops[1] = force_reg (cmode, ops[1]);
31908 if (!register_operand (ops[0], cmode))
31909 ops[0] = force_reg (cmode, ops[0]);
31910 emit_insn (gen_rtx_SET (VOIDmode, target,
31911 gen_rtx_VEC_CONCAT (mode, ops[0],
31912 ops[1])));
31913 break;
31914
31915 case 4:
31916 switch (mode)
31917 {
31918 case V4DImode:
31919 cmode = V2DImode;
31920 break;
31921 case V4DFmode:
31922 cmode = V2DFmode;
31923 break;
31924 case V4SImode:
31925 cmode = V2SImode;
31926 break;
31927 case V4SFmode:
31928 cmode = V2SFmode;
31929 break;
31930 default:
31931 gcc_unreachable ();
31932 }
31933 goto half;
31934
31935 case 8:
31936 switch (mode)
31937 {
31938 case V8SImode:
31939 cmode = V2SImode;
31940 hmode = V4SImode;
31941 break;
31942 case V8SFmode:
31943 cmode = V2SFmode;
31944 hmode = V4SFmode;
31945 break;
31946 default:
31947 gcc_unreachable ();
31948 }
31949 goto half;
31950
31951 half:
31952 /* FIXME: We process inputs backward to help RA. PR 36222. */
31953 i = n - 1;
31954 j = (n >> 1) - 1;
31955 for (; i > 0; i -= 2, j--)
31956 {
31957 first[j] = gen_reg_rtx (cmode);
31958 v = gen_rtvec (2, ops[i - 1], ops[i]);
31959 ix86_expand_vector_init (false, first[j],
31960 gen_rtx_PARALLEL (cmode, v));
31961 }
31962
31963 n >>= 1;
31964 if (n > 2)
31965 {
31966 gcc_assert (hmode != VOIDmode);
31967 for (i = j = 0; i < n; i += 2, j++)
31968 {
31969 second[j] = gen_reg_rtx (hmode);
31970 ix86_expand_vector_init_concat (hmode, second [j],
31971 &first [i], 2);
31972 }
31973 n >>= 1;
31974 ix86_expand_vector_init_concat (mode, target, second, n);
31975 }
31976 else
31977 ix86_expand_vector_init_concat (mode, target, first, n);
31978 break;
31979
31980 default:
31981 gcc_unreachable ();
31982 }
31983 }
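
/* Example of the recursive concatenation above (illustrative only): an
   8-element V8SFmode initializer is first combined pairwise into four
   V2SFmode registers, those are concatenated into two V4SFmode halves,
   and a final VEC_CONCAT produces the V8SFmode result.  */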
31984
31985 /* A subroutine of ix86_expand_vector_init_general. Use vector
31986 interleave to handle the most general case: all values variable,
31987 and none identical. */
31988
31989 static void
31990 ix86_expand_vector_init_interleave (enum machine_mode mode,
31991 rtx target, rtx *ops, int n)
31992 {
31993 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
31994 int i, j;
31995 rtx op0, op1;
31996 rtx (*gen_load_even) (rtx, rtx, rtx);
31997 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
31998 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
31999
32000 switch (mode)
32001 {
32002 case V8HImode:
32003 gen_load_even = gen_vec_setv8hi;
32004 gen_interleave_first_low = gen_vec_interleave_lowv4si;
32005 gen_interleave_second_low = gen_vec_interleave_lowv2di;
32006 inner_mode = HImode;
32007 first_imode = V4SImode;
32008 second_imode = V2DImode;
32009 third_imode = VOIDmode;
32010 break;
32011 case V16QImode:
32012 gen_load_even = gen_vec_setv16qi;
32013 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
32014 gen_interleave_second_low = gen_vec_interleave_lowv4si;
32015 inner_mode = QImode;
32016 first_imode = V8HImode;
32017 second_imode = V4SImode;
32018 third_imode = V2DImode;
32019 break;
32020 default:
32021 gcc_unreachable ();
32022 }
32023
32024 for (i = 0; i < n; i++)
32025 {
32026 /* Extend the odd element to SImode using a paradoxical SUBREG. */
32027 op0 = gen_reg_rtx (SImode);
32028 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
32029
32030 /* Insert the SImode value as low element of V4SImode vector. */
32031 op1 = gen_reg_rtx (V4SImode);
32032 op0 = gen_rtx_VEC_MERGE (V4SImode,
32033 gen_rtx_VEC_DUPLICATE (V4SImode,
32034 op0),
32035 CONST0_RTX (V4SImode),
32036 const1_rtx);
32037 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
32038
32039 /* Cast the V4SImode vector back to a vector in the original mode. */
32040 op0 = gen_reg_rtx (mode);
32041 emit_move_insn (op0, gen_lowpart (mode, op1));
32042
32043 /* Load even elements into the second position. */
32044 emit_insn (gen_load_even (op0,
32045 force_reg (inner_mode,
32046 ops [i + i + 1]),
32047 const1_rtx));
32048
32049 /* Cast vector to FIRST_IMODE vector. */
32050 ops[i] = gen_reg_rtx (first_imode);
32051 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
32052 }
32053
32054 /* Interleave low FIRST_IMODE vectors. */
32055 for (i = j = 0; i < n; i += 2, j++)
32056 {
32057 op0 = gen_reg_rtx (first_imode);
32058 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
32059
32060 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
32061 ops[j] = gen_reg_rtx (second_imode);
32062 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
32063 }
32064
32065 /* Interleave low SECOND_IMODE vectors. */
32066 switch (second_imode)
32067 {
32068 case V4SImode:
32069 for (i = j = 0; i < n / 2; i += 2, j++)
32070 {
32071 op0 = gen_reg_rtx (second_imode);
32072 emit_insn (gen_interleave_second_low (op0, ops[i],
32073 ops[i + 1]));
32074
32075 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
32076 vector. */
32077 ops[j] = gen_reg_rtx (third_imode);
32078 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
32079 }
32080 second_imode = V2DImode;
32081 gen_interleave_second_low = gen_vec_interleave_lowv2di;
32082 /* FALLTHRU */
32083
32084 case V2DImode:
32085 op0 = gen_reg_rtx (second_imode);
32086 emit_insn (gen_interleave_second_low (op0, ops[0],
32087 ops[1]));
32088
32089 /* Cast the SECOND_IMODE vector back to a vector in the original
32090 mode. */
32091 emit_insn (gen_rtx_SET (VOIDmode, target,
32092 gen_lowpart (mode, op0)));
32093 break;
32094
32095 default:
32096 gcc_unreachable ();
32097 }
32098 }
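
/* Example of the interleave scheme above (illustrative only): for a
   V8HImode build, each of the four intermediate vectors holds input
   elements 2*i and 2*i+1 in its two low HImode lanes; one round of low
   V4SImode interleaves (punpckldq) merges those into two vectors of four
   consecutive elements, and a final low V2DImode interleave (punpcklqdq)
   produces all eight.  The V16QImode case adds one more interleave
   level.  */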
32099
32100 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
32101 all values variable, and none identical. */
32102
32103 static void
32104 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
32105 rtx target, rtx vals)
32106 {
32107 rtx ops[32], op0, op1;
32108 enum machine_mode half_mode = VOIDmode;
32109 int n, i;
32110
32111 switch (mode)
32112 {
32113 case V2SFmode:
32114 case V2SImode:
32115 if (!mmx_ok && !TARGET_SSE)
32116 break;
32117 /* FALLTHRU */
32118
32119 case V8SFmode:
32120 case V8SImode:
32121 case V4DFmode:
32122 case V4DImode:
32123 case V4SFmode:
32124 case V4SImode:
32125 case V2DFmode:
32126 case V2DImode:
32127 n = GET_MODE_NUNITS (mode);
32128 for (i = 0; i < n; i++)
32129 ops[i] = XVECEXP (vals, 0, i);
32130 ix86_expand_vector_init_concat (mode, target, ops, n);
32131 return;
32132
32133 case V32QImode:
32134 half_mode = V16QImode;
32135 goto half;
32136
32137 case V16HImode:
32138 half_mode = V8HImode;
32139 goto half;
32140
32141 half:
32142 n = GET_MODE_NUNITS (mode);
32143 for (i = 0; i < n; i++)
32144 ops[i] = XVECEXP (vals, 0, i);
32145 op0 = gen_reg_rtx (half_mode);
32146 op1 = gen_reg_rtx (half_mode);
32147 ix86_expand_vector_init_interleave (half_mode, op0, ops,
32148 n >> 2);
32149 ix86_expand_vector_init_interleave (half_mode, op1,
32150 &ops [n >> 1], n >> 2);
32151 emit_insn (gen_rtx_SET (VOIDmode, target,
32152 gen_rtx_VEC_CONCAT (mode, op0, op1)));
32153 return;
32154
32155 case V16QImode:
32156 if (!TARGET_SSE4_1)
32157 break;
32158 /* FALLTHRU */
32159
32160 case V8HImode:
32161 if (!TARGET_SSE2)
32162 break;
32163
32164 /* Don't use ix86_expand_vector_init_interleave if we can't
32165 move from GPR to SSE register directly. */
32166 if (!TARGET_INTER_UNIT_MOVES)
32167 break;
32168
32169 n = GET_MODE_NUNITS (mode);
32170 for (i = 0; i < n; i++)
32171 ops[i] = XVECEXP (vals, 0, i);
32172 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
32173 return;
32174
32175 case V4HImode:
32176 case V8QImode:
32177 break;
32178
32179 default:
32180 gcc_unreachable ();
32181 }
32182
32183 {
32184 int i, j, n_elts, n_words, n_elt_per_word;
32185 enum machine_mode inner_mode;
32186 rtx words[4], shift;
32187
32188 inner_mode = GET_MODE_INNER (mode);
32189 n_elts = GET_MODE_NUNITS (mode);
32190 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
32191 n_elt_per_word = n_elts / n_words;
32192 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
32193
32194 for (i = 0; i < n_words; ++i)
32195 {
32196 rtx word = NULL_RTX;
32197
32198 for (j = 0; j < n_elt_per_word; ++j)
32199 {
32200 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
32201 elt = convert_modes (word_mode, inner_mode, elt, true);
32202
32203 if (j == 0)
32204 word = elt;
32205 else
32206 {
32207 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
32208 word, 1, OPTAB_LIB_WIDEN);
32209 word = expand_simple_binop (word_mode, IOR, word, elt,
32210 word, 1, OPTAB_LIB_WIDEN);
32211 }
32212 }
32213
32214 words[i] = word;
32215 }
32216
32217 if (n_words == 1)
32218 emit_move_insn (target, gen_lowpart (mode, words[0]));
32219 else if (n_words == 2)
32220 {
32221 rtx tmp = gen_reg_rtx (mode);
32222 emit_clobber (tmp);
32223 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
32224 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
32225 emit_move_insn (target, tmp);
32226 }
32227 else if (n_words == 4)
32228 {
32229 rtx tmp = gen_reg_rtx (V4SImode);
32230 gcc_assert (word_mode == SImode);
32231 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
32232 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
32233 emit_move_insn (target, gen_lowpart (mode, tmp));
32234 }
32235 else
32236 gcc_unreachable ();
32237 }
32238 }
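
/* Example of the word-building fallback above (illustrative only): a
   V4HImode initializer on a 32-bit target packs elements 1 and 0 into
   one SImode word and elements 3 and 2 into another using shifts and
   IORs, then writes the two words into the low and high halves of a
   clobbered vector pseudo before copying it to the target.  */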
32239
32240 /* Initialize vector TARGET via VALS. Suppress the use of MMX
32241 instructions unless MMX_OK is true. */
32242
32243 void
32244 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
32245 {
32246 enum machine_mode mode = GET_MODE (target);
32247 enum machine_mode inner_mode = GET_MODE_INNER (mode);
32248 int n_elts = GET_MODE_NUNITS (mode);
32249 int n_var = 0, one_var = -1;
32250 bool all_same = true, all_const_zero = true;
32251 int i;
32252 rtx x;
32253
32254 for (i = 0; i < n_elts; ++i)
32255 {
32256 x = XVECEXP (vals, 0, i);
32257 if (!(CONST_INT_P (x)
32258 || GET_CODE (x) == CONST_DOUBLE
32259 || GET_CODE (x) == CONST_FIXED))
32260 n_var++, one_var = i;
32261 else if (x != CONST0_RTX (inner_mode))
32262 all_const_zero = false;
32263 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
32264 all_same = false;
32265 }
32266
32267 /* Constants are best loaded from the constant pool. */
32268 if (n_var == 0)
32269 {
32270 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
32271 return;
32272 }
32273
32274 /* If all values are identical, broadcast the value. */
32275 if (all_same
32276 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
32277 XVECEXP (vals, 0, 0)))
32278 return;
32279
32280 /* Values where only one field is non-constant are best loaded from
32281 the pool and overwritten via move later. */
32282 if (n_var == 1)
32283 {
32284 if (all_const_zero
32285 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
32286 XVECEXP (vals, 0, one_var),
32287 one_var))
32288 return;
32289
32290 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
32291 return;
32292 }
32293
32294 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
32295 }
32296
32297 void
32298 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
32299 {
32300 enum machine_mode mode = GET_MODE (target);
32301 enum machine_mode inner_mode = GET_MODE_INNER (mode);
32302 enum machine_mode half_mode;
32303 bool use_vec_merge = false;
32304 rtx tmp;
32305 static rtx (*gen_extract[6][2]) (rtx, rtx)
32306 = {
32307 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
32308 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
32309 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
32310 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
32311 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
32312 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
32313 };
32314 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
32315 = {
32316 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
32317 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
32318 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
32319 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
32320 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
32321 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
32322 };
32323 int i, j, n;
32324
32325 switch (mode)
32326 {
32327 case V2SFmode:
32328 case V2SImode:
32329 if (mmx_ok)
32330 {
32331 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
32332 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
32333 if (elt == 0)
32334 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
32335 else
32336 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
32337 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
32338 return;
32339 }
32340 break;
32341
32342 case V2DImode:
32343 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
32344 if (use_vec_merge)
32345 break;
32346
32347 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
32348 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
32349 if (elt == 0)
32350 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
32351 else
32352 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
32353 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
32354 return;
32355
32356 case V2DFmode:
32357 {
32358 rtx op0, op1;
32359
32360 /* For the two element vectors, we implement a VEC_CONCAT with
32361 the extraction of the other element. */
32362
32363 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
32364 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
32365
32366 if (elt == 0)
32367 op0 = val, op1 = tmp;
32368 else
32369 op0 = tmp, op1 = val;
32370
32371 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
32372 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
32373 }
32374 return;
32375
32376 case V4SFmode:
32377 use_vec_merge = TARGET_SSE4_1;
32378 if (use_vec_merge)
32379 break;
32380
32381 switch (elt)
32382 {
32383 case 0:
32384 use_vec_merge = true;
32385 break;
32386
32387 case 1:
32388 /* tmp = target = A B C D */
32389 tmp = copy_to_reg (target);
32390 /* target = A A B B */
32391 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
32392 /* target = X A B B */
32393 ix86_expand_vector_set (false, target, val, 0);
32394 /* target = A X C D */
32395 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
32396 const1_rtx, const0_rtx,
32397 GEN_INT (2+4), GEN_INT (3+4)));
32398 return;
32399
32400 case 2:
32401 /* tmp = target = A B C D */
32402 tmp = copy_to_reg (target);
32403 /* tmp = X B C D */
32404 ix86_expand_vector_set (false, tmp, val, 0);
32405 /* target = A B X D */
32406 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
32407 const0_rtx, const1_rtx,
32408 GEN_INT (0+4), GEN_INT (3+4)));
32409 return;
32410
32411 case 3:
32412 /* tmp = target = A B C D */
32413 tmp = copy_to_reg (target);
32414 /* tmp = X B C D */
32415 ix86_expand_vector_set (false, tmp, val, 0);
32416 /* target = A B C X */
32417 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
32418 const0_rtx, const1_rtx,
32419 GEN_INT (2+4), GEN_INT (0+4)));
32420 return;
32421
32422 default:
32423 gcc_unreachable ();
32424 }
32425 break;
32426
32427 case V4SImode:
32428 use_vec_merge = TARGET_SSE4_1;
32429 if (use_vec_merge)
32430 break;
32431
32432 /* Element 0 handled by vec_merge below. */
32433 if (elt == 0)
32434 {
32435 use_vec_merge = true;
32436 break;
32437 }
32438
32439 if (TARGET_SSE2)
32440 {
32441 /* With SSE2, use integer shuffles to swap element 0 and ELT,
32442 store into element 0, then shuffle them back. */
32443
32444 rtx order[4];
32445
32446 order[0] = GEN_INT (elt);
32447 order[1] = const1_rtx;
32448 order[2] = const2_rtx;
32449 order[3] = GEN_INT (3);
32450 order[elt] = const0_rtx;
32451
32452 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
32453 order[1], order[2], order[3]));
32454
32455 ix86_expand_vector_set (false, target, val, 0);
32456
32457 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
32458 order[1], order[2], order[3]));
32459 }
32460 else
32461 {
32462 /* For SSE1, we have to reuse the V4SF code. */
32463 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
32464 gen_lowpart (SFmode, val), elt);
32465 }
32466 return;
32467
32468 case V8HImode:
32469 use_vec_merge = TARGET_SSE2;
32470 break;
32471 case V4HImode:
32472 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
32473 break;
32474
32475 case V16QImode:
32476 use_vec_merge = TARGET_SSE4_1;
32477 break;
32478
32479 case V8QImode:
32480 break;
32481
32482 case V32QImode:
32483 half_mode = V16QImode;
32484 j = 0;
32485 n = 16;
32486 goto half;
32487
32488 case V16HImode:
32489 half_mode = V8HImode;
32490 j = 1;
32491 n = 8;
32492 goto half;
32493
32494 case V8SImode:
32495 half_mode = V4SImode;
32496 j = 2;
32497 n = 4;
32498 goto half;
32499
32500 case V4DImode:
32501 half_mode = V2DImode;
32502 j = 3;
32503 n = 2;
32504 goto half;
32505
32506 case V8SFmode:
32507 half_mode = V4SFmode;
32508 j = 4;
32509 n = 4;
32510 goto half;
32511
32512 case V4DFmode:
32513 half_mode = V2DFmode;
32514 j = 5;
32515 n = 2;
32516 goto half;
32517
32518 half:
32519 /* Compute offset. */
32520 i = elt / n;
32521 elt %= n;
32522
32523 gcc_assert (i <= 1);
32524
32525 /* Extract the half. */
32526 tmp = gen_reg_rtx (half_mode);
32527 emit_insn (gen_extract[j][i] (tmp, target));
32528
32529 /* Put val in tmp at elt. */
32530 ix86_expand_vector_set (false, tmp, val, elt);
32531
32532 /* Put it back. */
32533 emit_insn (gen_insert[j][i] (target, target, tmp));
32534 return;
32535
32536 default:
32537 break;
32538 }
32539
32540 if (use_vec_merge)
32541 {
32542 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
32543 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
32544 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
32545 }
32546 else
32547 {
32548 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
32549
32550 emit_move_insn (mem, target);
32551
32552 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
32553 emit_move_insn (tmp, val);
32554
32555 emit_move_insn (target, mem);
32556 }
32557 }
32558
32559 void
32560 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
32561 {
32562 enum machine_mode mode = GET_MODE (vec);
32563 enum machine_mode inner_mode = GET_MODE_INNER (mode);
32564 bool use_vec_extr = false;
32565 rtx tmp;
32566
32567 switch (mode)
32568 {
32569 case V2SImode:
32570 case V2SFmode:
32571 if (!mmx_ok)
32572 break;
32573 /* FALLTHRU */
32574
32575 case V2DFmode:
32576 case V2DImode:
32577 use_vec_extr = true;
32578 break;
32579
32580 case V4SFmode:
32581 use_vec_extr = TARGET_SSE4_1;
32582 if (use_vec_extr)
32583 break;
32584
32585 switch (elt)
32586 {
32587 case 0:
32588 tmp = vec;
32589 break;
32590
32591 case 1:
32592 case 3:
32593 tmp = gen_reg_rtx (mode);
32594 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
32595 GEN_INT (elt), GEN_INT (elt),
32596 GEN_INT (elt+4), GEN_INT (elt+4)));
32597 break;
32598
32599 case 2:
32600 tmp = gen_reg_rtx (mode);
32601 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
32602 break;
32603
32604 default:
32605 gcc_unreachable ();
32606 }
32607 vec = tmp;
32608 use_vec_extr = true;
32609 elt = 0;
32610 break;
32611
32612 case V4SImode:
32613 use_vec_extr = TARGET_SSE4_1;
32614 if (use_vec_extr)
32615 break;
32616
32617 if (TARGET_SSE2)
32618 {
32619 switch (elt)
32620 {
32621 case 0:
32622 tmp = vec;
32623 break;
32624
32625 case 1:
32626 case 3:
32627 tmp = gen_reg_rtx (mode);
32628 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
32629 GEN_INT (elt), GEN_INT (elt),
32630 GEN_INT (elt), GEN_INT (elt)));
32631 break;
32632
32633 case 2:
32634 tmp = gen_reg_rtx (mode);
32635 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
32636 break;
32637
32638 default:
32639 gcc_unreachable ();
32640 }
32641 vec = tmp;
32642 use_vec_extr = true;
32643 elt = 0;
32644 }
32645 else
32646 {
32647 /* For SSE1, we have to reuse the V4SF code. */
32648 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
32649 gen_lowpart (V4SFmode, vec), elt);
32650 return;
32651 }
32652 break;
32653
32654 case V8HImode:
32655 use_vec_extr = TARGET_SSE2;
32656 break;
32657 case V4HImode:
32658 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
32659 break;
32660
32661 case V16QImode:
32662 use_vec_extr = TARGET_SSE4_1;
32663 break;
32664
32665 case V8SFmode:
32666 if (TARGET_AVX)
32667 {
32668 tmp = gen_reg_rtx (V4SFmode);
32669 if (elt < 4)
32670 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
32671 else
32672 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
32673 ix86_expand_vector_extract (false, target, tmp, elt & 3);
32674 return;
32675 }
32676 break;
32677
32678 case V4DFmode:
32679 if (TARGET_AVX)
32680 {
32681 tmp = gen_reg_rtx (V2DFmode);
32682 if (elt < 2)
32683 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
32684 else
32685 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
32686 ix86_expand_vector_extract (false, target, tmp, elt & 1);
32687 return;
32688 }
32689 break;
32690
32691 case V32QImode:
32692 if (TARGET_AVX)
32693 {
32694 tmp = gen_reg_rtx (V16QImode);
32695 if (elt < 16)
32696 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
32697 else
32698 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
32699 ix86_expand_vector_extract (false, target, tmp, elt & 15);
32700 return;
32701 }
32702 break;
32703
32704 case V16HImode:
32705 if (TARGET_AVX)
32706 {
32707 tmp = gen_reg_rtx (V8HImode);
32708 if (elt < 8)
32709 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
32710 else
32711 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
32712 ix86_expand_vector_extract (false, target, tmp, elt & 7);
32713 return;
32714 }
32715 break;
32716
32717 case V8SImode:
32718 if (TARGET_AVX)
32719 {
32720 tmp = gen_reg_rtx (V4SImode);
32721 if (elt < 4)
32722 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
32723 else
32724 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
32725 ix86_expand_vector_extract (false, target, tmp, elt & 3);
32726 return;
32727 }
32728 break;
32729
32730 case V4DImode:
32731 if (TARGET_AVX)
32732 {
32733 tmp = gen_reg_rtx (V2DImode);
32734 if (elt < 2)
32735 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
32736 else
32737 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
32738 ix86_expand_vector_extract (false, target, tmp, elt & 1);
32739 return;
32740 }
32741 break;
32742
32743 case V8QImode:
32744 /* ??? Could extract the appropriate HImode element and shift. */
32745 default:
32746 break;
32747 }
32748
32749 if (use_vec_extr)
32750 {
32751 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
32752 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
32753
32754 /* Let the rtl optimizers know about the zero extension performed. */
32755 if (inner_mode == QImode || inner_mode == HImode)
32756 {
32757 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
32758 target = gen_lowpart (SImode, target);
32759 }
32760
32761 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
32762 }
32763 else
32764 {
32765 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
32766
32767 emit_move_insn (mem, vec);
32768
32769 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
32770 emit_move_insn (target, tmp);
32771 }
32772 }
32773
32774 /* Expand a vector reduction. FN is the binary pattern to reduce;
32775 DEST is the destination; IN is the input vector. */
32776
32777 void
32778 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
32779 {
32780 rtx tmp1, tmp2, tmp3, tmp4, tmp5;
32781 enum machine_mode mode = GET_MODE (in);
32782 int i;
32783
32784 tmp1 = gen_reg_rtx (mode);
32785 tmp2 = gen_reg_rtx (mode);
32786 tmp3 = gen_reg_rtx (mode);
32787
32788 switch (mode)
32789 {
32790 case V4SFmode:
32791 emit_insn (gen_sse_movhlps (tmp1, in, in));
32792 emit_insn (fn (tmp2, tmp1, in));
32793 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
32794 const1_rtx, const1_rtx,
32795 GEN_INT (1+4), GEN_INT (1+4)));
32796 break;
32797 case V8SFmode:
32798 tmp4 = gen_reg_rtx (mode);
32799 tmp5 = gen_reg_rtx (mode);
32800 emit_insn (gen_avx_vperm2f128v8sf3 (tmp4, in, in, const1_rtx));
32801 emit_insn (fn (tmp5, tmp4, in));
32802 emit_insn (gen_avx_shufps256 (tmp1, tmp5, tmp5, GEN_INT (2+12)));
32803 emit_insn (fn (tmp2, tmp1, tmp5));
32804 emit_insn (gen_avx_shufps256 (tmp3, tmp2, tmp2, const1_rtx));
32805 break;
32806 case V4DFmode:
32807 emit_insn (gen_avx_vperm2f128v4df3 (tmp1, in, in, const1_rtx));
32808 emit_insn (fn (tmp2, tmp1, in));
32809 emit_insn (gen_avx_shufpd256 (tmp3, tmp2, tmp2, const1_rtx));
32810 break;
32811 case V32QImode:
32812 case V16HImode:
32813 case V8SImode:
32814 case V4DImode:
32815 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, tmp1),
32816 gen_lowpart (V4DImode, in),
32817 gen_lowpart (V4DImode, in),
32818 const1_rtx));
32819 tmp4 = in;
32820 tmp5 = tmp1;
32821 for (i = 64; i >= GET_MODE_BITSIZE (GET_MODE_INNER (mode)); i >>= 1)
32822 {
32823 if (i != 64)
32824 {
32825 tmp2 = gen_reg_rtx (mode);
32826 tmp3 = gen_reg_rtx (mode);
32827 }
32828 emit_insn (fn (tmp2, tmp4, tmp5));
32829 emit_insn (gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, tmp3),
32830 gen_lowpart (V2TImode, tmp2),
32831 GEN_INT (i)));
32832 tmp4 = tmp2;
32833 tmp5 = tmp3;
32834 }
32835 break;
32836 default:
32837 gcc_unreachable ();
32838 }
32839 emit_insn (fn (dest, tmp2, tmp3));
32840 }
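
/* Worked example of the V4SFmode case above (illustrative only),
   assuming FN is addition and IN = [a b c d]:
     tmp1 = movhlps (in, in)              = [c d c d]
     tmp2 = tmp1 + in                     = [a+c b+d . .]
     tmp3 = broadcast of tmp2 element 1   = [b+d b+d b+d b+d]
     dest = tmp2 + tmp3, whose element 0 is a+b+c+d.  */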
32841 \f
32842 /* Target hook for scalar_mode_supported_p. */
32843 static bool
32844 ix86_scalar_mode_supported_p (enum machine_mode mode)
32845 {
32846 if (DECIMAL_FLOAT_MODE_P (mode))
32847 return default_decimal_float_supported_p ();
32848 else if (mode == TFmode)
32849 return true;
32850 else
32851 return default_scalar_mode_supported_p (mode);
32852 }
32853
32854 /* Implements target hook vector_mode_supported_p. */
32855 static bool
32856 ix86_vector_mode_supported_p (enum machine_mode mode)
32857 {
32858 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
32859 return true;
32860 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
32861 return true;
32862 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
32863 return true;
32864 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
32865 return true;
32866 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
32867 return true;
32868 return false;
32869 }
32870
32871 /* Target hook for c_mode_for_suffix. */
32872 static enum machine_mode
32873 ix86_c_mode_for_suffix (char suffix)
32874 {
32875 if (suffix == 'q')
32876 return TFmode;
32877 if (suffix == 'w')
32878 return XFmode;
32879
32880 return VOIDmode;
32881 }
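
/* For illustration: with the mapping above, literal suffixes such as
   1.0q (TFmode, i.e. __float128) and 1.0w (XFmode, i.e. __float80) are
   accepted by the C family front ends on this target.  */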
32882
32883 /* Worker function for TARGET_MD_ASM_CLOBBERS.
32884
32885 We do this in the new i386 backend to maintain source compatibility
32886 with the old cc0-based compiler. */
32887
32888 static tree
32889 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
32890 tree inputs ATTRIBUTE_UNUSED,
32891 tree clobbers)
32892 {
32893 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
32894 clobbers);
32895 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
32896 clobbers);
32897 return clobbers;
32898 }
32899
32900 /* Implements target vector targetm.asm.encode_section_info. */
32901
32902 static void ATTRIBUTE_UNUSED
32903 ix86_encode_section_info (tree decl, rtx rtl, int first)
32904 {
32905 default_encode_section_info (decl, rtl, first);
32906
32907 if (TREE_CODE (decl) == VAR_DECL
32908 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
32909 && ix86_in_large_data_p (decl))
32910 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
32911 }
32912
32913 /* Worker function for REVERSE_CONDITION. */
32914
32915 enum rtx_code
32916 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
32917 {
32918 return (mode != CCFPmode && mode != CCFPUmode
32919 ? reverse_condition (code)
32920 : reverse_condition_maybe_unordered (code));
32921 }
32922
32923 /* Output code to perform an x87 FP register move, from OPERANDS[1]
32924 to OPERANDS[0]. */
32925
32926 const char *
32927 output_387_reg_move (rtx insn, rtx *operands)
32928 {
32929 if (REG_P (operands[0]))
32930 {
32931 if (REG_P (operands[1])
32932 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
32933 {
32934 if (REGNO (operands[0]) == FIRST_STACK_REG)
32935 return output_387_ffreep (operands, 0);
32936 return "fstp\t%y0";
32937 }
32938 if (STACK_TOP_P (operands[0]))
32939 return "fld%Z1\t%y1";
32940 return "fst\t%y0";
32941 }
32942 else if (MEM_P (operands[0]))
32943 {
32944 gcc_assert (REG_P (operands[1]));
32945 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
32946 return "fstp%Z0\t%y0";
32947 else
32948 {
32949 /* There is no non-popping store to memory for XFmode.
32950 So if we need one, follow the store with a load. */
32951 if (GET_MODE (operands[0]) == XFmode)
32952 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
32953 else
32954 return "fst%Z0\t%y0";
32955 }
32956 }
32957 else
32958 gcc_unreachable();
32959 }
32960
32961 /* Output code to perform a conditional jump to LABEL, if the C2 flag in the
32962 FP status register is set. */
32963
32964 void
32965 ix86_emit_fp_unordered_jump (rtx label)
32966 {
32967 rtx reg = gen_reg_rtx (HImode);
32968 rtx temp;
32969
32970 emit_insn (gen_x86_fnstsw_1 (reg));
32971
32972 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
32973 {
32974 emit_insn (gen_x86_sahf_1 (reg));
32975
32976 temp = gen_rtx_REG (CCmode, FLAGS_REG);
32977 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
32978 }
32979 else
32980 {
32981 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
32982
32983 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
32984 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
32985 }
32986
32987 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
32988 gen_rtx_LABEL_REF (VOIDmode, label),
32989 pc_rtx);
32990 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
32991
32992 emit_jump_insn (temp);
32993 predict_jump (REG_BR_PROB_BASE * 10 / 100);
32994 }
32995
32996 /* Output code to perform a log1p XFmode calculation. */
32997
32998 void ix86_emit_i387_log1p (rtx op0, rtx op1)
32999 {
33000 rtx label1 = gen_label_rtx ();
33001 rtx label2 = gen_label_rtx ();
33002
33003 rtx tmp = gen_reg_rtx (XFmode);
33004 rtx tmp2 = gen_reg_rtx (XFmode);
33005 rtx test;
33006
33007 emit_insn (gen_absxf2 (tmp, op1));
33008 test = gen_rtx_GE (VOIDmode, tmp,
33009 CONST_DOUBLE_FROM_REAL_VALUE (
33010 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
33011 XFmode));
33012 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
33013
33014 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
33015 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
33016 emit_jump (label2);
33017
33018 emit_label (label1);
33019 emit_move_insn (tmp, CONST1_RTX (XFmode));
33020 emit_insn (gen_addxf3 (tmp, op1, tmp));
33021 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
33022 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
33023
33024 emit_label (label2);
33025 }
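
/* Rough C equivalent of the sequence above (a sketch only): fyl2xp1 and
   fyl2x stand for the i387 instructions computing y*log2(x+1) and
   y*log2(x), and ln2 is the fldln2 constant.  The threshold
   0.29289321... is 1 - sqrt(2)/2:

     if (fabs (x) < 0.29289321881345247561810596348408353)
       result = fyl2xp1 (x, ln2);
     else
       result = fyl2x (1.0 + x, ln2);
 */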
33026
33027 /* Emit code to round OP1 to the nearest integer, halfway cases away from zero, and store the result in OP0. */
33028 void ix86_emit_i387_round (rtx op0, rtx op1)
33029 {
33030 enum machine_mode inmode = GET_MODE (op1);
33031 enum machine_mode outmode = GET_MODE (op0);
33032 rtx e1, e2, res, tmp, tmp1, half;
33033 rtx scratch = gen_reg_rtx (HImode);
33034 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
33035 rtx jump_label = gen_label_rtx ();
33036 rtx insn;
33037 rtx (*gen_abs) (rtx, rtx);
33038 rtx (*gen_neg) (rtx, rtx);
33039
33040 switch (inmode)
33041 {
33042 case SFmode:
33043 gen_abs = gen_abssf2;
33044 break;
33045 case DFmode:
33046 gen_abs = gen_absdf2;
33047 break;
33048 case XFmode:
33049 gen_abs = gen_absxf2;
33050 break;
33051 default:
33052 gcc_unreachable ();
33053 }
33054
33055 switch (outmode)
33056 {
33057 case SFmode:
33058 gen_neg = gen_negsf2;
33059 break;
33060 case DFmode:
33061 gen_neg = gen_negdf2;
33062 break;
33063 case XFmode:
33064 gen_neg = gen_negxf2;
33065 break;
33066 case HImode:
33067 gen_neg = gen_neghi2;
33068 break;
33069 case SImode:
33070 gen_neg = gen_negsi2;
33071 break;
33072 case DImode:
33073 gen_neg = gen_negdi2;
33074 break;
33075 default:
33076 gcc_unreachable ();
33077 }
33078
33079 e1 = gen_reg_rtx (inmode);
33080 e2 = gen_reg_rtx (inmode);
33081 res = gen_reg_rtx (outmode);
33082
33083 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
33084
33085 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
33086
33087 /* scratch = fxam(op1) */
33088 emit_insn (gen_rtx_SET (VOIDmode, scratch,
33089 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
33090 UNSPEC_FXAM)));
33091 /* e1 = fabs(op1) */
33092 emit_insn (gen_abs (e1, op1));
33093
33094 /* e2 = e1 + 0.5 */
33095 half = force_reg (inmode, half);
33096 emit_insn (gen_rtx_SET (VOIDmode, e2,
33097 gen_rtx_PLUS (inmode, e1, half)));
33098
33099 /* res = floor(e2) */
33100 if (inmode != XFmode)
33101 {
33102 tmp1 = gen_reg_rtx (XFmode);
33103
33104 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
33105 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
33106 }
33107 else
33108 tmp1 = e2;
33109
33110 switch (outmode)
33111 {
33112 case SFmode:
33113 case DFmode:
33114 {
33115 rtx tmp0 = gen_reg_rtx (XFmode);
33116
33117 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
33118
33119 emit_insn (gen_rtx_SET (VOIDmode, res,
33120 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
33121 UNSPEC_TRUNC_NOOP)));
33122 }
33123 break;
33124 case XFmode:
33125 emit_insn (gen_frndintxf2_floor (res, tmp1));
33126 break;
33127 case HImode:
33128 emit_insn (gen_lfloorxfhi2 (res, tmp1));
33129 break;
33130 case SImode:
33131 emit_insn (gen_lfloorxfsi2 (res, tmp1));
33132 break;
33133 case DImode:
33134 emit_insn (gen_lfloorxfdi2 (res, tmp1));
33135 break;
33136 default:
33137 gcc_unreachable ();
33138 }
33139
33140 /* flags = signbit(a) */
33141 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
33142
33143 /* if (flags) then res = -res */
33144 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
33145 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
33146 gen_rtx_LABEL_REF (VOIDmode, jump_label),
33147 pc_rtx);
33148 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
33149 predict_jump (REG_BR_PROB_BASE * 50 / 100);
33150 JUMP_LABEL (insn) = jump_label;
33151
33152 emit_insn (gen_neg (res, res));
33153
33154 emit_label (jump_label);
33155 LABEL_NUSES (jump_label) = 1;
33156
33157 emit_move_insn (op0, res);
33158 }
33159
33160 /* Output code to perform a Newton-Raphson approximation of a single precision
33161 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
33162
33163 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
33164 {
33165 rtx x0, x1, e0, e1;
33166
33167 x0 = gen_reg_rtx (mode);
33168 e0 = gen_reg_rtx (mode);
33169 e1 = gen_reg_rtx (mode);
33170 x1 = gen_reg_rtx (mode);
33171
33172 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
33173
33174 /* x0 = rcp(b) estimate */
33175 emit_insn (gen_rtx_SET (VOIDmode, x0,
33176 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
33177 UNSPEC_RCP)));
33178 /* e0 = x0 * b */
33179 emit_insn (gen_rtx_SET (VOIDmode, e0,
33180 gen_rtx_MULT (mode, x0, b)));
33181
33182 /* e0 = x0 * e0 */
33183 emit_insn (gen_rtx_SET (VOIDmode, e0,
33184 gen_rtx_MULT (mode, x0, e0)));
33185
33186 /* e1 = x0 + x0 */
33187 emit_insn (gen_rtx_SET (VOIDmode, e1,
33188 gen_rtx_PLUS (mode, x0, x0)));
33189
33190 /* x1 = e1 - e0 */
33191 emit_insn (gen_rtx_SET (VOIDmode, x1,
33192 gen_rtx_MINUS (mode, e1, e0)));
33193
33194 /* res = a * x1 */
33195 emit_insn (gen_rtx_SET (VOIDmode, res,
33196 gen_rtx_MULT (mode, a, x1)));
33197 }
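
/* For reference (not from the original sources): the step above is the
   standard Newton-Raphson iteration for the reciprocal.  With
   f(x) = 1/x - b, one step from the estimate x0 is
     x1 = x0 - f(x0)/f'(x0) = x0 * (2 - b*x0) = (x0 + x0) - b*x0*x0,
   which is exactly e1 - e0 as computed above and roughly doubles the
   number of correct bits of the rcpss estimate.  */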
33198
33199 /* Output code to perform a Newton-Raphson approximation of a
33200 single precision floating point [reciprocal] square root. */
33201
33202 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
33203 bool recip)
33204 {
33205 rtx x0, e0, e1, e2, e3, mthree, mhalf;
33206 REAL_VALUE_TYPE r;
33207
33208 x0 = gen_reg_rtx (mode);
33209 e0 = gen_reg_rtx (mode);
33210 e1 = gen_reg_rtx (mode);
33211 e2 = gen_reg_rtx (mode);
33212 e3 = gen_reg_rtx (mode);
33213
33214 real_from_integer (&r, VOIDmode, -3, -1, 0);
33215 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
33216
33217 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
33218 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
33219
33220 if (VECTOR_MODE_P (mode))
33221 {
33222 mthree = ix86_build_const_vector (mode, true, mthree);
33223 mhalf = ix86_build_const_vector (mode, true, mhalf);
33224 }
33225
33226 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
33227 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
33228
33229 /* x0 = rsqrt(a) estimate */
33230 emit_insn (gen_rtx_SET (VOIDmode, x0,
33231 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
33232 UNSPEC_RSQRT)));
33233
33234 /* If a == 0.0, filter out the infinite rsqrt estimate to prevent a NaN result for sqrt (0.0). */
33235 if (!recip)
33236 {
33237 rtx zero, mask;
33238
33239 zero = gen_reg_rtx (mode);
33240 mask = gen_reg_rtx (mode);
33241
33242 zero = force_reg (mode, CONST0_RTX(mode));
33243 emit_insn (gen_rtx_SET (VOIDmode, mask,
33244 gen_rtx_NE (mode, zero, a)));
33245
33246 emit_insn (gen_rtx_SET (VOIDmode, x0,
33247 gen_rtx_AND (mode, x0, mask)));
33248 }
33249
33250 /* e0 = x0 * a */
33251 emit_insn (gen_rtx_SET (VOIDmode, e0,
33252 gen_rtx_MULT (mode, x0, a)));
33253 /* e1 = e0 * x0 */
33254 emit_insn (gen_rtx_SET (VOIDmode, e1,
33255 gen_rtx_MULT (mode, e0, x0)));
33256
33257 /* e2 = e1 - 3. */
33258 mthree = force_reg (mode, mthree);
33259 emit_insn (gen_rtx_SET (VOIDmode, e2,
33260 gen_rtx_PLUS (mode, e1, mthree)));
33261
33262 mhalf = force_reg (mode, mhalf);
33263 if (recip)
33264 /* e3 = -.5 * x0 */
33265 emit_insn (gen_rtx_SET (VOIDmode, e3,
33266 gen_rtx_MULT (mode, x0, mhalf)));
33267 else
33268 /* e3 = -.5 * e0 */
33269 emit_insn (gen_rtx_SET (VOIDmode, e3,
33270 gen_rtx_MULT (mode, e0, mhalf)));
33271 /* ret = e2 * e3 */
33272 emit_insn (gen_rtx_SET (VOIDmode, res,
33273 gen_rtx_MULT (mode, e2, e3)));
33274 }
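
/* For reference (not from the original sources): the formula used above
   is one Newton-Raphson step for the reciprocal square root.  With
   f(y) = 1/(y*y) - a and the estimate y0 = rsqrtss(a),
     y1 = y0 - f(y0)/f'(y0) = y0 * (3 - a*y0*y0) / 2
        = -0.5 * y0 * (a*y0*y0 - 3),
   and multiplying through by a gives the sqrt variant,
     sqrt(a) ~= -0.5 * (a*y0) * (a*y0*y0 - 3).  */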
33275
33276 #ifdef TARGET_SOLARIS
33277 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
33278
33279 static void
33280 i386_solaris_elf_named_section (const char *name, unsigned int flags,
33281 tree decl)
33282 {
33283 /* With Binutils 2.15, the "@unwind" marker must be specified on
33284 every occurrence of the ".eh_frame" section, not just the first
33285 one. */
33286 if (TARGET_64BIT
33287 && strcmp (name, ".eh_frame") == 0)
33288 {
33289 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
33290 flags & SECTION_WRITE ? "aw" : "a");
33291 return;
33292 }
33293
33294 #ifndef USE_GAS
33295 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
33296 {
33297 solaris_elf_asm_comdat_section (name, flags, decl);
33298 return;
33299 }
33300 #endif
33301
33302 default_elf_asm_named_section (name, flags, decl);
33303 }
33304 #endif /* TARGET_SOLARIS */
33305
33306 /* Return the mangling of TYPE if it is an extended fundamental type. */
33307
33308 static const char *
33309 ix86_mangle_type (const_tree type)
33310 {
33311 type = TYPE_MAIN_VARIANT (type);
33312
33313 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
33314 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
33315 return NULL;
33316
33317 switch (TYPE_MODE (type))
33318 {
33319 case TFmode:
33320 /* __float128 is "g". */
33321 return "g";
33322 case XFmode:
33323 /* "long double" or __float80 is "e". */
33324 return "e";
33325 default:
33326 return NULL;
33327 }
33328 }
33329
33330 /* For 32-bit code we can save PIC register setup by using
33331 __stack_chk_fail_local hidden function instead of calling
33332 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
33333 register, so it is better to call __stack_chk_fail directly. */
33334
33335 static tree ATTRIBUTE_UNUSED
33336 ix86_stack_protect_fail (void)
33337 {
33338 return TARGET_64BIT
33339 ? default_external_stack_protect_fail ()
33340 : default_hidden_stack_protect_fail ();
33341 }
33342
33343 /* Select a format to encode pointers in exception handling data. CODE
33344 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
33345 true if the symbol may be affected by dynamic relocations.
33346
33347 ??? All x86 object file formats are capable of representing this.
33348 After all, the relocation needed is the same as for the call insn.
33349 Whether or not a particular assembler allows us to enter such, I
33350 guess we'll have to see. */
33351 int
33352 asm_preferred_eh_data_format (int code, int global)
33353 {
33354 if (flag_pic)
33355 {
33356 int type = DW_EH_PE_sdata8;
33357 if (!TARGET_64BIT
33358 || ix86_cmodel == CM_SMALL_PIC
33359 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
33360 type = DW_EH_PE_sdata4;
33361 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
33362 }
33363 if (ix86_cmodel == CM_SMALL
33364 || (ix86_cmodel == CM_MEDIUM && code))
33365 return DW_EH_PE_udata4;
33366 return DW_EH_PE_absptr;
33367 }
33368 \f
33369 /* Expand copysign: copy the sign of SIGN onto the positive value ABS_VALUE,
33370 storing the result in RESULT. If MASK is non-null, it shall be a mask to
33371 mask out the sign-bit. */
33372 static void
33373 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
33374 {
33375 enum machine_mode mode = GET_MODE (sign);
33376 rtx sgn = gen_reg_rtx (mode);
33377 if (mask == NULL_RTX)
33378 {
33379 enum machine_mode vmode;
33380
33381 if (mode == SFmode)
33382 vmode = V4SFmode;
33383 else if (mode == DFmode)
33384 vmode = V2DFmode;
33385 else
33386 vmode = mode;
33387
33388 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
33389 if (!VECTOR_MODE_P (mode))
33390 {
33391 /* We need to generate a scalar mode mask in this case. */
33392 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
33393 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
33394 mask = gen_reg_rtx (mode);
33395 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
33396 }
33397 }
33398 else
33399 mask = gen_rtx_NOT (mode, mask);
33400 emit_insn (gen_rtx_SET (VOIDmode, sgn,
33401 gen_rtx_AND (mode, mask, sign)));
33402 emit_insn (gen_rtx_SET (VOIDmode, result,
33403 gen_rtx_IOR (mode, abs_value, sgn)));
33404 }
33405
33406 /* Expand fabs (OP0) and return a new rtx that holds the result. The
33407 mask for masking out the sign-bit is stored in *SMASK, if that is
33408 non-null. */
33409 static rtx
33410 ix86_expand_sse_fabs (rtx op0, rtx *smask)
33411 {
33412 enum machine_mode vmode, mode = GET_MODE (op0);
33413 rtx xa, mask;
33414
33415 xa = gen_reg_rtx (mode);
33416 if (mode == SFmode)
33417 vmode = V4SFmode;
33418 else if (mode == DFmode)
33419 vmode = V2DFmode;
33420 else
33421 vmode = mode;
33422 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
33423 if (!VECTOR_MODE_P (mode))
33424 {
33425 /* We need to generate a scalar mode mask in this case. */
33426 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
33427 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
33428 mask = gen_reg_rtx (mode);
33429 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
33430 }
33431 emit_insn (gen_rtx_SET (VOIDmode, xa,
33432 gen_rtx_AND (mode, op0, mask)));
33433
33434 if (smask)
33435 *smask = mask;
33436
33437 return xa;
33438 }
33439
33440 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
33441 swapping the operands if SWAP_OPERANDS is true. The expanded
33442 code is a forward jump to a newly created label in case the
33443 comparison is true. The generated label rtx is returned. */
33444 static rtx
33445 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
33446 bool swap_operands)
33447 {
33448 rtx label, tmp;
33449
33450 if (swap_operands)
33451 {
33452 tmp = op0;
33453 op0 = op1;
33454 op1 = tmp;
33455 }
33456
33457 label = gen_label_rtx ();
33458 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
33459 emit_insn (gen_rtx_SET (VOIDmode, tmp,
33460 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
33461 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
33462 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
33463 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
33464 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
33465 JUMP_LABEL (tmp) = label;
33466
33467 return label;
33468 }
33469
33470 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
33471 using comparison code CODE. Operands are swapped for the comparison if
33472 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
33473 static rtx
33474 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
33475 bool swap_operands)
33476 {
33477 rtx (*insn)(rtx, rtx, rtx, rtx);
33478 enum machine_mode mode = GET_MODE (op0);
33479 rtx mask = gen_reg_rtx (mode);
33480
33481 if (swap_operands)
33482 {
33483 rtx tmp = op0;
33484 op0 = op1;
33485 op1 = tmp;
33486 }
33487
33488 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
33489
33490 emit_insn (insn (mask, op0, op1,
33491 gen_rtx_fmt_ee (code, mode, op0, op1)));
33492 return mask;
33493 }
33494
33495 /* Generate and return a rtx of mode MODE for 2**n where n is the number
33496 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
33497 static rtx
33498 ix86_gen_TWO52 (enum machine_mode mode)
33499 {
33500 REAL_VALUE_TYPE TWO52r;
33501 rtx TWO52;
33502
33503 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
33504 TWO52 = const_double_from_real_value (TWO52r, mode);
33505 TWO52 = force_reg (mode, TWO52);
33506
33507 return TWO52;
33508 }
33509
33510 /* Expand SSE sequence for computing lround from OP1 storing
33511 into OP0. */
33512 void
33513 ix86_expand_lround (rtx op0, rtx op1)
33514 {
33515 /* C code for the stuff we're doing below:
33516 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
33517 return (long)tmp;
33518 */
33519 enum machine_mode mode = GET_MODE (op1);
33520 const struct real_format *fmt;
33521 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
33522 rtx adj;
33523
33524 /* load nextafter (0.5, 0.0) */
33525 fmt = REAL_MODE_FORMAT (mode);
33526 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
33527 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
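/* Using nextafter (0.5, 0.0) instead of exactly 0.5 avoids a double-rounding
   error: for the largest representable value just below 0.5, adding exactly
   0.5 would round up to 1.0 and the final truncation would return 1 instead
   of 0.  */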
33528
33529 /* adj = copysign (0.5, op1) */
33530 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
33531 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
33532
33533 /* adj = op1 + adj */
33534 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
33535
33536 /* op0 = (imode)adj */
33537 expand_fix (op0, adj, 0);
33538 }
33539
33540 /* Expand SSE2 sequence for computing lfloor or lceil from OP1 storing
33541 into OP0. */
33542 void
33543 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
33544 {
33545 /* C code for the stuff we're doing below (for do_floor):
33546 xi = (long)op1;
33547 xi -= (double)xi > op1 ? 1 : 0;
33548 return xi;
33549 */
33550 enum machine_mode fmode = GET_MODE (op1);
33551 enum machine_mode imode = GET_MODE (op0);
33552 rtx ireg, freg, label, tmp;
33553
33554 /* reg = (long)op1 */
33555 ireg = gen_reg_rtx (imode);
33556 expand_fix (ireg, op1, 0);
33557
33558 /* freg = (double)reg */
33559 freg = gen_reg_rtx (fmode);
33560 expand_float (freg, ireg, 0);
33561
33562 /* ireg = (freg > op1) ? ireg - 1 : ireg */
33563 label = ix86_expand_sse_compare_and_jump (UNLE,
33564 freg, op1, !do_floor);
33565 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
33566 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
33567 emit_move_insn (ireg, tmp);
33568
33569 emit_label (label);
33570 LABEL_NUSES (label) = 1;
33571
33572 emit_move_insn (op0, ireg);
33573 }
33574
33575 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
33576 result in OPERAND0. */
33577 void
33578 ix86_expand_rint (rtx operand0, rtx operand1)
33579 {
33580 /* C code for the stuff we're doing below:
33581 xa = fabs (operand1);
33582 if (!isless (xa, 2**52))
33583 return operand1;
33584 xa = xa + 2**52 - 2**52;
33585 return copysign (xa, operand1);
33586 */
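/* The "xa + 2**52 - 2**52" step relies on the DFmode mantissa having 52
   fraction bits: once we know |xa| < 2**52, adding 2**52 leaves no bits
   below the binary point, so the addition itself rounds xa to an integer
   in the current rounding mode, and subtracting 2**52 recovers that
   integer exactly.  E.g. 3.7 + 2**52 rounds to 2**52 + 4.0, giving 4.0
   after the subtraction.  The SFmode equivalent constant is 2**23.  */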
33587 enum machine_mode mode = GET_MODE (operand0);
33588 rtx res, xa, label, TWO52, mask;
33589
33590 res = gen_reg_rtx (mode);
33591 emit_move_insn (res, operand1);
33592
33593 /* xa = abs (operand1) */
33594 xa = ix86_expand_sse_fabs (res, &mask);
33595
33596 /* if (!isless (xa, TWO52)) goto label; */
33597 TWO52 = ix86_gen_TWO52 (mode);
33598 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
33599
33600 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
33601 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
33602
33603 ix86_sse_copysign_to_positive (res, xa, res, mask);
33604
33605 emit_label (label);
33606 LABEL_NUSES (label) = 1;
33607
33608 emit_move_insn (operand0, res);
33609 }
33610
33611 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
33612 into OPERAND0. */
33613 void
33614 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
33615 {
33616 /* C code for the stuff we expand below.
33617 double xa = fabs (x), x2;
33618 if (!isless (xa, TWO52))
33619 return x;
33620 xa = xa + TWO52 - TWO52;
33621 x2 = copysign (xa, x);
33622 Compensate. Floor:
33623 if (x2 > x)
33624 x2 -= 1;
33625 Compensate. Ceil:
33626 if (x2 < x)
33627 x2 -= -1;
33628 return x2;
33629 */
33630 enum machine_mode mode = GET_MODE (operand0);
33631 rtx xa, TWO52, tmp, label, one, res, mask;
33632
33633 TWO52 = ix86_gen_TWO52 (mode);
33634
33635 /* Temporary for holding the result, initialized to the input
33636 operand to ease control flow. */
33637 res = gen_reg_rtx (mode);
33638 emit_move_insn (res, operand1);
33639
33640 /* xa = abs (operand1) */
33641 xa = ix86_expand_sse_fabs (res, &mask);
33642
33643 /* if (!isless (xa, TWO52)) goto label; */
33644 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
33645
33646 /* xa = xa + TWO52 - TWO52; */
33647 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
33648 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
33649
33650 /* xa = copysign (xa, operand1) */
33651 ix86_sse_copysign_to_positive (xa, xa, res, mask);
33652
33653 /* generate 1.0 or -1.0 */
33654 one = force_reg (mode,
33655 const_double_from_real_value (do_floor
33656 ? dconst1 : dconstm1, mode));
33657
33658 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
33659 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
33660 emit_insn (gen_rtx_SET (VOIDmode, tmp,
33661 gen_rtx_AND (mode, one, tmp)));
33662 /* We always need to subtract here to preserve signed zero. */
33663 tmp = expand_simple_binop (mode, MINUS,
33664 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
33665 emit_move_insn (res, tmp);
33666
33667 emit_label (label);
33668 LABEL_NUSES (label) = 1;
33669
33670 emit_move_insn (operand0, res);
33671 }
33672
33673 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
33674 into OPERAND0. */
33675 void
33676 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
33677 {
33678 /* C code for the stuff we expand below.
33679 double xa = fabs (x), x2;
33680 if (!isless (xa, TWO52))
33681 return x;
33682 x2 = (double)(long)x;
33683 Compensate. Floor:
33684 if (x2 > x)
33685 x2 -= 1;
33686 Compensate. Ceil:
33687 if (x2 < x)
33688 x2 += 1;
33689 if (HONOR_SIGNED_ZEROS (mode))
33690 return copysign (x2, x);
33691 return x2;
33692 */
33693 enum machine_mode mode = GET_MODE (operand0);
33694 rtx xa, xi, TWO52, tmp, label, one, res, mask;
33695
33696 TWO52 = ix86_gen_TWO52 (mode);
33697
33698 /* Temporary for holding the result, initialized to the input
33699 operand to ease control flow. */
33700 res = gen_reg_rtx (mode);
33701 emit_move_insn (res, operand1);
33702
33703 /* xa = abs (operand1) */
33704 xa = ix86_expand_sse_fabs (res, &mask);
33705
33706 /* if (!isless (xa, TWO52)) goto label; */
33707 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
33708
33709 /* xa = (double)(long)x */
33710 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
33711 expand_fix (xi, res, 0);
33712 expand_float (xa, xi, 0);
33713
33714 /* generate 1.0 */
33715 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
33716
33717 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
33718 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
33719 emit_insn (gen_rtx_SET (VOIDmode, tmp,
33720 gen_rtx_AND (mode, one, tmp)));
33721 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
33722 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
33723 emit_move_insn (res, tmp);
33724
33725 if (HONOR_SIGNED_ZEROS (mode))
33726 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
33727
33728 emit_label (label);
33729 LABEL_NUSES (label) = 1;
33730
33731 emit_move_insn (operand0, res);
33732 }
33733
33734 /* Expand SSE sequence for computing round from OPERAND1 storing
33735 into OPERAND0, using a sequence that works without relying on DImode
33736 truncation via cvttsd2siq, which is only available on 64-bit targets. */
33737 void
33738 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
33739 {
33740 /* C code for the stuff we expand below.
33741 double xa = fabs (x), xa2, x2;
33742 if (!isless (xa, TWO52))
33743 return x;
33744 Using the absolute value and copying back sign makes
33745 -0.0 -> -0.0 correct.
33746 xa2 = xa + TWO52 - TWO52;
33747 Compensate.
33748 dxa = xa2 - xa;
33749 if (dxa <= -0.5)
33750 xa2 += 1;
33751 else if (dxa > 0.5)
33752 xa2 -= 1;
33753 x2 = copysign (xa2, x);
33754 return x2;
33755 */
33756 enum machine_mode mode = GET_MODE (operand0);
33757 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
33758
33759 TWO52 = ix86_gen_TWO52 (mode);
33760
33761 /* Temporary for holding the result, initialized to the input
33762 operand to ease control flow. */
33763 res = gen_reg_rtx (mode);
33764 emit_move_insn (res, operand1);
33765
33766 /* xa = abs (operand1) */
33767 xa = ix86_expand_sse_fabs (res, &mask);
33768
33769 /* if (!isless (xa, TWO52)) goto label; */
33770 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
33771
33772 /* xa2 = xa + TWO52 - TWO52; */
33773 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
33774 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
33775
33776 /* dxa = xa2 - xa; */
33777 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
33778
33779 /* generate 0.5, 1.0 and -0.5 */
33780 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
33781 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
33782 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
33783 0, OPTAB_DIRECT);
33784
33785 /* Compensate. */
33786 tmp = gen_reg_rtx (mode);
33787 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
33788 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
33789 emit_insn (gen_rtx_SET (VOIDmode, tmp,
33790 gen_rtx_AND (mode, one, tmp)));
33791 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
33792 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
33793 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
33794 emit_insn (gen_rtx_SET (VOIDmode, tmp,
33795 gen_rtx_AND (mode, one, tmp)));
33796 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
33797
33798 /* res = copysign (xa2, operand1) */
33799 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
33800
33801 emit_label (label);
33802 LABEL_NUSES (label) = 1;
33803
33804 emit_move_insn (operand0, res);
33805 }
33806
33807 /* Expand SSE sequence for computing trunc from OPERAND1 storing
33808 into OPERAND0. */
33809 void
33810 ix86_expand_trunc (rtx operand0, rtx operand1)
33811 {
33812 /* C code for SSE variant we expand below.
33813 double xa = fabs (x), x2;
33814 if (!isless (xa, TWO52))
33815 return x;
33816 x2 = (double)(long)x;
33817 if (HONOR_SIGNED_ZEROS (mode))
33818 return copysign (x2, x);
33819 return x2;
33820 */
33821 enum machine_mode mode = GET_MODE (operand0);
33822 rtx xa, xi, TWO52, label, res, mask;
33823
33824 TWO52 = ix86_gen_TWO52 (mode);
33825
33826 /* Temporary for holding the result, initialized to the input
33827 operand to ease control flow. */
33828 res = gen_reg_rtx (mode);
33829 emit_move_insn (res, operand1);
33830
33831 /* xa = abs (operand1) */
33832 xa = ix86_expand_sse_fabs (res, &mask);
33833
33834 /* if (!isless (xa, TWO52)) goto label; */
33835 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
33836
33837 /* x = (double)(long)x */
33838 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
33839 expand_fix (xi, res, 0);
33840 expand_float (res, xi, 0);
33841
33842 if (HONOR_SIGNED_ZEROS (mode))
33843 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
33844
33845 emit_label (label);
33846 LABEL_NUSES (label) = 1;
33847
33848 emit_move_insn (operand0, res);
33849 }
33850
33851 /* Expand SSE sequence for computing trunc from OPERAND1 storing
33852 into OPERAND0. */
33853 void
33854 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
33855 {
33856 enum machine_mode mode = GET_MODE (operand0);
33857 rtx xa, mask, TWO52, label, one, res, smask, tmp;
33858
33859 /* C code for SSE variant we expand below.
33860 double xa = fabs (x), x2;
33861 if (!isless (xa, TWO52))
33862 return x;
33863 xa2 = xa + TWO52 - TWO52;
33864 Compensate:
33865 if (xa2 > xa)
33866 xa2 -= 1.0;
33867 x2 = copysign (xa2, x);
33868 return x2;
33869 */
33870
33871 TWO52 = ix86_gen_TWO52 (mode);
33872
33873 /* Temporary for holding the result, initialized to the input
33874 operand to ease control flow. */
33875 res = gen_reg_rtx (mode);
33876 emit_move_insn (res, operand1);
33877
33878 /* xa = abs (operand1) */
33879 xa = ix86_expand_sse_fabs (res, &smask);
33880
33881 /* if (!isless (xa, TWO52)) goto label; */
33882 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
33883
33884 /* res = xa + TWO52 - TWO52; */
33885 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
33886 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
33887 emit_move_insn (res, tmp);
33888
33889 /* generate 1.0 */
33890 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
33891
33892 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
33893 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
33894 emit_insn (gen_rtx_SET (VOIDmode, mask,
33895 gen_rtx_AND (mode, mask, one)));
33896 tmp = expand_simple_binop (mode, MINUS,
33897 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
33898 emit_move_insn (res, tmp);
33899
33900 /* res = copysign (res, operand1) */
33901 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
33902
33903 emit_label (label);
33904 LABEL_NUSES (label) = 1;
33905
33906 emit_move_insn (operand0, res);
33907 }
33908
33909 /* Expand SSE sequence for computing round from OPERAND1 storing
33910 into OPERAND0. */
33911 void
33912 ix86_expand_round (rtx operand0, rtx operand1)
33913 {
33914 /* C code for the stuff we're doing below:
33915 double xa = fabs (x);
33916 if (!isless (xa, TWO52))
33917 return x;
33918 xa = (double)(long)(xa + nextafter (0.5, 0.0));
33919 return copysign (xa, x);
33920 */
33921 enum machine_mode mode = GET_MODE (operand0);
33922 rtx res, TWO52, xa, label, xi, half, mask;
33923 const struct real_format *fmt;
33924 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
33925
33926 /* Temporary for holding the result, initialized to the input
33927 operand to ease control flow. */
33928 res = gen_reg_rtx (mode);
33929 emit_move_insn (res, operand1);
33930
33931 TWO52 = ix86_gen_TWO52 (mode);
33932 xa = ix86_expand_sse_fabs (res, &mask);
33933 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
33934
33935 /* load nextafter (0.5, 0.0) */
33936 fmt = REAL_MODE_FORMAT (mode);
33937 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
33938 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
33939
33940 /* xa = xa + 0.5 */
33941 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
33942 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
33943
33944 /* xa = (double)(int64_t)xa */
33945 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
33946 expand_fix (xi, xa, 0);
33947 expand_float (xa, xi, 0);
33948
33949 /* res = copysign (xa, operand1) */
33950 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
33951
33952 emit_label (label);
33953 LABEL_NUSES (label) = 1;
33954
33955 emit_move_insn (operand0, res);
33956 }
33957
33958 /* Expand SSE sequence for computing round
33959 from OP1 storing into OP0 using sse4 round insn. */
33960 void
33961 ix86_expand_round_sse4 (rtx op0, rtx op1)
33962 {
33963 enum machine_mode mode = GET_MODE (op0);
33964 rtx e1, e2, res, half;
33965 const struct real_format *fmt;
33966 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
33967 rtx (*gen_copysign) (rtx, rtx, rtx);
33968 rtx (*gen_round) (rtx, rtx, rtx);
33969
33970 switch (mode)
33971 {
33972 case SFmode:
33973 gen_copysign = gen_copysignsf3;
33974 gen_round = gen_sse4_1_roundsf2;
33975 break;
33976 case DFmode:
33977 gen_copysign = gen_copysigndf3;
33978 gen_round = gen_sse4_1_rounddf2;
33979 break;
33980 default:
33981 gcc_unreachable ();
33982 }
33983
33984 /* round (a) = trunc (a + copysign (0.5, a)) */
33985
33986 /* load nextafter (0.5, 0.0) */
33987 fmt = REAL_MODE_FORMAT (mode);
33988 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
33989 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
33990 half = const_double_from_real_value (pred_half, mode);
33991
33992 /* e1 = copysign (0.5, op1) */
33993 e1 = gen_reg_rtx (mode);
33994 emit_insn (gen_copysign (e1, half, op1));
33995
33996 /* e2 = op1 + e1 */
33997 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
33998
33999 /* res = trunc (e2) */
34000 res = gen_reg_rtx (mode);
34001 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
34002
34003 emit_move_insn (op0, res);
34004 }
34005 \f
34006
34007 /* Table of valid machine attributes. */
34008 static const struct attribute_spec ix86_attribute_table[] =
34009 {
34010 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
34011 affects_type_identity } */
34012 /* Stdcall attribute says callee is responsible for popping arguments
34013 if they are not variable. */
34014 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
34015 true },
34016 /* Fastcall attribute says callee is responsible for popping arguments
34017 if they are not variable. */
34018 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
34019 true },
34020 /* Thiscall attribute says callee is responsible for popping arguments
34021 if they are not variable. */
34022 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
34023 true },
34024 /* Cdecl attribute says the callee is a normal C declaration */
34025 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
34026 true },
34027 /* Regparm attribute specifies how many integer arguments are to be
34028 passed in registers. */
34029 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
34030 true },
34031 /* Sseregparm attribute says we are using x86_64 calling conventions
34032 for FP arguments. */
34033 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
34034 true },
34035 /* force_align_arg_pointer says this function realigns the stack at entry. */
34036 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
34037 false, true, true, ix86_handle_cconv_attribute, false },
34038 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
34039 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
34040 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
34041 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
34042 false },
34043 #endif
34044 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
34045 false },
34046 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
34047 false },
34048 #ifdef SUBTARGET_ATTRIBUTE_TABLE
34049 SUBTARGET_ATTRIBUTE_TABLE,
34050 #endif
34051 /* ms_abi and sysv_abi calling convention function attributes. */
34052 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
34053 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
34054 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
34055 false },
34056 { "callee_pop_aggregate_return", 1, 1, false, true, true,
34057 ix86_handle_callee_pop_aggregate_return, true },
34058 /* End element. */
34059 { NULL, 0, 0, false, false, false, NULL, false }
34060 };
34061
34062 /* Implement targetm.vectorize.builtin_vectorization_cost. */
34063 static int
34064 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
34065 tree vectype ATTRIBUTE_UNUSED,
34066 int misalign ATTRIBUTE_UNUSED)
34067 {
34068 switch (type_of_cost)
34069 {
34070 case scalar_stmt:
34071 return ix86_cost->scalar_stmt_cost;
34072
34073 case scalar_load:
34074 return ix86_cost->scalar_load_cost;
34075
34076 case scalar_store:
34077 return ix86_cost->scalar_store_cost;
34078
34079 case vector_stmt:
34080 return ix86_cost->vec_stmt_cost;
34081
34082 case vector_load:
34083 return ix86_cost->vec_align_load_cost;
34084
34085 case vector_store:
34086 return ix86_cost->vec_store_cost;
34087
34088 case vec_to_scalar:
34089 return ix86_cost->vec_to_scalar_cost;
34090
34091 case scalar_to_vec:
34092 return ix86_cost->scalar_to_vec_cost;
34093
34094 case unaligned_load:
34095 case unaligned_store:
34096 return ix86_cost->vec_unalign_load_cost;
34097
34098 case cond_branch_taken:
34099 return ix86_cost->cond_taken_branch_cost;
34100
34101 case cond_branch_not_taken:
34102 return ix86_cost->cond_not_taken_branch_cost;
34103
34104 case vec_perm:
34105 return 1;
34106
34107 default:
34108 gcc_unreachable ();
34109 }
34110 }
34111
34112
34113 /* Implement targetm.vectorize.builtin_vec_perm. */
34114
34115 static tree
34116 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
34117 {
34118 tree itype = TREE_TYPE (vec_type);
34119 bool u = TYPE_UNSIGNED (itype);
34120 enum machine_mode vmode = TYPE_MODE (vec_type);
34121 enum ix86_builtins fcode;
34122 bool ok = TARGET_SSE2;
34123
34124 switch (vmode)
34125 {
34126 case V4DFmode:
34127 ok = TARGET_AVX;
34128 fcode = IX86_BUILTIN_VEC_PERM_V4DF;
34129 goto get_di;
34130 case V2DFmode:
34131 fcode = IX86_BUILTIN_VEC_PERM_V2DF;
34132 get_di:
34133 itype = ix86_get_builtin_type (IX86_BT_DI);
34134 break;
34135
34136 case V8SFmode:
34137 ok = TARGET_AVX;
34138 fcode = IX86_BUILTIN_VEC_PERM_V8SF;
34139 goto get_si;
34140 case V4SFmode:
34141 ok = TARGET_SSE;
34142 fcode = IX86_BUILTIN_VEC_PERM_V4SF;
34143 get_si:
34144 itype = ix86_get_builtin_type (IX86_BT_SI);
34145 break;
34146
34147 case V2DImode:
34148 fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
34149 break;
34150 case V4SImode:
34151 fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
34152 break;
34153 case V8HImode:
34154 fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
34155 break;
34156 case V16QImode:
34157 fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
34158 break;
34159 default:
34160 ok = false;
34161 break;
34162 }
34163
34164 if (!ok)
34165 return NULL_TREE;
34166
34167 *mask_type = itype;
34168 return ix86_builtins[(int) fcode];
34169 }
34170
34171 /* Return a vector mode with twice as many elements as VMODE. */
34172 /* ??? Consider moving this to a table generated by genmodes.c. */
34173
34174 static enum machine_mode
34175 doublesize_vector_mode (enum machine_mode vmode)
34176 {
34177 switch (vmode)
34178 {
34179 case V2SFmode: return V4SFmode;
34180 case V1DImode: return V2DImode;
34181 case V2SImode: return V4SImode;
34182 case V4HImode: return V8HImode;
34183 case V8QImode: return V16QImode;
34184
34185 case V2DFmode: return V4DFmode;
34186 case V4SFmode: return V8SFmode;
34187 case V2DImode: return V4DImode;
34188 case V4SImode: return V8SImode;
34189 case V8HImode: return V16HImode;
34190 case V16QImode: return V32QImode;
34191
34192 case V4DFmode: return V8DFmode;
34193 case V8SFmode: return V16SFmode;
34194 case V4DImode: return V8DImode;
34195 case V8SImode: return V16SImode;
34196 case V16HImode: return V32HImode;
34197 case V32QImode: return V64QImode;
34198
34199 default:
34200 gcc_unreachable ();
34201 }
34202 }
34203
34204 /* Construct (set target (vec_select op0 (parallel perm))) and
34205 return true if that's a valid instruction in the active ISA. */
34206
34207 static bool
34208 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
34209 {
34210 rtx rperm[MAX_VECT_LEN], x;
34211 unsigned i;
34212
34213 for (i = 0; i < nelt; ++i)
34214 rperm[i] = GEN_INT (perm[i]);
34215
34216 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
34217 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
34218 x = gen_rtx_SET (VOIDmode, target, x);
34219
34220 x = emit_insn (x);
34221 if (recog_memoized (x) < 0)
34222 {
34223 remove_insn (x);
34224 return false;
34225 }
34226 return true;
34227 }
34228
34229 /* Similar, but generate a vec_concat from op0 and op1 as well. */
34230
34231 static bool
34232 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
34233 const unsigned char *perm, unsigned nelt)
34234 {
34235 enum machine_mode v2mode;
34236 rtx x;
34237
34238 v2mode = doublesize_vector_mode (GET_MODE (op0));
34239 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
34240 return expand_vselect (target, x, perm, nelt);
34241 }
34242
34243 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
34244 in terms of blendp[sd] / pblendw / pblendvb. */
34245
34246 static bool
34247 expand_vec_perm_blend (struct expand_vec_perm_d *d)
34248 {
34249 enum machine_mode vmode = d->vmode;
34250 unsigned i, mask, nelt = d->nelt;
34251 rtx target, op0, op1, x;
34252
34253 if (!TARGET_SSE4_1 || d->op0 == d->op1)
34254 return false;
34255 if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
34256 return false;
34257
34258 /* This is a blend, not a permute. Elements must stay in their
34259 respective lanes. */
34260 for (i = 0; i < nelt; ++i)
34261 {
34262 unsigned e = d->perm[i];
34263 if (!(e == i || e == i + nelt))
34264 return false;
34265 }
34266
34267 if (d->testing_p)
34268 return true;
34269
34270 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
34271 decision should be extracted elsewhere, so that we only try that
34272 sequence once all budget==3 options have been tried. */
34273
34274 /* For byte vectors, see if the bytes move in pairs so we can use pblendw
34275 with an immediate argument, rather than pblendvb with a vector argument. */
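/* E.g. the V16QI selector { 0 1 18 19 4 5 22 23 8 9 26 27 12 13 30 31 }
   moves whole 16-bit pairs and can use pblendw, whereas something like
   { 0 17 2 19 ... } splits a pair and has to take the pblendvb path below.  */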
34276 if (vmode == V16QImode)
34277 {
34278 bool pblendw_ok = true;
34279 for (i = 0; i < 16 && pblendw_ok; i += 2)
34280 pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
34281
34282 if (!pblendw_ok)
34283 {
34284 rtx rperm[16], vperm;
34285
34286 for (i = 0; i < nelt; ++i)
34287 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
34288
34289 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
34290 vperm = force_reg (V16QImode, vperm);
34291
34292 emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
34293 return true;
34294 }
34295 }
34296
34297 target = d->target;
34298 op0 = d->op0;
34299 op1 = d->op1;
34300 mask = 0;
34301
34302 switch (vmode)
34303 {
34304 case V4DFmode:
34305 case V8SFmode:
34306 case V2DFmode:
34307 case V4SFmode:
34308 case V8HImode:
34309 for (i = 0; i < nelt; ++i)
34310 mask |= (d->perm[i] >= nelt) << i;
34311 break;
34312
34313 case V2DImode:
34314 for (i = 0; i < 2; ++i)
34315 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
34316 goto do_subreg;
34317
34318 case V4SImode:
34319 for (i = 0; i < 4; ++i)
34320 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
34321 goto do_subreg;
34322
34323 case V16QImode:
34324 for (i = 0; i < 8; ++i)
34325 mask |= (d->perm[i * 2] >= 16) << i;
34326
34327 do_subreg:
34328 vmode = V8HImode;
34329 target = gen_lowpart (vmode, target);
34330 op0 = gen_lowpart (vmode, op0);
34331 op1 = gen_lowpart (vmode, op1);
34332 break;
34333
34334 default:
34335 gcc_unreachable ();
34336 }
34337
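/* E.g. the V4SI blend { 0, 5, 2, 7 } takes elements 1 and 3 from op1;
   after the V8HI lowering above that is mask 0xcc, i.e. a pblendw
   selecting halfwords 2-3 and 6-7 from op1.  */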
34338 /* This matches five different patterns with the different modes. */
34339 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
34340 x = gen_rtx_SET (VOIDmode, target, x);
34341 emit_insn (x);
34342
34343 return true;
34344 }
34345
34346 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
34347 in terms of the variable form of vpermilps.
34348
34349 Note that we will have already failed the immediate input vpermilps,
34350 which requires that the high and low part shuffle be identical; the
34351 variable form doesn't require that. */
34352
34353 static bool
34354 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
34355 {
34356 rtx rperm[8], vperm;
34357 unsigned i;
34358
34359 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
34360 return false;
34361
34362 /* We can only permute within the 128-bit lane. */
34363 for (i = 0; i < 8; ++i)
34364 {
34365 unsigned e = d->perm[i];
34366 if (i < 4 ? e >= 4 : e < 4)
34367 return false;
34368 }
34369
34370 if (d->testing_p)
34371 return true;
34372
34373 for (i = 0; i < 8; ++i)
34374 {
34375 unsigned e = d->perm[i];
34376
34377 /* Within each 128-bit lane, the elements of op0 are numbered
34378 from 0 and the elements of op1 are numbered from 4. */
34379 if (e >= 8 + 4)
34380 e -= 8;
34381 else if (e >= 4)
34382 e -= 4;
34383
34384 rperm[i] = GEN_INT (e);
34385 }
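/* E.g. with op0 == op1 (as required above), the permutation
   { 1, 0, 3, 2, 5, 4, 7, 6 } -- a swap of adjacent elements within each
   lane -- becomes the lane-relative control { 1, 0, 3, 2, 1, 0, 3, 2 }.  */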
34386
34387 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
34388 vperm = force_reg (V8SImode, vperm);
34389 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
34390
34391 return true;
34392 }
34393
34394 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
34395 in terms of pshufb or vpperm. */
34396
34397 static bool
34398 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
34399 {
34400 unsigned i, nelt, eltsz;
34401 rtx rperm[16], vperm, target, op0, op1;
34402
34403 if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
34404 return false;
34405 if (GET_MODE_SIZE (d->vmode) != 16)
34406 return false;
34407
34408 if (d->testing_p)
34409 return true;
34410
34411 nelt = d->nelt;
34412 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
34413
34414 for (i = 0; i < nelt; ++i)
34415 {
34416 unsigned j, e = d->perm[i];
34417 for (j = 0; j < eltsz; ++j)
34418 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
34419 }
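/* E.g. a single-operand V4SI permutation { 2, 1, 0, 3 } (eltsz == 4)
   expands to the byte-level pshufb control
   { 8 9 10 11 4 5 6 7 0 1 2 3 12 13 14 15 }.  */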
34420
34421 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
34422 vperm = force_reg (V16QImode, vperm);
34423
34424 target = gen_lowpart (V16QImode, d->target);
34425 op0 = gen_lowpart (V16QImode, d->op0);
34426 if (d->op0 == d->op1)
34427 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
34428 else
34429 {
34430 op1 = gen_lowpart (V16QImode, d->op1);
34431 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
34432 }
34433
34434 return true;
34435 }
34436
34437 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
34438 in a single instruction. */
34439
34440 static bool
34441 expand_vec_perm_1 (struct expand_vec_perm_d *d)
34442 {
34443 unsigned i, nelt = d->nelt;
34444 unsigned char perm2[MAX_VECT_LEN];
34445
34446 /* Check plain VEC_SELECT first, because AVX has instructions that could
34447 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
34448 input where SEL+CONCAT may not. */
34449 if (d->op0 == d->op1)
34450 {
34451 int mask = nelt - 1;
34452
34453 for (i = 0; i < nelt; i++)
34454 perm2[i] = d->perm[i] & mask;
34455
34456 if (expand_vselect (d->target, d->op0, perm2, nelt))
34457 return true;
34458
34459 /* There are plenty of patterns in sse.md that are written for
34460 SEL+CONCAT and are not replicated for a single op. Perhaps
34461 that should be changed, to avoid the nastiness here. */
34462
34463 /* Recognize interleave style patterns, which means incrementing
34464 every other permutation operand. */
34465 for (i = 0; i < nelt; i += 2)
34466 {
34467 perm2[i] = d->perm[i] & mask;
34468 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
34469 }
34470 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
34471 return true;
34472
34473 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
34474 if (nelt >= 4)
34475 {
34476 for (i = 0; i < nelt; i += 4)
34477 {
34478 perm2[i + 0] = d->perm[i + 0] & mask;
34479 perm2[i + 1] = d->perm[i + 1] & mask;
34480 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
34481 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
34482 }
34483
34484 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
34485 return true;
34486 }
34487 }
34488
34489 /* Finally, try the fully general two operand permute. */
34490 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
34491 return true;
34492
34493 /* Recognize interleave style patterns with reversed operands. */
34494 if (d->op0 != d->op1)
34495 {
34496 for (i = 0; i < nelt; ++i)
34497 {
34498 unsigned e = d->perm[i];
34499 if (e >= nelt)
34500 e -= nelt;
34501 else
34502 e += nelt;
34503 perm2[i] = e;
34504 }
34505
34506 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
34507 return true;
34508 }
34509
34510 /* Try the SSE4.1 blend variable merge instructions. */
34511 if (expand_vec_perm_blend (d))
34512 return true;
34513
34514 /* Try one of the AVX vpermil variable permutations. */
34515 if (expand_vec_perm_vpermil (d))
34516 return true;
34517
34518 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
34519 if (expand_vec_perm_pshufb (d))
34520 return true;
34521
34522 return false;
34523 }
34524
34525 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
34526 in terms of a pair of pshuflw + pshufhw instructions. */
34527
34528 static bool
34529 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
34530 {
34531 unsigned char perm2[MAX_VECT_LEN];
34532 unsigned i;
34533 bool ok;
34534
34535 if (d->vmode != V8HImode || d->op0 != d->op1)
34536 return false;
34537
34538 /* The two permutations only operate in 64-bit lanes. */
34539 for (i = 0; i < 4; ++i)
34540 if (d->perm[i] >= 4)
34541 return false;
34542 for (i = 4; i < 8; ++i)
34543 if (d->perm[i] < 4)
34544 return false;
34545
34546 if (d->testing_p)
34547 return true;
34548
34549 /* Emit the pshuflw. */
34550 memcpy (perm2, d->perm, 4);
34551 for (i = 4; i < 8; ++i)
34552 perm2[i] = i;
34553 ok = expand_vselect (d->target, d->op0, perm2, 8);
34554 gcc_assert (ok);
34555
34556 /* Emit the pshufhw. */
34557 memcpy (perm2 + 4, d->perm + 4, 4);
34558 for (i = 0; i < 4; ++i)
34559 perm2[i] = i;
34560 ok = expand_vselect (d->target, d->target, perm2, 8);
34561 gcc_assert (ok);
34562
34563 return true;
34564 }
34565
34566 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
34567 the permutation using the SSSE3 palignr instruction. This succeeds
34568 when all of the elements in PERM fit within one vector and we merely
34569 need to shift them down so that a single vector permutation has a
34570 chance to succeed. */
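/* E.g. the V8HI permutation { 3 4 5 6 7 8 9 10 } has min == 3 and
   max == 10; a palignr by three elements leaves the in-order selection
   { 0 1 2 3 4 5 6 7 }, so no further shuffle is needed.  */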
34571
34572 static bool
34573 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
34574 {
34575 unsigned i, nelt = d->nelt;
34576 unsigned min, max;
34577 bool in_order, ok;
34578 rtx shift;
34579
34580 /* Even with AVX, palignr only operates on 128-bit vectors. */
34581 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
34582 return false;
34583
34584 min = nelt, max = 0;
34585 for (i = 0; i < nelt; ++i)
34586 {
34587 unsigned e = d->perm[i];
34588 if (e < min)
34589 min = e;
34590 if (e > max)
34591 max = e;
34592 }
34593 if (min == 0 || max - min >= nelt)
34594 return false;
34595
34596 /* Given that we have SSSE3, we know we'll be able to implement the
34597 single operand permutation after the palignr with pshufb. */
34598 if (d->testing_p)
34599 return true;
34600
34601 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
34602 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
34603 gen_lowpart (TImode, d->op1),
34604 gen_lowpart (TImode, d->op0), shift));
34605
34606 d->op0 = d->op1 = d->target;
34607
34608 in_order = true;
34609 for (i = 0; i < nelt; ++i)
34610 {
34611 unsigned e = d->perm[i] - min;
34612 if (e != i)
34613 in_order = false;
34614 d->perm[i] = e;
34615 }
34616
34617 /* Test for the degenerate case where the alignment by itself
34618 produces the desired permutation. */
34619 if (in_order)
34620 return true;
34621
34622 ok = expand_vec_perm_1 (d);
34623 gcc_assert (ok);
34624
34625 return ok;
34626 }
34627
34628 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
34629 a two vector permutation into a single vector permutation by using
34630 an interleave operation to merge the vectors. */
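/* E.g. the V4SF permutation { 1, 5, 0, 4 } draws only on the low halves
   of both inputs: an unpcklps produces { 0, 4, 1, 5 }, after which the
   remapped single-operand shuffle { 2, 3, 0, 1 } yields the result.  */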
34631
34632 static bool
34633 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
34634 {
34635 struct expand_vec_perm_d dremap, dfinal;
34636 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
34637 unsigned contents, h1, h2, h3, h4;
34638 unsigned char remap[2 * MAX_VECT_LEN];
34639 rtx seq;
34640 bool ok;
34641
34642 if (d->op0 == d->op1)
34643 return false;
34644
34645 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
34646 lanes. We can use similar techniques with the vperm2f128 instruction,
34647 but it requires slightly different logic. */
34648 if (GET_MODE_SIZE (d->vmode) != 16)
34649 return false;
34650
34651 /* Examine from whence the elements come. */
34652 contents = 0;
34653 for (i = 0; i < nelt; ++i)
34654 contents |= 1u << d->perm[i];
34655
34656 /* Split the two input vectors into 4 halves. */
34657 h1 = (1u << nelt2) - 1;
34658 h2 = h1 << nelt2;
34659 h3 = h2 << nelt2;
34660 h4 = h3 << nelt2;
34661
34662 memset (remap, 0xff, sizeof (remap));
34663 dremap = *d;
34664
34665 /* If the elements all come from the low halves, use interleave low; do
34666 the same for interleave high. If the elements come from mis-matched
34667 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
34668 if ((contents & (h1 | h3)) == contents)
34669 {
34670 for (i = 0; i < nelt2; ++i)
34671 {
34672 remap[i] = i * 2;
34673 remap[i + nelt] = i * 2 + 1;
34674 dremap.perm[i * 2] = i;
34675 dremap.perm[i * 2 + 1] = i + nelt;
34676 }
34677 }
34678 else if ((contents & (h2 | h4)) == contents)
34679 {
34680 for (i = 0; i < nelt2; ++i)
34681 {
34682 remap[i + nelt2] = i * 2;
34683 remap[i + nelt + nelt2] = i * 2 + 1;
34684 dremap.perm[i * 2] = i + nelt2;
34685 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
34686 }
34687 }
34688 else if ((contents & (h1 | h4)) == contents)
34689 {
34690 for (i = 0; i < nelt2; ++i)
34691 {
34692 remap[i] = i;
34693 remap[i + nelt + nelt2] = i + nelt2;
34694 dremap.perm[i] = i;
34695 dremap.perm[i + nelt2] = i + nelt + nelt2;
34696 }
34697 if (nelt != 4)
34698 {
34699 dremap.vmode = V2DImode;
34700 dremap.nelt = 2;
34701 dremap.perm[0] = 0;
34702 dremap.perm[1] = 3;
34703 }
34704 }
34705 else if ((contents & (h2 | h3)) == contents)
34706 {
34707 for (i = 0; i < nelt2; ++i)
34708 {
34709 remap[i + nelt2] = i;
34710 remap[i + nelt] = i + nelt2;
34711 dremap.perm[i] = i + nelt2;
34712 dremap.perm[i + nelt2] = i + nelt;
34713 }
34714 if (nelt != 4)
34715 {
34716 dremap.vmode = V2DImode;
34717 dremap.nelt = 2;
34718 dremap.perm[0] = 1;
34719 dremap.perm[1] = 2;
34720 }
34721 }
34722 else
34723 return false;
34724
34725 /* Use the remapping array set up above to move the elements from their
34726 swizzled locations into their final destinations. */
34727 dfinal = *d;
34728 for (i = 0; i < nelt; ++i)
34729 {
34730 unsigned e = remap[d->perm[i]];
34731 gcc_assert (e < nelt);
34732 dfinal.perm[i] = e;
34733 }
34734 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
34735 dfinal.op1 = dfinal.op0;
34736 dremap.target = dfinal.op0;
34737
34738 /* Test if the final remap can be done with a single insn. For V4SFmode or
34739 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
34740 start_sequence ();
34741 ok = expand_vec_perm_1 (&dfinal);
34742 seq = get_insns ();
34743 end_sequence ();
34744
34745 if (!ok)
34746 return false;
34747
34748 if (dremap.vmode != dfinal.vmode)
34749 {
34750 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
34751 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
34752 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
34753 }
34754
34755 ok = expand_vec_perm_1 (&dremap);
34756 gcc_assert (ok);
34757
34758 emit_insn (seq);
34759 return true;
34760 }
34761
34762 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
34763 permutation with two pshufb insns and an ior. We should have already
34764 failed all two instruction sequences. */
34765
34766 static bool
34767 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
34768 {
34769 rtx rperm[2][16], vperm, l, h, op, m128;
34770 unsigned int i, nelt, eltsz;
34771
34772 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
34773 return false;
34774 gcc_assert (d->op0 != d->op1);
34775
34776 nelt = d->nelt;
34777 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
34778
34779 /* Generate two permutation masks. If the required element is within
34780 the given vector it is shuffled into the proper lane. If the required
34781 element is in the other vector, force a zero into the lane by setting
34782 bit 7 in the permutation mask. */
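/* E.g. for the V4SI even-extract { 0, 2, 4, 6 }, the first mask selects
   bytes 0-3 and 8-11 of op0 and zeroes the upper half, the second mask
   zeroes the lower half and selects bytes 0-3 and 8-11 of op1, and the
   final por merges the two halves.  */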
34783 m128 = GEN_INT (-128);
34784 for (i = 0; i < nelt; ++i)
34785 {
34786 unsigned j, e = d->perm[i];
34787 unsigned which = (e >= nelt);
34788 if (e >= nelt)
34789 e -= nelt;
34790
34791 for (j = 0; j < eltsz; ++j)
34792 {
34793 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
34794 rperm[1-which][i*eltsz + j] = m128;
34795 }
34796 }
34797
34798 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
34799 vperm = force_reg (V16QImode, vperm);
34800
34801 l = gen_reg_rtx (V16QImode);
34802 op = gen_lowpart (V16QImode, d->op0);
34803 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
34804
34805 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
34806 vperm = force_reg (V16QImode, vperm);
34807
34808 h = gen_reg_rtx (V16QImode);
34809 op = gen_lowpart (V16QImode, d->op1);
34810 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
34811
34812 op = gen_lowpart (V16QImode, d->target);
34813 emit_insn (gen_iorv16qi3 (op, l, h));
34814
34815 return true;
34816 }
34817
34818 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
34819 and extract-odd permutations. */
34820
34821 static bool
34822 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
34823 {
34824 rtx t1, t2, t3;
34825
34826 switch (d->vmode)
34827 {
34828 case V4DFmode:
34829 t1 = gen_reg_rtx (V4DFmode);
34830 t2 = gen_reg_rtx (V4DFmode);
34831
34832 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
34833 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
34834 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
34835
34836 /* Now an unpck[lh]pd will produce the result required. */
34837 if (odd)
34838 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
34839 else
34840 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
34841 emit_insn (t3);
34842 break;
34843
34844 case V8SFmode:
34845 {
34846 int mask = odd ? 0xdd : 0x88;
34847
34848 t1 = gen_reg_rtx (V8SFmode);
34849 t2 = gen_reg_rtx (V8SFmode);
34850 t3 = gen_reg_rtx (V8SFmode);
34851
34852 /* Shuffle within the 128-bit lanes to produce:
34853 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
34854 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
34855 GEN_INT (mask)));
34856
34857 /* Shuffle the lanes around to produce:
34858 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
34859 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
34860 GEN_INT (0x3)));
34861
34862 /* Shuffle within the 128-bit lanes to produce:
34863 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
34864 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
34865
34866 /* Shuffle within the 128-bit lanes to produce:
34867 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
34868 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
34869
34870 /* Shuffle the lanes around to produce:
34871 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
34872 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
34873 GEN_INT (0x20)));
34874 }
34875 break;
34876
34877 case V2DFmode:
34878 case V4SFmode:
34879 case V2DImode:
34880 case V4SImode:
34881 /* These are always directly implementable by expand_vec_perm_1. */
34882 gcc_unreachable ();
34883
34884 case V8HImode:
34885 if (TARGET_SSSE3)
34886 return expand_vec_perm_pshufb2 (d);
34887 else
34888 {
34889 /* We need 2*log2(N)-1 operations to achieve odd/even
34890 with interleave. */
34891 t1 = gen_reg_rtx (V8HImode);
34892 t2 = gen_reg_rtx (V8HImode);
34893 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
34894 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
34895 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
34896 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
34897 if (odd)
34898 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
34899 else
34900 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
34901 emit_insn (t3);
34902 }
34903 break;
34904
34905 case V16QImode:
34906 if (TARGET_SSSE3)
34907 return expand_vec_perm_pshufb2 (d);
34908 else
34909 {
34910 t1 = gen_reg_rtx (V16QImode);
34911 t2 = gen_reg_rtx (V16QImode);
34912 t3 = gen_reg_rtx (V16QImode);
34913 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
34914 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
34915 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
34916 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
34917 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
34918 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
34919 if (odd)
34920 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
34921 else
34922 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
34923 emit_insn (t3);
34924 }
34925 break;
34926
34927 default:
34928 gcc_unreachable ();
34929 }
34930
34931 return true;
34932 }
34933
34934 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
34935 extract-even and extract-odd permutations. */
34936
34937 static bool
34938 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
34939 {
34940 unsigned i, odd, nelt = d->nelt;
34941
34942 odd = d->perm[0];
34943 if (odd != 0 && odd != 1)
34944 return false;
34945
34946 for (i = 1; i < nelt; ++i)
34947 if (d->perm[i] != 2 * i + odd)
34948 return false;
34949
34950 return expand_vec_perm_even_odd_1 (d, odd);
34951 }
34952
34953 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
34954 permutations. We assume that expand_vec_perm_1 has already failed. */
34955
34956 static bool
34957 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
34958 {
34959 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
34960 enum machine_mode vmode = d->vmode;
34961 unsigned char perm2[4];
34962 rtx op0 = d->op0;
34963 bool ok;
34964
34965 switch (vmode)
34966 {
34967 case V4DFmode:
34968 case V8SFmode:
34969 /* These are special-cased in sse.md so that we can optionally
34970 use the vbroadcast instruction. They expand to two insns
34971 if the input happens to be in a register. */
34972 gcc_unreachable ();
34973
34974 case V2DFmode:
34975 case V2DImode:
34976 case V4SFmode:
34977 case V4SImode:
34978 /* These are always implementable using standard shuffle patterns. */
34979 gcc_unreachable ();
34980
34981 case V8HImode:
34982 case V16QImode:
34983 /* These can be implemented via interleave. We save one insn by
34984 stopping once we have promoted to V4SImode and then use pshufd. */
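/* E.g. to broadcast element 5 of a V8HI vector, one interleave-high of
   the operand with itself gives { 4 4 5 5 6 6 7 7 }; viewed as V4SI,
   a pshufd broadcast of element 1 then replicates the { 5 5 } pair.  */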
34985 do
34986 {
34987 optab otab = vec_interleave_low_optab;
34988
34989 if (elt >= nelt2)
34990 {
34991 otab = vec_interleave_high_optab;
34992 elt -= nelt2;
34993 }
34994 nelt2 /= 2;
34995
34996 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
34997 vmode = get_mode_wider_vector (vmode);
34998 op0 = gen_lowpart (vmode, op0);
34999 }
35000 while (vmode != V4SImode);
35001
35002 memset (perm2, elt, 4);
35003 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
35004 gcc_assert (ok);
35005 return true;
35006
35007 default:
35008 gcc_unreachable ();
35009 }
35010 }
35011
35012 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
35013 broadcast permutations. */
35014
35015 static bool
35016 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
35017 {
35018 unsigned i, elt, nelt = d->nelt;
35019
35020 if (d->op0 != d->op1)
35021 return false;
35022
35023 elt = d->perm[0];
35024 for (i = 1; i < nelt; ++i)
35025 if (d->perm[i] != elt)
35026 return false;
35027
35028 return expand_vec_perm_broadcast_1 (d);
35029 }
35030
35031 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
35032 With all of the interface bits taken care of, perform the expansion
35033 in D and return true on success. */
35034
35035 static bool
35036 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
35037 {
35038 /* Try a single instruction expansion. */
35039 if (expand_vec_perm_1 (d))
35040 return true;
35041
35042 /* Try sequences of two instructions. */
35043
35044 if (expand_vec_perm_pshuflw_pshufhw (d))
35045 return true;
35046
35047 if (expand_vec_perm_palignr (d))
35048 return true;
35049
35050 if (expand_vec_perm_interleave2 (d))
35051 return true;
35052
35053 if (expand_vec_perm_broadcast (d))
35054 return true;
35055
35056 /* Try sequences of three instructions. */
35057
35058 if (expand_vec_perm_pshufb2 (d))
35059 return true;
35060
35061 /* ??? Look for narrow permutations whose element orderings would
35062 allow the promotion to a wider mode. */
35063
35064 /* ??? Look for sequences of interleave or a wider permute that place
35065 the data into the correct lanes for a half-vector shuffle like
35066 pshuf[lh]w or vpermilps. */
35067
35068 /* ??? Look for sequences of interleave that produce the desired results.
35069 The combinatorics of punpck[lh] get pretty ugly... */
35070
35071 if (expand_vec_perm_even_odd (d))
35072 return true;
35073
35074 return false;
35075 }
35076
35077 /* Extract the values from the vector CST into the permutation array in D.
35078 Return 0 on error, 1 if all values from the permutation come from the
35079 first vector, 2 if all values from the second vector, and 3 otherwise. */
35080
35081 static int
35082 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
35083 {
35084 tree list = TREE_VECTOR_CST_ELTS (cst);
35085 unsigned i, nelt = d->nelt;
35086 int ret = 0;
35087
35088 for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
35089 {
35090 unsigned HOST_WIDE_INT e;
35091
35092 if (!host_integerp (TREE_VALUE (list), 1))
35093 return 0;
35094 e = tree_low_cst (TREE_VALUE (list), 1);
35095 if (e >= 2 * nelt)
35096 return 0;
35097
35098 ret |= (e < nelt ? 1 : 2);
35099 d->perm[i] = e;
35100 }
35101 gcc_assert (list == NULL);
35102
35103 /* If all elements come from the second vector, renumber them to refer to the first. */
35104 if (ret == 2)
35105 for (i = 0; i < nelt; ++i)
35106 d->perm[i] -= nelt;
35107
35108 return ret;
35109 }
35110
35111 static rtx
35112 ix86_expand_vec_perm_builtin (tree exp)
35113 {
35114 struct expand_vec_perm_d d;
35115 tree arg0, arg1, arg2;
35116
35117 arg0 = CALL_EXPR_ARG (exp, 0);
35118 arg1 = CALL_EXPR_ARG (exp, 1);
35119 arg2 = CALL_EXPR_ARG (exp, 2);
35120
35121 d.vmode = TYPE_MODE (TREE_TYPE (arg0));
35122 d.nelt = GET_MODE_NUNITS (d.vmode);
35123 d.testing_p = false;
35124 gcc_assert (VECTOR_MODE_P (d.vmode));
35125
35126 if (TREE_CODE (arg2) != VECTOR_CST)
35127 {
35128 error_at (EXPR_LOCATION (exp),
35129 "vector permutation requires vector constant");
35130 goto exit_error;
35131 }
35132
35133 switch (extract_vec_perm_cst (&d, arg2))
35134 {
35135 default:
35136 gcc_unreachable();
35137
35138 case 0:
35139 error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
35140 goto exit_error;
35141
35142 case 3:
35143 if (!operand_equal_p (arg0, arg1, 0))
35144 {
35145 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
35146 d.op0 = force_reg (d.vmode, d.op0);
35147 d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
35148 d.op1 = force_reg (d.vmode, d.op1);
35149 break;
35150 }
35151
35152 /* The elements of PERM reference both operands, but the two
35153 operands are identical. Allow easier matching of the
35154 permutation by folding the permutation onto the single
35155 input vector. */
35156 {
35157 unsigned i, nelt = d.nelt;
35158 for (i = 0; i < nelt; ++i)
35159 if (d.perm[i] >= nelt)
35160 d.perm[i] -= nelt;
35161 }
35162 /* FALLTHRU */
35163
35164 case 1:
35165 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
35166 d.op0 = force_reg (d.vmode, d.op0);
35167 d.op1 = d.op0;
35168 break;
35169
35170 case 2:
35171 d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
35172 d.op0 = force_reg (d.vmode, d.op0);
35173 d.op1 = d.op0;
35174 break;
35175 }
35176
35177 d.target = gen_reg_rtx (d.vmode);
35178 if (ix86_expand_vec_perm_builtin_1 (&d))
35179 return d.target;
35180
35181 /* For compiler-generated permutations, we should never get here, because
35182 the compiler should also be checking the ok hook. But since this is a
35183 builtin the user has access to, don't abort. */
35184 switch (d.nelt)
35185 {
35186 case 2:
35187 sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
35188 break;
35189 case 4:
35190 sorry ("vector permutation (%d %d %d %d)",
35191 d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
35192 break;
35193 case 8:
35194 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
35195 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
35196 d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
35197 break;
35198 case 16:
35199 sorry ("vector permutation "
35200 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
35201 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
35202 d.perm[4], d.perm[5], d.perm[6], d.perm[7],
35203 d.perm[8], d.perm[9], d.perm[10], d.perm[11],
35204 d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
35205 break;
35206 default:
35207 gcc_unreachable ();
35208 }
35209 exit_error:
35210 return CONST0_RTX (d.vmode);
35211 }
35212
35213 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
35214
35215 static bool
35216 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
35217 {
35218 struct expand_vec_perm_d d;
35219 int vec_mask;
35220 bool ret, one_vec;
35221
35222 d.vmode = TYPE_MODE (vec_type);
35223 d.nelt = GET_MODE_NUNITS (d.vmode);
35224 d.testing_p = true;
35225
35226 /* Given sufficient ISA support we can just return true here
35227 for selected vector modes. */
35228 if (GET_MODE_SIZE (d.vmode) == 16)
35229 {
35230 /* All implementable with a single vpperm insn. */
35231 if (TARGET_XOP)
35232 return true;
35233 /* All implementable with 2 pshufb + 1 ior. */
35234 if (TARGET_SSSE3)
35235 return true;
35236 /* All implementable with shufpd or unpck[lh]pd. */
35237 if (d.nelt == 2)
35238 return true;
35239 }
35240
35241 vec_mask = extract_vec_perm_cst (&d, mask);
35242
35243 /* This hook cannot be called in response to something that the
35244 user does (unlike the builtin expander), so we shouldn't ever see
35245 an error generated from the extract. */
35246 gcc_assert (vec_mask > 0 && vec_mask <= 3);
35247 one_vec = (vec_mask != 3);
35248
35249 /* Implementable with shufps or pshufd. */
35250 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
35251 return true;
35252
35253 /* Otherwise we have to go through the motions and see if we can
35254 figure out how to generate the requested permutation. */
35255 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
35256 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
35257 if (!one_vec)
35258 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
35259
35260 start_sequence ();
35261 ret = ix86_expand_vec_perm_builtin_1 (&d);
35262 end_sequence ();
35263
35264 return ret;
35265 }
35266
35267 void
35268 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
35269 {
35270 struct expand_vec_perm_d d;
35271 unsigned i, nelt;
35272
35273 d.target = targ;
35274 d.op0 = op0;
35275 d.op1 = op1;
35276 d.vmode = GET_MODE (targ);
35277 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
35278 d.testing_p = false;
35279
35280 for (i = 0; i < nelt; ++i)
35281 d.perm[i] = i * 2 + odd;
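  /* Illustrative example (not from the original source): for a V4SFmode
     target and ODD == 1 this builds the selector { 1, 3, 5, 7 }, i.e. the
     odd elements of the op0|op1 concatenation.  */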
35282
35283 /* We'll either be able to implement the permutation directly... */
35284 if (expand_vec_perm_1 (&d))
35285 return;
35286
35287 /* ... or we use the special-case patterns. */
35288 expand_vec_perm_even_odd_1 (&d, odd);
35289 }
35290
35291 /* Expand an insert into a vector register through pinsr insn.
35292 Return true if successful. */
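/* Illustrative example (not from the original source): inserting a
   16-bit value at bit position 32 of a V8HImode destination selects
   srcmode == HImode, so gen_sse2_pinsrw is used with lane index
   pos / size == 32 / 16 == 2, i.e. a selector immediate of (1 << 2).  */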
35293
35294 bool
35295 ix86_expand_pinsr (rtx *operands)
35296 {
35297 rtx dst = operands[0];
35298 rtx src = operands[3];
35299
35300 unsigned int size = INTVAL (operands[1]);
35301 unsigned int pos = INTVAL (operands[2]);
35302
35303 if (GET_CODE (dst) == SUBREG)
35304 {
35305 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
35306 dst = SUBREG_REG (dst);
35307 }
35308
35309 if (GET_CODE (src) == SUBREG)
35310 src = SUBREG_REG (src);
35311
35312 switch (GET_MODE (dst))
35313 {
35314 case V16QImode:
35315 case V8HImode:
35316 case V4SImode:
35317 case V2DImode:
35318 {
35319 enum machine_mode srcmode, dstmode;
35320 rtx (*pinsr)(rtx, rtx, rtx, rtx);
35321
35322 srcmode = mode_for_size (size, MODE_INT, 0);
35323
35324 switch (srcmode)
35325 {
35326 case QImode:
35327 if (!TARGET_SSE4_1)
35328 return false;
35329 dstmode = V16QImode;
35330 pinsr = gen_sse4_1_pinsrb;
35331 break;
35332
35333 case HImode:
35334 if (!TARGET_SSE2)
35335 return false;
35336 dstmode = V8HImode;
35337 pinsr = gen_sse2_pinsrw;
35338 break;
35339
35340 case SImode:
35341 if (!TARGET_SSE4_1)
35342 return false;
35343 dstmode = V4SImode;
35344 pinsr = gen_sse4_1_pinsrd;
35345 break;
35346
35347 case DImode:
35348 gcc_assert (TARGET_64BIT);
35349 if (!TARGET_SSE4_1)
35350 return false;
35351 dstmode = V2DImode;
35352 pinsr = gen_sse4_1_pinsrq;
35353 break;
35354
35355 default:
35356 return false;
35357 }
35358
35359 dst = gen_lowpart (dstmode, dst);
35360 src = gen_lowpart (srcmode, src);
35361
35362 pos /= size;
35363
35364 emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
35365 return true;
35366 }
35367
35368 default:
35369 return false;
35370 }
35371 }
35372 \f
35373 /* This function returns the calling-ABI-specific va_list type node.
35374 It returns the FNDECL-specific va_list type. */
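/* Illustrative example (not from the original source): on x86_64 a
   varargs function declared as

     void f (int n, ...) __attribute__ ((ms_abi));

   gets ms_va_list_type_node for its va_list, while functions using the
   default SysV calling convention get sysv_va_list_type_node.  */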
35375
35376 static tree
35377 ix86_fn_abi_va_list (tree fndecl)
35378 {
35379 if (!TARGET_64BIT)
35380 return va_list_type_node;
35381 gcc_assert (fndecl != NULL_TREE);
35382
35383 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
35384 return ms_va_list_type_node;
35385 else
35386 return sysv_va_list_type_node;
35387 }
35388
35389 /* Returns the canonical va_list type specified by TYPE. If there
35390 is no valid TYPE provided, it returns NULL_TREE. */
35391
35392 static tree
35393 ix86_canonical_va_list_type (tree type)
35394 {
35395 tree wtype, htype;
35396
35397 /* Resolve references and pointers to va_list type. */
35398 if (TREE_CODE (type) == MEM_REF)
35399 type = TREE_TYPE (type);
35400 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
35401 type = TREE_TYPE (type);
35402 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
35403 type = TREE_TYPE (type);
35404
35405 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
35406 {
35407 wtype = va_list_type_node;
35408 gcc_assert (wtype != NULL_TREE);
35409 htype = type;
35410 if (TREE_CODE (wtype) == ARRAY_TYPE)
35411 {
35412 /* If va_list is an array type, the argument may have decayed
35413 to a pointer type, e.g. by being passed to another function.
35414 In that case, unwrap both types so that we can compare the
35415 underlying records. */
35416 if (TREE_CODE (htype) == ARRAY_TYPE
35417 || POINTER_TYPE_P (htype))
35418 {
35419 wtype = TREE_TYPE (wtype);
35420 htype = TREE_TYPE (htype);
35421 }
35422 }
35423 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
35424 return va_list_type_node;
35425 wtype = sysv_va_list_type_node;
35426 gcc_assert (wtype != NULL_TREE);
35427 htype = type;
35428 if (TREE_CODE (wtype) == ARRAY_TYPE)
35429 {
35430 /* If va_list is an array type, the argument may have decayed
35431 to a pointer type, e.g. by being passed to another function.
35432 In that case, unwrap both types so that we can compare the
35433 underlying records. */
35434 if (TREE_CODE (htype) == ARRAY_TYPE
35435 || POINTER_TYPE_P (htype))
35436 {
35437 wtype = TREE_TYPE (wtype);
35438 htype = TREE_TYPE (htype);
35439 }
35440 }
35441 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
35442 return sysv_va_list_type_node;
35443 wtype = ms_va_list_type_node;
35444 gcc_assert (wtype != NULL_TREE);
35445 htype = type;
35446 if (TREE_CODE (wtype) == ARRAY_TYPE)
35447 {
35448 /* If va_list is an array type, the argument may have decayed
35449 to a pointer type, e.g. by being passed to another function.
35450 In that case, unwrap both types so that we can compare the
35451 underlying records. */
35452 if (TREE_CODE (htype) == ARRAY_TYPE
35453 || POINTER_TYPE_P (htype))
35454 {
35455 wtype = TREE_TYPE (wtype);
35456 htype = TREE_TYPE (htype);
35457 }
35458 }
35459 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
35460 return ms_va_list_type_node;
35461 return NULL_TREE;
35462 }
35463 return std_canonical_va_list_type (type);
35464 }
35465
35466 /* Iterate through the target-specific builtin types for va_list.
35467 IDX denotes the iterator, *PTREE is set to the result type of
35468 the va_list builtin, and *PNAME to its internal type.
35469 Returns zero if there is no element for this index, otherwise
35470 IDX should be increased upon the next call.
35471 Note, do not iterate a base builtin's name like __builtin_va_list.
35472 Used from c_common_nodes_and_builtins. */
35473
35474 static int
35475 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
35476 {
35477 if (TARGET_64BIT)
35478 {
35479 switch (idx)
35480 {
35481 default:
35482 break;
35483
35484 case 0:
35485 *ptree = ms_va_list_type_node;
35486 *pname = "__builtin_ms_va_list";
35487 return 1;
35488
35489 case 1:
35490 *ptree = sysv_va_list_type_node;
35491 *pname = "__builtin_sysv_va_list";
35492 return 1;
35493 }
35494 }
35495
35496 return 0;
35497 }
35498
35499 #undef TARGET_SCHED_DISPATCH
35500 #define TARGET_SCHED_DISPATCH has_dispatch
35501 #undef TARGET_SCHED_DISPATCH_DO
35502 #define TARGET_SCHED_DISPATCH_DO do_dispatch
35503 #undef TARGET_SCHED_REASSOCIATION_WIDTH
35504 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
35505
35506 /* The size of the dispatch window is the total number of bytes of
35507 object code allowed in a window. */
35508 #define DISPATCH_WINDOW_SIZE 16
35509
35510 /* Number of dispatch windows considered for scheduling. */
35511 #define MAX_DISPATCH_WINDOWS 3
35512
35513 /* Maximum number of instructions in a window. */
35514 #define MAX_INSN 4
35515
35516 /* Maximum number of immediate operands in a window. */
35517 #define MAX_IMM 4
35518
35519 /* Maximum number of immediate bits allowed in a window. */
35520 #define MAX_IMM_SIZE 128
35521
35522 /* Maximum number of 32 bit immediates allowed in a window. */
35523 #define MAX_IMM_32 4
35524
35525 /* Maximum number of 64 bit immediates allowed in a window. */
35526 #define MAX_IMM_64 2
35527
35528 /* Maximum total of loads or prefetches allowed in a window. */
35529 #define MAX_LOAD 2
35530
35531 /* Maximum total of stores allowed in a window. */
35532 #define MAX_STORE 1
35533
35534 #undef BIG
35535 #define BIG 100
35536
35537
35538 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
35539 enum dispatch_group {
35540 disp_no_group = 0,
35541 disp_load,
35542 disp_store,
35543 disp_load_store,
35544 disp_prefetch,
35545 disp_imm,
35546 disp_imm_32,
35547 disp_imm_64,
35548 disp_branch,
35549 disp_cmp,
35550 disp_jcc,
35551 disp_last
35552 };
35553
35554 /* Number of allowable groups in a dispatch window. It is an array
35555 indexed by the dispatch_group enum. 100 is used as a big number,
35556 because the number of these kinds of operations does not have any
35557 effect on the dispatch window, but we need them for other reasons in
35558 the table. */
35559 static unsigned int num_allowable_groups[disp_last] = {
35560 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
35561 };
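/* Illustrative reading of the table above (added): at most 2 loads,
   1 store, 1 load-store, 2 prefetches, 4 immediates, 4 32-bit
   immediates, 2 64-bit immediates and 1 branch per window; disp_cmp
   and disp_jcc are effectively unlimited (BIG).  */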
35562
35563 char group_name[disp_last + 1][16] = {
35564 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
35565 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
35566 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
35567 };
35568
35569 /* Instruction path. */
35570 enum insn_path {
35571 no_path = 0,
35572 path_single, /* Single micro op. */
35573 path_double, /* Double micro op. */
35574 path_multi, /* Instructions with more than 2 micro ops. */
35575 last_path
35576 };
35577
35578 /* sched_insn_info defines a window to the instructions scheduled in
35579 the basic block. It contains a pointer to the insn_info table and
35580 the instruction scheduled.
35581
35582 Windows are allocated for each basic block and are linked
35583 together. */
35584 typedef struct sched_insn_info_s {
35585 rtx insn;
35586 enum dispatch_group group;
35587 enum insn_path path;
35588 int byte_len;
35589 int imm_bytes;
35590 } sched_insn_info;
35591
35592 /* Linked list of dispatch windows. This is a two-way list of
35593 dispatch windows of a basic block. It contains information about
35594 the number of uops in the window and the total number of
35595 instructions and of bytes in the object code for this dispatch
35596 window. */
35597 typedef struct dispatch_windows_s {
35598 int num_insn; /* Number of insn in the window. */
35599 int num_uops; /* Number of uops in the window. */
35600 int window_size; /* Number of bytes in the window. */
35601 int window_num; /* Window number, either 0 or 1. */
35602 int num_imm; /* Number of immediates in an insn. */
35603 int num_imm_32; /* Number of 32 bit immediates in an insn. */
35604 int num_imm_64; /* Number of 64 bit immediates in an insn. */
35605 int imm_size; /* Total immediates in the window. */
35606 int num_loads; /* Total memory loads in the window. */
35607 int num_stores; /* Total memory stores in the window. */
35608 int violation; /* Violation exists in window. */
35609 sched_insn_info *window; /* Pointer to the window. */
35610 struct dispatch_windows_s *next;
35611 struct dispatch_windows_s *prev;
35612 } dispatch_windows;
35613
35614 /* Immediate values used in an insn. */
35615 typedef struct imm_info_s
35616 {
35617 int imm;
35618 int imm32;
35619 int imm64;
35620 } imm_info;
35621
35622 static dispatch_windows *dispatch_window_list;
35623 static dispatch_windows *dispatch_window_list1;
35624
35625 /* Get the memory-related dispatch group of an insn. */
35626
35627 static enum dispatch_group
35628 get_mem_group (rtx insn)
35629 {
35630 enum attr_memory memory;
35631
35632 if (INSN_CODE (insn) < 0)
35633 return disp_no_group;
35634 memory = get_attr_memory (insn);
35635 if (memory == MEMORY_STORE)
35636 return disp_store;
35637
35638 if (memory == MEMORY_LOAD)
35639 return disp_load;
35640
35641 if (memory == MEMORY_BOTH)
35642 return disp_load_store;
35643
35644 return disp_no_group;
35645 }
35646
35647 /* Return true if insn is a compare instruction. */
35648
35649 static bool
35650 is_cmp (rtx insn)
35651 {
35652 enum attr_type type;
35653
35654 type = get_attr_type (insn);
35655 return (type == TYPE_TEST
35656 || type == TYPE_ICMP
35657 || type == TYPE_FCMP
35658 || GET_CODE (PATTERN (insn)) == COMPARE);
35659 }
35660
35661 /* Return true if a dispatch violation was encountered. */
35662
35663 static bool
35664 dispatch_violation (void)
35665 {
35666 if (dispatch_window_list->next)
35667 return dispatch_window_list->next->violation;
35668 return dispatch_window_list->violation;
35669 }
35670
35671 /* Return true if insn is a branch instruction. */
35672
35673 static bool
35674 is_branch (rtx insn)
35675 {
35676 return (CALL_P (insn) || JUMP_P (insn));
35677 }
35678
35679 /* Return true if insn is a prefetch instruction. */
35680
35681 static bool
35682 is_prefetch (rtx insn)
35683 {
35684 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
35685 }
35686
35687 /* This function initializes a dispatch window and the list container holding a
35688 pointer to the window. */
35689
35690 static void
35691 init_window (int window_num)
35692 {
35693 int i;
35694 dispatch_windows *new_list;
35695
35696 if (window_num == 0)
35697 new_list = dispatch_window_list;
35698 else
35699 new_list = dispatch_window_list1;
35700
35701 new_list->num_insn = 0;
35702 new_list->num_uops = 0;
35703 new_list->window_size = 0;
35704 new_list->next = NULL;
35705 new_list->prev = NULL;
35706 new_list->window_num = window_num;
35707 new_list->num_imm = 0;
35708 new_list->num_imm_32 = 0;
35709 new_list->num_imm_64 = 0;
35710 new_list->imm_size = 0;
35711 new_list->num_loads = 0;
35712 new_list->num_stores = 0;
35713 new_list->violation = false;
35714
35715 for (i = 0; i < MAX_INSN; i++)
35716 {
35717 new_list->window[i].insn = NULL;
35718 new_list->window[i].group = disp_no_group;
35719 new_list->window[i].path = no_path;
35720 new_list->window[i].byte_len = 0;
35721 new_list->window[i].imm_bytes = 0;
35722 }
35723 return;
35724 }
35725
35726 /* This function allocates and initializes a dispatch window and the
35727 list container holding a pointer to the window. */
35728
35729 static dispatch_windows *
35730 allocate_window (void)
35731 {
35732 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
35733 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
35734
35735 return new_list;
35736 }
35737
35738 /* This routine initializes the dispatch scheduling information. It
35739 initiates building dispatch scheduler tables and constructs the
35740 first dispatch window. */
35741
35742 static void
35743 init_dispatch_sched (void)
35744 {
35745 /* Allocate a dispatch list and a window. */
35746 dispatch_window_list = allocate_window ();
35747 dispatch_window_list1 = allocate_window ();
35748 init_window (0);
35749 init_window (1);
35750 }
35751
35752 /* This function returns true if a branch is detected. End of a basic block
35753 does not have to be a branch, but here we assume only branches end a
35754 window. */
35755
35756 static bool
35757 is_end_basic_block (enum dispatch_group group)
35758 {
35759 return group == disp_branch;
35760 }
35761
35762 /* This function is called when the end of a window processing is reached. */
35763
35764 static void
35765 process_end_window (void)
35766 {
35767 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
35768 if (dispatch_window_list->next)
35769 {
35770 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
35771 gcc_assert (dispatch_window_list->window_size
35772 + dispatch_window_list1->window_size <= 48);
35773 init_window (1);
35774 }
35775 init_window (0);
35776 }
35777
35778 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
35779 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
35780 for 48 bytes of instructions. Note that these windows are not dispatch
35781 windows whose sizes are DISPATCH_WINDOW_SIZE. */
35782
35783 static dispatch_windows *
35784 allocate_next_window (int window_num)
35785 {
35786 if (window_num == 0)
35787 {
35788 if (dispatch_window_list->next)
35789 init_window (1);
35790 init_window (0);
35791 return dispatch_window_list;
35792 }
35793
35794 dispatch_window_list->next = dispatch_window_list1;
35795 dispatch_window_list1->prev = dispatch_window_list;
35796
35797 return dispatch_window_list1;
35798 }
35799
35800 /* Increment the number of immediate operands of an instruction. */
35801
35802 static int
35803 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
35804 {
35805 if (*in_rtx == 0)
35806 return 0;
35807
35808 switch (GET_CODE (*in_rtx))
35809 {
35810 case CONST:
35811 case SYMBOL_REF:
35812 case CONST_INT:
35813 (imm_values->imm)++;
35814 if (x86_64_immediate_operand (*in_rtx, SImode))
35815 (imm_values->imm32)++;
35816 else
35817 (imm_values->imm64)++;
35818 break;
35819
35820 case CONST_DOUBLE:
35821 (imm_values->imm)++;
35822 (imm_values->imm64)++;
35823 break;
35824
35825 case CODE_LABEL:
35826 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
35827 {
35828 (imm_values->imm)++;
35829 (imm_values->imm32)++;
35830 }
35831 break;
35832
35833 default:
35834 break;
35835 }
35836
35837 return 0;
35838 }
35839
35840 /* Compute number of immediate operands of an instruction. */
35841
35842 static void
35843 find_constant (rtx in_rtx, imm_info *imm_values)
35844 {
35845 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
35846 (rtx_function) find_constant_1, (void *) imm_values);
35847 }
35848
35849 /* Return the total size of the immediate operands of an instruction, along
35850 with the number of corresponding immediate operands. It initializes its
35851 parameters to zero before calling FIND_CONSTANT.
35852 INSN is the input instruction. IMM is the total of immediates.
35853 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
35854 bit immediates. */
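/* Illustrative example (not from the original source): an insn with
   one 32-bit and one 64-bit immediate yields *imm == 2, *imm32 == 1,
   *imm64 == 1 and a return value of 1 * 4 + 1 * 8 == 12.  */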
35855
35856 static int
35857 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
35858 {
35859 imm_info imm_values = {0, 0, 0};
35860
35861 find_constant (insn, &imm_values);
35862 *imm = imm_values.imm;
35863 *imm32 = imm_values.imm32;
35864 *imm64 = imm_values.imm64;
35865 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
35866 }
35867
35868 /* This function indicates whether an instruction has any immediate
35869 operands. */
35870
35871 static bool
35872 has_immediate (rtx insn)
35873 {
35874 int num_imm_operand;
35875 int num_imm32_operand;
35876 int num_imm64_operand;
35877
35878 if (insn)
35879 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
35880 &num_imm64_operand);
35881 return false;
35882 }
35883
35884 /* Return the decode path (single, double or multi) for an instruction. */
35885
35886 static enum insn_path
35887 get_insn_path (rtx insn)
35888 {
35889 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
35890
35891 if ((int)path == 0)
35892 return path_single;
35893
35894 if ((int)path == 1)
35895 return path_double;
35896
35897 return path_multi;
35898 }
35899
35900 /* Return insn dispatch group. */
35901
35902 static enum dispatch_group
35903 get_insn_group (rtx insn)
35904 {
35905 enum dispatch_group group = get_mem_group (insn);
35906 if (group)
35907 return group;
35908
35909 if (is_branch (insn))
35910 return disp_branch;
35911
35912 if (is_cmp (insn))
35913 return disp_cmp;
35914
35915 if (has_immediate (insn))
35916 return disp_imm;
35917
35918 if (is_prefetch (insn))
35919 return disp_prefetch;
35920
35921 return disp_no_group;
35922 }
35923
35924 /* Count number of GROUP restricted instructions in a dispatch
35925 window WINDOW_LIST. */
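/* Illustrative example (not from the original source): if the window
   already holds three 32-bit immediates (num_imm_32 == 3) and INSN
   carries two more, num_imm_32 + 2 > MAX_IMM_32, so BIG is returned
   and the scheduler treats the insn as not fitting this window.  */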
35926
35927 static int
35928 count_num_restricted (rtx insn, dispatch_windows *window_list)
35929 {
35930 enum dispatch_group group = get_insn_group (insn);
35931 int imm_size;
35932 int num_imm_operand;
35933 int num_imm32_operand;
35934 int num_imm64_operand;
35935
35936 if (group == disp_no_group)
35937 return 0;
35938
35939 if (group == disp_imm)
35940 {
35941 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
35942 &num_imm64_operand);
35943 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
35944 || num_imm_operand + window_list->num_imm > MAX_IMM
35945 || (num_imm32_operand > 0
35946 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
35947 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
35948 || (num_imm64_operand > 0
35949 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
35950 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
35951 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
35952 && num_imm64_operand > 0
35953 && ((window_list->num_imm_64 > 0
35954 && window_list->num_insn >= 2)
35955 || window_list->num_insn >= 3)))
35956 return BIG;
35957
35958 return 1;
35959 }
35960
35961 if ((group == disp_load_store
35962 && (window_list->num_loads >= MAX_LOAD
35963 || window_list->num_stores >= MAX_STORE))
35964 || ((group == disp_load
35965 || group == disp_prefetch)
35966 && window_list->num_loads >= MAX_LOAD)
35967 || (group == disp_store
35968 && window_list->num_stores >= MAX_STORE))
35969 return BIG;
35970
35971 return 1;
35972 }
35973
35974 /* This function returns true if insn satisfies dispatch rules on the
35975 last window scheduled. */
35976
35977 static bool
35978 fits_dispatch_window (rtx insn)
35979 {
35980 dispatch_windows *window_list = dispatch_window_list;
35981 dispatch_windows *window_list_next = dispatch_window_list->next;
35982 unsigned int num_restrict;
35983 enum dispatch_group group = get_insn_group (insn);
35984 enum insn_path path = get_insn_path (insn);
35985 int sum;
35986
35987 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
35988 instructions should be given the lowest priority in the
35989 scheduling process in the Haifa scheduler to make sure they will be
35990 scheduled in the same dispatch window as the reference to them. */
35991 if (group == disp_jcc || group == disp_cmp)
35992 return false;
35993
35994 /* Check nonrestricted. */
35995 if (group == disp_no_group || group == disp_branch)
35996 return true;
35997
35998 /* Get last dispatch window. */
35999 if (window_list_next)
36000 window_list = window_list_next;
36001
36002 if (window_list->window_num == 1)
36003 {
36004 sum = window_list->prev->window_size + window_list->window_size;
36005
36006 if (sum == 32
36007 || (min_insn_size (insn) + sum) >= 48)
36008 /* Window 1 is full. Go for next window. */
36009 return true;
36010 }
36011
36012 num_restrict = count_num_restricted (insn, window_list);
36013
36014 if (num_restrict > num_allowable_groups[group])
36015 return false;
36016
36017 /* See if it fits in the first window. */
36018 if (window_list->window_num == 0)
36019 {
36020 /* The first window should have only single- and double-path
36021 uops. */
36022 if (path == path_double
36023 && (window_list->num_uops + 2) > MAX_INSN)
36024 return false;
36025 else if (path != path_single)
36026 return false;
36027 }
36028 return true;
36029 }
36030
36031 /* Add an instruction INSN with NUM_UOPS micro-operations to the
36032 dispatch window WINDOW_LIST. */
36033
36034 static void
36035 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
36036 {
36037 int byte_len = min_insn_size (insn);
36038 int num_insn = window_list->num_insn;
36039 int imm_size;
36040 sched_insn_info *window = window_list->window;
36041 enum dispatch_group group = get_insn_group (insn);
36042 enum insn_path path = get_insn_path (insn);
36043 int num_imm_operand;
36044 int num_imm32_operand;
36045 int num_imm64_operand;
36046
36047 if (!window_list->violation && group != disp_cmp
36048 && !fits_dispatch_window (insn))
36049 window_list->violation = true;
36050
36051 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
36052 &num_imm64_operand);
36053
36054 /* Initialize window with new instruction. */
36055 window[num_insn].insn = insn;
36056 window[num_insn].byte_len = byte_len;
36057 window[num_insn].group = group;
36058 window[num_insn].path = path;
36059 window[num_insn].imm_bytes = imm_size;
36060
36061 window_list->window_size += byte_len;
36062 window_list->num_insn = num_insn + 1;
36063 window_list->num_uops = window_list->num_uops + num_uops;
36064 window_list->imm_size += imm_size;
36065 window_list->num_imm += num_imm_operand;
36066 window_list->num_imm_32 += num_imm32_operand;
36067 window_list->num_imm_64 += num_imm64_operand;
36068
36069 if (group == disp_store)
36070 window_list->num_stores += 1;
36071 else if (group == disp_load
36072 || group == disp_prefetch)
36073 window_list->num_loads += 1;
36074 else if (group == disp_load_store)
36075 {
36076 window_list->num_stores += 1;
36077 window_list->num_loads += 1;
36078 }
36079 }
36080
36081 /* Adds a scheduled instruction, INSN, to the current dispatch window.
36082 If the total bytes of instructions or the number of instructions in
36083 the window exceeds the allowable limit, it allocates a new window. */
36084
36085 static void
36086 add_to_dispatch_window (rtx insn)
36087 {
36088 int byte_len;
36089 dispatch_windows *window_list;
36090 dispatch_windows *next_list;
36091 dispatch_windows *window0_list;
36092 enum insn_path path;
36093 enum dispatch_group insn_group;
36094 bool insn_fits;
36095 int num_insn;
36096 int num_uops;
36097 int window_num;
36098 int insn_num_uops;
36099 int sum;
36100
36101 if (INSN_CODE (insn) < 0)
36102 return;
36103
36104 byte_len = min_insn_size (insn);
36105 window_list = dispatch_window_list;
36106 next_list = window_list->next;
36107 path = get_insn_path (insn);
36108 insn_group = get_insn_group (insn);
36109
36110 /* Get the last dispatch window. */
36111 if (next_list)
36112 window_list = dispatch_window_list->next;
36113
36114 if (path == path_single)
36115 insn_num_uops = 1;
36116 else if (path == path_double)
36117 insn_num_uops = 2;
36118 else
36119 insn_num_uops = (int) path;
36120
36121 /* If the current window is full, get a new window.
36122 Window number zero is full if MAX_INSN uops are scheduled in it.
36123 Window number one is full if window zero's bytes plus window
36124 one's bytes equal 32, or if adding the bytes of the new instruction
36125 to the total makes it 48 or more, or if it already has MAX_INSN
36126 instructions in it. */
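  /* Illustrative example (not from the original source): if window 0
     holds 16 bytes and window 1 holds 16 bytes, sum == 32 and window 1
     is full; likewise a 6-byte insn added to a 42-byte total reaches
     48 and forces a new pair of windows.  */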
36127 num_insn = window_list->num_insn;
36128 num_uops = window_list->num_uops;
36129 window_num = window_list->window_num;
36130 insn_fits = fits_dispatch_window (insn);
36131
36132 if (num_insn >= MAX_INSN
36133 || num_uops + insn_num_uops > MAX_INSN
36134 || !(insn_fits))
36135 {
36136 window_num = ~window_num & 1;
36137 window_list = allocate_next_window (window_num);
36138 }
36139
36140 if (window_num == 0)
36141 {
36142 add_insn_window (insn, window_list, insn_num_uops);
36143 if (window_list->num_insn >= MAX_INSN
36144 && insn_group == disp_branch)
36145 {
36146 process_end_window ();
36147 return;
36148 }
36149 }
36150 else if (window_num == 1)
36151 {
36152 window0_list = window_list->prev;
36153 sum = window0_list->window_size + window_list->window_size;
36154 if (sum == 32
36155 || (byte_len + sum) >= 48)
36156 {
36157 process_end_window ();
36158 window_list = dispatch_window_list;
36159 }
36160
36161 add_insn_window (insn, window_list, insn_num_uops);
36162 }
36163 else
36164 gcc_unreachable ();
36165
36166 if (is_end_basic_block (insn_group))
36167 {
36168 /* End of basic block is reached; do end-of-basic-block processing. */
36169 process_end_window ();
36170 return;
36171 }
36172 }
36173
36174 /* Print the dispatch window, WINDOW_NUM, to FILE. */
36175
36176 DEBUG_FUNCTION static void
36177 debug_dispatch_window_file (FILE *file, int window_num)
36178 {
36179 dispatch_windows *list;
36180 int i;
36181
36182 if (window_num == 0)
36183 list = dispatch_window_list;
36184 else
36185 list = dispatch_window_list1;
36186
36187 fprintf (file, "Window #%d:\n", list->window_num);
36188 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
36189 list->num_insn, list->num_uops, list->window_size);
36190 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
36191 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
36192
36193 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
36194 list->num_stores);
36195 fprintf (file, " insn info:\n");
36196
36197 for (i = 0; i < MAX_INSN; i++)
36198 {
36199 if (!list->window[i].insn)
36200 break;
36201 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
36202 i, group_name[list->window[i].group],
36203 i, (void *)list->window[i].insn,
36204 i, list->window[i].path,
36205 i, list->window[i].byte_len,
36206 i, list->window[i].imm_bytes);
36207 }
36208 }
36209
36210 /* Print to stdout a dispatch window. */
36211
36212 DEBUG_FUNCTION void
36213 debug_dispatch_window (int window_num)
36214 {
36215 debug_dispatch_window_file (stdout, window_num);
36216 }
36217
36218 /* Print INSN dispatch information to FILE. */
36219
36220 DEBUG_FUNCTION static void
36221 debug_insn_dispatch_info_file (FILE *file, rtx insn)
36222 {
36223 int byte_len;
36224 enum insn_path path;
36225 enum dispatch_group group;
36226 int imm_size;
36227 int num_imm_operand;
36228 int num_imm32_operand;
36229 int num_imm64_operand;
36230
36231 if (INSN_CODE (insn) < 0)
36232 return;
36233
36234 byte_len = min_insn_size (insn);
36235 path = get_insn_path (insn);
36236 group = get_insn_group (insn);
36237 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
36238 &num_imm64_operand);
36239
36240 fprintf (file, " insn info:\n");
36241 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
36242 group_name[group], path, byte_len);
36243 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
36244 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
36245 }
36246
36247 /* Print to STDERR the status of the ready list with respect to
36248 dispatch windows. */
36249
36250 DEBUG_FUNCTION void
36251 debug_ready_dispatch (void)
36252 {
36253 int i;
36254 int no_ready = number_in_ready ();
36255
36256 fprintf (stdout, "Number of ready: %d\n", no_ready);
36257
36258 for (i = 0; i < no_ready; i++)
36259 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
36260 }
36261
36262 /* This routine is the driver of the dispatch scheduler. */
36263
36264 static void
36265 do_dispatch (rtx insn, int mode)
36266 {
36267 if (mode == DISPATCH_INIT)
36268 init_dispatch_sched ();
36269 else if (mode == ADD_TO_DISPATCH_WINDOW)
36270 add_to_dispatch_window (insn);
36271 }
36272
36273 /* Return TRUE if Dispatch Scheduling is supported. */
36274
36275 static bool
36276 has_dispatch (rtx insn, int action)
36277 {
36278 if ((ix86_tune == PROCESSOR_BDVER1 || ix86_tune == PROCESSOR_BDVER2)
36279 && flag_dispatch_scheduler)
36280 switch (action)
36281 {
36282 default:
36283 return false;
36284
36285 case IS_DISPATCH_ON:
36286 return true;
36287 break;
36288
36289 case IS_CMP:
36290 return is_cmp (insn);
36291
36292 case DISPATCH_VIOLATION:
36293 return dispatch_violation ();
36294
36295 case FITS_DISPATCH_WINDOW:
36296 return fits_dispatch_window (insn);
36297 }
36298
36299 return false;
36300 }
36301
36302 /* Implementation of the reassociation_width target hook, used by the
36303 reassoc phase to identify the parallelism level in a reassociated
36304 tree. The statement's tree_code is passed in OPC. The arguments'
36305 type is passed in MODE.
36306
36307 Currently parallel reassociation is enabled for Atom
36308 processors only and we set reassociation width to be 2
36309 because Atom may issue up to 2 instructions per cycle.
36310
36311 Return value should be fixed if parallel reassociation is
36312 enabled for other processors. */
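/* Illustrative example (not from the original source): with a width
   of 2, the reassoc pass may rewrite ((a + b) + c) + d as
   (a + b) + (c + d), exposing two independent additions that Atom
   can issue in parallel.  */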
36313
36314 static int
36315 ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
36316 enum machine_mode mode)
36317 {
36318 int res = 1;
36319
36320 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
36321 res = 2;
36322 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
36323 res = 2;
36324
36325 return res;
36326 }
36327
36328 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
36329 place emms and femms instructions. */
36330
36331 static enum machine_mode
36332 ix86_preferred_simd_mode (enum machine_mode mode)
36333 {
36334 if (!TARGET_SSE)
36335 return word_mode;
36336
36337 switch (mode)
36338 {
36339 case QImode:
36340 return TARGET_AVX2 ? V32QImode : V16QImode;
36341 case HImode:
36342 return TARGET_AVX2 ? V16HImode : V8HImode;
36343 case SImode:
36344 return TARGET_AVX2 ? V8SImode : V4SImode;
36345 case DImode:
36346 return TARGET_AVX2 ? V4DImode : V2DImode;
36347
36348 case SFmode:
36349 if (TARGET_AVX && !TARGET_PREFER_AVX128)
36350 return V8SFmode;
36351 else
36352 return V4SFmode;
36353
36354 case DFmode:
36355 if (!TARGET_VECTORIZE_DOUBLE)
36356 return word_mode;
36357 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
36358 return V4DFmode;
36359 else if (TARGET_SSE2)
36360 return V2DFmode;
36361 /* FALLTHRU */
36362
36363 default:
36364 return word_mode;
36365 }
36366 }
36367
36368 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
36369 vectors. */
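/* Note (added, assuming the documented hook semantics): the return
   value is a bit mask of vector sizes in bytes, so 32 | 16 == 48
   requests both 256-bit and 128-bit vectorization attempts, while 0
   means only the preferred SIMD mode is tried.  */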
36370
36371 static unsigned int
36372 ix86_autovectorize_vector_sizes (void)
36373 {
36374 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
36375 }
36376
36377 /* Initialize the GCC target structure. */
36378 #undef TARGET_RETURN_IN_MEMORY
36379 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
36380
36381 #undef TARGET_LEGITIMIZE_ADDRESS
36382 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
36383
36384 #undef TARGET_ATTRIBUTE_TABLE
36385 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
36386 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
36387 # undef TARGET_MERGE_DECL_ATTRIBUTES
36388 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
36389 #endif
36390
36391 #undef TARGET_COMP_TYPE_ATTRIBUTES
36392 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
36393
36394 #undef TARGET_INIT_BUILTINS
36395 #define TARGET_INIT_BUILTINS ix86_init_builtins
36396 #undef TARGET_BUILTIN_DECL
36397 #define TARGET_BUILTIN_DECL ix86_builtin_decl
36398 #undef TARGET_EXPAND_BUILTIN
36399 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
36400
36401 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
36402 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
36403 ix86_builtin_vectorized_function
36404
36405 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
36406 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
36407
36408 #undef TARGET_BUILTIN_RECIPROCAL
36409 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
36410
36411 #undef TARGET_ASM_FUNCTION_EPILOGUE
36412 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
36413
36414 #undef TARGET_ENCODE_SECTION_INFO
36415 #ifndef SUBTARGET_ENCODE_SECTION_INFO
36416 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
36417 #else
36418 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
36419 #endif
36420
36421 #undef TARGET_ASM_OPEN_PAREN
36422 #define TARGET_ASM_OPEN_PAREN ""
36423 #undef TARGET_ASM_CLOSE_PAREN
36424 #define TARGET_ASM_CLOSE_PAREN ""
36425
36426 #undef TARGET_ASM_BYTE_OP
36427 #define TARGET_ASM_BYTE_OP ASM_BYTE
36428
36429 #undef TARGET_ASM_ALIGNED_HI_OP
36430 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
36431 #undef TARGET_ASM_ALIGNED_SI_OP
36432 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
36433 #ifdef ASM_QUAD
36434 #undef TARGET_ASM_ALIGNED_DI_OP
36435 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
36436 #endif
36437
36438 #undef TARGET_PROFILE_BEFORE_PROLOGUE
36439 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
36440
36441 #undef TARGET_ASM_UNALIGNED_HI_OP
36442 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
36443 #undef TARGET_ASM_UNALIGNED_SI_OP
36444 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
36445 #undef TARGET_ASM_UNALIGNED_DI_OP
36446 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
36447
36448 #undef TARGET_PRINT_OPERAND
36449 #define TARGET_PRINT_OPERAND ix86_print_operand
36450 #undef TARGET_PRINT_OPERAND_ADDRESS
36451 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
36452 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
36453 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
36454 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
36455 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
36456
36457 #undef TARGET_SCHED_INIT_GLOBAL
36458 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
36459 #undef TARGET_SCHED_ADJUST_COST
36460 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
36461 #undef TARGET_SCHED_ISSUE_RATE
36462 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
36463 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
36464 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
36465 ia32_multipass_dfa_lookahead
36466
36467 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
36468 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
36469
36470 #ifdef HAVE_AS_TLS
36471 #undef TARGET_HAVE_TLS
36472 #define TARGET_HAVE_TLS true
36473 #endif
36474 #undef TARGET_CANNOT_FORCE_CONST_MEM
36475 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
36476 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
36477 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
36478
36479 #undef TARGET_DELEGITIMIZE_ADDRESS
36480 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
36481
36482 #undef TARGET_MS_BITFIELD_LAYOUT_P
36483 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
36484
36485 #if TARGET_MACHO
36486 #undef TARGET_BINDS_LOCAL_P
36487 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
36488 #endif
36489 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
36490 #undef TARGET_BINDS_LOCAL_P
36491 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
36492 #endif
36493
36494 #undef TARGET_ASM_OUTPUT_MI_THUNK
36495 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
36496 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
36497 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
36498
36499 #undef TARGET_ASM_FILE_START
36500 #define TARGET_ASM_FILE_START x86_file_start
36501
36502 #undef TARGET_OPTION_OVERRIDE
36503 #define TARGET_OPTION_OVERRIDE ix86_option_override
36504
36505 #undef TARGET_REGISTER_MOVE_COST
36506 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
36507 #undef TARGET_MEMORY_MOVE_COST
36508 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
36509 #undef TARGET_RTX_COSTS
36510 #define TARGET_RTX_COSTS ix86_rtx_costs
36511 #undef TARGET_ADDRESS_COST
36512 #define TARGET_ADDRESS_COST ix86_address_cost
36513
36514 #undef TARGET_FIXED_CONDITION_CODE_REGS
36515 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
36516 #undef TARGET_CC_MODES_COMPATIBLE
36517 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
36518
36519 #undef TARGET_MACHINE_DEPENDENT_REORG
36520 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
36521
36522 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
36523 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
36524
36525 #undef TARGET_BUILD_BUILTIN_VA_LIST
36526 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
36527
36528 #undef TARGET_ENUM_VA_LIST_P
36529 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
36530
36531 #undef TARGET_FN_ABI_VA_LIST
36532 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
36533
36534 #undef TARGET_CANONICAL_VA_LIST_TYPE
36535 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
36536
36537 #undef TARGET_EXPAND_BUILTIN_VA_START
36538 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
36539
36540 #undef TARGET_MD_ASM_CLOBBERS
36541 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
36542
36543 #undef TARGET_PROMOTE_PROTOTYPES
36544 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
36545 #undef TARGET_STRUCT_VALUE_RTX
36546 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
36547 #undef TARGET_SETUP_INCOMING_VARARGS
36548 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
36549 #undef TARGET_MUST_PASS_IN_STACK
36550 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
36551 #undef TARGET_FUNCTION_ARG_ADVANCE
36552 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
36553 #undef TARGET_FUNCTION_ARG
36554 #define TARGET_FUNCTION_ARG ix86_function_arg
36555 #undef TARGET_FUNCTION_ARG_BOUNDARY
36556 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
36557 #undef TARGET_PASS_BY_REFERENCE
36558 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
36559 #undef TARGET_INTERNAL_ARG_POINTER
36560 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
36561 #undef TARGET_UPDATE_STACK_BOUNDARY
36562 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
36563 #undef TARGET_GET_DRAP_RTX
36564 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
36565 #undef TARGET_STRICT_ARGUMENT_NAMING
36566 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
36567 #undef TARGET_STATIC_CHAIN
36568 #define TARGET_STATIC_CHAIN ix86_static_chain
36569 #undef TARGET_TRAMPOLINE_INIT
36570 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
36571 #undef TARGET_RETURN_POPS_ARGS
36572 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
36573
36574 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
36575 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
36576
36577 #undef TARGET_SCALAR_MODE_SUPPORTED_P
36578 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
36579
36580 #undef TARGET_VECTOR_MODE_SUPPORTED_P
36581 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
36582
36583 #undef TARGET_C_MODE_FOR_SUFFIX
36584 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
36585
36586 #ifdef HAVE_AS_TLS
36587 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
36588 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
36589 #endif
36590
36591 #ifdef SUBTARGET_INSERT_ATTRIBUTES
36592 #undef TARGET_INSERT_ATTRIBUTES
36593 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
36594 #endif
36595
36596 #undef TARGET_MANGLE_TYPE
36597 #define TARGET_MANGLE_TYPE ix86_mangle_type
36598
36599 #ifndef TARGET_MACHO
36600 #undef TARGET_STACK_PROTECT_FAIL
36601 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
36602 #endif
36603
36604 #undef TARGET_FUNCTION_VALUE
36605 #define TARGET_FUNCTION_VALUE ix86_function_value
36606
36607 #undef TARGET_FUNCTION_VALUE_REGNO_P
36608 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
36609
36610 #undef TARGET_PROMOTE_FUNCTION_MODE
36611 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
36612
36613 #undef TARGET_SECONDARY_RELOAD
36614 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
36615
36616 #undef TARGET_CLASS_MAX_NREGS
36617 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
36618
36619 #undef TARGET_PREFERRED_RELOAD_CLASS
36620 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
36621 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
36622 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
36623 #undef TARGET_CLASS_LIKELY_SPILLED_P
36624 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
36625
36626 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
36627 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
36628 ix86_builtin_vectorization_cost
36629 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
36630 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
36631 ix86_vectorize_builtin_vec_perm
36632 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
36633 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
36634 ix86_vectorize_builtin_vec_perm_ok
36635 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
36636 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
36637 ix86_preferred_simd_mode
36638 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
36639 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
36640 ix86_autovectorize_vector_sizes
36641
36642 #undef TARGET_SET_CURRENT_FUNCTION
36643 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
36644
36645 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
36646 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
36647
36648 #undef TARGET_OPTION_SAVE
36649 #define TARGET_OPTION_SAVE ix86_function_specific_save
36650
36651 #undef TARGET_OPTION_RESTORE
36652 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
36653
36654 #undef TARGET_OPTION_PRINT
36655 #define TARGET_OPTION_PRINT ix86_function_specific_print
36656
36657 #undef TARGET_CAN_INLINE_P
36658 #define TARGET_CAN_INLINE_P ix86_can_inline_p
36659
36660 #undef TARGET_EXPAND_TO_RTL_HOOK
36661 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
36662
36663 #undef TARGET_LEGITIMATE_ADDRESS_P
36664 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
36665
36666 #undef TARGET_LEGITIMATE_CONSTANT_P
36667 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
36668
36669 #undef TARGET_FRAME_POINTER_REQUIRED
36670 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
36671
36672 #undef TARGET_CAN_ELIMINATE
36673 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
36674
36675 #undef TARGET_EXTRA_LIVE_ON_ENTRY
36676 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
36677
36678 #undef TARGET_ASM_CODE_END
36679 #define TARGET_ASM_CODE_END ix86_code_end
36680
36681 #undef TARGET_CONDITIONAL_REGISTER_USAGE
36682 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
36683
36684 #if TARGET_MACHO
36685 #undef TARGET_INIT_LIBFUNCS
36686 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
36687 #endif
36688
36689 struct gcc_target targetm = TARGET_INITIALIZER;
36690 \f
36691 #include "gt-i386.h"