1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
3 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
4 Free Software Foundation, Inc.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "output.h"
34 #include "insn-codes.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "except.h"
38 #include "function.h"
39 #include "recog.h"
40 #include "expr.h"
41 #include "optabs.h"
42 #include "diagnostic-core.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "common/common-target.h"
49 #include "langhooks.h"
50 #include "cgraph.h"
51 #include "gimple.h"
52 #include "dwarf2.h"
53 #include "df.h"
54 #include "tm-constrs.h"
55 #include "params.h"
56 #include "cselib.h"
57 #include "debug.h"
58 #include "sched-int.h"
59 #include "sbitmap.h"
60 #include "fibheap.h"
61 #include "opts.h"
62 #include "diagnostic.h"
63
64 enum upper_128bits_state
65 {
66 unknown = 0,
67 unused,
68 used
69 };
70
71 typedef struct block_info_def
72 {
73 /* State of the upper 128bits of AVX registers at exit. */
74 enum upper_128bits_state state;
75 /* TRUE if state of the upper 128bits of AVX registers is unchanged
76 in this block. */
77 bool unchanged;
78 /* TRUE if block has been processed. */
79 bool processed;
80 /* TRUE if block has been scanned. */
81 bool scanned;
82 /* Previous state of the upper 128bits of AVX registers at entry. */
83 enum upper_128bits_state prev;
84 } *block_info;
85
86 #define BLOCK_INFO(B) ((block_info) (B)->aux)
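/* Illustrative note: the per-block data above lives in each basic block's
   AUX field.  move_or_delete_vzeroupper below allocates it with
   alloc_aux_for_blocks (sizeof (struct block_info_def)) and accesses it
   through this macro, e.g. BLOCK_INFO (bb)->state.  */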
87
88 enum call_avx256_state
89 {
90 /* Callee returns 256bit AVX register. */
91 callee_return_avx256 = -1,
92 /* Callee returns and passes 256bit AVX register. */
93 callee_return_pass_avx256,
94 /* Callee passes 256bit AVX register. */
95 callee_pass_avx256,
  96   /* Callee doesn't return or pass a 256bit AVX register, or no
  97        256bit AVX register in function return.  */
98 call_no_avx256,
99 /* vzeroupper intrinsic. */
100 vzeroupper_intrinsic
101 };
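/* Illustrative note: by the usual C enum rules the values above are
   callee_return_avx256 == -1, callee_return_pass_avx256 == 0,
   callee_pass_avx256 == 1, call_no_avx256 == 2 and
   vzeroupper_intrinsic == 3.  move_or_delete_vzeroupper_2 below reads the
   value back out of a vzeroupper pattern with
   INTVAL (XVECEXP (pat, 0, 0)).  */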
102
103 /* Check if a 256bit AVX register is referenced in stores. */
104
105 static void
106 check_avx256_stores (rtx dest, const_rtx set, void *data)
107 {
108 if ((REG_P (dest)
109 && VALID_AVX256_REG_MODE (GET_MODE (dest)))
110 || (GET_CODE (set) == SET
111 && REG_P (SET_SRC (set))
112 && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
113 {
114 enum upper_128bits_state *state
115 = (enum upper_128bits_state *) data;
116 *state = used;
117 }
118 }
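/* Illustrative use, mirroring move_or_delete_vzeroupper_2 below; INSN here
   stands for whatever nondebug insn is being scanned:

     enum upper_128bits_state state = unused;
     note_stores (PATTERN (insn), check_avx256_stores, &state);

   After the call, STATE is set to USED if the insn stored to (or copied
   from) a 256bit AVX register.  */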
119
120 /* Helper function for move_or_delete_vzeroupper_1. Look for vzeroupper
121 in basic block BB. Delete it if upper 128bit AVX registers are
122 unused. If it isn't deleted, move it to just before a jump insn.
123
124 STATE is state of the upper 128bits of AVX registers at entry. */
125
126 static void
127 move_or_delete_vzeroupper_2 (basic_block bb,
128 enum upper_128bits_state state)
129 {
130 rtx insn, bb_end;
131 rtx vzeroupper_insn = NULL_RTX;
132 rtx pat;
133 int avx256;
134 bool unchanged;
135
136 if (BLOCK_INFO (bb)->unchanged)
137 {
138 if (dump_file)
139 fprintf (dump_file, " [bb %i] unchanged: upper 128bits: %d\n",
140 bb->index, state);
141
142 BLOCK_INFO (bb)->state = state;
143 return;
144 }
145
146 if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
147 {
148 if (dump_file)
149 fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
150 bb->index, BLOCK_INFO (bb)->state);
151 return;
152 }
153
154 BLOCK_INFO (bb)->prev = state;
155
156 if (dump_file)
157 fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
158 bb->index, state);
159
160 unchanged = true;
161
162 /* BB_END changes when it is deleted. */
163 bb_end = BB_END (bb);
164 insn = BB_HEAD (bb);
165 while (insn != bb_end)
166 {
167 insn = NEXT_INSN (insn);
168
169 if (!NONDEBUG_INSN_P (insn))
170 continue;
171
172 /* Move vzeroupper before jump/call. */
173 if (JUMP_P (insn) || CALL_P (insn))
174 {
175 if (!vzeroupper_insn)
176 continue;
177
178 if (PREV_INSN (insn) != vzeroupper_insn)
179 {
180 if (dump_file)
181 {
182 fprintf (dump_file, "Move vzeroupper after:\n");
183 print_rtl_single (dump_file, PREV_INSN (insn));
184 fprintf (dump_file, "before:\n");
185 print_rtl_single (dump_file, insn);
186 }
187 reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
188 PREV_INSN (insn));
189 }
190 vzeroupper_insn = NULL_RTX;
191 continue;
192 }
193
194 pat = PATTERN (insn);
195
196 /* Check insn for vzeroupper intrinsic. */
197 if (GET_CODE (pat) == UNSPEC_VOLATILE
198 && XINT (pat, 1) == UNSPECV_VZEROUPPER)
199 {
200 if (dump_file)
201 {
202 /* Found vzeroupper intrinsic. */
203 fprintf (dump_file, "Found vzeroupper:\n");
204 print_rtl_single (dump_file, insn);
205 }
206 }
207 else
208 {
209 /* Check insn for vzeroall intrinsic. */
210 if (GET_CODE (pat) == PARALLEL
211 && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
212 && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
213 {
214 state = unused;
215 unchanged = false;
216
217 /* Delete pending vzeroupper insertion. */
218 if (vzeroupper_insn)
219 {
220 delete_insn (vzeroupper_insn);
221 vzeroupper_insn = NULL_RTX;
222 }
223 }
224 else if (state != used)
225 {
226 note_stores (pat, check_avx256_stores, &state);
227 if (state == used)
228 unchanged = false;
229 }
230 continue;
231 }
232
233 /* Process vzeroupper intrinsic. */
234 avx256 = INTVAL (XVECEXP (pat, 0, 0));
235
236 if (state == unused)
237 {
238 /* Since the upper 128bits are cleared, callee must not pass
239 256bit AVX register. We only need to check if callee
240 returns 256bit AVX register. */
241 if (avx256 == callee_return_avx256)
242 {
243 state = used;
244 unchanged = false;
245 }
246
247 /* Remove unnecessary vzeroupper since upper 128bits are
248 cleared. */
249 if (dump_file)
250 {
251 fprintf (dump_file, "Delete redundant vzeroupper:\n");
252 print_rtl_single (dump_file, insn);
253 }
254 delete_insn (insn);
255 }
256 else
257 {
258 /* Set state to UNUSED if callee doesn't return 256bit AVX
259 register. */
260 if (avx256 != callee_return_pass_avx256)
261 state = unused;
262
263 if (avx256 == callee_return_pass_avx256
264 || avx256 == callee_pass_avx256)
265 {
266 /* Must remove vzeroupper since callee passes in 256bit
267 AVX register. */
268 if (dump_file)
269 {
270 fprintf (dump_file, "Delete callee pass vzeroupper:\n");
271 print_rtl_single (dump_file, insn);
272 }
273 delete_insn (insn);
274 }
275 else
276 {
277 vzeroupper_insn = insn;
278 unchanged = false;
279 }
280 }
281 }
282
283 BLOCK_INFO (bb)->state = state;
284 BLOCK_INFO (bb)->unchanged = unchanged;
285 BLOCK_INFO (bb)->scanned = true;
286
287 if (dump_file)
288 fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
289 bb->index, unchanged ? "unchanged" : "changed",
290 state);
291 }
292
293 /* Helper function for move_or_delete_vzeroupper. Process vzeroupper
294 in BLOCK and check its predecessor blocks. Treat UNKNOWN state
  295      as UNUSED if UNKNOWN_IS_UNUSED is true.  Return TRUE if the exit
296 state is changed. */
297
298 static bool
299 move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
300 {
301 edge e;
302 edge_iterator ei;
303 enum upper_128bits_state state, old_state, new_state;
304 bool seen_unknown;
305
306 if (dump_file)
307 fprintf (dump_file, " Process [bb %i]: status: %d\n",
308 block->index, BLOCK_INFO (block)->processed);
309
310 if (BLOCK_INFO (block)->processed)
311 return false;
312
313 state = unused;
314
315 /* Check all predecessor edges of this block. */
316 seen_unknown = false;
317 FOR_EACH_EDGE (e, ei, block->preds)
318 {
319 if (e->src == block)
320 continue;
321 switch (BLOCK_INFO (e->src)->state)
322 {
323 case unknown:
324 if (!unknown_is_unused)
  325              seen_unknown = true;  /* FALLTHRU */
326 case unused:
327 break;
328 case used:
329 state = used;
330 goto done;
331 }
332 }
333
334 if (seen_unknown)
335 state = unknown;
336
337 done:
338 old_state = BLOCK_INFO (block)->state;
339 move_or_delete_vzeroupper_2 (block, state);
340 new_state = BLOCK_INFO (block)->state;
341
342 if (state != unknown || new_state == used)
343 BLOCK_INFO (block)->processed = true;
344
345 /* Need to rescan if the upper 128bits of AVX registers are changed
346 to USED at exit. */
347 if (new_state != old_state)
348 {
349 if (new_state == used)
350 cfun->machine->rescan_vzeroupper_p = 1;
351 return true;
352 }
353 else
354 return false;
355 }
356
357 /* Go through the instruction stream looking for vzeroupper. Delete
358 it if upper 128bit AVX registers are unused. If it isn't deleted,
359 move it to just before a jump insn. */
360
361 static void
362 move_or_delete_vzeroupper (void)
363 {
364 edge e;
365 edge_iterator ei;
366 basic_block bb;
367 fibheap_t worklist, pending, fibheap_swap;
368 sbitmap visited, in_worklist, in_pending, sbitmap_swap;
369 int *bb_order;
370 int *rc_order;
371 int i;
372
373 /* Set up block info for each basic block. */
374 alloc_aux_for_blocks (sizeof (struct block_info_def));
375
376 /* Process outgoing edges of entry point. */
377 if (dump_file)
378 fprintf (dump_file, "Process outgoing edges of entry point\n");
379
380 FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
381 {
382 move_or_delete_vzeroupper_2 (e->dest,
383 cfun->machine->caller_pass_avx256_p
384 ? used : unused);
385 BLOCK_INFO (e->dest)->processed = true;
386 }
387
388 /* Compute reverse completion order of depth first search of the CFG
389 so that the data-flow runs faster. */
390 rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
391 bb_order = XNEWVEC (int, last_basic_block);
392 pre_and_rev_post_order_compute (NULL, rc_order, false);
393 for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
394 bb_order[rc_order[i]] = i;
395 free (rc_order);
396
397 worklist = fibheap_new ();
398 pending = fibheap_new ();
399 visited = sbitmap_alloc (last_basic_block);
400 in_worklist = sbitmap_alloc (last_basic_block);
401 in_pending = sbitmap_alloc (last_basic_block);
402 sbitmap_zero (in_worklist);
403
404 /* Don't check outgoing edges of entry point. */
405 sbitmap_ones (in_pending);
406 FOR_EACH_BB (bb)
407 if (BLOCK_INFO (bb)->processed)
408 RESET_BIT (in_pending, bb->index);
409 else
410 {
411 move_or_delete_vzeroupper_1 (bb, false);
412 fibheap_insert (pending, bb_order[bb->index], bb);
413 }
414
415 if (dump_file)
416 fprintf (dump_file, "Check remaining basic blocks\n");
417
418 while (!fibheap_empty (pending))
419 {
420 fibheap_swap = pending;
421 pending = worklist;
422 worklist = fibheap_swap;
423 sbitmap_swap = in_pending;
424 in_pending = in_worklist;
425 in_worklist = sbitmap_swap;
426
427 sbitmap_zero (visited);
428
429 cfun->machine->rescan_vzeroupper_p = 0;
430
431 while (!fibheap_empty (worklist))
432 {
433 bb = (basic_block) fibheap_extract_min (worklist);
434 RESET_BIT (in_worklist, bb->index);
435 gcc_assert (!TEST_BIT (visited, bb->index));
436 if (!TEST_BIT (visited, bb->index))
437 {
438 edge_iterator ei;
439
440 SET_BIT (visited, bb->index);
441
442 if (move_or_delete_vzeroupper_1 (bb, false))
443 FOR_EACH_EDGE (e, ei, bb->succs)
444 {
445 if (e->dest == EXIT_BLOCK_PTR
446 || BLOCK_INFO (e->dest)->processed)
447 continue;
448
449 if (TEST_BIT (visited, e->dest->index))
450 {
451 if (!TEST_BIT (in_pending, e->dest->index))
452 {
453 /* Send E->DEST to next round. */
454 SET_BIT (in_pending, e->dest->index);
455 fibheap_insert (pending,
456 bb_order[e->dest->index],
457 e->dest);
458 }
459 }
460 else if (!TEST_BIT (in_worklist, e->dest->index))
461 {
462 /* Add E->DEST to current round. */
463 SET_BIT (in_worklist, e->dest->index);
464 fibheap_insert (worklist, bb_order[e->dest->index],
465 e->dest);
466 }
467 }
468 }
469 }
470
471 if (!cfun->machine->rescan_vzeroupper_p)
472 break;
473 }
474
475 free (bb_order);
476 fibheap_delete (worklist);
477 fibheap_delete (pending);
478 sbitmap_free (visited);
479 sbitmap_free (in_worklist);
480 sbitmap_free (in_pending);
481
482 if (dump_file)
483 fprintf (dump_file, "Process remaining basic blocks\n");
484
485 FOR_EACH_BB (bb)
486 move_or_delete_vzeroupper_1 (bb, true);
487
488 free_aux_for_blocks ();
489 }
490
491 static rtx legitimize_dllimport_symbol (rtx, bool);
492
493 #ifndef CHECK_STACK_LIMIT
494 #define CHECK_STACK_LIMIT (-1)
495 #endif
496
497 /* Return index of given mode in mult and division cost tables. */
498 #define MODE_INDEX(mode) \
499 ((mode) == QImode ? 0 \
500 : (mode) == HImode ? 1 \
501 : (mode) == SImode ? 2 \
502 : (mode) == DImode ? 3 \
503 : 4)
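/* Illustration: MODE_INDEX (SImode) == 2, so SImode picks the third entry
   of the five-element multiply and divide cost arrays below, which are
   laid out as {QI, HI, SI, DI, other}.  */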
504
505 /* Processor costs (relative to an add) */
506 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
507 #define COSTS_N_BYTES(N) ((N) * 2)
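/* Illustration of the assumption above: with COSTS_N_INSNS (N) == (N) * 4,
   COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so a two-byte add in the
   size table carries the same weight as one add in the speed tables.  */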
508
509 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
510
511 const
512 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
513 COSTS_N_BYTES (2), /* cost of an add instruction */
514 COSTS_N_BYTES (3), /* cost of a lea instruction */
515 COSTS_N_BYTES (2), /* variable shift costs */
516 COSTS_N_BYTES (3), /* constant shift costs */
517 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
518 COSTS_N_BYTES (3), /* HI */
519 COSTS_N_BYTES (3), /* SI */
520 COSTS_N_BYTES (3), /* DI */
521 COSTS_N_BYTES (5)}, /* other */
522 0, /* cost of multiply per each bit set */
523 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
524 COSTS_N_BYTES (3), /* HI */
525 COSTS_N_BYTES (3), /* SI */
526 COSTS_N_BYTES (3), /* DI */
527 COSTS_N_BYTES (5)}, /* other */
528 COSTS_N_BYTES (3), /* cost of movsx */
529 COSTS_N_BYTES (3), /* cost of movzx */
530 0, /* "large" insn */
531 2, /* MOVE_RATIO */
532 2, /* cost for loading QImode using movzbl */
533 {2, 2, 2}, /* cost of loading integer registers
534 in QImode, HImode and SImode.
535 Relative to reg-reg move (2). */
536 {2, 2, 2}, /* cost of storing integer registers */
537 2, /* cost of reg,reg fld/fst */
538 {2, 2, 2}, /* cost of loading fp registers
539 in SFmode, DFmode and XFmode */
540 {2, 2, 2}, /* cost of storing fp registers
541 in SFmode, DFmode and XFmode */
542 3, /* cost of moving MMX register */
543 {3, 3}, /* cost of loading MMX registers
544 in SImode and DImode */
545 {3, 3}, /* cost of storing MMX registers
546 in SImode and DImode */
547 3, /* cost of moving SSE register */
548 {3, 3, 3}, /* cost of loading SSE registers
549 in SImode, DImode and TImode */
550 {3, 3, 3}, /* cost of storing SSE registers
551 in SImode, DImode and TImode */
552 3, /* MMX or SSE register to integer */
553 0, /* size of l1 cache */
554 0, /* size of l2 cache */
555 0, /* size of prefetch block */
556 0, /* number of parallel prefetches */
557 2, /* Branch cost */
558 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
559 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
560 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
561 COSTS_N_BYTES (2), /* cost of FABS instruction. */
562 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
563 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
564 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
565 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
566 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
567 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
568 1, /* scalar_stmt_cost. */
569 1, /* scalar load_cost. */
570 1, /* scalar_store_cost. */
571 1, /* vec_stmt_cost. */
572 1, /* vec_to_scalar_cost. */
573 1, /* scalar_to_vec_cost. */
574 1, /* vec_align_load_cost. */
575 1, /* vec_unalign_load_cost. */
576 1, /* vec_store_cost. */
577 1, /* cond_taken_branch_cost. */
578 1, /* cond_not_taken_branch_cost. */
579 };
580
581 /* Processor costs (relative to an add) */
582 static const
583 struct processor_costs i386_cost = { /* 386 specific costs */
584 COSTS_N_INSNS (1), /* cost of an add instruction */
585 COSTS_N_INSNS (1), /* cost of a lea instruction */
586 COSTS_N_INSNS (3), /* variable shift costs */
587 COSTS_N_INSNS (2), /* constant shift costs */
588 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
589 COSTS_N_INSNS (6), /* HI */
590 COSTS_N_INSNS (6), /* SI */
591 COSTS_N_INSNS (6), /* DI */
592 COSTS_N_INSNS (6)}, /* other */
593 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
594 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
595 COSTS_N_INSNS (23), /* HI */
596 COSTS_N_INSNS (23), /* SI */
597 COSTS_N_INSNS (23), /* DI */
598 COSTS_N_INSNS (23)}, /* other */
599 COSTS_N_INSNS (3), /* cost of movsx */
600 COSTS_N_INSNS (2), /* cost of movzx */
601 15, /* "large" insn */
602 3, /* MOVE_RATIO */
603 4, /* cost for loading QImode using movzbl */
604 {2, 4, 2}, /* cost of loading integer registers
605 in QImode, HImode and SImode.
606 Relative to reg-reg move (2). */
607 {2, 4, 2}, /* cost of storing integer registers */
608 2, /* cost of reg,reg fld/fst */
609 {8, 8, 8}, /* cost of loading fp registers
610 in SFmode, DFmode and XFmode */
611 {8, 8, 8}, /* cost of storing fp registers
612 in SFmode, DFmode and XFmode */
613 2, /* cost of moving MMX register */
614 {4, 8}, /* cost of loading MMX registers
615 in SImode and DImode */
616 {4, 8}, /* cost of storing MMX registers
617 in SImode and DImode */
618 2, /* cost of moving SSE register */
619 {4, 8, 16}, /* cost of loading SSE registers
620 in SImode, DImode and TImode */
621 {4, 8, 16}, /* cost of storing SSE registers
622 in SImode, DImode and TImode */
623 3, /* MMX or SSE register to integer */
624 0, /* size of l1 cache */
625 0, /* size of l2 cache */
626 0, /* size of prefetch block */
627 0, /* number of parallel prefetches */
628 1, /* Branch cost */
629 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
630 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
631 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
632 COSTS_N_INSNS (22), /* cost of FABS instruction. */
633 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
634 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
635 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
636 DUMMY_STRINGOP_ALGS},
637 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
638 DUMMY_STRINGOP_ALGS},
639 1, /* scalar_stmt_cost. */
640 1, /* scalar load_cost. */
641 1, /* scalar_store_cost. */
642 1, /* vec_stmt_cost. */
643 1, /* vec_to_scalar_cost. */
644 1, /* scalar_to_vec_cost. */
645 1, /* vec_align_load_cost. */
646 2, /* vec_unalign_load_cost. */
647 1, /* vec_store_cost. */
648 3, /* cond_taken_branch_cost. */
649 1, /* cond_not_taken_branch_cost. */
650 };
651
652 static const
653 struct processor_costs i486_cost = { /* 486 specific costs */
654 COSTS_N_INSNS (1), /* cost of an add instruction */
655 COSTS_N_INSNS (1), /* cost of a lea instruction */
656 COSTS_N_INSNS (3), /* variable shift costs */
657 COSTS_N_INSNS (2), /* constant shift costs */
658 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
659 COSTS_N_INSNS (12), /* HI */
660 COSTS_N_INSNS (12), /* SI */
661 COSTS_N_INSNS (12), /* DI */
662 COSTS_N_INSNS (12)}, /* other */
663 1, /* cost of multiply per each bit set */
664 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
665 COSTS_N_INSNS (40), /* HI */
666 COSTS_N_INSNS (40), /* SI */
667 COSTS_N_INSNS (40), /* DI */
668 COSTS_N_INSNS (40)}, /* other */
669 COSTS_N_INSNS (3), /* cost of movsx */
670 COSTS_N_INSNS (2), /* cost of movzx */
671 15, /* "large" insn */
672 3, /* MOVE_RATIO */
673 4, /* cost for loading QImode using movzbl */
674 {2, 4, 2}, /* cost of loading integer registers
675 in QImode, HImode and SImode.
676 Relative to reg-reg move (2). */
677 {2, 4, 2}, /* cost of storing integer registers */
678 2, /* cost of reg,reg fld/fst */
679 {8, 8, 8}, /* cost of loading fp registers
680 in SFmode, DFmode and XFmode */
681 {8, 8, 8}, /* cost of storing fp registers
682 in SFmode, DFmode and XFmode */
683 2, /* cost of moving MMX register */
684 {4, 8}, /* cost of loading MMX registers
685 in SImode and DImode */
686 {4, 8}, /* cost of storing MMX registers
687 in SImode and DImode */
688 2, /* cost of moving SSE register */
689 {4, 8, 16}, /* cost of loading SSE registers
690 in SImode, DImode and TImode */
691 {4, 8, 16}, /* cost of storing SSE registers
692 in SImode, DImode and TImode */
693 3, /* MMX or SSE register to integer */
694 4, /* size of l1 cache. 486 has 8kB cache
695 shared for code and data, so 4kB is
696 not really precise. */
697 4, /* size of l2 cache */
698 0, /* size of prefetch block */
699 0, /* number of parallel prefetches */
700 1, /* Branch cost */
701 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
702 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
703 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
704 COSTS_N_INSNS (3), /* cost of FABS instruction. */
705 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
706 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
707 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
708 DUMMY_STRINGOP_ALGS},
709 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
710 DUMMY_STRINGOP_ALGS},
711 1, /* scalar_stmt_cost. */
712 1, /* scalar load_cost. */
713 1, /* scalar_store_cost. */
714 1, /* vec_stmt_cost. */
715 1, /* vec_to_scalar_cost. */
716 1, /* scalar_to_vec_cost. */
717 1, /* vec_align_load_cost. */
718 2, /* vec_unalign_load_cost. */
719 1, /* vec_store_cost. */
720 3, /* cond_taken_branch_cost. */
721 1, /* cond_not_taken_branch_cost. */
722 };
723
724 static const
725 struct processor_costs pentium_cost = {
726 COSTS_N_INSNS (1), /* cost of an add instruction */
727 COSTS_N_INSNS (1), /* cost of a lea instruction */
728 COSTS_N_INSNS (4), /* variable shift costs */
729 COSTS_N_INSNS (1), /* constant shift costs */
730 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
731 COSTS_N_INSNS (11), /* HI */
732 COSTS_N_INSNS (11), /* SI */
733 COSTS_N_INSNS (11), /* DI */
734 COSTS_N_INSNS (11)}, /* other */
735 0, /* cost of multiply per each bit set */
736 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
737 COSTS_N_INSNS (25), /* HI */
738 COSTS_N_INSNS (25), /* SI */
739 COSTS_N_INSNS (25), /* DI */
740 COSTS_N_INSNS (25)}, /* other */
741 COSTS_N_INSNS (3), /* cost of movsx */
742 COSTS_N_INSNS (2), /* cost of movzx */
743 8, /* "large" insn */
744 6, /* MOVE_RATIO */
745 6, /* cost for loading QImode using movzbl */
746 {2, 4, 2}, /* cost of loading integer registers
747 in QImode, HImode and SImode.
748 Relative to reg-reg move (2). */
749 {2, 4, 2}, /* cost of storing integer registers */
750 2, /* cost of reg,reg fld/fst */
751 {2, 2, 6}, /* cost of loading fp registers
752 in SFmode, DFmode and XFmode */
753 {4, 4, 6}, /* cost of storing fp registers
754 in SFmode, DFmode and XFmode */
755 8, /* cost of moving MMX register */
756 {8, 8}, /* cost of loading MMX registers
757 in SImode and DImode */
758 {8, 8}, /* cost of storing MMX registers
759 in SImode and DImode */
760 2, /* cost of moving SSE register */
761 {4, 8, 16}, /* cost of loading SSE registers
762 in SImode, DImode and TImode */
763 {4, 8, 16}, /* cost of storing SSE registers
764 in SImode, DImode and TImode */
765 3, /* MMX or SSE register to integer */
766 8, /* size of l1 cache. */
767 8, /* size of l2 cache */
768 0, /* size of prefetch block */
769 0, /* number of parallel prefetches */
770 2, /* Branch cost */
771 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
772 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
773 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
774 COSTS_N_INSNS (1), /* cost of FABS instruction. */
775 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
776 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
777 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
778 DUMMY_STRINGOP_ALGS},
779 {{libcall, {{-1, rep_prefix_4_byte}}},
780 DUMMY_STRINGOP_ALGS},
781 1, /* scalar_stmt_cost. */
782 1, /* scalar load_cost. */
783 1, /* scalar_store_cost. */
784 1, /* vec_stmt_cost. */
785 1, /* vec_to_scalar_cost. */
786 1, /* scalar_to_vec_cost. */
787 1, /* vec_align_load_cost. */
788 2, /* vec_unalign_load_cost. */
789 1, /* vec_store_cost. */
790 3, /* cond_taken_branch_cost. */
791 1, /* cond_not_taken_branch_cost. */
792 };
793
794 static const
795 struct processor_costs pentiumpro_cost = {
796 COSTS_N_INSNS (1), /* cost of an add instruction */
797 COSTS_N_INSNS (1), /* cost of a lea instruction */
798 COSTS_N_INSNS (1), /* variable shift costs */
799 COSTS_N_INSNS (1), /* constant shift costs */
800 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
801 COSTS_N_INSNS (4), /* HI */
802 COSTS_N_INSNS (4), /* SI */
803 COSTS_N_INSNS (4), /* DI */
804 COSTS_N_INSNS (4)}, /* other */
805 0, /* cost of multiply per each bit set */
806 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
807 COSTS_N_INSNS (17), /* HI */
808 COSTS_N_INSNS (17), /* SI */
809 COSTS_N_INSNS (17), /* DI */
810 COSTS_N_INSNS (17)}, /* other */
811 COSTS_N_INSNS (1), /* cost of movsx */
812 COSTS_N_INSNS (1), /* cost of movzx */
813 8, /* "large" insn */
814 6, /* MOVE_RATIO */
815 2, /* cost for loading QImode using movzbl */
816 {4, 4, 4}, /* cost of loading integer registers
817 in QImode, HImode and SImode.
818 Relative to reg-reg move (2). */
819 {2, 2, 2}, /* cost of storing integer registers */
820 2, /* cost of reg,reg fld/fst */
821 {2, 2, 6}, /* cost of loading fp registers
822 in SFmode, DFmode and XFmode */
823 {4, 4, 6}, /* cost of storing fp registers
824 in SFmode, DFmode and XFmode */
825 2, /* cost of moving MMX register */
826 {2, 2}, /* cost of loading MMX registers
827 in SImode and DImode */
828 {2, 2}, /* cost of storing MMX registers
829 in SImode and DImode */
830 2, /* cost of moving SSE register */
831 {2, 2, 8}, /* cost of loading SSE registers
832 in SImode, DImode and TImode */
833 {2, 2, 8}, /* cost of storing SSE registers
834 in SImode, DImode and TImode */
835 3, /* MMX or SSE register to integer */
836 8, /* size of l1 cache. */
837 256, /* size of l2 cache */
838 32, /* size of prefetch block */
839 6, /* number of parallel prefetches */
840 2, /* Branch cost */
841 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
842 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
843 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
844 COSTS_N_INSNS (2), /* cost of FABS instruction. */
845 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
846 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
847 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
848 (we ensure the alignment). For small blocks inline loop is still a
  849      noticeable win, for bigger blocks either rep movsl or rep movsb is the
  850      way to go.  Rep movsb apparently has a more expensive startup time in the CPU,
851 but after 4K the difference is down in the noise. */
852 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
853 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
854 DUMMY_STRINGOP_ALGS},
855 {{rep_prefix_4_byte, {{1024, unrolled_loop},
856 {8192, rep_prefix_4_byte}, {-1, libcall}}},
857 DUMMY_STRINGOP_ALGS},
858 1, /* scalar_stmt_cost. */
859 1, /* scalar load_cost. */
860 1, /* scalar_store_cost. */
861 1, /* vec_stmt_cost. */
862 1, /* vec_to_scalar_cost. */
863 1, /* scalar_to_vec_cost. */
864 1, /* vec_align_load_cost. */
865 2, /* vec_unalign_load_cost. */
866 1, /* vec_store_cost. */
867 3, /* cond_taken_branch_cost. */
868 1, /* cond_not_taken_branch_cost. */
869 };
870
871 static const
872 struct processor_costs geode_cost = {
873 COSTS_N_INSNS (1), /* cost of an add instruction */
874 COSTS_N_INSNS (1), /* cost of a lea instruction */
875 COSTS_N_INSNS (2), /* variable shift costs */
876 COSTS_N_INSNS (1), /* constant shift costs */
877 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
878 COSTS_N_INSNS (4), /* HI */
879 COSTS_N_INSNS (7), /* SI */
880 COSTS_N_INSNS (7), /* DI */
881 COSTS_N_INSNS (7)}, /* other */
882 0, /* cost of multiply per each bit set */
883 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
884 COSTS_N_INSNS (23), /* HI */
885 COSTS_N_INSNS (39), /* SI */
886 COSTS_N_INSNS (39), /* DI */
887 COSTS_N_INSNS (39)}, /* other */
888 COSTS_N_INSNS (1), /* cost of movsx */
889 COSTS_N_INSNS (1), /* cost of movzx */
890 8, /* "large" insn */
891 4, /* MOVE_RATIO */
892 1, /* cost for loading QImode using movzbl */
893 {1, 1, 1}, /* cost of loading integer registers
894 in QImode, HImode and SImode.
895 Relative to reg-reg move (2). */
896 {1, 1, 1}, /* cost of storing integer registers */
897 1, /* cost of reg,reg fld/fst */
898 {1, 1, 1}, /* cost of loading fp registers
899 in SFmode, DFmode and XFmode */
900 {4, 6, 6}, /* cost of storing fp registers
901 in SFmode, DFmode and XFmode */
902
903 1, /* cost of moving MMX register */
904 {1, 1}, /* cost of loading MMX registers
905 in SImode and DImode */
906 {1, 1}, /* cost of storing MMX registers
907 in SImode and DImode */
908 1, /* cost of moving SSE register */
909 {1, 1, 1}, /* cost of loading SSE registers
910 in SImode, DImode and TImode */
911 {1, 1, 1}, /* cost of storing SSE registers
912 in SImode, DImode and TImode */
913 1, /* MMX or SSE register to integer */
914 64, /* size of l1 cache. */
915 128, /* size of l2 cache. */
916 32, /* size of prefetch block */
917 1, /* number of parallel prefetches */
918 1, /* Branch cost */
919 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
920 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
921 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
922 COSTS_N_INSNS (1), /* cost of FABS instruction. */
923 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
924 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
925 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
926 DUMMY_STRINGOP_ALGS},
927 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
928 DUMMY_STRINGOP_ALGS},
929 1, /* scalar_stmt_cost. */
930 1, /* scalar load_cost. */
931 1, /* scalar_store_cost. */
932 1, /* vec_stmt_cost. */
933 1, /* vec_to_scalar_cost. */
934 1, /* scalar_to_vec_cost. */
935 1, /* vec_align_load_cost. */
936 2, /* vec_unalign_load_cost. */
937 1, /* vec_store_cost. */
938 3, /* cond_taken_branch_cost. */
939 1, /* cond_not_taken_branch_cost. */
940 };
941
942 static const
943 struct processor_costs k6_cost = {
944 COSTS_N_INSNS (1), /* cost of an add instruction */
945 COSTS_N_INSNS (2), /* cost of a lea instruction */
946 COSTS_N_INSNS (1), /* variable shift costs */
947 COSTS_N_INSNS (1), /* constant shift costs */
948 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
949 COSTS_N_INSNS (3), /* HI */
950 COSTS_N_INSNS (3), /* SI */
951 COSTS_N_INSNS (3), /* DI */
952 COSTS_N_INSNS (3)}, /* other */
953 0, /* cost of multiply per each bit set */
954 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
955 COSTS_N_INSNS (18), /* HI */
956 COSTS_N_INSNS (18), /* SI */
957 COSTS_N_INSNS (18), /* DI */
958 COSTS_N_INSNS (18)}, /* other */
959 COSTS_N_INSNS (2), /* cost of movsx */
960 COSTS_N_INSNS (2), /* cost of movzx */
961 8, /* "large" insn */
962 4, /* MOVE_RATIO */
963 3, /* cost for loading QImode using movzbl */
964 {4, 5, 4}, /* cost of loading integer registers
965 in QImode, HImode and SImode.
966 Relative to reg-reg move (2). */
967 {2, 3, 2}, /* cost of storing integer registers */
968 4, /* cost of reg,reg fld/fst */
969 {6, 6, 6}, /* cost of loading fp registers
970 in SFmode, DFmode and XFmode */
971 {4, 4, 4}, /* cost of storing fp registers
972 in SFmode, DFmode and XFmode */
973 2, /* cost of moving MMX register */
974 {2, 2}, /* cost of loading MMX registers
975 in SImode and DImode */
976 {2, 2}, /* cost of storing MMX registers
977 in SImode and DImode */
978 2, /* cost of moving SSE register */
979 {2, 2, 8}, /* cost of loading SSE registers
980 in SImode, DImode and TImode */
981 {2, 2, 8}, /* cost of storing SSE registers
982 in SImode, DImode and TImode */
983 6, /* MMX or SSE register to integer */
984 32, /* size of l1 cache. */
985 32, /* size of l2 cache. Some models
986 have integrated l2 cache, but
987 optimizing for k6 is not important
988 enough to worry about that. */
989 32, /* size of prefetch block */
990 1, /* number of parallel prefetches */
991 1, /* Branch cost */
992 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
993 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
994 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
995 COSTS_N_INSNS (2), /* cost of FABS instruction. */
996 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
997 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
998 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
999 DUMMY_STRINGOP_ALGS},
1000 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
1001 DUMMY_STRINGOP_ALGS},
1002 1, /* scalar_stmt_cost. */
1003 1, /* scalar load_cost. */
1004 1, /* scalar_store_cost. */
1005 1, /* vec_stmt_cost. */
1006 1, /* vec_to_scalar_cost. */
1007 1, /* scalar_to_vec_cost. */
1008 1, /* vec_align_load_cost. */
1009 2, /* vec_unalign_load_cost. */
1010 1, /* vec_store_cost. */
1011 3, /* cond_taken_branch_cost. */
1012 1, /* cond_not_taken_branch_cost. */
1013 };
1014
1015 static const
1016 struct processor_costs athlon_cost = {
1017 COSTS_N_INSNS (1), /* cost of an add instruction */
1018 COSTS_N_INSNS (2), /* cost of a lea instruction */
1019 COSTS_N_INSNS (1), /* variable shift costs */
1020 COSTS_N_INSNS (1), /* constant shift costs */
1021 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
1022 COSTS_N_INSNS (5), /* HI */
1023 COSTS_N_INSNS (5), /* SI */
1024 COSTS_N_INSNS (5), /* DI */
1025 COSTS_N_INSNS (5)}, /* other */
1026 0, /* cost of multiply per each bit set */
1027 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1028 COSTS_N_INSNS (26), /* HI */
1029 COSTS_N_INSNS (42), /* SI */
1030 COSTS_N_INSNS (74), /* DI */
1031 COSTS_N_INSNS (74)}, /* other */
1032 COSTS_N_INSNS (1), /* cost of movsx */
1033 COSTS_N_INSNS (1), /* cost of movzx */
1034 8, /* "large" insn */
1035 9, /* MOVE_RATIO */
1036 4, /* cost for loading QImode using movzbl */
1037 {3, 4, 3}, /* cost of loading integer registers
1038 in QImode, HImode and SImode.
1039 Relative to reg-reg move (2). */
1040 {3, 4, 3}, /* cost of storing integer registers */
1041 4, /* cost of reg,reg fld/fst */
1042 {4, 4, 12}, /* cost of loading fp registers
1043 in SFmode, DFmode and XFmode */
1044 {6, 6, 8}, /* cost of storing fp registers
1045 in SFmode, DFmode and XFmode */
1046 2, /* cost of moving MMX register */
1047 {4, 4}, /* cost of loading MMX registers
1048 in SImode and DImode */
1049 {4, 4}, /* cost of storing MMX registers
1050 in SImode and DImode */
1051 2, /* cost of moving SSE register */
1052 {4, 4, 6}, /* cost of loading SSE registers
1053 in SImode, DImode and TImode */
1054 {4, 4, 5}, /* cost of storing SSE registers
1055 in SImode, DImode and TImode */
1056 5, /* MMX or SSE register to integer */
1057 64, /* size of l1 cache. */
1058 256, /* size of l2 cache. */
1059 64, /* size of prefetch block */
1060 6, /* number of parallel prefetches */
1061 5, /* Branch cost */
1062 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1063 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1064 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
1065 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1066 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1067 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1068 /* For some reason, Athlon deals better with REP prefix (relative to loops)
1069 compared to K8. Alignment becomes important after 8 bytes for memcpy and
1070 128 bytes for memset. */
1071 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
1072 DUMMY_STRINGOP_ALGS},
1073 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
1074 DUMMY_STRINGOP_ALGS},
1075 1, /* scalar_stmt_cost. */
1076 1, /* scalar load_cost. */
1077 1, /* scalar_store_cost. */
1078 1, /* vec_stmt_cost. */
1079 1, /* vec_to_scalar_cost. */
1080 1, /* scalar_to_vec_cost. */
1081 1, /* vec_align_load_cost. */
1082 2, /* vec_unalign_load_cost. */
1083 1, /* vec_store_cost. */
1084 3, /* cond_taken_branch_cost. */
1085 1, /* cond_not_taken_branch_cost. */
1086 };
1087
1088 static const
1089 struct processor_costs k8_cost = {
1090 COSTS_N_INSNS (1), /* cost of an add instruction */
1091 COSTS_N_INSNS (2), /* cost of a lea instruction */
1092 COSTS_N_INSNS (1), /* variable shift costs */
1093 COSTS_N_INSNS (1), /* constant shift costs */
1094 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1095 COSTS_N_INSNS (4), /* HI */
1096 COSTS_N_INSNS (3), /* SI */
1097 COSTS_N_INSNS (4), /* DI */
1098 COSTS_N_INSNS (5)}, /* other */
1099 0, /* cost of multiply per each bit set */
1100 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1101 COSTS_N_INSNS (26), /* HI */
1102 COSTS_N_INSNS (42), /* SI */
1103 COSTS_N_INSNS (74), /* DI */
1104 COSTS_N_INSNS (74)}, /* other */
1105 COSTS_N_INSNS (1), /* cost of movsx */
1106 COSTS_N_INSNS (1), /* cost of movzx */
1107 8, /* "large" insn */
1108 9, /* MOVE_RATIO */
1109 4, /* cost for loading QImode using movzbl */
1110 {3, 4, 3}, /* cost of loading integer registers
1111 in QImode, HImode and SImode.
1112 Relative to reg-reg move (2). */
1113 {3, 4, 3}, /* cost of storing integer registers */
1114 4, /* cost of reg,reg fld/fst */
1115 {4, 4, 12}, /* cost of loading fp registers
1116 in SFmode, DFmode and XFmode */
1117 {6, 6, 8}, /* cost of storing fp registers
1118 in SFmode, DFmode and XFmode */
1119 2, /* cost of moving MMX register */
1120 {3, 3}, /* cost of loading MMX registers
1121 in SImode and DImode */
1122 {4, 4}, /* cost of storing MMX registers
1123 in SImode and DImode */
1124 2, /* cost of moving SSE register */
1125 {4, 3, 6}, /* cost of loading SSE registers
1126 in SImode, DImode and TImode */
1127 {4, 4, 5}, /* cost of storing SSE registers
1128 in SImode, DImode and TImode */
1129 5, /* MMX or SSE register to integer */
1130 64, /* size of l1 cache. */
1131 512, /* size of l2 cache. */
1132 64, /* size of prefetch block */
1133 /* New AMD processors never drop prefetches; if they cannot be performed
1134 immediately, they are queued. We set number of simultaneous prefetches
1135 to a large constant to reflect this (it probably is not a good idea not
1136 to limit number of prefetches at all, as their execution also takes some
1137 time). */
1138 100, /* number of parallel prefetches */
1139 3, /* Branch cost */
1140 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1141 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1142 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1143 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1144 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1145 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1146 /* K8 has optimized REP instruction for medium sized blocks, but for very
1147 small blocks it is better to use loop. For large blocks, libcall can
  1148    do nontemporal accesses and beat inline considerably.  */
1149 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1150 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1151 {{libcall, {{8, loop}, {24, unrolled_loop},
1152 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1153 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1154 4, /* scalar_stmt_cost. */
1155 2, /* scalar load_cost. */
1156 2, /* scalar_store_cost. */
1157 5, /* vec_stmt_cost. */
1158 0, /* vec_to_scalar_cost. */
1159 2, /* scalar_to_vec_cost. */
1160 2, /* vec_align_load_cost. */
1161 3, /* vec_unalign_load_cost. */
1162 3, /* vec_store_cost. */
1163 3, /* cond_taken_branch_cost. */
1164 2, /* cond_not_taken_branch_cost. */
1165 };
1166
1167 struct processor_costs amdfam10_cost = {
1168 COSTS_N_INSNS (1), /* cost of an add instruction */
1169 COSTS_N_INSNS (2), /* cost of a lea instruction */
1170 COSTS_N_INSNS (1), /* variable shift costs */
1171 COSTS_N_INSNS (1), /* constant shift costs */
1172 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1173 COSTS_N_INSNS (4), /* HI */
1174 COSTS_N_INSNS (3), /* SI */
1175 COSTS_N_INSNS (4), /* DI */
1176 COSTS_N_INSNS (5)}, /* other */
1177 0, /* cost of multiply per each bit set */
1178 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1179 COSTS_N_INSNS (35), /* HI */
1180 COSTS_N_INSNS (51), /* SI */
1181 COSTS_N_INSNS (83), /* DI */
1182 COSTS_N_INSNS (83)}, /* other */
1183 COSTS_N_INSNS (1), /* cost of movsx */
1184 COSTS_N_INSNS (1), /* cost of movzx */
1185 8, /* "large" insn */
1186 9, /* MOVE_RATIO */
1187 4, /* cost for loading QImode using movzbl */
1188 {3, 4, 3}, /* cost of loading integer registers
1189 in QImode, HImode and SImode.
1190 Relative to reg-reg move (2). */
1191 {3, 4, 3}, /* cost of storing integer registers */
1192 4, /* cost of reg,reg fld/fst */
1193 {4, 4, 12}, /* cost of loading fp registers
1194 in SFmode, DFmode and XFmode */
1195 {6, 6, 8}, /* cost of storing fp registers
1196 in SFmode, DFmode and XFmode */
1197 2, /* cost of moving MMX register */
1198 {3, 3}, /* cost of loading MMX registers
1199 in SImode and DImode */
1200 {4, 4}, /* cost of storing MMX registers
1201 in SImode and DImode */
1202 2, /* cost of moving SSE register */
1203 {4, 4, 3}, /* cost of loading SSE registers
1204 in SImode, DImode and TImode */
1205 {4, 4, 5}, /* cost of storing SSE registers
1206 in SImode, DImode and TImode */
1207 3, /* MMX or SSE register to integer */
1208 /* On K8:
1209 MOVD reg64, xmmreg Double FSTORE 4
1210 MOVD reg32, xmmreg Double FSTORE 4
1211 On AMDFAM10:
1212 MOVD reg64, xmmreg Double FADD 3
1213 1/1 1/1
1214 MOVD reg32, xmmreg Double FADD 3
1215 1/1 1/1 */
1216 64, /* size of l1 cache. */
1217 512, /* size of l2 cache. */
1218 64, /* size of prefetch block */
1219 /* New AMD processors never drop prefetches; if they cannot be performed
1220 immediately, they are queued. We set number of simultaneous prefetches
1221 to a large constant to reflect this (it probably is not a good idea not
1222 to limit number of prefetches at all, as their execution also takes some
1223 time). */
1224 100, /* number of parallel prefetches */
1225 2, /* Branch cost */
1226 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1227 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1228 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1229 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1230 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1231 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1232
1233 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
1234 very small blocks it is better to use loop. For large blocks, libcall can
  1235      do nontemporal accesses and beat inline considerably.  */
1236 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1237 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1238 {{libcall, {{8, loop}, {24, unrolled_loop},
1239 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1240 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1241 4, /* scalar_stmt_cost. */
1242 2, /* scalar load_cost. */
1243 2, /* scalar_store_cost. */
1244 6, /* vec_stmt_cost. */
1245 0, /* vec_to_scalar_cost. */
1246 2, /* scalar_to_vec_cost. */
1247 2, /* vec_align_load_cost. */
1248 2, /* vec_unalign_load_cost. */
1249 2, /* vec_store_cost. */
1250 2, /* cond_taken_branch_cost. */
1251 1, /* cond_not_taken_branch_cost. */
1252 };
1253
1254 struct processor_costs bdver1_cost = {
1255 COSTS_N_INSNS (1), /* cost of an add instruction */
1256 COSTS_N_INSNS (1), /* cost of a lea instruction */
1257 COSTS_N_INSNS (1), /* variable shift costs */
1258 COSTS_N_INSNS (1), /* constant shift costs */
1259 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1260 COSTS_N_INSNS (4), /* HI */
1261 COSTS_N_INSNS (4), /* SI */
1262 COSTS_N_INSNS (6), /* DI */
1263 COSTS_N_INSNS (6)}, /* other */
1264 0, /* cost of multiply per each bit set */
1265 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1266 COSTS_N_INSNS (35), /* HI */
1267 COSTS_N_INSNS (51), /* SI */
1268 COSTS_N_INSNS (83), /* DI */
1269 COSTS_N_INSNS (83)}, /* other */
1270 COSTS_N_INSNS (1), /* cost of movsx */
1271 COSTS_N_INSNS (1), /* cost of movzx */
1272 8, /* "large" insn */
1273 9, /* MOVE_RATIO */
1274 4, /* cost for loading QImode using movzbl */
1275 {5, 5, 4}, /* cost of loading integer registers
1276 in QImode, HImode and SImode.
1277 Relative to reg-reg move (2). */
1278 {4, 4, 4}, /* cost of storing integer registers */
1279 2, /* cost of reg,reg fld/fst */
1280 {5, 5, 12}, /* cost of loading fp registers
1281 in SFmode, DFmode and XFmode */
1282 {4, 4, 8}, /* cost of storing fp registers
1283 in SFmode, DFmode and XFmode */
1284 2, /* cost of moving MMX register */
1285 {4, 4}, /* cost of loading MMX registers
1286 in SImode and DImode */
1287 {4, 4}, /* cost of storing MMX registers
1288 in SImode and DImode */
1289 2, /* cost of moving SSE register */
1290 {4, 4, 4}, /* cost of loading SSE registers
1291 in SImode, DImode and TImode */
1292 {4, 4, 4}, /* cost of storing SSE registers
1293 in SImode, DImode and TImode */
1294 2, /* MMX or SSE register to integer */
1295 /* On K8:
1296 MOVD reg64, xmmreg Double FSTORE 4
1297 MOVD reg32, xmmreg Double FSTORE 4
1298 On AMDFAM10:
1299 MOVD reg64, xmmreg Double FADD 3
1300 1/1 1/1
1301 MOVD reg32, xmmreg Double FADD 3
1302 1/1 1/1 */
1303 16, /* size of l1 cache. */
1304 2048, /* size of l2 cache. */
1305 64, /* size of prefetch block */
1306 /* New AMD processors never drop prefetches; if they cannot be performed
1307 immediately, they are queued. We set number of simultaneous prefetches
1308 to a large constant to reflect this (it probably is not a good idea not
1309 to limit number of prefetches at all, as their execution also takes some
1310 time). */
1311 100, /* number of parallel prefetches */
1312 2, /* Branch cost */
1313 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1314 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1315 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1316 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1317 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1318 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1319
1320 /* BDVER1 has optimized REP instruction for medium sized blocks, but for
1321 very small blocks it is better to use loop. For large blocks, libcall
  1322      can do nontemporal accesses and beat inline considerably.  */
1323 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1324 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1325 {{libcall, {{8, loop}, {24, unrolled_loop},
1326 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1327 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1328 6, /* scalar_stmt_cost. */
1329 4, /* scalar load_cost. */
1330 4, /* scalar_store_cost. */
1331 6, /* vec_stmt_cost. */
1332 0, /* vec_to_scalar_cost. */
1333 2, /* scalar_to_vec_cost. */
1334 4, /* vec_align_load_cost. */
1335 4, /* vec_unalign_load_cost. */
1336 4, /* vec_store_cost. */
1337 2, /* cond_taken_branch_cost. */
1338 1, /* cond_not_taken_branch_cost. */
1339 };
1340
1341 struct processor_costs bdver2_cost = {
1342 COSTS_N_INSNS (1), /* cost of an add instruction */
1343 COSTS_N_INSNS (1), /* cost of a lea instruction */
1344 COSTS_N_INSNS (1), /* variable shift costs */
1345 COSTS_N_INSNS (1), /* constant shift costs */
1346 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1347 COSTS_N_INSNS (4), /* HI */
1348 COSTS_N_INSNS (4), /* SI */
1349 COSTS_N_INSNS (6), /* DI */
1350 COSTS_N_INSNS (6)}, /* other */
1351 0, /* cost of multiply per each bit set */
1352 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1353 COSTS_N_INSNS (35), /* HI */
1354 COSTS_N_INSNS (51), /* SI */
1355 COSTS_N_INSNS (83), /* DI */
1356 COSTS_N_INSNS (83)}, /* other */
1357 COSTS_N_INSNS (1), /* cost of movsx */
1358 COSTS_N_INSNS (1), /* cost of movzx */
1359 8, /* "large" insn */
1360 9, /* MOVE_RATIO */
1361 4, /* cost for loading QImode using movzbl */
1362 {5, 5, 4}, /* cost of loading integer registers
1363 in QImode, HImode and SImode.
1364 Relative to reg-reg move (2). */
1365 {4, 4, 4}, /* cost of storing integer registers */
1366 2, /* cost of reg,reg fld/fst */
1367 {5, 5, 12}, /* cost of loading fp registers
1368 in SFmode, DFmode and XFmode */
1369 {4, 4, 8}, /* cost of storing fp registers
1370 in SFmode, DFmode and XFmode */
1371 2, /* cost of moving MMX register */
1372 {4, 4}, /* cost of loading MMX registers
1373 in SImode and DImode */
1374 {4, 4}, /* cost of storing MMX registers
1375 in SImode and DImode */
1376 2, /* cost of moving SSE register */
1377 {4, 4, 4}, /* cost of loading SSE registers
1378 in SImode, DImode and TImode */
1379 {4, 4, 4}, /* cost of storing SSE registers
1380 in SImode, DImode and TImode */
1381 2, /* MMX or SSE register to integer */
1382 /* On K8:
1383 MOVD reg64, xmmreg Double FSTORE 4
1384 MOVD reg32, xmmreg Double FSTORE 4
1385 On AMDFAM10:
1386 MOVD reg64, xmmreg Double FADD 3
1387 1/1 1/1
1388 MOVD reg32, xmmreg Double FADD 3
1389 1/1 1/1 */
1390 16, /* size of l1 cache. */
1391 2048, /* size of l2 cache. */
1392 64, /* size of prefetch block */
1393 /* New AMD processors never drop prefetches; if they cannot be performed
1394 immediately, they are queued. We set number of simultaneous prefetches
1395 to a large constant to reflect this (it probably is not a good idea not
1396 to limit number of prefetches at all, as their execution also takes some
1397 time). */
1398 100, /* number of parallel prefetches */
1399 2, /* Branch cost */
1400 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1401 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1402 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1403 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1404 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1405 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1406
1407 /* BDVER2 has optimized REP instruction for medium sized blocks, but for
1408 very small blocks it is better to use loop. For large blocks, libcall
  1409      can do nontemporal accesses and beat inline considerably.  */
1410 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1411 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1412 {{libcall, {{8, loop}, {24, unrolled_loop},
1413 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1414 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1415 6, /* scalar_stmt_cost. */
1416 4, /* scalar load_cost. */
1417 4, /* scalar_store_cost. */
1418 6, /* vec_stmt_cost. */
1419 0, /* vec_to_scalar_cost. */
1420 2, /* scalar_to_vec_cost. */
1421 4, /* vec_align_load_cost. */
1422 4, /* vec_unalign_load_cost. */
1423 4, /* vec_store_cost. */
1424 2, /* cond_taken_branch_cost. */
1425 1, /* cond_not_taken_branch_cost. */
1426 };
1427
1428 struct processor_costs btver1_cost = {
1429 COSTS_N_INSNS (1), /* cost of an add instruction */
1430 COSTS_N_INSNS (2), /* cost of a lea instruction */
1431 COSTS_N_INSNS (1), /* variable shift costs */
1432 COSTS_N_INSNS (1), /* constant shift costs */
1433 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1434 COSTS_N_INSNS (4), /* HI */
1435 COSTS_N_INSNS (3), /* SI */
1436 COSTS_N_INSNS (4), /* DI */
1437 COSTS_N_INSNS (5)}, /* other */
1438 0, /* cost of multiply per each bit set */
1439 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1440 COSTS_N_INSNS (35), /* HI */
1441 COSTS_N_INSNS (51), /* SI */
1442 COSTS_N_INSNS (83), /* DI */
1443 COSTS_N_INSNS (83)}, /* other */
1444 COSTS_N_INSNS (1), /* cost of movsx */
1445 COSTS_N_INSNS (1), /* cost of movzx */
1446 8, /* "large" insn */
1447 9, /* MOVE_RATIO */
1448 4, /* cost for loading QImode using movzbl */
1449 {3, 4, 3}, /* cost of loading integer registers
1450 in QImode, HImode and SImode.
1451 Relative to reg-reg move (2). */
1452 {3, 4, 3}, /* cost of storing integer registers */
1453 4, /* cost of reg,reg fld/fst */
1454 {4, 4, 12}, /* cost of loading fp registers
1455 in SFmode, DFmode and XFmode */
1456 {6, 6, 8}, /* cost of storing fp registers
1457 in SFmode, DFmode and XFmode */
1458 2, /* cost of moving MMX register */
1459 {3, 3}, /* cost of loading MMX registers
1460 in SImode and DImode */
1461 {4, 4}, /* cost of storing MMX registers
1462 in SImode and DImode */
1463 2, /* cost of moving SSE register */
1464 {4, 4, 3}, /* cost of loading SSE registers
1465 in SImode, DImode and TImode */
1466 {4, 4, 5}, /* cost of storing SSE registers
1467 in SImode, DImode and TImode */
1468 3, /* MMX or SSE register to integer */
1469 /* On K8:
1470 MOVD reg64, xmmreg Double FSTORE 4
1471 MOVD reg32, xmmreg Double FSTORE 4
1472 On AMDFAM10:
1473 MOVD reg64, xmmreg Double FADD 3
1474 1/1 1/1
1475 MOVD reg32, xmmreg Double FADD 3
1476 1/1 1/1 */
1477 32, /* size of l1 cache. */
1478 512, /* size of l2 cache. */
1479 64, /* size of prefetch block */
1480 100, /* number of parallel prefetches */
1481 2, /* Branch cost */
1482 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1483 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1484 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1485 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1486 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1487 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1488
1489 /* BTVER1 has optimized REP instruction for medium sized blocks, but for
1490 very small blocks it is better to use loop. For large blocks, libcall can
  1491      do nontemporal accesses and beat inline considerably.  */
1492 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1493 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1494 {{libcall, {{8, loop}, {24, unrolled_loop},
1495 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1496 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1497 4, /* scalar_stmt_cost. */
1498 2, /* scalar load_cost. */
1499 2, /* scalar_store_cost. */
1500 6, /* vec_stmt_cost. */
1501 0, /* vec_to_scalar_cost. */
1502 2, /* scalar_to_vec_cost. */
1503 2, /* vec_align_load_cost. */
1504 2, /* vec_unalign_load_cost. */
1505 2, /* vec_store_cost. */
1506 2, /* cond_taken_branch_cost. */
1507 1, /* cond_not_taken_branch_cost. */
1508 };
1509
1510 static const
1511 struct processor_costs pentium4_cost = {
1512 COSTS_N_INSNS (1), /* cost of an add instruction */
1513 COSTS_N_INSNS (3), /* cost of a lea instruction */
1514 COSTS_N_INSNS (4), /* variable shift costs */
1515 COSTS_N_INSNS (4), /* constant shift costs */
1516 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1517 COSTS_N_INSNS (15), /* HI */
1518 COSTS_N_INSNS (15), /* SI */
1519 COSTS_N_INSNS (15), /* DI */
1520 COSTS_N_INSNS (15)}, /* other */
1521 0, /* cost of multiply per each bit set */
1522 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1523 COSTS_N_INSNS (56), /* HI */
1524 COSTS_N_INSNS (56), /* SI */
1525 COSTS_N_INSNS (56), /* DI */
1526 COSTS_N_INSNS (56)}, /* other */
1527 COSTS_N_INSNS (1), /* cost of movsx */
1528 COSTS_N_INSNS (1), /* cost of movzx */
1529 16, /* "large" insn */
1530 6, /* MOVE_RATIO */
1531 2, /* cost for loading QImode using movzbl */
1532 {4, 5, 4}, /* cost of loading integer registers
1533 in QImode, HImode and SImode.
1534 Relative to reg-reg move (2). */
1535 {2, 3, 2}, /* cost of storing integer registers */
1536 2, /* cost of reg,reg fld/fst */
1537 {2, 2, 6}, /* cost of loading fp registers
1538 in SFmode, DFmode and XFmode */
1539 {4, 4, 6}, /* cost of storing fp registers
1540 in SFmode, DFmode and XFmode */
1541 2, /* cost of moving MMX register */
1542 {2, 2}, /* cost of loading MMX registers
1543 in SImode and DImode */
1544 {2, 2}, /* cost of storing MMX registers
1545 in SImode and DImode */
1546 12, /* cost of moving SSE register */
1547 {12, 12, 12}, /* cost of loading SSE registers
1548 in SImode, DImode and TImode */
1549 {2, 2, 8}, /* cost of storing SSE registers
1550 in SImode, DImode and TImode */
1551 10, /* MMX or SSE register to integer */
1552 8, /* size of l1 cache. */
1553 256, /* size of l2 cache. */
1554 64, /* size of prefetch block */
1555 6, /* number of parallel prefetches */
1556 2, /* Branch cost */
1557 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1558 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1559 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1560 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1561 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1562 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1563 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
1564 DUMMY_STRINGOP_ALGS},
1565 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
1566 {-1, libcall}}},
1567 DUMMY_STRINGOP_ALGS},
1568 1, /* scalar_stmt_cost. */
1569 1, /* scalar load_cost. */
1570 1, /* scalar_store_cost. */
1571 1, /* vec_stmt_cost. */
1572 1, /* vec_to_scalar_cost. */
1573 1, /* scalar_to_vec_cost. */
1574 1, /* vec_align_load_cost. */
1575 2, /* vec_unalign_load_cost. */
1576 1, /* vec_store_cost. */
1577 3, /* cond_taken_branch_cost. */
1578 1, /* cond_not_taken_branch_cost. */
1579 };
1580
1581 static const
1582 struct processor_costs nocona_cost = {
1583 COSTS_N_INSNS (1), /* cost of an add instruction */
1584 COSTS_N_INSNS (1), /* cost of a lea instruction */
1585 COSTS_N_INSNS (1), /* variable shift costs */
1586 COSTS_N_INSNS (1), /* constant shift costs */
1587 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1588 COSTS_N_INSNS (10), /* HI */
1589 COSTS_N_INSNS (10), /* SI */
1590 COSTS_N_INSNS (10), /* DI */
1591 COSTS_N_INSNS (10)}, /* other */
1592 0, /* cost of multiply per each bit set */
1593 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1594 COSTS_N_INSNS (66), /* HI */
1595 COSTS_N_INSNS (66), /* SI */
1596 COSTS_N_INSNS (66), /* DI */
1597 COSTS_N_INSNS (66)}, /* other */
1598 COSTS_N_INSNS (1), /* cost of movsx */
1599 COSTS_N_INSNS (1), /* cost of movzx */
1600 16, /* "large" insn */
1601 17, /* MOVE_RATIO */
1602 4, /* cost for loading QImode using movzbl */
1603 {4, 4, 4}, /* cost of loading integer registers
1604 in QImode, HImode and SImode.
1605 Relative to reg-reg move (2). */
1606 {4, 4, 4}, /* cost of storing integer registers */
1607 3, /* cost of reg,reg fld/fst */
1608 {12, 12, 12}, /* cost of loading fp registers
1609 in SFmode, DFmode and XFmode */
1610 {4, 4, 4}, /* cost of storing fp registers
1611 in SFmode, DFmode and XFmode */
1612 6, /* cost of moving MMX register */
1613 {12, 12}, /* cost of loading MMX registers
1614 in SImode and DImode */
1615 {12, 12}, /* cost of storing MMX registers
1616 in SImode and DImode */
1617 6, /* cost of moving SSE register */
1618 {12, 12, 12}, /* cost of loading SSE registers
1619 in SImode, DImode and TImode */
1620 {12, 12, 12}, /* cost of storing SSE registers
1621 in SImode, DImode and TImode */
1622 8, /* MMX or SSE register to integer */
1623 8, /* size of l1 cache. */
1624 1024, /* size of l2 cache. */
1625 128, /* size of prefetch block */
1626 8, /* number of parallel prefetches */
1627 1, /* Branch cost */
1628 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1629 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1630 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1631 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1632 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1633 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1634 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
1635 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
1636 {100000, unrolled_loop}, {-1, libcall}}}},
1637 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
1638 {-1, libcall}}},
1639 {libcall, {{24, loop}, {64, unrolled_loop},
1640 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1641 1, /* scalar_stmt_cost. */
1642 1, /* scalar load_cost. */
1643 1, /* scalar_store_cost. */
1644 1, /* vec_stmt_cost. */
1645 1, /* vec_to_scalar_cost. */
1646 1, /* scalar_to_vec_cost. */
1647 1, /* vec_align_load_cost. */
1648 2, /* vec_unalign_load_cost. */
1649 1, /* vec_store_cost. */
1650 3, /* cond_taken_branch_cost. */
1651 1, /* cond_not_taken_branch_cost. */
1652 };
1653
1654 static const
1655 struct processor_costs atom_cost = {
1656 COSTS_N_INSNS (1), /* cost of an add instruction */
1657 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1658 COSTS_N_INSNS (1), /* variable shift costs */
1659 COSTS_N_INSNS (1), /* constant shift costs */
1660 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1661 COSTS_N_INSNS (4), /* HI */
1662 COSTS_N_INSNS (3), /* SI */
1663 COSTS_N_INSNS (4), /* DI */
1664 COSTS_N_INSNS (2)}, /* other */
1665 0, /* cost of multiply per each bit set */
1666 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1667 COSTS_N_INSNS (26), /* HI */
1668 COSTS_N_INSNS (42), /* SI */
1669 COSTS_N_INSNS (74), /* DI */
1670 COSTS_N_INSNS (74)}, /* other */
1671 COSTS_N_INSNS (1), /* cost of movsx */
1672 COSTS_N_INSNS (1), /* cost of movzx */
1673 8, /* "large" insn */
1674 17, /* MOVE_RATIO */
1675 2, /* cost for loading QImode using movzbl */
1676 {4, 4, 4}, /* cost of loading integer registers
1677 in QImode, HImode and SImode.
1678 Relative to reg-reg move (2). */
1679 {4, 4, 4}, /* cost of storing integer registers */
1680 4, /* cost of reg,reg fld/fst */
1681 {12, 12, 12}, /* cost of loading fp registers
1682 in SFmode, DFmode and XFmode */
1683 {6, 6, 8}, /* cost of storing fp registers
1684 in SFmode, DFmode and XFmode */
1685 2, /* cost of moving MMX register */
1686 {8, 8}, /* cost of loading MMX registers
1687 in SImode and DImode */
1688 {8, 8}, /* cost of storing MMX registers
1689 in SImode and DImode */
1690 2, /* cost of moving SSE register */
1691 {8, 8, 8}, /* cost of loading SSE registers
1692 in SImode, DImode and TImode */
1693 {8, 8, 8}, /* cost of storing SSE registers
1694 in SImode, DImode and TImode */
1695 5, /* MMX or SSE register to integer */
1696 32, /* size of l1 cache. */
1697 256, /* size of l2 cache. */
1698 64, /* size of prefetch block */
1699 6, /* number of parallel prefetches */
1700 3, /* Branch cost */
1701 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1702 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1703 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1704 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1705 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1706 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1707 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1708 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1709 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1710 {{libcall, {{8, loop}, {15, unrolled_loop},
1711 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1712 {libcall, {{24, loop}, {32, unrolled_loop},
1713 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1714 1, /* scalar_stmt_cost. */
1715 1, /* scalar load_cost. */
1716 1, /* scalar_store_cost. */
1717 1, /* vec_stmt_cost. */
1718 1, /* vec_to_scalar_cost. */
1719 1, /* scalar_to_vec_cost. */
1720 1, /* vec_align_load_cost. */
1721 2, /* vec_unalign_load_cost. */
1722 1, /* vec_store_cost. */
1723 3, /* cond_taken_branch_cost. */
1724 1, /* cond_not_taken_branch_cost. */
1725 };
1726
1727 /* Generic64 should produce code tuned for Nocona and K8. */
1728 static const
1729 struct processor_costs generic64_cost = {
1730 COSTS_N_INSNS (1), /* cost of an add instruction */
1731 /* On all chips taken into consideration, lea takes 2 cycles or more. With
1732 this cost, however, our current implementation of synth_mult results in
1733 the use of unnecessary temporary registers, causing regressions on several
1734 SPECfp benchmarks. */
1735 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1736 COSTS_N_INSNS (1), /* variable shift costs */
1737 COSTS_N_INSNS (1), /* constant shift costs */
1738 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1739 COSTS_N_INSNS (4), /* HI */
1740 COSTS_N_INSNS (3), /* SI */
1741 COSTS_N_INSNS (4), /* DI */
1742 COSTS_N_INSNS (2)}, /* other */
1743 0, /* cost of multiply per each bit set */
1744 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1745 COSTS_N_INSNS (26), /* HI */
1746 COSTS_N_INSNS (42), /* SI */
1747 COSTS_N_INSNS (74), /* DI */
1748 COSTS_N_INSNS (74)}, /* other */
1749 COSTS_N_INSNS (1), /* cost of movsx */
1750 COSTS_N_INSNS (1), /* cost of movzx */
1751 8, /* "large" insn */
1752 17, /* MOVE_RATIO */
1753 4, /* cost for loading QImode using movzbl */
1754 {4, 4, 4}, /* cost of loading integer registers
1755 in QImode, HImode and SImode.
1756 Relative to reg-reg move (2). */
1757 {4, 4, 4}, /* cost of storing integer registers */
1758 4, /* cost of reg,reg fld/fst */
1759 {12, 12, 12}, /* cost of loading fp registers
1760 in SFmode, DFmode and XFmode */
1761 {6, 6, 8}, /* cost of storing fp registers
1762 in SFmode, DFmode and XFmode */
1763 2, /* cost of moving MMX register */
1764 {8, 8}, /* cost of loading MMX registers
1765 in SImode and DImode */
1766 {8, 8}, /* cost of storing MMX registers
1767 in SImode and DImode */
1768 2, /* cost of moving SSE register */
1769 {8, 8, 8}, /* cost of loading SSE registers
1770 in SImode, DImode and TImode */
1771 {8, 8, 8}, /* cost of storing SSE registers
1772 in SImode, DImode and TImode */
1773 5, /* MMX or SSE register to integer */
1774 32, /* size of l1 cache. */
1775 512, /* size of l2 cache. */
1776 64, /* size of prefetch block */
1777 6, /* number of parallel prefetches */
1778 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1779 value is increased to the perhaps more appropriate value of 5. */
1780 3, /* Branch cost */
1781 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1782 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1783 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1784 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1785 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1786 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1787 {DUMMY_STRINGOP_ALGS,
1788 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1789 {DUMMY_STRINGOP_ALGS,
1790 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1791 1, /* scalar_stmt_cost. */
1792 1, /* scalar load_cost. */
1793 1, /* scalar_store_cost. */
1794 1, /* vec_stmt_cost. */
1795 1, /* vec_to_scalar_cost. */
1796 1, /* scalar_to_vec_cost. */
1797 1, /* vec_align_load_cost. */
1798 2, /* vec_unalign_load_cost. */
1799 1, /* vec_store_cost. */
1800 3, /* cond_taken_branch_cost. */
1801 1, /* cond_not_taken_branch_cost. */
1802 };
1803
1804 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
1805 Athlon and K8. */
1806 static const
1807 struct processor_costs generic32_cost = {
1808 COSTS_N_INSNS (1), /* cost of an add instruction */
1809 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1810 COSTS_N_INSNS (1), /* variable shift costs */
1811 COSTS_N_INSNS (1), /* constant shift costs */
1812 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1813 COSTS_N_INSNS (4), /* HI */
1814 COSTS_N_INSNS (3), /* SI */
1815 COSTS_N_INSNS (4), /* DI */
1816 COSTS_N_INSNS (2)}, /* other */
1817 0, /* cost of multiply per each bit set */
1818 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1819 COSTS_N_INSNS (26), /* HI */
1820 COSTS_N_INSNS (42), /* SI */
1821 COSTS_N_INSNS (74), /* DI */
1822 COSTS_N_INSNS (74)}, /* other */
1823 COSTS_N_INSNS (1), /* cost of movsx */
1824 COSTS_N_INSNS (1), /* cost of movzx */
1825 8, /* "large" insn */
1826 17, /* MOVE_RATIO */
1827 4, /* cost for loading QImode using movzbl */
1828 {4, 4, 4}, /* cost of loading integer registers
1829 in QImode, HImode and SImode.
1830 Relative to reg-reg move (2). */
1831 {4, 4, 4}, /* cost of storing integer registers */
1832 4, /* cost of reg,reg fld/fst */
1833 {12, 12, 12}, /* cost of loading fp registers
1834 in SFmode, DFmode and XFmode */
1835 {6, 6, 8}, /* cost of storing fp registers
1836 in SFmode, DFmode and XFmode */
1837 2, /* cost of moving MMX register */
1838 {8, 8}, /* cost of loading MMX registers
1839 in SImode and DImode */
1840 {8, 8}, /* cost of storing MMX registers
1841 in SImode and DImode */
1842 2, /* cost of moving SSE register */
1843 {8, 8, 8}, /* cost of loading SSE registers
1844 in SImode, DImode and TImode */
1845 {8, 8, 8}, /* cost of storing SSE registers
1846 in SImode, DImode and TImode */
1847 5, /* MMX or SSE register to integer */
1848 32, /* size of l1 cache. */
1849 256, /* size of l2 cache. */
1850 64, /* size of prefetch block */
1851 6, /* number of parallel prefetches */
1852 3, /* Branch cost */
1853 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1854 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1855 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1856 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1857 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1858 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1859 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1860 DUMMY_STRINGOP_ALGS},
1861 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1862 DUMMY_STRINGOP_ALGS},
1863 1, /* scalar_stmt_cost. */
1864 1, /* scalar load_cost. */
1865 1, /* scalar_store_cost. */
1866 1, /* vec_stmt_cost. */
1867 1, /* vec_to_scalar_cost. */
1868 1, /* scalar_to_vec_cost. */
1869 1, /* vec_align_load_cost. */
1870 2, /* vec_unalign_load_cost. */
1871 1, /* vec_store_cost. */
1872 3, /* cond_taken_branch_cost. */
1873 1, /* cond_not_taken_branch_cost. */
1874 };
1875
1876 const struct processor_costs *ix86_cost = &pentium_cost;
1877
1878 /* Processor feature/optimization bitmasks. */
1879 #define m_386 (1<<PROCESSOR_I386)
1880 #define m_486 (1<<PROCESSOR_I486)
1881 #define m_PENT (1<<PROCESSOR_PENTIUM)
1882 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1883 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1884 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1885 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
1886 #define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
1887 #define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
1888 #define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
1889 #define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
1890 #define m_COREI7 (m_COREI7_32 | m_COREI7_64)
1891 #define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
1892 #define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
1893 #define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
1894 #define m_ATOM (1<<PROCESSOR_ATOM)
1895
1896 #define m_GEODE (1<<PROCESSOR_GEODE)
1897 #define m_K6 (1<<PROCESSOR_K6)
1898 #define m_K6_GEODE (m_K6 | m_GEODE)
1899 #define m_K8 (1<<PROCESSOR_K8)
1900 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1901 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1902 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1903 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1904 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
1905 #define m_BDVER (m_BDVER1 | m_BDVER2)
1906 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
1907 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1)
1908
1909 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1910 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1911
1912 /* Generic instruction choice should be a common subset of the supported CPUs
1913 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1914 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1915
1916 /* Feature tests against the various tunings. */
1917 unsigned char ix86_tune_features[X86_TUNE_LAST];
1918
1919 /* Feature tests against the various tunings, used to create ix86_tune_features
1920 based on the processor mask (see the sketch after this table). */
1921 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1922 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1923 negatively, so enabling it for Generic64 seems like a good code size
1924 tradeoff. We can't enable it for 32bit generic because it does not
1925 work well with PPro-based chips. */
1926 m_386 | m_CORE2I7_64 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC64,
1927
1928 /* X86_TUNE_PUSH_MEMORY */
1929 m_386 | m_P4_NOCONA | m_CORE2I7 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1930
1931 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1932 m_486 | m_PENT,
1933
1934 /* X86_TUNE_UNROLL_STRLEN */
1935 m_486 | m_PENT | m_PPRO | m_ATOM | m_CORE2I7 | m_K6 | m_AMD_MULTIPLE | m_GENERIC,
1936
1937 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were added to P4 based
1938 on simulation results, but after P4 shipped no performance benefit
1939 was observed from branch hints, and they increase code size.
1940 As a result, icc never generates branch hints. */
1941 0,
1942
1943 /* X86_TUNE_DOUBLE_WITH_ADD */
1944 ~m_386,
1945
1946 /* X86_TUNE_USE_SAHF */
1947 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC,
1948
1949 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1950 partial dependencies. */
1951 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1952
1953 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1954 register stalls on the Generic32 compilation setting as well. However,
1955 in the current implementation the partial register stalls are not eliminated
1956 very well - they can be introduced via subregs synthesized by combine
1957 and can happen in caller/callee saving sequences. Because this option
1958 pays back little on PPro-based chips and conflicts with the partial reg
1959 dependencies used by Athlon/P4-based chips, it is better to leave it off
1960 for generic32 for now. */
1961 m_PPRO,
1962
1963 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1964 m_CORE2I7 | m_GENERIC,
1965
1966 /* X86_TUNE_USE_HIMODE_FIOP */
1967 m_386 | m_486 | m_K6_GEODE,
1968
1969 /* X86_TUNE_USE_SIMODE_FIOP */
1970 ~(m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC),
1971
1972 /* X86_TUNE_USE_MOV0 */
1973 m_K6,
1974
1975 /* X86_TUNE_USE_CLTD */
1976 ~(m_PENT | m_CORE2I7 | m_ATOM | m_K6 | m_GENERIC),
1977
1978 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1979 m_PENT4,
1980
1981 /* X86_TUNE_SPLIT_LONG_MOVES */
1982 m_PPRO,
1983
1984 /* X86_TUNE_READ_MODIFY_WRITE */
1985 ~m_PENT,
1986
1987 /* X86_TUNE_READ_MODIFY */
1988 ~(m_PENT | m_PPRO),
1989
1990 /* X86_TUNE_PROMOTE_QIMODE */
1991 m_386 | m_486 | m_PENT | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1992
1993 /* X86_TUNE_FAST_PREFIX */
1994 ~(m_386 | m_486 | m_PENT),
1995
1996 /* X86_TUNE_SINGLE_STRINGOP */
1997 m_386 | m_P4_NOCONA,
1998
1999 /* X86_TUNE_QIMODE_MATH */
2000 ~0,
2001
2002 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
2003 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL, this option
2004 might be considered for Generic32 if our scheme for avoiding partial
2005 stalls were more effective. */
2006 ~m_PPRO,
2007
2008 /* X86_TUNE_PROMOTE_QI_REGS */
2009 0,
2010
2011 /* X86_TUNE_PROMOTE_HI_REGS */
2012 m_PPRO,
2013
2014 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
2015 over esp addition. */
2016 m_386 | m_486 | m_PENT | m_PPRO,
2017
2018 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
2019 over esp addition. */
2020 m_PENT,
2021
2022 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
2023 over esp subtraction. */
2024 m_386 | m_486 | m_PENT | m_K6_GEODE,
2025
2026 /* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred
2027 over esp subtraction. */
2028 m_PENT | m_K6_GEODE,
2029
2030 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
2031 for DFmode copies */
2032 ~(m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC),
2033
2034 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
2035 m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2036
2037 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
2038 conflict here between PPro/Pentium4-based chips that treat 128bit
2039 SSE registers as single units and K8-based chips that divide SSE
2040 registers into two 64bit halves. This knob promotes all store destinations
2041 to 128bit to allow register renaming on 128bit SSE units, but usually
2042 results in one extra microop on 64bit SSE units. Experimental results
2043 show that disabling this option on P4 causes an over 20% SPECfp regression,
2044 while enabling it on K8 causes roughly a 2.4% regression that can be partly
2045 masked by careful scheduling of moves. */
2046 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMDFAM10 | m_BDVER | m_GENERIC,
2047
2048 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
2049 m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER1,
2050
2051 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
2052 m_COREI7 | m_BDVER,
2053
2054 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
2055 m_BDVER,
2056
2057 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where types and dependencies
2058 are resolved on SSE register parts instead of whole registers, so we may
2059 maintain just the lower part of scalar values in the proper format, leaving
2060 the upper part undefined. */
2061 m_ATHLON_K8,
2062
2063 /* X86_TUNE_SSE_TYPELESS_STORES */
2064 m_AMD_MULTIPLE,
2065
2066 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
2067 m_PPRO | m_P4_NOCONA,
2068
2069 /* X86_TUNE_MEMORY_MISMATCH_STALL */
2070 m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2071
2072 /* X86_TUNE_PROLOGUE_USING_MOVE */
2073 m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
2074
2075 /* X86_TUNE_EPILOGUE_USING_MOVE */
2076 m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
2077
2078 /* X86_TUNE_SHIFT1 */
2079 ~m_486,
2080
2081 /* X86_TUNE_USE_FFREEP */
2082 m_AMD_MULTIPLE,
2083
2084 /* X86_TUNE_INTER_UNIT_MOVES */
2085 ~(m_AMD_MULTIPLE | m_GENERIC),
2086
2087 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
2088 ~(m_AMDFAM10 | m_BDVER),
2089
2090 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
2091 than 4 branch instructions in the 16 byte window. */
2092 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2093
2094 /* X86_TUNE_SCHEDULE */
2095 m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
2096
2097 /* X86_TUNE_USE_BT */
2098 m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2099
2100 /* X86_TUNE_USE_INCDEC */
2101 ~(m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GENERIC),
2102
2103 /* X86_TUNE_PAD_RETURNS */
2104 m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC,
2105
2106 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short functions. */
2107 m_ATOM,
2108
2109 /* X86_TUNE_EXT_80387_CONSTANTS */
2110 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC,
2111
2112 /* X86_TUNE_SHORTEN_X87_SSE */
2113 ~m_K8,
2114
2115 /* X86_TUNE_AVOID_VECTOR_DECODE */
2116 m_CORE2I7_64 | m_K8 | m_GENERIC64,
2117
2118 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
2119 and SImode multiplies, but the 386 and 486 do HImode multiplies faster. */
2120 ~(m_386 | m_486),
2121
2122 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of a 32-bit constant and memory is a
2123 vector path on AMD machines. */
2124 m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
2125
2126 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of an 8-bit constant is a vector path on AMD
2127 machines. */
2128 m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
2129
2130 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
2131 than a MOV. */
2132 m_PENT,
2133
2134 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
2135 but one byte longer. */
2136 m_PENT,
2137
2138 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
2139 operand that cannot be represented using a modRM byte. The XOR
2140 replacement is long decoded, so this split helps here as well. */
2141 m_K6,
2142
2143 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
2144 from FP to FP. */
2145 m_CORE2I7 | m_AMDFAM10 | m_GENERIC,
2146
2147 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
2148 from integer to FP. */
2149 m_AMDFAM10,
2150
2151 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
2152 with a subsequent conditional jump instruction into a single
2153 compare-and-branch uop. */
2154 m_BDVER,
2155
2156 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
2157 will impact LEA instruction selection. */
2158 m_ATOM,
2159
2160 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
2161 instructions. */
2162 ~m_ATOM,
2163
2164 /* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching
2165 at -O3. For the moment, the prefetching seems badly tuned for Intel
2166 chips. */
2167 m_K6_GEODE | m_AMD_MULTIPLE,
2168
2169 /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
2170 the auto-vectorizer. */
2171 m_BDVER,
2172
2173 /* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations
2174 during reassociation of integer computation. */
2175 m_ATOM,
2176
2177 /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
2178 during reassociation of fp computation. */
2179 m_ATOM
2180 };
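/* A minimal sketch (assumed, not the verbatim option-override code) of how
   the initial_ix86_tune_features table above is turned into
   ix86_tune_features once the -mtune processor has been chosen: each entry
   is tested against the single-bit mask of the selected processor.  */
#if 0
static void
sketch_set_tune_features (enum processor_type tune)
{
  unsigned int tune_mask = 1u << tune;
  unsigned int i;
  for (i = 0; i < X86_TUNE_LAST; i++)
    ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & tune_mask);
}
#endif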
2181
2182 /* Feature tests against the various architecture variations. */
2183 unsigned char ix86_arch_features[X86_ARCH_LAST];
2184
2185 /* Feature tests against the various architecture variations, used to create
2186 ix86_arch_features based on the processor mask. */
2187 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2188 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
2189 ~(m_386 | m_486 | m_PENT | m_K6),
2190
2191 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2192 ~m_386,
2193
2194 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2195 ~(m_386 | m_486),
2196
2197 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2198 ~m_386,
2199
2200 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2201 ~m_386,
2202 };
2203
2204 static const unsigned int x86_accumulate_outgoing_args
2205 = m_PPRO | m_P4_NOCONA | m_ATOM | m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC;
2206
2207 static const unsigned int x86_arch_always_fancy_math_387
2208 = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC;
2209
2210 static const unsigned int x86_avx256_split_unaligned_load
2211 = m_COREI7 | m_GENERIC;
2212
2213 static const unsigned int x86_avx256_split_unaligned_store
2214 = m_COREI7 | m_BDVER | m_GENERIC;
2215
2216 /* In case the average insn count for a single function invocation is
2217 lower than this constant, emit fast (but longer) prologue and
2218 epilogue code. */
2219 #define FAST_PROLOGUE_INSN_COUNT 20
2220
2221 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
2222 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2223 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2224 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2225
2226 /* Array of the smallest class containing reg number REGNO, indexed by
2227 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2228
2229 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2230 {
2231 /* ax, dx, cx, bx */
2232 AREG, DREG, CREG, BREG,
2233 /* si, di, bp, sp */
2234 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2235 /* FP registers */
2236 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2237 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2238 /* arg pointer */
2239 NON_Q_REGS,
2240 /* flags, fpsr, fpcr, frame */
2241 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2242 /* SSE registers */
2243 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2244 SSE_REGS, SSE_REGS,
2245 /* MMX registers */
2246 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2247 MMX_REGS, MMX_REGS,
2248 /* REX registers */
2249 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2250 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2251 /* SSE REX registers */
2252 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2253 SSE_REGS, SSE_REGS,
2254 };
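/* Illustration only: i386.h is expected to consult the table above through
   its REGNO_REG_CLASS macro, roughly as sketched here.  The exact macro text
   lives in i386.h and is assumed, not quoted.  */
#if 0
#define SKETCH_REGNO_REG_CLASS(REGNO) (regclass_map[(REGNO)])
#endif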
2255
2256 /* The "default" register map used in 32bit mode. */
2257
2258 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2259 {
2260 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2261 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2262 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2263 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2264 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2265 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2266 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2267 };
2268
2269 /* The "default" register map used in 64bit mode. */
2270
2271 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2272 {
2273 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2274 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2275 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2276 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2277 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2278 8,9,10,11,12,13,14,15, /* extended integer registers */
2279 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2280 };
2281
2282 /* Define the register numbers to be used in Dwarf debugging information.
2283 The SVR4 reference port C compiler uses the following register numbers
2284 in its Dwarf output code:
2285 0 for %eax (gcc regno = 0)
2286 1 for %ecx (gcc regno = 2)
2287 2 for %edx (gcc regno = 1)
2288 3 for %ebx (gcc regno = 3)
2289 4 for %esp (gcc regno = 7)
2290 5 for %ebp (gcc regno = 6)
2291 6 for %esi (gcc regno = 4)
2292 7 for %edi (gcc regno = 5)
2293 The following three DWARF register numbers are never generated by
2294 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2295 believes these numbers have these meanings.
2296 8 for %eip (no gcc equivalent)
2297 9 for %eflags (gcc regno = 17)
2298 10 for %trapno (no gcc equivalent)
2299 It is not at all clear how we should number the FP stack registers
2300 for the x86 architecture. If the version of SDB on x86/svr4 were
2301 a bit less brain dead with respect to floating-point then we would
2302 have a precedent to follow with respect to DWARF register numbers
2303 for x86 FP registers, but the SDB on x86/svr4 is so completely
2304 broken with respect to FP registers that it is hardly worth thinking
2305 of it as something to strive for compatibility with.
2306 The version of x86/svr4 SDB I have at the moment does (partially)
2307 seem to believe that DWARF register number 11 is associated with
2308 the x86 register %st(0), but that's about all. Higher DWARF
2309 register numbers don't seem to be associated with anything in
2310 particular, and even for DWARF regno 11, SDB only seems to under-
2311 stand that it should say that a variable lives in %st(0) (when
2312 asked via an `=' command) if we said it was in DWARF regno 11,
2313 but SDB still prints garbage when asked for the value of the
2314 variable in question (via a `/' command).
2315 (Also note that the labels SDB prints for various FP stack regs
2316 when doing an `x' command are all wrong.)
2317 Note that these problems generally don't affect the native SVR4
2318 C compiler because it doesn't allow the use of -O with -g and
2319 because when it is *not* optimizing, it allocates a memory
2320 location for each floating-point variable, and the memory
2321 location is what gets described in the DWARF AT_location
2322 attribute for the variable in question.
2323 Regardless of the severe mental illness of the x86/svr4 SDB, we
2324 do something sensible here and we use the following DWARF
2325 register numbers. Note that these are all stack-top-relative
2326 numbers.
2327 11 for %st(0) (gcc regno = 8)
2328 12 for %st(1) (gcc regno = 9)
2329 13 for %st(2) (gcc regno = 10)
2330 14 for %st(3) (gcc regno = 11)
2331 15 for %st(4) (gcc regno = 12)
2332 16 for %st(5) (gcc regno = 13)
2333 17 for %st(6) (gcc regno = 14)
2334 18 for %st(7) (gcc regno = 15)
2335 */
2336 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2337 {
2338 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2339 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2340 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2341 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2342 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2343 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2344 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2345 };
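/* Illustration only: the debug back ends map a gcc hard register number to
   its debug register number by indexing the tables above, roughly as
   sketched below.  The real DBX_REGISTER_NUMBER definition (and the choice
   between the default and svr4 maps) lives in the target headers and is
   assumed here, not quoted.  */
#if 0
static int
sketch_dbx_register_number (int regno)
{
  return TARGET_64BIT ? dbx64_register_map[regno] : dbx_register_map[regno];
}
#endif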
2346
2347 /* Define parameter passing and return registers. */
2348
2349 static int const x86_64_int_parameter_registers[6] =
2350 {
2351 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2352 };
2353
2354 static int const x86_64_ms_abi_int_parameter_registers[4] =
2355 {
2356 CX_REG, DX_REG, R8_REG, R9_REG
2357 };
2358
2359 static int const x86_64_int_return_registers[4] =
2360 {
2361 AX_REG, DX_REG, DI_REG, SI_REG
2362 };
2363
2364 /* Define the structure for the machine field in struct function. */
2365
2366 struct GTY(()) stack_local_entry {
2367 unsigned short mode;
2368 unsigned short n;
2369 rtx rtl;
2370 struct stack_local_entry *next;
2371 };
2372
2373 /* Structure describing stack frame layout.
2374 Stack grows downward:
2375
2376 [arguments]
2377 <- ARG_POINTER
2378 saved pc
2379
2380 saved static chain if ix86_static_chain_on_stack
2381
2382 saved frame pointer if frame_pointer_needed
2383 <- HARD_FRAME_POINTER
2384 [saved regs]
2385 <- regs_save_offset
2386 [padding0]
2387
2388 [saved SSE regs]
2389 <- sse_regs_save_offset
2390 [padding1] |
2391 | <- FRAME_POINTER
2392 [va_arg registers] |
2393 |
2394 [frame] |
2395 |
2396 [padding2] | = to_allocate
2397 <- STACK_POINTER
2398 */
2399 struct ix86_frame
2400 {
2401 int nsseregs;
2402 int nregs;
2403 int va_arg_size;
2404 int red_zone_size;
2405 int outgoing_arguments_size;
2406 HOST_WIDE_INT frame;
2407
2408 /* The offsets relative to ARG_POINTER. */
2409 HOST_WIDE_INT frame_pointer_offset;
2410 HOST_WIDE_INT hard_frame_pointer_offset;
2411 HOST_WIDE_INT stack_pointer_offset;
2412 HOST_WIDE_INT hfp_save_offset;
2413 HOST_WIDE_INT reg_save_offset;
2414 HOST_WIDE_INT sse_reg_save_offset;
2415
2416 /* When save_regs_using_mov is set, emit prologue using
2417 move instead of push instructions. */
2418 bool save_regs_using_mov;
2419 };
2420
2421 /* Which cpu are we scheduling for. */
2422 enum attr_cpu ix86_schedule;
2423
2424 /* Which cpu are we optimizing for. */
2425 enum processor_type ix86_tune;
2426
2427 /* Which instruction set architecture to use. */
2428 enum processor_type ix86_arch;
2429
2430 /* true if sse prefetch instruction is not NOOP. */
2431 int x86_prefetch_sse;
2432
2433 /* -mstackrealign option */
2434 static const char ix86_force_align_arg_pointer_string[]
2435 = "force_align_arg_pointer";
2436
2437 static rtx (*ix86_gen_leave) (void);
2438 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2439 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2440 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2441 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2442 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2443 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2444 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2445 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2446 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2447
2448 /* Preferred alignment for stack boundary in bits. */
2449 unsigned int ix86_preferred_stack_boundary;
2450
2451 /* Alignment for incoming stack boundary in bits specified at
2452 command line. */
2453 static unsigned int ix86_user_incoming_stack_boundary;
2454
2455 /* Default alignment for incoming stack boundary in bits. */
2456 static unsigned int ix86_default_incoming_stack_boundary;
2457
2458 /* Alignment for incoming stack boundary in bits. */
2459 unsigned int ix86_incoming_stack_boundary;
2460
2461 /* Calling abi specific va_list type nodes. */
2462 static GTY(()) tree sysv_va_list_type_node;
2463 static GTY(()) tree ms_va_list_type_node;
2464
2465 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2466 char internal_label_prefix[16];
2467 int internal_label_prefix_len;
2468
2469 /* Fence to use after loop using movnt. */
2470 tree x86_mfence;
2471
2472 /* Register class used for passing a given 64bit part of the argument.
2473 These represent the classes documented by the psABI, with the exception
2474 of the SSESF and SSEDF classes, which are basically the SSE class; gcc
2475 just uses SFmode or DFmode moves instead of DImode to avoid reformatting
2476 penalties.
2477 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2478 whenever possible (the upper half does contain padding). */
2479 enum x86_64_reg_class
2480 {
2481 X86_64_NO_CLASS,
2482 X86_64_INTEGER_CLASS,
2483 X86_64_INTEGERSI_CLASS,
2484 X86_64_SSE_CLASS,
2485 X86_64_SSESF_CLASS,
2486 X86_64_SSEDF_CLASS,
2487 X86_64_SSEUP_CLASS,
2488 X86_64_X87_CLASS,
2489 X86_64_X87UP_CLASS,
2490 X86_64_COMPLEX_X87_CLASS,
2491 X86_64_MEMORY_CLASS
2492 };
2493
2494 #define MAX_CLASSES 4
2495
2496 /* Table of constants used by fldpi, fldln2, etc.... */
2497 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2498 static bool ext_80387_constants_init = 0;
2499
2500 \f
2501 static struct machine_function * ix86_init_machine_status (void);
2502 static rtx ix86_function_value (const_tree, const_tree, bool);
2503 static bool ix86_function_value_regno_p (const unsigned int);
2504 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2505 const_tree);
2506 static rtx ix86_static_chain (const_tree, bool);
2507 static int ix86_function_regparm (const_tree, const_tree);
2508 static void ix86_compute_frame_layout (struct ix86_frame *);
2509 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2510 rtx, rtx, int);
2511 static void ix86_add_new_builtins (HOST_WIDE_INT);
2512 static rtx ix86_expand_vec_perm_builtin (tree);
2513 static tree ix86_canonical_va_list_type (tree);
2514 static void predict_jump (int);
2515 static unsigned int split_stack_prologue_scratch_regno (void);
2516 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2517
2518 enum ix86_function_specific_strings
2519 {
2520 IX86_FUNCTION_SPECIFIC_ARCH,
2521 IX86_FUNCTION_SPECIFIC_TUNE,
2522 IX86_FUNCTION_SPECIFIC_MAX
2523 };
2524
2525 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2526 const char *, enum fpmath_unit, bool);
2527 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2528 static void ix86_function_specific_save (struct cl_target_option *);
2529 static void ix86_function_specific_restore (struct cl_target_option *);
2530 static void ix86_function_specific_print (FILE *, int,
2531 struct cl_target_option *);
2532 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2533 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2534 struct gcc_options *);
2535 static bool ix86_can_inline_p (tree, tree);
2536 static void ix86_set_current_function (tree);
2537 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2538
2539 static enum calling_abi ix86_function_abi (const_tree);
2540
2541 \f
2542 #ifndef SUBTARGET32_DEFAULT_CPU
2543 #define SUBTARGET32_DEFAULT_CPU "i386"
2544 #endif
2545
2546 /* The svr4 ABI for the i386 says that records and unions are returned
2547 in memory. */
2548 #ifndef DEFAULT_PCC_STRUCT_RETURN
2549 #define DEFAULT_PCC_STRUCT_RETURN 1
2550 #endif
2551
2552 /* Whether -mtune= or -march= were specified */
2553 static int ix86_tune_defaulted;
2554 static int ix86_arch_specified;
2555
2556 /* Vectorization library interface and handlers. */
2557 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2558
2559 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2560 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2561
2562 /* Processor target table, indexed by processor number */
2563 struct ptt
2564 {
2565 const struct processor_costs *cost; /* Processor costs */
2566 const int align_loop; /* Default alignments. */
2567 const int align_loop_max_skip;
2568 const int align_jump;
2569 const int align_jump_max_skip;
2570 const int align_func;
2571 };
2572
2573 static const struct ptt processor_target_table[PROCESSOR_max] =
2574 {
2575 {&i386_cost, 4, 3, 4, 3, 4},
2576 {&i486_cost, 16, 15, 16, 15, 16},
2577 {&pentium_cost, 16, 7, 16, 7, 16},
2578 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2579 {&geode_cost, 0, 0, 0, 0, 0},
2580 {&k6_cost, 32, 7, 32, 7, 32},
2581 {&athlon_cost, 16, 7, 16, 7, 16},
2582 {&pentium4_cost, 0, 0, 0, 0, 0},
2583 {&k8_cost, 16, 7, 16, 7, 16},
2584 {&nocona_cost, 0, 0, 0, 0, 0},
2585 /* Core 2 32-bit. */
2586 {&generic32_cost, 16, 10, 16, 10, 16},
2587 /* Core 2 64-bit. */
2588 {&generic64_cost, 16, 10, 16, 10, 16},
2589 /* Core i7 32-bit. */
2590 {&generic32_cost, 16, 10, 16, 10, 16},
2591 /* Core i7 64-bit. */
2592 {&generic64_cost, 16, 10, 16, 10, 16},
2593 {&generic32_cost, 16, 7, 16, 7, 16},
2594 {&generic64_cost, 16, 10, 16, 10, 16},
2595 {&amdfam10_cost, 32, 24, 32, 7, 32},
2596 {&bdver1_cost, 32, 24, 32, 7, 32},
2597 {&bdver2_cost, 32, 24, 32, 7, 32},
2598 {&btver1_cost, 32, 24, 32, 7, 32},
2599 {&atom_cost, 16, 7, 16, 7, 16}
2600 };
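/* A minimal sketch (assumed, not the verbatim override code) of how the
   processor target table above is consulted once ix86_tune is known: the
   cost table and the default alignments are picked by index, with the
   alignments only applied when the user has not set them.  */
#if 0
static void
sketch_apply_tune_defaults (enum processor_type tune)
{
  ix86_cost = processor_target_table[tune].cost;
  if (align_loops == 0)
    align_loops = processor_target_table[tune].align_loop;
  if (align_jumps == 0)
    align_jumps = processor_target_table[tune].align_jump;
  if (align_functions == 0)
    align_functions = processor_target_table[tune].align_func;
}
#endif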
2601
2602 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2603 {
2604 "generic",
2605 "i386",
2606 "i486",
2607 "pentium",
2608 "pentium-mmx",
2609 "pentiumpro",
2610 "pentium2",
2611 "pentium3",
2612 "pentium4",
2613 "pentium-m",
2614 "prescott",
2615 "nocona",
2616 "core2",
2617 "corei7",
2618 "atom",
2619 "geode",
2620 "k6",
2621 "k6-2",
2622 "k6-3",
2623 "athlon",
2624 "athlon-4",
2625 "k8",
2626 "amdfam10",
2627 "bdver1",
2628 "bdver2",
2629 "btver1"
2630 };
2631 \f
2632 /* Return true if a red-zone is in use. */
2633
2634 static inline bool
2635 ix86_using_red_zone (void)
2636 {
2637 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2638 }
2639 \f
2640 /* Return a string that documents the current -m options. The caller is
2641 responsible for freeing the string. */
2642
2643 static char *
2644 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2645 const char *tune, enum fpmath_unit fpmath,
2646 bool add_nl_p)
2647 {
2648 struct ix86_target_opts
2649 {
2650 const char *option; /* option string */
2651 HOST_WIDE_INT mask; /* isa mask options */
2652 };
2653
2654 /* This table is ordered so that options like -msse4.2 that imply
2655 preceding options are matched first. */
2656 static struct ix86_target_opts isa_opts[] =
2657 {
2658 { "-m64", OPTION_MASK_ISA_64BIT },
2659 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2660 { "-mfma", OPTION_MASK_ISA_FMA },
2661 { "-mxop", OPTION_MASK_ISA_XOP },
2662 { "-mlwp", OPTION_MASK_ISA_LWP },
2663 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2664 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2665 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2666 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2667 { "-msse3", OPTION_MASK_ISA_SSE3 },
2668 { "-msse2", OPTION_MASK_ISA_SSE2 },
2669 { "-msse", OPTION_MASK_ISA_SSE },
2670 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2671 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2672 { "-mmmx", OPTION_MASK_ISA_MMX },
2673 { "-mabm", OPTION_MASK_ISA_ABM },
2674 { "-mbmi", OPTION_MASK_ISA_BMI },
2675 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2676 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2677 { "-mtbm", OPTION_MASK_ISA_TBM },
2678 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2679 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2680 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2681 { "-maes", OPTION_MASK_ISA_AES },
2682 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2683 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2684 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2685 { "-mf16c", OPTION_MASK_ISA_F16C },
2686 };
2687
2688 /* Flag options. */
2689 static struct ix86_target_opts flag_opts[] =
2690 {
2691 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2692 { "-m80387", MASK_80387 },
2693 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2694 { "-malign-double", MASK_ALIGN_DOUBLE },
2695 { "-mcld", MASK_CLD },
2696 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2697 { "-mieee-fp", MASK_IEEE_FP },
2698 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2699 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2700 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2701 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2702 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2703 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2704 { "-mno-red-zone", MASK_NO_RED_ZONE },
2705 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2706 { "-mrecip", MASK_RECIP },
2707 { "-mrtd", MASK_RTD },
2708 { "-msseregparm", MASK_SSEREGPARM },
2709 { "-mstack-arg-probe", MASK_STACK_PROBE },
2710 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2711 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2712 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2713 { "-mvzeroupper", MASK_VZEROUPPER },
2714 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2715 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2716 { "-mprefer-avx128", MASK_PREFER_AVX128},
2717 };
2718
2719 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2720
2721 char isa_other[40];
2722 char target_other[40];
2723 unsigned num = 0;
2724 unsigned i, j;
2725 char *ret;
2726 char *ptr;
2727 size_t len;
2728 size_t line_len;
2729 size_t sep_len;
2730
2731 memset (opts, '\0', sizeof (opts));
2732
2733 /* Add -march= option. */
2734 if (arch)
2735 {
2736 opts[num][0] = "-march=";
2737 opts[num++][1] = arch;
2738 }
2739
2740 /* Add -mtune= option. */
2741 if (tune)
2742 {
2743 opts[num][0] = "-mtune=";
2744 opts[num++][1] = tune;
2745 }
2746
2747 /* Pick out the options in isa options. */
2748 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2749 {
2750 if ((isa & isa_opts[i].mask) != 0)
2751 {
2752 opts[num++][0] = isa_opts[i].option;
2753 isa &= ~ isa_opts[i].mask;
2754 }
2755 }
2756
2757 if (isa && add_nl_p)
2758 {
2759 opts[num++][0] = isa_other;
2760 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2761 isa);
2762 }
2763
2764 /* Add flag options. */
2765 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2766 {
2767 if ((flags & flag_opts[i].mask) != 0)
2768 {
2769 opts[num++][0] = flag_opts[i].option;
2770 flags &= ~ flag_opts[i].mask;
2771 }
2772 }
2773
2774 if (flags && add_nl_p)
2775 {
2776 opts[num++][0] = target_other;
2777 sprintf (target_other, "(other flags: %#x)", flags);
2778 }
2779
2780 /* Add -fpmath= option. */
2781 if (fpmath)
2782 {
2783 opts[num][0] = "-mfpmath=";
2784 switch ((int) fpmath)
2785 {
2786 case FPMATH_387:
2787 opts[num++][1] = "387";
2788 break;
2789
2790 case FPMATH_SSE:
2791 opts[num++][1] = "sse";
2792 break;
2793
2794 case FPMATH_387 | FPMATH_SSE:
2795 opts[num++][1] = "sse+387";
2796 break;
2797
2798 default:
2799 gcc_unreachable ();
2800 }
2801 }
2802
2803 /* Any options? */
2804 if (num == 0)
2805 return NULL;
2806
2807 gcc_assert (num < ARRAY_SIZE (opts));
2808
2809 /* Size the string. */
2810 len = 0;
2811 sep_len = (add_nl_p) ? 3 : 1;
2812 for (i = 0; i < num; i++)
2813 {
2814 len += sep_len;
2815 for (j = 0; j < 2; j++)
2816 if (opts[i][j])
2817 len += strlen (opts[i][j]);
2818 }
2819
2820 /* Build the string. */
2821 ret = ptr = (char *) xmalloc (len);
2822 line_len = 0;
2823
2824 for (i = 0; i < num; i++)
2825 {
2826 size_t len2[2];
2827
2828 for (j = 0; j < 2; j++)
2829 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2830
2831 if (i != 0)
2832 {
2833 *ptr++ = ' ';
2834 line_len++;
2835
2836 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2837 {
2838 *ptr++ = '\\';
2839 *ptr++ = '\n';
2840 line_len = 0;
2841 }
2842 }
2843
2844 for (j = 0; j < 2; j++)
2845 if (opts[i][j])
2846 {
2847 memcpy (ptr, opts[i][j], len2[j]);
2848 ptr += len2[j];
2849 line_len += len2[j];
2850 }
2851 }
2852
2853 *ptr = '\0';
2854 gcc_assert (ret + len >= ptr);
2855
2856 return ret;
2857 }
2858
2859 /* Return true if profiling code should be emitted before the
2860 prologue, otherwise return false.
2861 Note: on x86 this is the -mfentry ("hotfix") case. */
2862 static bool
2863 ix86_profile_before_prologue (void)
2864 {
2865 return flag_fentry != 0;
2866 }
2867
2868 /* Function that is callable from the debugger to print the current
2869 options. */
2870 void
2871 ix86_debug_options (void)
2872 {
2873 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2874 ix86_arch_string, ix86_tune_string,
2875 ix86_fpmath, true);
2876
2877 if (opts)
2878 {
2879 fprintf (stderr, "%s\n\n", opts);
2880 free (opts);
2881 }
2882 else
2883 fputs ("<no options>\n\n", stderr);
2884
2885 return;
2886 }
2887 \f
2888 /* Override various settings based on options. If MAIN_ARGS_P, the
2889 options are from the command line, otherwise they are from
2890 attributes. */
2891
2892 static void
2893 ix86_option_override_internal (bool main_args_p)
2894 {
2895 int i;
2896 unsigned int ix86_arch_mask, ix86_tune_mask;
2897 const bool ix86_tune_specified = (ix86_tune_string != NULL);
2898 const char *prefix;
2899 const char *suffix;
2900 const char *sw;
2901
2902 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
2903 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
2904 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
2905 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
2906 #define PTA_AES (HOST_WIDE_INT_1 << 4)
2907 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
2908 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
2909 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
2910 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
2911 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
2912 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
2913 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
2914 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
2915 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
2916 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
2917 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
2918 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
2919 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
2920 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
2921 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
2922 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
2923 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
2924 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
2925 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
2926 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
2927 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
2928 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
2929 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
2930 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
2931 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
2932 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
2933 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
2934 /* If this reaches 64, we need to widen the struct pta flags field below. */
2935
2936 static struct pta
2937 {
2938 const char *const name; /* processor name or nickname. */
2939 const enum processor_type processor;
2940 const enum attr_cpu schedule;
2941 const unsigned HOST_WIDE_INT flags;
2942 }
2943 const processor_alias_table[] =
2944 {
2945 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2946 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2947 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2948 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2949 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2950 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2951 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2952 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2953 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2954 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2955 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2956 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2957 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2958 PTA_MMX | PTA_SSE},
2959 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2960 PTA_MMX | PTA_SSE},
2961 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2962 PTA_MMX | PTA_SSE | PTA_SSE2},
2963 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2964 PTA_MMX |PTA_SSE | PTA_SSE2},
2965 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2966 PTA_MMX | PTA_SSE | PTA_SSE2},
2967 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2968 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2969 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2970 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2971 | PTA_CX16 | PTA_NO_SAHF},
2972 {"core2", PROCESSOR_CORE2_64, CPU_CORE2,
2973 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2974 | PTA_SSSE3 | PTA_CX16},
2975 {"corei7", PROCESSOR_COREI7_64, CPU_COREI7,
2976 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2977 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16},
2978 {"corei7-avx", PROCESSOR_COREI7_64, CPU_COREI7,
2979 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2980 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
2981 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL},
2982 {"core-avx-i", PROCESSOR_COREI7_64, CPU_COREI7,
2983 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2984 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
2985 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
2986 | PTA_RDRND | PTA_F16C},
2987 {"core-avx2", PROCESSOR_COREI7_64, CPU_COREI7,
2988 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2989 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX | PTA_AVX2
2990 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
2991 | PTA_RDRND | PTA_F16C | PTA_BMI | PTA_BMI2 | PTA_LZCNT
2992 | PTA_FMA | PTA_MOVBE},
2993 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2994 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2995 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2996 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2997 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
2998 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2999 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3000 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3001 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3002 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3003 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3004 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3005 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3006 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3007 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3008 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3009 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3010 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3011 {"x86-64", PROCESSOR_K8, CPU_K8,
3012 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
3013 {"k8", PROCESSOR_K8, CPU_K8,
3014 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3015 | PTA_SSE2 | PTA_NO_SAHF},
3016 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3017 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3018 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3019 {"opteron", PROCESSOR_K8, CPU_K8,
3020 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3021 | PTA_SSE2 | PTA_NO_SAHF},
3022 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3023 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3024 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3025 {"athlon64", PROCESSOR_K8, CPU_K8,
3026 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3027 | PTA_SSE2 | PTA_NO_SAHF},
3028 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3029 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3030 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3031 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3032 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3033 | PTA_SSE2 | PTA_NO_SAHF},
3034 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3035 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3036 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3037 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3038 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3039 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3040 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3041 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3042 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3043 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3044 | PTA_XOP | PTA_LWP},
3045 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3046 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3047 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3048 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3049 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3050 | PTA_FMA},
3051 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
3052 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3053 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16},
3054 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
3055 0 /* flags are only used for -march switch. */ },
3056 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
3057 PTA_64BIT /* flags are only used for -march switch. */ },
3058 };
3059
3060 int const pta_size = ARRAY_SIZE (processor_alias_table);
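#if 0
  /* Sketch only (assumed, not the verbatim code further down in this
     function): once the -march entry E in processor_alias_table has been
     found, each PTA_* bit that the user has not explicitly overridden turns
     on the matching ISA option, e.g. for SSE2.  E is a hypothetical pointer
     to the selected entry.  */
  if ((E->flags & PTA_SSE2)
      && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
    ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
#endif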
3061
3062 /* Set up prefix/suffix so the error messages refer to either the command
3063 line argument, or the attribute(target). */
3064 if (main_args_p)
3065 {
3066 prefix = "-m";
3067 suffix = "";
3068 sw = "switch";
3069 }
3070 else
3071 {
3072 prefix = "option(\"";
3073 suffix = "\")";
3074 sw = "attribute";
3075 }
3076
3077 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3078 SUBTARGET_OVERRIDE_OPTIONS;
3079 #endif
3080
3081 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3082 SUBSUBTARGET_OVERRIDE_OPTIONS;
3083 #endif
3084
3085 if (TARGET_X32)
3086 ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3087
3088 /* -fPIC is the default for 64-bit Mach-O (Darwin) targets. */
3089 if (TARGET_MACHO && TARGET_64BIT)
3090 flag_pic = 2;
3091
3092 /* Need to check -mtune=generic first. */
3093 if (ix86_tune_string)
3094 {
3095 if (!strcmp (ix86_tune_string, "generic")
3096 || !strcmp (ix86_tune_string, "i686")
3097 	  /* As special support for cross compilers we read -mtune=native
3098 	     as -mtune=generic. With native compilers we won't see
3099 	     -mtune=native here, as the driver will already have rewritten it. */
3100 || !strcmp (ix86_tune_string, "native"))
3101 {
3102 if (TARGET_64BIT)
3103 ix86_tune_string = "generic64";
3104 else
3105 ix86_tune_string = "generic32";
3106 }
3107 /* If this call is for setting the option attribute, allow the
3108 generic32/generic64 that was previously set. */
3109 else if (!main_args_p
3110 && (!strcmp (ix86_tune_string, "generic32")
3111 || !strcmp (ix86_tune_string, "generic64")))
3112 ;
3113 else if (!strncmp (ix86_tune_string, "generic", 7))
3114 error ("bad value (%s) for %stune=%s %s",
3115 ix86_tune_string, prefix, suffix, sw);
3116 else if (!strcmp (ix86_tune_string, "x86-64"))
3117 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3118 "%stune=k8%s or %stune=generic%s instead as appropriate",
3119 prefix, suffix, prefix, suffix, prefix, suffix);
3120 }
3121 else
3122 {
3123 if (ix86_arch_string)
3124 ix86_tune_string = ix86_arch_string;
3125 if (!ix86_tune_string)
3126 {
3127 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3128 ix86_tune_defaulted = 1;
3129 }
3130
3131 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3132 need to use a sensible tune option. */
3133 if (!strcmp (ix86_tune_string, "generic")
3134 || !strcmp (ix86_tune_string, "x86-64")
3135 || !strcmp (ix86_tune_string, "i686"))
3136 {
3137 if (TARGET_64BIT)
3138 ix86_tune_string = "generic64";
3139 else
3140 ix86_tune_string = "generic32";
3141 }
3142 }
3143
3144 if (ix86_stringop_alg == rep_prefix_8_byte && !TARGET_64BIT)
3145 {
3146 /* rep; movq isn't available in 32-bit code. */
3147 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3148 ix86_stringop_alg = no_stringop;
3149 }
3150
3151 if (!ix86_arch_string)
3152 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3153 else
3154 ix86_arch_specified = 1;
3155
3156 if (!global_options_set.x_ix86_abi)
3157 ix86_abi = DEFAULT_ABI;
3158
3159 if (global_options_set.x_ix86_cmodel)
3160 {
3161 switch (ix86_cmodel)
3162 {
3163 case CM_SMALL:
3164 case CM_SMALL_PIC:
3165 if (flag_pic)
3166 ix86_cmodel = CM_SMALL_PIC;
3167 if (!TARGET_64BIT)
3168 error ("code model %qs not supported in the %s bit mode",
3169 "small", "32");
3170 break;
3171
3172 case CM_MEDIUM:
3173 case CM_MEDIUM_PIC:
3174 if (flag_pic)
3175 ix86_cmodel = CM_MEDIUM_PIC;
3176 if (!TARGET_64BIT)
3177 error ("code model %qs not supported in the %s bit mode",
3178 "medium", "32");
3179 else if (TARGET_X32)
3180 error ("code model %qs not supported in x32 mode",
3181 "medium");
3182 break;
3183
3184 case CM_LARGE:
3185 case CM_LARGE_PIC:
3186 if (flag_pic)
3187 ix86_cmodel = CM_LARGE_PIC;
3188 if (!TARGET_64BIT)
3189 error ("code model %qs not supported in the %s bit mode",
3190 "large", "32");
3191 else if (TARGET_X32)
3192 error ("code model %qs not supported in x32 mode",
3193 	       "large");
3194 break;
3195
3196 case CM_32:
3197 if (flag_pic)
3198 error ("code model %s does not support PIC mode", "32");
3199 if (TARGET_64BIT)
3200 error ("code model %qs not supported in the %s bit mode",
3201 "32", "64");
3202 break;
3203
3204 case CM_KERNEL:
3205 if (flag_pic)
3206 {
3207 error ("code model %s does not support PIC mode", "kernel");
3208 ix86_cmodel = CM_32;
3209 }
3210 if (!TARGET_64BIT)
3211 error ("code model %qs not supported in the %s bit mode",
3212 "kernel", "32");
3213 break;
3214
3215 default:
3216 gcc_unreachable ();
3217 }
3218 }
3219 else
3220 {
3221 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3222 use of rip-relative addressing. This eliminates fixups that
3223 would otherwise be needed if this object is to be placed in a
3224 DLL, and is essentially just as efficient as direct addressing. */
3225 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3226 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3227 else if (TARGET_64BIT)
3228 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3229 else
3230 ix86_cmodel = CM_32;
3231 }
3232 if (TARGET_MACHO && ix86_asm_dialect == ASM_INTEL)
3233 {
3234 error ("-masm=intel not supported in this configuration");
3235 ix86_asm_dialect = ASM_ATT;
3236 }
3237 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3238 sorry ("%i-bit mode not compiled in",
3239 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3240
3241 for (i = 0; i < pta_size; i++)
3242 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3243 {
3244 ix86_schedule = processor_alias_table[i].schedule;
3245 ix86_arch = processor_alias_table[i].processor;
3246 /* Default cpu tuning to the architecture. */
3247 ix86_tune = ix86_arch;
3248
3249 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3250 error ("CPU you selected does not support x86-64 "
3251 "instruction set");
3252
3253 if (processor_alias_table[i].flags & PTA_MMX
3254 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3255 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3256 if (processor_alias_table[i].flags & PTA_3DNOW
3257 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3258 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3259 if (processor_alias_table[i].flags & PTA_3DNOW_A
3260 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3261 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3262 if (processor_alias_table[i].flags & PTA_SSE
3263 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3264 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3265 if (processor_alias_table[i].flags & PTA_SSE2
3266 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3267 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3268 if (processor_alias_table[i].flags & PTA_SSE3
3269 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3270 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3271 if (processor_alias_table[i].flags & PTA_SSSE3
3272 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3273 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3274 if (processor_alias_table[i].flags & PTA_SSE4_1
3275 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3276 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3277 if (processor_alias_table[i].flags & PTA_SSE4_2
3278 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3279 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3280 if (processor_alias_table[i].flags & PTA_AVX
3281 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3282 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3283 if (processor_alias_table[i].flags & PTA_AVX2
3284 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3285 ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3286 if (processor_alias_table[i].flags & PTA_FMA
3287 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3288 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3289 if (processor_alias_table[i].flags & PTA_SSE4A
3290 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3291 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3292 if (processor_alias_table[i].flags & PTA_FMA4
3293 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3294 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3295 if (processor_alias_table[i].flags & PTA_XOP
3296 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3297 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3298 if (processor_alias_table[i].flags & PTA_LWP
3299 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3300 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3301 if (processor_alias_table[i].flags & PTA_ABM
3302 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3303 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3304 if (processor_alias_table[i].flags & PTA_BMI
3305 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3306 ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3307 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3308 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3309 ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3310 if (processor_alias_table[i].flags & PTA_TBM
3311 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3312 ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3313 if (processor_alias_table[i].flags & PTA_BMI2
3314 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3315 ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3316 if (processor_alias_table[i].flags & PTA_CX16
3317 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3318 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3319 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3320 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3321 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3322 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3323 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3324 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3325 if (processor_alias_table[i].flags & PTA_MOVBE
3326 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3327 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3328 if (processor_alias_table[i].flags & PTA_AES
3329 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3330 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3331 if (processor_alias_table[i].flags & PTA_PCLMUL
3332 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3333 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3334 if (processor_alias_table[i].flags & PTA_FSGSBASE
3335 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3336 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3337 if (processor_alias_table[i].flags & PTA_RDRND
3338 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3339 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3340 if (processor_alias_table[i].flags & PTA_F16C
3341 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3342 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3343 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3344 x86_prefetch_sse = true;
3345
3346 break;
3347 }
3348
3349 if (!strcmp (ix86_arch_string, "generic"))
3350 error ("generic CPU can be used only for %stune=%s %s",
3351 prefix, suffix, sw);
3352 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3353 error ("bad value (%s) for %sarch=%s %s",
3354 ix86_arch_string, prefix, suffix, sw);
3355
3356 ix86_arch_mask = 1u << ix86_arch;
3357 for (i = 0; i < X86_ARCH_LAST; ++i)
3358 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3359
3360 for (i = 0; i < pta_size; i++)
3361 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3362 {
3363 ix86_schedule = processor_alias_table[i].schedule;
3364 ix86_tune = processor_alias_table[i].processor;
3365 if (TARGET_64BIT)
3366 {
3367 if (!(processor_alias_table[i].flags & PTA_64BIT))
3368 {
3369 if (ix86_tune_defaulted)
3370 {
3371 ix86_tune_string = "x86-64";
3372 for (i = 0; i < pta_size; i++)
3373 if (! strcmp (ix86_tune_string,
3374 processor_alias_table[i].name))
3375 break;
3376 ix86_schedule = processor_alias_table[i].schedule;
3377 ix86_tune = processor_alias_table[i].processor;
3378 }
3379 else
3380 error ("CPU you selected does not support x86-64 "
3381 "instruction set");
3382 }
3383 }
3384 else
3385 {
3386 /* Adjust tuning when compiling for 32-bit ABI. */
3387 switch (ix86_tune)
3388 {
3389 case PROCESSOR_GENERIC64:
3390 ix86_tune = PROCESSOR_GENERIC32;
3391 ix86_schedule = CPU_PENTIUMPRO;
3392 break;
3393
3394 case PROCESSOR_CORE2_64:
3395 ix86_tune = PROCESSOR_CORE2_32;
3396 break;
3397
3398 case PROCESSOR_COREI7_64:
3399 ix86_tune = PROCESSOR_COREI7_32;
3400 break;
3401
3402 default:
3403 break;
3404 }
3405 }
3406 /* Intel CPUs have always interpreted SSE prefetch instructions as
3407 NOPs; so, we can enable SSE prefetch instructions even when
3408 -mtune (rather than -march) points us to a processor that has them.
3409 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3410 higher processors. */
3411 if (TARGET_CMOVE
3412 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3413 x86_prefetch_sse = true;
3414 break;
3415 }
3416
3417 if (ix86_tune_specified && i == pta_size)
3418 error ("bad value (%s) for %stune=%s %s",
3419 ix86_tune_string, prefix, suffix, sw);
3420
3421 ix86_tune_mask = 1u << ix86_tune;
3422 for (i = 0; i < X86_TUNE_LAST; ++i)
3423 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3424
3425 #ifndef USE_IX86_FRAME_POINTER
3426 #define USE_IX86_FRAME_POINTER 0
3427 #endif
3428
3429 #ifndef USE_X86_64_FRAME_POINTER
3430 #define USE_X86_64_FRAME_POINTER 0
3431 #endif
3432
3433 /* Set the default values for switches whose default depends on TARGET_64BIT
3434 in case they weren't overwritten by command line options. */
3435 if (TARGET_64BIT)
3436 {
3437 if (optimize > 1 && !global_options_set.x_flag_zee)
3438 flag_zee = 1;
3439 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3440 flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3441 if (flag_asynchronous_unwind_tables == 2)
3442 flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
3443 if (flag_pcc_struct_return == 2)
3444 flag_pcc_struct_return = 0;
3445 }
3446 else
3447 {
3448 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3449 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3450 if (flag_asynchronous_unwind_tables == 2)
3451 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3452 if (flag_pcc_struct_return == 2)
3453 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3454 }
3455
3456 if (optimize_size)
3457 ix86_cost = &ix86_size_cost;
3458 else
3459 ix86_cost = processor_target_table[ix86_tune].cost;
3460
3461 /* Arrange to set up i386_stack_locals for all functions. */
3462 init_machine_status = ix86_init_machine_status;
3463
3464 /* Validate -mregparm= value. */
3465 if (global_options_set.x_ix86_regparm)
3466 {
3467 if (TARGET_64BIT)
3468 warning (0, "-mregparm is ignored in 64-bit mode");
3469 if (ix86_regparm > REGPARM_MAX)
3470 {
3471 error ("-mregparm=%d is not between 0 and %d",
3472 ix86_regparm, REGPARM_MAX);
3473 ix86_regparm = 0;
3474 }
3475 }
3476 if (TARGET_64BIT)
3477 ix86_regparm = REGPARM_MAX;
3478
3479 /* Default align_* from the processor table. */
3480 if (align_loops == 0)
3481 {
3482 align_loops = processor_target_table[ix86_tune].align_loop;
3483 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3484 }
3485 if (align_jumps == 0)
3486 {
3487 align_jumps = processor_target_table[ix86_tune].align_jump;
3488 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3489 }
3490 if (align_functions == 0)
3491 {
3492 align_functions = processor_target_table[ix86_tune].align_func;
3493 }
3494
3495 /* Provide default for -mbranch-cost= value. */
3496 if (!global_options_set.x_ix86_branch_cost)
3497 ix86_branch_cost = ix86_cost->branch_cost;
3498
3499 if (TARGET_64BIT)
3500 {
3501 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3502
3503 /* Enable by default the SSE and MMX builtins. Do allow the user to
3504 explicitly disable any of these. In particular, disabling SSE and
3505 MMX for kernel code is extremely useful. */
3506 if (!ix86_arch_specified)
3507 ix86_isa_flags
3508 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3509 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3510
3511 if (TARGET_RTD)
3512 	warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3513 }
3514 else
3515 {
3516 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3517
3518 if (!ix86_arch_specified)
3519 ix86_isa_flags
3520 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3521
3522       /* The i386 ABI does not specify a red zone.  It still makes sense to use
3523 	 one when the programmer takes care to keep the stack from being clobbered. */
3524 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3525 target_flags |= MASK_NO_RED_ZONE;
3526 }
3527
3528 /* Keep nonleaf frame pointers. */
3529 if (flag_omit_frame_pointer)
3530 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3531 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3532 flag_omit_frame_pointer = 1;
3533
3534 /* If we're doing fast math, we don't care about comparison order
3535 wrt NaNs. This lets us use a shorter comparison sequence. */
3536 if (flag_finite_math_only)
3537 target_flags &= ~MASK_IEEE_FP;
3538
3539 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3540 since the insns won't need emulation. */
3541 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3542 target_flags &= ~MASK_NO_FANCY_MATH_387;
3543
3544 /* Likewise, if the target doesn't have a 387, or we've specified
3545 software floating point, don't use 387 inline intrinsics. */
3546 if (!TARGET_80387)
3547 target_flags |= MASK_NO_FANCY_MATH_387;
3548
3549 /* Turn on MMX builtins for -msse. */
3550 if (TARGET_SSE)
3551 {
3552 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3553 x86_prefetch_sse = true;
3554 }
3555
3556 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3557 if (TARGET_SSE4_2 || TARGET_ABM)
3558 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3559
3560 /* Turn on lzcnt instruction for -mabm. */
3561 if (TARGET_ABM)
3562 ix86_isa_flags |= OPTION_MASK_ISA_LZCNT & ~ix86_isa_flags_explicit;
3563
3564 /* Validate -mpreferred-stack-boundary= value or default it to
3565 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3566 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3567 if (global_options_set.x_ix86_preferred_stack_boundary_arg)
3568 {
3569 int min = (TARGET_64BIT ? 4 : 2);
3570 int max = (TARGET_SEH ? 4 : 12);
3571
3572 if (ix86_preferred_stack_boundary_arg < min
3573 || ix86_preferred_stack_boundary_arg > max)
3574 {
3575 if (min == max)
3576 error ("-mpreferred-stack-boundary is not supported "
3577 "for this target");
3578 else
3579 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3580 ix86_preferred_stack_boundary_arg, min, max);
3581 }
3582 else
3583 ix86_preferred_stack_boundary
3584 = (1 << ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
3585 }
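  /* For illustration: the argument is a log2 byte count, so
     -mpreferred-stack-boundary=4 yields (1 << 4) * BITS_PER_UNIT
     = 16 bytes = 128 bits, matching the 16-byte stack alignment the
     x86-64 psABI requires; the 32-bit minimum of 2 corresponds to
     4-byte alignment.  */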
3586
3587 /* Set the default value for -mstackrealign. */
3588 if (ix86_force_align_arg_pointer == -1)
3589 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3590
3591 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3592
3593 /* Validate -mincoming-stack-boundary= value or default it to
3594 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3595 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3596 if (global_options_set.x_ix86_incoming_stack_boundary_arg)
3597 {
3598 if (ix86_incoming_stack_boundary_arg < (TARGET_64BIT ? 4 : 2)
3599 || ix86_incoming_stack_boundary_arg > 12)
3600 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3601 ix86_incoming_stack_boundary_arg, TARGET_64BIT ? 4 : 2);
3602 else
3603 {
3604 ix86_user_incoming_stack_boundary
3605 = (1 << ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3606 ix86_incoming_stack_boundary
3607 = ix86_user_incoming_stack_boundary;
3608 }
3609 }
3610
3611 /* Accept -msseregparm only if at least SSE support is enabled. */
3612 if (TARGET_SSEREGPARM
3613 && ! TARGET_SSE)
3614 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3615
3616 if (global_options_set.x_ix86_fpmath)
3617 {
3618 if (ix86_fpmath & FPMATH_SSE)
3619 {
3620 if (!TARGET_SSE)
3621 {
3622 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3623 ix86_fpmath = FPMATH_387;
3624 }
3625 else if ((ix86_fpmath & FPMATH_387) && !TARGET_80387)
3626 {
3627 warning (0, "387 instruction set disabled, using SSE arithmetics");
3628 ix86_fpmath = FPMATH_SSE;
3629 }
3630 }
3631 }
3632 else
3633 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3634
3635 /* If the i387 is disabled, then do not return values in it. */
3636 if (!TARGET_80387)
3637 target_flags &= ~MASK_FLOAT_RETURNS;
3638
3639 /* Use external vectorized library in vectorizing intrinsics. */
3640 if (global_options_set.x_ix86_veclibabi_type)
3641 switch (ix86_veclibabi_type)
3642 {
3643 case ix86_veclibabi_type_svml:
3644 ix86_veclib_handler = ix86_veclibabi_svml;
3645 break;
3646
3647 case ix86_veclibabi_type_acml:
3648 ix86_veclib_handler = ix86_veclibabi_acml;
3649 break;
3650
3651 default:
3652 gcc_unreachable ();
3653 }
3654
3655 if ((!USE_IX86_FRAME_POINTER
3656 || (x86_accumulate_outgoing_args & ix86_tune_mask))
3657 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3658 && !optimize_size)
3659 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3660
3661 /* ??? Unwind info is not correct around the CFG unless either a frame
3662 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3663 unwind info generation to be aware of the CFG and propagating states
3664 around edges. */
3665 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3666 || flag_exceptions || flag_non_call_exceptions)
3667 && flag_omit_frame_pointer
3668 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3669 {
3670 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3671 warning (0, "unwind tables currently require either a frame pointer "
3672 "or %saccumulate-outgoing-args%s for correctness",
3673 prefix, suffix);
3674 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3675 }
3676
3677 /* If stack probes are required, the space used for large function
3678 arguments on the stack must also be probed, so enable
3679 -maccumulate-outgoing-args so this happens in the prologue. */
3680 if (TARGET_STACK_PROBE
3681 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3682 {
3683 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3684 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3685 "for correctness", prefix, suffix);
3686 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3687 }
3688
3689   /* For sane SSE instruction set generation we need the fcomi instruction.
3690      It is safe to enable all CMOVE instructions.  Also, the RDRAND intrinsic
3691      expands to a sequence that includes a conditional move. */
3692 if (TARGET_SSE || TARGET_RDRND)
3693 TARGET_CMOVE = 1;
3694
3695 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3696 {
3697 char *p;
3698 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3699 p = strchr (internal_label_prefix, 'X');
3700 internal_label_prefix_len = p - internal_label_prefix;
3701 *p = '\0';
3702 }
3703
3704   /* When no scheduling description is available, disable the scheduler passes
3705      so they don't slow down compilation or make x87 code slower. */
3706 if (!TARGET_SCHEDULE)
3707 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3708
3709 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3710 ix86_cost->simultaneous_prefetches,
3711 global_options.x_param_values,
3712 global_options_set.x_param_values);
3713 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
3714 global_options.x_param_values,
3715 global_options_set.x_param_values);
3716 maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
3717 global_options.x_param_values,
3718 global_options_set.x_param_values);
3719 maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
3720 global_options.x_param_values,
3721 global_options_set.x_param_values);
3722
3723   /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
3724 if (flag_prefetch_loop_arrays < 0
3725 && HAVE_prefetch
3726 && optimize >= 3
3727 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
3728 flag_prefetch_loop_arrays = 1;
3729
3730 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3731 can be optimized to ap = __builtin_next_arg (0). */
3732 if (!TARGET_64BIT && !flag_split_stack)
3733 targetm.expand_builtin_va_start = NULL;
3734
3735 if (TARGET_64BIT)
3736 {
3737 ix86_gen_leave = gen_leave_rex64;
3738 ix86_gen_add3 = gen_adddi3;
3739 ix86_gen_sub3 = gen_subdi3;
3740 ix86_gen_sub3_carry = gen_subdi3_carry;
3741 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3742 ix86_gen_monitor = gen_sse3_monitor64;
3743 ix86_gen_andsp = gen_anddi3;
3744 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
3745 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
3746 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
3747 }
3748 else
3749 {
3750 ix86_gen_leave = gen_leave;
3751 ix86_gen_add3 = gen_addsi3;
3752 ix86_gen_sub3 = gen_subsi3;
3753 ix86_gen_sub3_carry = gen_subsi3_carry;
3754 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3755 ix86_gen_monitor = gen_sse3_monitor;
3756 ix86_gen_andsp = gen_andsi3;
3757 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
3758 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
3759 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
3760 }
3761
3762 #ifdef USE_IX86_CLD
3763 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3764 if (!TARGET_64BIT)
3765 target_flags |= MASK_CLD & ~target_flags_explicit;
3766 #endif
3767
3768 if (!TARGET_64BIT && flag_pic)
3769 {
3770 if (flag_fentry > 0)
3771 sorry ("-mfentry isn%'t supported for 32-bit in combination "
3772 "with -fpic");
3773 flag_fentry = 0;
3774 }
3775 else if (TARGET_SEH)
3776 {
3777 if (flag_fentry == 0)
3778 sorry ("-mno-fentry isn%'t compatible with SEH");
3779 flag_fentry = 1;
3780 }
3781 else if (flag_fentry < 0)
3782 {
3783 #if defined(PROFILE_BEFORE_PROLOGUE)
3784 flag_fentry = 1;
3785 #else
3786 flag_fentry = 0;
3787 #endif
3788 }
3789
3790 if (TARGET_AVX)
3791 {
3792       /* When not optimizing for size, enable the vzeroupper optimization for
3793 	 TARGET_AVX with -fexpensive-optimizations, and split 32-byte
3794 	 AVX unaligned loads/stores. */
3795 if (!optimize_size)
3796 {
3797 if (flag_expensive_optimizations
3798 && !(target_flags_explicit & MASK_VZEROUPPER))
3799 target_flags |= MASK_VZEROUPPER;
3800 if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
3801 && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
3802 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
3803 if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
3804 && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
3805 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
3806 /* Enable 128-bit AVX instruction generation for the auto-vectorizer. */
3807 if (TARGET_AVX128_OPTIMAL && !(target_flags_explicit & MASK_PREFER_AVX128))
3808 target_flags |= MASK_PREFER_AVX128;
3809 }
3810 }
3811 else
3812 {
3813 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
3814 target_flags &= ~MASK_VZEROUPPER;
3815 }
3816
3817   /* Save the initial options in case the user specifies function-specific
3818      options. */
3819 if (main_args_p)
3820 target_option_default_node = target_option_current_node
3821 = build_target_option_node ();
3822 }
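/* Illustrative example of the override logic above (not exhaustive):
   with -march=k8 and no explicit -mtune, ix86_arch and ix86_tune both
   become PROCESSOR_K8, and the PTA_* bits of the "k8" alias entry turn
   on MMX, 3DNow!, SSE and SSE2 unless the user set those ISA flags
   explicitly; adding -mtune=generic keeps the k8 ISA selection but
   switches the cost tables and tuning features to the generic ones.  */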
3823
3824 /* Return TRUE if VAL is passed in a register in a 256-bit AVX mode. */
3825
3826 static bool
3827 function_pass_avx256_p (const_rtx val)
3828 {
3829 if (!val)
3830 return false;
3831
3832 if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
3833 return true;
3834
3835 if (GET_CODE (val) == PARALLEL)
3836 {
3837 int i;
3838 rtx r;
3839
3840 for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
3841 {
3842 r = XVECEXP (val, 0, i);
3843 if (GET_CODE (r) == EXPR_LIST
3844 && XEXP (r, 0)
3845 && REG_P (XEXP (r, 0))
3846 && (GET_MODE (XEXP (r, 0)) == OImode
3847 || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
3848 return true;
3849 }
3850 }
3851
3852 return false;
3853 }
3854
3855 /* Implement the TARGET_OPTION_OVERRIDE hook. */
3856
3857 static void
3858 ix86_option_override (void)
3859 {
3860 ix86_option_override_internal (true);
3861 }
3862
3863 /* Update register usage after having seen the compiler flags. */
3864
3865 static void
3866 ix86_conditional_register_usage (void)
3867 {
3868 int i;
3869 unsigned int j;
3870
3871 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3872 {
3873 if (fixed_regs[i] > 1)
3874 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3875 if (call_used_regs[i] > 1)
3876 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3877 }
3878
3879 /* The PIC register, if it exists, is fixed. */
3880 j = PIC_OFFSET_TABLE_REGNUM;
3881 if (j != INVALID_REGNUM)
3882 fixed_regs[j] = call_used_regs[j] = 1;
3883
3884 /* The 64-bit MS_ABI changes the set of call-used registers. */
3885 if (TARGET_64BIT_MS_ABI)
3886 {
3887 call_used_regs[SI_REG] = 0;
3888 call_used_regs[DI_REG] = 0;
3889 call_used_regs[XMM6_REG] = 0;
3890 call_used_regs[XMM7_REG] = 0;
3891 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3892 call_used_regs[i] = 0;
3893 }
3894
3895 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3896 other call-clobbered regs for 64-bit. */
3897 if (TARGET_64BIT)
3898 {
3899 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3900
3901 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3902 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3903 && call_used_regs[i])
3904 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3905 }
3906
3907 /* If MMX is disabled, squash the registers. */
3908 if (! TARGET_MMX)
3909 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3910 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3911 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3912
3913 /* If SSE is disabled, squash the registers. */
3914 if (! TARGET_SSE)
3915 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3916 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3917 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3918
3919 /* If the FPU is disabled, squash the registers. */
3920 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3921 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3922 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3923 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3924
3925 /* If 32-bit, squash the 64-bit registers. */
3926 if (! TARGET_64BIT)
3927 {
3928 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3929 reg_names[i] = "";
3930 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3931 reg_names[i] = "";
3932 }
3933 }
3934
3935 \f
3936 /* Save the current options */
3937
3938 static void
3939 ix86_function_specific_save (struct cl_target_option *ptr)
3940 {
3941 ptr->arch = ix86_arch;
3942 ptr->schedule = ix86_schedule;
3943 ptr->tune = ix86_tune;
3944 ptr->branch_cost = ix86_branch_cost;
3945 ptr->tune_defaulted = ix86_tune_defaulted;
3946 ptr->arch_specified = ix86_arch_specified;
3947 ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3948 ptr->ix86_target_flags_explicit = target_flags_explicit;
3949
3950 /* The fields are char but the variables are not; make sure the
3951 values fit in the fields. */
3952 gcc_assert (ptr->arch == ix86_arch);
3953 gcc_assert (ptr->schedule == ix86_schedule);
3954 gcc_assert (ptr->tune == ix86_tune);
3955 gcc_assert (ptr->branch_cost == ix86_branch_cost);
3956 }
3957
3958 /* Restore the current options */
3959
3960 static void
3961 ix86_function_specific_restore (struct cl_target_option *ptr)
3962 {
3963 enum processor_type old_tune = ix86_tune;
3964 enum processor_type old_arch = ix86_arch;
3965 unsigned int ix86_arch_mask, ix86_tune_mask;
3966 int i;
3967
3968 ix86_arch = (enum processor_type) ptr->arch;
3969 ix86_schedule = (enum attr_cpu) ptr->schedule;
3970 ix86_tune = (enum processor_type) ptr->tune;
3971 ix86_branch_cost = ptr->branch_cost;
3972 ix86_tune_defaulted = ptr->tune_defaulted;
3973 ix86_arch_specified = ptr->arch_specified;
3974 ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
3975 target_flags_explicit = ptr->ix86_target_flags_explicit;
3976
3977 /* Recreate the arch feature tests if the arch changed */
3978 if (old_arch != ix86_arch)
3979 {
3980 ix86_arch_mask = 1u << ix86_arch;
3981 for (i = 0; i < X86_ARCH_LAST; ++i)
3982 ix86_arch_features[i]
3983 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3984 }
3985
3986 /* Recreate the tune optimization tests */
3987 if (old_tune != ix86_tune)
3988 {
3989 ix86_tune_mask = 1u << ix86_tune;
3990 for (i = 0; i < X86_TUNE_LAST; ++i)
3991 ix86_tune_features[i]
3992 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3993 }
3994 }
3995
3996 /* Print the current options */
3997
3998 static void
3999 ix86_function_specific_print (FILE *file, int indent,
4000 struct cl_target_option *ptr)
4001 {
4002 char *target_string
4003 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4004 NULL, NULL, ptr->x_ix86_fpmath, false);
4005
4006 fprintf (file, "%*sarch = %d (%s)\n",
4007 indent, "",
4008 ptr->arch,
4009 ((ptr->arch < TARGET_CPU_DEFAULT_max)
4010 ? cpu_names[ptr->arch]
4011 : "<unknown>"));
4012
4013 fprintf (file, "%*stune = %d (%s)\n",
4014 indent, "",
4015 ptr->tune,
4016 ((ptr->tune < TARGET_CPU_DEFAULT_max)
4017 ? cpu_names[ptr->tune]
4018 : "<unknown>"));
4019
4020 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4021
4022 if (target_string)
4023 {
4024 fprintf (file, "%*s%s\n", indent, "", target_string);
4025 free (target_string);
4026 }
4027 }
4028
4029 \f
4030 /* Inner function to process the attribute((target(...))): take one argument
4031    and set the current options from it.  If the argument is a list, recurse
4032    over each element. */
4033
4034 static bool
4035 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4036 struct gcc_options *enum_opts_set)
4037 {
4038 char *next_optstr;
4039 bool ret = true;
4040
4041 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4042 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4043 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4044 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4045 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4046
4047 enum ix86_opt_type
4048 {
4049 ix86_opt_unknown,
4050 ix86_opt_yes,
4051 ix86_opt_no,
4052 ix86_opt_str,
4053 ix86_opt_enum,
4054 ix86_opt_isa
4055 };
4056
4057 static const struct
4058 {
4059 const char *string;
4060 size_t len;
4061 enum ix86_opt_type type;
4062 int opt;
4063 int mask;
4064 } attrs[] = {
4065 /* isa options */
4066 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4067 IX86_ATTR_ISA ("abm", OPT_mabm),
4068 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4069 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4070 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4071 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4072 IX86_ATTR_ISA ("aes", OPT_maes),
4073 IX86_ATTR_ISA ("avx", OPT_mavx),
4074 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4075 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4076 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4077 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4078 IX86_ATTR_ISA ("sse", OPT_msse),
4079 IX86_ATTR_ISA ("sse2", OPT_msse2),
4080 IX86_ATTR_ISA ("sse3", OPT_msse3),
4081 IX86_ATTR_ISA ("sse4", OPT_msse4),
4082 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4083 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4084 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4085 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4086 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4087 IX86_ATTR_ISA ("fma", OPT_mfma),
4088 IX86_ATTR_ISA ("xop", OPT_mxop),
4089 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4090 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4091 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4092 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4093
4094 /* enum options */
4095 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4096
4097 /* string options */
4098 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4099 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4100
4101 /* flag options */
4102 IX86_ATTR_YES ("cld",
4103 OPT_mcld,
4104 MASK_CLD),
4105
4106 IX86_ATTR_NO ("fancy-math-387",
4107 OPT_mfancy_math_387,
4108 MASK_NO_FANCY_MATH_387),
4109
4110 IX86_ATTR_YES ("ieee-fp",
4111 OPT_mieee_fp,
4112 MASK_IEEE_FP),
4113
4114 IX86_ATTR_YES ("inline-all-stringops",
4115 OPT_minline_all_stringops,
4116 MASK_INLINE_ALL_STRINGOPS),
4117
4118 IX86_ATTR_YES ("inline-stringops-dynamically",
4119 OPT_minline_stringops_dynamically,
4120 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4121
4122 IX86_ATTR_NO ("align-stringops",
4123 OPT_mno_align_stringops,
4124 MASK_NO_ALIGN_STRINGOPS),
4125
4126 IX86_ATTR_YES ("recip",
4127 OPT_mrecip,
4128 MASK_RECIP),
4129
4130 };
4131
4132 /* If this is a list, recurse to get the options. */
4133 if (TREE_CODE (args) == TREE_LIST)
4134 {
4135 bool ret = true;
4136
4137 for (; args; args = TREE_CHAIN (args))
4138 if (TREE_VALUE (args)
4139 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4140 p_strings, enum_opts_set))
4141 ret = false;
4142
4143 return ret;
4144 }
4145
4146 else if (TREE_CODE (args) != STRING_CST)
4147 gcc_unreachable ();
4148
4149 /* Handle multiple arguments separated by commas. */
4150 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4151
4152 while (next_optstr && *next_optstr != '\0')
4153 {
4154 char *p = next_optstr;
4155 char *orig_p = p;
4156 char *comma = strchr (next_optstr, ',');
4157 const char *opt_string;
4158 size_t len, opt_len;
4159 int opt;
4160 bool opt_set_p;
4161 char ch;
4162 unsigned i;
4163 enum ix86_opt_type type = ix86_opt_unknown;
4164 int mask = 0;
4165
4166 if (comma)
4167 {
4168 *comma = '\0';
4169 len = comma - next_optstr;
4170 next_optstr = comma + 1;
4171 }
4172 else
4173 {
4174 len = strlen (p);
4175 next_optstr = NULL;
4176 }
4177
4178 /* Recognize no-xxx. */
4179 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4180 {
4181 opt_set_p = false;
4182 p += 3;
4183 len -= 3;
4184 }
4185 else
4186 opt_set_p = true;
4187
4188 /* Find the option. */
4189 ch = *p;
4190 opt = N_OPTS;
4191 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4192 {
4193 type = attrs[i].type;
4194 opt_len = attrs[i].len;
4195 if (ch == attrs[i].string[0]
4196 && ((type != ix86_opt_str && type != ix86_opt_enum)
4197 ? len == opt_len
4198 : len > opt_len)
4199 && memcmp (p, attrs[i].string, opt_len) == 0)
4200 {
4201 opt = attrs[i].opt;
4202 mask = attrs[i].mask;
4203 opt_string = attrs[i].string;
4204 break;
4205 }
4206 }
4207
4208 /* Process the option. */
4209 if (opt == N_OPTS)
4210 {
4211 error ("attribute(target(\"%s\")) is unknown", orig_p);
4212 ret = false;
4213 }
4214
4215 else if (type == ix86_opt_isa)
4216 {
4217 struct cl_decoded_option decoded;
4218
4219 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4220 ix86_handle_option (&global_options, &global_options_set,
4221 &decoded, input_location);
4222 }
4223
4224 else if (type == ix86_opt_yes || type == ix86_opt_no)
4225 {
4226 if (type == ix86_opt_no)
4227 opt_set_p = !opt_set_p;
4228
4229 if (opt_set_p)
4230 target_flags |= mask;
4231 else
4232 target_flags &= ~mask;
4233 }
4234
4235 else if (type == ix86_opt_str)
4236 {
4237 if (p_strings[opt])
4238 {
4239 error ("option(\"%s\") was already specified", opt_string);
4240 ret = false;
4241 }
4242 else
4243 p_strings[opt] = xstrdup (p + opt_len);
4244 }
4245
4246 else if (type == ix86_opt_enum)
4247 {
4248 bool arg_ok;
4249 int value;
4250
4251 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4252 if (arg_ok)
4253 set_option (&global_options, enum_opts_set, opt, value,
4254 p + opt_len, DK_UNSPECIFIED, input_location,
4255 global_dc);
4256 else
4257 {
4258 error ("attribute(target(\"%s\")) is unknown", orig_p);
4259 ret = false;
4260 }
4261 }
4262
4263 else
4264 gcc_unreachable ();
4265 }
4266
4267 return ret;
4268 }
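/* For illustration, the strings recognized above correspond directly to
   source-level attributes; assuming an x86 target, e.g.

     __attribute__((target ("sse4.2,no-avx"))) void f (void);
     __attribute__((target ("arch=core2,tune=generic"))) void g (void);

   each comma-separated token is matched against the attrs[] table, with a
   leading "no-" clearing the corresponding ISA flag or target mask bit.  */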
4269
4270 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4271
4272 tree
4273 ix86_valid_target_attribute_tree (tree args)
4274 {
4275 const char *orig_arch_string = ix86_arch_string;
4276 const char *orig_tune_string = ix86_tune_string;
4277 enum fpmath_unit orig_fpmath_set = global_options_set.x_ix86_fpmath;
4278 int orig_tune_defaulted = ix86_tune_defaulted;
4279 int orig_arch_specified = ix86_arch_specified;
4280 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4281 tree t = NULL_TREE;
4282 int i;
4283 struct cl_target_option *def
4284 = TREE_TARGET_OPTION (target_option_default_node);
4285 struct gcc_options enum_opts_set;
4286
4287 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4288
4289 /* Process each of the options on the chain. */
4290 if (! ix86_valid_target_attribute_inner_p (args, option_strings,
4291 &enum_opts_set))
4292 return NULL_TREE;
4293
4294 /* If the changed options are different from the default, rerun
4295 ix86_option_override_internal, and then save the options away.
4296      The string options are attribute options, and will be undone
4297 when we copy the save structure. */
4298 if (ix86_isa_flags != def->x_ix86_isa_flags
4299 || target_flags != def->x_target_flags
4300 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4301 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4302 || enum_opts_set.x_ix86_fpmath)
4303 {
4304 /* If we are using the default tune= or arch=, undo the string assigned,
4305 and use the default. */
4306 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4307 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4308 else if (!orig_arch_specified)
4309 ix86_arch_string = NULL;
4310
4311 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4312 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4313 else if (orig_tune_defaulted)
4314 ix86_tune_string = NULL;
4315
4316 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4317 if (enum_opts_set.x_ix86_fpmath)
4318 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4319 else if (!TARGET_64BIT && TARGET_SSE)
4320 {
4321 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4322 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4323 }
4324
4325 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4326 ix86_option_override_internal (false);
4327
4328 /* Add any builtin functions with the new isa if any. */
4329 ix86_add_new_builtins (ix86_isa_flags);
4330
4331 /* Save the current options unless we are validating options for
4332 #pragma. */
4333 t = build_target_option_node ();
4334
4335 ix86_arch_string = orig_arch_string;
4336 ix86_tune_string = orig_tune_string;
4337 global_options_set.x_ix86_fpmath = orig_fpmath_set;
4338
4339 /* Free up memory allocated to hold the strings */
4340 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4341 free (option_strings[i]);
4342 }
4343
4344 return t;
4345 }
4346
4347 /* Hook to validate attribute((target("string"))). */
4348
4349 static bool
4350 ix86_valid_target_attribute_p (tree fndecl,
4351 tree ARG_UNUSED (name),
4352 tree args,
4353 int ARG_UNUSED (flags))
4354 {
4355 struct cl_target_option cur_target;
4356 bool ret = true;
4357 tree old_optimize = build_optimization_node ();
4358 tree new_target, new_optimize;
4359 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4360
4361 /* If the function changed the optimization levels as well as setting target
4362 options, start with the optimizations specified. */
4363 if (func_optimize && func_optimize != old_optimize)
4364 cl_optimization_restore (&global_options,
4365 TREE_OPTIMIZATION (func_optimize));
4366
4367 /* The target attributes may also change some optimization flags, so update
4368 the optimization options if necessary. */
4369 cl_target_option_save (&cur_target, &global_options);
4370 new_target = ix86_valid_target_attribute_tree (args);
4371 new_optimize = build_optimization_node ();
4372
4373 if (!new_target)
4374 ret = false;
4375
4376 else if (fndecl)
4377 {
4378 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4379
4380 if (old_optimize != new_optimize)
4381 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4382 }
4383
4384 cl_target_option_restore (&global_options, &cur_target);
4385
4386 if (old_optimize != new_optimize)
4387 cl_optimization_restore (&global_options,
4388 TREE_OPTIMIZATION (old_optimize));
4389
4390 return ret;
4391 }
4392
4393 \f
4394 /* Hook to determine if one function can safely inline another. */
4395
4396 static bool
4397 ix86_can_inline_p (tree caller, tree callee)
4398 {
4399 bool ret = false;
4400 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4401 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4402
4403 /* If callee has no option attributes, then it is ok to inline. */
4404 if (!callee_tree)
4405 ret = true;
4406
4407 /* If caller has no option attributes, but callee does then it is not ok to
4408 inline. */
4409 else if (!caller_tree)
4410 ret = false;
4411
4412 else
4413 {
4414 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4415 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4416
4417       /* The callee's ISA options should be a subset of the caller's, i.e. an SSE4
4418 	 function can inline an SSE2 function, but an SSE2 function can't inline
4419 	 an SSE4 function. */
4420 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4421 != callee_opts->x_ix86_isa_flags)
4422 ret = false;
4423
4424 /* See if we have the same non-isa options. */
4425 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4426 ret = false;
4427
4428 /* See if arch, tune, etc. are the same. */
4429 else if (caller_opts->arch != callee_opts->arch)
4430 ret = false;
4431
4432 else if (caller_opts->tune != callee_opts->tune)
4433 ret = false;
4434
4435 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4436 ret = false;
4437
4438 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4439 ret = false;
4440
4441 else
4442 ret = true;
4443 }
4444
4445 return ret;
4446 }
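/* A minimal sketch of the rule above, assuming default -march/-mtune so
   that only the ISA attributes differ between the two functions:

     __attribute__((target ("sse2")))   static int callee (int x) { return x + 1; }
     __attribute__((target ("sse4.2"))) int caller (int x) { return callee (x); }

   Here caller may inline callee, since -msse4.2 implies SSE2 and the
   callee's ISA flags are therefore a subset of the caller's; swapping
   the two attributes would make ix86_can_inline_p return false.  */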
4447
4448 \f
4449 /* Remember the last target of ix86_set_current_function. */
4450 static GTY(()) tree ix86_previous_fndecl;
4451
4452 /* Establish appropriate back-end context for processing the function
4453 FNDECL. The argument might be NULL to indicate processing at top
4454 level, outside of any function scope. */
4455 static void
4456 ix86_set_current_function (tree fndecl)
4457 {
4458 /* Only change the context if the function changes. This hook is called
4459 several times in the course of compiling a function, and we don't want to
4460 slow things down too much or call target_reinit when it isn't safe. */
4461 if (fndecl && fndecl != ix86_previous_fndecl)
4462 {
4463 tree old_tree = (ix86_previous_fndecl
4464 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4465 : NULL_TREE);
4466
4467 tree new_tree = (fndecl
4468 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4469 : NULL_TREE);
4470
4471 ix86_previous_fndecl = fndecl;
4472 if (old_tree == new_tree)
4473 ;
4474
4475 else if (new_tree)
4476 {
4477 cl_target_option_restore (&global_options,
4478 TREE_TARGET_OPTION (new_tree));
4479 target_reinit ();
4480 }
4481
4482 else if (old_tree)
4483 {
4484 struct cl_target_option *def
4485 = TREE_TARGET_OPTION (target_option_current_node);
4486
4487 cl_target_option_restore (&global_options, def);
4488 target_reinit ();
4489 }
4490 }
4491 }
4492
4493 \f
4494 /* Return true if this goes in large data/bss. */
4495
4496 static bool
4497 ix86_in_large_data_p (tree exp)
4498 {
4499 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4500 return false;
4501
4502 /* Functions are never large data. */
4503 if (TREE_CODE (exp) == FUNCTION_DECL)
4504 return false;
4505
4506 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4507 {
4508 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4509 if (strcmp (section, ".ldata") == 0
4510 || strcmp (section, ".lbss") == 0)
4511 return true;
4512 return false;
4513 }
4514 else
4515 {
4516 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4517
4518 /* If this is an incomplete type with size 0, then we can't put it
4519 in data because it might be too big when completed. */
4520 if (!size || size > ix86_section_threshold)
4521 return true;
4522 }
4523
4524 return false;
4525 }
4526
4527 /* Switch to the appropriate section for output of DECL.
4528 DECL is either a `VAR_DECL' node or a constant of some sort.
4529 RELOC indicates whether forming the initial value of DECL requires
4530 link-time relocations. */
4531
4532 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4533 ATTRIBUTE_UNUSED;
4534
4535 static section *
4536 x86_64_elf_select_section (tree decl, int reloc,
4537 unsigned HOST_WIDE_INT align)
4538 {
4539 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4540 && ix86_in_large_data_p (decl))
4541 {
4542 const char *sname = NULL;
4543 unsigned int flags = SECTION_WRITE;
4544 switch (categorize_decl_for_section (decl, reloc))
4545 {
4546 case SECCAT_DATA:
4547 sname = ".ldata";
4548 break;
4549 case SECCAT_DATA_REL:
4550 sname = ".ldata.rel";
4551 break;
4552 case SECCAT_DATA_REL_LOCAL:
4553 sname = ".ldata.rel.local";
4554 break;
4555 case SECCAT_DATA_REL_RO:
4556 sname = ".ldata.rel.ro";
4557 break;
4558 case SECCAT_DATA_REL_RO_LOCAL:
4559 sname = ".ldata.rel.ro.local";
4560 break;
4561 case SECCAT_BSS:
4562 sname = ".lbss";
4563 flags |= SECTION_BSS;
4564 break;
4565 case SECCAT_RODATA:
4566 case SECCAT_RODATA_MERGE_STR:
4567 case SECCAT_RODATA_MERGE_STR_INIT:
4568 case SECCAT_RODATA_MERGE_CONST:
4569 sname = ".lrodata";
4570 flags = 0;
4571 break;
4572 case SECCAT_SRODATA:
4573 case SECCAT_SDATA:
4574 case SECCAT_SBSS:
4575 gcc_unreachable ();
4576 case SECCAT_TEXT:
4577 case SECCAT_TDATA:
4578 case SECCAT_TBSS:
4579 	  /* We don't split these for the medium model.  Place them into
4580 	     default sections and hope for the best. */
4581 break;
4582 }
4583 if (sname)
4584 {
4585 /* We might get called with string constants, but get_named_section
4586 doesn't like them as they are not DECLs. Also, we need to set
4587 flags in that case. */
4588 if (!DECL_P (decl))
4589 return get_section (sname, flags, NULL);
4590 return get_named_section (decl, sname, reloc);
4591 }
4592 }
4593 return default_elf_select_section (decl, reloc, align);
4594 }
4595
4596 /* Build up a unique section name, expressed as a
4597 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4598 RELOC indicates whether the initial value of EXP requires
4599 link-time relocations. */
4600
4601 static void ATTRIBUTE_UNUSED
4602 x86_64_elf_unique_section (tree decl, int reloc)
4603 {
4604 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4605 && ix86_in_large_data_p (decl))
4606 {
4607 const char *prefix = NULL;
4608 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4609 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4610
4611 switch (categorize_decl_for_section (decl, reloc))
4612 {
4613 case SECCAT_DATA:
4614 case SECCAT_DATA_REL:
4615 case SECCAT_DATA_REL_LOCAL:
4616 case SECCAT_DATA_REL_RO:
4617 case SECCAT_DATA_REL_RO_LOCAL:
4618 prefix = one_only ? ".ld" : ".ldata";
4619 break;
4620 case SECCAT_BSS:
4621 prefix = one_only ? ".lb" : ".lbss";
4622 break;
4623 case SECCAT_RODATA:
4624 case SECCAT_RODATA_MERGE_STR:
4625 case SECCAT_RODATA_MERGE_STR_INIT:
4626 case SECCAT_RODATA_MERGE_CONST:
4627 prefix = one_only ? ".lr" : ".lrodata";
4628 break;
4629 case SECCAT_SRODATA:
4630 case SECCAT_SDATA:
4631 case SECCAT_SBSS:
4632 gcc_unreachable ();
4633 case SECCAT_TEXT:
4634 case SECCAT_TDATA:
4635 case SECCAT_TBSS:
4636 	  /* We don't split these for the medium model.  Place them into
4637 	     default sections and hope for the best. */
4638 break;
4639 }
4640 if (prefix)
4641 {
4642 const char *name, *linkonce;
4643 char *string;
4644
4645 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4646 name = targetm.strip_name_encoding (name);
4647
4648 /* If we're using one_only, then there needs to be a .gnu.linkonce
4649 prefix to the section name. */
4650 linkonce = one_only ? ".gnu.linkonce" : "";
4651
4652 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4653
4654 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4655 return;
4656 }
4657 }
4658 default_unique_section (decl, reloc);
4659 }
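/* For example (medium code model, object above the -mlarge-data-threshold
   size): the unique section for a hypothetical writable variable "foo"
   would be named ".ldata.foo", or ".gnu.linkonce.ld.foo" when one_only is
   in effect, mirroring the ".data"/".gnu.linkonce.d" scheme of the
   default hook.  */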
4660
4661 #ifdef COMMON_ASM_OP
4662 /* This says how to output assembler code to declare an
4663 uninitialized external linkage data object.
4664
4665    For medium model x86-64 we need to use the .largecomm directive for
4666 large objects. */
4667 void
4668 x86_elf_aligned_common (FILE *file,
4669 const char *name, unsigned HOST_WIDE_INT size,
4670 int align)
4671 {
4672 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4673 && size > (unsigned int)ix86_section_threshold)
4674 fputs (".largecomm\t", file);
4675 else
4676 fputs (COMMON_ASM_OP, file);
4677 assemble_name (file, name);
4678 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4679 size, align / BITS_PER_UNIT);
4680 }
4681 #endif
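/* With the medium code model this emits, for a sufficiently large common
   object (hypothetical name big_buf), something along the lines of

     .largecomm	big_buf,1048576,32

   whereas smaller objects keep the usual COMMON_ASM_OP (".comm") form.  */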
4682
4683 /* Utility function for targets to use in implementing
4684 ASM_OUTPUT_ALIGNED_BSS. */
4685
4686 void
4687 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4688 const char *name, unsigned HOST_WIDE_INT size,
4689 int align)
4690 {
4691 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4692 && size > (unsigned int)ix86_section_threshold)
4693 switch_to_section (get_named_section (decl, ".lbss", 0));
4694 else
4695 switch_to_section (bss_section);
4696 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4697 #ifdef ASM_DECLARE_OBJECT_NAME
4698 last_assemble_variable_decl = decl;
4699 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4700 #else
4701   /* The standard thing is to just output a label for the object. */
4702 ASM_OUTPUT_LABEL (file, name);
4703 #endif /* ASM_DECLARE_OBJECT_NAME */
4704 ASM_OUTPUT_SKIP (file, size ? size : 1);
4705 }
4706 \f
4707 /* Decide whether we must probe the stack before any space allocation
4708 on this target. It's essentially TARGET_STACK_PROBE except when
4709 -fstack-check causes the stack to be already probed differently. */
4710
4711 bool
4712 ix86_target_stack_probe (void)
4713 {
4714 /* Do not probe the stack twice if static stack checking is enabled. */
4715 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
4716 return false;
4717
4718 return TARGET_STACK_PROBE;
4719 }
4720 \f
4721 /* Decide whether we can make a sibling call to a function. DECL is the
4722 declaration of the function being targeted by the call and EXP is the
4723 CALL_EXPR representing the call. */
4724
4725 static bool
4726 ix86_function_ok_for_sibcall (tree decl, tree exp)
4727 {
4728 tree type, decl_or_type;
4729 rtx a, b;
4730
4731 /* If we are generating position-independent code, we cannot sibcall
4732 optimize any indirect call, or a direct call to a global function,
4733 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
4734 if (!TARGET_MACHO
4735 && !TARGET_64BIT
4736 && flag_pic
4737 && (!decl || !targetm.binds_local_p (decl)))
4738 return false;
4739
4740 /* If we need to align the outgoing stack, then sibcalling would
4741 unalign the stack, which may break the called function. */
4742 if (ix86_minimum_incoming_stack_boundary (true)
4743 < PREFERRED_STACK_BOUNDARY)
4744 return false;
4745
4746 if (decl)
4747 {
4748 decl_or_type = decl;
4749 type = TREE_TYPE (decl);
4750 }
4751 else
4752 {
4753 /* We're looking at the CALL_EXPR, we need the type of the function. */
4754 type = CALL_EXPR_FN (exp); /* pointer expression */
4755 type = TREE_TYPE (type); /* pointer type */
4756 type = TREE_TYPE (type); /* function type */
4757 decl_or_type = type;
4758 }
4759
4760   /* Check that the return value locations are the same.  For instance,
4761      if we are returning floats on the 80387 register stack, we cannot
4762 make a sibcall from a function that doesn't return a float to a
4763 function that does or, conversely, from a function that does return
4764 a float to a function that doesn't; the necessary stack adjustment
4765 would not be executed. This is also the place we notice
4766 differences in the return value ABI. Note that it is ok for one
4767 of the functions to have void return type as long as the return
4768 value of the other is passed in a register. */
4769 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4770 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4771 cfun->decl, false);
4772 if (STACK_REG_P (a) || STACK_REG_P (b))
4773 {
4774 if (!rtx_equal_p (a, b))
4775 return false;
4776 }
4777 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4778 {
4779 /* Disable sibcall if we need to generate vzeroupper after
4780 callee returns. */
4781 if (TARGET_VZEROUPPER
4782 && cfun->machine->callee_return_avx256_p
4783 && !cfun->machine->caller_return_avx256_p)
4784 return false;
4785 }
4786 else if (!rtx_equal_p (a, b))
4787 return false;
4788
4789 if (TARGET_64BIT)
4790 {
4791 /* The SYSV ABI has more call-clobbered registers;
4792 disallow sibcalls from MS to SYSV. */
4793 if (cfun->machine->call_abi == MS_ABI
4794 && ix86_function_type_abi (type) == SYSV_ABI)
4795 return false;
4796 }
4797 else
4798 {
4799 /* If this call is indirect, we'll need to be able to use a
4800 call-clobbered register for the address of the target function.
4801 Make sure that all such registers are not used for passing
4802 parameters. Note that DLLIMPORT functions are indirect. */
4803 if (!decl
4804 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4805 {
4806 if (ix86_function_regparm (type, NULL) >= 3)
4807 {
4808 /* ??? Need to count the actual number of registers to be used,
4809 not the possible number of registers. Fix later. */
4810 return false;
4811 }
4812 }
4813 }
4814
4815 /* Otherwise okay. That also includes certain types of indirect calls. */
4816 return true;
4817 }
4818
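/* Illustrative note (assumed user code, not part of this file): with
   -m32 -fpic, a tail call through a function pointer such as

     extern int (*fp) (int);
     int f (int x) { return fp (x); }

   is rejected above because the call is indirect and the PLT convention
   requires %ebx to be live; the same function compiled with -m64 or
   without -fpic may still be turned into a sibcall.  */
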
4819 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4820 and "sseregparm" calling convention attributes;
4821 arguments as in struct attribute_spec.handler. */
4822
4823 static tree
4824 ix86_handle_cconv_attribute (tree *node, tree name,
4825 tree args,
4826 int flags ATTRIBUTE_UNUSED,
4827 bool *no_add_attrs)
4828 {
4829 if (TREE_CODE (*node) != FUNCTION_TYPE
4830 && TREE_CODE (*node) != METHOD_TYPE
4831 && TREE_CODE (*node) != FIELD_DECL
4832 && TREE_CODE (*node) != TYPE_DECL)
4833 {
4834 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4835 name);
4836 *no_add_attrs = true;
4837 return NULL_TREE;
4838 }
4839
4840   /* Can combine regparm with all attributes but fastcall and thiscall.  */
4841 if (is_attribute_p ("regparm", name))
4842 {
4843 tree cst;
4844
4845 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4846 {
4847 error ("fastcall and regparm attributes are not compatible");
4848 }
4849
4850 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4851 {
4852 	  error ("regparm and thiscall attributes are not compatible");
4853 }
4854
4855 cst = TREE_VALUE (args);
4856 if (TREE_CODE (cst) != INTEGER_CST)
4857 {
4858 warning (OPT_Wattributes,
4859 "%qE attribute requires an integer constant argument",
4860 name);
4861 *no_add_attrs = true;
4862 }
4863 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4864 {
4865 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4866 name, REGPARM_MAX);
4867 *no_add_attrs = true;
4868 }
4869
4870 return NULL_TREE;
4871 }
4872
4873 if (TARGET_64BIT)
4874 {
4875 /* Do not warn when emulating the MS ABI. */
4876 if ((TREE_CODE (*node) != FUNCTION_TYPE
4877 && TREE_CODE (*node) != METHOD_TYPE)
4878 || ix86_function_type_abi (*node) != MS_ABI)
4879 warning (OPT_Wattributes, "%qE attribute ignored",
4880 name);
4881 *no_add_attrs = true;
4882 return NULL_TREE;
4883 }
4884
4885   /* Can combine fastcall with sseregparm.  */
4886 if (is_attribute_p ("fastcall", name))
4887 {
4888 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4889 {
4890 error ("fastcall and cdecl attributes are not compatible");
4891 }
4892 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4893 {
4894 error ("fastcall and stdcall attributes are not compatible");
4895 }
4896 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4897 {
4898 error ("fastcall and regparm attributes are not compatible");
4899 }
4900 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4901 {
4902 error ("fastcall and thiscall attributes are not compatible");
4903 }
4904 }
4905
4906   /* Can combine stdcall with regparm and
4907      sseregparm.  */
4908 else if (is_attribute_p ("stdcall", name))
4909 {
4910 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4911 {
4912 error ("stdcall and cdecl attributes are not compatible");
4913 }
4914 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4915 {
4916 error ("stdcall and fastcall attributes are not compatible");
4917 }
4918 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4919 {
4920 error ("stdcall and thiscall attributes are not compatible");
4921 }
4922 }
4923
4924 /* Can combine cdecl with regparm and sseregparm. */
4925 else if (is_attribute_p ("cdecl", name))
4926 {
4927 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4928 {
4929 error ("stdcall and cdecl attributes are not compatible");
4930 }
4931 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4932 {
4933 error ("fastcall and cdecl attributes are not compatible");
4934 }
4935 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4936 {
4937 error ("cdecl and thiscall attributes are not compatible");
4938 }
4939 }
4940 else if (is_attribute_p ("thiscall", name))
4941 {
4942 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
4943 	warning (OPT_Wattributes, "%qE attribute is used for non-class method",
4944 name);
4945 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4946 {
4947 error ("stdcall and thiscall attributes are not compatible");
4948 }
4949 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4950 {
4951 error ("fastcall and thiscall attributes are not compatible");
4952 }
4953 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4954 {
4955 error ("cdecl and thiscall attributes are not compatible");
4956 }
4957 }
4958
4959 /* Can combine sseregparm with all attributes. */
4960
4961 return NULL_TREE;
4962 }
4963
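/* Illustrative examples (assumed user declarations): the handler above
   accepts

     int __attribute__ ((regparm (3))) ok (int, int, int);

   while a conflicting combination such as

     int __attribute__ ((fastcall, regparm (2))) bad (int);

   reaches the "fastcall and regparm attributes are not compatible"
   diagnostic, and on 64-bit targets (outside the MS ABI) the attribute is
   simply ignored with a warning.  */
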
4964 /* This function determines from TYPE the calling-convention. */
4965
4966 unsigned int
4967 ix86_get_callcvt (const_tree type)
4968 {
4969 unsigned int ret = 0;
4970 bool is_stdarg;
4971 tree attrs;
4972
4973 if (TARGET_64BIT)
4974 return IX86_CALLCVT_CDECL;
4975
4976 attrs = TYPE_ATTRIBUTES (type);
4977 if (attrs != NULL_TREE)
4978 {
4979 if (lookup_attribute ("cdecl", attrs))
4980 ret |= IX86_CALLCVT_CDECL;
4981 else if (lookup_attribute ("stdcall", attrs))
4982 ret |= IX86_CALLCVT_STDCALL;
4983 else if (lookup_attribute ("fastcall", attrs))
4984 ret |= IX86_CALLCVT_FASTCALL;
4985 else if (lookup_attribute ("thiscall", attrs))
4986 ret |= IX86_CALLCVT_THISCALL;
4987
4988       /* Regparm isn't allowed for thiscall and fastcall.  */
4989 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
4990 {
4991 if (lookup_attribute ("regparm", attrs))
4992 ret |= IX86_CALLCVT_REGPARM;
4993 if (lookup_attribute ("sseregparm", attrs))
4994 ret |= IX86_CALLCVT_SSEREGPARM;
4995 }
4996
4997 if (IX86_BASE_CALLCVT(ret) != 0)
4998 return ret;
4999 }
5000
5001 is_stdarg = stdarg_p (type);
5002 if (TARGET_RTD && !is_stdarg)
5003 return IX86_CALLCVT_STDCALL | ret;
5004
5005 if (ret != 0
5006 || is_stdarg
5007 || TREE_CODE (type) != METHOD_TYPE
5008 || ix86_function_type_abi (type) != MS_ABI)
5009 return IX86_CALLCVT_CDECL | ret;
5010
5011 return IX86_CALLCVT_THISCALL;
5012 }
5013
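/* Illustrative mapping (assumed declarations, default -m32 options):

     void f1 (int);                              -> IX86_CALLCVT_CDECL
     void __attribute__ ((stdcall)) f2 (int);    -> IX86_CALLCVT_STDCALL
     void __attribute__ ((regparm (2))) f3 (int);
                        -> IX86_CALLCVT_CDECL | IX86_CALLCVT_REGPARM

   With -mrtd, the plain prototyped f1 is instead treated as stdcall, as
   computed above.  */
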
5014 /* Return 0 if the attributes for two types are incompatible, 1 if they
5015 are compatible, and 2 if they are nearly compatible (which causes a
5016 warning to be generated). */
5017
5018 static int
5019 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5020 {
5021 unsigned int ccvt1, ccvt2;
5022
5023 if (TREE_CODE (type1) != FUNCTION_TYPE
5024 && TREE_CODE (type1) != METHOD_TYPE)
5025 return 1;
5026
5027 ccvt1 = ix86_get_callcvt (type1);
5028 ccvt2 = ix86_get_callcvt (type2);
5029 if (ccvt1 != ccvt2)
5030 return 0;
5031 if (ix86_function_regparm (type1, NULL)
5032 != ix86_function_regparm (type2, NULL))
5033 return 0;
5034
5035 return 1;
5036 }
5037 \f
5038 /* Return the regparm value for a function with the indicated TYPE and DECL.
5039 DECL may be NULL when calling function indirectly
5040 or considering a libcall. */
5041
5042 static int
5043 ix86_function_regparm (const_tree type, const_tree decl)
5044 {
5045 tree attr;
5046 int regparm;
5047 unsigned int ccvt;
5048
5049 if (TARGET_64BIT)
5050 return (ix86_function_type_abi (type) == SYSV_ABI
5051 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5052 ccvt = ix86_get_callcvt (type);
5053 regparm = ix86_regparm;
5054
5055 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5056 {
5057 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5058 if (attr)
5059 {
5060 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5061 return regparm;
5062 }
5063 }
5064 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5065 return 2;
5066 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5067 return 1;
5068
5069 /* Use register calling convention for local functions when possible. */
5070 if (decl
5071 && TREE_CODE (decl) == FUNCTION_DECL
5072 && optimize
5073 && !(profile_flag && !flag_fentry))
5074 {
5075 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5076 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5077 if (i && i->local && i->can_change_signature)
5078 {
5079 int local_regparm, globals = 0, regno;
5080
5081 /* Make sure no regparm register is taken by a
5082 fixed register variable. */
5083 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5084 if (fixed_regs[local_regparm])
5085 break;
5086
5087 /* We don't want to use regparm(3) for nested functions as
5088 these use a static chain pointer in the third argument. */
5089 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5090 local_regparm = 2;
5091
5092 /* In 32-bit mode save a register for the split stack. */
5093 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5094 local_regparm = 2;
5095
5096 /* Each fixed register usage increases register pressure,
5097 	     so fewer registers should be used for argument passing.
5098 	     This functionality can be overridden by an explicit
5099 regparm value. */
5100 for (regno = 0; regno <= DI_REG; regno++)
5101 if (fixed_regs[regno])
5102 globals++;
5103
5104 local_regparm
5105 = globals < local_regparm ? local_regparm - globals : 0;
5106
5107 if (local_regparm > regparm)
5108 regparm = local_regparm;
5109 }
5110 }
5111
5112 return regparm;
5113 }
5114
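/* Illustrative example: with -m32 -O2, a file-local function such as

     static int add (int a, int b) { return a + b; }

   whose definition and callers are all visible may be promoted here to
   pass its arguments in registers (up to regparm(3)) even without an
   explicit regparm attribute, provided none of the candidate registers is
   fixed and the function needs no static chain.  */
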
5115 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5116 DFmode (2) arguments in SSE registers for a function with the
5117 indicated TYPE and DECL. DECL may be NULL when calling function
5118 indirectly or considering a libcall. Otherwise return 0. */
5119
5120 static int
5121 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5122 {
5123 gcc_assert (!TARGET_64BIT);
5124
5125 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5126 by the sseregparm attribute. */
5127 if (TARGET_SSEREGPARM
5128 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5129 {
5130 if (!TARGET_SSE)
5131 {
5132 if (warn)
5133 {
5134 if (decl)
5135 error ("calling %qD with attribute sseregparm without "
5136 "SSE/SSE2 enabled", decl);
5137 else
5138 error ("calling %qT with attribute sseregparm without "
5139 "SSE/SSE2 enabled", type);
5140 }
5141 return 0;
5142 }
5143
5144 return 2;
5145 }
5146
5147 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5148 (and DFmode for SSE2) arguments in SSE registers. */
5149 if (decl && TARGET_SSE_MATH && optimize
5150 && !(profile_flag && !flag_fentry))
5151 {
5152 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5153 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5154 if (i && i->local && i->can_change_signature)
5155 return TARGET_SSE2 ? 2 : 1;
5156 }
5157
5158 return 0;
5159 }
5160
5161 /* Return true if EAX is live at the start of the function. Used by
5162 ix86_expand_prologue to determine if we need special help before
5163 calling allocate_stack_worker. */
5164
5165 static bool
5166 ix86_eax_live_at_start_p (void)
5167 {
5168 /* Cheat. Don't bother working forward from ix86_function_regparm
5169 to the function type to whether an actual argument is located in
5170 eax. Instead just look at cfg info, which is still close enough
5171 to correct at this point. This gives false positives for broken
5172 functions that might use uninitialized data that happens to be
5173 allocated in eax, but who cares? */
5174 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
5175 }
5176
5177 static bool
5178 ix86_keep_aggregate_return_pointer (tree fntype)
5179 {
5180 tree attr;
5181
5182 if (!TARGET_64BIT)
5183 {
5184 attr = lookup_attribute ("callee_pop_aggregate_return",
5185 TYPE_ATTRIBUTES (fntype));
5186 if (attr)
5187 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5188
5189 /* For 32-bit MS-ABI the default is to keep aggregate
5190 return pointer. */
5191 if (ix86_function_type_abi (fntype) == MS_ABI)
5192 return true;
5193 }
5194 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5195 }
5196
5197 /* Value is the number of bytes of arguments automatically
5198 popped when returning from a subroutine call.
5199 FUNDECL is the declaration node of the function (as a tree),
5200 FUNTYPE is the data type of the function (as a tree),
5201 or for a library call it is an identifier node for the subroutine name.
5202 SIZE is the number of bytes of arguments passed on the stack.
5203
5204 On the 80386, the RTD insn may be used to pop them if the number
5205 of args is fixed, but if the number is variable then the caller
5206 must pop them all. RTD can't be used for library calls now
5207 because the library is compiled with the Unix compiler.
5208 Use of RTD is a selectable option, since it is incompatible with
5209 standard Unix calling sequences. If the option is not selected,
5210 the caller must always pop the args.
5211
5212 The attribute stdcall is equivalent to RTD on a per module basis. */
5213
5214 static int
5215 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5216 {
5217 unsigned int ccvt;
5218
5219 /* None of the 64-bit ABIs pop arguments. */
5220 if (TARGET_64BIT)
5221 return 0;
5222
5223 ccvt = ix86_get_callcvt (funtype);
5224
5225 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5226 | IX86_CALLCVT_THISCALL)) != 0
5227 && ! stdarg_p (funtype))
5228 return size;
5229
5230 /* Lose any fake structure return argument if it is passed on the stack. */
5231 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5232 && !ix86_keep_aggregate_return_pointer (funtype))
5233 {
5234 int nregs = ix86_function_regparm (funtype, fundecl);
5235 if (nregs == 0)
5236 return GET_MODE_SIZE (Pmode);
5237 }
5238
5239 return 0;
5240 }
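
/* Illustrative example (assumed user code): for

     void __attribute__ ((stdcall)) f (int a, int b);

   SIZE is 8 at a call site, so the callee pops its own arguments
   ("ret 8") and the caller makes no stack adjustment; a plain cdecl
   declaration would return 0 here and leave the pop to the caller.  */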
5241 \f
5242 /* Argument support functions. */
5243
5244 /* Return true when register may be used to pass function parameters. */
5245 bool
5246 ix86_function_arg_regno_p (int regno)
5247 {
5248 int i;
5249 const int *parm_regs;
5250
5251 if (!TARGET_64BIT)
5252 {
5253 if (TARGET_MACHO)
5254 return (regno < REGPARM_MAX
5255 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5256 else
5257 return (regno < REGPARM_MAX
5258 || (TARGET_MMX && MMX_REGNO_P (regno)
5259 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5260 || (TARGET_SSE && SSE_REGNO_P (regno)
5261 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5262 }
5263
5264 if (TARGET_MACHO)
5265 {
5266 if (SSE_REGNO_P (regno) && TARGET_SSE)
5267 return true;
5268 }
5269 else
5270 {
5271 if (TARGET_SSE && SSE_REGNO_P (regno)
5272 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5273 return true;
5274 }
5275
5276 /* TODO: The function should depend on current function ABI but
5277 builtins.c would need updating then. Therefore we use the
5278 default ABI. */
5279
5280 /* RAX is used as hidden argument to va_arg functions. */
5281 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5282 return true;
5283
5284 if (ix86_abi == MS_ABI)
5285 parm_regs = x86_64_ms_abi_int_parameter_registers;
5286 else
5287 parm_regs = x86_64_int_parameter_registers;
5288 for (i = 0; i < (ix86_abi == MS_ABI
5289 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5290 if (regno == parm_regs[i])
5291 return true;
5292 return false;
5293 }
5294
5295 /* Return true if we do not know how to pass TYPE solely in registers.  */
5296
5297 static bool
5298 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5299 {
5300 if (must_pass_in_stack_var_size_or_pad (mode, type))
5301 return true;
5302
5303 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5304 The layout_type routine is crafty and tries to trick us into passing
5305 currently unsupported vector types on the stack by using TImode. */
5306 return (!TARGET_64BIT && mode == TImode
5307 && type && TREE_CODE (type) != VECTOR_TYPE);
5308 }
5309
5310 /* Return the size, in bytes, of the area reserved for arguments passed
5311    in registers for the function represented by FNDECL, depending on the
5312    ABI used.  */
5313 int
5314 ix86_reg_parm_stack_space (const_tree fndecl)
5315 {
5316 enum calling_abi call_abi = SYSV_ABI;
5317 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5318 call_abi = ix86_function_abi (fndecl);
5319 else
5320 call_abi = ix86_function_type_abi (fndecl);
5321 if (TARGET_64BIT && call_abi == MS_ABI)
5322 return 32;
5323 return 0;
5324 }
5325
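/* Illustrative example: for a 64-bit MS-ABI callee such as

     void g (int a, int b);

   32 bytes of register "home space" are reserved on the stack below the
   return address even though both arguments travel in registers; for the
   SYSV ABI (and all 32-bit ABIs) the reservation is 0.  */
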
5326 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
5327    call ABI used.  */
5328 enum calling_abi
5329 ix86_function_type_abi (const_tree fntype)
5330 {
5331 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
5332 {
5333 enum calling_abi abi = ix86_abi;
5334 if (abi == SYSV_ABI)
5335 {
5336 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5337 abi = MS_ABI;
5338 }
5339 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5340 abi = SYSV_ABI;
5341 return abi;
5342 }
5343 return ix86_abi;
5344 }
5345
5346 static bool
5347 ix86_function_ms_hook_prologue (const_tree fn)
5348 {
5349 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5350 {
5351 if (decl_function_context (fn) != NULL_TREE)
5352 error_at (DECL_SOURCE_LOCATION (fn),
5353 "ms_hook_prologue is not compatible with nested function");
5354 else
5355 return true;
5356 }
5357 return false;
5358 }
5359
5360 static enum calling_abi
5361 ix86_function_abi (const_tree fndecl)
5362 {
5363 if (! fndecl)
5364 return ix86_abi;
5365 return ix86_function_type_abi (TREE_TYPE (fndecl));
5366 }
5367
5368 /* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the
5369    call ABI used.  */
5370 enum calling_abi
5371 ix86_cfun_abi (void)
5372 {
5373 if (! cfun)
5374 return ix86_abi;
5375 return cfun->machine->call_abi;
5376 }
5377
5378 /* Write the extra assembler code needed to declare a function properly. */
5379
5380 void
5381 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5382 tree decl)
5383 {
5384 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5385
5386 if (is_ms_hook)
5387 {
5388 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5389 unsigned int filler_cc = 0xcccccccc;
5390
5391 for (i = 0; i < filler_count; i += 4)
5392 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5393 }
5394
5395 #ifdef SUBTARGET_ASM_UNWIND_INIT
5396 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
5397 #endif
5398
5399 ASM_OUTPUT_LABEL (asm_out_file, fname);
5400
5401 /* Output magic byte marker, if hot-patch attribute is set. */
5402 if (is_ms_hook)
5403 {
5404 if (TARGET_64BIT)
5405 {
5406 /* leaq [%rsp + 0], %rsp */
5407 asm_fprintf (asm_out_file, ASM_BYTE
5408 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5409 }
5410 else
5411 {
5412 /* movl.s %edi, %edi
5413 push %ebp
5414 movl.s %esp, %ebp */
5415 asm_fprintf (asm_out_file, ASM_BYTE
5416 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5417 }
5418 }
5419 }
5420
5421 /* regclass.c */
5422 extern void init_regs (void);
5423
5424 /* Implementation of the call ABI switching target hook.  The call
5425    register sets specific to FNDECL are selected.  See also
5426 ix86_conditional_register_usage for more details. */
5427 void
5428 ix86_call_abi_override (const_tree fndecl)
5429 {
5430 if (fndecl == NULL_TREE)
5431 cfun->machine->call_abi = ix86_abi;
5432 else
5433 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5434 }
5435
5436 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers.
5437    Avoid expensive re-initialization of init_regs each time we switch function
5438    context, since this is needed only during RTL expansion.  */
5439 static void
5440 ix86_maybe_switch_abi (void)
5441 {
5442 if (TARGET_64BIT &&
5443 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5444 reinit_regs ();
5445 }
5446
5447 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5448 for a call to a function whose data type is FNTYPE.
5449 For a library call, FNTYPE is 0. */
5450
5451 void
5452 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5453 tree fntype, /* tree ptr for function decl */
5454 rtx libname, /* SYMBOL_REF of library name or 0 */
5455 tree fndecl,
5456 int caller)
5457 {
5458 struct cgraph_local_info *i;
5459 tree fnret_type;
5460
5461 memset (cum, 0, sizeof (*cum));
5462
5463 /* Initialize for the current callee. */
5464 if (caller)
5465 {
5466 cfun->machine->callee_pass_avx256_p = false;
5467 cfun->machine->callee_return_avx256_p = false;
5468 }
5469
5470 if (fndecl)
5471 {
5472 i = cgraph_local_info (fndecl);
5473 cum->call_abi = ix86_function_abi (fndecl);
5474 fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
5475 }
5476 else
5477 {
5478 i = NULL;
5479 cum->call_abi = ix86_function_type_abi (fntype);
5480 if (fntype)
5481 fnret_type = TREE_TYPE (fntype);
5482 else
5483 fnret_type = NULL;
5484 }
5485
5486 if (TARGET_VZEROUPPER && fnret_type)
5487 {
5488 rtx fnret_value = ix86_function_value (fnret_type, fntype,
5489 false);
5490 if (function_pass_avx256_p (fnret_value))
5491 {
5492 /* The return value of this function uses 256bit AVX modes. */
5493 if (caller)
5494 cfun->machine->callee_return_avx256_p = true;
5495 else
5496 cfun->machine->caller_return_avx256_p = true;
5497 }
5498 }
5499
5500 cum->caller = caller;
5501
5502 /* Set up the number of registers to use for passing arguments. */
5503
5504 if (TARGET_64BIT && cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5505 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5506 "or subtarget optimization implying it");
5507 cum->nregs = ix86_regparm;
5508 if (TARGET_64BIT)
5509 {
5510 cum->nregs = (cum->call_abi == SYSV_ABI
5511 ? X86_64_REGPARM_MAX
5512 : X86_64_MS_REGPARM_MAX);
5513 }
5514 if (TARGET_SSE)
5515 {
5516 cum->sse_nregs = SSE_REGPARM_MAX;
5517 if (TARGET_64BIT)
5518 {
5519 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5520 ? X86_64_SSE_REGPARM_MAX
5521 : X86_64_MS_SSE_REGPARM_MAX);
5522 }
5523 }
5524 if (TARGET_MMX)
5525 cum->mmx_nregs = MMX_REGPARM_MAX;
5526 cum->warn_avx = true;
5527 cum->warn_sse = true;
5528 cum->warn_mmx = true;
5529
5530   /* Because the type might mismatch between caller and callee, we need to
5531      use the actual type of the function for local calls.
5532      FIXME: cgraph_analyze can be told to actually record whether a function
5533      uses va_start, so for local functions maybe_vaarg can be made more
5534      aggressive, helping K&R code.
5535      FIXME: once the type system is fixed, we won't need this code anymore.  */
5536 if (i && i->local && i->can_change_signature)
5537 fntype = TREE_TYPE (fndecl);
5538 cum->maybe_vaarg = (fntype
5539 ? (!prototype_p (fntype) || stdarg_p (fntype))
5540 : !libname);
5541
5542 if (!TARGET_64BIT)
5543 {
5544 /* If there are variable arguments, then we won't pass anything
5545 in registers in 32-bit mode. */
5546 if (stdarg_p (fntype))
5547 {
5548 cum->nregs = 0;
5549 cum->sse_nregs = 0;
5550 cum->mmx_nregs = 0;
5551 cum->warn_avx = 0;
5552 cum->warn_sse = 0;
5553 cum->warn_mmx = 0;
5554 return;
5555 }
5556
5557 /* Use ecx and edx registers if function has fastcall attribute,
5558 else look for regparm information. */
5559 if (fntype)
5560 {
5561 unsigned int ccvt = ix86_get_callcvt (fntype);
5562 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5563 {
5564 cum->nregs = 1;
5565 cum->fastcall = 1; /* Same first register as in fastcall. */
5566 }
5567 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5568 {
5569 cum->nregs = 2;
5570 cum->fastcall = 1;
5571 }
5572 else
5573 cum->nregs = ix86_function_regparm (fntype, fndecl);
5574 }
5575
5576 /* Set up the number of SSE registers used for passing SFmode
5577 and DFmode arguments. Warn for mismatching ABI. */
5578 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
5579 }
5580 }
5581
5582 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5583 But in the case of vector types, it is some vector mode.
5584
5585 When we have only some of our vector isa extensions enabled, then there
5586 are some modes for which vector_mode_supported_p is false. For these
5587 modes, the generic vector support in gcc will choose some non-vector mode
5588 in order to implement the type. By computing the natural mode, we'll
5589 select the proper ABI location for the operand and not depend on whatever
5590 the middle-end decides to do with these vector types.
5591
5592    The middle-end can't deal with vector types larger than 16 bytes.  In
5593    this case, we return the original mode and warn about the ABI change if
5594    CUM isn't NULL.  */
5595
5596 static enum machine_mode
5597 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5598 {
5599 enum machine_mode mode = TYPE_MODE (type);
5600
5601 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5602 {
5603 HOST_WIDE_INT size = int_size_in_bytes (type);
5604 if ((size == 8 || size == 16 || size == 32)
5605 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5606 && TYPE_VECTOR_SUBPARTS (type) > 1)
5607 {
5608 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5609
5610 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5611 mode = MIN_MODE_VECTOR_FLOAT;
5612 else
5613 mode = MIN_MODE_VECTOR_INT;
5614
5615 /* Get the mode which has this inner mode and number of units. */
5616 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5617 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5618 && GET_MODE_INNER (mode) == innermode)
5619 {
5620 if (size == 32 && !TARGET_AVX)
5621 {
5622 static bool warnedavx;
5623
5624 if (cum
5625 && !warnedavx
5626 && cum->warn_avx)
5627 {
5628 warnedavx = true;
5629 warning (0, "AVX vector argument without AVX "
5630 "enabled changes the ABI");
5631 }
5632 return TYPE_MODE (type);
5633 }
5634 else
5635 return mode;
5636 }
5637
5638 gcc_unreachable ();
5639 }
5640 }
5641
5642 return mode;
5643 }
5644
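/* Illustrative example (assumed user type):

     typedef double v4df __attribute__ ((vector_size (32)));

   With -mavx the type already has V4DFmode and is returned unchanged.
   Without AVX the middle-end lays it out in BLKmode; the loop above finds
   V4DFmode, but since 32-byte vectors require AVX the original mode is
   returned and the "AVX vector argument without AVX enabled" warning may
   be issued once.  */
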
5645 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5646 this may not agree with the mode that the type system has chosen for the
5647 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5648 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5649
5650 static rtx
5651 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5652 unsigned int regno)
5653 {
5654 rtx tmp;
5655
5656 if (orig_mode != BLKmode)
5657 tmp = gen_rtx_REG (orig_mode, regno);
5658 else
5659 {
5660 tmp = gen_rtx_REG (mode, regno);
5661 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5662 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5663 }
5664
5665 return tmp;
5666 }
5667
5668 /* x86-64 register passing implementation.  See the x86-64 ABI for details.
5669    The goal of this code is to classify each 8 bytes of an incoming argument
5670    by register class and assign registers accordingly.  */
5671
5672 /* Return the union class of CLASS1 and CLASS2.
5673 See the x86-64 PS ABI for details. */
5674
5675 static enum x86_64_reg_class
5676 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5677 {
5678 /* Rule #1: If both classes are equal, this is the resulting class. */
5679 if (class1 == class2)
5680 return class1;
5681
5682 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5683 the other class. */
5684 if (class1 == X86_64_NO_CLASS)
5685 return class2;
5686 if (class2 == X86_64_NO_CLASS)
5687 return class1;
5688
5689 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5690 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5691 return X86_64_MEMORY_CLASS;
5692
5693 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5694 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5695 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5696 return X86_64_INTEGERSI_CLASS;
5697 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5698 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5699 return X86_64_INTEGER_CLASS;
5700
5701 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5702 MEMORY is used. */
5703 if (class1 == X86_64_X87_CLASS
5704 || class1 == X86_64_X87UP_CLASS
5705 || class1 == X86_64_COMPLEX_X87_CLASS
5706 || class2 == X86_64_X87_CLASS
5707 || class2 == X86_64_X87UP_CLASS
5708 || class2 == X86_64_COMPLEX_X87_CLASS)
5709 return X86_64_MEMORY_CLASS;
5710
5711 /* Rule #6: Otherwise class SSE is used. */
5712 return X86_64_SSE_CLASS;
5713 }
5714
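/* Illustrative applications of the rules above:

     merge_classes (X86_64_NO_CLASS, X86_64_SSEDF_CLASS)
       -> X86_64_SSEDF_CLASS      (rule #2)
     merge_classes (X86_64_INTEGERSI_CLASS, X86_64_SSESF_CLASS)
       -> X86_64_INTEGERSI_CLASS  (rule #4)
     merge_classes (X86_64_SSE_CLASS, X86_64_X87_CLASS)
       -> X86_64_MEMORY_CLASS     (rule #5)  */
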
5715 /* Classify the argument of type TYPE and mode MODE.
5716 CLASSES will be filled by the register class used to pass each word
5717 of the operand. The number of words is returned. In case the parameter
5718 should be passed in memory, 0 is returned. As a special case for zero
5719 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5720
5721    BIT_OFFSET is used internally for handling records; it specifies the
5722    offset in bits modulo 256 to avoid overflow cases.
5723
5724 See the x86-64 PS ABI for details.
5725 */
5726
5727 static int
5728 classify_argument (enum machine_mode mode, const_tree type,
5729 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5730 {
5731 HOST_WIDE_INT bytes =
5732 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5733 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5734
5735 /* Variable sized entities are always passed/returned in memory. */
5736 if (bytes < 0)
5737 return 0;
5738
5739 if (mode != VOIDmode
5740 && targetm.calls.must_pass_in_stack (mode, type))
5741 return 0;
5742
5743 if (type && AGGREGATE_TYPE_P (type))
5744 {
5745 int i;
5746 tree field;
5747 enum x86_64_reg_class subclasses[MAX_CLASSES];
5748
5749 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5750 if (bytes > 32)
5751 return 0;
5752
5753 for (i = 0; i < words; i++)
5754 classes[i] = X86_64_NO_CLASS;
5755
5756       /* Zero-sized arrays or structures are NO_CLASS.  We return 0 to
5757 	 signal the memory class, so handle this as a special case.  */
5758 if (!words)
5759 {
5760 classes[0] = X86_64_NO_CLASS;
5761 return 1;
5762 }
5763
5764 /* Classify each field of record and merge classes. */
5765 switch (TREE_CODE (type))
5766 {
5767 case RECORD_TYPE:
5768 /* And now merge the fields of structure. */
5769 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5770 {
5771 if (TREE_CODE (field) == FIELD_DECL)
5772 {
5773 int num;
5774
5775 if (TREE_TYPE (field) == error_mark_node)
5776 continue;
5777
5778 /* Bitfields are always classified as integer. Handle them
5779 early, since later code would consider them to be
5780 misaligned integers. */
5781 if (DECL_BIT_FIELD (field))
5782 {
5783 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5784 i < ((int_bit_position (field) + (bit_offset % 64))
5785 + tree_low_cst (DECL_SIZE (field), 0)
5786 + 63) / 8 / 8; i++)
5787 classes[i] =
5788 merge_classes (X86_64_INTEGER_CLASS,
5789 classes[i]);
5790 }
5791 else
5792 {
5793 int pos;
5794
5795 type = TREE_TYPE (field);
5796
5797 /* Flexible array member is ignored. */
5798 if (TYPE_MODE (type) == BLKmode
5799 && TREE_CODE (type) == ARRAY_TYPE
5800 && TYPE_SIZE (type) == NULL_TREE
5801 && TYPE_DOMAIN (type) != NULL_TREE
5802 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5803 == NULL_TREE))
5804 {
5805 static bool warned;
5806
5807 if (!warned && warn_psabi)
5808 {
5809 warned = true;
5810 inform (input_location,
5811 "the ABI of passing struct with"
5812 " a flexible array member has"
5813 " changed in GCC 4.4");
5814 }
5815 continue;
5816 }
5817 num = classify_argument (TYPE_MODE (type), type,
5818 subclasses,
5819 (int_bit_position (field)
5820 + bit_offset) % 256);
5821 if (!num)
5822 return 0;
5823 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5824 for (i = 0; i < num && (i + pos) < words; i++)
5825 classes[i + pos] =
5826 merge_classes (subclasses[i], classes[i + pos]);
5827 }
5828 }
5829 }
5830 break;
5831
5832 case ARRAY_TYPE:
5833 /* Arrays are handled as small records. */
5834 {
5835 int num;
5836 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5837 TREE_TYPE (type), subclasses, bit_offset);
5838 if (!num)
5839 return 0;
5840
5841 /* The partial classes are now full classes. */
5842 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5843 subclasses[0] = X86_64_SSE_CLASS;
5844 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5845 && !((bit_offset % 64) == 0 && bytes == 4))
5846 subclasses[0] = X86_64_INTEGER_CLASS;
5847
5848 for (i = 0; i < words; i++)
5849 classes[i] = subclasses[i % num];
5850
5851 break;
5852 }
5853 case UNION_TYPE:
5854 case QUAL_UNION_TYPE:
5855 /* Unions are similar to RECORD_TYPE but offset is always 0.
5856 */
5857 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5858 {
5859 if (TREE_CODE (field) == FIELD_DECL)
5860 {
5861 int num;
5862
5863 if (TREE_TYPE (field) == error_mark_node)
5864 continue;
5865
5866 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5867 TREE_TYPE (field), subclasses,
5868 bit_offset);
5869 if (!num)
5870 return 0;
5871 for (i = 0; i < num; i++)
5872 classes[i] = merge_classes (subclasses[i], classes[i]);
5873 }
5874 }
5875 break;
5876
5877 default:
5878 gcc_unreachable ();
5879 }
5880
5881 if (words > 2)
5882 {
5883 	  /* When the size is larger than 16 bytes, if the first class isn't
5884 	     X86_64_SSE_CLASS or any of the others isn't
5885 	     X86_64_SSEUP_CLASS, everything should be passed in
5886 	     memory.  */
5887 if (classes[0] != X86_64_SSE_CLASS)
5888 return 0;
5889
5890 for (i = 1; i < words; i++)
5891 if (classes[i] != X86_64_SSEUP_CLASS)
5892 return 0;
5893 }
5894
5895 /* Final merger cleanup. */
5896 for (i = 0; i < words; i++)
5897 {
5898 /* If one class is MEMORY, everything should be passed in
5899 memory. */
5900 if (classes[i] == X86_64_MEMORY_CLASS)
5901 return 0;
5902
5903 	 /* X86_64_SSEUP_CLASS should always be preceded by
5904 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5905 if (classes[i] == X86_64_SSEUP_CLASS
5906 && classes[i - 1] != X86_64_SSE_CLASS
5907 && classes[i - 1] != X86_64_SSEUP_CLASS)
5908 {
5909 /* The first one should never be X86_64_SSEUP_CLASS. */
5910 gcc_assert (i != 0);
5911 classes[i] = X86_64_SSE_CLASS;
5912 }
5913
5914 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5915 everything should be passed in memory. */
5916 if (classes[i] == X86_64_X87UP_CLASS
5917 && (classes[i - 1] != X86_64_X87_CLASS))
5918 {
5919 static bool warned;
5920
5921 /* The first one should never be X86_64_X87UP_CLASS. */
5922 gcc_assert (i != 0);
5923 if (!warned && warn_psabi)
5924 {
5925 warned = true;
5926 inform (input_location,
5927 "the ABI of passing union with long double"
5928 " has changed in GCC 4.4");
5929 }
5930 return 0;
5931 }
5932 }
5933 return words;
5934 }
5935
5936   /* Compute the alignment needed.  We align all types to natural boundaries,
5937      with the exception of XFmode, which is aligned to 64 bits.  */
5938 if (mode != VOIDmode && mode != BLKmode)
5939 {
5940 int mode_alignment = GET_MODE_BITSIZE (mode);
5941
5942 if (mode == XFmode)
5943 mode_alignment = 128;
5944 else if (mode == XCmode)
5945 mode_alignment = 256;
5946 if (COMPLEX_MODE_P (mode))
5947 mode_alignment /= 2;
5948 /* Misaligned fields are always returned in memory. */
5949 if (bit_offset % mode_alignment)
5950 return 0;
5951 }
5952
5953   /* For V1xx modes, just use the base mode.  */
5954 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
5955 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5956 mode = GET_MODE_INNER (mode);
5957
5958 /* Classification of atomic types. */
5959 switch (mode)
5960 {
5961 case SDmode:
5962 case DDmode:
5963 classes[0] = X86_64_SSE_CLASS;
5964 return 1;
5965 case TDmode:
5966 classes[0] = X86_64_SSE_CLASS;
5967 classes[1] = X86_64_SSEUP_CLASS;
5968 return 2;
5969 case DImode:
5970 case SImode:
5971 case HImode:
5972 case QImode:
5973 case CSImode:
5974 case CHImode:
5975 case CQImode:
5976 {
5977 int size = (bit_offset % 64)+ (int) GET_MODE_BITSIZE (mode);
5978
5979 if (size <= 32)
5980 {
5981 classes[0] = X86_64_INTEGERSI_CLASS;
5982 return 1;
5983 }
5984 else if (size <= 64)
5985 {
5986 classes[0] = X86_64_INTEGER_CLASS;
5987 return 1;
5988 }
5989 else if (size <= 64+32)
5990 {
5991 classes[0] = X86_64_INTEGER_CLASS;
5992 classes[1] = X86_64_INTEGERSI_CLASS;
5993 return 2;
5994 }
5995 else if (size <= 64+64)
5996 {
5997 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5998 return 2;
5999 }
6000 else
6001 gcc_unreachable ();
6002 }
6003 case CDImode:
6004 case TImode:
6005 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6006 return 2;
6007 case COImode:
6008 case OImode:
6009 /* OImode shouldn't be used directly. */
6010 gcc_unreachable ();
6011 case CTImode:
6012 return 0;
6013 case SFmode:
6014 if (!(bit_offset % 64))
6015 classes[0] = X86_64_SSESF_CLASS;
6016 else
6017 classes[0] = X86_64_SSE_CLASS;
6018 return 1;
6019 case DFmode:
6020 classes[0] = X86_64_SSEDF_CLASS;
6021 return 1;
6022 case XFmode:
6023 classes[0] = X86_64_X87_CLASS;
6024 classes[1] = X86_64_X87UP_CLASS;
6025 return 2;
6026 case TFmode:
6027 classes[0] = X86_64_SSE_CLASS;
6028 classes[1] = X86_64_SSEUP_CLASS;
6029 return 2;
6030 case SCmode:
6031 classes[0] = X86_64_SSE_CLASS;
6032 if (!(bit_offset % 64))
6033 return 1;
6034 else
6035 {
6036 static bool warned;
6037
6038 if (!warned && warn_psabi)
6039 {
6040 warned = true;
6041 inform (input_location,
6042 "the ABI of passing structure with complex float"
6043 " member has changed in GCC 4.4");
6044 }
6045 classes[1] = X86_64_SSESF_CLASS;
6046 return 2;
6047 }
6048 case DCmode:
6049 classes[0] = X86_64_SSEDF_CLASS;
6050 classes[1] = X86_64_SSEDF_CLASS;
6051 return 2;
6052 case XCmode:
6053 classes[0] = X86_64_COMPLEX_X87_CLASS;
6054 return 1;
6055 case TCmode:
6056       /* This mode is larger than 16 bytes.  */
6057 return 0;
6058 case V8SFmode:
6059 case V8SImode:
6060 case V32QImode:
6061 case V16HImode:
6062 case V4DFmode:
6063 case V4DImode:
6064 classes[0] = X86_64_SSE_CLASS;
6065 classes[1] = X86_64_SSEUP_CLASS;
6066 classes[2] = X86_64_SSEUP_CLASS;
6067 classes[3] = X86_64_SSEUP_CLASS;
6068 return 4;
6069 case V4SFmode:
6070 case V4SImode:
6071 case V16QImode:
6072 case V8HImode:
6073 case V2DFmode:
6074 case V2DImode:
6075 classes[0] = X86_64_SSE_CLASS;
6076 classes[1] = X86_64_SSEUP_CLASS;
6077 return 2;
6078 case V1TImode:
6079 case V1DImode:
6080 case V2SFmode:
6081 case V2SImode:
6082 case V4HImode:
6083 case V8QImode:
6084 classes[0] = X86_64_SSE_CLASS;
6085 return 1;
6086 case BLKmode:
6087 case VOIDmode:
6088 return 0;
6089 default:
6090 gcc_assert (VECTOR_MODE_P (mode));
6091
6092 if (bytes > 16)
6093 return 0;
6094
6095 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6096
6097 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6098 classes[0] = X86_64_INTEGERSI_CLASS;
6099 else
6100 classes[0] = X86_64_INTEGER_CLASS;
6101 classes[1] = X86_64_INTEGER_CLASS;
6102 return 1 + (bytes > 8);
6103 }
6104 }
6105
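/* Illustrative classification (assumed user type): for

     struct s { double d; int i; };

   which occupies 16 bytes, classify_argument returns 2 with
   classes[0] = X86_64_SSEDF_CLASS and classes[1] = X86_64_INTEGERSI_CLASS,
   so the first eightbyte goes in an SSE register and the second in an
   integer register rather than the whole struct going to memory.  */
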
6106 /* Examine the argument and set the number of registers required in each
6107    class.  Return 0 iff the parameter should be passed in memory.  */
6108 static int
6109 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6110 int *int_nregs, int *sse_nregs)
6111 {
6112 enum x86_64_reg_class regclass[MAX_CLASSES];
6113 int n = classify_argument (mode, type, regclass, 0);
6114
6115 *int_nregs = 0;
6116 *sse_nregs = 0;
6117 if (!n)
6118 return 0;
6119 for (n--; n >= 0; n--)
6120 switch (regclass[n])
6121 {
6122 case X86_64_INTEGER_CLASS:
6123 case X86_64_INTEGERSI_CLASS:
6124 (*int_nregs)++;
6125 break;
6126 case X86_64_SSE_CLASS:
6127 case X86_64_SSESF_CLASS:
6128 case X86_64_SSEDF_CLASS:
6129 (*sse_nregs)++;
6130 break;
6131 case X86_64_NO_CLASS:
6132 case X86_64_SSEUP_CLASS:
6133 break;
6134 case X86_64_X87_CLASS:
6135 case X86_64_X87UP_CLASS:
6136 if (!in_return)
6137 return 0;
6138 break;
6139 case X86_64_COMPLEX_X87_CLASS:
6140 return in_return ? 2 : 0;
6141 case X86_64_MEMORY_CLASS:
6142 gcc_unreachable ();
6143 }
6144 return 1;
6145 }
6146
6147 /* Construct container for the argument used by GCC interface. See
6148 FUNCTION_ARG for the detailed description. */
6149
6150 static rtx
6151 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6152 const_tree type, int in_return, int nintregs, int nsseregs,
6153 const int *intreg, int sse_regno)
6154 {
6155 /* The following variables hold the static issued_error state. */
6156 static bool issued_sse_arg_error;
6157 static bool issued_sse_ret_error;
6158 static bool issued_x87_ret_error;
6159
6160 enum machine_mode tmpmode;
6161 int bytes =
6162 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6163 enum x86_64_reg_class regclass[MAX_CLASSES];
6164 int n;
6165 int i;
6166 int nexps = 0;
6167 int needed_sseregs, needed_intregs;
6168 rtx exp[MAX_CLASSES];
6169 rtx ret;
6170
6171 n = classify_argument (mode, type, regclass, 0);
6172 if (!n)
6173 return NULL;
6174 if (!examine_argument (mode, type, in_return, &needed_intregs,
6175 &needed_sseregs))
6176 return NULL;
6177 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6178 return NULL;
6179
6180 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6181 some less clueful developer tries to use floating-point anyway. */
6182 if (needed_sseregs && !TARGET_SSE)
6183 {
6184 if (in_return)
6185 {
6186 if (!issued_sse_ret_error)
6187 {
6188 error ("SSE register return with SSE disabled");
6189 issued_sse_ret_error = true;
6190 }
6191 }
6192 else if (!issued_sse_arg_error)
6193 {
6194 error ("SSE register argument with SSE disabled");
6195 issued_sse_arg_error = true;
6196 }
6197 return NULL;
6198 }
6199
6200 /* Likewise, error if the ABI requires us to return values in the
6201 x87 registers and the user specified -mno-80387. */
6202 if (!TARGET_80387 && in_return)
6203 for (i = 0; i < n; i++)
6204 if (regclass[i] == X86_64_X87_CLASS
6205 || regclass[i] == X86_64_X87UP_CLASS
6206 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6207 {
6208 if (!issued_x87_ret_error)
6209 {
6210 error ("x87 register return with x87 disabled");
6211 issued_x87_ret_error = true;
6212 }
6213 return NULL;
6214 }
6215
6216   /* First construct the simple cases.  Avoid SCmode, since we want to use a
6217 single register to pass this type. */
6218 if (n == 1 && mode != SCmode)
6219 switch (regclass[0])
6220 {
6221 case X86_64_INTEGER_CLASS:
6222 case X86_64_INTEGERSI_CLASS:
6223 return gen_rtx_REG (mode, intreg[0]);
6224 case X86_64_SSE_CLASS:
6225 case X86_64_SSESF_CLASS:
6226 case X86_64_SSEDF_CLASS:
6227 if (mode != BLKmode)
6228 return gen_reg_or_parallel (mode, orig_mode,
6229 SSE_REGNO (sse_regno));
6230 break;
6231 case X86_64_X87_CLASS:
6232 case X86_64_COMPLEX_X87_CLASS:
6233 return gen_rtx_REG (mode, FIRST_STACK_REG);
6234 case X86_64_NO_CLASS:
6235 /* Zero sized array, struct or class. */
6236 return NULL;
6237 default:
6238 gcc_unreachable ();
6239 }
6240 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
6241 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
6242 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6243 if (n == 4
6244 && regclass[0] == X86_64_SSE_CLASS
6245 && regclass[1] == X86_64_SSEUP_CLASS
6246 && regclass[2] == X86_64_SSEUP_CLASS
6247 && regclass[3] == X86_64_SSEUP_CLASS
6248 && mode != BLKmode)
6249 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6250
6251 if (n == 2
6252 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
6253 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6254 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
6255 && regclass[1] == X86_64_INTEGER_CLASS
6256 && (mode == CDImode || mode == TImode || mode == TFmode)
6257 && intreg[0] + 1 == intreg[1])
6258 return gen_rtx_REG (mode, intreg[0]);
6259
6260 /* Otherwise figure out the entries of the PARALLEL. */
6261 for (i = 0; i < n; i++)
6262 {
6263 int pos;
6264
6265 switch (regclass[i])
6266 {
6267 case X86_64_NO_CLASS:
6268 break;
6269 case X86_64_INTEGER_CLASS:
6270 case X86_64_INTEGERSI_CLASS:
6271 /* Merge TImodes on aligned occasions here too. */
6272 if (i * 8 + 8 > bytes)
6273 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6274 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6275 tmpmode = SImode;
6276 else
6277 tmpmode = DImode;
6278 	  /* We've requested 24 bytes that we don't have a mode for.  Use DImode.  */
6279 if (tmpmode == BLKmode)
6280 tmpmode = DImode;
6281 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6282 gen_rtx_REG (tmpmode, *intreg),
6283 GEN_INT (i*8));
6284 intreg++;
6285 break;
6286 case X86_64_SSESF_CLASS:
6287 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6288 gen_rtx_REG (SFmode,
6289 SSE_REGNO (sse_regno)),
6290 GEN_INT (i*8));
6291 sse_regno++;
6292 break;
6293 case X86_64_SSEDF_CLASS:
6294 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6295 gen_rtx_REG (DFmode,
6296 SSE_REGNO (sse_regno)),
6297 GEN_INT (i*8));
6298 sse_regno++;
6299 break;
6300 case X86_64_SSE_CLASS:
6301 pos = i;
6302 switch (n)
6303 {
6304 case 1:
6305 tmpmode = DImode;
6306 break;
6307 case 2:
6308 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6309 {
6310 tmpmode = TImode;
6311 i++;
6312 }
6313 else
6314 tmpmode = DImode;
6315 break;
6316 case 4:
6317 gcc_assert (i == 0
6318 && regclass[1] == X86_64_SSEUP_CLASS
6319 && regclass[2] == X86_64_SSEUP_CLASS
6320 && regclass[3] == X86_64_SSEUP_CLASS);
6321 tmpmode = OImode;
6322 i += 3;
6323 break;
6324 default:
6325 gcc_unreachable ();
6326 }
6327 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6328 gen_rtx_REG (tmpmode,
6329 SSE_REGNO (sse_regno)),
6330 GEN_INT (pos*8));
6331 sse_regno++;
6332 break;
6333 default:
6334 gcc_unreachable ();
6335 }
6336 }
6337
6338 /* Empty aligned struct, union or class. */
6339 if (nexps == 0)
6340 return NULL;
6341
6342 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6343 for (i = 0; i < nexps; i++)
6344 XVECEXP (ret, 0, i) = exp [i];
6345 return ret;
6346 }
6347
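/* Illustrative result (continuing the struct s example above): for
   struct s { double d; int i; } construct_container builds a PARALLEL of
   the form

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:SI di) (const_int 8))])

   placing the first eightbyte in an SSE register at offset 0 and the
   second in an integer register at offset 8; the actual register numbers
   depend on how many preceding arguments have been assigned.  */
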
6348 /* Update the data in CUM to advance over an argument of mode MODE
6349 and data type TYPE. (TYPE is null for libcalls where that information
6350 may not be available.) */
6351
6352 static void
6353 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6354 const_tree type, HOST_WIDE_INT bytes,
6355 HOST_WIDE_INT words)
6356 {
6357 switch (mode)
6358 {
6359 default:
6360 break;
6361
6362 case BLKmode:
6363 if (bytes < 0)
6364 break;
6365 /* FALLTHRU */
6366
6367 case DImode:
6368 case SImode:
6369 case HImode:
6370 case QImode:
6371 cum->words += words;
6372 cum->nregs -= words;
6373 cum->regno += words;
6374
6375 if (cum->nregs <= 0)
6376 {
6377 cum->nregs = 0;
6378 cum->regno = 0;
6379 }
6380 break;
6381
6382 case OImode:
6383 /* OImode shouldn't be used directly. */
6384 gcc_unreachable ();
6385
6386 case DFmode:
6387 if (cum->float_in_sse < 2)
6388 break;
6389 case SFmode:
6390 if (cum->float_in_sse < 1)
6391 break;
6392 /* FALLTHRU */
6393
6394 case V8SFmode:
6395 case V8SImode:
6396 case V32QImode:
6397 case V16HImode:
6398 case V4DFmode:
6399 case V4DImode:
6400 case TImode:
6401 case V16QImode:
6402 case V8HImode:
6403 case V4SImode:
6404 case V2DImode:
6405 case V4SFmode:
6406 case V2DFmode:
6407 if (!type || !AGGREGATE_TYPE_P (type))
6408 {
6409 cum->sse_words += words;
6410 cum->sse_nregs -= 1;
6411 cum->sse_regno += 1;
6412 if (cum->sse_nregs <= 0)
6413 {
6414 cum->sse_nregs = 0;
6415 cum->sse_regno = 0;
6416 }
6417 }
6418 break;
6419
6420 case V8QImode:
6421 case V4HImode:
6422 case V2SImode:
6423 case V2SFmode:
6424 case V1TImode:
6425 case V1DImode:
6426 if (!type || !AGGREGATE_TYPE_P (type))
6427 {
6428 cum->mmx_words += words;
6429 cum->mmx_nregs -= 1;
6430 cum->mmx_regno += 1;
6431 if (cum->mmx_nregs <= 0)
6432 {
6433 cum->mmx_nregs = 0;
6434 cum->mmx_regno = 0;
6435 }
6436 }
6437 break;
6438 }
6439 }
6440
6441 static void
6442 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6443 const_tree type, HOST_WIDE_INT words, bool named)
6444 {
6445 int int_nregs, sse_nregs;
6446
6447 /* Unnamed 256bit vector mode parameters are passed on stack. */
6448 if (!named && VALID_AVX256_REG_MODE (mode))
6449 return;
6450
6451 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6452 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6453 {
6454 cum->nregs -= int_nregs;
6455 cum->sse_nregs -= sse_nregs;
6456 cum->regno += int_nregs;
6457 cum->sse_regno += sse_nregs;
6458 }
6459 else
6460 {
6461 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6462 cum->words = (cum->words + align - 1) & ~(align - 1);
6463 cum->words += words;
6464 }
6465 }
6466
6467 static void
6468 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6469 HOST_WIDE_INT words)
6470 {
6471   /* Otherwise, this should be passed indirectly.  */
6472 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6473
6474 cum->words += words;
6475 if (cum->nregs > 0)
6476 {
6477 cum->nregs -= 1;
6478 cum->regno += 1;
6479 }
6480 }
6481
6482 /* Update the data in CUM to advance over an argument of mode MODE and
6483 data type TYPE. (TYPE is null for libcalls where that information
6484 may not be available.) */
6485
6486 static void
6487 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
6488 const_tree type, bool named)
6489 {
6490 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6491 HOST_WIDE_INT bytes, words;
6492
6493 if (mode == BLKmode)
6494 bytes = int_size_in_bytes (type);
6495 else
6496 bytes = GET_MODE_SIZE (mode);
6497 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6498
6499 if (type)
6500 mode = type_natural_mode (type, NULL);
6501
6502 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6503 function_arg_advance_ms_64 (cum, bytes, words);
6504 else if (TARGET_64BIT)
6505 function_arg_advance_64 (cum, mode, type, words, named);
6506 else
6507 function_arg_advance_32 (cum, mode, type, bytes, words);
6508 }
6509
6510 /* Define where to put the arguments to a function.
6511 Value is zero to push the argument on the stack,
6512 or a hard register in which to store the argument.
6513
6514 MODE is the argument's machine mode.
6515 TYPE is the data type of the argument (as a tree).
6516 This is null for libcalls where that information may
6517 not be available.
6518 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6519 the preceding args and about the function being called.
6520 NAMED is nonzero if this argument is a named parameter
6521 (otherwise it is an extra parameter matching an ellipsis). */
6522
6523 static rtx
6524 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6525 enum machine_mode orig_mode, const_tree type,
6526 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6527 {
6528 static bool warnedsse, warnedmmx;
6529
6530 /* Avoid the AL settings for the Unix64 ABI. */
6531 if (mode == VOIDmode)
6532 return constm1_rtx;
6533
6534 switch (mode)
6535 {
6536 default:
6537 break;
6538
6539 case BLKmode:
6540 if (bytes < 0)
6541 break;
6542 /* FALLTHRU */
6543 case DImode:
6544 case SImode:
6545 case HImode:
6546 case QImode:
6547 if (words <= cum->nregs)
6548 {
6549 int regno = cum->regno;
6550
6551 	  /* Fastcall allocates the first two DWORD (SImode) or
6552 	     smaller arguments to ECX and EDX if they aren't
6553 	     aggregate types.  */
6554 if (cum->fastcall)
6555 {
6556 if (mode == BLKmode
6557 || mode == DImode
6558 || (type && AGGREGATE_TYPE_P (type)))
6559 break;
6560
6561 	      /* ECX, not EAX, is the first allocated register.  */
6562 if (regno == AX_REG)
6563 regno = CX_REG;
6564 }
6565 return gen_rtx_REG (mode, regno);
6566 }
6567 break;
6568
6569 case DFmode:
6570 if (cum->float_in_sse < 2)
6571 break;
6572 case SFmode:
6573 if (cum->float_in_sse < 1)
6574 break;
6575 /* FALLTHRU */
6576 case TImode:
6577 /* In 32bit, we pass TImode in xmm registers. */
6578 case V16QImode:
6579 case V8HImode:
6580 case V4SImode:
6581 case V2DImode:
6582 case V4SFmode:
6583 case V2DFmode:
6584 if (!type || !AGGREGATE_TYPE_P (type))
6585 {
6586 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6587 {
6588 warnedsse = true;
6589 warning (0, "SSE vector argument without SSE enabled "
6590 "changes the ABI");
6591 }
6592 if (cum->sse_nregs)
6593 return gen_reg_or_parallel (mode, orig_mode,
6594 cum->sse_regno + FIRST_SSE_REG);
6595 }
6596 break;
6597
6598 case OImode:
6599 /* OImode shouldn't be used directly. */
6600 gcc_unreachable ();
6601
6602 case V8SFmode:
6603 case V8SImode:
6604 case V32QImode:
6605 case V16HImode:
6606 case V4DFmode:
6607 case V4DImode:
6608 if (!type || !AGGREGATE_TYPE_P (type))
6609 {
6610 if (cum->sse_nregs)
6611 return gen_reg_or_parallel (mode, orig_mode,
6612 cum->sse_regno + FIRST_SSE_REG);
6613 }
6614 break;
6615
6616 case V8QImode:
6617 case V4HImode:
6618 case V2SImode:
6619 case V2SFmode:
6620 case V1TImode:
6621 case V1DImode:
6622 if (!type || !AGGREGATE_TYPE_P (type))
6623 {
6624 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6625 {
6626 warnedmmx = true;
6627 warning (0, "MMX vector argument without MMX enabled "
6628 "changes the ABI");
6629 }
6630 if (cum->mmx_nregs)
6631 return gen_reg_or_parallel (mode, orig_mode,
6632 cum->mmx_regno + FIRST_MMX_REG);
6633 }
6634 break;
6635 }
6636
6637 return NULL_RTX;
6638 }
6639
6640 static rtx
6641 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6642 enum machine_mode orig_mode, const_tree type, bool named)
6643 {
6644 /* Handle a hidden AL argument containing number of registers
6645 for varargs x86-64 functions. */
6646 if (mode == VOIDmode)
6647 return GEN_INT (cum->maybe_vaarg
6648 ? (cum->sse_nregs < 0
6649 ? X86_64_SSE_REGPARM_MAX
6650 : cum->sse_regno)
6651 : -1);
6652
6653 switch (mode)
6654 {
6655 default:
6656 break;
6657
6658 case V8SFmode:
6659 case V8SImode:
6660 case V32QImode:
6661 case V16HImode:
6662 case V4DFmode:
6663 case V4DImode:
6664 /* Unnamed 256bit vector mode parameters are passed on stack. */
6665 if (!named)
6666 return NULL;
6667 break;
6668 }
6669
6670 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6671 cum->sse_nregs,
6672 &x86_64_int_parameter_registers [cum->regno],
6673 cum->sse_regno);
6674 }
6675
6676 static rtx
6677 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6678 enum machine_mode orig_mode, bool named,
6679 HOST_WIDE_INT bytes)
6680 {
6681 unsigned int regno;
6682
6683   /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
6684      We use the value -2 to specify that the current call uses the MS ABI.  */
6685 if (mode == VOIDmode)
6686 return GEN_INT (-2);
6687
6688 /* If we've run out of registers, it goes on the stack. */
6689 if (cum->nregs == 0)
6690 return NULL_RTX;
6691
6692 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6693
6694 /* Only floating point modes are passed in anything but integer regs. */
6695 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6696 {
6697 if (named)
6698 regno = cum->regno + FIRST_SSE_REG;
6699 else
6700 {
6701 rtx t1, t2;
6702
6703 /* Unnamed floating parameters are passed in both the
6704 SSE and integer registers. */
6705 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6706 t2 = gen_rtx_REG (mode, regno);
6707 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6708 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6709 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
6710 }
6711 }
6712   /* Handle aggregate types passed in registers.  */
6713 if (orig_mode == BLKmode)
6714 {
6715 if (bytes > 0 && bytes <= 8)
6716 mode = (bytes > 4 ? DImode : SImode);
6717 if (mode == BLKmode)
6718 mode = DImode;
6719 }
6720
6721 return gen_reg_or_parallel (mode, orig_mode, regno);
6722 }
6723
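/* Illustrative example: in a 64-bit MS-ABI variadic call such as

     printf ("%f", 1.0);

   the unnamed double is not a named SSE argument, so the code above
   returns a PARALLEL pairing an SSE register with the corresponding
   integer register; the value is passed in both, matching the Windows x64
   varargs convention.  */
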
6724 /* Return where to put the arguments to a function.
6725 Return zero to push the argument on the stack, or a hard register in which to store the argument.
6726
6727 MODE is the argument's machine mode. TYPE is the data type of the
6728 argument. It is null for libcalls where that information may not be
6729 available. CUM gives information about the preceding args and about
6730 the function being called. NAMED is nonzero if this argument is a
6731 named parameter (otherwise it is an extra parameter matching an
6732 ellipsis). */
6733
6734 static rtx
6735 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
6736 const_tree type, bool named)
6737 {
6738 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6739 enum machine_mode mode = omode;
6740 HOST_WIDE_INT bytes, words;
6741 rtx arg;
6742
6743 if (mode == BLKmode)
6744 bytes = int_size_in_bytes (type);
6745 else
6746 bytes = GET_MODE_SIZE (mode);
6747 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6748
6749 /* To simplify the code below, represent vector types with a vector mode
6750 even if MMX/SSE are not active. */
6751 if (type && TREE_CODE (type) == VECTOR_TYPE)
6752 mode = type_natural_mode (type, cum);
6753
6754 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6755 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
6756 else if (TARGET_64BIT)
6757 arg = function_arg_64 (cum, mode, omode, type, named);
6758 else
6759 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
6760
6761 if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
6762 {
6763 /* This argument uses 256bit AVX modes. */
6764 if (cum->caller)
6765 cfun->machine->callee_pass_avx256_p = true;
6766 else
6767 cfun->machine->caller_pass_avx256_p = true;
6768 }
6769
6770 return arg;
6771 }
6772
6773 /* A C expression that indicates when an argument must be passed by
6774 reference. If nonzero for an argument, a copy of that argument is
6775 made in memory and a pointer to the argument is passed instead of
6776 the argument itself. The pointer is passed in whatever way is
6777 appropriate for passing a pointer to that type. */
6778
6779 static bool
6780 ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
6781 enum machine_mode mode ATTRIBUTE_UNUSED,
6782 const_tree type, bool named ATTRIBUTE_UNUSED)
6783 {
6784 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6785
6786 /* See Windows x64 Software Convention. */
6787 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6788 {
6789 int msize = (int) GET_MODE_SIZE (mode);
6790 if (type)
6791 {
6792 /* Arrays are passed by reference. */
6793 if (TREE_CODE (type) == ARRAY_TYPE)
6794 return true;
6795
6796 if (AGGREGATE_TYPE_P (type))
6797 {
6798 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6799 are passed by reference. */
6800 msize = int_size_in_bytes (type);
6801 }
6802 }
6803
6804 /* __m128 is passed by reference. */
6805 switch (msize) {
6806 case 1: case 2: case 4: case 8:
6807 break;
6808 default:
6809 return true;
6810 }
6811 }
6812 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6813 return true;
6814
6815 return false;
6816 }
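
/* Illustrative sketch of the MS_ABI rules checked above (not part of GCC,
   the type names are hypothetical):

     struct s8  { long long x; };    8 bytes  -> passed by value
     struct s12 { int x, y, z; };    12 bytes -> copy made, pointer passed
     __m128 v;                       16 bytes -> copy made, pointer passed

   The decision itself is made by ix86_pass_by_reference.  */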
6817
6818 /* Return true when TYPE should be 128bit aligned for 32bit argument
6819 passing ABI. XXX: This function is obsolete and is only used for
6820 checking psABI compatibility with previous versions of GCC. */
6821
6822 static bool
6823 ix86_compat_aligned_value_p (const_tree type)
6824 {
6825 enum machine_mode mode = TYPE_MODE (type);
6826 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6827 || mode == TDmode
6828 || mode == TFmode
6829 || mode == TCmode)
6830 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6831 return true;
6832 if (TYPE_ALIGN (type) < 128)
6833 return false;
6834
6835 if (AGGREGATE_TYPE_P (type))
6836 {
6837 /* Walk the aggregates recursively. */
6838 switch (TREE_CODE (type))
6839 {
6840 case RECORD_TYPE:
6841 case UNION_TYPE:
6842 case QUAL_UNION_TYPE:
6843 {
6844 tree field;
6845
6846 /* Walk all the structure fields. */
6847 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6848 {
6849 if (TREE_CODE (field) == FIELD_DECL
6850 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
6851 return true;
6852 }
6853 break;
6854 }
6855
6856 case ARRAY_TYPE:
6857 /* Just for use if some languages pass arrays by value. */
6858 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
6859 return true;
6860 break;
6861
6862 default:
6863 gcc_unreachable ();
6864 }
6865 }
6866 return false;
6867 }
6868
6869 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
6870 XXX: This function is obsolete and is only used for checking psABI
6871 compatibility with previous versions of GCC. */
6872
6873 static unsigned int
6874 ix86_compat_function_arg_boundary (enum machine_mode mode,
6875 const_tree type, unsigned int align)
6876 {
6877 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6878 natural boundaries. */
6879 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6880 {
6881 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6882 make an exception for SSE modes since these require 128bit
6883 alignment.
6884
6885 The handling here differs from field_alignment. ICC aligns MMX
6886 arguments to 4 byte boundaries, while structure fields are aligned
6887 to 8 byte boundaries. */
6888 if (!type)
6889 {
6890 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6891 align = PARM_BOUNDARY;
6892 }
6893 else
6894 {
6895 if (!ix86_compat_aligned_value_p (type))
6896 align = PARM_BOUNDARY;
6897 }
6898 }
6899 if (align > BIGGEST_ALIGNMENT)
6900 align = BIGGEST_ALIGNMENT;
6901 return align;
6902 }
6903
6904 /* Return true when TYPE should be 128bit aligned for 32bit argument
6905 passing ABI. */
6906
6907 static bool
6908 ix86_contains_aligned_value_p (const_tree type)
6909 {
6910 enum machine_mode mode = TYPE_MODE (type);
6911
6912 if (mode == XFmode || mode == XCmode)
6913 return false;
6914
6915 if (TYPE_ALIGN (type) < 128)
6916 return false;
6917
6918 if (AGGREGATE_TYPE_P (type))
6919 {
6920 /* Walk the aggregates recursively. */
6921 switch (TREE_CODE (type))
6922 {
6923 case RECORD_TYPE:
6924 case UNION_TYPE:
6925 case QUAL_UNION_TYPE:
6926 {
6927 tree field;
6928
6929 /* Walk all the structure fields. */
6930 for (field = TYPE_FIELDS (type);
6931 field;
6932 field = DECL_CHAIN (field))
6933 {
6934 if (TREE_CODE (field) == FIELD_DECL
6935 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
6936 return true;
6937 }
6938 break;
6939 }
6940
6941 case ARRAY_TYPE:
6942 /* Just for use if some languages pass arrays by value. */
6943 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
6944 return true;
6945 break;
6946
6947 default:
6948 gcc_unreachable ();
6949 }
6950 }
6951 else
6952 return TYPE_ALIGN (type) >= 128;
6953
6954 return false;
6955 }
6956
6957 /* Gives the alignment boundary, in bits, of an argument with the
6958 specified mode and type. */
6959
6960 static unsigned int
6961 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
6962 {
6963 unsigned int align;
6964 if (type)
6965 {
6966 /* Since the main variant type is used for the call, convert TYPE to
6967 its main variant. */
6968 type = TYPE_MAIN_VARIANT (type);
6969 align = TYPE_ALIGN (type);
6970 }
6971 else
6972 align = GET_MODE_ALIGNMENT (mode);
6973 if (align < PARM_BOUNDARY)
6974 align = PARM_BOUNDARY;
6975 else
6976 {
6977 static bool warned;
6978 unsigned int saved_align = align;
6979
6980 if (!TARGET_64BIT)
6981 {
6982 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
6983 if (!type)
6984 {
6985 if (mode == XFmode || mode == XCmode)
6986 align = PARM_BOUNDARY;
6987 }
6988 else if (!ix86_contains_aligned_value_p (type))
6989 align = PARM_BOUNDARY;
6990
6991 if (align < 128)
6992 align = PARM_BOUNDARY;
6993 }
6994
6995 if (warn_psabi
6996 && !warned
6997 && align != ix86_compat_function_arg_boundary (mode, type,
6998 saved_align))
6999 {
7000 warned = true;
7001 inform (input_location,
7002 "The ABI for passing parameters with %d-byte"
7003 " alignment has changed in GCC 4.6",
7004 align / BITS_PER_UNIT);
7005 }
7006 }
7007
7008 return align;
7009 }
7010
7011 /* Return true if N is a possible register number of function value. */
7012
7013 static bool
7014 ix86_function_value_regno_p (const unsigned int regno)
7015 {
7016 switch (regno)
7017 {
7018 case AX_REG:
7019 return true;
7020
7021 case FIRST_FLOAT_REG:
7022 /* TODO: The function should depend on current function ABI but
7023 builtins.c would need updating then. Therefore we use the
7024 default ABI. */
7025 if (TARGET_64BIT && ix86_abi == MS_ABI)
7026 return false;
7027 return TARGET_FLOAT_RETURNS_IN_80387;
7028
7029 case FIRST_SSE_REG:
7030 return TARGET_SSE;
7031
7032 case FIRST_MMX_REG:
7033 if (TARGET_MACHO || TARGET_64BIT)
7034 return false;
7035 return TARGET_MMX;
7036 }
7037
7038 return false;
7039 }
7040
7041 /* Define how to find the value returned by a function.
7042 VALTYPE is the data type of the value (as a tree).
7043 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7044 otherwise, FUNC is 0. */
7045
7046 static rtx
7047 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7048 const_tree fntype, const_tree fn)
7049 {
7050 unsigned int regno;
7051
7052 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7053 we normally prevent this case when mmx is not available. However
7054 some ABIs may require the result to be returned like DImode. */
7055 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7056 regno = FIRST_MMX_REG;
7057
7058 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7059 we prevent this case when sse is not available. However some ABIs
7060 may require the result to be returned like integer TImode. */
7061 else if (mode == TImode
7062 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7063 regno = FIRST_SSE_REG;
7064
7065 /* 32-byte vector modes in %ymm0. */
7066 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7067 regno = FIRST_SSE_REG;
7068
7069 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7070 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7071 regno = FIRST_FLOAT_REG;
7072 else
7073 /* Most things go in %eax. */
7074 regno = AX_REG;
7075
7076 /* Override FP return register with %xmm0 for local functions when
7077 SSE math is enabled or for functions with sseregparm attribute. */
7078 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7079 {
7080 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7081 if ((sse_level >= 1 && mode == SFmode)
7082 || (sse_level == 2 && mode == DFmode))
7083 regno = FIRST_SSE_REG;
7084 }
7085
7086 /* OImode shouldn't be used directly. */
7087 gcc_assert (mode != OImode);
7088
7089 return gen_rtx_REG (orig_mode, regno);
7090 }
7091
7092 static rtx
7093 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7094 const_tree valtype)
7095 {
7096 rtx ret;
7097
7098 /* Handle libcalls, which don't provide a type node. */
7099 if (valtype == NULL)
7100 {
7101 unsigned int regno;
7102
7103 switch (mode)
7104 {
7105 case SFmode:
7106 case SCmode:
7107 case DFmode:
7108 case DCmode:
7109 case TFmode:
7110 case SDmode:
7111 case DDmode:
7112 case TDmode:
7113 regno = FIRST_SSE_REG;
7114 break;
7115 case XFmode:
7116 case XCmode:
7117 regno = FIRST_FLOAT_REG;
7118 break;
7119 case TCmode:
7120 return NULL;
7121 default:
7122 regno = AX_REG;
7123 }
7124
7125 return gen_rtx_REG (mode, regno);
7126 }
7127 else if (POINTER_TYPE_P (valtype))
7128 {
7129 /* Pointers are always returned in Pmode. */
7130 mode = Pmode;
7131 }
7132
7133 ret = construct_container (mode, orig_mode, valtype, 1,
7134 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7135 x86_64_int_return_registers, 0);
7136
7137 /* For zero sized structures, construct_container returns NULL, but we
7138 need to keep the rest of the compiler happy by returning a meaningful value. */
7139 if (!ret)
7140 ret = gen_rtx_REG (orig_mode, AX_REG);
7141
7142 return ret;
7143 }
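
/* For illustration (SysV x86-64, hypothetical declarations only):

     double f (void);                  value returned in %xmm0
     long double g (void);             value returned in %st(0)
     struct { long a, b; } h (void);   returned in %rax:%rdx by
                                       construct_container

   This merely restates what the mode switch and construct_container
   above compute.  */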
7144
7145 static rtx
7146 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
7147 {
7148 unsigned int regno = AX_REG;
7149
7150 if (TARGET_SSE)
7151 {
7152 switch (GET_MODE_SIZE (mode))
7153 {
7154 case 16:
7155 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7156 && !COMPLEX_MODE_P (mode))
7157 regno = FIRST_SSE_REG;
7158 break;
7159 case 8:
7160 case 4:
7161 if (mode == SFmode || mode == DFmode)
7162 regno = FIRST_SSE_REG;
7163 break;
7164 default:
7165 break;
7166 }
7167 }
7168 return gen_rtx_REG (orig_mode, regno);
7169 }
7170
7171 static rtx
7172 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7173 enum machine_mode orig_mode, enum machine_mode mode)
7174 {
7175 const_tree fn, fntype;
7176
7177 fn = NULL_TREE;
7178 if (fntype_or_decl && DECL_P (fntype_or_decl))
7179 fn = fntype_or_decl;
7180 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7181
7182 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7183 return function_value_ms_64 (orig_mode, mode);
7184 else if (TARGET_64BIT)
7185 return function_value_64 (orig_mode, mode, valtype);
7186 else
7187 return function_value_32 (orig_mode, mode, fntype, fn);
7188 }
7189
7190 static rtx
7191 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7192 bool outgoing ATTRIBUTE_UNUSED)
7193 {
7194 enum machine_mode mode, orig_mode;
7195
7196 orig_mode = TYPE_MODE (valtype);
7197 mode = type_natural_mode (valtype, NULL);
7198 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7199 }
7200
7201 /* Pointer function arguments and return values are promoted to Pmode. */
7202
7203 static enum machine_mode
7204 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
7205 int *punsignedp, const_tree fntype,
7206 int for_return)
7207 {
7208 if (type != NULL_TREE && POINTER_TYPE_P (type))
7209 {
7210 *punsignedp = POINTERS_EXTEND_UNSIGNED;
7211 return Pmode;
7212 }
7213 return default_promote_function_mode (type, mode, punsignedp, fntype,
7214 for_return);
7215 }
7216
7217 rtx
7218 ix86_libcall_value (enum machine_mode mode)
7219 {
7220 return ix86_function_value_1 (NULL, NULL, mode, mode);
7221 }
7222
7223 /* Return true iff type is returned in memory. */
7224
7225 static bool ATTRIBUTE_UNUSED
7226 return_in_memory_32 (const_tree type, enum machine_mode mode)
7227 {
7228 HOST_WIDE_INT size;
7229
7230 if (mode == BLKmode)
7231 return true;
7232
7233 size = int_size_in_bytes (type);
7234
7235 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
7236 return false;
7237
7238 if (VECTOR_MODE_P (mode) || mode == TImode)
7239 {
7240 /* User-created vectors small enough to fit in EAX. */
7241 if (size < 8)
7242 return false;
7243
7244 /* MMX/3dNow values are returned in MM0,
7245 except when it doesn't exist or the ABI prescribes otherwise. */
7246 if (size == 8)
7247 return !TARGET_MMX || TARGET_VECT8_RETURNS;
7248
7249 /* SSE values are returned in XMM0, except when it doesn't exist. */
7250 if (size == 16)
7251 return !TARGET_SSE;
7252
7253 /* AVX values are returned in YMM0, except when it doesn't exist. */
7254 if (size == 32)
7255 return !TARGET_AVX;
7256 }
7257
7258 if (mode == XFmode)
7259 return false;
7260
7261 if (size > 12)
7262 return true;
7263
7264 /* OImode shouldn't be used directly. */
7265 gcc_assert (mode != OImode);
7266
7267 return false;
7268 }
7269
7270 static bool ATTRIBUTE_UNUSED
7271 return_in_memory_64 (const_tree type, enum machine_mode mode)
7272 {
7273 int needed_intregs, needed_sseregs;
7274 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
7275 }
7276
7277 static bool ATTRIBUTE_UNUSED
7278 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
7279 {
7280 HOST_WIDE_INT size = int_size_in_bytes (type);
7281
7282 /* __m128 is returned in xmm0. */
7283 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7284 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
7285 return false;
7286
7287 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
7288 return size != 1 && size != 2 && size != 4 && size != 8;
7289 }
7290
7291 static bool
7292 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7293 {
7294 #ifdef SUBTARGET_RETURN_IN_MEMORY
7295 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
7296 #else
7297 const enum machine_mode mode = type_natural_mode (type, NULL);
7298
7299 if (TARGET_64BIT)
7300 {
7301 if (ix86_function_type_abi (fntype) == MS_ABI)
7302 return return_in_memory_ms_64 (type, mode);
7303 else
7304 return return_in_memory_64 (type, mode);
7305 }
7306 else
7307 return return_in_memory_32 (type, mode);
7308 #endif
7309 }
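
/* For illustration (SysV x86-64, hypothetical types): a 16-byte
   struct { long a, b; } is returned in registers, while a 24-byte
   struct { long a, b, c; } is classified MEMORY by examine_argument
   and is therefore returned in memory.  */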
7310
7311 /* When returning SSE vector types, we have a choice of either
7312 (1) being ABI incompatible with a -march switch, or
7313 (2) generating an error.
7314 Given no good solution, I think the safest thing is one warning.
7315 The user won't be able to use -Werror, but....
7316
7317 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7318 called in response to actually generating a caller or callee that
7319 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7320 via aggregate_value_p for general type probing from tree-ssa. */
7321
7322 static rtx
7323 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
7324 {
7325 static bool warnedsse, warnedmmx;
7326
7327 if (!TARGET_64BIT && type)
7328 {
7329 /* Look at the return type of the function, not the function type. */
7330 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
7331
7332 if (!TARGET_SSE && !warnedsse)
7333 {
7334 if (mode == TImode
7335 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7336 {
7337 warnedsse = true;
7338 warning (0, "SSE vector return without SSE enabled "
7339 "changes the ABI");
7340 }
7341 }
7342
7343 if (!TARGET_MMX && !warnedmmx)
7344 {
7345 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7346 {
7347 warnedmmx = true;
7348 warning (0, "MMX vector return without MMX enabled "
7349 "changes the ABI");
7350 }
7351 }
7352 }
7353
7354 return NULL;
7355 }
7356
7357 \f
7358 /* Create the va_list data type. */
7359
7360 /* Returns the calling convention specific va_list data type.
7361 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7362
7363 static tree
7364 ix86_build_builtin_va_list_abi (enum calling_abi abi)
7365 {
7366 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
7367
7368 /* For i386 we use plain pointer to argument area. */
7369 if (!TARGET_64BIT || abi == MS_ABI)
7370 return build_pointer_type (char_type_node);
7371
7372 record = lang_hooks.types.make_type (RECORD_TYPE);
7373 type_decl = build_decl (BUILTINS_LOCATION,
7374 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7375
7376 f_gpr = build_decl (BUILTINS_LOCATION,
7377 FIELD_DECL, get_identifier ("gp_offset"),
7378 unsigned_type_node);
7379 f_fpr = build_decl (BUILTINS_LOCATION,
7380 FIELD_DECL, get_identifier ("fp_offset"),
7381 unsigned_type_node);
7382 f_ovf = build_decl (BUILTINS_LOCATION,
7383 FIELD_DECL, get_identifier ("overflow_arg_area"),
7384 ptr_type_node);
7385 f_sav = build_decl (BUILTINS_LOCATION,
7386 FIELD_DECL, get_identifier ("reg_save_area"),
7387 ptr_type_node);
7388
7389 va_list_gpr_counter_field = f_gpr;
7390 va_list_fpr_counter_field = f_fpr;
7391
7392 DECL_FIELD_CONTEXT (f_gpr) = record;
7393 DECL_FIELD_CONTEXT (f_fpr) = record;
7394 DECL_FIELD_CONTEXT (f_ovf) = record;
7395 DECL_FIELD_CONTEXT (f_sav) = record;
7396
7397 TYPE_STUB_DECL (record) = type_decl;
7398 TYPE_NAME (record) = type_decl;
7399 TYPE_FIELDS (record) = f_gpr;
7400 DECL_CHAIN (f_gpr) = f_fpr;
7401 DECL_CHAIN (f_fpr) = f_ovf;
7402 DECL_CHAIN (f_ovf) = f_sav;
7403
7404 layout_type (record);
7405
7406 /* The correct type is an array type of one element. */
7407 return build_array_type (record, build_index_type (size_zero_node));
7408 }
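
/* For reference, the record laid out above corresponds to the familiar
   SysV x86-64 va_list declaration (a sketch only, GCC never parses this):

     typedef struct __va_list_tag
     {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];
*/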
7409
7410 /* Set up the builtin va_list data type and, for 64-bit, the additional
7411 calling convention specific va_list data types. */
7412
7413 static tree
7414 ix86_build_builtin_va_list (void)
7415 {
7416 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7417
7418 /* Initialize abi specific va_list builtin types. */
7419 if (TARGET_64BIT)
7420 {
7421 tree t;
7422 if (ix86_abi == MS_ABI)
7423 {
7424 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7425 if (TREE_CODE (t) != RECORD_TYPE)
7426 t = build_variant_type_copy (t);
7427 sysv_va_list_type_node = t;
7428 }
7429 else
7430 {
7431 t = ret;
7432 if (TREE_CODE (t) != RECORD_TYPE)
7433 t = build_variant_type_copy (t);
7434 sysv_va_list_type_node = t;
7435 }
7436 if (ix86_abi != MS_ABI)
7437 {
7438 t = ix86_build_builtin_va_list_abi (MS_ABI);
7439 if (TREE_CODE (t) != RECORD_TYPE)
7440 t = build_variant_type_copy (t);
7441 ms_va_list_type_node = t;
7442 }
7443 else
7444 {
7445 t = ret;
7446 if (TREE_CODE (t) != RECORD_TYPE)
7447 t = build_variant_type_copy (t);
7448 ms_va_list_type_node = t;
7449 }
7450 }
7451
7452 return ret;
7453 }
7454
7455 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7456
7457 static void
7458 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7459 {
7460 rtx save_area, mem;
7461 alias_set_type set;
7462 int i, max;
7463
7464 /* GPR size of varargs save area. */
7465 if (cfun->va_list_gpr_size)
7466 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7467 else
7468 ix86_varargs_gpr_size = 0;
7469
7470 /* FPR size of varargs save area. We don't need it if we don't pass
7471 anything in SSE registers. */
7472 if (TARGET_SSE && cfun->va_list_fpr_size)
7473 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7474 else
7475 ix86_varargs_fpr_size = 0;
7476
7477 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7478 return;
7479
7480 save_area = frame_pointer_rtx;
7481 set = get_varargs_alias_set ();
7482
7483 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7484 if (max > X86_64_REGPARM_MAX)
7485 max = X86_64_REGPARM_MAX;
7486
7487 for (i = cum->regno; i < max; i++)
7488 {
7489 mem = gen_rtx_MEM (Pmode,
7490 plus_constant (save_area, i * UNITS_PER_WORD));
7491 MEM_NOTRAP_P (mem) = 1;
7492 set_mem_alias_set (mem, set);
7493 emit_move_insn (mem, gen_rtx_REG (Pmode,
7494 x86_64_int_parameter_registers[i]));
7495 }
7496
7497 if (ix86_varargs_fpr_size)
7498 {
7499 enum machine_mode smode;
7500 rtx label, test;
7501
7502 /* Now emit code to save SSE registers. The AX parameter contains number
7503 of SSE parameter registers used to call this function, though all we
7504 actually check here is the zero/non-zero status. */
7505
7506 label = gen_label_rtx ();
7507 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7508 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7509 label));
7510
7511 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7512 we used movdqa (i.e. TImode) instead? Perhaps even better would
7513 be if we could determine the real mode of the data, via a hook
7514 into pass_stdarg. Ignore all that for now. */
7515 smode = V4SFmode;
7516 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7517 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7518
7519 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7520 if (max > X86_64_SSE_REGPARM_MAX)
7521 max = X86_64_SSE_REGPARM_MAX;
7522
7523 for (i = cum->sse_regno; i < max; ++i)
7524 {
7525 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7526 mem = gen_rtx_MEM (smode, mem);
7527 MEM_NOTRAP_P (mem) = 1;
7528 set_mem_alias_set (mem, set);
7529 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7530
7531 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
7532 }
7533
7534 emit_label (label);
7535 }
7536 }
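
/* Layout of the register save area filled in above, assuming the usual
   X86_64_REGPARM_MAX == 6 and X86_64_SSE_REGPARM_MAX == 8 (a sketch):

     bytes   0 ..  47 : %rdi, %rsi, %rdx, %rcx, %r8, %r9   (8 bytes each)
     bytes  48 .. 175 : %xmm0 .. %xmm7                      (16 bytes each)

   gp_offset and fp_offset in the va_list index into this block.  */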
7537
7538 static void
7539 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7540 {
7541 alias_set_type set = get_varargs_alias_set ();
7542 int i;
7543
7544 /* Reset to zero, as there might be a SysV va_arg used
7545 before. */
7546 ix86_varargs_gpr_size = 0;
7547 ix86_varargs_fpr_size = 0;
7548
7549 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7550 {
7551 rtx reg, mem;
7552
7553 mem = gen_rtx_MEM (Pmode,
7554 plus_constant (virtual_incoming_args_rtx,
7555 i * UNITS_PER_WORD));
7556 MEM_NOTRAP_P (mem) = 1;
7557 set_mem_alias_set (mem, set);
7558
7559 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7560 emit_move_insn (mem, reg);
7561 }
7562 }
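
/* In effect the loop above spills the remaining MS-ABI parameter
   registers into their caller-allocated home slots, roughly:

     *(incoming_args + 0 * 8) = %rcx;
     *(incoming_args + 1 * 8) = %rdx;
     *(incoming_args + 2 * 8) = %r8;
     *(incoming_args + 3 * 8) = %r9;

   starting at cum->regno; incoming_args stands for
   virtual_incoming_args_rtx and is only illustrative.  */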
7563
7564 static void
7565 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
7566 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7567 int no_rtl)
7568 {
7569 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7570 CUMULATIVE_ARGS next_cum;
7571 tree fntype;
7572
7573 /* This argument doesn't appear to be used anymore, which is good,
7574 because the old code here didn't suppress rtl generation. */
7575 gcc_assert (!no_rtl);
7576
7577 if (!TARGET_64BIT)
7578 return;
7579
7580 fntype = TREE_TYPE (current_function_decl);
7581
7582 /* For varargs, we do not want to skip the dummy va_dcl argument.
7583 For stdargs, we do want to skip the last named argument. */
7584 next_cum = *cum;
7585 if (stdarg_p (fntype))
7586 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
7587 true);
7588
7589 if (cum->call_abi == MS_ABI)
7590 setup_incoming_varargs_ms_64 (&next_cum);
7591 else
7592 setup_incoming_varargs_64 (&next_cum);
7593 }
7594
7595 /* Check whether TYPE is a va_list of kind char *. */
7596
7597 static bool
7598 is_va_list_char_pointer (tree type)
7599 {
7600 tree canonic;
7601
7602 /* For 32-bit it is always true. */
7603 if (!TARGET_64BIT)
7604 return true;
7605 canonic = ix86_canonical_va_list_type (type);
7606 return (canonic == ms_va_list_type_node
7607 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7608 }
7609
7610 /* Implement va_start. */
7611
7612 static void
7613 ix86_va_start (tree valist, rtx nextarg)
7614 {
7615 HOST_WIDE_INT words, n_gpr, n_fpr;
7616 tree f_gpr, f_fpr, f_ovf, f_sav;
7617 tree gpr, fpr, ovf, sav, t;
7618 tree type;
7619 rtx ovf_rtx;
7620
7621 if (flag_split_stack
7622 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7623 {
7624 unsigned int scratch_regno;
7625
7626 /* When we are splitting the stack, we can't refer to the stack
7627 arguments using internal_arg_pointer, because they may be on
7628 the old stack. The split stack prologue will arrange to
7629 leave a pointer to the old stack arguments in a scratch
7630 register, which we here copy to a pseudo-register. The split
7631 stack prologue can't set the pseudo-register directly because
7632 it (the prologue) runs before any registers have been saved. */
7633
7634 scratch_regno = split_stack_prologue_scratch_regno ();
7635 if (scratch_regno != INVALID_REGNUM)
7636 {
7637 rtx reg, seq;
7638
7639 reg = gen_reg_rtx (Pmode);
7640 cfun->machine->split_stack_varargs_pointer = reg;
7641
7642 start_sequence ();
7643 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
7644 seq = get_insns ();
7645 end_sequence ();
7646
7647 push_topmost_sequence ();
7648 emit_insn_after (seq, entry_of_function ());
7649 pop_topmost_sequence ();
7650 }
7651 }
7652
7653 /* Only 64bit target needs something special. */
7654 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7655 {
7656 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7657 std_expand_builtin_va_start (valist, nextarg);
7658 else
7659 {
7660 rtx va_r, next;
7661
7662 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
7663 next = expand_binop (ptr_mode, add_optab,
7664 cfun->machine->split_stack_varargs_pointer,
7665 crtl->args.arg_offset_rtx,
7666 NULL_RTX, 0, OPTAB_LIB_WIDEN);
7667 convert_move (va_r, next, 0);
7668 }
7669 return;
7670 }
7671
7672 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7673 f_fpr = DECL_CHAIN (f_gpr);
7674 f_ovf = DECL_CHAIN (f_fpr);
7675 f_sav = DECL_CHAIN (f_ovf);
7676
7677 valist = build_simple_mem_ref (valist);
7678 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7679 /* The following should be folded into the MEM_REF offset. */
7680 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7681 f_gpr, NULL_TREE);
7682 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7683 f_fpr, NULL_TREE);
7684 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7685 f_ovf, NULL_TREE);
7686 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7687 f_sav, NULL_TREE);
7688
7689 /* Count number of gp and fp argument registers used. */
7690 words = crtl->args.info.words;
7691 n_gpr = crtl->args.info.regno;
7692 n_fpr = crtl->args.info.sse_regno;
7693
7694 if (cfun->va_list_gpr_size)
7695 {
7696 type = TREE_TYPE (gpr);
7697 t = build2 (MODIFY_EXPR, type,
7698 gpr, build_int_cst (type, n_gpr * 8));
7699 TREE_SIDE_EFFECTS (t) = 1;
7700 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7701 }
7702
7703 if (TARGET_SSE && cfun->va_list_fpr_size)
7704 {
7705 type = TREE_TYPE (fpr);
7706 t = build2 (MODIFY_EXPR, type, fpr,
7707 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
7708 TREE_SIDE_EFFECTS (t) = 1;
7709 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7710 }
7711
7712 /* Find the overflow area. */
7713 type = TREE_TYPE (ovf);
7714 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7715 ovf_rtx = crtl->args.internal_arg_pointer;
7716 else
7717 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
7718 t = make_tree (type, ovf_rtx);
7719 if (words != 0)
7720 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
7721 t = build2 (MODIFY_EXPR, type, ovf, t);
7722 TREE_SIDE_EFFECTS (t) = 1;
7723 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7724
7725 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
7726 {
7727 /* Find the register save area.
7728 The function prologue saves it right above the stack frame. */
7729 type = TREE_TYPE (sav);
7730 t = make_tree (type, frame_pointer_rtx);
7731 if (!ix86_varargs_gpr_size)
7732 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
7733 t = build2 (MODIFY_EXPR, type, sav, t);
7734 TREE_SIDE_EFFECTS (t) = 1;
7735 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7736 }
7737 }
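
/* In effect, for a SysV x86-64 prototype such as the hypothetical

     int f (int a, ...);

   the expansion above behaves as if va_start had written

     ap->gp_offset = 1 * 8;                            one GPR used by A
     ap->fp_offset = 0 * 16 + 8 * X86_64_REGPARM_MAX;  no SSE regs used
     ap->overflow_arg_area = address of the stack-passed arguments;
     ap->reg_save_area = the save area set up in the prologue;

   This is only an illustration of the trees built above.  */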
7738
7739 /* Implement va_arg. */
7740
7741 static tree
7742 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7743 gimple_seq *post_p)
7744 {
7745 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
7746 tree f_gpr, f_fpr, f_ovf, f_sav;
7747 tree gpr, fpr, ovf, sav, t;
7748 int size, rsize;
7749 tree lab_false, lab_over = NULL_TREE;
7750 tree addr, t2;
7751 rtx container;
7752 int indirect_p = 0;
7753 tree ptrtype;
7754 enum machine_mode nat_mode;
7755 unsigned int arg_boundary;
7756
7757 /* Only 64bit target needs something special. */
7758 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7759 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7760
7761 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7762 f_fpr = DECL_CHAIN (f_gpr);
7763 f_ovf = DECL_CHAIN (f_fpr);
7764 f_sav = DECL_CHAIN (f_ovf);
7765
7766 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7767 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7768 valist = build_va_arg_indirect_ref (valist);
7769 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7770 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7771 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7772
7773 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7774 if (indirect_p)
7775 type = build_pointer_type (type);
7776 size = int_size_in_bytes (type);
7777 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7778
7779 nat_mode = type_natural_mode (type, NULL);
7780 switch (nat_mode)
7781 {
7782 case V8SFmode:
7783 case V8SImode:
7784 case V32QImode:
7785 case V16HImode:
7786 case V4DFmode:
7787 case V4DImode:
7788 /* Unnamed 256bit vector mode parameters are passed on stack. */
7789 if (!TARGET_64BIT_MS_ABI)
7790 {
7791 container = NULL;
7792 break;
7793 }
7794
7795 default:
7796 container = construct_container (nat_mode, TYPE_MODE (type),
7797 type, 0, X86_64_REGPARM_MAX,
7798 X86_64_SSE_REGPARM_MAX, intreg,
7799 0);
7800 break;
7801 }
7802
7803 /* Pull the value out of the saved registers. */
7804
7805 addr = create_tmp_var (ptr_type_node, "addr");
7806
7807 if (container)
7808 {
7809 int needed_intregs, needed_sseregs;
7810 bool need_temp;
7811 tree int_addr, sse_addr;
7812
7813 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7814 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7815
7816 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7817
7818 need_temp = (!REG_P (container)
7819 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7820 || TYPE_ALIGN (type) > 128));
7821
7822 /* In case we are passing a structure, verify that it occupies a consecutive
7823 block of the register save area. If not, we need to do moves. */
7824 if (!need_temp && !REG_P (container))
7825 {
7826 /* Verify that all registers are strictly consecutive. */
7827 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7828 {
7829 int i;
7830
7831 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7832 {
7833 rtx slot = XVECEXP (container, 0, i);
7834 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7835 || INTVAL (XEXP (slot, 1)) != i * 16)
7836 need_temp = 1;
7837 }
7838 }
7839 else
7840 {
7841 int i;
7842
7843 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7844 {
7845 rtx slot = XVECEXP (container, 0, i);
7846 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7847 || INTVAL (XEXP (slot, 1)) != i * 8)
7848 need_temp = 1;
7849 }
7850 }
7851 }
7852 if (!need_temp)
7853 {
7854 int_addr = addr;
7855 sse_addr = addr;
7856 }
7857 else
7858 {
7859 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7860 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7861 }
7862
7863 /* First ensure that we fit completely in registers. */
7864 if (needed_intregs)
7865 {
7866 t = build_int_cst (TREE_TYPE (gpr),
7867 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7868 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7869 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7870 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7871 gimplify_and_add (t, pre_p);
7872 }
7873 if (needed_sseregs)
7874 {
7875 t = build_int_cst (TREE_TYPE (fpr),
7876 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7877 + X86_64_REGPARM_MAX * 8);
7878 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7879 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7880 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7881 gimplify_and_add (t, pre_p);
7882 }
7883
7884 /* Compute index to start of area used for integer regs. */
7885 if (needed_intregs)
7886 {
7887 /* int_addr = gpr + sav; */
7888 t = fold_build_pointer_plus (sav, gpr);
7889 gimplify_assign (int_addr, t, pre_p);
7890 }
7891 if (needed_sseregs)
7892 {
7893 /* sse_addr = fpr + sav; */
7894 t = fold_build_pointer_plus (sav, fpr);
7895 gimplify_assign (sse_addr, t, pre_p);
7896 }
7897 if (need_temp)
7898 {
7899 int i, prev_size = 0;
7900 tree temp = create_tmp_var (type, "va_arg_tmp");
7901
7902 /* addr = &temp; */
7903 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7904 gimplify_assign (addr, t, pre_p);
7905
7906 for (i = 0; i < XVECLEN (container, 0); i++)
7907 {
7908 rtx slot = XVECEXP (container, 0, i);
7909 rtx reg = XEXP (slot, 0);
7910 enum machine_mode mode = GET_MODE (reg);
7911 tree piece_type;
7912 tree addr_type;
7913 tree daddr_type;
7914 tree src_addr, src;
7915 int src_offset;
7916 tree dest_addr, dest;
7917 int cur_size = GET_MODE_SIZE (mode);
7918
7919 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
7920 prev_size = INTVAL (XEXP (slot, 1));
7921 if (prev_size + cur_size > size)
7922 {
7923 cur_size = size - prev_size;
7924 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
7925 if (mode == BLKmode)
7926 mode = QImode;
7927 }
7928 piece_type = lang_hooks.types.type_for_mode (mode, 1);
7929 if (mode == GET_MODE (reg))
7930 addr_type = build_pointer_type (piece_type);
7931 else
7932 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7933 true);
7934 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7935 true);
7936
7937 if (SSE_REGNO_P (REGNO (reg)))
7938 {
7939 src_addr = sse_addr;
7940 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7941 }
7942 else
7943 {
7944 src_addr = int_addr;
7945 src_offset = REGNO (reg) * 8;
7946 }
7947 src_addr = fold_convert (addr_type, src_addr);
7948 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
7949
7950 dest_addr = fold_convert (daddr_type, addr);
7951 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
7952 if (cur_size == GET_MODE_SIZE (mode))
7953 {
7954 src = build_va_arg_indirect_ref (src_addr);
7955 dest = build_va_arg_indirect_ref (dest_addr);
7956
7957 gimplify_assign (dest, src, pre_p);
7958 }
7959 else
7960 {
7961 tree copy
7962 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
7963 3, dest_addr, src_addr,
7964 size_int (cur_size));
7965 gimplify_and_add (copy, pre_p);
7966 }
7967 prev_size += cur_size;
7968 }
7969 }
7970
7971 if (needed_intregs)
7972 {
7973 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7974 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7975 gimplify_assign (gpr, t, pre_p);
7976 }
7977
7978 if (needed_sseregs)
7979 {
7980 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7981 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7982 gimplify_assign (fpr, t, pre_p);
7983 }
7984
7985 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7986
7987 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7988 }
7989
7990 /* ... otherwise out of the overflow area. */
7991
7992 /* When the caller aligns a parameter on the stack, any alignment beyond
7993 MAX_SUPPORTED_STACK_ALIGNMENT is clamped to
7994 MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee here
7995 with the caller. */
7996 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
7997 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7998 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7999
8000 /* Care for on-stack alignment if needed. */
8001 if (arg_boundary <= 64 || size == 0)
8002 t = ovf;
8003 else
8004 {
8005 HOST_WIDE_INT align = arg_boundary / 8;
8006 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8007 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8008 build_int_cst (TREE_TYPE (t), -align));
8009 }
8010
8011 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8012 gimplify_assign (addr, t, pre_p);
8013
8014 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8015 gimplify_assign (unshare_expr (ovf), t, pre_p);
8016
8017 if (container)
8018 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8019
8020 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8021 addr = fold_convert (ptrtype, addr);
8022
8023 if (indirect_p)
8024 addr = build_va_arg_indirect_ref (addr);
8025 return build_va_arg_indirect_ref (addr);
8026 }
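
/* Roughly, for va_arg (ap, int) the gimple built above behaves like this
   C sketch (SysV x86-64 only, purely illustrative):

     if (ap->gp_offset < 8 * X86_64_REGPARM_MAX)
       {
         addr = ap->reg_save_area + ap->gp_offset;
         ap->gp_offset += 8;
       }
     else
       {
         addr = ap->overflow_arg_area;
         ap->overflow_arg_area += 8;
       }
     result = *(int *) addr;

   Larger or SSE-class types adjust fp_offset, need the temporary copy
   path, or fall straight through to the overflow area.  */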
8027 \f
8028 /* Return true if OPNUM's MEM should be matched
8029 in movabs* patterns. */
8030
8031 bool
8032 ix86_check_movabs (rtx insn, int opnum)
8033 {
8034 rtx set, mem;
8035
8036 set = PATTERN (insn);
8037 if (GET_CODE (set) == PARALLEL)
8038 set = XVECEXP (set, 0, 0);
8039 gcc_assert (GET_CODE (set) == SET);
8040 mem = XEXP (set, opnum);
8041 while (GET_CODE (mem) == SUBREG)
8042 mem = SUBREG_REG (mem);
8043 gcc_assert (MEM_P (mem));
8044 return volatile_ok || !MEM_VOLATILE_P (mem);
8045 }
8046 \f
8047 /* Initialize the table of extra 80387 mathematical constants. */
8048
8049 static void
8050 init_ext_80387_constants (void)
8051 {
8052 static const char * cst[5] =
8053 {
8054 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8055 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8056 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8057 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8058 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8059 };
8060 int i;
8061
8062 for (i = 0; i < 5; i++)
8063 {
8064 real_from_string (&ext_80387_constants_table[i], cst[i]);
8065 /* Ensure each constant is rounded to XFmode precision. */
8066 real_convert (&ext_80387_constants_table[i],
8067 XFmode, &ext_80387_constants_table[i]);
8068 }
8069
8070 ext_80387_constants_init = 1;
8071 }
8072
8073 /* Return non-zero if the constant can be loaded with a special instruction,
8074 0 if not, or -1 if X is not an x87 floating point CONST_DOUBLE at all. */
8075
8076 int
8077 standard_80387_constant_p (rtx x)
8078 {
8079 enum machine_mode mode = GET_MODE (x);
8080
8081 REAL_VALUE_TYPE r;
8082
8083 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8084 return -1;
8085
8086 if (x == CONST0_RTX (mode))
8087 return 1;
8088 if (x == CONST1_RTX (mode))
8089 return 2;
8090
8091 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8092
8093 /* For XFmode constants, try to find a special 80387 instruction when
8094 optimizing for size or on those CPUs that benefit from them. */
8095 if (mode == XFmode
8096 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8097 {
8098 int i;
8099
8100 if (! ext_80387_constants_init)
8101 init_ext_80387_constants ();
8102
8103 for (i = 0; i < 5; i++)
8104 if (real_identical (&r, &ext_80387_constants_table[i]))
8105 return i + 3;
8106 }
8107
8108 /* Load of the constant -0.0 or -1.0 will be split as
8109 fldz;fchs or fld1;fchs sequence. */
8110 if (real_isnegzero (&r))
8111 return 8;
8112 if (real_identical (&r, &dconstm1))
8113 return 9;
8114
8115 return 0;
8116 }
8117
8118 /* Return the opcode of the special instruction to be used to load
8119 the constant X. */
8120
8121 const char *
8122 standard_80387_constant_opcode (rtx x)
8123 {
8124 switch (standard_80387_constant_p (x))
8125 {
8126 case 1:
8127 return "fldz";
8128 case 2:
8129 return "fld1";
8130 case 3:
8131 return "fldlg2";
8132 case 4:
8133 return "fldln2";
8134 case 5:
8135 return "fldl2e";
8136 case 6:
8137 return "fldl2t";
8138 case 7:
8139 return "fldpi";
8140 case 8:
8141 case 9:
8142 return "#";
8143 default:
8144 gcc_unreachable ();
8145 }
8146 }
8147
8148 /* Return the CONST_DOUBLE representing the 80387 constant that is
8149 loaded by the specified special instruction. The argument IDX
8150 matches the return value from standard_80387_constant_p. */
8151
8152 rtx
8153 standard_80387_constant_rtx (int idx)
8154 {
8155 int i;
8156
8157 if (! ext_80387_constants_init)
8158 init_ext_80387_constants ();
8159
8160 switch (idx)
8161 {
8162 case 3:
8163 case 4:
8164 case 5:
8165 case 6:
8166 case 7:
8167 i = idx - 3;
8168 break;
8169
8170 default:
8171 gcc_unreachable ();
8172 }
8173
8174 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8175 XFmode);
8176 }
8177
8178 /* Return 1 if X is all 0s and 2 if X is all 1s
8179 in a supported SSE/AVX vector mode. */
8180
8181 int
8182 standard_sse_constant_p (rtx x)
8183 {
8184 enum machine_mode mode = GET_MODE (x);
8185
8186 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8187 return 1;
8188 if (vector_all_ones_operand (x, mode))
8189 switch (mode)
8190 {
8191 case V16QImode:
8192 case V8HImode:
8193 case V4SImode:
8194 case V2DImode:
8195 if (TARGET_SSE2)
8196 return 2;
8197 case V32QImode:
8198 case V16HImode:
8199 case V8SImode:
8200 case V4DImode:
8201 if (TARGET_AVX2)
8202 return 2;
8203 default:
8204 break;
8205 }
8206
8207 return 0;
8208 }
8209
8210 /* Return the opcode of the special instruction to be used to load
8211 the constant X. */
8212
8213 const char *
8214 standard_sse_constant_opcode (rtx insn, rtx x)
8215 {
8216 switch (standard_sse_constant_p (x))
8217 {
8218 case 1:
8219 switch (get_attr_mode (insn))
8220 {
8221 case MODE_TI:
8222 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8223 return "%vpxor\t%0, %d0";
8224 case MODE_V2DF:
8225 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8226 return "%vxorpd\t%0, %d0";
8227 case MODE_V4SF:
8228 return "%vxorps\t%0, %d0";
8229
8230 case MODE_OI:
8231 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8232 return "vpxor\t%x0, %x0, %x0";
8233 case MODE_V4DF:
8234 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8235 return "vxorpd\t%x0, %x0, %x0";
8236 case MODE_V8SF:
8237 return "vxorps\t%x0, %x0, %x0";
8238
8239 default:
8240 break;
8241 }
8242
8243 case 2:
8244 if (TARGET_AVX)
8245 return "vpcmpeqd\t%0, %0, %0";
8246 else
8247 return "pcmpeqd\t%0, %0";
8248
8249 default:
8250 break;
8251 }
8252 gcc_unreachable ();
8253 }
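
/* For example (an illustration of the two cases above): an all-zero V4SF
   constant is loaded with "xorps %xmm0, %xmm0" (or the AVX "vxorps" form),
   and an all-ones V2DI constant with "pcmpeqd %xmm0, %xmm0" (or "vpcmpeqd"
   under AVX).  The register operands are placeholders filled in from the
   insn.  */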
8254
8255 /* Return true if OP contains a symbol reference. */
8256
8257 bool
8258 symbolic_reference_mentioned_p (rtx op)
8259 {
8260 const char *fmt;
8261 int i;
8262
8263 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
8264 return true;
8265
8266 fmt = GET_RTX_FORMAT (GET_CODE (op));
8267 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
8268 {
8269 if (fmt[i] == 'E')
8270 {
8271 int j;
8272
8273 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
8274 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
8275 return true;
8276 }
8277
8278 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
8279 return true;
8280 }
8281
8282 return false;
8283 }
8284
8285 /* Return true if it is appropriate to emit `ret' instructions in the
8286 body of a function. Do this only if the epilogue is simple, needing a
8287 couple of insns. Prior to reloading, we can't tell how many registers
8288 must be saved, so return false then. Return false if there is no frame
8289 marker to de-allocate. */
8290
8291 bool
8292 ix86_can_use_return_insn_p (void)
8293 {
8294 struct ix86_frame frame;
8295
8296 if (! reload_completed || frame_pointer_needed)
8297 return false;
8298
8299 /* Don't allow more than 32k pop, since that's all we can do
8300 with one instruction. */
8301 if (crtl->args.pops_args && crtl->args.size >= 32768)
8302 return false;
8303
8304 ix86_compute_frame_layout (&frame);
8305 return (frame.stack_pointer_offset == UNITS_PER_WORD
8306 && (frame.nregs + frame.nsseregs) == 0);
8307 }
8308 \f
8309 /* Value should be nonzero if functions must have frame pointers.
8310 Zero means the frame pointer need not be set up (and parms may
8311 be accessed via the stack pointer) in functions that seem suitable. */
8312
8313 static bool
8314 ix86_frame_pointer_required (void)
8315 {
8316 /* If we accessed previous frames, then the generated code expects
8317 to be able to access the saved ebp value in our frame. */
8318 if (cfun->machine->accesses_prev_frame)
8319 return true;
8320
8321 /* Several x86 OSes need a frame pointer for other reasons,
8322 usually pertaining to setjmp. */
8323 if (SUBTARGET_FRAME_POINTER_REQUIRED)
8324 return true;
8325
8326 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8327 turns off the frame pointer by default. Turn it back on now if
8328 we've not got a leaf function. */
8329 if (TARGET_OMIT_LEAF_FRAME_POINTER
8330 && (!current_function_is_leaf
8331 || ix86_current_function_calls_tls_descriptor))
8332 return true;
8333
8334 if (crtl->profile && !flag_fentry)
8335 return true;
8336
8337 return false;
8338 }
8339
8340 /* Record that the current function accesses previous call frames. */
8341
8342 void
8343 ix86_setup_frame_addresses (void)
8344 {
8345 cfun->machine->accesses_prev_frame = 1;
8346 }
8347 \f
8348 #ifndef USE_HIDDEN_LINKONCE
8349 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
8350 # define USE_HIDDEN_LINKONCE 1
8351 # else
8352 # define USE_HIDDEN_LINKONCE 0
8353 # endif
8354 #endif
8355
8356 static int pic_labels_used;
8357
8358 /* Fills in the label name that should be used for a pc thunk for
8359 the given register. */
8360
8361 static void
8362 get_pc_thunk_name (char name[32], unsigned int regno)
8363 {
8364 gcc_assert (!TARGET_64BIT);
8365
8366 if (USE_HIDDEN_LINKONCE)
8367 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
8368 else
8369 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
8370 }
8371
8372
8373 /* This function generates the pc thunks used for -fpic; each loads its
8374 target register with the return address of the caller and then returns. */
8375
8376 static void
8377 ix86_code_end (void)
8378 {
8379 rtx xops[2];
8380 int regno;
8381
8382 for (regno = AX_REG; regno <= SP_REG; regno++)
8383 {
8384 char name[32];
8385 tree decl;
8386
8387 if (!(pic_labels_used & (1 << regno)))
8388 continue;
8389
8390 get_pc_thunk_name (name, regno);
8391
8392 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8393 get_identifier (name),
8394 build_function_type_list (void_type_node, NULL_TREE));
8395 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8396 NULL_TREE, void_type_node);
8397 TREE_PUBLIC (decl) = 1;
8398 TREE_STATIC (decl) = 1;
8399
8400 #if TARGET_MACHO
8401 if (TARGET_MACHO)
8402 {
8403 switch_to_section (darwin_sections[text_coal_section]);
8404 fputs ("\t.weak_definition\t", asm_out_file);
8405 assemble_name (asm_out_file, name);
8406 fputs ("\n\t.private_extern\t", asm_out_file);
8407 assemble_name (asm_out_file, name);
8408 putc ('\n', asm_out_file);
8409 ASM_OUTPUT_LABEL (asm_out_file, name);
8410 DECL_WEAK (decl) = 1;
8411 }
8412 else
8413 #endif
8414 if (USE_HIDDEN_LINKONCE)
8415 {
8416 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8417
8418 targetm.asm_out.unique_section (decl, 0);
8419 switch_to_section (get_named_section (decl, NULL, 0));
8420
8421 targetm.asm_out.globalize_label (asm_out_file, name);
8422 fputs ("\t.hidden\t", asm_out_file);
8423 assemble_name (asm_out_file, name);
8424 putc ('\n', asm_out_file);
8425 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8426 }
8427 else
8428 {
8429 switch_to_section (text_section);
8430 ASM_OUTPUT_LABEL (asm_out_file, name);
8431 }
8432
8433 DECL_INITIAL (decl) = make_node (BLOCK);
8434 current_function_decl = decl;
8435 init_function_start (decl);
8436 first_function_block_is_cold = false;
8437 /* Make sure unwind info is emitted for the thunk if needed. */
8438 final_start_function (emit_barrier (), asm_out_file, 1);
8439
8440 /* Pad stack IP move with 4 instructions (two NOPs count
8441 as one instruction). */
8442 if (TARGET_PAD_SHORT_FUNCTION)
8443 {
8444 int i = 8;
8445
8446 while (i--)
8447 fputs ("\tnop\n", asm_out_file);
8448 }
8449
8450 xops[0] = gen_rtx_REG (Pmode, regno);
8451 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8452 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8453 fputs ("\tret\n", asm_out_file);
8454 final_end_function ();
8455 init_insn_lengths ();
8456 free_after_compilation (cfun);
8457 set_cfun (NULL);
8458 current_function_decl = NULL;
8459 }
8460
8461 if (flag_split_stack)
8462 file_end_indicate_split_stack ();
8463 }
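
/* For reference, the thunk emitted above for %ebx comes out roughly as
   the following assembly (a sketch of the final output):

     __x86.get_pc_thunk.bx:
             movl    (%esp), %ebx
             ret
*/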
8464
8465 /* Emit code for the SET_GOT patterns. */
8466
8467 const char *
8468 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8469 {
8470 rtx xops[3];
8471
8472 xops[0] = dest;
8473
8474 if (TARGET_VXWORKS_RTP && flag_pic)
8475 {
8476 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8477 xops[2] = gen_rtx_MEM (Pmode,
8478 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8479 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8480
8481 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8482 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8483 an unadorned address. */
8484 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8485 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8486 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8487 return "";
8488 }
8489
8490 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8491
8492 if (!flag_pic)
8493 {
8494 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8495
8496 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8497
8498 #if TARGET_MACHO
8499 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8500 is what will be referenced by the Mach-O PIC subsystem. */
8501 if (!label)
8502 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8503 #endif
8504
8505 targetm.asm_out.internal_label (asm_out_file, "L",
8506 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8507 }
8508 else
8509 {
8510 char name[32];
8511 get_pc_thunk_name (name, REGNO (dest));
8512 pic_labels_used |= 1 << REGNO (dest);
8513
8514 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8515 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8516 output_asm_insn ("call\t%X2", xops);
8517 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8518 is what will be referenced by the Mach-O PIC subsystem. */
8519 #if TARGET_MACHO
8520 if (!label)
8521 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8522 else
8523 targetm.asm_out.internal_label (asm_out_file, "L",
8524 CODE_LABEL_NUMBER (label));
8525 #endif
8526 }
8527
8528 if (!TARGET_MACHO)
8529 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
8530
8531 return "";
8532 }
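
/* With -fpic the sequence emitted above typically reads (a sketch):

     call    __x86.get_pc_thunk.bx
     addl    $_GLOBAL_OFFSET_TABLE_, %ebx

   Without -fpic the thunk call is replaced by a move of a local label
   address into the destination register.  */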
8533
8534 /* Generate a "push" pattern for input ARG. */
8535
8536 static rtx
8537 gen_push (rtx arg)
8538 {
8539 struct machine_function *m = cfun->machine;
8540
8541 if (m->fs.cfa_reg == stack_pointer_rtx)
8542 m->fs.cfa_offset += UNITS_PER_WORD;
8543 m->fs.sp_offset += UNITS_PER_WORD;
8544
8545 return gen_rtx_SET (VOIDmode,
8546 gen_rtx_MEM (Pmode,
8547 gen_rtx_PRE_DEC (Pmode,
8548 stack_pointer_rtx)),
8549 arg);
8550 }
8551
8552 /* Generate a "pop" pattern for input ARG. */
8553
8554 static rtx
8555 gen_pop (rtx arg)
8556 {
8557 return gen_rtx_SET (VOIDmode,
8558 arg,
8559 gen_rtx_MEM (Pmode,
8560 gen_rtx_POST_INC (Pmode,
8561 stack_pointer_rtx)));
8562 }
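
/* The patterns built by gen_push and gen_pop correspond to RTL of the
   form (64-bit shown, purely as an illustration):

     (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI arg))     push
     (set (reg:DI arg) (mem:DI (post_inc:DI (reg:DI sp))))    pop
*/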
8563
8564 /* Return the number of an unused call-clobbered register that is available
8565 for the entire function, or INVALID_REGNUM if there is none. */
8566
8567 static unsigned int
8568 ix86_select_alt_pic_regnum (void)
8569 {
8570 if (current_function_is_leaf
8571 && !crtl->profile
8572 && !ix86_current_function_calls_tls_descriptor)
8573 {
8574 int i, drap;
8575 /* Can't use the same register for both PIC and DRAP. */
8576 if (crtl->drap_reg)
8577 drap = REGNO (crtl->drap_reg);
8578 else
8579 drap = -1;
8580 for (i = 2; i >= 0; --i)
8581 if (i != drap && !df_regs_ever_live_p (i))
8582 return i;
8583 }
8584
8585 return INVALID_REGNUM;
8586 }
8587
8588 /* Return TRUE if we need to save REGNO. */
8589
8590 static bool
8591 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
8592 {
8593 if (pic_offset_table_rtx
8594 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8595 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8596 || crtl->profile
8597 || crtl->calls_eh_return
8598 || crtl->uses_const_pool))
8599 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
8600
8601 if (crtl->calls_eh_return && maybe_eh_return)
8602 {
8603 unsigned i;
8604 for (i = 0; ; i++)
8605 {
8606 unsigned test = EH_RETURN_DATA_REGNO (i);
8607 if (test == INVALID_REGNUM)
8608 break;
8609 if (test == regno)
8610 return true;
8611 }
8612 }
8613
8614 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8615 return true;
8616
8617 return (df_regs_ever_live_p (regno)
8618 && !call_used_regs[regno]
8619 && !fixed_regs[regno]
8620 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
8621 }
8622
8623 /* Return number of saved general purpose registers. */
8624
8625 static int
8626 ix86_nsaved_regs (void)
8627 {
8628 int nregs = 0;
8629 int regno;
8630
8631 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8632 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8633 nregs ++;
8634 return nregs;
8635 }
8636
8637 /* Return number of saved SSE registers. */
8638
8639 static int
8640 ix86_nsaved_sseregs (void)
8641 {
8642 int nregs = 0;
8643 int regno;
8644
8645 if (!TARGET_64BIT_MS_ABI)
8646 return 0;
8647 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8648 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8649 nregs ++;
8650 return nregs;
8651 }
8652
8653 /* Given FROM and TO register numbers, say whether this elimination is
8654 allowed. If stack alignment is needed, we can only replace argument
8655 pointer with hard frame pointer, or replace frame pointer with stack
8656 pointer. Otherwise, frame pointer elimination is automatically
8657 handled and all other eliminations are valid. */
8658
8659 static bool
8660 ix86_can_eliminate (const int from, const int to)
8661 {
8662 if (stack_realign_fp)
8663 return ((from == ARG_POINTER_REGNUM
8664 && to == HARD_FRAME_POINTER_REGNUM)
8665 || (from == FRAME_POINTER_REGNUM
8666 && to == STACK_POINTER_REGNUM));
8667 else
8668 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8669 }
8670
8671 /* Return the offset between two registers, one to be eliminated, and the other
8672 its replacement, at the start of a routine. */
8673
8674 HOST_WIDE_INT
8675 ix86_initial_elimination_offset (int from, int to)
8676 {
8677 struct ix86_frame frame;
8678 ix86_compute_frame_layout (&frame);
8679
8680 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8681 return frame.hard_frame_pointer_offset;
8682 else if (from == FRAME_POINTER_REGNUM
8683 && to == HARD_FRAME_POINTER_REGNUM)
8684 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
8685 else
8686 {
8687 gcc_assert (to == STACK_POINTER_REGNUM);
8688
8689 if (from == ARG_POINTER_REGNUM)
8690 return frame.stack_pointer_offset;
8691
8692 gcc_assert (from == FRAME_POINTER_REGNUM);
8693 return frame.stack_pointer_offset - frame.frame_pointer_offset;
8694 }
8695 }
8696
8697 /* In a dynamically-aligned function, we can't know the offset from
8698 stack pointer to frame pointer, so we must ensure that setjmp
8699 eliminates fp against the hard fp (%ebp) rather than trying to
8700 index from %esp up to the top of the frame across a gap that is
8701 of unknown (at compile-time) size. */
8702 static rtx
8703 ix86_builtin_setjmp_frame_value (void)
8704 {
8705 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
8706 }
8707
8708 /* When using -fsplit-stack, the allocation routines set a field in
8709 the TCB to the bottom of the stack plus this much space, measured
8710 in bytes. */
8711
8712 #define SPLIT_STACK_AVAILABLE 256
8713
8714 /* Fill structure ix86_frame describing the frame of the current function. */
8715
8716 static void
8717 ix86_compute_frame_layout (struct ix86_frame *frame)
8718 {
8719 unsigned int stack_alignment_needed;
8720 HOST_WIDE_INT offset;
8721 unsigned int preferred_alignment;
8722 HOST_WIDE_INT size = get_frame_size ();
8723 HOST_WIDE_INT to_allocate;
8724
8725 frame->nregs = ix86_nsaved_regs ();
8726 frame->nsseregs = ix86_nsaved_sseregs ();
8727
8728 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
8729 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
8730
8731 /* The 64-bit MS ABI seems to require stack alignment to always be 16, except
8732 for function prologues and leaf functions. */
8733 if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
8734 && (!current_function_is_leaf || cfun->calls_alloca != 0
8735 || ix86_current_function_calls_tls_descriptor))
8736 {
8737 preferred_alignment = 16;
8738 stack_alignment_needed = 16;
8739 crtl->preferred_stack_boundary = 128;
8740 crtl->stack_alignment_needed = 128;
8741 }
8742
8743 gcc_assert (!size || stack_alignment_needed);
8744 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
8745 gcc_assert (preferred_alignment <= stack_alignment_needed);
8746
8747 /* For SEH we have to limit the amount of code movement into the prologue.
8748 At present we do this via a BLOCKAGE, at which point there's very little
8749 scheduling that can be done, which means that there's very little point
8750 in doing anything except PUSHs. */
8751 if (TARGET_SEH)
8752 cfun->machine->use_fast_prologue_epilogue = false;
8753
8754 /* During reload iteration the number of saved registers can change.
8755 Recompute the value as needed. Do not recompute when the number of
8756 registers did not change, as reload calls this function multiple times
8757 and does not expect the decision to change within a single iteration. */
8758 else if (!optimize_function_for_size_p (cfun)
8759 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
8760 {
8761 int count = frame->nregs;
8762 struct cgraph_node *node = cgraph_get_node (current_function_decl);
8763
8764 cfun->machine->use_fast_prologue_epilogue_nregs = count;
8765
8766 /* The fast prologue uses move instead of push to save registers. This
8767 is significantly longer, but also executes faster as modern hardware
8768 can execute the moves in parallel, which it cannot do for push/pop.
8769 
8770 Be careful about choosing which prologue to emit: when the function
8771 takes many instructions to execute, we may as well use the slow version,
8772 as we also do when the function is known to be outside a hot spot (known
8773 only with profile feedback). Weight the size of the function by the
8774 number of registers to save, since it is cheap to use one or two push
8775 instructions but very slow to use many of them. */
8776 if (count)
8777 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
8778 if (node->frequency < NODE_FREQUENCY_NORMAL
8779 || (flag_branch_probabilities
8780 && node->frequency < NODE_FREQUENCY_HOT))
8781 cfun->machine->use_fast_prologue_epilogue = false;
8782 else
8783 cfun->machine->use_fast_prologue_epilogue
8784 = !expensive_function_p (count);
8785 }
8786
8787 frame->save_regs_using_mov
8788 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
8789 /* If static stack checking is enabled and done with probes,
8790 the registers need to be saved before allocating the frame. */
8791 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
8792
8793 /* Skip return address. */
8794 offset = UNITS_PER_WORD;
8795
8796 /* Skip pushed static chain. */
8797 if (ix86_static_chain_on_stack)
8798 offset += UNITS_PER_WORD;
8799
8800 /* Skip saved base pointer. */
8801 if (frame_pointer_needed)
8802 offset += UNITS_PER_WORD;
8803 frame->hfp_save_offset = offset;
8804
8805 /* The traditional frame pointer location is at the top of the frame. */
8806 frame->hard_frame_pointer_offset = offset;
8807
8808 /* Register save area */
8809 offset += frame->nregs * UNITS_PER_WORD;
8810 frame->reg_save_offset = offset;
8811
8812 /* Align and set SSE register save area. */
8813 if (frame->nsseregs)
8814 {
8815 /* The only ABI that has saved SSE registers (Win64) also has a
8816 16-byte aligned default stack, and thus we don't need to be
8817 within the re-aligned local stack frame to save them. */
8818 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
8819 offset = (offset + 16 - 1) & -16;
8820 offset += frame->nsseregs * 16;
8821 }
8822 frame->sse_reg_save_offset = offset;
8823
8824 /* The re-aligned stack starts here. Values before this point are not
8825 directly comparable with values below this point. In order to make
8826 sure that no value happens to be the same before and after, force
8827 the alignment computation below to add a non-zero value. */
8828 if (stack_realign_fp)
8829 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
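/* Reader aid (not from the original sources): with OFFSET == 48 and
   STACK_ALIGNMENT_NEEDED == 16, this computes (48 + 16) & -16 == 64,
   so the offset always moves by at least one full alignment unit, as
   required by the comment above.  */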
8830
8831 /* Va-arg area */
8832 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
8833 offset += frame->va_arg_size;
8834
8835 /* Align start of frame for local function. */
8836 if (stack_realign_fp
8837 || offset != frame->sse_reg_save_offset
8838 || size != 0
8839 || !current_function_is_leaf
8840 || cfun->calls_alloca
8841 || ix86_current_function_calls_tls_descriptor)
8842 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
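/* Reader aid with illustrative numbers: for OFFSET == 40 and
   STACK_ALIGNMENT_NEEDED == 16 this rounds up to (40 + 15) & -16 == 48,
   while an already aligned OFFSET of 48 is left at 48.  */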
8843
8844 /* Frame pointer points here. */
8845 frame->frame_pointer_offset = offset;
8846
8847 offset += size;
8848
8849 /* Add outgoing arguments area. Can be skipped if we eliminated
8850 all the function calls as dead code.
8851 Skipping is however impossible when the function calls alloca. The alloca
8852 expander assumes that the last crtl->outgoing_args_size bytes
8853 of the stack frame are unused. */
8854 if (ACCUMULATE_OUTGOING_ARGS
8855 && (!current_function_is_leaf || cfun->calls_alloca
8856 || ix86_current_function_calls_tls_descriptor))
8857 {
8858 offset += crtl->outgoing_args_size;
8859 frame->outgoing_arguments_size = crtl->outgoing_args_size;
8860 }
8861 else
8862 frame->outgoing_arguments_size = 0;
8863
8864 /* Align stack boundary. Only needed if we're calling another function
8865 or using alloca. */
8866 if (!current_function_is_leaf || cfun->calls_alloca
8867 || ix86_current_function_calls_tls_descriptor)
8868 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
8869
8870 /* We've reached end of stack frame. */
8871 frame->stack_pointer_offset = offset;
8872
8873 /* Size prologue needs to allocate. */
8874 to_allocate = offset - frame->sse_reg_save_offset;
8875
8876 if ((!to_allocate && frame->nregs <= 1)
8877 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
8878 frame->save_regs_using_mov = false;
8879
8880 if (ix86_using_red_zone ()
8881 && current_function_sp_is_unchanging
8882 && current_function_is_leaf
8883 && !ix86_current_function_calls_tls_descriptor)
8884 {
8885 frame->red_zone_size = to_allocate;
8886 if (frame->save_regs_using_mov)
8887 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8888 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
8889 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
8890 }
8891 else
8892 frame->red_zone_size = 0;
8893 frame->stack_pointer_offset -= frame->red_zone_size;
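/* Reader aid, assuming the usual x86-64 values RED_ZONE_SIZE == 128 and
   RED_ZONE_RESERVE == 8: at most 120 bytes of TO_ALLOCATE (plus the
   register saves done via moves) are folded into the red zone, and
   stack_pointer_offset shrinks by that amount so the prologue need not
   allocate it explicitly.  */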
8894
8895 /* The SEH frame pointer location is near the bottom of the frame.
8896 This is enforced by the fact that the difference between the
8897 stack pointer and the frame pointer is limited to 240 bytes in
8898 the unwind data structure. */
8899 if (TARGET_SEH)
8900 {
8901 HOST_WIDE_INT diff;
8902
8903 /* If we can leave the frame pointer where it is, do so. */
8904 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
8905 if (diff > 240 || (diff & 15) != 0)
8906 {
8907 /* Ideally we'd determine what portion of the local stack frame
8908 (within the constraint of the lowest 240) is most heavily used.
8909 But without that complication, simply bias the frame pointer
8910 by 128 bytes so as to maximize the amount of the local stack
8911 frame that is addressable with 8-bit offsets. */
8912 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
8913 }
8914 }
8915 }
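/* Reader aid: a rough picture of the layout computed above, growing
   downwards from the CFA.  Bracketed areas may be absent for a given
   function; each named offset refers to the low end of the area on
   its line.

     return address
     [pushed static chain]
     [saved frame pointer]          hard_frame_pointer_offset
     GP register save area          reg_save_offset
     [SSE register save area]       sse_reg_save_offset (16-byte aligned)
     [va_arg register save area]
     local variables                frame_pointer_offset
     [outgoing argument area]       stack_pointer_offset
     [red zone, below the final stack pointer]  */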
8916
8917 /* This is semi-inlined memory_address_length, but simplified
8918 since we know that we're always dealing with reg+offset, and
8919 to avoid having to create and discard all that rtl. */
8920
8921 static inline int
8922 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
8923 {
8924 int len = 4;
8925
8926 if (offset == 0)
8927 {
8928 /* EBP and R13 cannot be encoded without an offset. */
8929 len = (regno == BP_REG || regno == R13_REG);
8930 }
8931 else if (IN_RANGE (offset, -128, 127))
8932 len = 1;
8933
8934 /* ESP and R12 must be encoded with a SIB byte. */
8935 if (regno == SP_REG || regno == R12_REG)
8936 len++;
8937
8938 return len;
8939 }
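/* Reader aid with illustrative operands: (%ebp) with OFFSET == 0 still
   needs a disp8, giving length 1; 124(%esp) needs a disp8 plus a SIB
   byte, giving 2; 1024(%ebx) needs a disp32, giving 4.  */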
8940
8941 /* Return an RTX that points to CFA_OFFSET within the stack frame.
8942 The valid base registers are taken from CFUN->MACHINE->FS. */
8943
8944 static rtx
8945 choose_baseaddr (HOST_WIDE_INT cfa_offset)
8946 {
8947 const struct machine_function *m = cfun->machine;
8948 rtx base_reg = NULL;
8949 HOST_WIDE_INT base_offset = 0;
8950
8951 if (m->use_fast_prologue_epilogue)
8952 {
8953 /* Choose the base register most likely to allow the most scheduling
8954 opportunities. Generally FP is valid throughout the function,
8955 while DRAP must be reloaded within the epilogue. But choose either
8956 over the SP due to the SP's larger encoding size. */
8957
8958 if (m->fs.fp_valid)
8959 {
8960 base_reg = hard_frame_pointer_rtx;
8961 base_offset = m->fs.fp_offset - cfa_offset;
8962 }
8963 else if (m->fs.drap_valid)
8964 {
8965 base_reg = crtl->drap_reg;
8966 base_offset = 0 - cfa_offset;
8967 }
8968 else if (m->fs.sp_valid)
8969 {
8970 base_reg = stack_pointer_rtx;
8971 base_offset = m->fs.sp_offset - cfa_offset;
8972 }
8973 }
8974 else
8975 {
8976 HOST_WIDE_INT toffset;
8977 int len = 16, tlen;
8978
8979 /* Choose the base register with the smallest address encoding.
8980 With a tie, choose FP > DRAP > SP. */
8981 if (m->fs.sp_valid)
8982 {
8983 base_reg = stack_pointer_rtx;
8984 base_offset = m->fs.sp_offset - cfa_offset;
8985 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
8986 }
8987 if (m->fs.drap_valid)
8988 {
8989 toffset = 0 - cfa_offset;
8990 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
8991 if (tlen <= len)
8992 {
8993 base_reg = crtl->drap_reg;
8994 base_offset = toffset;
8995 len = tlen;
8996 }
8997 }
8998 if (m->fs.fp_valid)
8999 {
9000 toffset = m->fs.fp_offset - cfa_offset;
9001 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9002 if (tlen <= len)
9003 {
9004 base_reg = hard_frame_pointer_rtx;
9005 base_offset = toffset;
9006 len = tlen;
9007 }
9008 }
9009 }
9010 gcc_assert (base_reg != NULL);
9011
9012 return plus_constant (base_reg, base_offset);
9013 }
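/* Reader aid with illustrative numbers: if the frame pointer is valid
   with fp_offset == 16 and CFA_OFFSET == 24, the result is
   plus_constant (hard_frame_pointer_rtx, -8), i.e. the slot at
   -8(%ebp) (or -8(%rbp) in 64-bit mode).  */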
9014
9015 /* Emit code to save registers in the prologue. */
9016
9017 static void
9018 ix86_emit_save_regs (void)
9019 {
9020 unsigned int regno;
9021 rtx insn;
9022
9023 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9024 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9025 {
9026 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
9027 RTX_FRAME_RELATED_P (insn) = 1;
9028 }
9029 }
9030
9031 /* Emit a single register save at CFA - CFA_OFFSET. */
9032
9033 static void
9034 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9035 HOST_WIDE_INT cfa_offset)
9036 {
9037 struct machine_function *m = cfun->machine;
9038 rtx reg = gen_rtx_REG (mode, regno);
9039 rtx mem, addr, base, insn;
9040
9041 addr = choose_baseaddr (cfa_offset);
9042 mem = gen_frame_mem (mode, addr);
9043
9044 /* For SSE saves, we need to indicate the 128-bit alignment. */
9045 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9046
9047 insn = emit_move_insn (mem, reg);
9048 RTX_FRAME_RELATED_P (insn) = 1;
9049
9050 base = addr;
9051 if (GET_CODE (base) == PLUS)
9052 base = XEXP (base, 0);
9053 gcc_checking_assert (REG_P (base));
9054
9055 /* When saving registers into a re-aligned local stack frame, avoid
9056 any tricky guessing by dwarf2out. */
9057 if (m->fs.realigned)
9058 {
9059 gcc_checking_assert (stack_realign_drap);
9060
9061 if (regno == REGNO (crtl->drap_reg))
9062 {
9063 /* A bit of a hack. We force the DRAP register to be saved in
9064 the re-aligned stack frame, which provides us with a copy
9065 of the CFA that will last past the prologue. Install it. */
9066 gcc_checking_assert (cfun->machine->fs.fp_valid);
9067 addr = plus_constant (hard_frame_pointer_rtx,
9068 cfun->machine->fs.fp_offset - cfa_offset);
9069 mem = gen_rtx_MEM (mode, addr);
9070 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9071 }
9072 else
9073 {
9074 /* The frame pointer is a stable reference within the
9075 aligned frame. Use it. */
9076 gcc_checking_assert (cfun->machine->fs.fp_valid);
9077 addr = plus_constant (hard_frame_pointer_rtx,
9078 cfun->machine->fs.fp_offset - cfa_offset);
9079 mem = gen_rtx_MEM (mode, addr);
9080 add_reg_note (insn, REG_CFA_EXPRESSION,
9081 gen_rtx_SET (VOIDmode, mem, reg));
9082 }
9083 }
9084
9085 /* The memory may not be relative to the current CFA register,
9086 which means that we may need to generate a new pattern for
9087 use by the unwind info. */
9088 else if (base != m->fs.cfa_reg)
9089 {
9090 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
9091 mem = gen_rtx_MEM (mode, addr);
9092 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9093 }
9094 }
9095
9096 /* Emit code to save registers using MOV insns.
9097 First register is stored at CFA - CFA_OFFSET. */
9098 static void
9099 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9100 {
9101 unsigned int regno;
9102
9103 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9104 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9105 {
9106 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
9107 cfa_offset -= UNITS_PER_WORD;
9108 }
9109 }
9110
9111 /* Emit code to save SSE registers using MOV insns.
9112 First register is stored at CFA - CFA_OFFSET. */
9113 static void
9114 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9115 {
9116 unsigned int regno;
9117
9118 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9119 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9120 {
9121 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9122 cfa_offset -= 16;
9123 }
9124 }
9125
9126 static GTY(()) rtx queued_cfa_restores;
9127
9128 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
9129 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9130 Don't add the note if the previously saved value will be left untouched
9131 within stack red-zone till return, as unwinders can find the same value
9132 in the register and on the stack. */
9133
9134 static void
9135 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9136 {
9137 if (cfa_offset <= cfun->machine->fs.red_zone_offset)
9138 return;
9139
9140 if (insn)
9141 {
9142 add_reg_note (insn, REG_CFA_RESTORE, reg);
9143 RTX_FRAME_RELATED_P (insn) = 1;
9144 }
9145 else
9146 queued_cfa_restores
9147 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9148 }
9149
9150 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9151
9152 static void
9153 ix86_add_queued_cfa_restore_notes (rtx insn)
9154 {
9155 rtx last;
9156 if (!queued_cfa_restores)
9157 return;
9158 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9159 ;
9160 XEXP (last, 1) = REG_NOTES (insn);
9161 REG_NOTES (insn) = queued_cfa_restores;
9162 queued_cfa_restores = NULL_RTX;
9163 RTX_FRAME_RELATED_P (insn) = 1;
9164 }
9165
9166 /* Expand prologue or epilogue stack adjustment.
9167 The pattern exists to put a dependency on all ebp-based memory accesses.
9168 STYLE should be negative if instructions should be marked as frame related,
9169 zero if the %r11 register is live and cannot be freely used, and positive
9170 otherwise. */
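/* For instance, the prologue allocates the local frame with a call of
   the form (mirroring the actual use in ix86_expand_prologue below):

     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                GEN_INT (-allocate), -1,
                                m->fs.cfa_reg == stack_pointer_rtx);

   i.e. SP = SP - ALLOCATE, marked frame related (STYLE == -1) and
   recorded as a CFA adjustment while SP is still the CFA register.  */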
9171
9172 static void
9173 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9174 int style, bool set_cfa)
9175 {
9176 struct machine_function *m = cfun->machine;
9177 rtx insn;
9178 bool add_frame_related_expr = false;
9179
9180 if (! TARGET_64BIT)
9181 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9182 else if (x86_64_immediate_operand (offset, DImode))
9183 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9184 else
9185 {
9186 rtx tmp;
9187 /* r11 is used by indirect sibcall return as well, set before the
9188 epilogue and used after the epilogue. */
9189 if (style)
9190 tmp = gen_rtx_REG (DImode, R11_REG);
9191 else
9192 {
9193 gcc_assert (src != hard_frame_pointer_rtx
9194 && dest != hard_frame_pointer_rtx);
9195 tmp = hard_frame_pointer_rtx;
9196 }
9197 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9198 if (style < 0)
9199 add_frame_related_expr = true;
9200
9201 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9202 }
9203
9204 insn = emit_insn (insn);
9205 if (style >= 0)
9206 ix86_add_queued_cfa_restore_notes (insn);
9207
9208 if (set_cfa)
9209 {
9210 rtx r;
9211
9212 gcc_assert (m->fs.cfa_reg == src);
9213 m->fs.cfa_offset += INTVAL (offset);
9214 m->fs.cfa_reg = dest;
9215
9216 r = gen_rtx_PLUS (Pmode, src, offset);
9217 r = gen_rtx_SET (VOIDmode, dest, r);
9218 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9219 RTX_FRAME_RELATED_P (insn) = 1;
9220 }
9221 else if (style < 0)
9222 {
9223 RTX_FRAME_RELATED_P (insn) = 1;
9224 if (add_frame_related_expr)
9225 {
9226 rtx r = gen_rtx_PLUS (Pmode, src, offset);
9227 r = gen_rtx_SET (VOIDmode, dest, r);
9228 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
9229 }
9230 }
9231
9232 if (dest == stack_pointer_rtx)
9233 {
9234 HOST_WIDE_INT ooffset = m->fs.sp_offset;
9235 bool valid = m->fs.sp_valid;
9236
9237 if (src == hard_frame_pointer_rtx)
9238 {
9239 valid = m->fs.fp_valid;
9240 ooffset = m->fs.fp_offset;
9241 }
9242 else if (src == crtl->drap_reg)
9243 {
9244 valid = m->fs.drap_valid;
9245 ooffset = 0;
9246 }
9247 else
9248 {
9249 /* Else there are two possibilities: SP itself, which we set
9250 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
9251 taken care of by hand along the eh_return path. */
9252 gcc_checking_assert (src == stack_pointer_rtx
9253 || offset == const0_rtx);
9254 }
9255
9256 m->fs.sp_offset = ooffset - INTVAL (offset);
9257 m->fs.sp_valid = valid;
9258 }
9259 }
9260
9261 /* Find an available register to be used as dynamic realign argument
9262 pointer register. Such a register will be written in the prologue and
9263 used at the beginning of the body, so it must not be
9264 1. a parameter passing register.
9265 2. the GOT pointer.
9266 We reuse the static-chain register if it is available. Otherwise, we
9267 use DI for i386 and R13 for x86-64. We chose R13 since it has
9268 shorter encoding.
9269
9270 Return: the regno of chosen register. */
9271
9272 static unsigned int
9273 find_drap_reg (void)
9274 {
9275 tree decl = cfun->decl;
9276
9277 if (TARGET_64BIT)
9278 {
9279 /* Use R13 for a nested function or a function that needs a static
9280 chain. Since a function with a tail call may use any caller-saved
9281 register in the epilogue, DRAP must not use a caller-saved
9282 register in that case. */
9283 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9284 return R13_REG;
9285
9286 return R10_REG;
9287 }
9288 else
9289 {
9290 /* Use DI for a nested function or a function that needs a static
9291 chain. Since a function with a tail call may use any caller-saved
9292 register in the epilogue, DRAP must not use a caller-saved
9293 register in that case. */
9294 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9295 return DI_REG;
9296
9297 /* Reuse static chain register if it isn't used for parameter
9298 passing. */
9299 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
9300 {
9301 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
9302 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
9303 return CX_REG;
9304 }
9305 return DI_REG;
9306 }
9307 }
9308
9309 /* Return minimum incoming stack alignment. */
9310
9311 static unsigned int
9312 ix86_minimum_incoming_stack_boundary (bool sibcall)
9313 {
9314 unsigned int incoming_stack_boundary;
9315
9316 /* Prefer the one specified at command line. */
9317 if (ix86_user_incoming_stack_boundary)
9318 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
9319 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
9320 if -mstackrealign is used, this is not a sibcall check, and the
9321 estimated stack alignment is 128 bits. */
9322 else if (!sibcall
9323 && !TARGET_64BIT
9324 && ix86_force_align_arg_pointer
9325 && crtl->stack_alignment_estimated == 128)
9326 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9327 else
9328 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
9329
9330 /* Incoming stack alignment can be changed on individual functions
9331 via force_align_arg_pointer attribute. We use the smallest
9332 incoming stack boundary. */
9333 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
9334 && lookup_attribute (ix86_force_align_arg_pointer_string,
9335 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
9336 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9337
9338 /* The incoming stack frame has to be aligned at least at
9339 parm_stack_boundary. */
9340 if (incoming_stack_boundary < crtl->parm_stack_boundary)
9341 incoming_stack_boundary = crtl->parm_stack_boundary;
9342
9343 /* Stack at entrance of main is aligned by runtime. We use the
9344 smallest incoming stack boundary. */
9345 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
9346 && DECL_NAME (current_function_decl)
9347 && MAIN_NAME_P (DECL_NAME (current_function_decl))
9348 && DECL_FILE_SCOPE_P (current_function_decl))
9349 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
9350
9351 return incoming_stack_boundary;
9352 }
9353
9354 /* Update incoming stack boundary and estimated stack alignment. */
9355
9356 static void
9357 ix86_update_stack_boundary (void)
9358 {
9359 ix86_incoming_stack_boundary
9360 = ix86_minimum_incoming_stack_boundary (false);
9361
9362 /* x86_64 vararg needs 16byte stack alignment for register save
9363 area. */
9364 if (TARGET_64BIT
9365 && cfun->stdarg
9366 && crtl->stack_alignment_estimated < 128)
9367 crtl->stack_alignment_estimated = 128;
9368 }
9369
9370 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9371 needed or an rtx for DRAP otherwise. */
9372
9373 static rtx
9374 ix86_get_drap_rtx (void)
9375 {
9376 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9377 crtl->need_drap = true;
9378
9379 if (stack_realign_drap)
9380 {
9381 /* Assign DRAP to vDRAP and return vDRAP. */
9382 unsigned int regno = find_drap_reg ();
9383 rtx drap_vreg;
9384 rtx arg_ptr;
9385 rtx seq, insn;
9386
9387 arg_ptr = gen_rtx_REG (Pmode, regno);
9388 crtl->drap_reg = arg_ptr;
9389
9390 start_sequence ();
9391 drap_vreg = copy_to_reg (arg_ptr);
9392 seq = get_insns ();
9393 end_sequence ();
9394
9395 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
9396 if (!optimize)
9397 {
9398 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9399 RTX_FRAME_RELATED_P (insn) = 1;
9400 }
9401 return drap_vreg;
9402 }
9403 else
9404 return NULL;
9405 }
9406
9407 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9408
9409 static rtx
9410 ix86_internal_arg_pointer (void)
9411 {
9412 return virtual_incoming_args_rtx;
9413 }
9414
9415 struct scratch_reg {
9416 rtx reg;
9417 bool saved;
9418 };
9419
9420 /* Return a short-lived scratch register for use on function entry.
9421 In 32-bit mode, it is valid only after the registers are saved
9422 in the prologue. This register must be released by means of
9423 release_scratch_register_on_entry once it is dead. */
9424
9425 static void
9426 get_scratch_register_on_entry (struct scratch_reg *sr)
9427 {
9428 int regno;
9429
9430 sr->saved = false;
9431
9432 if (TARGET_64BIT)
9433 {
9434 /* We always use R11 in 64-bit mode. */
9435 regno = R11_REG;
9436 }
9437 else
9438 {
9439 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9440 bool fastcall_p
9441 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9442 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9443 int regparm = ix86_function_regparm (fntype, decl);
9444 int drap_regno
9445 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9446
9447 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9448 for the static chain register. */
9449 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9450 && drap_regno != AX_REG)
9451 regno = AX_REG;
9452 else if (regparm < 2 && drap_regno != DX_REG)
9453 regno = DX_REG;
9454 /* ecx is the static chain register. */
9455 else if (regparm < 3 && !fastcall_p && !static_chain_p
9456 && drap_regno != CX_REG)
9457 regno = CX_REG;
9458 else if (ix86_save_reg (BX_REG, true))
9459 regno = BX_REG;
9460 /* esi is the static chain register. */
9461 else if (!(regparm == 3 && static_chain_p)
9462 && ix86_save_reg (SI_REG, true))
9463 regno = SI_REG;
9464 else if (ix86_save_reg (DI_REG, true))
9465 regno = DI_REG;
9466 else
9467 {
9468 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9469 sr->saved = true;
9470 }
9471 }
9472
9473 sr->reg = gen_rtx_REG (Pmode, regno);
9474 if (sr->saved)
9475 {
9476 rtx insn = emit_insn (gen_push (sr->reg));
9477 RTX_FRAME_RELATED_P (insn) = 1;
9478 }
9479 }
9480
9481 /* Release a scratch register obtained from the preceding function. */
9482
9483 static void
9484 release_scratch_register_on_entry (struct scratch_reg *sr)
9485 {
9486 if (sr->saved)
9487 {
9488 rtx x, insn = emit_insn (gen_pop (sr->reg));
9489
9490 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9491 RTX_FRAME_RELATED_P (insn) = 1;
9492 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9493 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9494 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9495 }
9496 }
9497
9498 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
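/* Assuming the default STACK_CHECK_PROBE_INTERVAL_EXP of 12, this is
   4096 bytes, i.e. one probe per page.  */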
9499
9500 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9501
9502 static void
9503 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9504 {
9505 /* We skip the probe for the first interval + a small dope of 4 words and
9506 probe that many bytes past the specified size to maintain a protection
9507 area at the bottom of the stack. */
9508 const int dope = 4 * UNITS_PER_WORD;
9509 rtx size_rtx = GEN_INT (size), last;
9510
9511 /* See if we have a constant small number of probes to generate. If so,
9512 that's the easy case. The run-time loop is made up of 11 insns in the
9513 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9514 for n # of intervals. */
9515 if (size <= 5 * PROBE_INTERVAL)
9516 {
9517 HOST_WIDE_INT i, adjust;
9518 bool first_probe = true;
9519
9520 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9521 values of N from 1 until it exceeds SIZE. If only one probe is
9522 needed, this will not generate any code. Then adjust and probe
9523 to PROBE_INTERVAL + SIZE. */
9524 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9525 {
9526 if (first_probe)
9527 {
9528 adjust = 2 * PROBE_INTERVAL + dope;
9529 first_probe = false;
9530 }
9531 else
9532 adjust = PROBE_INTERVAL;
9533
9534 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9535 plus_constant (stack_pointer_rtx, -adjust)));
9536 emit_stack_probe (stack_pointer_rtx);
9537 }
9538
9539 if (first_probe)
9540 adjust = size + PROBE_INTERVAL + dope;
9541 else
9542 adjust = size + PROBE_INTERVAL - i;
9543
9544 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9545 plus_constant (stack_pointer_rtx, -adjust)));
9546 emit_stack_probe (stack_pointer_rtx);
9547
9548 /* Adjust back to account for the additional first interval. */
9549 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9550 plus_constant (stack_pointer_rtx,
9551 PROBE_INTERVAL + dope)));
9552 }
9553
9554 /* Otherwise, do the same as above, but in a loop. Note that we must be
9555 extra careful with variables wrapping around because we might be at
9556 the very top (or the very bottom) of the address space and we have
9557 to be able to handle this case properly; in particular, we use an
9558 equality test for the loop condition. */
9559 else
9560 {
9561 HOST_WIDE_INT rounded_size;
9562 struct scratch_reg sr;
9563
9564 get_scratch_register_on_entry (&sr);
9565
9566
9567 /* Step 1: round SIZE to the previous multiple of the interval. */
9568
9569 rounded_size = size & -PROBE_INTERVAL;
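/* Reader aid with illustrative numbers: for SIZE == 10000 and a
   4096-byte interval, ROUNDED_SIZE == 8192; the remaining 1808 bytes
   are handled by the extra adjust-and-probe in step 4 below.  */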
9570
9571
9572 /* Step 2: compute initial and final value of the loop counter. */
9573
9574 /* SP = SP_0 + PROBE_INTERVAL. */
9575 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9576 plus_constant (stack_pointer_rtx,
9577 - (PROBE_INTERVAL + dope))));
9578
9579 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9580 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
9581 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
9582 gen_rtx_PLUS (Pmode, sr.reg,
9583 stack_pointer_rtx)));
9584
9585
9586 /* Step 3: the loop
9587
9588 while (SP != LAST_ADDR)
9589 {
9590 SP = SP + PROBE_INTERVAL
9591 probe at SP
9592 }
9593
9594 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9595 values of N from 1 until it is equal to ROUNDED_SIZE. */
9596
9597 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
9598
9599
9600 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9601 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9602
9603 if (size != rounded_size)
9604 {
9605 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9606 plus_constant (stack_pointer_rtx,
9607 rounded_size - size)));
9608 emit_stack_probe (stack_pointer_rtx);
9609 }
9610
9611 /* Adjust back to account for the additional first interval. */
9612 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9613 plus_constant (stack_pointer_rtx,
9614 PROBE_INTERVAL + dope)));
9615
9616 release_scratch_register_on_entry (&sr);
9617 }
9618
9619 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
9620
9621 /* Even if the stack pointer isn't the CFA register, we need to correctly
9622 describe the adjustments made to it, in particular differentiate the
9623 frame-related ones from the frame-unrelated ones. */
9624 if (size > 0)
9625 {
9626 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
9627 XVECEXP (expr, 0, 0)
9628 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9629 plus_constant (stack_pointer_rtx, -size));
9630 XVECEXP (expr, 0, 1)
9631 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9632 plus_constant (stack_pointer_rtx,
9633 PROBE_INTERVAL + dope + size));
9634 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
9635 RTX_FRAME_RELATED_P (last) = 1;
9636
9637 cfun->machine->fs.sp_offset += size;
9638 }
9639
9640 /* Make sure nothing is scheduled before we are done. */
9641 emit_insn (gen_blockage ());
9642 }
9643
9644 /* Adjust the stack pointer up to REG while probing it. */
9645
9646 const char *
9647 output_adjust_stack_and_probe (rtx reg)
9648 {
9649 static int labelno = 0;
9650 char loop_lab[32], end_lab[32];
9651 rtx xops[2];
9652
9653 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9654 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9655
9656 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9657
9658 /* Jump to END_LAB if SP == LAST_ADDR. */
9659 xops[0] = stack_pointer_rtx;
9660 xops[1] = reg;
9661 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9662 fputs ("\tje\t", asm_out_file);
9663 assemble_name_raw (asm_out_file, end_lab);
9664 fputc ('\n', asm_out_file);
9665
9666 /* SP = SP + PROBE_INTERVAL. */
9667 xops[1] = GEN_INT (PROBE_INTERVAL);
9668 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9669
9670 /* Probe at SP. */
9671 xops[1] = const0_rtx;
9672 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
9673
9674 fprintf (asm_out_file, "\tjmp\t");
9675 assemble_name_raw (asm_out_file, loop_lab);
9676 fputc ('\n', asm_out_file);
9677
9678 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9679
9680 return "";
9681 }
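/* Reader aid: the sequence emitted above looks roughly like this in
   32-bit AT&T syntax, assuming the default 4096-byte probe interval,
   the probe limit in %eax, and illustrative label numbers:

	.LPSRL0:
		cmpl	%eax, %esp
		je	.LPSRE0
		subl	$4096, %esp
		orl	$0, (%esp)
		jmp	.LPSRL0
	.LPSRE0:  */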
9682
9683 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9684 inclusive. These are offsets from the current stack pointer. */
9685
9686 static void
9687 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
9688 {
9689 /* See if we have a constant small number of probes to generate. If so,
9690 that's the easy case. The run-time loop is made up of 7 insns in the
9691 generic case while the compile-time loop is made up of n insns for n #
9692 of intervals. */
9693 if (size <= 7 * PROBE_INTERVAL)
9694 {
9695 HOST_WIDE_INT i;
9696
9697 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9698 it exceeds SIZE. If only one probe is needed, this will not
9699 generate any code. Then probe at FIRST + SIZE. */
9700 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9701 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
9702
9703 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
9704 }
9705
9706 /* Otherwise, do the same as above, but in a loop. Note that we must be
9707 extra careful with variables wrapping around because we might be at
9708 the very top (or the very bottom) of the address space and we have
9709 to be able to handle this case properly; in particular, we use an
9710 equality test for the loop condition. */
9711 else
9712 {
9713 HOST_WIDE_INT rounded_size, last;
9714 struct scratch_reg sr;
9715
9716 get_scratch_register_on_entry (&sr);
9717
9718
9719 /* Step 1: round SIZE to the previous multiple of the interval. */
9720
9721 rounded_size = size & -PROBE_INTERVAL;
9722
9723
9724 /* Step 2: compute initial and final value of the loop counter. */
9725
9726 /* TEST_OFFSET = FIRST. */
9727 emit_move_insn (sr.reg, GEN_INT (-first));
9728
9729 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9730 last = first + rounded_size;
9731
9732
9733 /* Step 3: the loop
9734
9735 while (TEST_ADDR != LAST_ADDR)
9736 {
9737 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9738 probe at TEST_ADDR
9739 }
9740
9741 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9742 until it is equal to ROUNDED_SIZE. */
9743
9744 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
9745
9746
9747 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9748 that SIZE is equal to ROUNDED_SIZE. */
9749
9750 if (size != rounded_size)
9751 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
9752 stack_pointer_rtx,
9753 sr.reg),
9754 rounded_size - size));
9755
9756 release_scratch_register_on_entry (&sr);
9757 }
9758
9759 /* Make sure nothing is scheduled before we are done. */
9760 emit_insn (gen_blockage ());
9761 }
9762
9763 /* Probe a range of stack addresses from REG to END, inclusive. These are
9764 offsets from the current stack pointer. */
9765
9766 const char *
9767 output_probe_stack_range (rtx reg, rtx end)
9768 {
9769 static int labelno = 0;
9770 char loop_lab[32], end_lab[32];
9771 rtx xops[3];
9772
9773 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9774 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9775
9776 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9777
9778 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
9779 xops[0] = reg;
9780 xops[1] = end;
9781 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9782 fputs ("\tje\t", asm_out_file);
9783 assemble_name_raw (asm_out_file, end_lab);
9784 fputc ('\n', asm_out_file);
9785
9786 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
9787 xops[1] = GEN_INT (PROBE_INTERVAL);
9788 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9789
9790 /* Probe at TEST_ADDR. */
9791 xops[0] = stack_pointer_rtx;
9792 xops[1] = reg;
9793 xops[2] = const0_rtx;
9794 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
9795
9796 fprintf (asm_out_file, "\tjmp\t");
9797 assemble_name_raw (asm_out_file, loop_lab);
9798 fputc ('\n', asm_out_file);
9799
9800 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9801
9802 return "";
9803 }
9804
9805 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
9806 to be generated in the correct form. */
9807 static void
9808 ix86_finalize_stack_realign_flags (void)
9809 {
9810 /* Check if stack realignment is really needed after reload, and
9811 store the result in cfun. */
9812 unsigned int incoming_stack_boundary
9813 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
9814 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
9815 unsigned int stack_realign = (incoming_stack_boundary
9816 < (current_function_is_leaf
9817 ? crtl->max_used_stack_slot_alignment
9818 : crtl->stack_alignment_needed));
9819
9820 if (crtl->stack_realign_finalized)
9821 {
9822 /* After stack_realign_needed is finalized, we can no longer
9823 change it. */
9824 gcc_assert (crtl->stack_realign_needed == stack_realign);
9825 }
9826 else
9827 {
9828 crtl->stack_realign_needed = stack_realign;
9829 crtl->stack_realign_finalized = true;
9830 }
9831 }
9832
9833 /* Expand the prologue into a bunch of separate insns. */
9834
9835 void
9836 ix86_expand_prologue (void)
9837 {
9838 struct machine_function *m = cfun->machine;
9839 rtx insn, t;
9840 bool pic_reg_used;
9841 struct ix86_frame frame;
9842 HOST_WIDE_INT allocate;
9843 bool int_registers_saved;
9844
9845 ix86_finalize_stack_realign_flags ();
9846
9847 /* DRAP should not coexist with stack_realign_fp */
9848 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
9849
9850 memset (&m->fs, 0, sizeof (m->fs));
9851
9852 /* Initialize CFA state for before the prologue. */
9853 m->fs.cfa_reg = stack_pointer_rtx;
9854 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
9855
9856 /* Track SP offset to the CFA. We continue tracking this after we've
9857 swapped the CFA register away from SP. In the case of re-alignment
9858 this is fudged; we're interested in offsets within the local frame. */
9859 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9860 m->fs.sp_valid = true;
9861
9862 ix86_compute_frame_layout (&frame);
9863
9864 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
9865 {
9866 /* We should have already generated an error for any use of
9867 ms_hook on a nested function. */
9868 gcc_checking_assert (!ix86_static_chain_on_stack);
9869
9870 /* Check if profiling is active and we shall use the profiling before
9871 prologue variant. If so, sorry. */
9872 if (crtl->profile && flag_fentry != 0)
9873 sorry ("ms_hook_prologue attribute isn%'t compatible "
9874 "with -mfentry for 32-bit");
9875
9876 /* In ix86_asm_output_function_label we emitted:
9877 8b ff movl.s %edi,%edi
9878 55 push %ebp
9879 8b ec movl.s %esp,%ebp
9880
9881 This matches the hookable function prologue in Win32 API
9882 functions in Microsoft Windows XP Service Pack 2 and newer.
9883 Wine uses this to enable Windows apps to hook the Win32 API
9884 functions provided by Wine.
9885
9886 What that means is that we've already set up the frame pointer. */
9887
9888 if (frame_pointer_needed
9889 && !(crtl->drap_reg && crtl->stack_realign_needed))
9890 {
9891 rtx push, mov;
9892
9893 /* We've decided to use the frame pointer already set up.
9894 Describe this to the unwinder by pretending that both
9895 push and mov insns happen right here.
9896
9897 Putting the unwind info here at the end of the ms_hook
9898 is done so that we can make absolutely certain we get
9899 the required byte sequence at the start of the function,
9900 rather than relying on an assembler that can produce
9901 the exact encoding required.
9902
9903 However it does mean (in the unpatched case) that we have
9904 a 1 insn window where the asynchronous unwind info is
9905 incorrect. However, if we placed the unwind info at
9906 its correct location we would have incorrect unwind info
9907 in the patched case. Which is probably all moot since
9908 I don't expect Wine generates dwarf2 unwind info for the
9909 system libraries that use this feature. */
9910
9911 insn = emit_insn (gen_blockage ());
9912
9913 push = gen_push (hard_frame_pointer_rtx);
9914 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
9915 stack_pointer_rtx);
9916 RTX_FRAME_RELATED_P (push) = 1;
9917 RTX_FRAME_RELATED_P (mov) = 1;
9918
9919 RTX_FRAME_RELATED_P (insn) = 1;
9920 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9921 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
9922
9923 /* Note that gen_push incremented m->fs.cfa_offset, even
9924 though we didn't emit the push insn here. */
9925 m->fs.cfa_reg = hard_frame_pointer_rtx;
9926 m->fs.fp_offset = m->fs.cfa_offset;
9927 m->fs.fp_valid = true;
9928 }
9929 else
9930 {
9931 /* The frame pointer is not needed so pop %ebp again.
9932 This leaves us with a pristine state. */
9933 emit_insn (gen_pop (hard_frame_pointer_rtx));
9934 }
9935 }
9936
9937 /* The first insn of a function that accepts its static chain on the
9938 stack is to push the register that would be filled in by a direct
9939 call. This insn will be skipped by the trampoline. */
9940 else if (ix86_static_chain_on_stack)
9941 {
9942 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
9943 emit_insn (gen_blockage ());
9944
9945 /* We don't want to interpret this push insn as a register save,
9946 only as a stack adjustment. The real copy of the register as
9947 a save will be done later, if needed. */
9948 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
9949 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
9950 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9951 RTX_FRAME_RELATED_P (insn) = 1;
9952 }
9953
9954 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
9955 DRAP is needed and stack realignment is really needed after reload. */
9956 if (stack_realign_drap)
9957 {
9958 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9959
9960 /* Only need to push parameter pointer reg if it is caller saved. */
9961 if (!call_used_regs[REGNO (crtl->drap_reg)])
9962 {
9963 /* Push arg pointer reg */
9964 insn = emit_insn (gen_push (crtl->drap_reg));
9965 RTX_FRAME_RELATED_P (insn) = 1;
9966 }
9967
9968 /* Grab the argument pointer. */
9969 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
9970 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
9971 RTX_FRAME_RELATED_P (insn) = 1;
9972 m->fs.cfa_reg = crtl->drap_reg;
9973 m->fs.cfa_offset = 0;
9974
9975 /* Align the stack. */
9976 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9977 stack_pointer_rtx,
9978 GEN_INT (-align_bytes)));
9979 RTX_FRAME_RELATED_P (insn) = 1;
9980
9981 /* Replicate the return address on the stack so that return
9982 address can be reached via (argp - 1) slot. This is needed
9983 to implement macro RETURN_ADDR_RTX and intrinsic function
9984 expand_builtin_return_addr etc. */
9985 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
9986 t = gen_frame_mem (Pmode, t);
9987 insn = emit_insn (gen_push (t));
9988 RTX_FRAME_RELATED_P (insn) = 1;
9989
9990 /* For the purposes of frame and register save area addressing,
9991 we've started over with a new frame. */
9992 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9993 m->fs.realigned = true;
9994 }
9995
9996 if (frame_pointer_needed && !m->fs.fp_valid)
9997 {
9998 /* Note: AT&T enter does NOT have reversed args. Enter is probably
9999 slower on all targets. Also sdb doesn't like it. */
10000 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10001 RTX_FRAME_RELATED_P (insn) = 1;
10002
10003 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10004 {
10005 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10006 RTX_FRAME_RELATED_P (insn) = 1;
10007
10008 if (m->fs.cfa_reg == stack_pointer_rtx)
10009 m->fs.cfa_reg = hard_frame_pointer_rtx;
10010 m->fs.fp_offset = m->fs.sp_offset;
10011 m->fs.fp_valid = true;
10012 }
10013 }
10014
10015 int_registers_saved = (frame.nregs == 0);
10016
10017 if (!int_registers_saved)
10018 {
10019 /* If saving registers via PUSH, do so now. */
10020 if (!frame.save_regs_using_mov)
10021 {
10022 ix86_emit_save_regs ();
10023 int_registers_saved = true;
10024 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10025 }
10026
10027 /* When using the red zone we may start register saving before allocating
10028 the stack frame, saving one cycle of the prologue. However, avoid
10029 doing this if we have to probe the stack; at least on x86_64 the
10030 stack probe can turn into a call that clobbers a red zone location. */
10031 else if (ix86_using_red_zone ()
10032 && (! TARGET_STACK_PROBE
10033 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10034 {
10035 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10036 int_registers_saved = true;
10037 }
10038 }
10039
10040 if (stack_realign_fp)
10041 {
10042 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10043 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10044
10045 /* The computation of the size of the re-aligned stack frame means
10046 that we must allocate the size of the register save area before
10047 performing the actual alignment. Otherwise we cannot guarantee
10048 that there's enough storage above the realignment point. */
10049 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10050 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10051 GEN_INT (m->fs.sp_offset
10052 - frame.sse_reg_save_offset),
10053 -1, false);
10054
10055 /* Align the stack. */
10056 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10057 stack_pointer_rtx,
10058 GEN_INT (-align_bytes)));
10059
10060 /* For the purposes of register save area addressing, the stack
10061 pointer is no longer valid. As for the value of sp_offset,
10062 see ix86_compute_frame_layout, which we need to match in order
10063 to pass verification of stack_pointer_offset at the end. */
10064 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10065 m->fs.sp_valid = false;
10066 }
10067
10068 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10069
10070 if (flag_stack_usage_info)
10071 {
10072 /* We start to count from ARG_POINTER. */
10073 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10074
10075 /* If it was realigned, take into account the fake frame. */
10076 if (stack_realign_drap)
10077 {
10078 if (ix86_static_chain_on_stack)
10079 stack_size += UNITS_PER_WORD;
10080
10081 if (!call_used_regs[REGNO (crtl->drap_reg)])
10082 stack_size += UNITS_PER_WORD;
10083
10084 /* This over-estimates by 1 minimal-stack-alignment-unit but
10085 mitigates that by counting in the new return address slot. */
10086 current_function_dynamic_stack_size
10087 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10088 }
10089
10090 current_function_static_stack_size = stack_size;
10091 }
10092
10093 /* The stack has already been decremented by the instruction calling us
10094 so probe if the size is non-negative to preserve the protection area. */
10095 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
10096 {
10097 /* We expect the registers to be saved when probes are used. */
10098 gcc_assert (int_registers_saved);
10099
10100 if (STACK_CHECK_MOVING_SP)
10101 {
10102 ix86_adjust_stack_and_probe (allocate);
10103 allocate = 0;
10104 }
10105 else
10106 {
10107 HOST_WIDE_INT size = allocate;
10108
10109 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
10110 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
10111
10112 if (TARGET_STACK_PROBE)
10113 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
10114 else
10115 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
10116 }
10117 }
10118
10119 if (allocate == 0)
10120 ;
10121 else if (!ix86_target_stack_probe ()
10122 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
10123 {
10124 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10125 GEN_INT (-allocate), -1,
10126 m->fs.cfa_reg == stack_pointer_rtx);
10127 }
10128 else
10129 {
10130 rtx eax = gen_rtx_REG (Pmode, AX_REG);
10131 rtx r10 = NULL;
10132 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
10133
10134 bool eax_live = false;
10135 bool r10_live = false;
10136
10137 if (TARGET_64BIT)
10138 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
10139 if (!TARGET_64BIT_MS_ABI)
10140 eax_live = ix86_eax_live_at_start_p ();
10141
10142 if (eax_live)
10143 {
10144 emit_insn (gen_push (eax));
10145 allocate -= UNITS_PER_WORD;
10146 }
10147 if (r10_live)
10148 {
10149 r10 = gen_rtx_REG (Pmode, R10_REG);
10150 emit_insn (gen_push (r10));
10151 allocate -= UNITS_PER_WORD;
10152 }
10153
10154 emit_move_insn (eax, GEN_INT (allocate));
10155 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
10156
10157 /* Use the fact that AX still contains ALLOCATE. */
10158 adjust_stack_insn = (TARGET_64BIT
10159 ? gen_pro_epilogue_adjust_stack_di_sub
10160 : gen_pro_epilogue_adjust_stack_si_sub);
10161
10162 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
10163 stack_pointer_rtx, eax));
10164
10165 /* Note that SEH directives need to continue tracking the stack
10166 pointer even after the frame pointer has been set up. */
10167 if (m->fs.cfa_reg == stack_pointer_rtx || TARGET_SEH)
10168 {
10169 if (m->fs.cfa_reg == stack_pointer_rtx)
10170 m->fs.cfa_offset += allocate;
10171
10172 RTX_FRAME_RELATED_P (insn) = 1;
10173 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10174 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10175 plus_constant (stack_pointer_rtx,
10176 -allocate)));
10177 }
10178 m->fs.sp_offset += allocate;
10179
10180 if (r10_live && eax_live)
10181 {
10182 t = choose_baseaddr (m->fs.sp_offset - allocate);
10183 emit_move_insn (r10, gen_frame_mem (Pmode, t));
10184 t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
10185 emit_move_insn (eax, gen_frame_mem (Pmode, t));
10186 }
10187 else if (eax_live || r10_live)
10188 {
10189 t = choose_baseaddr (m->fs.sp_offset - allocate);
10190 emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
10191 }
10192 }
10193 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
10194
10195 /* If we haven't already set up the frame pointer, do so now. */
10196 if (frame_pointer_needed && !m->fs.fp_valid)
10197 {
10198 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
10199 GEN_INT (frame.stack_pointer_offset
10200 - frame.hard_frame_pointer_offset));
10201 insn = emit_insn (insn);
10202 RTX_FRAME_RELATED_P (insn) = 1;
10203 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
10204
10205 if (m->fs.cfa_reg == stack_pointer_rtx)
10206 m->fs.cfa_reg = hard_frame_pointer_rtx;
10207 m->fs.fp_offset = frame.hard_frame_pointer_offset;
10208 m->fs.fp_valid = true;
10209 }
10210
10211 if (!int_registers_saved)
10212 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10213 if (frame.nsseregs)
10214 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10215
10216 pic_reg_used = false;
10217 if (pic_offset_table_rtx
10218 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10219 || crtl->profile))
10220 {
10221 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
10222
10223 if (alt_pic_reg_used != INVALID_REGNUM)
10224 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
10225
10226 pic_reg_used = true;
10227 }
10228
10229 if (pic_reg_used)
10230 {
10231 if (TARGET_64BIT)
10232 {
10233 if (ix86_cmodel == CM_LARGE_PIC)
10234 {
10235 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
10236 rtx label = gen_label_rtx ();
10237 emit_label (label);
10238 LABEL_PRESERVE_P (label) = 1;
10239 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
10240 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
10241 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
10242 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
10243 pic_offset_table_rtx, tmp_reg));
10244 }
10245 else
10246 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
10247 }
10248 else
10249 {
10250 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
10251 RTX_FRAME_RELATED_P (insn) = 1;
10252 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
10253 }
10254 }
10255
10256 /* In the pic_reg_used case, make sure that the got load isn't deleted
10257 when mcount needs it. Blockage to avoid call movement across mcount
10258 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10259 note. */
10260 if (crtl->profile && !flag_fentry && pic_reg_used)
10261 emit_insn (gen_prologue_use (pic_offset_table_rtx));
10262
10263 if (crtl->drap_reg && !crtl->stack_realign_needed)
10264 {
10265 /* vDRAP is set up, but after reload it turns out stack realignment
10266 isn't necessary; here we emit prologue code to set up DRAP
10267 without the stack realignment adjustment. */
10268 t = choose_baseaddr (0);
10269 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10270 }
10271
10272 /* Prevent instructions from being scheduled into register save push
10273 sequence when access to the redzone area is done through frame pointer.
10274 The offset between the frame pointer and the stack pointer is calculated
10275 relative to the value of the stack pointer at the end of the function
10276 prologue, and moving instructions that access redzone area via frame
10277 pointer inside push sequence violates this assumption. */
10278 if (frame_pointer_needed && frame.red_zone_size)
10279 emit_insn (gen_memory_blockage ());
10280
10281 /* Emit cld instruction if stringops are used in the function. */
10282 if (TARGET_CLD && ix86_current_function_needs_cld)
10283 emit_insn (gen_cld ());
10284
10285 /* SEH requires that the prologue end within 256 bytes of the start of
10286 the function. Prevent instruction schedules that would extend that.
10287 Further, prevent alloca modifications to the stack pointer from being
10288 combined with prologue modifications. */
10289 if (TARGET_SEH)
10290 emit_insn (gen_prologue_use (stack_pointer_rtx));
10291 }
10292
10293 /* Emit code to restore REG using a POP insn. */
10294
10295 static void
10296 ix86_emit_restore_reg_using_pop (rtx reg)
10297 {
10298 struct machine_function *m = cfun->machine;
10299 rtx insn = emit_insn (gen_pop (reg));
10300
10301 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
10302 m->fs.sp_offset -= UNITS_PER_WORD;
10303
10304 if (m->fs.cfa_reg == crtl->drap_reg
10305 && REGNO (reg) == REGNO (crtl->drap_reg))
10306 {
10307 /* Previously we'd represented the CFA as an expression
10308 like *(%ebp - 8). We've just popped that value from
10309 the stack, which means we need to reset the CFA to
10310 the drap register. This will remain until we restore
10311 the stack pointer. */
10312 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10313 RTX_FRAME_RELATED_P (insn) = 1;
10314
10315 /* This means that the DRAP register is valid for addressing too. */
10316 m->fs.drap_valid = true;
10317 return;
10318 }
10319
10320 if (m->fs.cfa_reg == stack_pointer_rtx)
10321 {
10322 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
10323 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10324 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10325 RTX_FRAME_RELATED_P (insn) = 1;
10326
10327 m->fs.cfa_offset -= UNITS_PER_WORD;
10328 }
10329
10330 /* When the frame pointer is the CFA, and we pop it, we are
10331 swapping back to the stack pointer as the CFA. This happens
10332 for stack frames that don't allocate other data, so we assume
10333 the stack pointer is now pointing at the return address, i.e.
10334 the function entry state, which makes the offset one word. */
10335 if (reg == hard_frame_pointer_rtx)
10336 {
10337 m->fs.fp_valid = false;
10338 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10339 {
10340 m->fs.cfa_reg = stack_pointer_rtx;
10341 m->fs.cfa_offset -= UNITS_PER_WORD;
10342
10343 add_reg_note (insn, REG_CFA_DEF_CFA,
10344 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10345 GEN_INT (m->fs.cfa_offset)));
10346 RTX_FRAME_RELATED_P (insn) = 1;
10347 }
10348 }
10349 }
10350
10351 /* Emit code to restore saved registers using POP insns. */
10352
10353 static void
10354 ix86_emit_restore_regs_using_pop (void)
10355 {
10356 unsigned int regno;
10357
10358 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10359 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
10360 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
10361 }
10362
10363 /* Emit code and notes for the LEAVE instruction. */
10364
10365 static void
10366 ix86_emit_leave (void)
10367 {
10368 struct machine_function *m = cfun->machine;
10369 rtx insn = emit_insn (ix86_gen_leave ());
10370
10371 ix86_add_queued_cfa_restore_notes (insn);
10372
10373 gcc_assert (m->fs.fp_valid);
10374 m->fs.sp_valid = true;
10375 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
10376 m->fs.fp_valid = false;
10377
10378 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10379 {
10380 m->fs.cfa_reg = stack_pointer_rtx;
10381 m->fs.cfa_offset = m->fs.sp_offset;
10382
10383 add_reg_note (insn, REG_CFA_DEF_CFA,
10384 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
10385 RTX_FRAME_RELATED_P (insn) = 1;
10386 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
10387 m->fs.fp_offset);
10388 }
10389 }
10390
10391 /* Emit code to restore saved registers using MOV insns.
10392 First register is restored from CFA - CFA_OFFSET. */
10393 static void
10394 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
10395 bool maybe_eh_return)
10396 {
10397 struct machine_function *m = cfun->machine;
10398 unsigned int regno;
10399
10400 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10401 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10402 {
10403 rtx reg = gen_rtx_REG (Pmode, regno);
10404 rtx insn, mem;
10405
10406 mem = choose_baseaddr (cfa_offset);
10407 mem = gen_frame_mem (Pmode, mem);
10408 insn = emit_move_insn (reg, mem);
10409
10410 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
10411 {
10412 /* Previously we'd represented the CFA as an expression
10413 	       like *(%ebp - 8).  We've just reloaded that value from
10414 the stack, which means we need to reset the CFA to
10415 the drap register. This will remain until we restore
10416 the stack pointer. */
10417 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10418 RTX_FRAME_RELATED_P (insn) = 1;
10419
10420 /* This means that the DRAP register is valid for addressing. */
10421 m->fs.drap_valid = true;
10422 }
10423 else
10424 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10425
10426 cfa_offset -= UNITS_PER_WORD;
10427 }
10428 }
10429
10430 /* Emit code to restore saved SSE registers using MOV insns.
10431 First register is restored from CFA - CFA_OFFSET. */
10432 static void
10433 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10434 bool maybe_eh_return)
10435 {
10436 unsigned int regno;
10437
10438 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10439 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10440 {
10441 rtx reg = gen_rtx_REG (V4SFmode, regno);
10442 rtx mem;
10443
10444 mem = choose_baseaddr (cfa_offset);
10445 mem = gen_rtx_MEM (V4SFmode, mem);
10446 set_mem_align (mem, 128);
10447 emit_move_insn (reg, mem);
10448
10449 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10450
10451 cfa_offset -= 16;
10452 }
10453 }
10454
10455 /* Restore function stack, frame, and registers. */
10456
10457 void
10458 ix86_expand_epilogue (int style)
10459 {
10460 struct machine_function *m = cfun->machine;
10461 struct machine_frame_state frame_state_save = m->fs;
10462 struct ix86_frame frame;
10463 bool restore_regs_via_mov;
10464 bool using_drap;
10465
10466 ix86_finalize_stack_realign_flags ();
10467 ix86_compute_frame_layout (&frame);
10468
10469 m->fs.sp_valid = (!frame_pointer_needed
10470 || (current_function_sp_is_unchanging
10471 && !stack_realign_fp));
10472 gcc_assert (!m->fs.sp_valid
10473 || m->fs.sp_offset == frame.stack_pointer_offset);
10474
10475 /* The FP must be valid if the frame pointer is present. */
10476 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10477 gcc_assert (!m->fs.fp_valid
10478 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10479
10480 /* We must have *some* valid pointer to the stack frame. */
10481 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10482
10483 /* The DRAP is never valid at this point. */
10484 gcc_assert (!m->fs.drap_valid);
10485
10486 /* See the comment about red zone and frame
10487 pointer usage in ix86_expand_prologue. */
10488 if (frame_pointer_needed && frame.red_zone_size)
10489 emit_insn (gen_memory_blockage ());
10490
10491 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10492 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10493
10494 /* Determine the CFA offset of the end of the red-zone. */
10495 m->fs.red_zone_offset = 0;
10496 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10497 {
10498 /* The red-zone begins below the return address. */
10499 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
10500
10501 /* When the register save area is in the aligned portion of
10502 the stack, determine the maximum runtime displacement that
10503 matches up with the aligned frame. */
10504 if (stack_realign_drap)
10505 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10506 + UNITS_PER_WORD);
10507 }
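/* Worked example, with illustrative values: on x86-64, RED_ZONE_SIZE is
   128 and UNITS_PER_WORD is 8, so the red zone ends 136 bytes below the
   CFA.  If the DRAP realigned the stack to, say, 32 bytes, the offset is
   reduced by 32 + 8 = 40, giving 96.  */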
10508
10509 /* Special care must be taken for the normal return case of a function
10510 using eh_return: the eax and edx registers are marked as saved, but
10511 not restored along this path. Adjust the save location to match. */
10512 if (crtl->calls_eh_return && style != 2)
10513 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
10514
10515 /* EH_RETURN requires the use of moves to function properly. */
10516 if (crtl->calls_eh_return)
10517 restore_regs_via_mov = true;
10518 /* SEH requires the use of pops to identify the epilogue. */
10519 else if (TARGET_SEH)
10520 restore_regs_via_mov = false;
10521   /* If we're only restoring one register and sp is not valid then
10522      use a move instruction to restore the register, since it's
10523      less work than reloading sp and popping the register.  */
10524 else if (!m->fs.sp_valid && frame.nregs <= 1)
10525 restore_regs_via_mov = true;
10526 else if (TARGET_EPILOGUE_USING_MOVE
10527 && cfun->machine->use_fast_prologue_epilogue
10528 && (frame.nregs > 1
10529 || m->fs.sp_offset != frame.reg_save_offset))
10530 restore_regs_via_mov = true;
10531 else if (frame_pointer_needed
10532 && !frame.nregs
10533 && m->fs.sp_offset != frame.reg_save_offset)
10534 restore_regs_via_mov = true;
10535 else if (frame_pointer_needed
10536 && TARGET_USE_LEAVE
10537 && cfun->machine->use_fast_prologue_epilogue
10538 && frame.nregs == 1)
10539 restore_regs_via_mov = true;
10540 else
10541 restore_regs_via_mov = false;
10542
10543 if (restore_regs_via_mov || frame.nsseregs)
10544 {
10545 /* Ensure that the entire register save area is addressable via
10546 the stack pointer, if we will restore via sp. */
10547 if (TARGET_64BIT
10548 && m->fs.sp_offset > 0x7fffffff
10549 && !(m->fs.fp_valid || m->fs.drap_valid)
10550 && (frame.nsseregs + frame.nregs) != 0)
10551 {
10552 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10553 GEN_INT (m->fs.sp_offset
10554 - frame.sse_reg_save_offset),
10555 style,
10556 m->fs.cfa_reg == stack_pointer_rtx);
10557 }
10558 }
10559
10560 /* If there are any SSE registers to restore, then we have to do it
10561 via moves, since there's obviously no pop for SSE regs. */
10562 if (frame.nsseregs)
10563 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10564 style == 2);
10565
10566 if (restore_regs_via_mov)
10567 {
10568 rtx t;
10569
10570 if (frame.nregs)
10571 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
10572
10573 /* eh_return epilogues need %ecx added to the stack pointer. */
10574 if (style == 2)
10575 {
10576 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
10577
10578 /* Stack align doesn't work with eh_return. */
10579 gcc_assert (!stack_realign_drap);
10580 	  /* Neither do regparm nested functions.  */
10581 gcc_assert (!ix86_static_chain_on_stack);
10582
10583 if (frame_pointer_needed)
10584 {
10585 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10586 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
10587 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
10588
10589 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
10590 insn = emit_move_insn (hard_frame_pointer_rtx, t);
10591
10592 /* Note that we use SA as a temporary CFA, as the return
10593 address is at the proper place relative to it. We
10594 pretend this happens at the FP restore insn because
10595 prior to this insn the FP would be stored at the wrong
10596 offset relative to SA, and after this insn we have no
10597 other reasonable register to use for the CFA. We don't
10598 bother resetting the CFA to the SP for the duration of
10599 the return insn. */
10600 add_reg_note (insn, REG_CFA_DEF_CFA,
10601 plus_constant (sa, UNITS_PER_WORD));
10602 ix86_add_queued_cfa_restore_notes (insn);
10603 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
10604 RTX_FRAME_RELATED_P (insn) = 1;
10605
10606 m->fs.cfa_reg = sa;
10607 m->fs.cfa_offset = UNITS_PER_WORD;
10608 m->fs.fp_valid = false;
10609
10610 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10611 const0_rtx, style, false);
10612 }
10613 else
10614 {
10615 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10616 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
10617 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
10618 ix86_add_queued_cfa_restore_notes (insn);
10619
10620 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10621 if (m->fs.cfa_offset != UNITS_PER_WORD)
10622 {
10623 m->fs.cfa_offset = UNITS_PER_WORD;
10624 add_reg_note (insn, REG_CFA_DEF_CFA,
10625 plus_constant (stack_pointer_rtx,
10626 UNITS_PER_WORD));
10627 RTX_FRAME_RELATED_P (insn) = 1;
10628 }
10629 }
10630 m->fs.sp_offset = UNITS_PER_WORD;
10631 m->fs.sp_valid = true;
10632 }
10633 }
10634 else
10635 {
10636 /* SEH requires that the function end with (1) a stack adjustment
10637 if necessary, (2) a sequence of pops, and (3) a return or
10638 jump instruction. Prevent insns from the function body from
10639 being scheduled into this sequence. */
10640 if (TARGET_SEH)
10641 {
10642 /* Prevent a catch region from being adjacent to the standard
10643 	     epilogue sequence.  Unfortunately, neither crtl->uses_eh_lsda nor
10644 	     several other flags that would be interesting to test are
10645 	     set up yet.  */
10646 if (flag_non_call_exceptions)
10647 emit_insn (gen_nops (const1_rtx));
10648 else
10649 emit_insn (gen_blockage ());
10650 }
10651
10652 /* First step is to deallocate the stack frame so that we can
10653 pop the registers. */
10654 if (!m->fs.sp_valid)
10655 {
10656 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10657 GEN_INT (m->fs.fp_offset
10658 - frame.reg_save_offset),
10659 style, false);
10660 }
10661 else if (m->fs.sp_offset != frame.reg_save_offset)
10662 {
10663 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10664 GEN_INT (m->fs.sp_offset
10665 - frame.reg_save_offset),
10666 style,
10667 m->fs.cfa_reg == stack_pointer_rtx);
10668 }
10669
10670 ix86_emit_restore_regs_using_pop ();
10671 }
10672
10673   /* If we used a frame pointer and haven't already got rid of it,
10674 then do so now. */
10675 if (m->fs.fp_valid)
10676 {
10677 /* If the stack pointer is valid and pointing at the frame
10678 pointer store address, then we only need a pop. */
10679 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
10680 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10681 /* Leave results in shorter dependency chains on CPUs that are
10682 able to grok it fast. */
10683 else if (TARGET_USE_LEAVE
10684 || optimize_function_for_size_p (cfun)
10685 || !cfun->machine->use_fast_prologue_epilogue)
10686 ix86_emit_leave ();
10687 else
10688 {
10689 pro_epilogue_adjust_stack (stack_pointer_rtx,
10690 hard_frame_pointer_rtx,
10691 const0_rtx, style, !using_drap);
10692 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10693 }
10694 }
10695
10696 if (using_drap)
10697 {
10698 int param_ptr_offset = UNITS_PER_WORD;
10699 rtx insn;
10700
10701 gcc_assert (stack_realign_drap);
10702
10703 if (ix86_static_chain_on_stack)
10704 param_ptr_offset += UNITS_PER_WORD;
10705 if (!call_used_regs[REGNO (crtl->drap_reg)])
10706 param_ptr_offset += UNITS_PER_WORD;
10707
10708 insn = emit_insn (gen_rtx_SET
10709 (VOIDmode, stack_pointer_rtx,
10710 gen_rtx_PLUS (Pmode,
10711 crtl->drap_reg,
10712 GEN_INT (-param_ptr_offset))));
10713 m->fs.cfa_reg = stack_pointer_rtx;
10714 m->fs.cfa_offset = param_ptr_offset;
10715 m->fs.sp_offset = param_ptr_offset;
10716 m->fs.realigned = false;
10717
10718 add_reg_note (insn, REG_CFA_DEF_CFA,
10719 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10720 GEN_INT (param_ptr_offset)));
10721 RTX_FRAME_RELATED_P (insn) = 1;
10722
10723 if (!call_used_regs[REGNO (crtl->drap_reg)])
10724 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10725 }
10726
10727 /* At this point the stack pointer must be valid, and we must have
10728 restored all of the registers. We may not have deallocated the
10729 entire stack frame. We've delayed this until now because it may
10730 be possible to merge the local stack deallocation with the
10731 deallocation forced by ix86_static_chain_on_stack. */
10732 gcc_assert (m->fs.sp_valid);
10733 gcc_assert (!m->fs.fp_valid);
10734 gcc_assert (!m->fs.realigned);
10735 if (m->fs.sp_offset != UNITS_PER_WORD)
10736 {
10737 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10738 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
10739 style, true);
10740 }
10741
10742 /* Sibcall epilogues don't want a return instruction. */
10743 if (style == 0)
10744 {
10745 m->fs = frame_state_save;
10746 return;
10747 }
10748
10749 /* Emit vzeroupper if needed. */
10750 if (TARGET_VZEROUPPER
10751 && !TREE_THIS_VOLATILE (cfun->decl)
10752 && !cfun->machine->caller_return_avx256_p)
10753 emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
10754
10755 if (crtl->args.pops_args && crtl->args.size)
10756 {
10757 rtx popc = GEN_INT (crtl->args.pops_args);
10758
10759       /* i386 can only pop 64K bytes.  If asked to pop more, pop the return
10760 	 address, do an explicit add, and jump indirectly to the caller.  */
10761
10762 if (crtl->args.pops_args >= 65536)
10763 {
10764 rtx ecx = gen_rtx_REG (SImode, CX_REG);
10765 rtx insn;
10766
10767 /* There is no "pascal" calling convention in any 64bit ABI. */
10768 gcc_assert (!TARGET_64BIT);
10769
10770 insn = emit_insn (gen_pop (ecx));
10771 m->fs.cfa_offset -= UNITS_PER_WORD;
10772 m->fs.sp_offset -= UNITS_PER_WORD;
10773
10774 add_reg_note (insn, REG_CFA_ADJUST_CFA,
10775 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
10776 add_reg_note (insn, REG_CFA_REGISTER,
10777 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
10778 RTX_FRAME_RELATED_P (insn) = 1;
10779
10780 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10781 popc, -1, true);
10782 emit_jump_insn (gen_return_indirect_internal (ecx));
10783 }
10784 else
10785 emit_jump_insn (gen_return_pop_internal (popc));
10786 }
10787 else
10788 emit_jump_insn (gen_return_internal ());
10789
10790 /* Restore the state back to the state from the prologue,
10791 so that it's correct for the next epilogue. */
10792 m->fs = frame_state_save;
10793 }
10794
10795 /* Reset from the function's potential modifications. */
10796
10797 static void
10798 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
10799 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
10800 {
10801 if (pic_offset_table_rtx)
10802 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
10803 #if TARGET_MACHO
10804 /* Mach-O doesn't support labels at the end of objects, so if
10805 it looks like we might want one, insert a NOP. */
10806 {
10807 rtx insn = get_last_insn ();
10808 while (insn
10809 && NOTE_P (insn)
10810 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
10811 insn = PREV_INSN (insn);
10812 if (insn
10813 && (LABEL_P (insn)
10814 || (NOTE_P (insn)
10815 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
10816 fputs ("\tnop\n", file);
10817 }
10818 #endif
10819
10820 }
10821
10822 /* Return a scratch register to use in the split stack prologue. The
10823    split stack prologue is used for -fsplit-stack.  It consists of the first
10824 instructions in the function, even before the regular prologue.
10825 The scratch register can be any caller-saved register which is not
10826 used for parameters or for the static chain. */
10827
10828 static unsigned int
10829 split_stack_prologue_scratch_regno (void)
10830 {
10831 if (TARGET_64BIT)
10832 return R11_REG;
10833 else
10834 {
10835 bool is_fastcall;
10836 int regparm;
10837
10838 is_fastcall = (lookup_attribute ("fastcall",
10839 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
10840 != NULL);
10841 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
10842
10843 if (is_fastcall)
10844 {
10845 if (DECL_STATIC_CHAIN (cfun->decl))
10846 {
10847 sorry ("-fsplit-stack does not support fastcall with "
10848 "nested function");
10849 return INVALID_REGNUM;
10850 }
10851 return AX_REG;
10852 }
10853 else if (regparm < 3)
10854 {
10855 if (!DECL_STATIC_CHAIN (cfun->decl))
10856 return CX_REG;
10857 else
10858 {
10859 if (regparm >= 2)
10860 {
10861 sorry ("-fsplit-stack does not support 2 register "
10862 " parameters for a nested function");
10863 return INVALID_REGNUM;
10864 }
10865 return DX_REG;
10866 }
10867 }
10868 else
10869 {
10870 /* FIXME: We could make this work by pushing a register
10871 around the addition and comparison. */
10872 sorry ("-fsplit-stack does not support 3 register parameters");
10873 return INVALID_REGNUM;
10874 }
10875 }
10876 }
10877
10878 /* A SYMBOL_REF for the function which allocates new stack space for
10879 -fsplit-stack. */
10880
10881 static GTY(()) rtx split_stack_fn;
10882
10883 /* A SYMBOL_REF for the more stack function when using the large
10884 model. */
10885
10886 static GTY(()) rtx split_stack_fn_large;
10887
10888 /* Handle -fsplit-stack. These are the first instructions in the
10889 function, even before the regular prologue. */
10890
10891 void
10892 ix86_expand_split_stack_prologue (void)
10893 {
10894 struct ix86_frame frame;
10895 HOST_WIDE_INT allocate;
10896 unsigned HOST_WIDE_INT args_size;
10897 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
10898 rtx scratch_reg = NULL_RTX;
10899 rtx varargs_label = NULL_RTX;
10900 rtx fn;
10901
10902 gcc_assert (flag_split_stack && reload_completed);
10903
10904 ix86_finalize_stack_realign_flags ();
10905 ix86_compute_frame_layout (&frame);
10906 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
10907
10908 /* This is the label we will branch to if we have enough stack
10909 space. We expect the basic block reordering pass to reverse this
10910 branch if optimizing, so that we branch in the unlikely case. */
10911 label = gen_label_rtx ();
10912
10913 /* We need to compare the stack pointer minus the frame size with
10914 the stack boundary in the TCB. The stack boundary always gives
10915 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
10916 can compare directly. Otherwise we need to do an addition. */
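/* A sketch of what this expands to for a small 64-bit frame (the exact
   TCB slot is target-specific; it must match __morestack in
   libgcc/config/i386/morestack.S):

	cmp	%fs:<stack-boundary-offset>, %rsp
	jae	.Lenough_stack

   For larger frames %rsp is first copied into a scratch register and
   the frame size subtracted before the comparison.  */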
10917
10918 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
10919 UNSPEC_STACK_CHECK);
10920 limit = gen_rtx_CONST (Pmode, limit);
10921 limit = gen_rtx_MEM (Pmode, limit);
10922 if (allocate < SPLIT_STACK_AVAILABLE)
10923 current = stack_pointer_rtx;
10924 else
10925 {
10926 unsigned int scratch_regno;
10927 rtx offset;
10928
10929 /* We need a scratch register to hold the stack pointer minus
10930 the required frame size. Since this is the very start of the
10931 function, the scratch register can be any caller-saved
10932 register which is not used for parameters. */
10933 offset = GEN_INT (- allocate);
10934 scratch_regno = split_stack_prologue_scratch_regno ();
10935 if (scratch_regno == INVALID_REGNUM)
10936 return;
10937 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10938 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
10939 {
10940 /* We don't use ix86_gen_add3 in this case because it will
10941 want to split to lea, but when not optimizing the insn
10942 will not be split after this point. */
10943 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
10944 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10945 offset)));
10946 }
10947 else
10948 {
10949 emit_move_insn (scratch_reg, offset);
10950 emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
10951 stack_pointer_rtx));
10952 }
10953 current = scratch_reg;
10954 }
10955
10956 ix86_expand_branch (GEU, current, limit, label);
10957 jump_insn = get_last_insn ();
10958 JUMP_LABEL (jump_insn) = label;
10959
10960 /* Mark the jump as very likely to be taken. */
10961 add_reg_note (jump_insn, REG_BR_PROB,
10962 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
10963
10964 if (split_stack_fn == NULL_RTX)
10965 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
10966 fn = split_stack_fn;
10967
10968 /* Get more stack space. We pass in the desired stack space and the
10969 size of the arguments to copy to the new stack. In 32-bit mode
10970 we push the parameters; __morestack will return on a new stack
10971 anyhow. In 64-bit mode we pass the parameters in r10 and
10972 r11. */
10973 allocate_rtx = GEN_INT (allocate);
10974 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
10975 call_fusage = NULL_RTX;
10976 if (TARGET_64BIT)
10977 {
10978 rtx reg10, reg11;
10979
10980 reg10 = gen_rtx_REG (Pmode, R10_REG);
10981 reg11 = gen_rtx_REG (Pmode, R11_REG);
10982
10983 /* If this function uses a static chain, it will be in %r10.
10984 Preserve it across the call to __morestack. */
10985 if (DECL_STATIC_CHAIN (cfun->decl))
10986 {
10987 rtx rax;
10988
10989 rax = gen_rtx_REG (Pmode, AX_REG);
10990 emit_move_insn (rax, reg10);
10991 use_reg (&call_fusage, rax);
10992 }
10993
10994 if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
10995 {
10996 HOST_WIDE_INT argval;
10997
10998 /* When using the large model we need to load the address
10999 into a register, and we've run out of registers. So we
11000 switch to a different calling convention, and we call a
11001 different function: __morestack_large. We pass the
11002 argument size in the upper 32 bits of r10 and pass the
11003 frame size in the lower 32 bits. */
11004 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
11005 gcc_assert ((args_size & 0xffffffff) == args_size);
11006
11007 if (split_stack_fn_large == NULL_RTX)
11008 split_stack_fn_large =
11009 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
11010
11011 if (ix86_cmodel == CM_LARGE_PIC)
11012 {
11013 rtx label, x;
11014
11015 label = gen_label_rtx ();
11016 emit_label (label);
11017 LABEL_PRESERVE_P (label) = 1;
11018 emit_insn (gen_set_rip_rex64 (reg10, label));
11019 emit_insn (gen_set_got_offset_rex64 (reg11, label));
11020 emit_insn (gen_adddi3 (reg10, reg10, reg11));
11021 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
11022 UNSPEC_GOT);
11023 x = gen_rtx_CONST (Pmode, x);
11024 emit_move_insn (reg11, x);
11025 x = gen_rtx_PLUS (Pmode, reg10, reg11);
11026 x = gen_const_mem (Pmode, x);
11027 emit_move_insn (reg11, x);
11028 }
11029 else
11030 emit_move_insn (reg11, split_stack_fn_large);
11031
11032 fn = reg11;
11033
11034 argval = ((args_size << 16) << 16) + allocate;
11035 emit_move_insn (reg10, GEN_INT (argval));
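	  /* Worked example with illustrative values: args_size == 0x20 and
	     allocate == 0x1000 give argval == 0x0000002000001000, i.e. the
	     argument size in the upper 32 bits of %r10 and the frame size
	     in the lower 32 bits.  */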
11036 }
11037 else
11038 {
11039 emit_move_insn (reg10, allocate_rtx);
11040 emit_move_insn (reg11, GEN_INT (args_size));
11041 use_reg (&call_fusage, reg11);
11042 }
11043
11044 use_reg (&call_fusage, reg10);
11045 }
11046 else
11047 {
11048 emit_insn (gen_push (GEN_INT (args_size)));
11049 emit_insn (gen_push (allocate_rtx));
11050 }
11051 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
11052 GEN_INT (UNITS_PER_WORD), constm1_rtx,
11053 NULL_RTX, false);
11054 add_function_usage_to (call_insn, call_fusage);
11055
11056 /* In order to make call/return prediction work right, we now need
11057 to execute a return instruction. See
11058 libgcc/config/i386/morestack.S for the details on how this works.
11059
11060 For flow purposes gcc must not see this as a return
11061 instruction--we need control flow to continue at the subsequent
11062 label. Therefore, we use an unspec. */
11063 gcc_assert (crtl->args.pops_args < 65536);
11064 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
11065
11066 /* If we are in 64-bit mode and this function uses a static chain,
11067      we saved %r10 in %rax before calling __morestack.  */
11068 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
11069 emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
11070 gen_rtx_REG (Pmode, AX_REG));
11071
11072 /* If this function calls va_start, we need to store a pointer to
11073 the arguments on the old stack, because they may not have been
11074 all copied to the new stack. At this point the old stack can be
11075 found at the frame pointer value used by __morestack, because
11076 __morestack has set that up before calling back to us. Here we
11077 store that pointer in a scratch register, and in
11078 ix86_expand_prologue we store the scratch register in a stack
11079 slot. */
11080 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11081 {
11082 unsigned int scratch_regno;
11083 rtx frame_reg;
11084 int words;
11085
11086 scratch_regno = split_stack_prologue_scratch_regno ();
11087 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11088 frame_reg = gen_rtx_REG (Pmode, BP_REG);
11089
11090 /* 64-bit:
11091 fp -> old fp value
11092 return address within this function
11093 return address of caller of this function
11094 stack arguments
11095 So we add three words to get to the stack arguments.
11096
11097 32-bit:
11098 fp -> old fp value
11099 return address within this function
11100 first argument to __morestack
11101 second argument to __morestack
11102 return address of caller of this function
11103 stack arguments
11104 So we add five words to get to the stack arguments.
11105 */
11106 words = TARGET_64BIT ? 3 : 5;
11107 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11108 gen_rtx_PLUS (Pmode, frame_reg,
11109 GEN_INT (words * UNITS_PER_WORD))));
11110
11111 varargs_label = gen_label_rtx ();
11112 emit_jump_insn (gen_jump (varargs_label));
11113 JUMP_LABEL (get_last_insn ()) = varargs_label;
11114
11115 emit_barrier ();
11116 }
11117
11118 emit_label (label);
11119 LABEL_NUSES (label) = 1;
11120
11121 /* If this function calls va_start, we now have to set the scratch
11122 register for the case where we do not call __morestack. In this
11123 case we need to set it based on the stack pointer. */
11124 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11125 {
11126 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11127 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11128 GEN_INT (UNITS_PER_WORD))));
11129
11130 emit_label (varargs_label);
11131 LABEL_NUSES (varargs_label) = 1;
11132 }
11133 }
11134
11135 /* We may have to tell the dataflow pass that the split stack prologue
11136 is initializing a scratch register. */
11137
11138 static void
11139 ix86_live_on_entry (bitmap regs)
11140 {
11141 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11142 {
11143 gcc_assert (flag_split_stack);
11144 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
11145 }
11146 }
11147 \f
11148 /* Determine if OP is a suitable SUBREG RTX for an address.  */
11149
11150 static bool
11151 ix86_address_subreg_operand (rtx op)
11152 {
11153 enum machine_mode mode;
11154
11155 if (!REG_P (op))
11156 return false;
11157
11158 mode = GET_MODE (op);
11159
11160 if (GET_MODE_CLASS (mode) != MODE_INT)
11161 return false;
11162
11163   /* Don't allow SUBREGs that span more than a word.  They can lead to spill
11164 failures when the register is one word out of a two word structure. */
11165 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
11166 return false;
11167
11168 /* Allow only SUBREGs of non-eliminable hard registers. */
11169 return register_no_elim_operand (op, mode);
11170 }
11171
11172 /* Extract the parts of an RTL expression that is a valid memory address
11173 for an instruction. Return 0 if the structure of the address is
11174 grossly off. Return -1 if the address contains ASHIFT, so it is not
11175    strictly valid, but is still used for computing the length of an lea
   instruction.  */
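/* For example, the 32-bit address 12(%ebx,%esi,4), whose canonical RTL is
   roughly (plus (plus (mult (reg esi) (const_int 4)) (reg ebx))
   (const_int 12)), decomposes into base = %ebx, index = %esi, scale = 4,
   disp = 12, seg = SEG_DEFAULT.  */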
11176
11177 int
11178 ix86_decompose_address (rtx addr, struct ix86_address *out)
11179 {
11180 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
11181 rtx base_reg, index_reg;
11182 HOST_WIDE_INT scale = 1;
11183 rtx scale_rtx = NULL_RTX;
11184 rtx tmp;
11185 int retval = 1;
11186 enum ix86_address_seg seg = SEG_DEFAULT;
11187
11188 /* Allow zero-extended SImode addresses,
11189      they will be emitted with an addr32 prefix.  */
11190 if (TARGET_64BIT && GET_MODE (addr) == DImode)
11191 {
11192 if (GET_CODE (addr) == ZERO_EXTEND
11193 && GET_MODE (XEXP (addr, 0)) == SImode)
11194 addr = XEXP (addr, 0);
11195 else if (GET_CODE (addr) == AND
11196 && const_32bit_mask (XEXP (addr, 1), DImode))
11197 {
11198 addr = XEXP (addr, 0);
11199
11200 /* Strip subreg. */
11201 if (GET_CODE (addr) == SUBREG
11202 && GET_MODE (SUBREG_REG (addr)) == SImode)
11203 addr = SUBREG_REG (addr);
11204 }
11205 }
11206
11207 if (REG_P (addr))
11208 base = addr;
11209 else if (GET_CODE (addr) == SUBREG)
11210 {
11211 if (ix86_address_subreg_operand (SUBREG_REG (addr)))
11212 base = addr;
11213 else
11214 return 0;
11215 }
11216 else if (GET_CODE (addr) == PLUS)
11217 {
11218 rtx addends[4], op;
11219 int n = 0, i;
11220
11221 op = addr;
11222 do
11223 {
11224 if (n >= 4)
11225 return 0;
11226 addends[n++] = XEXP (op, 1);
11227 op = XEXP (op, 0);
11228 }
11229 while (GET_CODE (op) == PLUS);
11230 if (n >= 4)
11231 return 0;
11232 addends[n] = op;
11233
11234 for (i = n; i >= 0; --i)
11235 {
11236 op = addends[i];
11237 switch (GET_CODE (op))
11238 {
11239 case MULT:
11240 if (index)
11241 return 0;
11242 index = XEXP (op, 0);
11243 scale_rtx = XEXP (op, 1);
11244 break;
11245
11246 case ASHIFT:
11247 if (index)
11248 return 0;
11249 index = XEXP (op, 0);
11250 tmp = XEXP (op, 1);
11251 if (!CONST_INT_P (tmp))
11252 return 0;
11253 scale = INTVAL (tmp);
11254 if ((unsigned HOST_WIDE_INT) scale > 3)
11255 return 0;
11256 scale = 1 << scale;
11257 break;
11258
11259 case UNSPEC:
11260 if (XINT (op, 1) == UNSPEC_TP
11261 && TARGET_TLS_DIRECT_SEG_REFS
11262 && seg == SEG_DEFAULT)
11263 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
11264 else
11265 return 0;
11266 break;
11267
11268 case SUBREG:
11269 if (!ix86_address_subreg_operand (SUBREG_REG (op)))
11270 return 0;
11271 /* FALLTHRU */
11272
11273 case REG:
11274 if (!base)
11275 base = op;
11276 else if (!index)
11277 index = op;
11278 else
11279 return 0;
11280 break;
11281
11282 case CONST:
11283 case CONST_INT:
11284 case SYMBOL_REF:
11285 case LABEL_REF:
11286 if (disp)
11287 return 0;
11288 disp = op;
11289 break;
11290
11291 default:
11292 return 0;
11293 }
11294 }
11295 }
11296 else if (GET_CODE (addr) == MULT)
11297 {
11298 index = XEXP (addr, 0); /* index*scale */
11299 scale_rtx = XEXP (addr, 1);
11300 }
11301 else if (GET_CODE (addr) == ASHIFT)
11302 {
11303 /* We're called for lea too, which implements ashift on occasion. */
11304 index = XEXP (addr, 0);
11305 tmp = XEXP (addr, 1);
11306 if (!CONST_INT_P (tmp))
11307 return 0;
11308 scale = INTVAL (tmp);
11309 if ((unsigned HOST_WIDE_INT) scale > 3)
11310 return 0;
11311 scale = 1 << scale;
11312 retval = -1;
11313 }
11314 else
11315 disp = addr; /* displacement */
11316
11317 if (index)
11318 {
11319 if (REG_P (index))
11320 ;
11321 else if (GET_CODE (index) == SUBREG
11322 && ix86_address_subreg_operand (SUBREG_REG (index)))
11323 ;
11324 else
11325 return 0;
11326 }
11327
11328 /* Extract the integral value of scale. */
11329 if (scale_rtx)
11330 {
11331 if (!CONST_INT_P (scale_rtx))
11332 return 0;
11333 scale = INTVAL (scale_rtx);
11334 }
11335
11336 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
11337 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
11338
11339 /* Avoid useless 0 displacement. */
11340 if (disp == const0_rtx && (base || index))
11341 disp = NULL_RTX;
11342
11343   /* Allow arg pointer and stack pointer as index if there is no scaling.  */
11344 if (base_reg && index_reg && scale == 1
11345 && (index_reg == arg_pointer_rtx
11346 || index_reg == frame_pointer_rtx
11347 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
11348 {
11349 rtx tmp;
11350 tmp = base, base = index, index = tmp;
11351 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
11352 }
11353
11354 /* Special case: %ebp cannot be encoded as a base without a displacement.
11355 Similarly %r13. */
11356 if (!disp
11357 && base_reg
11358 && (base_reg == hard_frame_pointer_rtx
11359 || base_reg == frame_pointer_rtx
11360 || base_reg == arg_pointer_rtx
11361 || (REG_P (base_reg)
11362 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
11363 || REGNO (base_reg) == R13_REG))))
11364 disp = const0_rtx;
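  /* This restriction comes from the instruction encoding: with mod == 00,
     the value 101 in the r/m (or SIB base) field does not mean "EBP, no
     displacement" but "32-bit displacement only" (RIP-relative in 64-bit
     mode), so EBP and R13 as a base always need at least a zero disp8.  */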
11365
11366   /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
11367 Avoid this by transforming to [%esi+0].
11368 Reload calls address legitimization without cfun defined, so we need
11369 to test cfun for being non-NULL. */
11370 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
11371 && base_reg && !index_reg && !disp
11372 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
11373 disp = const0_rtx;
11374
11375 /* Special case: encode reg+reg instead of reg*2. */
11376 if (!base && index && scale == 2)
11377 base = index, base_reg = index_reg, scale = 1;
11378
11379 /* Special case: scaling cannot be encoded without base or displacement. */
11380 if (!base && !disp && index && scale != 1)
11381 disp = const0_rtx;
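  /* Likewise, a scaled index with no base is encoded with SIB base == 101
     and mod == 00, which implies a 32-bit displacement field, so an
     explicit zero displacement is required; the reg+reg rewrite above
     avoids that longer encoding when the scale is 2.  */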
11382
11383 out->base = base;
11384 out->index = index;
11385 out->disp = disp;
11386 out->scale = scale;
11387 out->seg = seg;
11388
11389 return retval;
11390 }
11391 \f
11392 /* Return the cost of the memory address X.
11393    For i386, it is better to use a complex address than let gcc copy
11394    the address into a reg and make a new pseudo.  But not if the address
11395    requires two regs - that would mean more pseudos with longer
11396    lifetimes.  */
11397 static int
11398 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
11399 {
11400 struct ix86_address parts;
11401 int cost = 1;
11402 int ok = ix86_decompose_address (x, &parts);
11403
11404 gcc_assert (ok);
11405
11406 if (parts.base && GET_CODE (parts.base) == SUBREG)
11407 parts.base = SUBREG_REG (parts.base);
11408 if (parts.index && GET_CODE (parts.index) == SUBREG)
11409 parts.index = SUBREG_REG (parts.index);
11410
11411 /* Attempt to minimize number of registers in the address. */
11412 if ((parts.base
11413 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
11414 || (parts.index
11415 && (!REG_P (parts.index)
11416 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
11417 cost++;
11418
11419 if (parts.base
11420 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
11421 && parts.index
11422 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
11423 && parts.base != parts.index)
11424 cost++;
11425
11426   /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
11427      since its predecode logic can't detect the length of instructions
11428      and decoding degenerates to vector decoded.  Increase the cost of such
11429      addresses here.  The penalty is at least 2 cycles.  It may be worthwhile
11430      to split such addresses or even refuse them entirely.
11431
11432      The following addressing modes are affected:
11433       [base+scale*index]
11434       [scale*index+disp]
11435       [base+index]
11436
11437      The first and last cases may be avoidable by explicitly coding a zero
11438      displacement in the memory address, but I don't have an AMD-K6 machine
11439      handy to check this theory.  */
11440
11441 if (TARGET_K6
11442 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
11443 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
11444 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
11445 cost += 10;
11446
11447 return cost;
11448 }
11449 \f
11450 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
11451    this is used to form addresses to local data when -fPIC is in
11452 use. */
11453
11454 static bool
11455 darwin_local_data_pic (rtx disp)
11456 {
11457 return (GET_CODE (disp) == UNSPEC
11458 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
11459 }
11460
11461 /* Determine if a given RTX is a valid constant. We already know this
11462 satisfies CONSTANT_P. */
11463
11464 static bool
11465 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
11466 {
11467 switch (GET_CODE (x))
11468 {
11469 case CONST:
11470 x = XEXP (x, 0);
11471
11472 if (GET_CODE (x) == PLUS)
11473 {
11474 if (!CONST_INT_P (XEXP (x, 1)))
11475 return false;
11476 x = XEXP (x, 0);
11477 }
11478
11479 if (TARGET_MACHO && darwin_local_data_pic (x))
11480 return true;
11481
11482 /* Only some unspecs are valid as "constants". */
11483 if (GET_CODE (x) == UNSPEC)
11484 switch (XINT (x, 1))
11485 {
11486 case UNSPEC_GOT:
11487 case UNSPEC_GOTOFF:
11488 case UNSPEC_PLTOFF:
11489 return TARGET_64BIT;
11490 case UNSPEC_TPOFF:
11491 case UNSPEC_NTPOFF:
11492 x = XVECEXP (x, 0, 0);
11493 return (GET_CODE (x) == SYMBOL_REF
11494 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11495 case UNSPEC_DTPOFF:
11496 x = XVECEXP (x, 0, 0);
11497 return (GET_CODE (x) == SYMBOL_REF
11498 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
11499 default:
11500 return false;
11501 }
11502
11503 /* We must have drilled down to a symbol. */
11504 if (GET_CODE (x) == LABEL_REF)
11505 return true;
11506 if (GET_CODE (x) != SYMBOL_REF)
11507 return false;
11508 /* FALLTHRU */
11509
11510 case SYMBOL_REF:
11511 /* TLS symbols are never valid. */
11512 if (SYMBOL_REF_TLS_MODEL (x))
11513 return false;
11514
11515 /* DLLIMPORT symbols are never valid. */
11516 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11517 && SYMBOL_REF_DLLIMPORT_P (x))
11518 return false;
11519
11520 #if TARGET_MACHO
11521 /* mdynamic-no-pic */
11522 if (MACHO_DYNAMIC_NO_PIC_P)
11523 return machopic_symbol_defined_p (x);
11524 #endif
11525 break;
11526
11527 case CONST_DOUBLE:
11528 if (GET_MODE (x) == TImode
11529 && x != CONST0_RTX (TImode)
11530 && !TARGET_64BIT)
11531 return false;
11532 break;
11533
11534 case CONST_VECTOR:
11535 if (!standard_sse_constant_p (x))
11536 return false;
11537
11538 default:
11539 break;
11540 }
11541
11542 /* Otherwise we handle everything else in the move patterns. */
11543 return true;
11544 }
11545
11546 /* Determine if it's legal to put X into the constant pool. This
11547 is not possible for the address of thread-local symbols, which
11548 is checked above. */
11549
11550 static bool
11551 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
11552 {
11553 /* We can always put integral constants and vectors in memory. */
11554 switch (GET_CODE (x))
11555 {
11556 case CONST_INT:
11557 case CONST_DOUBLE:
11558 case CONST_VECTOR:
11559 return false;
11560
11561 default:
11562 break;
11563 }
11564 return !ix86_legitimate_constant_p (mode, x);
11565 }
11566
11567
11568 /* Nonzero if the constant value X is a legitimate general operand
11569 when generating PIC code. It is given that flag_pic is on and
11570 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11571
11572 bool
11573 legitimate_pic_operand_p (rtx x)
11574 {
11575 rtx inner;
11576
11577 switch (GET_CODE (x))
11578 {
11579 case CONST:
11580 inner = XEXP (x, 0);
11581 if (GET_CODE (inner) == PLUS
11582 && CONST_INT_P (XEXP (inner, 1)))
11583 inner = XEXP (inner, 0);
11584
11585 /* Only some unspecs are valid as "constants". */
11586 if (GET_CODE (inner) == UNSPEC)
11587 switch (XINT (inner, 1))
11588 {
11589 case UNSPEC_GOT:
11590 case UNSPEC_GOTOFF:
11591 case UNSPEC_PLTOFF:
11592 return TARGET_64BIT;
11593 case UNSPEC_TPOFF:
11594 x = XVECEXP (inner, 0, 0);
11595 return (GET_CODE (x) == SYMBOL_REF
11596 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11597 case UNSPEC_MACHOPIC_OFFSET:
11598 return legitimate_pic_address_disp_p (x);
11599 default:
11600 return false;
11601 }
11602 /* FALLTHRU */
11603
11604 case SYMBOL_REF:
11605 case LABEL_REF:
11606 return legitimate_pic_address_disp_p (x);
11607
11608 default:
11609 return true;
11610 }
11611 }
11612
11613 /* Determine if a given CONST RTX is a valid memory displacement
11614 in PIC mode. */
11615
11616 bool
11617 legitimate_pic_address_disp_p (rtx disp)
11618 {
11619 bool saw_plus;
11620
11621 /* In 64bit mode we can allow direct addresses of symbols and labels
11622 when they are not dynamic symbols. */
11623 if (TARGET_64BIT)
11624 {
11625 rtx op0 = disp, op1;
11626
11627 switch (GET_CODE (disp))
11628 {
11629 case LABEL_REF:
11630 return true;
11631
11632 case CONST:
11633 if (GET_CODE (XEXP (disp, 0)) != PLUS)
11634 break;
11635 op0 = XEXP (XEXP (disp, 0), 0);
11636 op1 = XEXP (XEXP (disp, 0), 1);
11637 if (!CONST_INT_P (op1)
11638 || INTVAL (op1) >= 16*1024*1024
11639 || INTVAL (op1) < -16*1024*1024)
11640 break;
11641 if (GET_CODE (op0) == LABEL_REF)
11642 return true;
11643 if (GET_CODE (op0) != SYMBOL_REF)
11644 break;
11645 /* FALLTHRU */
11646
11647 case SYMBOL_REF:
11648 /* TLS references should always be enclosed in UNSPEC. */
11649 if (SYMBOL_REF_TLS_MODEL (op0))
11650 return false;
11651 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
11652 && ix86_cmodel != CM_LARGE_PIC)
11653 return true;
11654 break;
11655
11656 default:
11657 break;
11658 }
11659 }
11660 if (GET_CODE (disp) != CONST)
11661 return false;
11662 disp = XEXP (disp, 0);
11663
11664 if (TARGET_64BIT)
11665 {
11666       /* It is unsafe to allow PLUS expressions here; this restriction limits
11667 	 the allowed distance into the GOT tables.  We should not need these
	 anyway.  */
11668 if (GET_CODE (disp) != UNSPEC
11669 || (XINT (disp, 1) != UNSPEC_GOTPCREL
11670 && XINT (disp, 1) != UNSPEC_GOTOFF
11671 && XINT (disp, 1) != UNSPEC_PCREL
11672 && XINT (disp, 1) != UNSPEC_PLTOFF))
11673 return false;
11674
11675 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
11676 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
11677 return false;
11678 return true;
11679 }
11680
11681 saw_plus = false;
11682 if (GET_CODE (disp) == PLUS)
11683 {
11684 if (!CONST_INT_P (XEXP (disp, 1)))
11685 return false;
11686 disp = XEXP (disp, 0);
11687 saw_plus = true;
11688 }
11689
11690 if (TARGET_MACHO && darwin_local_data_pic (disp))
11691 return true;
11692
11693 if (GET_CODE (disp) != UNSPEC)
11694 return false;
11695
11696 switch (XINT (disp, 1))
11697 {
11698 case UNSPEC_GOT:
11699 if (saw_plus)
11700 return false;
11701 /* We need to check for both symbols and labels because VxWorks loads
11702 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
11703 details. */
11704 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11705 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
11706 case UNSPEC_GOTOFF:
11707 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
11708 	 While the ABI also specifies a 32bit relocation, we don't produce it
11709 	 in the small PIC model at all.  */
11710 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11711 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
11712 && !TARGET_64BIT)
11713 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
11714 return false;
11715 case UNSPEC_GOTTPOFF:
11716 case UNSPEC_GOTNTPOFF:
11717 case UNSPEC_INDNTPOFF:
11718 if (saw_plus)
11719 return false;
11720 disp = XVECEXP (disp, 0, 0);
11721 return (GET_CODE (disp) == SYMBOL_REF
11722 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
11723 case UNSPEC_NTPOFF:
11724 disp = XVECEXP (disp, 0, 0);
11725 return (GET_CODE (disp) == SYMBOL_REF
11726 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
11727 case UNSPEC_DTPOFF:
11728 disp = XVECEXP (disp, 0, 0);
11729 return (GET_CODE (disp) == SYMBOL_REF
11730 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
11731 }
11732
11733 return false;
11734 }
11735
11736 /* Recognizes RTL expressions that are valid memory addresses for an
11737 instruction. The MODE argument is the machine mode for the MEM
11738 expression that wants to use this address.
11739
11740    It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
11741 convert common non-canonical forms to canonical form so that they will
11742 be recognized. */
11743
11744 static bool
11745 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
11746 rtx addr, bool strict)
11747 {
11748 struct ix86_address parts;
11749 rtx base, index, disp;
11750 HOST_WIDE_INT scale;
11751
11752 if (ix86_decompose_address (addr, &parts) <= 0)
11753 /* Decomposition failed. */
11754 return false;
11755
11756 base = parts.base;
11757 index = parts.index;
11758 disp = parts.disp;
11759 scale = parts.scale;
11760
11761 /* Validate base register. */
11762 if (base)
11763 {
11764 rtx reg;
11765
11766 if (REG_P (base))
11767 reg = base;
11768 else if (GET_CODE (base) == SUBREG && REG_P (SUBREG_REG (base)))
11769 reg = SUBREG_REG (base);
11770 else
11771 /* Base is not a register. */
11772 return false;
11773
11774 if (GET_MODE (base) != SImode && GET_MODE (base) != DImode)
11775 return false;
11776
11777 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
11778 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
11779 /* Base is not valid. */
11780 return false;
11781 }
11782
11783 /* Validate index register. */
11784 if (index)
11785 {
11786 rtx reg;
11787
11788 if (REG_P (index))
11789 reg = index;
11790 else if (GET_CODE (index) == SUBREG && REG_P (SUBREG_REG (index)))
11791 reg = SUBREG_REG (index);
11792 else
11793 /* Index is not a register. */
11794 return false;
11795
11796 if (GET_MODE (index) != SImode && GET_MODE (index) != DImode)
11797 return false;
11798
11799 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
11800 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
11801 /* Index is not valid. */
11802 return false;
11803 }
11804
11805 /* Index and base should have the same mode. */
11806 if (base && index
11807 && GET_MODE (base) != GET_MODE (index))
11808 return false;
11809
11810 /* Validate scale factor. */
11811 if (scale != 1)
11812 {
11813 if (!index)
11814 /* Scale without index. */
11815 return false;
11816
11817 if (scale != 2 && scale != 4 && scale != 8)
11818 /* Scale is not a valid multiplier. */
11819 return false;
11820 }
11821
11822 /* Validate displacement. */
11823 if (disp)
11824 {
11825 if (GET_CODE (disp) == CONST
11826 && GET_CODE (XEXP (disp, 0)) == UNSPEC
11827 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
11828 switch (XINT (XEXP (disp, 0), 1))
11829 {
11830 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
11831 	     used.  While the ABI also specifies 32bit relocations, we don't
11832 	     produce them at all and use IP-relative addressing instead.  */
11833 case UNSPEC_GOT:
11834 case UNSPEC_GOTOFF:
11835 gcc_assert (flag_pic);
11836 if (!TARGET_64BIT)
11837 goto is_legitimate_pic;
11838
11839 /* 64bit address unspec. */
11840 return false;
11841
11842 case UNSPEC_GOTPCREL:
11843 case UNSPEC_PCREL:
11844 gcc_assert (flag_pic);
11845 goto is_legitimate_pic;
11846
11847 case UNSPEC_GOTTPOFF:
11848 case UNSPEC_GOTNTPOFF:
11849 case UNSPEC_INDNTPOFF:
11850 case UNSPEC_NTPOFF:
11851 case UNSPEC_DTPOFF:
11852 break;
11853
11854 case UNSPEC_STACK_CHECK:
11855 gcc_assert (flag_split_stack);
11856 break;
11857
11858 default:
11859 /* Invalid address unspec. */
11860 return false;
11861 }
11862
11863 else if (SYMBOLIC_CONST (disp)
11864 && (flag_pic
11865 || (TARGET_MACHO
11866 #if TARGET_MACHO
11867 && MACHOPIC_INDIRECT
11868 && !machopic_operand_p (disp)
11869 #endif
11870 )))
11871 {
11872
11873 is_legitimate_pic:
11874 if (TARGET_64BIT && (index || base))
11875 {
11876 /* foo@dtpoff(%rX) is ok. */
11877 if (GET_CODE (disp) != CONST
11878 || GET_CODE (XEXP (disp, 0)) != PLUS
11879 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
11880 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
11881 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
11882 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
11883 /* Non-constant pic memory reference. */
11884 return false;
11885 }
11886 else if ((!TARGET_MACHO || flag_pic)
11887 && ! legitimate_pic_address_disp_p (disp))
11888 /* Displacement is an invalid pic construct. */
11889 return false;
11890 #if TARGET_MACHO
11891 else if (MACHO_DYNAMIC_NO_PIC_P
11892 && !ix86_legitimate_constant_p (Pmode, disp))
11893 	/* Displacement must be referenced via a non_lazy_pointer.  */
11894 return false;
11895 #endif
11896
11897 /* This code used to verify that a symbolic pic displacement
11898 includes the pic_offset_table_rtx register.
11899
11900 	     While this is a good idea, unfortunately these constructs may
11901 	     be created by the "adds using lea" optimization for incorrect
11902 	     code like:
11903
11904 	     int a;
11905 	     int foo(int i)
11906 	       {
11907 	         return *(&a+i);
11908 	       }
11909
11910 	     This code is nonsensical, but results in addressing the
11911 	     GOT table with a pic_offset_table_rtx base.  We can't
11912 	     just refuse it easily, since it gets matched by the
11913 	     "addsi3" pattern, which later gets split to lea when the
11914 	     output register differs from the input.  While this could
11915 	     be handled by a separate addsi pattern for this case that
11916 	     never results in lea, disabling this test seems to be the
11917 	     easier and correct fix for the crash.  */
11918 }
11919 else if (GET_CODE (disp) != LABEL_REF
11920 && !CONST_INT_P (disp)
11921 && (GET_CODE (disp) != CONST
11922 || !ix86_legitimate_constant_p (Pmode, disp))
11923 && (GET_CODE (disp) != SYMBOL_REF
11924 || !ix86_legitimate_constant_p (Pmode, disp)))
11925 /* Displacement is not constant. */
11926 return false;
11927 else if (TARGET_64BIT
11928 && !x86_64_immediate_operand (disp, VOIDmode))
11929 /* Displacement is out of range. */
11930 return false;
11931 }
11932
11933 /* Everything looks valid. */
11934 return true;
11935 }
11936
11937 /* Determine if a given RTX is a valid constant address. */
11938
11939 bool
11940 constant_address_p (rtx x)
11941 {
11942 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
11943 }
11944 \f
11945 /* Return a unique alias set for the GOT. */
11946
11947 static alias_set_type
11948 ix86_GOT_alias_set (void)
11949 {
11950 static alias_set_type set = -1;
11951 if (set == -1)
11952 set = new_alias_set ();
11953 return set;
11954 }
11955
11956 /* Return a legitimate reference for ORIG (an address) using the
11957 register REG. If REG is 0, a new pseudo is generated.
11958
11959 There are two types of references that must be handled:
11960
11961 1. Global data references must load the address from the GOT, via
11962 the PIC reg. An insn is emitted to do this load, and the reg is
11963 returned.
11964
11965 2. Static data references, constant pool addresses, and code labels
11966 compute the address as an offset from the GOT, whose base is in
11967 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
11968 differentiate them from global data objects. The returned
11969 address is the PIC reg + an unspec constant.
11970
11971 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
11972 reg also appears in the address. */
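/* As a sketch (32-bit, assuming the PIC register ends up in %ebx): a
   global symbol FOO yields the load
   (mem (plus pic_reg (const (unspec [FOO] UNSPEC_GOT)))), i.e.
   FOO@GOT(%ebx), while a local symbol yields the direct address
   pic_reg + FOO@GOTOFF.  */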
11973
11974 static rtx
11975 legitimize_pic_address (rtx orig, rtx reg)
11976 {
11977 rtx addr = orig;
11978 rtx new_rtx = orig;
11979 rtx base;
11980
11981 #if TARGET_MACHO
11982 if (TARGET_MACHO && !TARGET_64BIT)
11983 {
11984 if (reg == 0)
11985 reg = gen_reg_rtx (Pmode);
11986 /* Use the generic Mach-O PIC machinery. */
11987 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
11988 }
11989 #endif
11990
11991 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
11992 new_rtx = addr;
11993 else if (TARGET_64BIT
11994 && ix86_cmodel != CM_SMALL_PIC
11995 && gotoff_operand (addr, Pmode))
11996 {
11997 rtx tmpreg;
11998 /* This symbol may be referenced via a displacement from the PIC
11999 base address (@GOTOFF). */
12000
12001 if (reload_in_progress)
12002 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12003 if (GET_CODE (addr) == CONST)
12004 addr = XEXP (addr, 0);
12005 if (GET_CODE (addr) == PLUS)
12006 {
12007 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12008 UNSPEC_GOTOFF);
12009 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12010 }
12011 else
12012 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12013 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12014 if (!reg)
12015 tmpreg = gen_reg_rtx (Pmode);
12016 else
12017 tmpreg = reg;
12018 emit_move_insn (tmpreg, new_rtx);
12019
12020 if (reg != 0)
12021 {
12022 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
12023 tmpreg, 1, OPTAB_DIRECT);
12024 new_rtx = reg;
12025 }
12026 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
12027 }
12028 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
12029 {
12030 /* This symbol may be referenced via a displacement from the PIC
12031 base address (@GOTOFF). */
12032
12033 if (reload_in_progress)
12034 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12035 if (GET_CODE (addr) == CONST)
12036 addr = XEXP (addr, 0);
12037 if (GET_CODE (addr) == PLUS)
12038 {
12039 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12040 UNSPEC_GOTOFF);
12041 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12042 }
12043 else
12044 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12045 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12046 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12047
12048 if (reg != 0)
12049 {
12050 emit_move_insn (reg, new_rtx);
12051 new_rtx = reg;
12052 }
12053 }
12054 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
12055 /* We can't use @GOTOFF for text labels on VxWorks;
12056 see gotoff_operand. */
12057 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
12058 {
12059 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12060 {
12061 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
12062 return legitimize_dllimport_symbol (addr, true);
12063 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
12064 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
12065 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
12066 {
12067 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
12068 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
12069 }
12070 }
12071
12072       /* For x64 PE-COFF there is no GOT table.  So we use the address
12073 directly. */
12074 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12075 {
12076 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
12077 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12078
12079 if (reg == 0)
12080 reg = gen_reg_rtx (Pmode);
12081 emit_move_insn (reg, new_rtx);
12082 new_rtx = reg;
12083 }
12084 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
12085 {
12086 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
12087 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12088 new_rtx = gen_const_mem (Pmode, new_rtx);
12089 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12090
12091 if (reg == 0)
12092 reg = gen_reg_rtx (Pmode);
12093 	  /* Use gen_movsi directly, otherwise the address is loaded
12094 	     into a register for CSE.  We don't want to CSE these addresses;
12095 	     instead we CSE addresses from the GOT table, so skip this.  */
12096 emit_insn (gen_movsi (reg, new_rtx));
12097 new_rtx = reg;
12098 }
12099 else
12100 {
12101 /* This symbol must be referenced via a load from the
12102 Global Offset Table (@GOT). */
12103
12104 if (reload_in_progress)
12105 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12106 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
12107 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12108 if (TARGET_64BIT)
12109 new_rtx = force_reg (Pmode, new_rtx);
12110 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12111 new_rtx = gen_const_mem (Pmode, new_rtx);
12112 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12113
12114 if (reg == 0)
12115 reg = gen_reg_rtx (Pmode);
12116 emit_move_insn (reg, new_rtx);
12117 new_rtx = reg;
12118 }
12119 }
12120 else
12121 {
12122 if (CONST_INT_P (addr)
12123 && !x86_64_immediate_operand (addr, VOIDmode))
12124 {
12125 if (reg)
12126 {
12127 emit_move_insn (reg, addr);
12128 new_rtx = reg;
12129 }
12130 else
12131 new_rtx = force_reg (Pmode, addr);
12132 }
12133 else if (GET_CODE (addr) == CONST)
12134 {
12135 addr = XEXP (addr, 0);
12136
12137 /* We must match stuff we generate before. Assume the only
12138 unspecs that can get here are ours. Not that we could do
12139 anything with them anyway.... */
12140 if (GET_CODE (addr) == UNSPEC
12141 || (GET_CODE (addr) == PLUS
12142 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
12143 return orig;
12144 gcc_assert (GET_CODE (addr) == PLUS);
12145 }
12146 if (GET_CODE (addr) == PLUS)
12147 {
12148 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
12149
12150 /* Check first to see if this is a constant offset from a @GOTOFF
12151 symbol reference. */
12152 if (gotoff_operand (op0, Pmode)
12153 && CONST_INT_P (op1))
12154 {
12155 if (!TARGET_64BIT)
12156 {
12157 if (reload_in_progress)
12158 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12159 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
12160 UNSPEC_GOTOFF);
12161 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
12162 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12163 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12164
12165 if (reg != 0)
12166 {
12167 emit_move_insn (reg, new_rtx);
12168 new_rtx = reg;
12169 }
12170 }
12171 else
12172 {
12173 if (INTVAL (op1) < -16*1024*1024
12174 || INTVAL (op1) >= 16*1024*1024)
12175 {
12176 if (!x86_64_immediate_operand (op1, Pmode))
12177 op1 = force_reg (Pmode, op1);
12178 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
12179 }
12180 }
12181 }
12182 else
12183 {
12184 base = legitimize_pic_address (XEXP (addr, 0), reg);
12185 new_rtx = legitimize_pic_address (XEXP (addr, 1),
12186 base == reg ? NULL_RTX : reg);
12187
12188 if (CONST_INT_P (new_rtx))
12189 new_rtx = plus_constant (base, INTVAL (new_rtx));
12190 else
12191 {
12192 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
12193 {
12194 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
12195 new_rtx = XEXP (new_rtx, 1);
12196 }
12197 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
12198 }
12199 }
12200 }
12201 }
12202 return new_rtx;
12203 }
12204 \f
12205 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12206
12207 static rtx
12208 get_thread_pointer (bool to_reg)
12209 {
12210 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
12211
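  /* If ptr_mode is narrower than Pmode (32-bit pointers on a 64-bit
     target), zero-extend the thread pointer to Pmode.  */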
12212 if (GET_MODE (tp) != Pmode)
12213 tp = convert_to_mode (Pmode, tp, 1);
12214
12215 if (to_reg)
12216 tp = copy_addr_to_reg (tp);
12217
12218 return tp;
12219 }
12220
12221 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12222
12223 static GTY(()) rtx ix86_tls_symbol;
12224
12225 static rtx
12226 ix86_tls_get_addr (void)
12227 {
12228 if (!ix86_tls_symbol)
12229 {
12230 const char *sym
12231 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
12232 ? "___tls_get_addr" : "__tls_get_addr");
12233
12234 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
12235 }
12236
12237 return ix86_tls_symbol;
12238 }
12239
12240 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12241
12242 static GTY(()) rtx ix86_tls_module_base_symbol;
12243
12244 rtx
12245 ix86_tls_module_base (void)
12246 {
12247 if (!ix86_tls_module_base_symbol)
12248 {
12249 ix86_tls_module_base_symbol
12250 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
12251
12252 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
12253 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
12254 }
12255
12256 return ix86_tls_module_base_symbol;
12257 }
12258
12259 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12260 false if we expect this to be used for a memory address and true if
12261 we expect to load the address into a register. */
12262
12263 static rtx
12264 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
12265 {
12266 rtx dest, base, off;
12267 rtx pic = NULL_RTX, tp = NULL_RTX;
12268 int type;
12269
12270 switch (model)
12271 {
12272 case TLS_MODEL_GLOBAL_DYNAMIC:
12273 dest = gen_reg_rtx (Pmode);
12274
12275 if (!TARGET_64BIT)
12276 {
12277 if (flag_pic)
12278 pic = pic_offset_table_rtx;
12279 else
12280 {
12281 pic = gen_reg_rtx (Pmode);
12282 emit_insn (gen_set_got (pic));
12283 }
12284 }
12285
12286 if (TARGET_GNU2_TLS)
12287 {
12288 if (TARGET_64BIT)
12289 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
12290 else
12291 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
12292
12293 tp = get_thread_pointer (true);
12294 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
12295
12296 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
12297 }
12298 else
12299 {
12300 rtx caddr = ix86_tls_get_addr ();
12301
12302 if (TARGET_64BIT)
12303 {
12304 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
12305
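	      /* Emit the call in its own sequence and attach it as a const
		 call libcall block, so the value returned in %rax can be
		 shared/CSEd against X.  */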
12306 start_sequence ();
12307 emit_call_insn (gen_tls_global_dynamic_64 (rax, x, caddr));
12308 insns = get_insns ();
12309 end_sequence ();
12310
12311 RTL_CONST_CALL_P (insns) = 1;
12312 emit_libcall_block (insns, dest, rax, x);
12313 }
12314 else
12315 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
12316 }
12317 break;
12318
12319 case TLS_MODEL_LOCAL_DYNAMIC:
12320 base = gen_reg_rtx (Pmode);
12321
12322 if (!TARGET_64BIT)
12323 {
12324 if (flag_pic)
12325 pic = pic_offset_table_rtx;
12326 else
12327 {
12328 pic = gen_reg_rtx (Pmode);
12329 emit_insn (gen_set_got (pic));
12330 }
12331 }
12332
12333 if (TARGET_GNU2_TLS)
12334 {
12335 rtx tmp = ix86_tls_module_base ();
12336
12337 if (TARGET_64BIT)
12338 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
12339 else
12340 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
12341
12342 tp = get_thread_pointer (true);
12343 set_unique_reg_note (get_last_insn (), REG_EQUAL,
12344 gen_rtx_MINUS (Pmode, tmp, tp));
12345 }
12346 else
12347 {
12348 rtx caddr = ix86_tls_get_addr ();
12349
12350 if (TARGET_64BIT)
12351 {
12352 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, eqv;
12353
12354 start_sequence ();
12355 emit_call_insn (gen_tls_local_dynamic_base_64 (rax, caddr));
12356 insns = get_insns ();
12357 end_sequence ();
12358
12359 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
12360 share the LD_BASE result with other LD model accesses. */
12361 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12362 UNSPEC_TLS_LD_BASE);
12363
12364 RTL_CONST_CALL_P (insns) = 1;
12365 emit_libcall_block (insns, base, rax, eqv);
12366 }
12367 else
12368 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
12369 }
12370
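      /* Add the symbol's offset within the module's TLS block (@DTPOFF)
	 to the base computed above.  */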
12371 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
12372 off = gen_rtx_CONST (Pmode, off);
12373
12374 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
12375
12376 if (TARGET_GNU2_TLS)
12377 {
12378 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
12379
12380 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
12381 }
12382 break;
12383
12384 case TLS_MODEL_INITIAL_EXEC:
12385 if (TARGET_64BIT)
12386 {
12387 if (TARGET_SUN_TLS)
12388 {
12389 /* The Sun linker took the AMD64 TLS spec literally
12390 and can only handle %rax as destination of the
12391 initial executable code sequence. */
12392
12393 dest = gen_reg_rtx (Pmode);
12394 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
12395 return dest;
12396 }
12397
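	  /* In 64-bit mode the GOT slot is addressed %rip-relatively, so no
	     explicit PIC register is needed.  */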
12398 pic = NULL;
12399 type = UNSPEC_GOTNTPOFF;
12400 }
12401 else if (flag_pic)
12402 {
12403 if (reload_in_progress)
12404 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12405 pic = pic_offset_table_rtx;
12406 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
12407 }
12408 else if (!TARGET_ANY_GNU_TLS)
12409 {
12410 pic = gen_reg_rtx (Pmode);
12411 emit_insn (gen_set_got (pic));
12412 type = UNSPEC_GOTTPOFF;
12413 }
12414 else
12415 {
12416 pic = NULL;
12417 type = UNSPEC_INDNTPOFF;
12418 }
12419
12420 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
12421 off = gen_rtx_CONST (Pmode, off);
12422 if (pic)
12423 off = gen_rtx_PLUS (Pmode, pic, off);
12424 off = gen_const_mem (Pmode, off);
12425 set_mem_alias_set (off, ix86_GOT_alias_set ());
12426
12427 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12428 {
12429 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12430 off = force_reg (Pmode, off);
12431 return gen_rtx_PLUS (Pmode, base, off);
12432 }
12433 else
12434 {
12435 base = get_thread_pointer (true);
12436 dest = gen_reg_rtx (Pmode);
12437 emit_insn (gen_subsi3 (dest, base, off));
12438 }
12439 break;
12440
12441 case TLS_MODEL_LOCAL_EXEC:
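      /* Local exec: the offset from the thread pointer is known at link
	 time.  @NTPOFF (GNU TLS or 64-bit) is a negative offset added to
	 the thread pointer; the traditional @TPOFF form is subtracted
	 from it below.  */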
12442 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
12443 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12444 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
12445 off = gen_rtx_CONST (Pmode, off);
12446
12447 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12448 {
12449 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12450 return gen_rtx_PLUS (Pmode, base, off);
12451 }
12452 else
12453 {
12454 base = get_thread_pointer (true);
12455 dest = gen_reg_rtx (Pmode);
12456 emit_insn (gen_subsi3 (dest, base, off));
12457 }
12458 break;
12459
12460 default:
12461 gcc_unreachable ();
12462 }
12463
12464 return dest;
12465 }
12466
12467 /* Create or return the unique __imp_DECL dllimport symbol corresponding
12468 to symbol DECL. */
12469
12470 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
12471 htab_t dllimport_map;
12472
12473 static tree
12474 get_dllimport_decl (tree decl)
12475 {
12476 struct tree_map *h, in;
12477 void **loc;
12478 const char *name;
12479 const char *prefix;
12480 size_t namelen, prefixlen;
12481 char *imp_name;
12482 tree to;
12483 rtx rtl;
12484
12485 if (!dllimport_map)
12486 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
12487
12488 in.hash = htab_hash_pointer (decl);
12489 in.base.from = decl;
12490 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
12491 h = (struct tree_map *) *loc;
12492 if (h)
12493 return h->to;
12494
12495 *loc = h = ggc_alloc_tree_map ();
12496 h->hash = in.hash;
12497 h->base.from = decl;
12498 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
12499 VAR_DECL, NULL, ptr_type_node);
12500 DECL_ARTIFICIAL (to) = 1;
12501 DECL_IGNORED_P (to) = 1;
12502 DECL_EXTERNAL (to) = 1;
12503 TREE_READONLY (to) = 1;
12504
12505 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
12506 name = targetm.strip_name_encoding (name);
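  /* Fastcall symbols and targets without a user label prefix get a single
     underscore after "__imp"; otherwise the decorated name carries the
     extra leading underscore.  */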
12507 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
12508 ? "*__imp_" : "*__imp__";
12509 namelen = strlen (name);
12510 prefixlen = strlen (prefix);
12511 imp_name = (char *) alloca (namelen + prefixlen + 1);
12512 memcpy (imp_name, prefix, prefixlen);
12513 memcpy (imp_name + prefixlen, name, namelen + 1);
12514
12515 name = ggc_alloc_string (imp_name, namelen + prefixlen);
12516 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
12517 SET_SYMBOL_REF_DECL (rtl, to);
12518 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
12519
12520 rtl = gen_const_mem (Pmode, rtl);
12521 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
12522
12523 SET_DECL_RTL (to, rtl);
12524 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
12525
12526 return to;
12527 }
12528
12529 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
12530 true if we require the result be a register. */
12531
12532 static rtx
12533 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
12534 {
12535 tree imp_decl;
12536 rtx x;
12537
12538 gcc_assert (SYMBOL_REF_DECL (symbol));
12539 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
12540
12541 x = DECL_RTL (imp_decl);
12542 if (want_reg)
12543 x = force_reg (Pmode, x);
12544 return x;
12545 }
12546
12547 /* Try machine-dependent ways of modifying an illegitimate address
12548 to be legitimate. If we find one, return the new, valid address.
12549 This macro is used in only one place: `memory_address' in explow.c.
12550
12551 OLDX is the address as it was before break_out_memory_refs was called.
12552 In some cases it is useful to look at this to decide what needs to be done.
12553
12554 It is always safe for this macro to do nothing. It exists to recognize
12555 opportunities to optimize the output.
12556
12557 For the 80386, we handle X+REG by loading X into a register R and
12558 using R+REG. R will go in a general reg and indexing will be used.
12559 However, if REG is a broken-out memory address or multiplication,
12560 nothing needs to be done because REG can certainly go in a general reg.
12561
12562 When -fpic is used, special handling is needed for symbolic references.
12563 See comments by legitimize_pic_address in i386.c for details. */
12564
12565 static rtx
12566 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
12567 enum machine_mode mode)
12568 {
12569 int changed = 0;
12570 unsigned log;
12571
12572 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
12573 if (log)
12574 return legitimize_tls_address (x, (enum tls_model) log, false);
12575 if (GET_CODE (x) == CONST
12576 && GET_CODE (XEXP (x, 0)) == PLUS
12577 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12578 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
12579 {
12580 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
12581 (enum tls_model) log, false);
12582 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12583 }
12584
12585 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12586 {
12587 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
12588 return legitimize_dllimport_symbol (x, true);
12589 if (GET_CODE (x) == CONST
12590 && GET_CODE (XEXP (x, 0)) == PLUS
12591 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12592 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
12593 {
12594 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
12595 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12596 }
12597 }
12598
12599 if (flag_pic && SYMBOLIC_CONST (x))
12600 return legitimize_pic_address (x, 0);
12601
12602 #if TARGET_MACHO
12603 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
12604 return machopic_indirect_data_reference (x, 0);
12605 #endif
12606
12607   /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
12608 if (GET_CODE (x) == ASHIFT
12609 && CONST_INT_P (XEXP (x, 1))
12610 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
12611 {
12612 changed = 1;
12613 log = INTVAL (XEXP (x, 1));
12614 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
12615 GEN_INT (1 << log));
12616 }
12617
12618 if (GET_CODE (x) == PLUS)
12619 {
12620 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12621
12622 if (GET_CODE (XEXP (x, 0)) == ASHIFT
12623 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
12624 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
12625 {
12626 changed = 1;
12627 log = INTVAL (XEXP (XEXP (x, 0), 1));
12628 XEXP (x, 0) = gen_rtx_MULT (Pmode,
12629 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
12630 GEN_INT (1 << log));
12631 }
12632
12633 if (GET_CODE (XEXP (x, 1)) == ASHIFT
12634 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
12635 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
12636 {
12637 changed = 1;
12638 log = INTVAL (XEXP (XEXP (x, 1), 1));
12639 XEXP (x, 1) = gen_rtx_MULT (Pmode,
12640 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
12641 GEN_INT (1 << log));
12642 }
12643
12644 /* Put multiply first if it isn't already. */
12645 if (GET_CODE (XEXP (x, 1)) == MULT)
12646 {
12647 rtx tmp = XEXP (x, 0);
12648 XEXP (x, 0) = XEXP (x, 1);
12649 XEXP (x, 1) = tmp;
12650 changed = 1;
12651 }
12652
12653 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
12654 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12655 created by virtual register instantiation, register elimination, and
12656 similar optimizations. */
12657 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
12658 {
12659 changed = 1;
12660 x = gen_rtx_PLUS (Pmode,
12661 gen_rtx_PLUS (Pmode, XEXP (x, 0),
12662 XEXP (XEXP (x, 1), 0)),
12663 XEXP (XEXP (x, 1), 1));
12664 }
12665
12666 /* Canonicalize
12667 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
12668 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
12669 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
12670 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12671 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
12672 && CONSTANT_P (XEXP (x, 1)))
12673 {
12674 rtx constant;
12675 rtx other = NULL_RTX;
12676
12677 if (CONST_INT_P (XEXP (x, 1)))
12678 {
12679 constant = XEXP (x, 1);
12680 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
12681 }
12682 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
12683 {
12684 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
12685 other = XEXP (x, 1);
12686 }
12687 else
12688 constant = 0;
12689
12690 if (constant)
12691 {
12692 changed = 1;
12693 x = gen_rtx_PLUS (Pmode,
12694 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
12695 XEXP (XEXP (XEXP (x, 0), 1), 0)),
12696 plus_constant (other, INTVAL (constant)));
12697 }
12698 }
12699
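      /* If the canonicalizations above already produced a legitimate
	 address, use it.  */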
12700 if (changed && ix86_legitimate_address_p (mode, x, false))
12701 return x;
12702
12703 if (GET_CODE (XEXP (x, 0)) == MULT)
12704 {
12705 changed = 1;
12706 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
12707 }
12708
12709 if (GET_CODE (XEXP (x, 1)) == MULT)
12710 {
12711 changed = 1;
12712 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
12713 }
12714
12715 if (changed
12716 && REG_P (XEXP (x, 1))
12717 && REG_P (XEXP (x, 0)))
12718 return x;
12719
12720 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
12721 {
12722 changed = 1;
12723 x = legitimize_pic_address (x, 0);
12724 }
12725
12726 if (changed && ix86_legitimate_address_p (mode, x, false))
12727 return x;
12728
12729 if (REG_P (XEXP (x, 0)))
12730 {
12731 rtx temp = gen_reg_rtx (Pmode);
12732 rtx val = force_operand (XEXP (x, 1), temp);
12733 if (val != temp)
12734 {
12735 if (GET_MODE (val) != Pmode)
12736 val = convert_to_mode (Pmode, val, 1);
12737 emit_move_insn (temp, val);
12738 }
12739
12740 XEXP (x, 1) = temp;
12741 return x;
12742 }
12743
12744 else if (REG_P (XEXP (x, 1)))
12745 {
12746 rtx temp = gen_reg_rtx (Pmode);
12747 rtx val = force_operand (XEXP (x, 0), temp);
12748 if (val != temp)
12749 {
12750 if (GET_MODE (val) != Pmode)
12751 val = convert_to_mode (Pmode, val, 1);
12752 emit_move_insn (temp, val);
12753 }
12754
12755 XEXP (x, 0) = temp;
12756 return x;
12757 }
12758 }
12759
12760 return x;
12761 }
12762 \f
12763 /* Print an integer constant expression in assembler syntax. Addition
12764 and subtraction are the only arithmetic that may appear in these
12765 expressions. FILE is the stdio stream to write to, X is the rtx, and
12766 CODE is the operand print code from the output string. */
12767
12768 static void
12769 output_pic_addr_const (FILE *file, rtx x, int code)
12770 {
12771 char buf[256];
12772
12773 switch (GET_CODE (x))
12774 {
12775 case PC:
12776 gcc_assert (flag_pic);
12777 putc ('.', file);
12778 break;
12779
12780 case SYMBOL_REF:
12781 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
12782 output_addr_const (file, x);
12783 else
12784 {
12785 const char *name = XSTR (x, 0);
12786
12787 /* Mark the decl as referenced so that cgraph will
12788 output the function. */
12789 if (SYMBOL_REF_DECL (x))
12790 mark_decl_referenced (SYMBOL_REF_DECL (x));
12791
12792 #if TARGET_MACHO
12793 if (MACHOPIC_INDIRECT
12794 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
12795 name = machopic_indirection_name (x, /*stub_p=*/true);
12796 #endif
12797 assemble_name (file, name);
12798 }
12799 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12800 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
12801 fputs ("@PLT", file);
12802 break;
12803
12804 case LABEL_REF:
12805 x = XEXP (x, 0);
12806 /* FALLTHRU */
12807 case CODE_LABEL:
12808 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
12809 assemble_name (asm_out_file, buf);
12810 break;
12811
12812 case CONST_INT:
12813 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12814 break;
12815
12816 case CONST:
12817 /* This used to output parentheses around the expression,
12818 but that does not work on the 386 (either ATT or BSD assembler). */
12819 output_pic_addr_const (file, XEXP (x, 0), code);
12820 break;
12821
12822 case CONST_DOUBLE:
12823 if (GET_MODE (x) == VOIDmode)
12824 {
12825 /* We can use %d if the number is <32 bits and positive. */
12826 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
12827 fprintf (file, "0x%lx%08lx",
12828 (unsigned long) CONST_DOUBLE_HIGH (x),
12829 (unsigned long) CONST_DOUBLE_LOW (x));
12830 else
12831 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
12832 }
12833 else
12834 /* We can't handle floating point constants;
12835 TARGET_PRINT_OPERAND must handle them. */
12836 output_operand_lossage ("floating constant misused");
12837 break;
12838
12839 case PLUS:
12840 /* Some assemblers need integer constants to appear first. */
12841 if (CONST_INT_P (XEXP (x, 0)))
12842 {
12843 output_pic_addr_const (file, XEXP (x, 0), code);
12844 putc ('+', file);
12845 output_pic_addr_const (file, XEXP (x, 1), code);
12846 }
12847 else
12848 {
12849 gcc_assert (CONST_INT_P (XEXP (x, 1)));
12850 output_pic_addr_const (file, XEXP (x, 1), code);
12851 putc ('+', file);
12852 output_pic_addr_const (file, XEXP (x, 0), code);
12853 }
12854 break;
12855
12856 case MINUS:
12857 if (!TARGET_MACHO)
12858 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
12859 output_pic_addr_const (file, XEXP (x, 0), code);
12860 putc ('-', file);
12861 output_pic_addr_const (file, XEXP (x, 1), code);
12862 if (!TARGET_MACHO)
12863 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
12864 break;
12865
12866 case UNSPEC:
12867 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
12868 {
12869 bool f = i386_asm_output_addr_const_extra (file, x);
12870 gcc_assert (f);
12871 break;
12872 }
12873
12874 gcc_assert (XVECLEN (x, 0) == 1);
12875 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
12876 switch (XINT (x, 1))
12877 {
12878 case UNSPEC_GOT:
12879 fputs ("@GOT", file);
12880 break;
12881 case UNSPEC_GOTOFF:
12882 fputs ("@GOTOFF", file);
12883 break;
12884 case UNSPEC_PLTOFF:
12885 fputs ("@PLTOFF", file);
12886 break;
12887 case UNSPEC_PCREL:
12888 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12889 "(%rip)" : "[rip]", file);
12890 break;
12891 case UNSPEC_GOTPCREL:
12892 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12893 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
12894 break;
12895 case UNSPEC_GOTTPOFF:
12896 /* FIXME: This might be @TPOFF in Sun ld too. */
12897 fputs ("@gottpoff", file);
12898 break;
12899 case UNSPEC_TPOFF:
12900 fputs ("@tpoff", file);
12901 break;
12902 case UNSPEC_NTPOFF:
12903 if (TARGET_64BIT)
12904 fputs ("@tpoff", file);
12905 else
12906 fputs ("@ntpoff", file);
12907 break;
12908 case UNSPEC_DTPOFF:
12909 fputs ("@dtpoff", file);
12910 break;
12911 case UNSPEC_GOTNTPOFF:
12912 if (TARGET_64BIT)
12913 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12914 "@gottpoff(%rip)": "@gottpoff[rip]", file);
12915 else
12916 fputs ("@gotntpoff", file);
12917 break;
12918 case UNSPEC_INDNTPOFF:
12919 fputs ("@indntpoff", file);
12920 break;
12921 #if TARGET_MACHO
12922 case UNSPEC_MACHOPIC_OFFSET:
12923 putc ('-', file);
12924 machopic_output_function_base_name (file);
12925 break;
12926 #endif
12927 default:
12928 output_operand_lossage ("invalid UNSPEC as operand");
12929 break;
12930 }
12931 break;
12932
12933 default:
12934 output_operand_lossage ("invalid expression as operand");
12935 }
12936 }
12937
12938 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12939 We need to emit DTP-relative relocations. */
12940
12941 static void ATTRIBUTE_UNUSED
12942 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
12943 {
12944 fputs (ASM_LONG, file);
12945 output_addr_const (file, x);
12946 fputs ("@dtpoff", file);
12947 switch (size)
12948 {
12949 case 4:
12950 break;
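    /* An 8-byte slot is emitted as the 32-bit @dtpoff value followed by a
       zero upper word.  */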
12951 case 8:
12952 fputs (", 0", file);
12953 break;
12954 default:
12955 gcc_unreachable ();
12956 }
12957 }
12958
12959 /* Return true if X is a representation of the PIC register. This copes
12960 with calls from ix86_find_base_term, where the register might have
12961 been replaced by a cselib value. */
12962
12963 static bool
12964 ix86_pic_register_p (rtx x)
12965 {
12966 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
12967 return (pic_offset_table_rtx
12968 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
12969 else
12970 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
12971 }
12972
12973 /* Helper function for ix86_delegitimize_address.
12974 Attempt to delegitimize TLS local-exec accesses. */
12975
12976 static rtx
12977 ix86_delegitimize_tls_address (rtx orig_x)
12978 {
12979 rtx x = orig_x, unspec;
12980 struct ix86_address addr;
12981
12982 if (!TARGET_TLS_DIRECT_SEG_REFS)
12983 return orig_x;
12984 if (MEM_P (x))
12985 x = XEXP (x, 0);
12986 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
12987 return orig_x;
12988 if (ix86_decompose_address (x, &addr) == 0
12989 || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
12990 || addr.disp == NULL_RTX
12991 || GET_CODE (addr.disp) != CONST)
12992 return orig_x;
12993 unspec = XEXP (addr.disp, 0);
12994 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
12995 unspec = XEXP (unspec, 0);
12996 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
12997 return orig_x;
12998 x = XVECEXP (unspec, 0, 0);
12999 gcc_assert (GET_CODE (x) == SYMBOL_REF);
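  /* Re-apply any constant addend that was wrapped around the NTPOFF
     unspec in the displacement.  */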
13000 if (unspec != XEXP (addr.disp, 0))
13001 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
13002 if (addr.index)
13003 {
13004 rtx idx = addr.index;
13005 if (addr.scale != 1)
13006 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
13007 x = gen_rtx_PLUS (Pmode, idx, x);
13008 }
13009 if (addr.base)
13010 x = gen_rtx_PLUS (Pmode, addr.base, x);
13011 if (MEM_P (orig_x))
13012 x = replace_equiv_address_nv (orig_x, x);
13013 return x;
13014 }
13015
13016 /* In the name of slightly smaller debug output, and to cater to
13017 general assembler lossage, recognize PIC+GOTOFF and turn it back
13018 into a direct symbol reference.
13019
13020 On Darwin, this is necessary to avoid a crash, because Darwin
13021 has a different PIC label for each routine but the DWARF debugging
13022 information is not associated with any particular routine, so it's
13023 necessary to remove references to the PIC label from RTL stored by
13024 the DWARF output code. */
13025
13026 static rtx
13027 ix86_delegitimize_address (rtx x)
13028 {
13029 rtx orig_x = delegitimize_mem_from_attrs (x);
13030 /* addend is NULL or some rtx if x is something+GOTOFF where
13031 something doesn't include the PIC register. */
13032 rtx addend = NULL_RTX;
13033 /* reg_addend is NULL or a multiple of some register. */
13034 rtx reg_addend = NULL_RTX;
13035 /* const_addend is NULL or a const_int. */
13036 rtx const_addend = NULL_RTX;
13037 /* This is the result, or NULL. */
13038 rtx result = NULL_RTX;
13039
13040 x = orig_x;
13041
13042 if (MEM_P (x))
13043 x = XEXP (x, 0);
13044
13045 if (TARGET_64BIT)
13046 {
13047 if (GET_CODE (x) != CONST
13048 || GET_CODE (XEXP (x, 0)) != UNSPEC
13049 || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
13050 && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
13051 || !MEM_P (orig_x))
13052 return ix86_delegitimize_tls_address (orig_x);
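      /* Peel off the @GOTPCREL / PCREL unspec to recover the bare symbol.  */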
13053 x = XVECEXP (XEXP (x, 0), 0, 0);
13054 if (GET_MODE (orig_x) != GET_MODE (x))
13055 {
13056 x = simplify_gen_subreg (GET_MODE (orig_x), x,
13057 GET_MODE (x), 0);
13058 if (x == NULL_RTX)
13059 return orig_x;
13060 }
13061 return x;
13062 }
13063
13064 if (GET_CODE (x) != PLUS
13065 || GET_CODE (XEXP (x, 1)) != CONST)
13066 return ix86_delegitimize_tls_address (orig_x);
13067
13068 if (ix86_pic_register_p (XEXP (x, 0)))
13069 /* %ebx + GOT/GOTOFF */
13070 ;
13071 else if (GET_CODE (XEXP (x, 0)) == PLUS)
13072 {
13073 /* %ebx + %reg * scale + GOT/GOTOFF */
13074 reg_addend = XEXP (x, 0);
13075 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
13076 reg_addend = XEXP (reg_addend, 1);
13077 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
13078 reg_addend = XEXP (reg_addend, 0);
13079 else
13080 {
13081 reg_addend = NULL_RTX;
13082 addend = XEXP (x, 0);
13083 }
13084 }
13085 else
13086 addend = XEXP (x, 0);
13087
13088 x = XEXP (XEXP (x, 1), 0);
13089 if (GET_CODE (x) == PLUS
13090 && CONST_INT_P (XEXP (x, 1)))
13091 {
13092 const_addend = XEXP (x, 1);
13093 x = XEXP (x, 0);
13094 }
13095
13096 if (GET_CODE (x) == UNSPEC
13097 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
13098 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
13099 result = XVECEXP (x, 0, 0);
13100
13101 if (TARGET_MACHO && darwin_local_data_pic (x)
13102 && !MEM_P (orig_x))
13103 result = XVECEXP (x, 0, 0);
13104
13105 if (! result)
13106 return ix86_delegitimize_tls_address (orig_x);
13107
13108 if (const_addend)
13109 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
13110 if (reg_addend)
13111 result = gen_rtx_PLUS (Pmode, reg_addend, result);
13112 if (addend)
13113 {
13114 /* If the rest of original X doesn't involve the PIC register, add
13115 addend and subtract pic_offset_table_rtx. This can happen e.g.
13116 for code like:
13117 leal (%ebx, %ecx, 4), %ecx
13118 ...
13119 movl foo@GOTOFF(%ecx), %edx
13120 in which case we return (%ecx - %ebx) + foo. */
13121 if (pic_offset_table_rtx)
13122 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
13123 pic_offset_table_rtx),
13124 result);
13125 else
13126 return orig_x;
13127 }
13128 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
13129 {
13130 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
13131 if (result == NULL_RTX)
13132 return orig_x;
13133 }
13134 return result;
13135 }
13136
13137 /* If X is a machine specific address (i.e. a symbol or label being
13138 referenced as a displacement from the GOT implemented using an
13139 UNSPEC), then return the base term. Otherwise return X. */
13140
13141 rtx
13142 ix86_find_base_term (rtx x)
13143 {
13144 rtx term;
13145
13146 if (TARGET_64BIT)
13147 {
13148 if (GET_CODE (x) != CONST)
13149 return x;
13150 term = XEXP (x, 0);
13151 if (GET_CODE (term) == PLUS
13152 && (CONST_INT_P (XEXP (term, 1))
13153 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
13154 term = XEXP (term, 0);
13155 if (GET_CODE (term) != UNSPEC
13156 || (XINT (term, 1) != UNSPEC_GOTPCREL
13157 && XINT (term, 1) != UNSPEC_PCREL))
13158 return x;
13159
13160 return XVECEXP (term, 0, 0);
13161 }
13162
13163 return ix86_delegitimize_address (x);
13164 }
13165 \f
13166 static void
13167 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
13168 int fp, FILE *file)
13169 {
13170 const char *suffix;
13171
13172 if (mode == CCFPmode || mode == CCFPUmode)
13173 {
13174 code = ix86_fp_compare_code_to_integer (code);
13175 mode = CCmode;
13176 }
13177 if (reverse)
13178 code = reverse_condition (code);
13179
13180 switch (code)
13181 {
13182 case EQ:
13183 switch (mode)
13184 {
13185 case CCAmode:
13186 suffix = "a";
13187 break;
13188
13189 case CCCmode:
13190 suffix = "c";
13191 break;
13192
13193 case CCOmode:
13194 suffix = "o";
13195 break;
13196
13197 case CCSmode:
13198 suffix = "s";
13199 break;
13200
13201 default:
13202 suffix = "e";
13203 }
13204 break;
13205 case NE:
13206 switch (mode)
13207 {
13208 case CCAmode:
13209 suffix = "na";
13210 break;
13211
13212 case CCCmode:
13213 suffix = "nc";
13214 break;
13215
13216 case CCOmode:
13217 suffix = "no";
13218 break;
13219
13220 case CCSmode:
13221 suffix = "ns";
13222 break;
13223
13224 default:
13225 suffix = "ne";
13226 }
13227 break;
13228 case GT:
13229 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
13230 suffix = "g";
13231 break;
13232 case GTU:
13233 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13234 Those same assemblers have the same but opposite lossage on cmov. */
13235 if (mode == CCmode)
13236 suffix = fp ? "nbe" : "a";
13237 else if (mode == CCCmode)
13238 suffix = "b";
13239 else
13240 gcc_unreachable ();
13241 break;
13242 case LT:
13243 switch (mode)
13244 {
13245 case CCNOmode:
13246 case CCGOCmode:
13247 suffix = "s";
13248 break;
13249
13250 case CCmode:
13251 case CCGCmode:
13252 suffix = "l";
13253 break;
13254
13255 default:
13256 gcc_unreachable ();
13257 }
13258 break;
13259 case LTU:
13260 gcc_assert (mode == CCmode || mode == CCCmode);
13261 suffix = "b";
13262 break;
13263 case GE:
13264 switch (mode)
13265 {
13266 case CCNOmode:
13267 case CCGOCmode:
13268 suffix = "ns";
13269 break;
13270
13271 case CCmode:
13272 case CCGCmode:
13273 suffix = "ge";
13274 break;
13275
13276 default:
13277 gcc_unreachable ();
13278 }
13279 break;
13280 case GEU:
13281 /* ??? As above. */
13282 gcc_assert (mode == CCmode || mode == CCCmode);
13283 suffix = fp ? "nb" : "ae";
13284 break;
13285 case LE:
13286 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
13287 suffix = "le";
13288 break;
13289 case LEU:
13290 /* ??? As above. */
13291 if (mode == CCmode)
13292 suffix = "be";
13293 else if (mode == CCCmode)
13294 suffix = fp ? "nb" : "ae";
13295 else
13296 gcc_unreachable ();
13297 break;
13298 case UNORDERED:
13299 suffix = fp ? "u" : "p";
13300 break;
13301 case ORDERED:
13302 suffix = fp ? "nu" : "np";
13303 break;
13304 default:
13305 gcc_unreachable ();
13306 }
13307 fputs (suffix, file);
13308 }
13309
13310 /* Print the name of register X to FILE based on its machine mode and number.
13311 If CODE is 'w', pretend the mode is HImode.
13312 If CODE is 'b', pretend the mode is QImode.
13313 If CODE is 'k', pretend the mode is SImode.
13314 If CODE is 'q', pretend the mode is DImode.
13315 If CODE is 'x', pretend the mode is V4SFmode.
13316 If CODE is 't', pretend the mode is V8SFmode.
13317 If CODE is 'h', pretend the reg is the 'high' byte register.
13318    If CODE is 'y', print "st(0)" instead of "st", if the reg is the top of the x87 stack.
13319 If CODE is 'd', duplicate the operand for AVX instruction.
13320 */
13321
13322 void
13323 print_reg (rtx x, int code, FILE *file)
13324 {
13325 const char *reg;
13326 bool duplicated = code == 'd' && TARGET_AVX;
13327
13328 gcc_assert (x == pc_rtx
13329 || (REGNO (x) != ARG_POINTER_REGNUM
13330 && REGNO (x) != FRAME_POINTER_REGNUM
13331 && REGNO (x) != FLAGS_REG
13332 && REGNO (x) != FPSR_REG
13333 && REGNO (x) != FPCR_REG));
13334
13335 if (ASSEMBLER_DIALECT == ASM_ATT)
13336 putc ('%', file);
13337
13338 if (x == pc_rtx)
13339 {
13340 gcc_assert (TARGET_64BIT);
13341 fputs ("rip", file);
13342 return;
13343 }
13344
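  /* Map the print code onto an operand size in bytes; 0 selects a high
     byte register name and 3 the x87 "st" form.  */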
13345 if (code == 'w' || MMX_REG_P (x))
13346 code = 2;
13347 else if (code == 'b')
13348 code = 1;
13349 else if (code == 'k')
13350 code = 4;
13351 else if (code == 'q')
13352 code = 8;
13353 else if (code == 'y')
13354 code = 3;
13355 else if (code == 'h')
13356 code = 0;
13357 else if (code == 'x')
13358 code = 16;
13359 else if (code == 't')
13360 code = 32;
13361 else
13362 code = GET_MODE_SIZE (GET_MODE (x));
13363
13364   /* Irritatingly, AMD extended registers use a different naming convention
13365      from the normal registers.  */
13366 if (REX_INT_REG_P (x))
13367 {
13368 gcc_assert (TARGET_64BIT);
13369 switch (code)
13370 {
13371 case 0:
13372 error ("extended registers have no high halves");
13373 break;
13374 case 1:
13375 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
13376 break;
13377 case 2:
13378 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
13379 break;
13380 case 4:
13381 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
13382 break;
13383 case 8:
13384 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
13385 break;
13386 default:
13387 error ("unsupported operand size for extended register");
13388 break;
13389 }
13390 return;
13391 }
13392
13393 reg = NULL;
13394 switch (code)
13395 {
13396 case 3:
13397 if (STACK_TOP_P (x))
13398 {
13399 reg = "st(0)";
13400 break;
13401 }
13402 /* FALLTHRU */
13403 case 8:
13404 case 4:
13405 case 12:
13406 if (! ANY_FP_REG_P (x))
13407 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
13408 /* FALLTHRU */
13409 case 16:
13410 case 2:
13411 normal:
13412 reg = hi_reg_name[REGNO (x)];
13413 break;
13414 case 1:
13415 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
13416 goto normal;
13417 reg = qi_reg_name[REGNO (x)];
13418 break;
13419 case 0:
13420 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
13421 goto normal;
13422 reg = qi_high_reg_name[REGNO (x)];
13423 break;
13424 case 32:
13425 if (SSE_REG_P (x))
13426 {
13427 gcc_assert (!duplicated);
13428 putc ('y', file);
13429 fputs (hi_reg_name[REGNO (x)] + 1, file);
13430 return;
13431 }
13432 break;
13433 default:
13434 gcc_unreachable ();
13435 }
13436
13437 fputs (reg, file);
13438 if (duplicated)
13439 {
13440 if (ASSEMBLER_DIALECT == ASM_ATT)
13441 fprintf (file, ", %%%s", reg);
13442 else
13443 fprintf (file, ", %s", reg);
13444 }
13445 }
13446
13447 /* Locate some local-dynamic symbol still in use by this function
13448 so that we can print its name in some tls_local_dynamic_base
13449 pattern. */
13450
13451 static int
13452 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
13453 {
13454 rtx x = *px;
13455
13456 if (GET_CODE (x) == SYMBOL_REF
13457 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
13458 {
13459 cfun->machine->some_ld_name = XSTR (x, 0);
13460 return 1;
13461 }
13462
13463 return 0;
13464 }
13465
13466 static const char *
13467 get_some_local_dynamic_name (void)
13468 {
13469 rtx insn;
13470
13471 if (cfun->machine->some_ld_name)
13472 return cfun->machine->some_ld_name;
13473
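  /* Otherwise scan the insn stream for any local-dynamic TLS symbol and
     cache its name.  */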
13474 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
13475 if (NONDEBUG_INSN_P (insn)
13476 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
13477 return cfun->machine->some_ld_name;
13478
13479 return NULL;
13480 }
13481
13482 /* Meaning of CODE:
13483 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13484 C -- print opcode suffix for set/cmov insn.
13485 c -- like C, but print reversed condition
13486 F,f -- likewise, but for floating-point.
13487 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13488 otherwise nothing
13489 R -- print the prefix for register names.
13490 z -- print the opcode suffix for the size of the current operand.
13491 Z -- likewise, with special suffixes for x87 instructions.
13492 * -- print a star (in certain assembler syntax)
13493 A -- print an absolute memory reference.
13494 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13495    s -- print a shift double count, followed by the assembler's argument
13496 	delimiter.
13497 b -- print the QImode name of the register for the indicated operand.
13498 %b0 would print %al if operands[0] is reg 0.
13499 w -- likewise, print the HImode name of the register.
13500 k -- likewise, print the SImode name of the register.
13501 q -- likewise, print the DImode name of the register.
13502 x -- likewise, print the V4SFmode name of the register.
13503 t -- likewise, print the V8SFmode name of the register.
13504 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13505 y -- print "st(0)" instead of "st" as a register.
13506 d -- print duplicated register operand for AVX instruction.
13507 D -- print condition for SSE cmp instruction.
13508 P -- if PIC, print an @PLT suffix.
13509 p -- print raw symbol name.
13510 X -- don't print any sort of PIC '@' suffix for a symbol.
13511 & -- print some in-use local-dynamic symbol name.
13512 H -- print a memory address offset by 8; used for sse high-parts
13513 Y -- print condition for XOP pcom* instruction.
13514 + -- print a branch hint as 'cs' or 'ds' prefix
13515 ; -- print a semicolon (after prefixes due to bug in older gas).
13516 @ -- print a segment register of thread base pointer load
13517 */
13518
13519 void
13520 ix86_print_operand (FILE *file, rtx x, int code)
13521 {
13522 if (code)
13523 {
13524 switch (code)
13525 {
13526 case '*':
13527 if (ASSEMBLER_DIALECT == ASM_ATT)
13528 putc ('*', file);
13529 return;
13530
13531 case '&':
13532 {
13533 const char *name = get_some_local_dynamic_name ();
13534 if (name == NULL)
13535 output_operand_lossage ("'%%&' used without any "
13536 "local dynamic TLS references");
13537 else
13538 assemble_name (file, name);
13539 return;
13540 }
13541
13542 case 'A':
13543 switch (ASSEMBLER_DIALECT)
13544 {
13545 case ASM_ATT:
13546 putc ('*', file);
13547 break;
13548
13549 case ASM_INTEL:
13550 	    /* Intel syntax.  For absolute addresses, registers should not
13551 	       be surrounded by brackets.  */
13552 if (!REG_P (x))
13553 {
13554 putc ('[', file);
13555 ix86_print_operand (file, x, 0);
13556 putc (']', file);
13557 return;
13558 }
13559 break;
13560
13561 default:
13562 gcc_unreachable ();
13563 }
13564
13565 ix86_print_operand (file, x, 0);
13566 return;
13567
13568
13569 case 'L':
13570 if (ASSEMBLER_DIALECT == ASM_ATT)
13571 putc ('l', file);
13572 return;
13573
13574 case 'W':
13575 if (ASSEMBLER_DIALECT == ASM_ATT)
13576 putc ('w', file);
13577 return;
13578
13579 case 'B':
13580 if (ASSEMBLER_DIALECT == ASM_ATT)
13581 putc ('b', file);
13582 return;
13583
13584 case 'Q':
13585 if (ASSEMBLER_DIALECT == ASM_ATT)
13586 putc ('l', file);
13587 return;
13588
13589 case 'S':
13590 if (ASSEMBLER_DIALECT == ASM_ATT)
13591 putc ('s', file);
13592 return;
13593
13594 case 'T':
13595 if (ASSEMBLER_DIALECT == ASM_ATT)
13596 putc ('t', file);
13597 return;
13598
13599 case 'z':
13600 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13601 {
13602 	      /* Opcodes don't get size suffixes if using Intel syntax.  */
13603 if (ASSEMBLER_DIALECT == ASM_INTEL)
13604 return;
13605
13606 switch (GET_MODE_SIZE (GET_MODE (x)))
13607 {
13608 case 1:
13609 putc ('b', file);
13610 return;
13611
13612 case 2:
13613 putc ('w', file);
13614 return;
13615
13616 case 4:
13617 putc ('l', file);
13618 return;
13619
13620 case 8:
13621 putc ('q', file);
13622 return;
13623
13624 default:
13625 output_operand_lossage
13626 ("invalid operand size for operand code '%c'", code);
13627 return;
13628 }
13629 }
13630
13631 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13632 warning
13633 (0, "non-integer operand used with operand code '%c'", code);
13634 /* FALLTHRU */
13635
13636 case 'Z':
13637 	  /* 387 opcodes don't get size suffixes if using Intel syntax.  */
13638 if (ASSEMBLER_DIALECT == ASM_INTEL)
13639 return;
13640
13641 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13642 {
13643 switch (GET_MODE_SIZE (GET_MODE (x)))
13644 {
13645 case 2:
13646 #ifdef HAVE_AS_IX86_FILDS
13647 putc ('s', file);
13648 #endif
13649 return;
13650
13651 case 4:
13652 putc ('l', file);
13653 return;
13654
13655 case 8:
13656 #ifdef HAVE_AS_IX86_FILDQ
13657 putc ('q', file);
13658 #else
13659 fputs ("ll", file);
13660 #endif
13661 return;
13662
13663 default:
13664 break;
13665 }
13666 }
13667 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13668 {
13669 /* 387 opcodes don't get size suffixes
13670 if the operands are registers. */
13671 if (STACK_REG_P (x))
13672 return;
13673
13674 switch (GET_MODE_SIZE (GET_MODE (x)))
13675 {
13676 case 4:
13677 putc ('s', file);
13678 return;
13679
13680 case 8:
13681 putc ('l', file);
13682 return;
13683
13684 case 12:
13685 case 16:
13686 putc ('t', file);
13687 return;
13688
13689 default:
13690 break;
13691 }
13692 }
13693 else
13694 {
13695 output_operand_lossage
13696 ("invalid operand type used with operand code '%c'", code);
13697 return;
13698 }
13699
13700 output_operand_lossage
13701 ("invalid operand size for operand code '%c'", code);
13702 return;
13703
13704 case 'd':
13705 case 'b':
13706 case 'w':
13707 case 'k':
13708 case 'q':
13709 case 'h':
13710 case 't':
13711 case 'y':
13712 case 'x':
13713 case 'X':
13714 case 'P':
13715 case 'p':
13716 break;
13717
13718 case 's':
13719 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
13720 {
13721 ix86_print_operand (file, x, 0);
13722 fputs (", ", file);
13723 }
13724 return;
13725
13726 case 'D':
13727 	  /* A little bit of braindamage here.  The SSE compare instructions
13728 	     use completely different names for the comparisons than the
13729 	     fp conditional moves do.  */
13730 if (TARGET_AVX)
13731 {
13732 switch (GET_CODE (x))
13733 {
13734 case EQ:
13735 fputs ("eq", file);
13736 break;
13737 case UNEQ:
13738 fputs ("eq_us", file);
13739 break;
13740 case LT:
13741 fputs ("lt", file);
13742 break;
13743 case UNLT:
13744 fputs ("nge", file);
13745 break;
13746 case LE:
13747 fputs ("le", file);
13748 break;
13749 case UNLE:
13750 fputs ("ngt", file);
13751 break;
13752 case UNORDERED:
13753 fputs ("unord", file);
13754 break;
13755 case NE:
13756 fputs ("neq", file);
13757 break;
13758 case LTGT:
13759 fputs ("neq_oq", file);
13760 break;
13761 case GE:
13762 fputs ("ge", file);
13763 break;
13764 case UNGE:
13765 fputs ("nlt", file);
13766 break;
13767 case GT:
13768 fputs ("gt", file);
13769 break;
13770 case UNGT:
13771 fputs ("nle", file);
13772 break;
13773 case ORDERED:
13774 fputs ("ord", file);
13775 break;
13776 default:
13777 output_operand_lossage ("operand is not a condition code, "
13778 "invalid operand code 'D'");
13779 return;
13780 }
13781 }
13782 else
13783 {
13784 switch (GET_CODE (x))
13785 {
13786 case EQ:
13787 case UNEQ:
13788 fputs ("eq", file);
13789 break;
13790 case LT:
13791 case UNLT:
13792 fputs ("lt", file);
13793 break;
13794 case LE:
13795 case UNLE:
13796 fputs ("le", file);
13797 break;
13798 case UNORDERED:
13799 fputs ("unord", file);
13800 break;
13801 case NE:
13802 case LTGT:
13803 fputs ("neq", file);
13804 break;
13805 case UNGE:
13806 case GE:
13807 fputs ("nlt", file);
13808 break;
13809 case UNGT:
13810 case GT:
13811 fputs ("nle", file);
13812 break;
13813 case ORDERED:
13814 fputs ("ord", file);
13815 break;
13816 default:
13817 output_operand_lossage ("operand is not a condition code, "
13818 "invalid operand code 'D'");
13819 return;
13820 }
13821 }
13822 return;
13823 case 'O':
13824 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13825 if (ASSEMBLER_DIALECT == ASM_ATT)
13826 {
13827 switch (GET_MODE (x))
13828 {
13829 case HImode: putc ('w', file); break;
13830 case SImode:
13831 case SFmode: putc ('l', file); break;
13832 case DImode:
13833 case DFmode: putc ('q', file); break;
13834 default: gcc_unreachable ();
13835 }
13836 putc ('.', file);
13837 }
13838 #endif
13839 return;
13840 case 'C':
13841 if (!COMPARISON_P (x))
13842 {
13843 output_operand_lossage ("operand is neither a constant nor a "
13844 "condition code, invalid operand code "
13845 "'C'");
13846 return;
13847 }
13848 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
13849 return;
13850 case 'F':
13851 if (!COMPARISON_P (x))
13852 {
13853 output_operand_lossage ("operand is neither a constant nor a "
13854 "condition code, invalid operand code "
13855 "'F'");
13856 return;
13857 }
13858 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13859 if (ASSEMBLER_DIALECT == ASM_ATT)
13860 putc ('.', file);
13861 #endif
13862 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
13863 return;
13864
13865 /* Like above, but reverse condition */
13866 case 'c':
13867 /* Check to see if argument to %c is really a constant
13868 and not a condition code which needs to be reversed. */
13869 if (!COMPARISON_P (x))
13870 {
13871 output_operand_lossage ("operand is neither a constant nor a "
13872 "condition code, invalid operand "
13873 "code 'c'");
13874 return;
13875 }
13876 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
13877 return;
13878 case 'f':
13879 if (!COMPARISON_P (x))
13880 {
13881 output_operand_lossage ("operand is neither a constant nor a "
13882 "condition code, invalid operand "
13883 "code 'f'");
13884 return;
13885 }
13886 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13887 if (ASSEMBLER_DIALECT == ASM_ATT)
13888 putc ('.', file);
13889 #endif
13890 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
13891 return;
13892
13893 case 'H':
13894 /* It doesn't actually matter what mode we use here, as we're
13895 only going to use this for printing. */
13896 x = adjust_address_nv (x, DImode, 8);
13897 break;
13898
13899 case '+':
13900 {
13901 rtx x;
13902
13903 if (!optimize
13904 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
13905 return;
13906
13907 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
13908 if (x)
13909 {
13910 int pred_val = INTVAL (XEXP (x, 0));
13911
13912 if (pred_val < REG_BR_PROB_BASE * 45 / 100
13913 || pred_val > REG_BR_PROB_BASE * 55 / 100)
13914 {
13915 int taken = pred_val > REG_BR_PROB_BASE / 2;
13916 int cputaken = final_forward_branch_p (current_output_insn) == 0;
13917
13918 		  /* Emit hints only in the case where the default branch
13919 		     prediction heuristics would fail.  */
13920 if (taken != cputaken)
13921 {
13922 /* We use 3e (DS) prefix for taken branches and
13923 2e (CS) prefix for not taken branches. */
13924 if (taken)
13925 fputs ("ds ; ", file);
13926 else
13927 fputs ("cs ; ", file);
13928 }
13929 }
13930 }
13931 return;
13932 }
13933
13934 case 'Y':
13935 switch (GET_CODE (x))
13936 {
13937 case NE:
13938 fputs ("neq", file);
13939 break;
13940 case EQ:
13941 fputs ("eq", file);
13942 break;
13943 case GE:
13944 case GEU:
13945 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
13946 break;
13947 case GT:
13948 case GTU:
13949 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
13950 break;
13951 case LE:
13952 case LEU:
13953 fputs ("le", file);
13954 break;
13955 case LT:
13956 case LTU:
13957 fputs ("lt", file);
13958 break;
13959 case UNORDERED:
13960 fputs ("unord", file);
13961 break;
13962 case ORDERED:
13963 fputs ("ord", file);
13964 break;
13965 case UNEQ:
13966 fputs ("ueq", file);
13967 break;
13968 case UNGE:
13969 fputs ("nlt", file);
13970 break;
13971 case UNGT:
13972 fputs ("nle", file);
13973 break;
13974 case UNLE:
13975 fputs ("ule", file);
13976 break;
13977 case UNLT:
13978 fputs ("ult", file);
13979 break;
13980 case LTGT:
13981 fputs ("une", file);
13982 break;
13983 default:
13984 output_operand_lossage ("operand is not a condition code, "
13985 "invalid operand code 'Y'");
13986 return;
13987 }
13988 return;
13989
13990 case ';':
13991 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
13992 putc (';', file);
13993 #endif
13994 return;
13995
13996 case '@':
13997 if (ASSEMBLER_DIALECT == ASM_ATT)
13998 putc ('%', file);
13999
14000 /* The kernel uses a different segment register for performance
14001 reasons; a system call would not have to trash the userspace
14002 segment register, which would be expensive. */
14003 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
14004 fputs ("fs", file);
14005 else
14006 fputs ("gs", file);
14007 return;
14008
14009 default:
14010 output_operand_lossage ("invalid operand code '%c'", code);
14011 }
14012 }
14013
14014 if (REG_P (x))
14015 print_reg (x, code, file);
14016
14017 else if (MEM_P (x))
14018 {
14019 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
14020 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
14021 && GET_MODE (x) != BLKmode)
14022 {
14023 const char * size;
14024 switch (GET_MODE_SIZE (GET_MODE (x)))
14025 {
14026 case 1: size = "BYTE"; break;
14027 case 2: size = "WORD"; break;
14028 case 4: size = "DWORD"; break;
14029 case 8: size = "QWORD"; break;
14030 case 12: size = "TBYTE"; break;
14031 case 16:
14032 if (GET_MODE (x) == XFmode)
14033 size = "TBYTE";
14034 else
14035 size = "XMMWORD";
14036 break;
14037 case 32: size = "YMMWORD"; break;
14038 default:
14039 gcc_unreachable ();
14040 }
14041
14042 /* Check for explicit size override (codes 'b', 'w' and 'k') */
14043 if (code == 'b')
14044 size = "BYTE";
14045 else if (code == 'w')
14046 size = "WORD";
14047 else if (code == 'k')
14048 size = "DWORD";
14049
14050 fputs (size, file);
14051 fputs (" PTR ", file);
14052 }
14053
14054 x = XEXP (x, 0);
14055 /* Avoid (%rip) for call operands. */
14056 if (CONSTANT_ADDRESS_P (x) && code == 'P'
14057 && !CONST_INT_P (x))
14058 output_addr_const (file, x);
14059 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
14060 output_operand_lossage ("invalid constraints for operand");
14061 else
14062 output_address (x);
14063 }
14064
14065 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
14066 {
14067 REAL_VALUE_TYPE r;
14068 long l;
14069
14070 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14071 REAL_VALUE_TO_TARGET_SINGLE (r, l);
14072
14073 if (ASSEMBLER_DIALECT == ASM_ATT)
14074 putc ('$', file);
14075 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14076 if (code == 'q')
14077 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
14078 else
14079 fprintf (file, "0x%08x", (unsigned int) l);
14080 }
14081
14082 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
14083 {
14084 REAL_VALUE_TYPE r;
14085 long l[2];
14086
14087 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14088 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
14089
14090 if (ASSEMBLER_DIALECT == ASM_ATT)
14091 putc ('$', file);
14092 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
14093 }
14094
14095 /* These float cases don't actually occur as immediate operands. */
14096 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
14097 {
14098 char dstr[30];
14099
14100 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14101 fputs (dstr, file);
14102 }
14103
14104 else
14105 {
14106 /* We have patterns that allow zero sets of memory, for instance.
14107 In 64-bit mode, we should probably support all 8-byte vectors,
14108 since we can in fact encode that into an immediate. */
14109 if (GET_CODE (x) == CONST_VECTOR)
14110 {
14111 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
14112 x = const0_rtx;
14113 }
14114
14115 if (code != 'P' && code != 'p')
14116 {
14117 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
14118 {
14119 if (ASSEMBLER_DIALECT == ASM_ATT)
14120 putc ('$', file);
14121 }
14122 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
14123 || GET_CODE (x) == LABEL_REF)
14124 {
14125 if (ASSEMBLER_DIALECT == ASM_ATT)
14126 putc ('$', file);
14127 else
14128 fputs ("OFFSET FLAT:", file);
14129 }
14130 }
14131 if (CONST_INT_P (x))
14132 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14133 else if (flag_pic || MACHOPIC_INDIRECT)
14134 output_pic_addr_const (file, x, code);
14135 else
14136 output_addr_const (file, x);
14137 }
14138 }
14139
14140 static bool
14141 ix86_print_operand_punct_valid_p (unsigned char code)
14142 {
14143 return (code == '@' || code == '*' || code == '+'
14144 || code == '&' || code == ';');
14145 }
14146 \f
14147 /* Print a memory operand whose address is ADDR. */
14148
14149 static void
14150 ix86_print_operand_address (FILE *file, rtx addr)
14151 {
14152 struct ix86_address parts;
14153 rtx base, index, disp;
14154 int scale;
14155 int ok = ix86_decompose_address (addr, &parts);
14156
14157 gcc_assert (ok);
14158
14159 if (parts.base && GET_CODE (parts.base) == SUBREG)
14160 {
14161 rtx tmp = SUBREG_REG (parts.base);
14162 parts.base = simplify_subreg (GET_MODE (parts.base),
14163 tmp, GET_MODE (tmp), 0);
14164 }
14165
14166 if (parts.index && GET_CODE (parts.index) == SUBREG)
14167 {
14168 rtx tmp = SUBREG_REG (parts.index);
14169 parts.index = simplify_subreg (GET_MODE (parts.index),
14170 tmp, GET_MODE (tmp), 0);
14171 }
14172
14173 base = parts.base;
14174 index = parts.index;
14175 disp = parts.disp;
14176 scale = parts.scale;
14177
14178 switch (parts.seg)
14179 {
14180 case SEG_DEFAULT:
14181 break;
14182 case SEG_FS:
14183 case SEG_GS:
14184 if (ASSEMBLER_DIALECT == ASM_ATT)
14185 putc ('%', file);
14186 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
14187 break;
14188 default:
14189 gcc_unreachable ();
14190 }
14191
14192   /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode.  */
14193 if (TARGET_64BIT && !base && !index)
14194 {
14195 rtx symbol = disp;
14196
14197 if (GET_CODE (disp) == CONST
14198 && GET_CODE (XEXP (disp, 0)) == PLUS
14199 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14200 symbol = XEXP (XEXP (disp, 0), 0);
14201
14202 if (GET_CODE (symbol) == LABEL_REF
14203 || (GET_CODE (symbol) == SYMBOL_REF
14204 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
14205 base = pc_rtx;
14206 }
14207 if (!base && !index)
14208 {
14209 /* Displacement only requires special attention. */
14210
14211 if (CONST_INT_P (disp))
14212 {
14213 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
14214 fputs ("ds:", file);
14215 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
14216 }
14217 else if (flag_pic)
14218 output_pic_addr_const (file, disp, 0);
14219 else
14220 output_addr_const (file, disp);
14221 }
14222 else
14223 {
14224 int code = 0;
14225
14226 /* Print SImode registers for zero-extended addresses to force
14227 addr32 prefix. Otherwise print DImode registers to avoid it. */
14228 if (TARGET_64BIT)
14229 code = ((GET_CODE (addr) == ZERO_EXTEND
14230 || GET_CODE (addr) == AND)
14231 ? 'l'
14232 : 'q');
14233
14234 if (ASSEMBLER_DIALECT == ASM_ATT)
14235 {
14236 if (disp)
14237 {
14238 if (flag_pic)
14239 output_pic_addr_const (file, disp, 0);
14240 else if (GET_CODE (disp) == LABEL_REF)
14241 output_asm_label (disp);
14242 else
14243 output_addr_const (file, disp);
14244 }
14245
14246 putc ('(', file);
14247 if (base)
14248 print_reg (base, code, file);
14249 if (index)
14250 {
14251 putc (',', file);
14252 print_reg (index, code, file);
14253 if (scale != 1)
14254 fprintf (file, ",%d", scale);
14255 }
14256 putc (')', file);
14257 }
14258 else
14259 {
14260 rtx offset = NULL_RTX;
14261
14262 if (disp)
14263 {
14264 /* Pull out the offset of a symbol; print any symbol itself. */
14265 if (GET_CODE (disp) == CONST
14266 && GET_CODE (XEXP (disp, 0)) == PLUS
14267 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14268 {
14269 offset = XEXP (XEXP (disp, 0), 1);
14270 disp = gen_rtx_CONST (VOIDmode,
14271 XEXP (XEXP (disp, 0), 0));
14272 }
14273
14274 if (flag_pic)
14275 output_pic_addr_const (file, disp, 0);
14276 else if (GET_CODE (disp) == LABEL_REF)
14277 output_asm_label (disp);
14278 else if (CONST_INT_P (disp))
14279 offset = disp;
14280 else
14281 output_addr_const (file, disp);
14282 }
14283
14284 putc ('[', file);
14285 if (base)
14286 {
14287 print_reg (base, code, file);
14288 if (offset)
14289 {
14290 if (INTVAL (offset) >= 0)
14291 putc ('+', file);
14292 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14293 }
14294 }
14295 else if (offset)
14296 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14297 else
14298 putc ('0', file);
14299
14300 if (index)
14301 {
14302 putc ('+', file);
14303 print_reg (index, code, file);
14304 if (scale != 1)
14305 fprintf (file, "*%d", scale);
14306 }
14307 putc (']', file);
14308 }
14309 }
14310 }
14311
14312 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14313
14314 static bool
14315 i386_asm_output_addr_const_extra (FILE *file, rtx x)
14316 {
14317 rtx op;
14318
14319 if (GET_CODE (x) != UNSPEC)
14320 return false;
14321
14322 op = XVECEXP (x, 0, 0);
14323 switch (XINT (x, 1))
14324 {
14325 case UNSPEC_GOTTPOFF:
14326 output_addr_const (file, op);
14327 /* FIXME: This might be @TPOFF in Sun ld. */
14328 fputs ("@gottpoff", file);
14329 break;
14330 case UNSPEC_TPOFF:
14331 output_addr_const (file, op);
14332 fputs ("@tpoff", file);
14333 break;
14334 case UNSPEC_NTPOFF:
14335 output_addr_const (file, op);
14336 if (TARGET_64BIT)
14337 fputs ("@tpoff", file);
14338 else
14339 fputs ("@ntpoff", file);
14340 break;
14341 case UNSPEC_DTPOFF:
14342 output_addr_const (file, op);
14343 fputs ("@dtpoff", file);
14344 break;
14345 case UNSPEC_GOTNTPOFF:
14346 output_addr_const (file, op);
14347 if (TARGET_64BIT)
14348 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14349 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
14350 else
14351 fputs ("@gotntpoff", file);
14352 break;
14353 case UNSPEC_INDNTPOFF:
14354 output_addr_const (file, op);
14355 fputs ("@indntpoff", file);
14356 break;
14357 #if TARGET_MACHO
14358 case UNSPEC_MACHOPIC_OFFSET:
14359 output_addr_const (file, op);
14360 putc ('-', file);
14361 machopic_output_function_base_name (file);
14362 break;
14363 #endif
14364
14365 case UNSPEC_STACK_CHECK:
14366 {
14367 int offset;
14368
14369 gcc_assert (flag_split_stack);
14370
14371 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14372 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
14373 #else
14374 gcc_unreachable ();
14375 #endif
14376
14377 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
14378 }
14379 break;
14380
14381 default:
14382 return false;
14383 }
14384
14385 return true;
14386 }
14387 \f
14388 /* Split one or more double-mode RTL references into pairs of half-mode
14389 references. The RTL can be REG, offsettable MEM, integer constant, or
14390 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
14391 split and "num" is its length. lo_half and hi_half are output arrays
14392 that parallel "operands". */
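/* For example, splitting a DImode operand yields two SImode halves: byte
   offsets 0 and 4 for a MEM, or the corresponding subregs for a REG or
   constant.  */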
14393
14394 void
14395 split_double_mode (enum machine_mode mode, rtx operands[],
14396 int num, rtx lo_half[], rtx hi_half[])
14397 {
14398 enum machine_mode half_mode;
14399 unsigned int byte;
14400
14401 switch (mode)
14402 {
14403 case TImode:
14404 half_mode = DImode;
14405 break;
14406 case DImode:
14407 half_mode = SImode;
14408 break;
14409 default:
14410 gcc_unreachable ();
14411 }
14412
14413 byte = GET_MODE_SIZE (half_mode);
14414
14415 while (num--)
14416 {
14417 rtx op = operands[num];
14418
14419 /* simplify_subreg refuses to split volatile memory addresses,
14420 but we still have to handle them. */
14421 if (MEM_P (op))
14422 {
14423 lo_half[num] = adjust_address (op, half_mode, 0);
14424 hi_half[num] = adjust_address (op, half_mode, byte);
14425 }
14426 else
14427 {
14428 lo_half[num] = simplify_gen_subreg (half_mode, op,
14429 GET_MODE (op) == VOIDmode
14430 ? mode : GET_MODE (op), 0);
14431 hi_half[num] = simplify_gen_subreg (half_mode, op,
14432 GET_MODE (op) == VOIDmode
14433 ? mode : GET_MODE (op), byte);
14434 }
14435 }
14436 }
14437 \f
14438 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14439 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14440 is the expression of the binary operation. The output may either be
14441 emitted here, or returned to the caller, like all output_* functions.
14442
14443 There is no guarantee that the operands are the same mode, as they
14444 might be within FLOAT or FLOAT_EXTEND expressions. */
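/* For illustration, a DFmode PLUS with x87 stack operands produces one of
   the "fadd" forms below, while SSE operands produce "addsd" (or the
   three-operand "vaddsd" when AVX is enabled).  */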
14445
14446 #ifndef SYSV386_COMPAT
14447 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14448 wants to fix the assemblers because that causes incompatibility
14449 with gcc. No-one wants to fix gcc because that causes
14450 incompatibility with assemblers... You can use the option of
14451 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14452 #define SYSV386_COMPAT 1
14453 #endif
14454
14455 const char *
14456 output_387_binary_op (rtx insn, rtx *operands)
14457 {
14458 static char buf[40];
14459 const char *p;
14460 const char *ssep;
14461 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
14462
14463 #ifdef ENABLE_CHECKING
14464 /* Even if we do not want to check the inputs, this documents the input
14465 constraints, which helps in understanding the following code. */
14466 if (STACK_REG_P (operands[0])
14467 && ((REG_P (operands[1])
14468 && REGNO (operands[0]) == REGNO (operands[1])
14469 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
14470 || (REG_P (operands[2])
14471 && REGNO (operands[0]) == REGNO (operands[2])
14472 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
14473 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
14474 ; /* ok */
14475 else
14476 gcc_assert (is_sse);
14477 #endif
14478
14479 switch (GET_CODE (operands[3]))
14480 {
14481 case PLUS:
14482 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14483 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14484 p = "fiadd";
14485 else
14486 p = "fadd";
14487 ssep = "vadd";
14488 break;
14489
14490 case MINUS:
14491 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14492 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14493 p = "fisub";
14494 else
14495 p = "fsub";
14496 ssep = "vsub";
14497 break;
14498
14499 case MULT:
14500 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14501 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14502 p = "fimul";
14503 else
14504 p = "fmul";
14505 ssep = "vmul";
14506 break;
14507
14508 case DIV:
14509 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14510 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14511 p = "fidiv";
14512 else
14513 p = "fdiv";
14514 ssep = "vdiv";
14515 break;
14516
14517 default:
14518 gcc_unreachable ();
14519 }
14520
14521 if (is_sse)
14522 {
14523 if (TARGET_AVX)
14524 {
14525 strcpy (buf, ssep);
14526 if (GET_MODE (operands[0]) == SFmode)
14527 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
14528 else
14529 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
14530 }
14531 else
14532 {
14533 strcpy (buf, ssep + 1);
14534 if (GET_MODE (operands[0]) == SFmode)
14535 strcat (buf, "ss\t{%2, %0|%0, %2}");
14536 else
14537 strcat (buf, "sd\t{%2, %0|%0, %2}");
14538 }
14539 return buf;
14540 }
14541 strcpy (buf, p);
14542
14543 switch (GET_CODE (operands[3]))
14544 {
14545 case MULT:
14546 case PLUS:
14547 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
14548 {
14549 rtx temp = operands[2];
14550 operands[2] = operands[1];
14551 operands[1] = temp;
14552 }
14553
14554 /* We know that operands[0] == operands[1]. */
14555
14556 if (MEM_P (operands[2]))
14557 {
14558 p = "%Z2\t%2";
14559 break;
14560 }
14561
14562 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14563 {
14564 if (STACK_TOP_P (operands[0]))
14565 /* How is it that we are storing to a dead operand[2]?
14566 Well, presumably operands[1] is dead too. We can't
14567 store the result to st(0) as st(0) gets popped on this
14568 instruction. Instead store to operands[2] (which I
14569 think has to be st(1)). st(1) will be popped later.
14570 gcc <= 2.8.1 didn't have this check and generated
14571 assembly code that the Unixware assembler rejected. */
14572 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14573 else
14574 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14575 break;
14576 }
14577
14578 if (STACK_TOP_P (operands[0]))
14579 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14580 else
14581 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14582 break;
14583
14584 case MINUS:
14585 case DIV:
14586 if (MEM_P (operands[1]))
14587 {
14588 p = "r%Z1\t%1";
14589 break;
14590 }
14591
14592 if (MEM_P (operands[2]))
14593 {
14594 p = "%Z2\t%2";
14595 break;
14596 }
14597
14598 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14599 {
14600 #if SYSV386_COMPAT
14601 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
14602 derived assemblers, confusingly reverse the direction of
14603 the operation for fsub{r} and fdiv{r} when the
14604 destination register is not st(0). The Intel assembler
14605 doesn't have this brain damage. Read !SYSV386_COMPAT to
14606 figure out what the hardware really does. */
14607 if (STACK_TOP_P (operands[0]))
14608 p = "{p\t%0, %2|rp\t%2, %0}";
14609 else
14610 p = "{rp\t%2, %0|p\t%0, %2}";
14611 #else
14612 if (STACK_TOP_P (operands[0]))
14613 /* As above for fmul/fadd, we can't store to st(0). */
14614 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14615 else
14616 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14617 #endif
14618 break;
14619 }
14620
14621 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
14622 {
14623 #if SYSV386_COMPAT
14624 if (STACK_TOP_P (operands[0]))
14625 p = "{rp\t%0, %1|p\t%1, %0}";
14626 else
14627 p = "{p\t%1, %0|rp\t%0, %1}";
14628 #else
14629 if (STACK_TOP_P (operands[0]))
14630 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
14631 else
14632 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
14633 #endif
14634 break;
14635 }
14636
14637 if (STACK_TOP_P (operands[0]))
14638 {
14639 if (STACK_TOP_P (operands[1]))
14640 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14641 else
14642 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
14643 break;
14644 }
14645 else if (STACK_TOP_P (operands[1]))
14646 {
14647 #if SYSV386_COMPAT
14648 p = "{\t%1, %0|r\t%0, %1}";
14649 #else
14650 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
14651 #endif
14652 }
14653 else
14654 {
14655 #if SYSV386_COMPAT
14656 p = "{r\t%2, %0|\t%0, %2}";
14657 #else
14658 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14659 #endif
14660 }
14661 break;
14662
14663 default:
14664 gcc_unreachable ();
14665 }
14666
14667 strcat (buf, p);
14668 return buf;
14669 }
14670
14671 /* Return needed mode for entity in optimize_mode_switching pass. */
14672
14673 int
14674 ix86_mode_needed (int entity, rtx insn)
14675 {
14676 enum attr_i387_cw mode;
14677
14678 /* The mode UNINITIALIZED is used to store the control word after a
14679 function call or ASM pattern. The mode ANY specifies that the function
14680 has no requirements on the control word and makes no changes in the
14681 bits we are interested in. */
14682
14683 if (CALL_P (insn)
14684 || (NONJUMP_INSN_P (insn)
14685 && (asm_noperands (PATTERN (insn)) >= 0
14686 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
14687 return I387_CW_UNINITIALIZED;
14688
14689 if (recog_memoized (insn) < 0)
14690 return I387_CW_ANY;
14691
14692 mode = get_attr_i387_cw (insn);
14693
14694 switch (entity)
14695 {
14696 case I387_TRUNC:
14697 if (mode == I387_CW_TRUNC)
14698 return mode;
14699 break;
14700
14701 case I387_FLOOR:
14702 if (mode == I387_CW_FLOOR)
14703 return mode;
14704 break;
14705
14706 case I387_CEIL:
14707 if (mode == I387_CW_CEIL)
14708 return mode;
14709 break;
14710
14711 case I387_MASK_PM:
14712 if (mode == I387_CW_MASK_PM)
14713 return mode;
14714 break;
14715
14716 default:
14717 gcc_unreachable ();
14718 }
14719
14720 return I387_CW_ANY;
14721 }
14722
14723 /* Output code to initialize control word copies used by trunc?f?i and
14724 rounding patterns. CURRENT_MODE is set to the current control word,
14725 while NEW_MODE is set to the new control word. */
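/* As a reminder of the x87 control word layout used below: bits 10-11 are
   the rounding control (0x0000 nearest, 0x0400 down, 0x0800 up, 0x0c00
   truncate) and bit 5 (0x0020) masks the precision exception.  */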
14726
14727 void
14728 emit_i387_cw_initialization (int mode)
14729 {
14730 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
14731 rtx new_mode;
14732
14733 enum ix86_stack_slot slot;
14734
14735 rtx reg = gen_reg_rtx (HImode);
14736
14737 emit_insn (gen_x86_fnstcw_1 (stored_mode));
14738 emit_move_insn (reg, copy_rtx (stored_mode));
14739
14740 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
14741 || optimize_function_for_size_p (cfun))
14742 {
14743 switch (mode)
14744 {
14745 case I387_CW_TRUNC:
14746 /* round toward zero (truncate) */
14747 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
14748 slot = SLOT_CW_TRUNC;
14749 break;
14750
14751 case I387_CW_FLOOR:
14752 /* round down toward -oo */
14753 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14754 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
14755 slot = SLOT_CW_FLOOR;
14756 break;
14757
14758 case I387_CW_CEIL:
14759 /* round up toward +oo */
14760 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14761 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
14762 slot = SLOT_CW_CEIL;
14763 break;
14764
14765 case I387_CW_MASK_PM:
14766 /* mask precision exception for nearbyint() */
14767 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14768 slot = SLOT_CW_MASK_PM;
14769 break;
14770
14771 default:
14772 gcc_unreachable ();
14773 }
14774 }
14775 else
14776 {
14777 switch (mode)
14778 {
14779 case I387_CW_TRUNC:
14780 /* round toward zero (truncate) */
14781 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
14782 slot = SLOT_CW_TRUNC;
14783 break;
14784
14785 case I387_CW_FLOOR:
14786 /* round down toward -oo */
14787 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
14788 slot = SLOT_CW_FLOOR;
14789 break;
14790
14791 case I387_CW_CEIL:
14792 /* round up toward +oo */
14793 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
14794 slot = SLOT_CW_CEIL;
14795 break;
14796
14797 case I387_CW_MASK_PM:
14798 /* mask precision exception for nearbyint() */
14799 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14800 slot = SLOT_CW_MASK_PM;
14801 break;
14802
14803 default:
14804 gcc_unreachable ();
14805 }
14806 }
14807
14808 gcc_assert (slot < MAX_386_STACK_LOCALS);
14809
14810 new_mode = assign_386_stack_local (HImode, slot);
14811 emit_move_insn (new_mode, reg);
14812 }
14813
14814 /* Output code for INSN to convert a float to a signed int. OPERANDS
14815 are the insn operands. The output may be [HSD]Imode and the input
14816 operand may be [SDX]Fmode. */
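/* For illustration, without SSE3's fisttp the emitted sequence is roughly
   "fldcw %3" (load the truncating control word), "fistp %0" (or "fist %0"
   when the value stays on the stack), and "fldcw %2" to restore the
   original control word.  */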
14817
14818 const char *
14819 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
14820 {
14821 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14822 int dimode_p = GET_MODE (operands[0]) == DImode;
14823 int round_mode = get_attr_i387_cw (insn);
14824
14825 /* Jump through a hoop or two for DImode, since the hardware has no
14826 non-popping instruction. We used to do this a different way, but
14827 that was somewhat fragile and broke with post-reload splitters. */
14828 if ((dimode_p || fisttp) && !stack_top_dies)
14829 output_asm_insn ("fld\t%y1", operands);
14830
14831 gcc_assert (STACK_TOP_P (operands[1]));
14832 gcc_assert (MEM_P (operands[0]));
14833 gcc_assert (GET_MODE (operands[1]) != TFmode);
14834
14835 if (fisttp)
14836 output_asm_insn ("fisttp%Z0\t%0", operands);
14837 else
14838 {
14839 if (round_mode != I387_CW_ANY)
14840 output_asm_insn ("fldcw\t%3", operands);
14841 if (stack_top_dies || dimode_p)
14842 output_asm_insn ("fistp%Z0\t%0", operands);
14843 else
14844 output_asm_insn ("fist%Z0\t%0", operands);
14845 if (round_mode != I387_CW_ANY)
14846 output_asm_insn ("fldcw\t%2", operands);
14847 }
14848
14849 return "";
14850 }
14851
14852 /* Output code for x87 ffreep insn. The OPNO argument, which may only
14853 have the values zero or one, indicates the ffreep insn's operand
14854 from the OPERANDS array. */
14855
14856 static const char *
14857 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
14858 {
14859 if (TARGET_USE_FFREEP)
14860 #ifdef HAVE_AS_IX86_FFREEP
14861 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
14862 #else
14863 {
14864 static char retval[32];
14865 int regno = REGNO (operands[opno]);
14866
14867 gcc_assert (FP_REGNO_P (regno));
14868
14869 regno -= FIRST_STACK_REG;
14870
14871 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
14872 return retval;
14873 }
14874 #endif
14875
14876 return opno ? "fstp\t%y1" : "fstp\t%y0";
14877 }
14878
14879
14880 /* Output code for INSN to compare OPERANDS. EFLAGS_P is true when fcomi
14881 should be used. UNORDERED_P is true when fucom should be used. */
14882
14883 const char *
14884 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
14885 {
14886 int stack_top_dies;
14887 rtx cmp_op0, cmp_op1;
14888 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
14889
14890 if (eflags_p)
14891 {
14892 cmp_op0 = operands[0];
14893 cmp_op1 = operands[1];
14894 }
14895 else
14896 {
14897 cmp_op0 = operands[1];
14898 cmp_op1 = operands[2];
14899 }
14900
14901 if (is_sse)
14902 {
14903 if (GET_MODE (operands[0]) == SFmode)
14904 if (unordered_p)
14905 return "%vucomiss\t{%1, %0|%0, %1}";
14906 else
14907 return "%vcomiss\t{%1, %0|%0, %1}";
14908 else
14909 if (unordered_p)
14910 return "%vucomisd\t{%1, %0|%0, %1}";
14911 else
14912 return "%vcomisd\t{%1, %0|%0, %1}";
14913 }
14914
14915 gcc_assert (STACK_TOP_P (cmp_op0));
14916
14917 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14918
14919 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
14920 {
14921 if (stack_top_dies)
14922 {
14923 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
14924 return output_387_ffreep (operands, 1);
14925 }
14926 else
14927 return "ftst\n\tfnstsw\t%0";
14928 }
14929
14930 if (STACK_REG_P (cmp_op1)
14931 && stack_top_dies
14932 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
14933 && REGNO (cmp_op1) != FIRST_STACK_REG)
14934 {
14935 /* If both the top of the 387 stack and the other operand (also a
14936 stack register) die, then this must be a
14937 `fcompp' float compare. */
14938
14939 if (eflags_p)
14940 {
14941 /* There is no double popping fcomi variant. Fortunately,
14942 eflags is immune from the fstp's cc clobbering. */
14943 if (unordered_p)
14944 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
14945 else
14946 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
14947 return output_387_ffreep (operands, 0);
14948 }
14949 else
14950 {
14951 if (unordered_p)
14952 return "fucompp\n\tfnstsw\t%0";
14953 else
14954 return "fcompp\n\tfnstsw\t%0";
14955 }
14956 }
14957 else
14958 {
14959 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
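/* E.g. mask 11 = eflags_p | unordered_p | stack_top_dies with no integer
   operand selects "fucomip" below.  */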
14960
14961 static const char * const alt[16] =
14962 {
14963 "fcom%Z2\t%y2\n\tfnstsw\t%0",
14964 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
14965 "fucom%Z2\t%y2\n\tfnstsw\t%0",
14966 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
14967
14968 "ficom%Z2\t%y2\n\tfnstsw\t%0",
14969 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
14970 NULL,
14971 NULL,
14972
14973 "fcomi\t{%y1, %0|%0, %y1}",
14974 "fcomip\t{%y1, %0|%0, %y1}",
14975 "fucomi\t{%y1, %0|%0, %y1}",
14976 "fucomip\t{%y1, %0|%0, %y1}",
14977
14978 NULL,
14979 NULL,
14980 NULL,
14981 NULL
14982 };
14983
14984 int mask;
14985 const char *ret;
14986
14987 mask = eflags_p << 3;
14988 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
14989 mask |= unordered_p << 1;
14990 mask |= stack_top_dies;
14991
14992 gcc_assert (mask < 16);
14993 ret = alt[mask];
14994 gcc_assert (ret);
14995
14996 return ret;
14997 }
14998 }
14999
15000 void
15001 ix86_output_addr_vec_elt (FILE *file, int value)
15002 {
15003 const char *directive = ASM_LONG;
15004
15005 #ifdef ASM_QUAD
15006 if (TARGET_LP64)
15007 directive = ASM_QUAD;
15008 #else
15009 gcc_assert (!TARGET_64BIT);
15010 #endif
15011
15012 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
15013 }
15014
15015 void
15016 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
15017 {
15018 const char *directive = ASM_LONG;
15019
15020 #ifdef ASM_QUAD
15021 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
15022 directive = ASM_QUAD;
15023 #else
15024 gcc_assert (!TARGET_64BIT);
15025 #endif
15026 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15027 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
15028 fprintf (file, "%s%s%d-%s%d\n",
15029 directive, LPREFIX, value, LPREFIX, rel);
15030 else if (HAVE_AS_GOTOFF_IN_DATA)
15031 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
15032 #if TARGET_MACHO
15033 else if (TARGET_MACHO)
15034 {
15035 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
15036 machopic_output_function_base_name (file);
15037 putc ('\n', file);
15038 }
15039 #endif
15040 else
15041 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
15042 GOT_SYMBOL_NAME, LPREFIX, value);
15043 }
15044 \f
15045 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
15046 for the target. */
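/* E.g. "xorl %eax, %eax" is 2 bytes versus 5 bytes for "movl $0, %eax",
   but the xor form clobbers the flags, hence the FLAGS_REG clobber added
   below.  */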
15047
15048 void
15049 ix86_expand_clear (rtx dest)
15050 {
15051 rtx tmp;
15052
15053 /* We play register width games, which are only valid after reload. */
15054 gcc_assert (reload_completed);
15055
15056 /* Avoid HImode and its attendant prefix byte. */
15057 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
15058 dest = gen_rtx_REG (SImode, REGNO (dest));
15059 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
15060
15061 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
15062 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
15063 {
15064 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15065 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
15066 }
15067
15068 emit_insn (tmp);
15069 }
15070
15071 /* X is an unchanging MEM. If it is a constant pool reference, return
15072 the constant pool rtx, else NULL. */
15073
15074 rtx
15075 maybe_get_pool_constant (rtx x)
15076 {
15077 x = ix86_delegitimize_address (XEXP (x, 0));
15078
15079 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
15080 return get_pool_constant (x);
15081
15082 return NULL_RTX;
15083 }
15084
15085 void
15086 ix86_expand_move (enum machine_mode mode, rtx operands[])
15087 {
15088 rtx op0, op1;
15089 enum tls_model model;
15090
15091 op0 = operands[0];
15092 op1 = operands[1];
15093
15094 if (GET_CODE (op1) == SYMBOL_REF)
15095 {
15096 model = SYMBOL_REF_TLS_MODEL (op1);
15097 if (model)
15098 {
15099 op1 = legitimize_tls_address (op1, model, true);
15100 op1 = force_operand (op1, op0);
15101 if (op1 == op0)
15102 return;
15103 if (GET_MODE (op1) != mode)
15104 op1 = convert_to_mode (mode, op1, 1);
15105 }
15106 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15107 && SYMBOL_REF_DLLIMPORT_P (op1))
15108 op1 = legitimize_dllimport_symbol (op1, false);
15109 }
15110 else if (GET_CODE (op1) == CONST
15111 && GET_CODE (XEXP (op1, 0)) == PLUS
15112 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
15113 {
15114 rtx addend = XEXP (XEXP (op1, 0), 1);
15115 rtx symbol = XEXP (XEXP (op1, 0), 0);
15116 rtx tmp = NULL;
15117
15118 model = SYMBOL_REF_TLS_MODEL (symbol);
15119 if (model)
15120 tmp = legitimize_tls_address (symbol, model, true);
15121 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15122 && SYMBOL_REF_DLLIMPORT_P (symbol))
15123 tmp = legitimize_dllimport_symbol (symbol, true);
15124
15125 if (tmp)
15126 {
15127 tmp = force_operand (tmp, NULL);
15128 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
15129 op0, 1, OPTAB_DIRECT);
15130 if (tmp == op0)
15131 return;
15132 if (GET_MODE (tmp) != mode)
15133 op1 = convert_to_mode (mode, tmp, 1);
15134 }
15135 }
15136
15137 if ((flag_pic || MACHOPIC_INDIRECT)
15138 && symbolic_operand (op1, mode))
15139 {
15140 if (TARGET_MACHO && !TARGET_64BIT)
15141 {
15142 #if TARGET_MACHO
15143 /* dynamic-no-pic */
15144 if (MACHOPIC_INDIRECT)
15145 {
15146 rtx temp = ((reload_in_progress
15147 || ((op0 && REG_P (op0))
15148 && mode == Pmode))
15149 ? op0 : gen_reg_rtx (Pmode));
15150 op1 = machopic_indirect_data_reference (op1, temp);
15151 if (MACHOPIC_PURE)
15152 op1 = machopic_legitimize_pic_address (op1, mode,
15153 temp == op1 ? 0 : temp);
15154 }
15155 if (op0 != op1 && GET_CODE (op0) != MEM)
15156 {
15157 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
15158 emit_insn (insn);
15159 return;
15160 }
15161 if (GET_CODE (op0) == MEM)
15162 op1 = force_reg (Pmode, op1);
15163 else
15164 {
15165 rtx temp = op0;
15166 if (GET_CODE (temp) != REG)
15167 temp = gen_reg_rtx (Pmode);
15168 temp = legitimize_pic_address (op1, temp);
15169 if (temp == op0)
15170 return;
15171 op1 = temp;
15172 }
15173 /* dynamic-no-pic */
15174 #endif
15175 }
15176 else
15177 {
15178 if (MEM_P (op0))
15179 op1 = force_reg (mode, op1);
15180 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
15181 {
15182 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
15183 op1 = legitimize_pic_address (op1, reg);
15184 if (op0 == op1)
15185 return;
15186 if (GET_MODE (op1) != mode)
15187 op1 = convert_to_mode (mode, op1, 1);
15188 }
15189 }
15190 }
15191 else
15192 {
15193 if (MEM_P (op0)
15194 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
15195 || !push_operand (op0, mode))
15196 && MEM_P (op1))
15197 op1 = force_reg (mode, op1);
15198
15199 if (push_operand (op0, mode)
15200 && ! general_no_elim_operand (op1, mode))
15201 op1 = copy_to_mode_reg (mode, op1);
15202
15203 /* Force large constants in 64bit compilation into a register
15204 to get them CSEed. */
15205 if (can_create_pseudo_p ()
15206 && (mode == DImode) && TARGET_64BIT
15207 && immediate_operand (op1, mode)
15208 && !x86_64_zext_immediate_operand (op1, VOIDmode)
15209 && !register_operand (op0, mode)
15210 && optimize)
15211 op1 = copy_to_mode_reg (mode, op1);
15212
15213 if (can_create_pseudo_p ()
15214 && FLOAT_MODE_P (mode)
15215 && GET_CODE (op1) == CONST_DOUBLE)
15216 {
15217 /* If we are loading a floating point constant to a register,
15218 force the value to memory now, since we'll get better code
15219 out of the back end. */
15220
15221 op1 = validize_mem (force_const_mem (mode, op1));
15222 if (!register_operand (op0, mode))
15223 {
15224 rtx temp = gen_reg_rtx (mode);
15225 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
15226 emit_move_insn (op0, temp);
15227 return;
15228 }
15229 }
15230 }
15231
15232 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15233 }
15234
15235 void
15236 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
15237 {
15238 rtx op0 = operands[0], op1 = operands[1];
15239 unsigned int align = GET_MODE_ALIGNMENT (mode);
15240
15241 /* Force constants other than zero into memory. We do not know how
15242 the instructions used to build constants modify the upper 64 bits
15243 of the register; once we have that information we may be able
15244 to handle some of them more efficiently. */
15245 if (can_create_pseudo_p ()
15246 && register_operand (op0, mode)
15247 && (CONSTANT_P (op1)
15248 || (GET_CODE (op1) == SUBREG
15249 && CONSTANT_P (SUBREG_REG (op1))))
15250 && !standard_sse_constant_p (op1))
15251 op1 = validize_mem (force_const_mem (mode, op1));
15252
15253 /* We need to check memory alignment for SSE modes since attributes
15254 can make operands unaligned. */
15255 if (can_create_pseudo_p ()
15256 && SSE_REG_MODE_P (mode)
15257 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
15258 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
15259 {
15260 rtx tmp[2];
15261
15262 /* ix86_expand_vector_move_misalign() does not like constants ... */
15263 if (CONSTANT_P (op1)
15264 || (GET_CODE (op1) == SUBREG
15265 && CONSTANT_P (SUBREG_REG (op1))))
15266 op1 = validize_mem (force_const_mem (mode, op1));
15267
15268 /* ... nor both arguments in memory. */
15269 if (!register_operand (op0, mode)
15270 && !register_operand (op1, mode))
15271 op1 = force_reg (mode, op1);
15272
15273 tmp[0] = op0; tmp[1] = op1;
15274 ix86_expand_vector_move_misalign (mode, tmp);
15275 return;
15276 }
15277
15278 /* Make operand1 a register if it isn't already. */
15279 if (can_create_pseudo_p ()
15280 && !register_operand (op0, mode)
15281 && !register_operand (op1, mode))
15282 {
15283 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
15284 return;
15285 }
15286
15287 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15288 }
15289
15290 /* Split 32-byte AVX unaligned load and store if needed. */
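/* For illustration, a 32-byte unaligned load may be split below into two
   16-byte loads combined with a VEC_CONCAT, and a 32-byte unaligned store
   into two vextractf128 stores, when the corresponding
   TARGET_AVX256_SPLIT_UNALIGNED_{LOAD,STORE} tuning flags are set.  */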
15291
15292 static void
15293 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
15294 {
15295 rtx m;
15296 rtx (*extract) (rtx, rtx, rtx);
15297 rtx (*move_unaligned) (rtx, rtx);
15298 enum machine_mode mode;
15299
15300 switch (GET_MODE (op0))
15301 {
15302 default:
15303 gcc_unreachable ();
15304 case V32QImode:
15305 extract = gen_avx_vextractf128v32qi;
15306 move_unaligned = gen_avx_movdqu256;
15307 mode = V16QImode;
15308 break;
15309 case V8SFmode:
15310 extract = gen_avx_vextractf128v8sf;
15311 move_unaligned = gen_avx_movups256;
15312 mode = V4SFmode;
15313 break;
15314 case V4DFmode:
15315 extract = gen_avx_vextractf128v4df;
15316 move_unaligned = gen_avx_movupd256;
15317 mode = V2DFmode;
15318 break;
15319 }
15320
15321 if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
15322 {
15323 rtx r = gen_reg_rtx (mode);
15324 m = adjust_address (op1, mode, 0);
15325 emit_move_insn (r, m);
15326 m = adjust_address (op1, mode, 16);
15327 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
15328 emit_move_insn (op0, r);
15329 }
15330 else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE)
15331 {
15332 m = adjust_address (op0, mode, 0);
15333 emit_insn (extract (m, op1, const0_rtx));
15334 m = adjust_address (op0, mode, 16);
15335 emit_insn (extract (m, op1, const1_rtx));
15336 }
15337 else
15338 emit_insn (move_unaligned (op0, op1));
15339 }
15340
15341 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
15342 straight to ix86_expand_vector_move. */
15343 /* Code generation for scalar reg-reg moves of single and double precision data:
15344 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
15345 movaps reg, reg
15346 else
15347 movss reg, reg
15348 if (x86_sse_partial_reg_dependency == true)
15349 movapd reg, reg
15350 else
15351 movsd reg, reg
15352
15353 Code generation for scalar loads of double precision data:
15354 if (x86_sse_split_regs == true)
15355 movlpd mem, reg (gas syntax)
15356 else
15357 movsd mem, reg
15358
15359 Code generation for unaligned packed loads of single precision data
15360 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
15361 if (x86_sse_unaligned_move_optimal)
15362 movups mem, reg
15363
15364 if (x86_sse_partial_reg_dependency == true)
15365 {
15366 xorps reg, reg
15367 movlps mem, reg
15368 movhps mem+8, reg
15369 }
15370 else
15371 {
15372 movlps mem, reg
15373 movhps mem+8, reg
15374 }
15375
15376 Code generation for unaligned packed loads of double precision data
15377 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
15378 if (x86_sse_unaligned_move_optimal)
15379 movupd mem, reg
15380
15381 if (x86_sse_split_regs == true)
15382 {
15383 movlpd mem, reg
15384 movhpd mem+8, reg
15385 }
15386 else
15387 {
15388 movsd mem, reg
15389 movhpd mem+8, reg
15390 }
15391 */
15392
15393 void
15394 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
15395 {
15396 rtx op0, op1, m;
15397
15398 op0 = operands[0];
15399 op1 = operands[1];
15400
15401 if (TARGET_AVX)
15402 {
15403 switch (GET_MODE_CLASS (mode))
15404 {
15405 case MODE_VECTOR_INT:
15406 case MODE_INT:
15407 switch (GET_MODE_SIZE (mode))
15408 {
15409 case 16:
15410 /* If we're optimizing for size, movups is the smallest. */
15411 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15412 {
15413 op0 = gen_lowpart (V4SFmode, op0);
15414 op1 = gen_lowpart (V4SFmode, op1);
15415 emit_insn (gen_sse_movups (op0, op1));
15416 return;
15417 }
15418 op0 = gen_lowpart (V16QImode, op0);
15419 op1 = gen_lowpart (V16QImode, op1);
15420 emit_insn (gen_sse2_movdqu (op0, op1));
15421 break;
15422 case 32:
15423 op0 = gen_lowpart (V32QImode, op0);
15424 op1 = gen_lowpart (V32QImode, op1);
15425 ix86_avx256_split_vector_move_misalign (op0, op1);
15426 break;
15427 default:
15428 gcc_unreachable ();
15429 }
15430 break;
15431 case MODE_VECTOR_FLOAT:
15432 op0 = gen_lowpart (mode, op0);
15433 op1 = gen_lowpart (mode, op1);
15434
15435 switch (mode)
15436 {
15437 case V4SFmode:
15438 emit_insn (gen_sse_movups (op0, op1));
15439 break;
15440 case V8SFmode:
15441 ix86_avx256_split_vector_move_misalign (op0, op1);
15442 break;
15443 case V2DFmode:
15444 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15445 {
15446 op0 = gen_lowpart (V4SFmode, op0);
15447 op1 = gen_lowpart (V4SFmode, op1);
15448 emit_insn (gen_sse_movups (op0, op1));
15449 return;
15450 }
15451 emit_insn (gen_sse2_movupd (op0, op1));
15452 break;
15453 case V4DFmode:
15454 ix86_avx256_split_vector_move_misalign (op0, op1);
15455 break;
15456 default:
15457 gcc_unreachable ();
15458 }
15459 break;
15460
15461 default:
15462 gcc_unreachable ();
15463 }
15464
15465 return;
15466 }
15467
15468 if (MEM_P (op1))
15469 {
15470 /* If we're optimizing for size, movups is the smallest. */
15471 if (optimize_insn_for_size_p ()
15472 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15473 {
15474 op0 = gen_lowpart (V4SFmode, op0);
15475 op1 = gen_lowpart (V4SFmode, op1);
15476 emit_insn (gen_sse_movups (op0, op1));
15477 return;
15478 }
15479
15480 /* ??? If we have typed data, then it would appear that using
15481 movdqu is the only way to get unaligned data loaded with
15482 integer type. */
15483 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15484 {
15485 op0 = gen_lowpart (V16QImode, op0);
15486 op1 = gen_lowpart (V16QImode, op1);
15487 emit_insn (gen_sse2_movdqu (op0, op1));
15488 return;
15489 }
15490
15491 if (TARGET_SSE2 && mode == V2DFmode)
15492 {
15493 rtx zero;
15494
15495 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15496 {
15497 op0 = gen_lowpart (V2DFmode, op0);
15498 op1 = gen_lowpart (V2DFmode, op1);
15499 emit_insn (gen_sse2_movupd (op0, op1));
15500 return;
15501 }
15502
15503 /* When SSE registers are split into halves, we can avoid
15504 writing to the top half twice. */
15505 if (TARGET_SSE_SPLIT_REGS)
15506 {
15507 emit_clobber (op0);
15508 zero = op0;
15509 }
15510 else
15511 {
15512 /* ??? Not sure about the best option for the Intel chips.
15513 The following would seem to satisfy; the register is
15514 entirely cleared, breaking the dependency chain. We
15515 then store to the upper half, with a dependency depth
15516 of one. A rumor has it that Intel recommends two movsd
15517 followed by an unpacklpd, but this is unconfirmed. And
15518 given that the dependency depth of the unpacklpd would
15519 still be one, I'm not sure why this would be better. */
15520 zero = CONST0_RTX (V2DFmode);
15521 }
15522
15523 m = adjust_address (op1, DFmode, 0);
15524 emit_insn (gen_sse2_loadlpd (op0, zero, m));
15525 m = adjust_address (op1, DFmode, 8);
15526 emit_insn (gen_sse2_loadhpd (op0, op0, m));
15527 }
15528 else
15529 {
15530 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15531 {
15532 op0 = gen_lowpart (V4SFmode, op0);
15533 op1 = gen_lowpart (V4SFmode, op1);
15534 emit_insn (gen_sse_movups (op0, op1));
15535 return;
15536 }
15537
15538 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
15539 emit_move_insn (op0, CONST0_RTX (mode));
15540 else
15541 emit_clobber (op0);
15542
15543 if (mode != V4SFmode)
15544 op0 = gen_lowpart (V4SFmode, op0);
15545 m = adjust_address (op1, V2SFmode, 0);
15546 emit_insn (gen_sse_loadlps (op0, op0, m));
15547 m = adjust_address (op1, V2SFmode, 8);
15548 emit_insn (gen_sse_loadhps (op0, op0, m));
15549 }
15550 }
15551 else if (MEM_P (op0))
15552 {
15553 /* If we're optimizing for size, movups is the smallest. */
15554 if (optimize_insn_for_size_p ()
15555 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15556 {
15557 op0 = gen_lowpart (V4SFmode, op0);
15558 op1 = gen_lowpart (V4SFmode, op1);
15559 emit_insn (gen_sse_movups (op0, op1));
15560 return;
15561 }
15562
15563 /* ??? Similar to above, only less clear because of quote
15564 typeless stores unquote. */
15565 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
15566 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15567 {
15568 op0 = gen_lowpart (V16QImode, op0);
15569 op1 = gen_lowpart (V16QImode, op1);
15570 emit_insn (gen_sse2_movdqu (op0, op1));
15571 return;
15572 }
15573
15574 if (TARGET_SSE2 && mode == V2DFmode)
15575 {
15576 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15577 {
15578 op0 = gen_lowpart (V2DFmode, op0);
15579 op1 = gen_lowpart (V2DFmode, op1);
15580 emit_insn (gen_sse2_movupd (op0, op1));
15581 }
15582 else
15583 {
15584 m = adjust_address (op0, DFmode, 0);
15585 emit_insn (gen_sse2_storelpd (m, op1));
15586 m = adjust_address (op0, DFmode, 8);
15587 emit_insn (gen_sse2_storehpd (m, op1));
15588 }
15589 }
15590 else
15591 {
15592 if (mode != V4SFmode)
15593 op1 = gen_lowpart (V4SFmode, op1);
15594
15595 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15596 {
15597 op0 = gen_lowpart (V4SFmode, op0);
15598 emit_insn (gen_sse_movups (op0, op1));
15599 }
15600 else
15601 {
15602 m = adjust_address (op0, V2SFmode, 0);
15603 emit_insn (gen_sse_storelps (m, op1));
15604 m = adjust_address (op0, V2SFmode, 8);
15605 emit_insn (gen_sse_storehps (m, op1));
15606 }
15607 }
15608 }
15609 else
15610 gcc_unreachable ();
15611 }
15612
15613 /* Expand a push in MODE. This is some mode for which we do not support
15614 proper push instructions, at least from the registers that we expect
15615 the value to live in. */
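/* For example, pushing a 16-byte operand expands to roughly
   "sub $16, %rsp" followed by a move to "(%rsp)".  */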
15616
15617 void
15618 ix86_expand_push (enum machine_mode mode, rtx x)
15619 {
15620 rtx tmp;
15621
15622 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
15623 GEN_INT (-GET_MODE_SIZE (mode)),
15624 stack_pointer_rtx, 1, OPTAB_DIRECT);
15625 if (tmp != stack_pointer_rtx)
15626 emit_move_insn (stack_pointer_rtx, tmp);
15627
15628 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
15629
15630 /* When we push an operand onto the stack, it has to be aligned at least
15631 at the function argument boundary. However, since we don't have
15632 the argument type, we can't determine the actual argument
15633 boundary. */
15634 emit_move_insn (tmp, x);
15635 }
15636
15637 /* Helper function of ix86_fixup_binary_operands to canonicalize
15638 operand order. Returns true if the operands should be swapped. */
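/* E.g. for "a = b + a" the operands are swapped so that src1 matches the
   destination, allowing the two-address "add %b, %a" form without an
   extra copy.  */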
15639
15640 static bool
15641 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
15642 rtx operands[])
15643 {
15644 rtx dst = operands[0];
15645 rtx src1 = operands[1];
15646 rtx src2 = operands[2];
15647
15648 /* If the operation is not commutative, we can't do anything. */
15649 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
15650 return false;
15651
15652 /* Highest priority is that src1 should match dst. */
15653 if (rtx_equal_p (dst, src1))
15654 return false;
15655 if (rtx_equal_p (dst, src2))
15656 return true;
15657
15658 /* Next highest priority is that immediate constants come second. */
15659 if (immediate_operand (src2, mode))
15660 return false;
15661 if (immediate_operand (src1, mode))
15662 return true;
15663
15664 /* Lowest priority is that memory references should come second. */
15665 if (MEM_P (src2))
15666 return false;
15667 if (MEM_P (src1))
15668 return true;
15669
15670 return false;
15671 }
15672
15673
15674 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
15675 destination to use for the operation. If different from the true
15676 destination in operands[0], a copy operation will be required. */
15677
15678 rtx
15679 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
15680 rtx operands[])
15681 {
15682 rtx dst = operands[0];
15683 rtx src1 = operands[1];
15684 rtx src2 = operands[2];
15685
15686 /* Canonicalize operand order. */
15687 if (ix86_swap_binary_operands_p (code, mode, operands))
15688 {
15689 rtx temp;
15690
15691 /* It is invalid to swap operands of different modes. */
15692 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
15693
15694 temp = src1;
15695 src1 = src2;
15696 src2 = temp;
15697 }
15698
15699 /* Both source operands cannot be in memory. */
15700 if (MEM_P (src1) && MEM_P (src2))
15701 {
15702 /* Optimization: Only read from memory once. */
15703 if (rtx_equal_p (src1, src2))
15704 {
15705 src2 = force_reg (mode, src2);
15706 src1 = src2;
15707 }
15708 else
15709 src2 = force_reg (mode, src2);
15710 }
15711
15712 /* If the destination is memory, and we do not have matching source
15713 operands, do things in registers. */
15714 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15715 dst = gen_reg_rtx (mode);
15716
15717 /* Source 1 cannot be a constant. */
15718 if (CONSTANT_P (src1))
15719 src1 = force_reg (mode, src1);
15720
15721 /* Source 1 cannot be a non-matching memory. */
15722 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15723 src1 = force_reg (mode, src1);
15724
15725 operands[1] = src1;
15726 operands[2] = src2;
15727 return dst;
15728 }
15729
15730 /* Similarly, but assume that the destination has already been
15731 set up properly. */
15732
15733 void
15734 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
15735 enum machine_mode mode, rtx operands[])
15736 {
15737 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
15738 gcc_assert (dst == operands[0]);
15739 }
15740
15741 /* Attempt to expand a binary operator. Make the expansion closer to the
15742 actual machine than just general_operand, which would allow 3 separate
15743 memory references (one output, two inputs) in a single insn. */
15744
15745 void
15746 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
15747 rtx operands[])
15748 {
15749 rtx src1, src2, dst, op, clob;
15750
15751 dst = ix86_fixup_binary_operands (code, mode, operands);
15752 src1 = operands[1];
15753 src2 = operands[2];
15754
15755 /* Emit the instruction. */
15756
15757 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
15758 if (reload_in_progress)
15759 {
15760 /* Reload doesn't know about the flags register, and doesn't know that
15761 it doesn't want to clobber it. We can only do this with PLUS. */
15762 gcc_assert (code == PLUS);
15763 emit_insn (op);
15764 }
15765 else if (reload_completed
15766 && code == PLUS
15767 && !rtx_equal_p (dst, src1))
15768 {
15769 /* This is going to be an LEA; avoid splitting it later. */
15770 emit_insn (op);
15771 }
15772 else
15773 {
15774 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15775 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15776 }
15777
15778 /* Fix up the destination if needed. */
15779 if (dst != operands[0])
15780 emit_move_insn (operands[0], dst);
15781 }
15782
15783 /* Return TRUE or FALSE depending on whether the binary operator meets the
15784 appropriate constraints. */
15785
15786 bool
15787 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
15788 rtx operands[3])
15789 {
15790 rtx dst = operands[0];
15791 rtx src1 = operands[1];
15792 rtx src2 = operands[2];
15793
15794 /* Both source operands cannot be in memory. */
15795 if (MEM_P (src1) && MEM_P (src2))
15796 return false;
15797
15798 /* Canonicalize operand order for commutative operators. */
15799 if (ix86_swap_binary_operands_p (code, mode, operands))
15800 {
15801 rtx temp = src1;
15802 src1 = src2;
15803 src2 = temp;
15804 }
15805
15806 /* If the destination is memory, we must have a matching source operand. */
15807 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15808 return false;
15809
15810 /* Source 1 cannot be a constant. */
15811 if (CONSTANT_P (src1))
15812 return false;
15813
15814 /* Source 1 cannot be a non-matching memory. */
15815 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15816 /* Support "andhi/andsi/anddi" as a zero-extending move. */
15817 return (code == AND
15818 && (mode == HImode
15819 || mode == SImode
15820 || (TARGET_64BIT && mode == DImode))
15821 && satisfies_constraint_L (src2));
15822
15823 return true;
15824 }
15825
15826 /* Attempt to expand a unary operator. Make the expansion closer to the
15827 actual machine than just general_operand, which would allow 2 separate
15828 memory references (one output, one input) in a single insn. */
15829
15830 void
15831 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
15832 rtx operands[])
15833 {
15834 int matching_memory;
15835 rtx src, dst, op, clob;
15836
15837 dst = operands[0];
15838 src = operands[1];
15839
15840 /* If the destination is memory, and we do not have matching source
15841 operands, do things in registers. */
15842 matching_memory = 0;
15843 if (MEM_P (dst))
15844 {
15845 if (rtx_equal_p (dst, src))
15846 matching_memory = 1;
15847 else
15848 dst = gen_reg_rtx (mode);
15849 }
15850
15851 /* When source operand is memory, destination must match. */
15852 if (MEM_P (src) && !matching_memory)
15853 src = force_reg (mode, src);
15854
15855 /* Emit the instruction. */
15856
15857 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
15858 if (reload_in_progress || code == NOT)
15859 {
15860 /* Reload doesn't know about the flags register, and doesn't know that
15861 it doesn't want to clobber it. */
15862 gcc_assert (code == NOT);
15863 emit_insn (op);
15864 }
15865 else
15866 {
15867 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15868 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15869 }
15870
15871 /* Fix up the destination if needed. */
15872 if (dst != operands[0])
15873 emit_move_insn (operands[0], dst);
15874 }
15875
15876 /* Split a 32bit/64bit divmod with an 8bit unsigned divmod if the dividend
15877 and divisor are within the range [0-255]. */
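/* For illustration, the expansion below tests (op2 | op3) against ~0xff;
   if both values fit in 8 bits it branches to a single udivmodhiqi3
   divide (quotient in AL, remainder in AH), otherwise the original
   full-width divide is used.  */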
15878
15879 void
15880 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
15881 bool signed_p)
15882 {
15883 rtx end_label, qimode_label;
15884 rtx insn, div, mod;
15885 rtx scratch, tmp0, tmp1, tmp2;
15886 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
15887 rtx (*gen_zero_extend) (rtx, rtx);
15888 rtx (*gen_test_ccno_1) (rtx, rtx);
15889
15890 switch (mode)
15891 {
15892 case SImode:
15893 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
15894 gen_test_ccno_1 = gen_testsi_ccno_1;
15895 gen_zero_extend = gen_zero_extendqisi2;
15896 break;
15897 case DImode:
15898 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
15899 gen_test_ccno_1 = gen_testdi_ccno_1;
15900 gen_zero_extend = gen_zero_extendqidi2;
15901 break;
15902 default:
15903 gcc_unreachable ();
15904 }
15905
15906 end_label = gen_label_rtx ();
15907 qimode_label = gen_label_rtx ();
15908
15909 scratch = gen_reg_rtx (mode);
15910
15911 /* Use 8bit unsigned divmod if the dividend and divisor are within
15912 the range [0-255]. */
15913 emit_move_insn (scratch, operands[2]);
15914 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
15915 scratch, 1, OPTAB_DIRECT);
15916 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
15917 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
15918 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
15919 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
15920 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
15921 pc_rtx);
15922 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
15923 predict_jump (REG_BR_PROB_BASE * 50 / 100);
15924 JUMP_LABEL (insn) = qimode_label;
15925
15926 /* Generate the original signed/unsigned divmod. */
15927 div = gen_divmod4_1 (operands[0], operands[1],
15928 operands[2], operands[3]);
15929 emit_insn (div);
15930
15931 /* Branch to the end. */
15932 emit_jump_insn (gen_jump (end_label));
15933 emit_barrier ();
15934
15935 /* Generate 8bit unsigned divide. */
15936 emit_label (qimode_label);
15937 /* Don't use operands[0] for result of 8bit divide since not all
15938 registers support QImode ZERO_EXTRACT. */
15939 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
15940 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
15941 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
15942 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
15943
15944 if (signed_p)
15945 {
15946 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
15947 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
15948 }
15949 else
15950 {
15951 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
15952 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
15953 }
15954
15955 /* Extract remainder from AH. */
15956 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
15957 if (REG_P (operands[1]))
15958 insn = emit_move_insn (operands[1], tmp1);
15959 else
15960 {
15961 /* Need a new scratch register since the old one has the result
15962 of the 8bit divide. */
15963 scratch = gen_reg_rtx (mode);
15964 emit_move_insn (scratch, tmp1);
15965 insn = emit_move_insn (operands[1], scratch);
15966 }
15967 set_unique_reg_note (insn, REG_EQUAL, mod);
15968
15969 /* Zero extend quotient from AL. */
15970 tmp1 = gen_lowpart (QImode, tmp0);
15971 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
15972 set_unique_reg_note (insn, REG_EQUAL, div);
15973
15974 emit_label (end_label);
15975 }
15976
15977 #define LEA_MAX_STALL (3)
15978 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
15979
15980 /* Increase the given DISTANCE in half-cycles according to
15981 dependencies between the PREV and NEXT instructions.
15982 Add 1 half-cycle if there is no dependency and
15983 go to the next cycle if there is some dependency. */
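/* E.g. when there is a dependency, DISTANCE is first rounded up to an
   even number of half-cycles (a cycle boundary) and then a full cycle
   (2 half-cycles) is added; otherwise only a single half-cycle is
   added.  */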
15984
15985 static unsigned int
15986 increase_distance (rtx prev, rtx next, unsigned int distance)
15987 {
15988 df_ref *use_rec;
15989 df_ref *def_rec;
15990
15991 if (!prev || !next)
15992 return distance + (distance & 1) + 2;
15993
15994 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
15995 return distance + 1;
15996
15997 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
15998 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
15999 if (!DF_REF_IS_ARTIFICIAL (*def_rec)
16000 && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
16001 return distance + (distance & 1) + 2;
16002
16003 return distance + 1;
16004 }
16005
16006 /* Check whether instruction INSN defines register number
16007 REGNO1 or REGNO2. */
16008
16009 static bool
16010 insn_defines_reg (unsigned int regno1, unsigned int regno2,
16011 rtx insn)
16012 {
16013 df_ref *def_rec;
16014
16015 for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
16016 if (DF_REF_REG_DEF_P (*def_rec)
16017 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16018 && (regno1 == DF_REF_REGNO (*def_rec)
16019 || regno2 == DF_REF_REGNO (*def_rec)))
16020 {
16021 return true;
16022 }
16023
16024 return false;
16025 }
16026
16027 /* Check whether instruction INSN uses register number
16028 REGNO as part of an address expression. */
16029
16030 static bool
16031 insn_uses_reg_mem (unsigned int regno, rtx insn)
16032 {
16033 df_ref *use_rec;
16034
16035 for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
16036 if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec))
16037 return true;
16038
16039 return false;
16040 }
16041
16042 /* Search backward for a non-AGU definition of register number REGNO1
16043 or register number REGNO2 in the basic block, starting from instruction
16044 START, up to the head of the basic block or instruction INSN.
16045
16046 Set *FOUND to true if a definition was found and false
16047 otherwise.
16048
16049 The distance in half-cycles between START and the found instruction or
16050 the head of the BB is added to DISTANCE and returned. */
16051
16052 static int
16053 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
16054 rtx insn, int distance,
16055 rtx start, bool *found)
16056 {
16057 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
16058 rtx prev = start;
16059 rtx next = NULL;
16060 enum attr_type insn_type;
16061
16062 *found = false;
16063
16064 while (prev
16065 && prev != insn
16066 && distance < LEA_SEARCH_THRESHOLD)
16067 {
16068 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
16069 {
16070 distance = increase_distance (prev, next, distance);
16071 if (insn_defines_reg (regno1, regno2, prev))
16072 {
16073 insn_type = get_attr_type (prev);
16074 if (insn_type != TYPE_LEA)
16075 {
16076 *found = true;
16077 return distance;
16078 }
16079 }
16080
16081 next = prev;
16082 }
16083 if (prev == BB_HEAD (bb))
16084 break;
16085
16086 prev = PREV_INSN (prev);
16087 }
16088
16089 return distance;
16090 }
16091
16092 /* Search backward for a non-AGU definition of register number REGNO1
16093 or register number REGNO2 in INSN's basic block until we
16094 1. pass LEA_SEARCH_THRESHOLD instructions, or
16095 2. reach the boundary of a neighbouring BB, or
16096 3. reach an AGU definition.
16097 Return the distance between the non-AGU definition point and INSN.
16098 If there is no definition point, return -1. */
16099
16100 static int
16101 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
16102 rtx insn)
16103 {
16104 basic_block bb = BLOCK_FOR_INSN (insn);
16105 int distance = 0;
16106 bool found = false;
16107
16108 if (insn != BB_HEAD (bb))
16109 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
16110 distance, PREV_INSN (insn),
16111 &found);
16112
16113 if (!found && distance < LEA_SEARCH_THRESHOLD)
16114 {
16115 edge e;
16116 edge_iterator ei;
16117 bool simple_loop = false;
16118
16119 FOR_EACH_EDGE (e, ei, bb->preds)
16120 if (e->src == bb)
16121 {
16122 simple_loop = true;
16123 break;
16124 }
16125
16126 if (simple_loop)
16127 distance = distance_non_agu_define_in_bb (regno1, regno2,
16128 insn, distance,
16129 BB_END (bb), &found);
16130 else
16131 {
16132 int shortest_dist = -1;
16133 bool found_in_bb = false;
16134
16135 FOR_EACH_EDGE (e, ei, bb->preds)
16136 {
16137 int bb_dist = distance_non_agu_define_in_bb (regno1, regno2,
16138 insn, distance,
16139 BB_END (e->src),
16140 &found_in_bb);
16141 if (found_in_bb)
16142 {
16143 if (shortest_dist < 0)
16144 shortest_dist = bb_dist;
16145 else if (bb_dist > 0)
16146 shortest_dist = MIN (bb_dist, shortest_dist);
16147 }
16148
16149 found = found || found_in_bb;
16150 }
16151
16152 distance = shortest_dist;
16153 }
16154 }
16155
16156 /* get_attr_type may modify recog data. We want to make sure
16157 that recog data is valid for instruction INSN, on which
16158 distance_non_agu_define is called. INSN is unchanged here. */
16159 extract_insn_cached (insn);
16160
16161 if (!found)
16162 distance = -1;
16163 else
16164 distance = distance >> 1;
16165
16166 return distance;
16167 }
16168
16169 /* Return the distance in half-cycles between INSN and the next
16170 insn that uses register number REGNO in a memory address, added
16171 to DISTANCE. Return -1 if REGNO is set first.
16172
16173 Set *FOUND to true if a register usage was found and
16174 false otherwise.
16175 Set *REDEFINED to true if a register redefinition was
16176 found and false otherwise. */
16177
16178 static int
16179 distance_agu_use_in_bb(unsigned int regno,
16180 rtx insn, int distance, rtx start,
16181 bool *found, bool *redefined)
16182 {
16183 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
16184 rtx next = start;
16185 rtx prev = NULL;
16186
16187 *found = false;
16188 *redefined = false;
16189
16190 while (next
16191 && next != insn
16192 && distance < LEA_SEARCH_THRESHOLD)
16193 {
16194 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
16195 {
16196 distance = increase_distance(prev, next, distance);
16197 if (insn_uses_reg_mem (regno, next))
16198 {
16199 /* Return DISTANCE if register REGNO (operand 0) is used in a memory
16200 address in NEXT. */
16201 *found = true;
16202 return distance;
16203 }
16204
16205 if (insn_defines_reg (regno, INVALID_REGNUM, next))
16206 {
16207 /* Return -1 if register REGNO (operand 0) is set in NEXT. */
16208 *redefined = true;
16209 return -1;
16210 }
16211
16212 prev = next;
16213 }
16214
16215 if (next == BB_END (bb))
16216 break;
16217
16218 next = NEXT_INSN (next);
16219 }
16220
16221 return distance;
16222 }
16223
16224 /* Return the distance between INSN and the next insn that uses
16225 register number REGNO0 in a memory address. Return -1 if no such
16226 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
16227
16228 static int
16229 distance_agu_use (unsigned int regno0, rtx insn)
16230 {
16231 basic_block bb = BLOCK_FOR_INSN (insn);
16232 int distance = 0;
16233 bool found = false;
16234 bool redefined = false;
16235
16236 if (insn != BB_END (bb))
16237 distance = distance_agu_use_in_bb (regno0, insn, distance,
16238 NEXT_INSN (insn),
16239 &found, &redefined);
16240
16241 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
16242 {
16243 edge e;
16244 edge_iterator ei;
16245 bool simple_loop = false;
16246
16247 FOR_EACH_EDGE (e, ei, bb->succs)
16248 if (e->dest == bb)
16249 {
16250 simple_loop = true;
16251 break;
16252 }
16253
16254 if (simple_loop)
16255 distance = distance_agu_use_in_bb (regno0, insn,
16256 distance, BB_HEAD (bb),
16257 &found, &redefined);
16258 else
16259 {
16260 int shortest_dist = -1;
16261 bool found_in_bb = false;
16262 bool redefined_in_bb = false;
16263
16264 FOR_EACH_EDGE (e, ei, bb->succs)
16265 {
16266 int bb_dist = distance_agu_use_in_bb (regno0, insn,
16267 distance, BB_HEAD (e->dest),
16268 &found_in_bb, &redefined_in_bb);
16269 if (found_in_bb)
16270 {
16271 if (shortest_dist < 0)
16272 shortest_dist = bb_dist;
16273 else if (bb_dist > 0)
16274 shortest_dist = MIN (bb_dist, shortest_dist);
16275 }
16276
16277 found = found || found_in_bb;
16278 }
16279
16280 distance = shortest_dist;
16281 }
16282 }
16283
16284 if (!found || redefined)
16285 distance = -1;
16286 else
16287 distance = distance >> 1;
16288
16289 return distance;
16290 }
16291
16292 /* Define this macro to tune LEA priority vs ADD; it takes effect when
16293    there is a choice between LEA and ADD.
16294    Negative value: ADD is preferred over LEA
16295    Zero: Neutral
16296    Positive value: LEA is preferred over ADD.  */
16297 #define IX86_LEA_PRIORITY 0
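/* For example (with the heuristic in ix86_lea_outperforms below, where the
   macro is simply added to the definition distance), defining
   IX86_LEA_PRIORITY as 2 would increase the effective definition distance
   by 2, biasing the decision towards keeping the lea even when the
   defining instruction is close by, while a negative value would bias it
   towards splitting the lea into ALU instructions.  */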
16298
16299 /* Return true if using lea INSN gives a performance advantage
16300 over the equivalent sequence of instructions.  That sequence has
16301 SPLIT_COST cycles higher latency than the lea itself.  */
16302
16303 bool
16304 ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
16305 unsigned int regno2, unsigned int split_cost)
16306 {
16307 int dist_define, dist_use;
16308
16309 dist_define = distance_non_agu_define (regno1, regno2, insn);
16310 dist_use = distance_agu_use (regno0, insn);
16311
16312 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
16313 {
16314 /* If there is no non-AGU operand definition, no AGU
16315 operand usage and the split cost is 0, then both the lea
16316 and non-lea variants have the same priority.  Currently
16317 we prefer lea for 64-bit code and non-lea for 32-bit
16318 code.  */
16319 if (dist_use < 0 && split_cost == 0)
16320 return TARGET_64BIT || IX86_LEA_PRIORITY;
16321 else
16322 return true;
16323 }
16324
16325 /* The longer the definition distance, the more preferable the lea.
16326 Here we adjust it to take the splitting cost and the
16327 lea priority into account.  */
16328 dist_define += split_cost + IX86_LEA_PRIORITY;
16329
16330 /* If there is no use in a memory address then we just check
16331 that the split cost does not exceed the AGU stall.  */
16332 if (dist_use < 0)
16333 return dist_define >= LEA_MAX_STALL;
16334
16335 /* If this insn has both backward non-agu dependence and forward
16336 agu dependence, the one with short distance takes effect. */
16337 return dist_define >= dist_use;
16338 }
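/* A rough illustration of the heuristic above (with the default
   IX86_LEA_PRIORITY of 0): if the address operands were defined by a
   non-AGU instruction one cycle back (dist_define == 1) and the result
   feeds a memory address three cycles ahead (dist_use == 3), a zero
   SPLIT_COST gives 1 >= 3, so the function returns false and the lea is
   split.  With the same use distance, a definition distance of
   LEA_MAX_STALL or more makes it return true and the lea is kept.  */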
16339
16340 /* Return true if it is legal to clobber flags by INSN and
16341 false otherwise. */
16342
16343 static bool
16344 ix86_ok_to_clobber_flags (rtx insn)
16345 {
16346 basic_block bb = BLOCK_FOR_INSN (insn);
16347 df_ref *use;
16348 bitmap live;
16349
16350 while (insn)
16351 {
16352 if (NONDEBUG_INSN_P (insn))
16353 {
16354 for (use = DF_INSN_USES (insn); *use; use++)
16355 if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG)
16356 return false;
16357
16358 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
16359 return true;
16360 }
16361
16362 if (insn == BB_END (bb))
16363 break;
16364
16365 insn = NEXT_INSN (insn);
16366 }
16367
16368 live = df_get_live_out (bb);
16369 return !REGNO_REG_SET_P (live, FLAGS_REG);
16370 }
16371
16372 /* Return true if we need to split op0 = op1 + op2 into a sequence of
16373 move and add to avoid AGU stalls. */
16374
16375 bool
16376 ix86_avoid_lea_for_add (rtx insn, rtx operands[])
16377 {
16378 unsigned int regno0 = true_regnum (operands[0]);
16379 unsigned int regno1 = true_regnum (operands[1]);
16380 unsigned int regno2 = true_regnum (operands[2]);
16381
16382 /* Check if we need to optimize. */
16383 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16384 return false;
16385
16386 /* Check it is correct to split here. */
16387 if (!ix86_ok_to_clobber_flags (insn))
16388 return false;
16389
16390 /* We only need to split adds with a non-destructive
16391 destination operand.  */
16392 if (regno0 == regno1 || regno0 == regno2)
16393 return false;
16394 else
16395 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1);
16396 }
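/* When this returns true, the add pattern's splitter is expected to
   replace, say, "lea (%rbx,%rcx), %rax" with a two-insn sequence along
   the lines of "mov %rbx, %rax; add %rcx, %rax", which is why a
   SPLIT_COST of 1 is passed to ix86_lea_outperforms above.  */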
16397
16398 /* Return true if we need to split lea into a sequence of
16399 instructions to avoid AGU stalls. */
16400
16401 bool
16402 ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
16403 {
16404 unsigned int regno0 = true_regnum (operands[0]);
16405 unsigned int regno1 = -1;
16406 unsigned int regno2 = -1;
16407 unsigned int split_cost = 0;
16408 struct ix86_address parts;
16409 int ok;
16410
16411 /* Check we need to optimize. */
16412 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16413 return false;
16414
16415 /* Check it is correct to split here. */
16416 if (!ix86_ok_to_clobber_flags (insn))
16417 return false;
16418
16419 ok = ix86_decompose_address (operands[1], &parts);
16420 gcc_assert (ok);
16421
16422 /* We should not split into add if a non-legitimate PIC
16423 operand is used as the displacement.  */
16424 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
16425 return false;
16426
16427 if (parts.base)
16428 regno1 = true_regnum (parts.base);
16429 if (parts.index)
16430 regno2 = true_regnum (parts.index);
16431
16432 /* Compute how many cycles we will add to the execution time
16433 if we split the lea into a sequence of instructions.  */
16434 if (parts.base || parts.index)
16435 {
16436 /* Have to use a mov instruction if the non-destructive
16437 destination form is used.  */
16438 if (regno1 != regno0 && regno2 != regno0)
16439 split_cost += 1;
16440
16441 /* Have to add index to base if both exist. */
16442 if (parts.base && parts.index)
16443 split_cost += 1;
16444
16445 /* Have to use shift and adds if scale is 2 or greater. */
16446 if (parts.scale > 1)
16447 {
16448 if (regno0 != regno1)
16449 split_cost += 1;
16450 else if (regno2 == regno0)
16451 split_cost += 4;
16452 else
16453 split_cost += parts.scale;
16454 }
16455
16456 /* Have to use an add instruction with an immediate if
16457 disp is non-zero.  */
16458 if (parts.disp && parts.disp != const0_rtx)
16459 split_cost += 1;
16460
16461 /* Subtract the price of lea. */
16462 split_cost -= 1;
16463 }
16464
16465 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost);
16466 }
16467
16468 /* Split lea instructions into a sequence of instructions
16469 which are executed on ALU to avoid AGU stalls.
16470 It is assumed that it is allowed to clobber flags register
16471 at lea position. */
16472
16473 extern void
16474 ix86_split_lea_for_addr (rtx operands[], enum machine_mode mode)
16475 {
16476 unsigned int regno0 = true_regnum (operands[0]);
16477 unsigned int regno1 = INVALID_REGNUM;
16478 unsigned int regno2 = INVALID_REGNUM;
16479 struct ix86_address parts;
16480 rtx tmp, clob;
16481 rtvec par;
16482 int ok, adds;
16483
16484 ok = ix86_decompose_address (operands[1], &parts);
16485 gcc_assert (ok);
16486
16487 if (parts.base)
16488 {
16489 if (GET_MODE (parts.base) != mode)
16490 parts.base = gen_rtx_SUBREG (mode, parts.base, 0);
16491 regno1 = true_regnum (parts.base);
16492 }
16493
16494 if (parts.index)
16495 {
16496 if (GET_MODE (parts.index) != mode)
16497 parts.index = gen_rtx_SUBREG (mode, parts.index, 0);
16498 regno2 = true_regnum (parts.index);
16499 }
16500
16501 if (parts.scale > 1)
16502 {
16503 /* Case r1 = r1 + ... */
16504 if (regno1 == regno0)
16505 {
16506 /* If we have the case r1 = r1 + C * r1 then we
16507 would need a multiplication, which is very
16508 expensive.  Assume the cost model is wrong if we
16509 get here with such a case.  */
16510 gcc_assert (regno2 != regno0);
16511
16512 for (adds = parts.scale; adds > 0; adds--)
16513 {
16514 tmp = gen_rtx_PLUS (mode, operands[0], parts.index);
16515 tmp = gen_rtx_SET (VOIDmode, operands[0], tmp);
16516 clob = gen_rtx_CLOBBER (VOIDmode,
16517 gen_rtx_REG (CCmode, FLAGS_REG));
16518 par = gen_rtvec (2, tmp, clob);
16519 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
16520 }
16521 }
16522 else
16523 {
16524 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
16525 if (regno0 != regno2)
16526 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));
16527
16528 /* Use shift for scaling. */
16529 tmp = gen_rtx_ASHIFT (mode, operands[0],
16530 GEN_INT (exact_log2 (parts.scale)));
16531 tmp = gen_rtx_SET (VOIDmode, operands[0], tmp);
16532 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16533 par = gen_rtvec (2, tmp, clob);
16534 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
16535
16536 if (parts.base)
16537 {
16538 tmp = gen_rtx_PLUS (mode, operands[0], parts.base);
16539 tmp = gen_rtx_SET (VOIDmode, operands[0], tmp);
16540 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16541 par = gen_rtvec (2, tmp, clob);
16542 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
16543 }
16544
16545 if (parts.disp && parts.disp != const0_rtx)
16546 {
16547 tmp = gen_rtx_PLUS (mode, operands[0], parts.disp);
16548 tmp = gen_rtx_SET (VOIDmode, operands[0], tmp);
16549 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16550 par = gen_rtvec (2, tmp, clob);
16551 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
16552 }
16553 }
16554 }
16555 else if (!parts.base && !parts.index)
16556 {
16557 gcc_assert (parts.disp);
16558 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.disp));
16559 }
16560 else
16561 {
16562 if (!parts.base)
16563 {
16564 if (regno0 != regno2)
16565 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));
16566 }
16567 else if (!parts.index)
16568 {
16569 if (regno0 != regno1)
16570 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
16571 }
16572 else
16573 {
16574 if (regno0 == regno1)
16575 tmp = gen_rtx_PLUS (mode, operands[0], parts.index);
16576 else if (regno0 == regno2)
16577 tmp = gen_rtx_PLUS (mode, operands[0], parts.base);
16578 else
16579 {
16580 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
16581 tmp = gen_rtx_PLUS (mode, operands[0], parts.index);
16582 }
16583
16584 tmp = gen_rtx_SET (VOIDmode, operands[0], tmp);
16585 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16586 par = gen_rtvec (2, tmp, clob);
16587 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
16588 }
16589
16590 if (parts.disp && parts.disp != const0_rtx)
16591 {
16592 tmp = gen_rtx_PLUS (mode, operands[0], parts.disp);
16593 tmp = gen_rtx_SET (VOIDmode, operands[0], tmp);
16594 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16595 par = gen_rtvec (2, tmp, clob);
16596 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
16597 }
16598 }
16599 }
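/* As a rough illustration, an address such as 8(%rbx,%rcx,4) with a
   destination register distinct from base and index is expanded by the
   code above into approximately:
       mov  %rcx, %rax
       shl  $2,   %rax
       add  %rbx, %rax
       add  $8,   %rax
   with each arithmetic insn clobbering the flags register, which is why
   ix86_ok_to_clobber_flags must have been checked by the callers above.  */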
16600
16601 /* Return true if it is ok to optimize an ADD operation to an LEA
16602 operation to avoid flag register consumption.  For most processors,
16603 ADD is faster than LEA.  For processors like Atom, if the
16604 destination register of the LEA holds an actual address which will be
16605 used soon, LEA is better; otherwise ADD is better.  */
16606
16607 bool
16608 ix86_lea_for_add_ok (rtx insn, rtx operands[])
16609 {
16610 unsigned int regno0 = true_regnum (operands[0]);
16611 unsigned int regno1 = true_regnum (operands[1]);
16612 unsigned int regno2 = true_regnum (operands[2]);
16613
16614 /* If a = b + c, (a!=b && a!=c), must use lea form. */
16615 if (regno0 != regno1 && regno0 != regno2)
16616 return true;
16617
16618 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16619 return false;
16620
16621 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0);
16622 }
16623
16624 /* Return true if destination reg of SET_BODY is shift count of
16625 USE_BODY. */
16626
16627 static bool
16628 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
16629 {
16630 rtx set_dest;
16631 rtx shift_rtx;
16632 int i;
16633
16634 /* Retrieve destination of SET_BODY. */
16635 switch (GET_CODE (set_body))
16636 {
16637 case SET:
16638 set_dest = SET_DEST (set_body);
16639 if (!set_dest || !REG_P (set_dest))
16640 return false;
16641 break;
16642 case PARALLEL:
16643 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
16644 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
16645 use_body))
16646 return true;
16647 default:
16648 return false;
16649 break;
16650 }
16651
16652 /* Retrieve shift count of USE_BODY. */
16653 switch (GET_CODE (use_body))
16654 {
16655 case SET:
16656 shift_rtx = XEXP (use_body, 1);
16657 break;
16658 case PARALLEL:
16659 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
16660 if (ix86_dep_by_shift_count_body (set_body,
16661 XVECEXP (use_body, 0, i)))
16662 return true;
16663 default:
16664 return false;
16665 break;
16666 }
16667
16668 if (shift_rtx
16669 && (GET_CODE (shift_rtx) == ASHIFT
16670 || GET_CODE (shift_rtx) == LSHIFTRT
16671 || GET_CODE (shift_rtx) == ASHIFTRT
16672 || GET_CODE (shift_rtx) == ROTATE
16673 || GET_CODE (shift_rtx) == ROTATERT))
16674 {
16675 rtx shift_count = XEXP (shift_rtx, 1);
16676
16677 /* Return true if shift count is dest of SET_BODY. */
16678 if (REG_P (shift_count)
16679 && true_regnum (set_dest) == true_regnum (shift_count))
16680 return true;
16681 }
16682
16683 return false;
16684 }
16685
16686 /* Return true if destination reg of SET_INSN is shift count of
16687 USE_INSN. */
16688
16689 bool
16690 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
16691 {
16692 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
16693 PATTERN (use_insn));
16694 }
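/* For example, if SET_INSN loads a value into %ecx and USE_INSN is a
   variable shift such as "sall %cl, %eax", the shift count operand of
   USE_INSN is the destination register of SET_INSN and this returns
   true.  */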
16695
16696 /* Return TRUE or FALSE depending on whether the unary operator meets the
16697 appropriate constraints. */
16698
16699 bool
16700 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
16701 enum machine_mode mode ATTRIBUTE_UNUSED,
16702 rtx operands[2] ATTRIBUTE_UNUSED)
16703 {
16704 /* If one of operands is memory, source and destination must match. */
16705 if ((MEM_P (operands[0])
16706 || MEM_P (operands[1]))
16707 && ! rtx_equal_p (operands[0], operands[1]))
16708 return false;
16709 return true;
16710 }
16711
16712 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
16713 are ok, keeping in mind the possible movddup alternative. */
16714
16715 bool
16716 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
16717 {
16718 if (MEM_P (operands[0]))
16719 return rtx_equal_p (operands[0], operands[1 + high]);
16720 if (MEM_P (operands[1]) && MEM_P (operands[2]))
16721 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
16722 return true;
16723 }
16724
16725 /* Post-reload splitter for converting an SF or DFmode value in an
16726 SSE register into an unsigned SImode. */
16727
16728 void
16729 ix86_split_convert_uns_si_sse (rtx operands[])
16730 {
16731 enum machine_mode vecmode;
16732 rtx value, large, zero_or_two31, input, two31, x;
16733
16734 large = operands[1];
16735 zero_or_two31 = operands[2];
16736 input = operands[3];
16737 two31 = operands[4];
16738 vecmode = GET_MODE (large);
16739 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
16740
16741 /* Load up the value into the low element. We must ensure that the other
16742 elements are valid floats -- zero is the easiest such value. */
16743 if (MEM_P (input))
16744 {
16745 if (vecmode == V4SFmode)
16746 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
16747 else
16748 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
16749 }
16750 else
16751 {
16752 input = gen_rtx_REG (vecmode, REGNO (input));
16753 emit_move_insn (value, CONST0_RTX (vecmode));
16754 if (vecmode == V4SFmode)
16755 emit_insn (gen_sse_movss (value, value, input));
16756 else
16757 emit_insn (gen_sse2_movsd (value, value, input));
16758 }
16759
16760 emit_move_insn (large, two31);
16761 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
16762
16763 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
16764 emit_insn (gen_rtx_SET (VOIDmode, large, x));
16765
16766 x = gen_rtx_AND (vecmode, zero_or_two31, large);
16767 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
16768
16769 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
16770 emit_insn (gen_rtx_SET (VOIDmode, value, x));
16771
16772 large = gen_rtx_REG (V4SImode, REGNO (large));
16773 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
16774
16775 x = gen_rtx_REG (V4SImode, REGNO (value));
16776 if (vecmode == V4SFmode)
16777 emit_insn (gen_sse2_cvttps2dq (x, value));
16778 else
16779 emit_insn (gen_sse2_cvttpd2dq (x, value));
16780 value = x;
16781
16782 emit_insn (gen_xorv4si3 (value, value, large));
16783 }
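/* Roughly, the sequence above works as follows: for inputs below 2**31
   the LE comparison produces an all-zero mask, so nothing is subtracted
   and the final xor is a no-op; for inputs of 2**31 and above, 2**31 is
   first subtracted so the signed cvttps2dq/cvttpd2dq conversion stays in
   range, and the missing bit is restored by xoring with the 0x80000000
   mask obtained by shifting the comparison result left by 31.  */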
16784
16785 /* Convert an unsigned DImode value into a DFmode, using only SSE.
16786 Expects the 64-bit DImode to be supplied in a pair of integral
16787 registers. Requires SSE2; will use SSE3 if available. For x86_32,
16788 -mfpmath=sse, !optimize_size only. */
16789
16790 void
16791 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
16792 {
16793 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
16794 rtx int_xmm, fp_xmm;
16795 rtx biases, exponents;
16796 rtx x;
16797
16798 int_xmm = gen_reg_rtx (V4SImode);
16799 if (TARGET_INTER_UNIT_MOVES)
16800 emit_insn (gen_movdi_to_sse (int_xmm, input));
16801 else if (TARGET_SSE_SPLIT_REGS)
16802 {
16803 emit_clobber (int_xmm);
16804 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
16805 }
16806 else
16807 {
16808 x = gen_reg_rtx (V2DImode);
16809 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
16810 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
16811 }
16812
16813 x = gen_rtx_CONST_VECTOR (V4SImode,
16814 gen_rtvec (4, GEN_INT (0x43300000UL),
16815 GEN_INT (0x45300000UL),
16816 const0_rtx, const0_rtx));
16817 exponents = validize_mem (force_const_mem (V4SImode, x));
16818
16819 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
16820 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
16821
16822 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
16823 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
16824 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
16825 (0x1.0p84 + double(fp_value_hi_xmm)).
16826 Note these exponents differ by 32. */
16827
16828 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
16829
16830 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
16831 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
16832 real_ldexp (&bias_lo_rvt, &dconst1, 52);
16833 real_ldexp (&bias_hi_rvt, &dconst1, 84);
16834 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
16835 x = const_double_from_real_value (bias_hi_rvt, DFmode);
16836 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
16837 biases = validize_mem (force_const_mem (V2DFmode, biases));
16838 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
16839
16840 /* Add the upper and lower DFmode values together. */
16841 if (TARGET_SSE3)
16842 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
16843 else
16844 {
16845 x = copy_to_mode_reg (V2DFmode, fp_xmm);
16846 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
16847 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
16848 }
16849
16850 ix86_expand_vector_extract (false, target, fp_xmm, 0);
16851 }
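/* A worked example of the bias trick above: for an input of 0x100000003
   (hi word 1, lo word 3) the interleave produces the doubles
   0x1.0p52 + 3 and 0x1.0p84 + 2**32.  Subtracting the two biases leaves
   3.0 and 4294967296.0, and the final add gives 4294967299.0, which is
   indeed the value of the 64-bit input.  */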
16852
16853 /* Not used, but eases macroization of patterns. */
16854 void
16855 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
16856 rtx input ATTRIBUTE_UNUSED)
16857 {
16858 gcc_unreachable ();
16859 }
16860
16861 /* Convert an unsigned SImode value into a DFmode. Only currently used
16862 for SSE, but applicable anywhere. */
16863
16864 void
16865 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
16866 {
16867 REAL_VALUE_TYPE TWO31r;
16868 rtx x, fp;
16869
16870 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
16871 NULL, 1, OPTAB_DIRECT);
16872
16873 fp = gen_reg_rtx (DFmode);
16874 emit_insn (gen_floatsidf2 (fp, x));
16875
16876 real_ldexp (&TWO31r, &dconst1, 31);
16877 x = const_double_from_real_value (TWO31r, DFmode);
16878
16879 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
16880 if (x != target)
16881 emit_move_insn (target, x);
16882 }
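/* The conversion above first adds -2**31 (wrapping into the signed
   SImode range), converts with the signed floatsidf2, and then adds
   2**31 back in DFmode.  For example, an input of 0xffffffff wraps to
   0x7fffffff, converts to 2147483647.0, and the final add yields
   4294967295.0.  */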
16883
16884 /* Convert a signed DImode value into a DFmode. Only used for SSE in
16885 32-bit mode; otherwise we have a direct convert instruction. */
16886
16887 void
16888 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
16889 {
16890 REAL_VALUE_TYPE TWO32r;
16891 rtx fp_lo, fp_hi, x;
16892
16893 fp_lo = gen_reg_rtx (DFmode);
16894 fp_hi = gen_reg_rtx (DFmode);
16895
16896 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
16897
16898 real_ldexp (&TWO32r, &dconst1, 32);
16899 x = const_double_from_real_value (TWO32r, DFmode);
16900 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
16901
16902 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
16903
16904 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
16905 0, OPTAB_DIRECT);
16906 if (x != target)
16907 emit_move_insn (target, x);
16908 }
16909
16910 /* Convert an unsigned SImode value into a SFmode, using only SSE.
16911 For x86_32, -mfpmath=sse, !optimize_size only. */
16912 void
16913 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
16914 {
16915 REAL_VALUE_TYPE ONE16r;
16916 rtx fp_hi, fp_lo, int_hi, int_lo, x;
16917
16918 real_ldexp (&ONE16r, &dconst1, 16);
16919 x = const_double_from_real_value (ONE16r, SFmode);
16920 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
16921 NULL, 0, OPTAB_DIRECT);
16922 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
16923 NULL, 0, OPTAB_DIRECT);
16924 fp_hi = gen_reg_rtx (SFmode);
16925 fp_lo = gen_reg_rtx (SFmode);
16926 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
16927 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
16928 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
16929 0, OPTAB_DIRECT);
16930 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
16931 0, OPTAB_DIRECT);
16932 if (!rtx_equal_p (target, fp_hi))
16933 emit_move_insn (target, fp_hi);
16934 }
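/* Example of the 16-bit split above: for input 0x12345678 the halves are
   0x1234 and 0x5678; both convert exactly to SFmode, and the result is
   computed as 0x1234 * 65536.0f + 0x5678.  Splitting avoids the signed
   scalar conversion misinterpreting inputs with the top bit set, and
   only the final addition needs to round.  */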
16935
16936 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
16937 then replicate the value for all elements of the vector
16938 register. */
16939
16940 rtx
16941 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
16942 {
16943 int i, n_elt;
16944 rtvec v;
16945 enum machine_mode scalar_mode;
16946
16947 switch (mode)
16948 {
16949 case V8SImode:
16950 case V4SImode:
16951 case V4DImode:
16952 case V2DImode:
16953 gcc_assert (vect);
16954 case V8SFmode:
16955 case V4SFmode:
16956 case V4DFmode:
16957 case V2DFmode:
16958 n_elt = GET_MODE_NUNITS (mode);
16959 v = rtvec_alloc (n_elt);
16960 scalar_mode = GET_MODE_INNER (mode);
16961
16962 RTVEC_ELT (v, 0) = value;
16963
16964 for (i = 1; i < n_elt; ++i)
16965 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
16966
16967 return gen_rtx_CONST_VECTOR (mode, v);
16968
16969 default:
16970 gcc_unreachable ();
16971 }
16972 }
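/* For instance, ix86_build_const_vector (V4SFmode, true, x) yields the
   constant vector {x, x, x, x}, whereas with VECT false it yields
   {x, 0, 0, 0}; the latter form is what the scalar copysign expander
   below relies on.  */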
16973
16974 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
16975 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
16976 for an SSE register. If VECT is true, then replicate the mask for
16977 all elements of the vector register. If INVERT is true, then create
16978 a mask excluding the sign bit. */
16979
16980 rtx
16981 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
16982 {
16983 enum machine_mode vec_mode, imode;
16984 HOST_WIDE_INT hi, lo;
16985 int shift = 63;
16986 rtx v;
16987 rtx mask;
16988
16989 /* Find the sign bit, sign extended to 2*HWI. */
16990 switch (mode)
16991 {
16992 case V8SImode:
16993 case V4SImode:
16994 case V8SFmode:
16995 case V4SFmode:
16996 vec_mode = mode;
16997 mode = GET_MODE_INNER (mode);
16998 imode = SImode;
16999 lo = 0x80000000, hi = lo < 0;
17000 break;
17001
17002 case V4DImode:
17003 case V2DImode:
17004 case V4DFmode:
17005 case V2DFmode:
17006 vec_mode = mode;
17007 mode = GET_MODE_INNER (mode);
17008 imode = DImode;
17009 if (HOST_BITS_PER_WIDE_INT >= 64)
17010 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
17011 else
17012 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
17013 break;
17014
17015 case TImode:
17016 case TFmode:
17017 vec_mode = VOIDmode;
17018 if (HOST_BITS_PER_WIDE_INT >= 64)
17019 {
17020 imode = TImode;
17021 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
17022 }
17023 else
17024 {
17025 rtvec vec;
17026
17027 imode = DImode;
17028 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
17029
17030 if (invert)
17031 {
17032 lo = ~lo, hi = ~hi;
17033 v = constm1_rtx;
17034 }
17035 else
17036 v = const0_rtx;
17037
17038 mask = immed_double_const (lo, hi, imode);
17039
17040 vec = gen_rtvec (2, v, mask);
17041 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
17042 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
17043
17044 return v;
17045 }
17046 break;
17047
17048 default:
17049 gcc_unreachable ();
17050 }
17051
17052 if (invert)
17053 lo = ~lo, hi = ~hi;
17054
17055 /* Force this value into the low part of a fp vector constant. */
17056 mask = immed_double_const (lo, hi, imode);
17057 mask = gen_lowpart (mode, mask);
17058
17059 if (vec_mode == VOIDmode)
17060 return force_reg (mode, mask);
17061
17062 v = ix86_build_const_vector (vec_mode, vect, mask);
17063 return force_reg (vec_mode, v);
17064 }
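/* For V4SFmode, for example, this produces a vector whose elements are
   0x80000000 (or 0x7fffffff when INVERT is true), replicated across the
   vector when VECT is true and present only in the low element
   otherwise; these are the masks used by the NEG/ABS and copysign
   sequences below.  */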
17065
17066 /* Generate code for floating point ABS or NEG. */
17067
17068 void
17069 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
17070 rtx operands[])
17071 {
17072 rtx mask, set, dst, src;
17073 bool use_sse = false;
17074 bool vector_mode = VECTOR_MODE_P (mode);
17075 enum machine_mode vmode = mode;
17076
17077 if (vector_mode)
17078 use_sse = true;
17079 else if (mode == TFmode)
17080 use_sse = true;
17081 else if (TARGET_SSE_MATH)
17082 {
17083 use_sse = SSE_FLOAT_MODE_P (mode);
17084 if (mode == SFmode)
17085 vmode = V4SFmode;
17086 else if (mode == DFmode)
17087 vmode = V2DFmode;
17088 }
17089
17090 /* NEG and ABS performed with SSE use bitwise mask operations.
17091 Create the appropriate mask now. */
17092 if (use_sse)
17093 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
17094 else
17095 mask = NULL_RTX;
17096
17097 dst = operands[0];
17098 src = operands[1];
17099
17100 set = gen_rtx_fmt_e (code, mode, src);
17101 set = gen_rtx_SET (VOIDmode, dst, set);
17102
17103 if (mask)
17104 {
17105 rtx use, clob;
17106 rtvec par;
17107
17108 use = gen_rtx_USE (VOIDmode, mask);
17109 if (vector_mode)
17110 par = gen_rtvec (2, set, use);
17111 else
17112 {
17113 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17114 par = gen_rtvec (3, set, use, clob);
17115 }
17116 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
17117 }
17118 else
17119 emit_insn (set);
17120 }
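/* With the masks built above, NEG is ultimately performed as an XOR with
   the sign-bit mask and ABS as an AND with the inverted mask (note that
   code == ABS selects the inverted mask); the mask is carried along via
   the USE so the later splitters can find it.  */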
17121
17122 /* Expand a copysign operation. Special case operand 0 being a constant. */
17123
17124 void
17125 ix86_expand_copysign (rtx operands[])
17126 {
17127 enum machine_mode mode, vmode;
17128 rtx dest, op0, op1, mask, nmask;
17129
17130 dest = operands[0];
17131 op0 = operands[1];
17132 op1 = operands[2];
17133
17134 mode = GET_MODE (dest);
17135
17136 if (mode == SFmode)
17137 vmode = V4SFmode;
17138 else if (mode == DFmode)
17139 vmode = V2DFmode;
17140 else
17141 vmode = mode;
17142
17143 if (GET_CODE (op0) == CONST_DOUBLE)
17144 {
17145 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
17146
17147 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
17148 op0 = simplify_unary_operation (ABS, mode, op0, mode);
17149
17150 if (mode == SFmode || mode == DFmode)
17151 {
17152 if (op0 == CONST0_RTX (mode))
17153 op0 = CONST0_RTX (vmode);
17154 else
17155 {
17156 rtx v = ix86_build_const_vector (vmode, false, op0);
17157
17158 op0 = force_reg (vmode, v);
17159 }
17160 }
17161 else if (op0 != CONST0_RTX (mode))
17162 op0 = force_reg (mode, op0);
17163
17164 mask = ix86_build_signbit_mask (vmode, 0, 0);
17165
17166 if (mode == SFmode)
17167 copysign_insn = gen_copysignsf3_const;
17168 else if (mode == DFmode)
17169 copysign_insn = gen_copysigndf3_const;
17170 else
17171 copysign_insn = gen_copysigntf3_const;
17172
17173 emit_insn (copysign_insn (dest, op0, op1, mask));
17174 }
17175 else
17176 {
17177 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
17178
17179 nmask = ix86_build_signbit_mask (vmode, 0, 1);
17180 mask = ix86_build_signbit_mask (vmode, 0, 0);
17181
17182 if (mode == SFmode)
17183 copysign_insn = gen_copysignsf3_var;
17184 else if (mode == DFmode)
17185 copysign_insn = gen_copysigndf3_var;
17186 else
17187 copysign_insn = gen_copysigntf3_var;
17188
17189 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
17190 }
17191 }
17192
17193 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
17194 be a constant, and so has already been expanded into a vector constant. */
17195
17196 void
17197 ix86_split_copysign_const (rtx operands[])
17198 {
17199 enum machine_mode mode, vmode;
17200 rtx dest, op0, mask, x;
17201
17202 dest = operands[0];
17203 op0 = operands[1];
17204 mask = operands[3];
17205
17206 mode = GET_MODE (dest);
17207 vmode = GET_MODE (mask);
17208
17209 dest = simplify_gen_subreg (vmode, dest, mode, 0);
17210 x = gen_rtx_AND (vmode, dest, mask);
17211 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17212
17213 if (op0 != CONST0_RTX (vmode))
17214 {
17215 x = gen_rtx_IOR (vmode, dest, op0);
17216 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17217 }
17218 }
17219
17220 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
17221 so we have to do two masks. */
17222
17223 void
17224 ix86_split_copysign_var (rtx operands[])
17225 {
17226 enum machine_mode mode, vmode;
17227 rtx dest, scratch, op0, op1, mask, nmask, x;
17228
17229 dest = operands[0];
17230 scratch = operands[1];
17231 op0 = operands[2];
17232 op1 = operands[3];
17233 nmask = operands[4];
17234 mask = operands[5];
17235
17236 mode = GET_MODE (dest);
17237 vmode = GET_MODE (mask);
17238
17239 if (rtx_equal_p (op0, op1))
17240 {
17241 /* Shouldn't happen often (it's useless, obviously), but when it does
17242 we'd generate incorrect code if we continue below. */
17243 emit_move_insn (dest, op0);
17244 return;
17245 }
17246
17247 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
17248 {
17249 gcc_assert (REGNO (op1) == REGNO (scratch));
17250
17251 x = gen_rtx_AND (vmode, scratch, mask);
17252 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
17253
17254 dest = mask;
17255 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
17256 x = gen_rtx_NOT (vmode, dest);
17257 x = gen_rtx_AND (vmode, x, op0);
17258 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17259 }
17260 else
17261 {
17262 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
17263 {
17264 x = gen_rtx_AND (vmode, scratch, mask);
17265 }
17266 else /* alternative 2,4 */
17267 {
17268 gcc_assert (REGNO (mask) == REGNO (scratch));
17269 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
17270 x = gen_rtx_AND (vmode, scratch, op1);
17271 }
17272 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
17273
17274 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
17275 {
17276 dest = simplify_gen_subreg (vmode, op0, mode, 0);
17277 x = gen_rtx_AND (vmode, dest, nmask);
17278 }
17279 else /* alternative 3,4 */
17280 {
17281 gcc_assert (REGNO (nmask) == REGNO (dest));
17282 dest = nmask;
17283 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
17284 x = gen_rtx_AND (vmode, dest, op0);
17285 }
17286 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17287 }
17288
17289 x = gen_rtx_IOR (vmode, dest, scratch);
17290 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17291 }
17292
17293 /* Return TRUE or FALSE depending on whether the first SET in INSN
17294 has source and destination with matching CC modes, and that the
17295 CC mode is at least as constrained as REQ_MODE. */
17296
17297 bool
17298 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
17299 {
17300 rtx set;
17301 enum machine_mode set_mode;
17302
17303 set = PATTERN (insn);
17304 if (GET_CODE (set) == PARALLEL)
17305 set = XVECEXP (set, 0, 0);
17306 gcc_assert (GET_CODE (set) == SET);
17307 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
17308
17309 set_mode = GET_MODE (SET_DEST (set));
17310 switch (set_mode)
17311 {
17312 case CCNOmode:
17313 if (req_mode != CCNOmode
17314 && (req_mode != CCmode
17315 || XEXP (SET_SRC (set), 1) != const0_rtx))
17316 return false;
17317 break;
17318 case CCmode:
17319 if (req_mode == CCGCmode)
17320 return false;
17321 /* FALLTHRU */
17322 case CCGCmode:
17323 if (req_mode == CCGOCmode || req_mode == CCNOmode)
17324 return false;
17325 /* FALLTHRU */
17326 case CCGOCmode:
17327 if (req_mode == CCZmode)
17328 return false;
17329 /* FALLTHRU */
17330 case CCZmode:
17331 break;
17332
17333 case CCAmode:
17334 case CCCmode:
17335 case CCOmode:
17336 case CCSmode:
17337 if (set_mode != req_mode)
17338 return false;
17339 break;
17340
17341 default:
17342 gcc_unreachable ();
17343 }
17344
17345 return GET_MODE (SET_SRC (set)) == set_mode;
17346 }
17347
17348 /* Generate insn patterns to do an integer compare of OPERANDS. */
17349
17350 static rtx
17351 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
17352 {
17353 enum machine_mode cmpmode;
17354 rtx tmp, flags;
17355
17356 cmpmode = SELECT_CC_MODE (code, op0, op1);
17357 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
17358
17359 /* This is very simple, but making the interface the same as in the
17360 FP case makes the rest of the code easier. */
17361 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
17362 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
17363
17364 /* Return the test that should be put into the flags user, i.e.
17365 the bcc, scc, or cmov instruction. */
17366 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
17367 }
17368
17369 /* Figure out whether to use ordered or unordered fp comparisons.
17370 Return the appropriate mode to use. */
17371
17372 enum machine_mode
17373 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
17374 {
17375 /* ??? In order to make all comparisons reversible, we do all comparisons
17376 non-trapping when compiling for IEEE. Once gcc is able to distinguish
17377 all forms trapping and nontrapping comparisons, we can make inequality
17378 comparisons trapping again, since it results in better code when using
17379 FCOM based compares. */
17380 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
17381 }
17382
17383 enum machine_mode
17384 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
17385 {
17386 enum machine_mode mode = GET_MODE (op0);
17387
17388 if (SCALAR_FLOAT_MODE_P (mode))
17389 {
17390 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
17391 return ix86_fp_compare_mode (code);
17392 }
17393
17394 switch (code)
17395 {
17396 /* Only zero flag is needed. */
17397 case EQ: /* ZF=0 */
17398 case NE: /* ZF!=0 */
17399 return CCZmode;
17400 /* Codes needing carry flag. */
17401 case GEU: /* CF=0 */
17402 case LTU: /* CF=1 */
17403 /* Detect overflow checks. They need just the carry flag. */
17404 if (GET_CODE (op0) == PLUS
17405 && rtx_equal_p (op1, XEXP (op0, 0)))
17406 return CCCmode;
17407 else
17408 return CCmode;
17409 case GTU: /* CF=0 & ZF=0 */
17410 case LEU: /* CF=1 | ZF=1 */
17411 /* Detect overflow checks. They need just the carry flag. */
17412 if (GET_CODE (op0) == MINUS
17413 && rtx_equal_p (op1, XEXP (op0, 0)))
17414 return CCCmode;
17415 else
17416 return CCmode;
17417 /* Codes possibly doable only with sign flag when
17418 comparing against zero. */
17419 case GE: /* SF=OF or SF=0 */
17420 case LT: /* SF<>OF or SF=1 */
17421 if (op1 == const0_rtx)
17422 return CCGOCmode;
17423 else
17424 /* For other cases Carry flag is not required. */
17425 return CCGCmode;
17426 /* Codes doable only with sign flag when comparing
17427 against zero, but we miss jump instruction for it
17428 so we need to use relational tests against overflow
17429 that thus needs to be zero. */
17430 case GT: /* ZF=0 & SF=OF */
17431 case LE: /* ZF=1 | SF<>OF */
17432 if (op1 == const0_rtx)
17433 return CCNOmode;
17434 else
17435 return CCGCmode;
17436 /* The strcmp pattern does (use flags) and combine may ask us for the
17437 proper mode.  */
17438 case USE:
17439 return CCmode;
17440 default:
17441 gcc_unreachable ();
17442 }
17443 }
17444
17445 /* Return the fixed registers used for condition codes. */
17446
17447 static bool
17448 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
17449 {
17450 *p1 = FLAGS_REG;
17451 *p2 = FPSR_REG;
17452 return true;
17453 }
17454
17455 /* If two condition code modes are compatible, return a condition code
17456 mode which is compatible with both. Otherwise, return
17457 VOIDmode. */
17458
17459 static enum machine_mode
17460 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
17461 {
17462 if (m1 == m2)
17463 return m1;
17464
17465 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
17466 return VOIDmode;
17467
17468 if ((m1 == CCGCmode && m2 == CCGOCmode)
17469 || (m1 == CCGOCmode && m2 == CCGCmode))
17470 return CCGCmode;
17471
17472 switch (m1)
17473 {
17474 default:
17475 gcc_unreachable ();
17476
17477 case CCmode:
17478 case CCGCmode:
17479 case CCGOCmode:
17480 case CCNOmode:
17481 case CCAmode:
17482 case CCCmode:
17483 case CCOmode:
17484 case CCSmode:
17485 case CCZmode:
17486 switch (m2)
17487 {
17488 default:
17489 return VOIDmode;
17490
17491 case CCmode:
17492 case CCGCmode:
17493 case CCGOCmode:
17494 case CCNOmode:
17495 case CCAmode:
17496 case CCCmode:
17497 case CCOmode:
17498 case CCSmode:
17499 case CCZmode:
17500 return CCmode;
17501 }
17502
17503 case CCFPmode:
17504 case CCFPUmode:
17505 /* These are only compatible with themselves, which we already
17506 checked above. */
17507 return VOIDmode;
17508 }
17509 }
17510
17511
17512 /* Return a comparison we can do that is equivalent to
17513 swap_condition (code), apart possibly from orderedness.
17514 But never change orderedness if TARGET_IEEE_FP, returning
17515 UNKNOWN in that case if necessary.  */
17516
17517 static enum rtx_code
17518 ix86_fp_swap_condition (enum rtx_code code)
17519 {
17520 switch (code)
17521 {
17522 case GT: /* GTU - CF=0 & ZF=0 */
17523 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
17524 case GE: /* GEU - CF=0 */
17525 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
17526 case UNLT: /* LTU - CF=1 */
17527 return TARGET_IEEE_FP ? UNKNOWN : GT;
17528 case UNLE: /* LEU - CF=1 | ZF=1 */
17529 return TARGET_IEEE_FP ? UNKNOWN : GE;
17530 default:
17531 return swap_condition (code);
17532 }
17533 }
17534
17535 /* Return the cost of comparison CODE using the best strategy for performance.
17536 All following functions use the number of instructions as the cost metric.
17537 In the future this should be tweaked to compute bytes for optimize_size and
17538 take into account the performance of various instructions on various CPUs.  */
17539
17540 static int
17541 ix86_fp_comparison_cost (enum rtx_code code)
17542 {
17543 int arith_cost;
17544
17545 /* The cost of code using bit-twiddling on %ah. */
17546 switch (code)
17547 {
17548 case UNLE:
17549 case UNLT:
17550 case LTGT:
17551 case GT:
17552 case GE:
17553 case UNORDERED:
17554 case ORDERED:
17555 case UNEQ:
17556 arith_cost = 4;
17557 break;
17558 case LT:
17559 case NE:
17560 case EQ:
17561 case UNGE:
17562 arith_cost = TARGET_IEEE_FP ? 5 : 4;
17563 break;
17564 case LE:
17565 case UNGT:
17566 arith_cost = TARGET_IEEE_FP ? 6 : 4;
17567 break;
17568 default:
17569 gcc_unreachable ();
17570 }
17571
17572 switch (ix86_fp_comparison_strategy (code))
17573 {
17574 case IX86_FPCMP_COMI:
17575 return arith_cost > 4 ? 3 : 2;
17576 case IX86_FPCMP_SAHF:
17577 return arith_cost > 4 ? 4 : 3;
17578 default:
17579 return arith_cost;
17580 }
17581 }
17582
17583 /* Return the strategy to use for floating-point comparisons.  We assume
17584 that fcomi is always preferable where available, since that is also true
17585 when looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test).  */
17586
17587 enum ix86_fpcmp_strategy
17588 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
17589 {
17590 /* Do fcomi/sahf based test when profitable. */
17591
17592 if (TARGET_CMOVE)
17593 return IX86_FPCMP_COMI;
17594
17595 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
17596 return IX86_FPCMP_SAHF;
17597
17598 return IX86_FPCMP_ARITH;
17599 }
17600
17601 /* Swap, force into registers, or otherwise massage the two operands
17602 to a fp comparison. The operands are updated in place; the new
17603 comparison code is returned. */
17604
17605 static enum rtx_code
17606 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
17607 {
17608 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
17609 rtx op0 = *pop0, op1 = *pop1;
17610 enum machine_mode op_mode = GET_MODE (op0);
17611 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
17612
17613 /* All of the unordered compare instructions only work on registers.
17614 The same is true of the fcomi compare instructions. The XFmode
17615 compare instructions require registers except when comparing
17616 against zero or when converting operand 1 from fixed point to
17617 floating point. */
17618
17619 if (!is_sse
17620 && (fpcmp_mode == CCFPUmode
17621 || (op_mode == XFmode
17622 && ! (standard_80387_constant_p (op0) == 1
17623 || standard_80387_constant_p (op1) == 1)
17624 && GET_CODE (op1) != FLOAT)
17625 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
17626 {
17627 op0 = force_reg (op_mode, op0);
17628 op1 = force_reg (op_mode, op1);
17629 }
17630 else
17631 {
17632 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
17633 things around if they appear profitable, otherwise force op0
17634 into a register. */
17635
17636 if (standard_80387_constant_p (op0) == 0
17637 || (MEM_P (op0)
17638 && ! (standard_80387_constant_p (op1) == 0
17639 || MEM_P (op1))))
17640 {
17641 enum rtx_code new_code = ix86_fp_swap_condition (code);
17642 if (new_code != UNKNOWN)
17643 {
17644 rtx tmp;
17645 tmp = op0, op0 = op1, op1 = tmp;
17646 code = new_code;
17647 }
17648 }
17649
17650 if (!REG_P (op0))
17651 op0 = force_reg (op_mode, op0);
17652
17653 if (CONSTANT_P (op1))
17654 {
17655 int tmp = standard_80387_constant_p (op1);
17656 if (tmp == 0)
17657 op1 = validize_mem (force_const_mem (op_mode, op1));
17658 else if (tmp == 1)
17659 {
17660 if (TARGET_CMOVE)
17661 op1 = force_reg (op_mode, op1);
17662 }
17663 else
17664 op1 = force_reg (op_mode, op1);
17665 }
17666 }
17667
17668 /* Try to rearrange the comparison to make it cheaper. */
17669 if (ix86_fp_comparison_cost (code)
17670 > ix86_fp_comparison_cost (swap_condition (code))
17671 && (REG_P (op1) || can_create_pseudo_p ()))
17672 {
17673 rtx tmp;
17674 tmp = op0, op0 = op1, op1 = tmp;
17675 code = swap_condition (code);
17676 if (!REG_P (op0))
17677 op0 = force_reg (op_mode, op0);
17678 }
17679
17680 *pop0 = op0;
17681 *pop1 = op1;
17682 return code;
17683 }
17684
17685 /* Convert comparison codes we use to represent FP comparison to integer
17686 code that will result in proper branch. Return UNKNOWN if no such code
17687 is available. */
17688
17689 enum rtx_code
17690 ix86_fp_compare_code_to_integer (enum rtx_code code)
17691 {
17692 switch (code)
17693 {
17694 case GT:
17695 return GTU;
17696 case GE:
17697 return GEU;
17698 case ORDERED:
17699 case UNORDERED:
17700 return code;
17701 break;
17702 case UNEQ:
17703 return EQ;
17704 break;
17705 case UNLT:
17706 return LTU;
17707 break;
17708 case UNLE:
17709 return LEU;
17710 break;
17711 case LTGT:
17712 return NE;
17713 break;
17714 default:
17715 return UNKNOWN;
17716 }
17717 }
17718
17719 /* Generate insn patterns to do a floating point compare of OPERANDS. */
17720
17721 static rtx
17722 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
17723 {
17724 enum machine_mode fpcmp_mode, intcmp_mode;
17725 rtx tmp, tmp2;
17726
17727 fpcmp_mode = ix86_fp_compare_mode (code);
17728 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
17729
17730 /* Do fcomi/sahf based test when profitable. */
17731 switch (ix86_fp_comparison_strategy (code))
17732 {
17733 case IX86_FPCMP_COMI:
17734 intcmp_mode = fpcmp_mode;
17735 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17736 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17737 tmp);
17738 emit_insn (tmp);
17739 break;
17740
17741 case IX86_FPCMP_SAHF:
17742 intcmp_mode = fpcmp_mode;
17743 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17744 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17745 tmp);
17746
17747 if (!scratch)
17748 scratch = gen_reg_rtx (HImode);
17749 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
17750 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
17751 break;
17752
17753 case IX86_FPCMP_ARITH:
17754 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
17755 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17756 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
17757 if (!scratch)
17758 scratch = gen_reg_rtx (HImode);
17759 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
17760
17761 /* In the unordered case, we have to check C2 for NaN's, which
17762 doesn't happen to work out to anything nice combination-wise.
17763 So do some bit twiddling on the value we've got in AH to come
17764 up with an appropriate set of condition codes. */
17765
17766 intcmp_mode = CCNOmode;
17767 switch (code)
17768 {
17769 case GT:
17770 case UNGT:
17771 if (code == GT || !TARGET_IEEE_FP)
17772 {
17773 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17774 code = EQ;
17775 }
17776 else
17777 {
17778 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17779 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17780 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
17781 intcmp_mode = CCmode;
17782 code = GEU;
17783 }
17784 break;
17785 case LT:
17786 case UNLT:
17787 if (code == LT && TARGET_IEEE_FP)
17788 {
17789 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17790 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
17791 intcmp_mode = CCmode;
17792 code = EQ;
17793 }
17794 else
17795 {
17796 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
17797 code = NE;
17798 }
17799 break;
17800 case GE:
17801 case UNGE:
17802 if (code == GE || !TARGET_IEEE_FP)
17803 {
17804 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
17805 code = EQ;
17806 }
17807 else
17808 {
17809 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17810 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
17811 code = NE;
17812 }
17813 break;
17814 case LE:
17815 case UNLE:
17816 if (code == LE && TARGET_IEEE_FP)
17817 {
17818 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17819 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17820 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17821 intcmp_mode = CCmode;
17822 code = LTU;
17823 }
17824 else
17825 {
17826 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17827 code = NE;
17828 }
17829 break;
17830 case EQ:
17831 case UNEQ:
17832 if (code == EQ && TARGET_IEEE_FP)
17833 {
17834 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17835 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17836 intcmp_mode = CCmode;
17837 code = EQ;
17838 }
17839 else
17840 {
17841 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17842 code = NE;
17843 }
17844 break;
17845 case NE:
17846 case LTGT:
17847 if (code == NE && TARGET_IEEE_FP)
17848 {
17849 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17850 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
17851 GEN_INT (0x40)));
17852 code = NE;
17853 }
17854 else
17855 {
17856 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17857 code = EQ;
17858 }
17859 break;
17860
17861 case UNORDERED:
17862 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17863 code = NE;
17864 break;
17865 case ORDERED:
17866 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17867 code = EQ;
17868 break;
17869
17870 default:
17871 gcc_unreachable ();
17872 }
17873 break;
17874
17875 default:
17876 gcc_unreachable ();
17877 }
17878
17879 /* Return the test that should be put into the flags user, i.e.
17880 the bcc, scc, or cmov instruction. */
17881 return gen_rtx_fmt_ee (code, VOIDmode,
17882 gen_rtx_REG (intcmp_mode, FLAGS_REG),
17883 const0_rtx);
17884 }
17885
17886 static rtx
17887 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
17888 {
17889 rtx ret;
17890
17891 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
17892 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
17893
17894 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
17895 {
17896 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
17897 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17898 }
17899 else
17900 ret = ix86_expand_int_compare (code, op0, op1);
17901
17902 return ret;
17903 }
17904
17905 void
17906 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
17907 {
17908 enum machine_mode mode = GET_MODE (op0);
17909 rtx tmp;
17910
17911 switch (mode)
17912 {
17913 case SFmode:
17914 case DFmode:
17915 case XFmode:
17916 case QImode:
17917 case HImode:
17918 case SImode:
17919 simple:
17920 tmp = ix86_expand_compare (code, op0, op1);
17921 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
17922 gen_rtx_LABEL_REF (VOIDmode, label),
17923 pc_rtx);
17924 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
17925 return;
17926
17927 case DImode:
17928 if (TARGET_64BIT)
17929 goto simple;
17930 case TImode:
17931 /* Expand DImode branch into multiple compare+branch. */
17932 {
17933 rtx lo[2], hi[2], label2;
17934 enum rtx_code code1, code2, code3;
17935 enum machine_mode submode;
17936
17937 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
17938 {
17939 tmp = op0, op0 = op1, op1 = tmp;
17940 code = swap_condition (code);
17941 }
17942
17943 split_double_mode (mode, &op0, 1, lo+0, hi+0);
17944 split_double_mode (mode, &op1, 1, lo+1, hi+1);
17945
17946 submode = mode == DImode ? SImode : DImode;
17947
17948 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
17949 avoid two branches. This costs one extra insn, so disable when
17950 optimizing for size. */
17951
17952 if ((code == EQ || code == NE)
17953 && (!optimize_insn_for_size_p ()
17954 || hi[1] == const0_rtx || lo[1] == const0_rtx))
17955 {
17956 rtx xor0, xor1;
17957
17958 xor1 = hi[0];
17959 if (hi[1] != const0_rtx)
17960 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
17961 NULL_RTX, 0, OPTAB_WIDEN);
17962
17963 xor0 = lo[0];
17964 if (lo[1] != const0_rtx)
17965 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
17966 NULL_RTX, 0, OPTAB_WIDEN);
17967
17968 tmp = expand_binop (submode, ior_optab, xor1, xor0,
17969 NULL_RTX, 0, OPTAB_WIDEN);
17970
17971 ix86_expand_branch (code, tmp, const0_rtx, label);
17972 return;
17973 }
17974
17975 /* Otherwise, if we are doing less-than or greater-or-equal-than,
17976 op1 is a constant and the low word is zero, then we can just
17977 examine the high word. Similarly for low word -1 and
17978 less-or-equal-than or greater-than. */
17979
17980 if (CONST_INT_P (hi[1]))
17981 switch (code)
17982 {
17983 case LT: case LTU: case GE: case GEU:
17984 if (lo[1] == const0_rtx)
17985 {
17986 ix86_expand_branch (code, hi[0], hi[1], label);
17987 return;
17988 }
17989 break;
17990 case LE: case LEU: case GT: case GTU:
17991 if (lo[1] == constm1_rtx)
17992 {
17993 ix86_expand_branch (code, hi[0], hi[1], label);
17994 return;
17995 }
17996 break;
17997 default:
17998 break;
17999 }
18000
18001 /* Otherwise, we need two or three jumps. */
18002
18003 label2 = gen_label_rtx ();
18004
18005 code1 = code;
18006 code2 = swap_condition (code);
18007 code3 = unsigned_condition (code);
18008
18009 switch (code)
18010 {
18011 case LT: case GT: case LTU: case GTU:
18012 break;
18013
18014 case LE: code1 = LT; code2 = GT; break;
18015 case GE: code1 = GT; code2 = LT; break;
18016 case LEU: code1 = LTU; code2 = GTU; break;
18017 case GEU: code1 = GTU; code2 = LTU; break;
18018
18019 case EQ: code1 = UNKNOWN; code2 = NE; break;
18020 case NE: code2 = UNKNOWN; break;
18021
18022 default:
18023 gcc_unreachable ();
18024 }
18025
18026 /*
18027 * a < b =>
18028 * if (hi(a) < hi(b)) goto true;
18029 * if (hi(a) > hi(b)) goto false;
18030 * if (lo(a) < lo(b)) goto true;
18031 * false:
18032 */
18033
18034 if (code1 != UNKNOWN)
18035 ix86_expand_branch (code1, hi[0], hi[1], label);
18036 if (code2 != UNKNOWN)
18037 ix86_expand_branch (code2, hi[0], hi[1], label2);
18038
18039 ix86_expand_branch (code3, lo[0], lo[1], label);
18040
18041 if (code2 != UNKNOWN)
18042 emit_label (label2);
18043 return;
18044 }
18045
18046 default:
18047 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
18048 goto simple;
18049 }
18050 }
18051
18052 /* Split branch based on floating point condition. */
18053 void
18054 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
18055 rtx target1, rtx target2, rtx tmp, rtx pushed)
18056 {
18057 rtx condition;
18058 rtx i;
18059
18060 if (target2 != pc_rtx)
18061 {
18062 rtx tmp = target2;
18063 code = reverse_condition_maybe_unordered (code);
18064 target2 = target1;
18065 target1 = tmp;
18066 }
18067
18068 condition = ix86_expand_fp_compare (code, op1, op2,
18069 tmp);
18070
18071 /* Remove pushed operand from stack. */
18072 if (pushed)
18073 ix86_free_from_memory (GET_MODE (pushed));
18074
18075 i = emit_jump_insn (gen_rtx_SET
18076 (VOIDmode, pc_rtx,
18077 gen_rtx_IF_THEN_ELSE (VOIDmode,
18078 condition, target1, target2)));
18079 if (split_branch_probability >= 0)
18080 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
18081 }
18082
18083 void
18084 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
18085 {
18086 rtx ret;
18087
18088 gcc_assert (GET_MODE (dest) == QImode);
18089
18090 ret = ix86_expand_compare (code, op0, op1);
18091 PUT_MODE (ret, QImode);
18092 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
18093 }
18094
18095 /* Expand comparison setting or clearing carry flag. Return true when
18096 successful and set pop for the operation. */
18097 static bool
18098 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
18099 {
18100 enum machine_mode mode =
18101 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
18102
18103 /* Do not handle double-mode compares that go through special path. */
18104 if (mode == (TARGET_64BIT ? TImode : DImode))
18105 return false;
18106
18107 if (SCALAR_FLOAT_MODE_P (mode))
18108 {
18109 rtx compare_op, compare_seq;
18110
18111 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
18112
18113 /* Shortcut: following common codes never translate
18114 into carry flag compares. */
18115 if (code == EQ || code == NE || code == UNEQ || code == LTGT
18116 || code == ORDERED || code == UNORDERED)
18117 return false;
18118
18119 /* These comparisons require zero flag; swap operands so they won't. */
18120 if ((code == GT || code == UNLE || code == LE || code == UNGT)
18121 && !TARGET_IEEE_FP)
18122 {
18123 rtx tmp = op0;
18124 op0 = op1;
18125 op1 = tmp;
18126 code = swap_condition (code);
18127 }
18128
18129 /* Try to expand the comparison and verify that we end up with
18130 a carry flag based comparison.  This fails only when we decide
18131 to expand the comparison using arithmetic, which is not a
18132 common scenario.  */
18133 start_sequence ();
18134 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
18135 compare_seq = get_insns ();
18136 end_sequence ();
18137
18138 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
18139 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
18140 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
18141 else
18142 code = GET_CODE (compare_op);
18143
18144 if (code != LTU && code != GEU)
18145 return false;
18146
18147 emit_insn (compare_seq);
18148 *pop = compare_op;
18149 return true;
18150 }
18151
18152 if (!INTEGRAL_MODE_P (mode))
18153 return false;
18154
18155 switch (code)
18156 {
18157 case LTU:
18158 case GEU:
18159 break;
18160
18161 /* Convert a==0 into (unsigned)a<1. */
18162 case EQ:
18163 case NE:
18164 if (op1 != const0_rtx)
18165 return false;
18166 op1 = const1_rtx;
18167 code = (code == EQ ? LTU : GEU);
18168 break;
18169
18170 /* Convert a>b into b<a or a>=b+1.  */
18171 case GTU:
18172 case LEU:
18173 if (CONST_INT_P (op1))
18174 {
18175 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
18176 /* Bail out on overflow. We still can swap operands but that
18177 would force loading of the constant into register. */
18178 if (op1 == const0_rtx
18179 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
18180 return false;
18181 code = (code == GTU ? GEU : LTU);
18182 }
18183 else
18184 {
18185 rtx tmp = op1;
18186 op1 = op0;
18187 op0 = tmp;
18188 code = (code == GTU ? LTU : GEU);
18189 }
18190 break;
18191
18192 /* Convert a>=0 into (unsigned)a<0x80000000. */
18193 case LT:
18194 case GE:
18195 if (mode == DImode || op1 != const0_rtx)
18196 return false;
18197 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
18198 code = (code == LT ? GEU : LTU);
18199 break;
18200 case LE:
18201 case GT:
18202 if (mode == DImode || op1 != constm1_rtx)
18203 return false;
18204 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
18205 code = (code == LE ? GEU : LTU);
18206 break;
18207
18208 default:
18209 return false;
18210 }
18211   /* Swapping the operands may cause a constant to appear as the first operand.  */
18212 if (!nonimmediate_operand (op0, VOIDmode))
18213 {
18214 if (!can_create_pseudo_p ())
18215 return false;
18216 op0 = force_reg (mode, op0);
18217 }
18218 *pop = ix86_expand_compare (code, op0, op1);
18219 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
18220 return true;
18221 }
18222
18223 bool
18224 ix86_expand_int_movcc (rtx operands[])
18225 {
18226 enum rtx_code code = GET_CODE (operands[1]), compare_code;
18227 rtx compare_seq, compare_op;
18228 enum machine_mode mode = GET_MODE (operands[0]);
18229 bool sign_bit_compare_p = false;
18230 rtx op0 = XEXP (operands[1], 0);
18231 rtx op1 = XEXP (operands[1], 1);
18232
18233 start_sequence ();
18234 compare_op = ix86_expand_compare (code, op0, op1);
18235 compare_seq = get_insns ();
18236 end_sequence ();
18237
18238 compare_code = GET_CODE (compare_op);
18239
18240 if ((op1 == const0_rtx && (code == GE || code == LT))
18241 || (op1 == constm1_rtx && (code == GT || code == LE)))
18242 sign_bit_compare_p = true;
18243
18244 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
18245 HImode insns, we'd be swallowed in word prefix ops. */
18246
18247 if ((mode != HImode || TARGET_FAST_PREFIX)
18248 && (mode != (TARGET_64BIT ? TImode : DImode))
18249 && CONST_INT_P (operands[2])
18250 && CONST_INT_P (operands[3]))
18251 {
18252 rtx out = operands[0];
18253 HOST_WIDE_INT ct = INTVAL (operands[2]);
18254 HOST_WIDE_INT cf = INTVAL (operands[3]);
18255 HOST_WIDE_INT diff;
18256
18257 diff = ct - cf;
18258       /* Sign bit compares are better done using shifts than by using
18259 	 sbb.  */
18260 if (sign_bit_compare_p
18261 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
18262 {
18263 /* Detect overlap between destination and compare sources. */
18264 rtx tmp = out;
18265
18266 if (!sign_bit_compare_p)
18267 {
18268 rtx flags;
18269 bool fpcmp = false;
18270
18271 compare_code = GET_CODE (compare_op);
18272
18273 flags = XEXP (compare_op, 0);
18274
18275 if (GET_MODE (flags) == CCFPmode
18276 || GET_MODE (flags) == CCFPUmode)
18277 {
18278 fpcmp = true;
18279 compare_code
18280 = ix86_fp_compare_code_to_integer (compare_code);
18281 }
18282
18283 	      /* To simplify the rest of the code, restrict to the GEU case.  */
18284 if (compare_code == LTU)
18285 {
18286 HOST_WIDE_INT tmp = ct;
18287 ct = cf;
18288 cf = tmp;
18289 compare_code = reverse_condition (compare_code);
18290 code = reverse_condition (code);
18291 }
18292 else
18293 {
18294 if (fpcmp)
18295 PUT_CODE (compare_op,
18296 reverse_condition_maybe_unordered
18297 (GET_CODE (compare_op)));
18298 else
18299 PUT_CODE (compare_op,
18300 reverse_condition (GET_CODE (compare_op)));
18301 }
18302 diff = ct - cf;
18303
18304 if (reg_overlap_mentioned_p (out, op0)
18305 || reg_overlap_mentioned_p (out, op1))
18306 tmp = gen_reg_rtx (mode);
18307
18308 if (mode == DImode)
18309 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
18310 else
18311 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
18312 flags, compare_op));
18313 }
18314 else
18315 {
18316 if (code == GT || code == GE)
18317 code = reverse_condition (code);
18318 else
18319 {
18320 HOST_WIDE_INT tmp = ct;
18321 ct = cf;
18322 cf = tmp;
18323 diff = ct - cf;
18324 }
18325 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
18326 }
18327
18328 if (diff == 1)
18329 {
18330 /*
18331 * cmpl op0,op1
18332 * sbbl dest,dest
18333 * [addl dest, ct]
18334 *
18335 * Size 5 - 8.
18336 */
18337 if (ct)
18338 tmp = expand_simple_binop (mode, PLUS,
18339 tmp, GEN_INT (ct),
18340 copy_rtx (tmp), 1, OPTAB_DIRECT);
18341 }
18342 else if (cf == -1)
18343 {
18344 /*
18345 * cmpl op0,op1
18346 * sbbl dest,dest
18347 * orl $ct, dest
18348 *
18349 * Size 8.
18350 */
18351 tmp = expand_simple_binop (mode, IOR,
18352 tmp, GEN_INT (ct),
18353 copy_rtx (tmp), 1, OPTAB_DIRECT);
18354 }
18355 else if (diff == -1 && ct)
18356 {
18357 /*
18358 * cmpl op0,op1
18359 * sbbl dest,dest
18360 * notl dest
18361 * [addl dest, cf]
18362 *
18363 * Size 8 - 11.
18364 */
18365 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
18366 if (cf)
18367 tmp = expand_simple_binop (mode, PLUS,
18368 copy_rtx (tmp), GEN_INT (cf),
18369 copy_rtx (tmp), 1, OPTAB_DIRECT);
18370 }
18371 else
18372 {
18373 /*
18374 * cmpl op0,op1
18375 * sbbl dest,dest
18376 * [notl dest]
18377 * andl cf - ct, dest
18378 * [addl dest, ct]
18379 *
18380 * Size 8 - 11.
18381 */
18382
18383 if (cf == 0)
18384 {
18385 cf = ct;
18386 ct = 0;
18387 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
18388 }
18389
18390 tmp = expand_simple_binop (mode, AND,
18391 copy_rtx (tmp),
18392 gen_int_mode (cf - ct, mode),
18393 copy_rtx (tmp), 1, OPTAB_DIRECT);
18394 if (ct)
18395 tmp = expand_simple_binop (mode, PLUS,
18396 copy_rtx (tmp), GEN_INT (ct),
18397 copy_rtx (tmp), 1, OPTAB_DIRECT);
18398 }
18399
18400 if (!rtx_equal_p (tmp, out))
18401 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
18402
18403 return true;
18404 }
18405
18406 if (diff < 0)
18407 {
18408 enum machine_mode cmp_mode = GET_MODE (op0);
18409
18410 HOST_WIDE_INT tmp;
18411 tmp = ct, ct = cf, cf = tmp;
18412 diff = -diff;
18413
18414 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18415 {
18416 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18417
18418 	      /* We may be reversing an unordered compare to a normal compare, which
18419 		 is not valid in general (we may convert a non-trapping condition
18420 		 to a trapping one); however, on i386 we currently emit all
18421 		 comparisons unordered.  */
18422 compare_code = reverse_condition_maybe_unordered (compare_code);
18423 code = reverse_condition_maybe_unordered (code);
18424 }
18425 else
18426 {
18427 compare_code = reverse_condition (compare_code);
18428 code = reverse_condition (code);
18429 }
18430 }
18431
18432 compare_code = UNKNOWN;
18433 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
18434 && CONST_INT_P (op1))
18435 {
18436 if (op1 == const0_rtx
18437 && (code == LT || code == GE))
18438 compare_code = code;
18439 else if (op1 == constm1_rtx)
18440 {
18441 if (code == LE)
18442 compare_code = LT;
18443 else if (code == GT)
18444 compare_code = GE;
18445 }
18446 }
18447
18448 /* Optimize dest = (op0 < 0) ? -1 : cf. */
18449 if (compare_code != UNKNOWN
18450 && GET_MODE (op0) == GET_MODE (out)
18451 && (cf == -1 || ct == -1))
18452 {
18453 	      /* If the lea code below could be used, only optimize
18454 		 if it results in a 2-insn sequence.  */
18455
18456 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
18457 || diff == 3 || diff == 5 || diff == 9)
18458 || (compare_code == LT && ct == -1)
18459 || (compare_code == GE && cf == -1))
18460 {
18461 /*
18462 * notl op1 (if necessary)
18463 * sarl $31, op1
18464 * orl cf, op1
18465 */
18466 if (ct != -1)
18467 {
18468 cf = ct;
18469 ct = -1;
18470 code = reverse_condition (code);
18471 }
18472
18473 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18474
18475 out = expand_simple_binop (mode, IOR,
18476 out, GEN_INT (cf),
18477 out, 1, OPTAB_DIRECT);
18478 if (out != operands[0])
18479 emit_move_insn (operands[0], out);
18480
18481 return true;
18482 }
18483 }
18484
18485
18486 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
18487 || diff == 3 || diff == 5 || diff == 9)
18488 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
18489 && (mode != DImode
18490 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
18491 {
18492 /*
18493 * xorl dest,dest
18494 * cmpl op1,op2
18495 * setcc dest
18496 * lea cf(dest*(ct-cf)),dest
18497 *
18498 * Size 14.
18499 *
18500 * This also catches the degenerate setcc-only case.
18501 */
18502
18503 rtx tmp;
18504 int nops;
18505
18506 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18507
18508 nops = 0;
18509 	  /* On x86_64 the lea instruction operates on Pmode, so we need
18510 	     to get the arithmetic done in the proper mode to match.  */
18511 if (diff == 1)
18512 tmp = copy_rtx (out);
18513 else
18514 {
18515 rtx out1;
18516 out1 = copy_rtx (out);
18517 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
18518 nops++;
18519 if (diff & 1)
18520 {
18521 tmp = gen_rtx_PLUS (mode, tmp, out1);
18522 nops++;
18523 }
18524 }
18525 if (cf != 0)
18526 {
18527 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
18528 nops++;
18529 }
18530 if (!rtx_equal_p (tmp, out))
18531 {
18532 if (nops == 1)
18533 out = force_operand (tmp, copy_rtx (out));
18534 else
18535 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
18536 }
18537 if (!rtx_equal_p (out, operands[0]))
18538 emit_move_insn (operands[0], copy_rtx (out));
18539
18540 return true;
18541 }
18542
18543 /*
18544 * General case: Jumpful:
18545 * xorl dest,dest cmpl op1, op2
18546 * cmpl op1, op2 movl ct, dest
18547 * setcc dest jcc 1f
18548 * decl dest movl cf, dest
18549 * andl (cf-ct),dest 1:
18550 * addl ct,dest
18551 *
18552 * Size 20. Size 14.
18553 *
18554 * This is reasonably steep, but branch mispredict costs are
18555 * high on modern cpus, so consider failing only if optimizing
18556 * for space.
18557 */
18558
18559 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18560 && BRANCH_COST (optimize_insn_for_speed_p (),
18561 false) >= 2)
18562 {
18563 if (cf == 0)
18564 {
18565 enum machine_mode cmp_mode = GET_MODE (op0);
18566
18567 cf = ct;
18568 ct = 0;
18569
18570 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18571 {
18572 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18573
18574 	      /* We may be reversing an unordered compare to a normal compare,
18575 		 which is not valid in general (we may convert a non-trapping
18576 		 condition to a trapping one); however, on i386 we currently
18577 		 emit all comparisons unordered.  */
18578 code = reverse_condition_maybe_unordered (code);
18579 }
18580 else
18581 {
18582 code = reverse_condition (code);
18583 if (compare_code != UNKNOWN)
18584 compare_code = reverse_condition (compare_code);
18585 }
18586 }
18587
18588 if (compare_code != UNKNOWN)
18589 {
18590 /* notl op1 (if needed)
18591 sarl $31, op1
18592 andl (cf-ct), op1
18593 addl ct, op1
18594
18595 For x < 0 (resp. x <= -1) there will be no notl,
18596 so if possible swap the constants to get rid of the
18597 complement.
18598 True/false will be -1/0 while code below (store flag
18599 followed by decrement) is 0/-1, so the constants need
18600 to be exchanged once more. */
18601
18602 if (compare_code == GE || !cf)
18603 {
18604 code = reverse_condition (code);
18605 compare_code = LT;
18606 }
18607 else
18608 {
18609 HOST_WIDE_INT tmp = cf;
18610 cf = ct;
18611 ct = tmp;
18612 }
18613
18614 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18615 }
18616 else
18617 {
18618 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18619
18620 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
18621 constm1_rtx,
18622 copy_rtx (out), 1, OPTAB_DIRECT);
18623 }
18624
18625 out = expand_simple_binop (mode, AND, copy_rtx (out),
18626 gen_int_mode (cf - ct, mode),
18627 copy_rtx (out), 1, OPTAB_DIRECT);
18628 if (ct)
18629 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
18630 copy_rtx (out), 1, OPTAB_DIRECT);
18631 if (!rtx_equal_p (out, operands[0]))
18632 emit_move_insn (operands[0], copy_rtx (out));
18633
18634 return true;
18635 }
18636 }
18637
18638 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18639 {
18640 /* Try a few things more with specific constants and a variable. */
18641
18642 optab op;
18643 rtx var, orig_out, out, tmp;
18644
18645 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
18646 return false;
18647
18648 /* If one of the two operands is an interesting constant, load a
18649 constant with the above and mask it in with a logical operation. */
18650
18651 if (CONST_INT_P (operands[2]))
18652 {
18653 var = operands[3];
18654 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
18655 operands[3] = constm1_rtx, op = and_optab;
18656 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
18657 operands[3] = const0_rtx, op = ior_optab;
18658 else
18659 return false;
18660 }
18661 else if (CONST_INT_P (operands[3]))
18662 {
18663 var = operands[2];
18664 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
18665 operands[2] = constm1_rtx, op = and_optab;
18666 	  else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
18667 operands[2] = const0_rtx, op = ior_optab;
18668 else
18669 return false;
18670 }
18671 else
18672 return false;
18673
18674 orig_out = operands[0];
18675 tmp = gen_reg_rtx (mode);
18676 operands[0] = tmp;
18677
18678 /* Recurse to get the constant loaded. */
18679 if (ix86_expand_int_movcc (operands) == 0)
18680 return false;
18681
18682 /* Mask in the interesting variable. */
18683 out = expand_binop (mode, op, var, tmp, orig_out, 0,
18684 OPTAB_WIDEN);
18685 if (!rtx_equal_p (out, orig_out))
18686 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
18687
18688 return true;
18689 }
18690
18691 /*
18692 * For comparison with above,
18693 *
18694 * movl cf,dest
18695 * movl ct,tmp
18696 * cmpl op1,op2
18697 * cmovcc tmp,dest
18698 *
18699 * Size 15.
18700 */
18701
18702 if (! nonimmediate_operand (operands[2], mode))
18703 operands[2] = force_reg (mode, operands[2]);
18704 if (! nonimmediate_operand (operands[3], mode))
18705 operands[3] = force_reg (mode, operands[3]);
18706
18707 if (! register_operand (operands[2], VOIDmode)
18708 && (mode == QImode
18709 || ! register_operand (operands[3], VOIDmode)))
18710 operands[2] = force_reg (mode, operands[2]);
18711
18712 if (mode == QImode
18713 && ! register_operand (operands[3], VOIDmode))
18714 operands[3] = force_reg (mode, operands[3]);
18715
18716 emit_insn (compare_seq);
18717 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18718 gen_rtx_IF_THEN_ELSE (mode,
18719 compare_op, operands[2],
18720 operands[3])));
18721 return true;
18722 }
18723
18724 /* Swap, force into registers, or otherwise massage the two operands
18725 to an sse comparison with a mask result. Thus we differ a bit from
18726 ix86_prepare_fp_compare_args which expects to produce a flags result.
18727
18728 The DEST operand exists to help determine whether to commute commutative
18729 operators. The POP0/POP1 operands are updated in place. The new
18730 comparison code is returned, or UNKNOWN if not implementable. */
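/* For example, GE is not available as a non-AVX SSE compare, so
   "a >= b" is rewritten below as "b <= a" by swapping the operands.  */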
18731
18732 static enum rtx_code
18733 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
18734 rtx *pop0, rtx *pop1)
18735 {
18736 rtx tmp;
18737
18738 /* AVX supports all the needed comparisons, no need to swap arguments
18739 nor help reload. */
18740 if (TARGET_AVX)
18741 return code;
18742
18743 switch (code)
18744 {
18745 case LTGT:
18746 case UNEQ:
18747 /* We have no LTGT as an operator. We could implement it with
18748 NE & ORDERED, but this requires an extra temporary. It's
18749 not clear that it's worth it. */
18750 return UNKNOWN;
18751
18752 case LT:
18753 case LE:
18754 case UNGT:
18755 case UNGE:
18756 /* These are supported directly. */
18757 break;
18758
18759 case EQ:
18760 case NE:
18761 case UNORDERED:
18762 case ORDERED:
18763 /* For commutative operators, try to canonicalize the destination
18764 operand to be first in the comparison - this helps reload to
18765 avoid extra moves. */
18766 if (!dest || !rtx_equal_p (dest, *pop1))
18767 break;
18768 /* FALLTHRU */
18769
18770 case GE:
18771 case GT:
18772 case UNLE:
18773 case UNLT:
18774 /* These are not supported directly. Swap the comparison operands
18775 to transform into something that is supported. */
18776 tmp = *pop0;
18777 *pop0 = *pop1;
18778 *pop1 = tmp;
18779 code = swap_condition (code);
18780 break;
18781
18782 default:
18783 gcc_unreachable ();
18784 }
18785
18786 return code;
18787 }
18788
18789 /* Detect conditional moves that exactly match min/max operational
18790 semantics. Note that this is IEEE safe, as long as we don't
18791 interchange the operands.
18792
18793 Returns FALSE if this conditional move doesn't match a MIN/MAX,
18794 and TRUE if the operation is successful and instructions are emitted. */
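/* For example, "dest = a < b ? a : b" with the compare operands matching
   the move operands is recognized here as a minimum and emitted as a
   single min pattern (or as an IEEE-safe unspec when NaNs and signed
   zeros must be honored).  */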
18795
18796 static bool
18797 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
18798 rtx cmp_op1, rtx if_true, rtx if_false)
18799 {
18800 enum machine_mode mode;
18801 bool is_min;
18802 rtx tmp;
18803
18804 if (code == LT)
18805 ;
18806 else if (code == UNGE)
18807 {
18808 tmp = if_true;
18809 if_true = if_false;
18810 if_false = tmp;
18811 }
18812 else
18813 return false;
18814
18815 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
18816 is_min = true;
18817 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
18818 is_min = false;
18819 else
18820 return false;
18821
18822 mode = GET_MODE (dest);
18823
18824 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
18825 but MODE may be a vector mode and thus not appropriate. */
18826 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
18827 {
18828 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
18829 rtvec v;
18830
18831 if_true = force_reg (mode, if_true);
18832 v = gen_rtvec (2, if_true, if_false);
18833 tmp = gen_rtx_UNSPEC (mode, v, u);
18834 }
18835 else
18836 {
18837 code = is_min ? SMIN : SMAX;
18838 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
18839 }
18840
18841 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
18842 return true;
18843 }
18844
18845 /* Expand an sse vector comparison. Return the register with the result. */
18846
18847 static rtx
18848 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
18849 rtx op_true, rtx op_false)
18850 {
18851 enum machine_mode mode = GET_MODE (dest);
18852 enum machine_mode cmp_mode = GET_MODE (cmp_op0);
18853 rtx x;
18854
18855 cmp_op0 = force_reg (cmp_mode, cmp_op0);
18856 if (!nonimmediate_operand (cmp_op1, cmp_mode))
18857 cmp_op1 = force_reg (cmp_mode, cmp_op1);
18858
18859 if (optimize
18860 || reg_overlap_mentioned_p (dest, op_true)
18861 || reg_overlap_mentioned_p (dest, op_false))
18862 dest = gen_reg_rtx (mode);
18863
18864 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
18865 if (cmp_mode != mode)
18866 {
18867 x = force_reg (cmp_mode, x);
18868 convert_move (dest, x, false);
18869 }
18870 else
18871 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18872
18873 return dest;
18874 }
18875
18876 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
18877 operations. This is used for both scalar and vector conditional moves. */
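/* Illustratively, the general case below computes
     dest = (cmp & op_true) | (~cmp & op_false)
   while the constant-zero special cases and the TARGET_XOP path avoid
   some of these operations.  */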
18878
18879 static void
18880 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
18881 {
18882 enum machine_mode mode = GET_MODE (dest);
18883 rtx t2, t3, x;
18884
18885 if (op_false == CONST0_RTX (mode))
18886 {
18887 op_true = force_reg (mode, op_true);
18888 x = gen_rtx_AND (mode, cmp, op_true);
18889 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18890 }
18891 else if (op_true == CONST0_RTX (mode))
18892 {
18893 op_false = force_reg (mode, op_false);
18894 x = gen_rtx_NOT (mode, cmp);
18895 x = gen_rtx_AND (mode, x, op_false);
18896 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18897 }
18898 else if (TARGET_XOP)
18899 {
18900 op_true = force_reg (mode, op_true);
18901
18902 if (!nonimmediate_operand (op_false, mode))
18903 op_false = force_reg (mode, op_false);
18904
18905 emit_insn (gen_rtx_SET (mode, dest,
18906 gen_rtx_IF_THEN_ELSE (mode, cmp,
18907 op_true,
18908 op_false)));
18909 }
18910 else
18911 {
18912 op_true = force_reg (mode, op_true);
18913 op_false = force_reg (mode, op_false);
18914
18915 t2 = gen_reg_rtx (mode);
18916 if (optimize)
18917 t3 = gen_reg_rtx (mode);
18918 else
18919 t3 = dest;
18920
18921 x = gen_rtx_AND (mode, op_true, cmp);
18922 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
18923
18924 x = gen_rtx_NOT (mode, cmp);
18925 x = gen_rtx_AND (mode, x, op_false);
18926 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
18927
18928 x = gen_rtx_IOR (mode, t3, t2);
18929 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18930 }
18931 }
18932
18933 /* Expand a floating-point conditional move. Return true if successful. */
18934
18935 bool
18936 ix86_expand_fp_movcc (rtx operands[])
18937 {
18938 enum machine_mode mode = GET_MODE (operands[0]);
18939 enum rtx_code code = GET_CODE (operands[1]);
18940 rtx tmp, compare_op;
18941 rtx op0 = XEXP (operands[1], 0);
18942 rtx op1 = XEXP (operands[1], 1);
18943
18944 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
18945 {
18946 enum machine_mode cmode;
18947
18948 /* Since we've no cmove for sse registers, don't force bad register
18949 allocation just to gain access to it. Deny movcc when the
18950 comparison mode doesn't match the move mode. */
18951 cmode = GET_MODE (op0);
18952 if (cmode == VOIDmode)
18953 cmode = GET_MODE (op1);
18954 if (cmode != mode)
18955 return false;
18956
18957 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
18958 if (code == UNKNOWN)
18959 return false;
18960
18961 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
18962 operands[2], operands[3]))
18963 return true;
18964
18965 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
18966 operands[2], operands[3]);
18967 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
18968 return true;
18969 }
18970
18971 /* The floating point conditional move instructions don't directly
18972 support conditions resulting from a signed integer comparison. */
18973
18974 compare_op = ix86_expand_compare (code, op0, op1);
18975 if (!fcmov_comparison_operator (compare_op, VOIDmode))
18976 {
18977 tmp = gen_reg_rtx (QImode);
18978 ix86_expand_setcc (tmp, code, op0, op1);
18979
18980 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
18981 }
18982
18983 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18984 gen_rtx_IF_THEN_ELSE (mode, compare_op,
18985 operands[2], operands[3])));
18986
18987 return true;
18988 }
18989
18990 /* Expand a floating-point vector conditional move; a vcond operation
18991 rather than a movcc operation. */
18992
18993 bool
18994 ix86_expand_fp_vcond (rtx operands[])
18995 {
18996 enum rtx_code code = GET_CODE (operands[3]);
18997 rtx cmp;
18998
18999 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
19000 &operands[4], &operands[5]);
19001 if (code == UNKNOWN)
19002 {
19003 rtx temp;
19004 switch (GET_CODE (operands[3]))
19005 {
19006 case LTGT:
19007 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
19008 operands[5], operands[0], operands[0]);
19009 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
19010 operands[5], operands[1], operands[2]);
19011 code = AND;
19012 break;
19013 case UNEQ:
19014 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
19015 operands[5], operands[0], operands[0]);
19016 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
19017 operands[5], operands[1], operands[2]);
19018 code = IOR;
19019 break;
19020 default:
19021 gcc_unreachable ();
19022 }
19023 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
19024 OPTAB_DIRECT);
19025 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
19026 return true;
19027 }
19028
19029 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
19030 operands[5], operands[1], operands[2]))
19031 return true;
19032
19033 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
19034 operands[1], operands[2]);
19035 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
19036 return true;
19037 }
19038
19039 /* Expand a signed/unsigned integral vector conditional move. */
19040
19041 bool
19042 ix86_expand_int_vcond (rtx operands[])
19043 {
19044 enum machine_mode mode = GET_MODE (operands[0]);
19045 enum rtx_code code = GET_CODE (operands[3]);
19046 bool negate = false;
19047 rtx x, cop0, cop1;
19048
19049 cop0 = operands[4];
19050 cop1 = operands[5];
19051
19052 /* XOP supports all of the comparisons on all vector int types. */
19053 if (!TARGET_XOP)
19054 {
19055 /* Canonicalize the comparison to EQ, GT, GTU. */
19056 switch (code)
19057 {
19058 case EQ:
19059 case GT:
19060 case GTU:
19061 break;
19062
19063 case NE:
19064 case LE:
19065 case LEU:
19066 code = reverse_condition (code);
19067 negate = true;
19068 break;
19069
19070 case GE:
19071 case GEU:
19072 code = reverse_condition (code);
19073 negate = true;
19074 /* FALLTHRU */
19075
19076 case LT:
19077 case LTU:
19078 code = swap_condition (code);
19079 x = cop0, cop0 = cop1, cop1 = x;
19080 break;
19081
19082 default:
19083 gcc_unreachable ();
19084 }
19085
19086 /* Only SSE4.1/SSE4.2 supports V2DImode. */
19087 if (mode == V2DImode)
19088 {
19089 switch (code)
19090 {
19091 case EQ:
19092 /* SSE4.1 supports EQ. */
19093 if (!TARGET_SSE4_1)
19094 return false;
19095 break;
19096
19097 case GT:
19098 case GTU:
19099 /* SSE4.2 supports GT/GTU. */
19100 if (!TARGET_SSE4_2)
19101 return false;
19102 break;
19103
19104 default:
19105 gcc_unreachable ();
19106 }
19107 }
19108
19109 /* Unsigned parallel compare is not supported by the hardware.
19110 Play some tricks to turn this into a signed comparison
19111 against 0. */
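      /* Illustratively, for each element:  a >u b  is equivalent to
	 (a - 0x80..0) >s (b - 0x80..0), which is what the subtraction of
	 the sign-bit mask below implements; the narrow modes instead use
	 a saturating subtraction and test the result against zero.  */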
19112 if (code == GTU)
19113 {
19114 cop0 = force_reg (mode, cop0);
19115
19116 switch (mode)
19117 {
19118 case V8SImode:
19119 case V4DImode:
19120 case V4SImode:
19121 case V2DImode:
19122 {
19123 rtx t1, t2, mask;
19124 rtx (*gen_sub3) (rtx, rtx, rtx);
19125
19126 switch (mode)
19127 {
19128 case V8SImode: gen_sub3 = gen_subv8si3; break;
19129 case V4DImode: gen_sub3 = gen_subv4di3; break;
19130 case V4SImode: gen_sub3 = gen_subv4si3; break;
19131 case V2DImode: gen_sub3 = gen_subv2di3; break;
19132 default:
19133 gcc_unreachable ();
19134 }
19135 /* Subtract (-(INT MAX) - 1) from both operands to make
19136 them signed. */
19137 mask = ix86_build_signbit_mask (mode, true, false);
19138 t1 = gen_reg_rtx (mode);
19139 emit_insn (gen_sub3 (t1, cop0, mask));
19140
19141 t2 = gen_reg_rtx (mode);
19142 emit_insn (gen_sub3 (t2, cop1, mask));
19143
19144 cop0 = t1;
19145 cop1 = t2;
19146 code = GT;
19147 }
19148 break;
19149
19150 case V32QImode:
19151 case V16HImode:
19152 case V16QImode:
19153 case V8HImode:
19154 /* Perform a parallel unsigned saturating subtraction. */
19155 x = gen_reg_rtx (mode);
19156 emit_insn (gen_rtx_SET (VOIDmode, x,
19157 gen_rtx_US_MINUS (mode, cop0, cop1)));
19158
19159 cop0 = x;
19160 cop1 = CONST0_RTX (mode);
19161 code = EQ;
19162 negate = !negate;
19163 break;
19164
19165 default:
19166 gcc_unreachable ();
19167 }
19168 }
19169 }
19170
19171 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
19172 operands[1+negate], operands[2-negate]);
19173
19174 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
19175 operands[2-negate]);
19176 return true;
19177 }
19178
19179 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
19180 true if we should do zero extension, else sign extension. HIGH_P is
19181 true if we want the N/2 high elements, else the low elements. */
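/* For example, unpacking the low half of a V16QImode operand with
   UNSIGNED_P set produces a V8HImode result whose elements are the
   zero-extended low eight bytes.  */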
19182
19183 void
19184 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
19185 {
19186 enum machine_mode imode = GET_MODE (operands[1]);
19187 rtx tmp, dest;
19188
19189 if (TARGET_SSE4_1)
19190 {
19191 rtx (*unpack)(rtx, rtx);
19192
19193 switch (imode)
19194 {
19195 case V16QImode:
19196 if (unsigned_p)
19197 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
19198 else
19199 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
19200 break;
19201 case V8HImode:
19202 if (unsigned_p)
19203 unpack = gen_sse4_1_zero_extendv4hiv4si2;
19204 else
19205 unpack = gen_sse4_1_sign_extendv4hiv4si2;
19206 break;
19207 case V4SImode:
19208 if (unsigned_p)
19209 unpack = gen_sse4_1_zero_extendv2siv2di2;
19210 else
19211 unpack = gen_sse4_1_sign_extendv2siv2di2;
19212 break;
19213 default:
19214 gcc_unreachable ();
19215 }
19216
19217 if (high_p)
19218 {
19219 /* Shift higher 8 bytes to lower 8 bytes. */
19220 tmp = gen_reg_rtx (imode);
19221 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
19222 gen_lowpart (V1TImode, operands[1]),
19223 GEN_INT (64)));
19224 }
19225 else
19226 tmp = operands[1];
19227
19228 emit_insn (unpack (operands[0], tmp));
19229 }
19230 else
19231 {
19232 rtx (*unpack)(rtx, rtx, rtx);
19233
19234 switch (imode)
19235 {
19236 case V16QImode:
19237 if (high_p)
19238 unpack = gen_vec_interleave_highv16qi;
19239 else
19240 unpack = gen_vec_interleave_lowv16qi;
19241 break;
19242 case V8HImode:
19243 if (high_p)
19244 unpack = gen_vec_interleave_highv8hi;
19245 else
19246 unpack = gen_vec_interleave_lowv8hi;
19247 break;
19248 case V4SImode:
19249 if (high_p)
19250 unpack = gen_vec_interleave_highv4si;
19251 else
19252 unpack = gen_vec_interleave_lowv4si;
19253 break;
19254 default:
19255 gcc_unreachable ();
19256 }
19257
19258 dest = gen_lowpart (imode, operands[0]);
19259
19260 if (unsigned_p)
19261 tmp = force_reg (imode, CONST0_RTX (imode));
19262 else
19263 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
19264 operands[1], pc_rtx, pc_rtx);
19265
19266 emit_insn (unpack (dest, operands[1], tmp));
19267 }
19268 }
19269
19270 /* Expand conditional increment or decrement using adc/sbb instructions.
19271    The default case using setcc followed by the conditional move can be
19272    done by generic code.  */
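/* Illustratively, an unsigned "x = y + (a < b)" can be expanded into a
   compare that leaves the carry flag equal to (a < b), followed by an
   add-with-carry of zero into the destination.  */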
19273 bool
19274 ix86_expand_int_addcc (rtx operands[])
19275 {
19276 enum rtx_code code = GET_CODE (operands[1]);
19277 rtx flags;
19278 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
19279 rtx compare_op;
19280 rtx val = const0_rtx;
19281 bool fpcmp = false;
19282 enum machine_mode mode;
19283 rtx op0 = XEXP (operands[1], 0);
19284 rtx op1 = XEXP (operands[1], 1);
19285
19286 if (operands[3] != const1_rtx
19287 && operands[3] != constm1_rtx)
19288 return false;
19289 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
19290 return false;
19291 code = GET_CODE (compare_op);
19292
19293 flags = XEXP (compare_op, 0);
19294
19295 if (GET_MODE (flags) == CCFPmode
19296 || GET_MODE (flags) == CCFPUmode)
19297 {
19298 fpcmp = true;
19299 code = ix86_fp_compare_code_to_integer (code);
19300 }
19301
19302 if (code != LTU)
19303 {
19304 val = constm1_rtx;
19305 if (fpcmp)
19306 PUT_CODE (compare_op,
19307 reverse_condition_maybe_unordered
19308 (GET_CODE (compare_op)));
19309 else
19310 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
19311 }
19312
19313 mode = GET_MODE (operands[0]);
19314
19315 /* Construct either adc or sbb insn. */
19316 if ((code == LTU) == (operands[3] == constm1_rtx))
19317 {
19318 switch (mode)
19319 {
19320 case QImode:
19321 insn = gen_subqi3_carry;
19322 break;
19323 case HImode:
19324 insn = gen_subhi3_carry;
19325 break;
19326 case SImode:
19327 insn = gen_subsi3_carry;
19328 break;
19329 case DImode:
19330 insn = gen_subdi3_carry;
19331 break;
19332 default:
19333 gcc_unreachable ();
19334 }
19335 }
19336 else
19337 {
19338 switch (mode)
19339 {
19340 case QImode:
19341 insn = gen_addqi3_carry;
19342 break;
19343 case HImode:
19344 insn = gen_addhi3_carry;
19345 break;
19346 case SImode:
19347 insn = gen_addsi3_carry;
19348 break;
19349 case DImode:
19350 insn = gen_adddi3_carry;
19351 break;
19352 default:
19353 gcc_unreachable ();
19354 }
19355 }
19356 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
19357
19358 return true;
19359 }
19360
19361
19362 /* Split OPERAND into half-mode-sized parts.  Similar to split_double_mode,
19363    but works for floating point parameters and non-offsettable memories.
19364    For pushes, it returns just stack offsets; the values will be saved
19365    in the right order.  Maximally four parts are generated.  */
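/* For example, on a 32-bit target an XFmode operand splits into three
   SImode parts and a TFmode operand into four; on a 64-bit target both
   split into a DImode low part plus one upper part.  */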
19366
19367 static int
19368 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
19369 {
19370 int size;
19371
19372 if (!TARGET_64BIT)
19373 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
19374 else
19375 size = (GET_MODE_SIZE (mode) + 4) / 8;
19376
19377 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
19378 gcc_assert (size >= 2 && size <= 4);
19379
19380   /* Optimize constant pool references to immediates.  This is used by fp
19381      moves, which force all constants to memory to allow combining.  */
19382 if (MEM_P (operand) && MEM_READONLY_P (operand))
19383 {
19384 rtx tmp = maybe_get_pool_constant (operand);
19385 if (tmp)
19386 operand = tmp;
19387 }
19388
19389 if (MEM_P (operand) && !offsettable_memref_p (operand))
19390 {
19391       /* The only non-offsettable memories we handle are pushes.  */
19392 int ok = push_operand (operand, VOIDmode);
19393
19394 gcc_assert (ok);
19395
19396 operand = copy_rtx (operand);
19397 PUT_MODE (operand, Pmode);
19398 parts[0] = parts[1] = parts[2] = parts[3] = operand;
19399 return size;
19400 }
19401
19402 if (GET_CODE (operand) == CONST_VECTOR)
19403 {
19404 enum machine_mode imode = int_mode_for_mode (mode);
19405 /* Caution: if we looked through a constant pool memory above,
19406 the operand may actually have a different mode now. That's
19407 ok, since we want to pun this all the way back to an integer. */
19408 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
19409 gcc_assert (operand != NULL);
19410 mode = imode;
19411 }
19412
19413 if (!TARGET_64BIT)
19414 {
19415 if (mode == DImode)
19416 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
19417 else
19418 {
19419 int i;
19420
19421 if (REG_P (operand))
19422 {
19423 gcc_assert (reload_completed);
19424 for (i = 0; i < size; i++)
19425 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
19426 }
19427 else if (offsettable_memref_p (operand))
19428 {
19429 operand = adjust_address (operand, SImode, 0);
19430 parts[0] = operand;
19431 for (i = 1; i < size; i++)
19432 parts[i] = adjust_address (operand, SImode, 4 * i);
19433 }
19434 else if (GET_CODE (operand) == CONST_DOUBLE)
19435 {
19436 REAL_VALUE_TYPE r;
19437 long l[4];
19438
19439 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
19440 switch (mode)
19441 {
19442 case TFmode:
19443 real_to_target (l, &r, mode);
19444 parts[3] = gen_int_mode (l[3], SImode);
19445 parts[2] = gen_int_mode (l[2], SImode);
19446 break;
19447 case XFmode:
19448 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
19449 parts[2] = gen_int_mode (l[2], SImode);
19450 break;
19451 case DFmode:
19452 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
19453 break;
19454 default:
19455 gcc_unreachable ();
19456 }
19457 parts[1] = gen_int_mode (l[1], SImode);
19458 parts[0] = gen_int_mode (l[0], SImode);
19459 }
19460 else
19461 gcc_unreachable ();
19462 }
19463 }
19464 else
19465 {
19466 if (mode == TImode)
19467 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
19468 if (mode == XFmode || mode == TFmode)
19469 {
19470 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
19471 if (REG_P (operand))
19472 {
19473 gcc_assert (reload_completed);
19474 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
19475 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
19476 }
19477 else if (offsettable_memref_p (operand))
19478 {
19479 operand = adjust_address (operand, DImode, 0);
19480 parts[0] = operand;
19481 parts[1] = adjust_address (operand, upper_mode, 8);
19482 }
19483 else if (GET_CODE (operand) == CONST_DOUBLE)
19484 {
19485 REAL_VALUE_TYPE r;
19486 long l[4];
19487
19488 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
19489 real_to_target (l, &r, mode);
19490
19491 /* Do not use shift by 32 to avoid warning on 32bit systems. */
19492 if (HOST_BITS_PER_WIDE_INT >= 64)
19493 parts[0]
19494 = gen_int_mode
19495 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
19496 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
19497 DImode);
19498 else
19499 parts[0] = immed_double_const (l[0], l[1], DImode);
19500
19501 if (upper_mode == SImode)
19502 parts[1] = gen_int_mode (l[2], SImode);
19503 else if (HOST_BITS_PER_WIDE_INT >= 64)
19504 parts[1]
19505 = gen_int_mode
19506 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
19507 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
19508 DImode);
19509 else
19510 parts[1] = immed_double_const (l[2], l[3], DImode);
19511 }
19512 else
19513 gcc_unreachable ();
19514 }
19515 }
19516
19517 return size;
19518 }
19519
19520 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
19521    All required insns are emitted directly.  Operands 2-5 receive the
19522    destination parts in the correct order; operands 6-9 receive the
19523    corresponding source parts.  */
19524
19525 void
19526 ix86_split_long_move (rtx operands[])
19527 {
19528 rtx part[2][4];
19529 int nparts, i, j;
19530 int push = 0;
19531 int collisions = 0;
19532 enum machine_mode mode = GET_MODE (operands[0]);
19533 bool collisionparts[4];
19534
19535   /* The DFmode expanders may ask us to move a double.
19536      For a 64-bit target this is a single move.  By hiding the fact
19537      here we simplify the i386.md splitters.  */
19538 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
19539 {
19540       /* Optimize constant pool references to immediates.  This is used by
19541 	 fp moves, which force all constants to memory to allow combining.  */
19542
19543 if (MEM_P (operands[1])
19544 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
19545 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
19546 operands[1] = get_pool_constant (XEXP (operands[1], 0));
19547 if (push_operand (operands[0], VOIDmode))
19548 {
19549 operands[0] = copy_rtx (operands[0]);
19550 PUT_MODE (operands[0], Pmode);
19551 }
19552 else
19553 operands[0] = gen_lowpart (DImode, operands[0]);
19554 operands[1] = gen_lowpart (DImode, operands[1]);
19555 emit_move_insn (operands[0], operands[1]);
19556 return;
19557 }
19558
19559 /* The only non-offsettable memory we handle is push. */
19560 if (push_operand (operands[0], VOIDmode))
19561 push = 1;
19562 else
19563 gcc_assert (!MEM_P (operands[0])
19564 || offsettable_memref_p (operands[0]));
19565
19566 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
19567 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
19568
19569   /* When emitting a push, take care of source operands on the stack.  */
19570 if (push && MEM_P (operands[1])
19571 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
19572 {
19573 rtx src_base = XEXP (part[1][nparts - 1], 0);
19574
19575 /* Compensate for the stack decrement by 4. */
19576 if (!TARGET_64BIT && nparts == 3
19577 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
19578 src_base = plus_constant (src_base, 4);
19579
19580 /* src_base refers to the stack pointer and is
19581 automatically decreased by emitted push. */
19582 for (i = 0; i < nparts; i++)
19583 part[1][i] = change_address (part[1][i],
19584 GET_MODE (part[1][i]), src_base);
19585 }
19586
19587   /* We need to do the copy in the right order in case an address register
19588      of the source overlaps the destination.  */
19589 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
19590 {
19591 rtx tmp;
19592
19593 for (i = 0; i < nparts; i++)
19594 {
19595 collisionparts[i]
19596 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
19597 if (collisionparts[i])
19598 collisions++;
19599 }
19600
19601 /* Collision in the middle part can be handled by reordering. */
19602 if (collisions == 1 && nparts == 3 && collisionparts [1])
19603 {
19604 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19605 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19606 }
19607 else if (collisions == 1
19608 && nparts == 4
19609 && (collisionparts [1] || collisionparts [2]))
19610 {
19611 if (collisionparts [1])
19612 {
19613 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19614 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19615 }
19616 else
19617 {
19618 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
19619 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
19620 }
19621 }
19622
19623 /* If there are more collisions, we can't handle it by reordering.
19624 Do an lea to the last part and use only one colliding move. */
19625 else if (collisions > 1)
19626 {
19627 rtx base;
19628
19629 collisions = 1;
19630
19631 base = part[0][nparts - 1];
19632
19633 /* Handle the case when the last part isn't valid for lea.
19634 Happens in 64-bit mode storing the 12-byte XFmode. */
19635 if (GET_MODE (base) != Pmode)
19636 base = gen_rtx_REG (Pmode, REGNO (base));
19637
19638 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
19639 part[1][0] = replace_equiv_address (part[1][0], base);
19640 for (i = 1; i < nparts; i++)
19641 {
19642 tmp = plus_constant (base, UNITS_PER_WORD * i);
19643 part[1][i] = replace_equiv_address (part[1][i], tmp);
19644 }
19645 }
19646 }
19647
19648 if (push)
19649 {
19650 if (!TARGET_64BIT)
19651 {
19652 if (nparts == 3)
19653 {
19654 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
19655 emit_insn (gen_addsi3 (stack_pointer_rtx,
19656 stack_pointer_rtx, GEN_INT (-4)));
19657 emit_move_insn (part[0][2], part[1][2]);
19658 }
19659 else if (nparts == 4)
19660 {
19661 emit_move_insn (part[0][3], part[1][3]);
19662 emit_move_insn (part[0][2], part[1][2]);
19663 }
19664 }
19665 else
19666 {
19667       /* In 64-bit mode we don't have a 32-bit push available.  In case this is
19668 	 a register, it is OK - we will just use the larger counterpart.  We also
19669 	 retype memory - this comes from an attempt to avoid a REX prefix on
19670 	 moving the second half of a TFmode value.  */
19671 if (GET_MODE (part[1][1]) == SImode)
19672 {
19673 switch (GET_CODE (part[1][1]))
19674 {
19675 case MEM:
19676 part[1][1] = adjust_address (part[1][1], DImode, 0);
19677 break;
19678
19679 case REG:
19680 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
19681 break;
19682
19683 default:
19684 gcc_unreachable ();
19685 }
19686
19687 if (GET_MODE (part[1][0]) == SImode)
19688 part[1][0] = part[1][1];
19689 }
19690 }
19691 emit_move_insn (part[0][1], part[1][1]);
19692 emit_move_insn (part[0][0], part[1][0]);
19693 return;
19694 }
19695
19696 /* Choose correct order to not overwrite the source before it is copied. */
19697 if ((REG_P (part[0][0])
19698 && REG_P (part[1][1])
19699 && (REGNO (part[0][0]) == REGNO (part[1][1])
19700 || (nparts == 3
19701 && REGNO (part[0][0]) == REGNO (part[1][2]))
19702 || (nparts == 4
19703 && REGNO (part[0][0]) == REGNO (part[1][3]))))
19704 || (collisions > 0
19705 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
19706 {
19707 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
19708 {
19709 operands[2 + i] = part[0][j];
19710 operands[6 + i] = part[1][j];
19711 }
19712 }
19713 else
19714 {
19715 for (i = 0; i < nparts; i++)
19716 {
19717 operands[2 + i] = part[0][i];
19718 operands[6 + i] = part[1][i];
19719 }
19720 }
19721
19722 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
19723 if (optimize_insn_for_size_p ())
19724 {
19725 for (j = 0; j < nparts - 1; j++)
19726 if (CONST_INT_P (operands[6 + j])
19727 && operands[6 + j] != const0_rtx
19728 && REG_P (operands[2 + j]))
19729 for (i = j; i < nparts - 1; i++)
19730 if (CONST_INT_P (operands[7 + i])
19731 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
19732 operands[7 + i] = operands[2 + j];
19733 }
19734
19735 for (i = 0; i < nparts; i++)
19736 emit_move_insn (operands[2 + i], operands[6 + i]);
19737
19738 return;
19739 }
19740
19741 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
19742 left shift by a constant, either using a single shift or
19743 a sequence of add instructions. */
19744
19745 static void
19746 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
19747 {
19748 rtx (*insn)(rtx, rtx, rtx);
19749
19750 if (count == 1
19751 || (count * ix86_cost->add <= ix86_cost->shift_const
19752 && !optimize_insn_for_size_p ()))
19753 {
19754 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
19755 while (count-- > 0)
19756 emit_insn (insn (operand, operand, operand));
19757 }
19758 else
19759 {
19760 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19761 emit_insn (insn (operand, operand, GEN_INT (count)));
19762 }
19763 }
19764
19765 void
19766 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
19767 {
19768 rtx (*gen_ashl3)(rtx, rtx, rtx);
19769 rtx (*gen_shld)(rtx, rtx, rtx);
19770 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19771
19772 rtx low[2], high[2];
19773 int count;
19774
19775 if (CONST_INT_P (operands[2]))
19776 {
19777 split_double_mode (mode, operands, 2, low, high);
19778 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19779
19780 if (count >= half_width)
19781 {
19782 emit_move_insn (high[0], low[1]);
19783 emit_move_insn (low[0], const0_rtx);
19784
19785 if (count > half_width)
19786 ix86_expand_ashl_const (high[0], count - half_width, mode);
19787 }
19788 else
19789 {
19790 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19791
19792 if (!rtx_equal_p (operands[0], operands[1]))
19793 emit_move_insn (operands[0], operands[1]);
19794
19795 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
19796 ix86_expand_ashl_const (low[0], count, mode);
19797 }
19798 return;
19799 }
19800
19801 split_double_mode (mode, operands, 1, low, high);
19802
19803 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19804
19805 if (operands[1] == const1_rtx)
19806 {
19807       /* Assuming we've chosen QImode-capable registers, 1 << N
19808 	 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
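      /* Illustratively, for DImode on a 32-bit target this emits roughly:
	     xorl  low,low
	     xorl  high,high
	     testb $32, %cl
	     sete  low_byte
	     setne high_byte
	     shll  %cl, low
	     shll  %cl, high  */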
19809 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
19810 {
19811 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
19812
19813 ix86_expand_clear (low[0]);
19814 ix86_expand_clear (high[0]);
19815 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
19816
19817 d = gen_lowpart (QImode, low[0]);
19818 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19819 s = gen_rtx_EQ (QImode, flags, const0_rtx);
19820 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19821
19822 d = gen_lowpart (QImode, high[0]);
19823 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19824 s = gen_rtx_NE (QImode, flags, const0_rtx);
19825 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19826 }
19827
19828 /* Otherwise, we can get the same results by manually performing
19829 a bit extract operation on bit 5/6, and then performing the two
19830 shifts. The two methods of getting 0/1 into low/high are exactly
19831 the same size. Avoiding the shift in the bit extract case helps
19832 pentium4 a bit; no one else seems to care much either way. */
19833 else
19834 {
19835 enum machine_mode half_mode;
19836 rtx (*gen_lshr3)(rtx, rtx, rtx);
19837 rtx (*gen_and3)(rtx, rtx, rtx);
19838 rtx (*gen_xor3)(rtx, rtx, rtx);
19839 HOST_WIDE_INT bits;
19840 rtx x;
19841
19842 if (mode == DImode)
19843 {
19844 half_mode = SImode;
19845 gen_lshr3 = gen_lshrsi3;
19846 gen_and3 = gen_andsi3;
19847 gen_xor3 = gen_xorsi3;
19848 bits = 5;
19849 }
19850 else
19851 {
19852 half_mode = DImode;
19853 gen_lshr3 = gen_lshrdi3;
19854 gen_and3 = gen_anddi3;
19855 gen_xor3 = gen_xordi3;
19856 bits = 6;
19857 }
19858
19859 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
19860 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
19861 else
19862 x = gen_lowpart (half_mode, operands[2]);
19863 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
19864
19865 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
19866 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
19867 emit_move_insn (low[0], high[0]);
19868 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
19869 }
19870
19871 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19872 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
19873 return;
19874 }
19875
19876 if (operands[1] == constm1_rtx)
19877 {
19878 /* For -1 << N, we can avoid the shld instruction, because we
19879 know that we're shifting 0...31/63 ones into a -1. */
19880 emit_move_insn (low[0], constm1_rtx);
19881 if (optimize_insn_for_size_p ())
19882 emit_move_insn (high[0], low[0]);
19883 else
19884 emit_move_insn (high[0], constm1_rtx);
19885 }
19886 else
19887 {
19888 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19889
19890 if (!rtx_equal_p (operands[0], operands[1]))
19891 emit_move_insn (operands[0], operands[1]);
19892
19893 split_double_mode (mode, operands, 1, low, high);
19894 emit_insn (gen_shld (high[0], low[0], operands[2]));
19895 }
19896
19897 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19898
19899 if (TARGET_CMOVE && scratch)
19900 {
19901 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19902 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19903
19904 ix86_expand_clear (scratch);
19905 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
19906 }
19907 else
19908 {
19909 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19910 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19911
19912 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
19913 }
19914 }
19915
19916 void
19917 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
19918 {
19919 rtx (*gen_ashr3)(rtx, rtx, rtx)
19920 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
19921 rtx (*gen_shrd)(rtx, rtx, rtx);
19922 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19923
19924 rtx low[2], high[2];
19925 int count;
19926
19927 if (CONST_INT_P (operands[2]))
19928 {
19929 split_double_mode (mode, operands, 2, low, high);
19930 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19931
19932 if (count == GET_MODE_BITSIZE (mode) - 1)
19933 {
19934 emit_move_insn (high[0], high[1]);
19935 emit_insn (gen_ashr3 (high[0], high[0],
19936 GEN_INT (half_width - 1)));
19937 emit_move_insn (low[0], high[0]);
19938
19939 }
19940 else if (count >= half_width)
19941 {
19942 emit_move_insn (low[0], high[1]);
19943 emit_move_insn (high[0], low[0]);
19944 emit_insn (gen_ashr3 (high[0], high[0],
19945 GEN_INT (half_width - 1)));
19946
19947 if (count > half_width)
19948 emit_insn (gen_ashr3 (low[0], low[0],
19949 GEN_INT (count - half_width)));
19950 }
19951 else
19952 {
19953 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19954
19955 if (!rtx_equal_p (operands[0], operands[1]))
19956 emit_move_insn (operands[0], operands[1]);
19957
19958 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19959 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
19960 }
19961 }
19962 else
19963 {
19964 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19965
19966 if (!rtx_equal_p (operands[0], operands[1]))
19967 emit_move_insn (operands[0], operands[1]);
19968
19969 split_double_mode (mode, operands, 1, low, high);
19970
19971 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19972 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
19973
19974 if (TARGET_CMOVE && scratch)
19975 {
19976 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19977 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19978
19979 emit_move_insn (scratch, high[0]);
19980 emit_insn (gen_ashr3 (scratch, scratch,
19981 GEN_INT (half_width - 1)));
19982 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19983 scratch));
19984 }
19985 else
19986 {
19987 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
19988 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
19989
19990 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
19991 }
19992 }
19993 }
19994
19995 void
19996 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
19997 {
19998 rtx (*gen_lshr3)(rtx, rtx, rtx)
19999 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
20000 rtx (*gen_shrd)(rtx, rtx, rtx);
20001 int half_width = GET_MODE_BITSIZE (mode) >> 1;
20002
20003 rtx low[2], high[2];
20004 int count;
20005
20006 if (CONST_INT_P (operands[2]))
20007 {
20008 split_double_mode (mode, operands, 2, low, high);
20009 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
20010
20011 if (count >= half_width)
20012 {
20013 emit_move_insn (low[0], high[1]);
20014 ix86_expand_clear (high[0]);
20015
20016 if (count > half_width)
20017 emit_insn (gen_lshr3 (low[0], low[0],
20018 GEN_INT (count - half_width)));
20019 }
20020 else
20021 {
20022 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
20023
20024 if (!rtx_equal_p (operands[0], operands[1]))
20025 emit_move_insn (operands[0], operands[1]);
20026
20027 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
20028 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
20029 }
20030 }
20031 else
20032 {
20033 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
20034
20035 if (!rtx_equal_p (operands[0], operands[1]))
20036 emit_move_insn (operands[0], operands[1]);
20037
20038 split_double_mode (mode, operands, 1, low, high);
20039
20040 emit_insn (gen_shrd (low[0], high[0], operands[2]));
20041 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
20042
20043 if (TARGET_CMOVE && scratch)
20044 {
20045 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
20046 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
20047
20048 ix86_expand_clear (scratch);
20049 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
20050 scratch));
20051 }
20052 else
20053 {
20054 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
20055 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
20056
20057 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
20058 }
20059 }
20060 }
20061
20062 /* Predict the just emitted jump instruction to be taken with probability PROB.  */
20063 static void
20064 predict_jump (int prob)
20065 {
20066 rtx insn = get_last_insn ();
20067 gcc_assert (JUMP_P (insn));
20068 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
20069 }
20070
20071 /* Helper function for the string operations below.  Test VARIABLE whether
20072    it is aligned to VALUE bytes.  If true, jump to the label.  */
20073 static rtx
20074 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
20075 {
20076 rtx label = gen_label_rtx ();
20077 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
20078 if (GET_MODE (variable) == DImode)
20079 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
20080 else
20081 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
20082 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
20083 1, label);
20084 if (epilogue)
20085 predict_jump (REG_BR_PROB_BASE * 50 / 100);
20086 else
20087 predict_jump (REG_BR_PROB_BASE * 90 / 100);
20088 return label;
20089 }
20090
20091 /* Decrease COUNTREG by VALUE.  */
20092 static void
20093 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
20094 {
20095 rtx (*gen_add)(rtx, rtx, rtx)
20096 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
20097
20098 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
20099 }
20100
20101 /* Zero extend possibly SImode EXP to Pmode register. */
20102 rtx
20103 ix86_zero_extend_to_Pmode (rtx exp)
20104 {
20105 rtx r;
20106 if (GET_MODE (exp) == VOIDmode)
20107 return force_reg (Pmode, exp);
20108 if (GET_MODE (exp) == Pmode)
20109 return copy_to_mode_reg (Pmode, exp);
20110 r = gen_reg_rtx (Pmode);
20111 emit_insn (gen_zero_extendsidi2 (r, exp));
20112 return r;
20113 }
20114
20115 /* Divide COUNTREG by SCALE. */
20116 static rtx
20117 scale_counter (rtx countreg, int scale)
20118 {
20119 rtx sc;
20120
20121 if (scale == 1)
20122 return countreg;
20123 if (CONST_INT_P (countreg))
20124 return GEN_INT (INTVAL (countreg) / scale);
20125 gcc_assert (REG_P (countreg));
20126
20127 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
20128 GEN_INT (exact_log2 (scale)),
20129 NULL, 1, OPTAB_DIRECT);
20130 return sc;
20131 }
20132
20133 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
20134 DImode for constant loop counts. */
20135
20136 static enum machine_mode
20137 counter_mode (rtx count_exp)
20138 {
20139 if (GET_MODE (count_exp) != VOIDmode)
20140 return GET_MODE (count_exp);
20141 if (!CONST_INT_P (count_exp))
20142 return Pmode;
20143 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
20144 return DImode;
20145 return SImode;
20146 }
20147
20148 /* When SRCPTR is non-NULL, output a simple loop to move memory pointed
20149    to by SRCPTR to DESTPTR via chunks of MODE, unrolled UNROLL times;
20150    the overall size is COUNT, specified in bytes.  When SRCPTR is NULL,
20151    output the equivalent loop to set memory to VALUE (supposed to be in MODE).
20152
20153    The size is rounded down to a whole number of chunks moved at once.
20154    SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info.  */
20155
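/* A rough sketch of the code this function emits, where PIECE is
   GET_MODE_SIZE (MODE) * UNROLL:

     size = count & ~(PIECE - 1);
     iter = 0;
   top:
     copy (or set) UNROLL chunks at DESTPTR + iter (from SRCPTR + iter);
     iter += PIECE;
     if (iter < size) goto top;
     DESTPTR += iter;  SRCPTR += iter;
   out:  */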
20156
20157 static void
20158 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
20159 rtx destptr, rtx srcptr, rtx value,
20160 rtx count, enum machine_mode mode, int unroll,
20161 int expected_size)
20162 {
20163 rtx out_label, top_label, iter, tmp;
20164 enum machine_mode iter_mode = counter_mode (count);
20165 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
20166 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
20167 rtx size;
20168 rtx x_addr;
20169 rtx y_addr;
20170 int i;
20171
20172 top_label = gen_label_rtx ();
20173 out_label = gen_label_rtx ();
20174 iter = gen_reg_rtx (iter_mode);
20175
20176 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
20177 NULL, 1, OPTAB_DIRECT);
20178 /* These two instructions should combine. */
20179 if (piece_size == const1_rtx)
20180 {
20181 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
20182 true, out_label);
20183 predict_jump (REG_BR_PROB_BASE * 10 / 100);
20184 }
20185 emit_move_insn (iter, const0_rtx);
20186
20187 emit_label (top_label);
20188
20189 tmp = convert_modes (Pmode, iter_mode, iter, true);
20190 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
20191 destmem = change_address (destmem, mode, x_addr);
20192
20193 if (srcmem)
20194 {
20195 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
20196 srcmem = change_address (srcmem, mode, y_addr);
20197
20198 /* When unrolling for chips that reorder memory reads and writes, a
20199 single temporary would save registers, and 4 temporaries are overkill
20200 in 32-bit mode; this variant is currently disabled by the "&& 0" below. */
20201 if (!TARGET_64BIT && 0)
20202 {
20203 for (i = 0; i < unroll; i++)
20204 {
20205 if (i)
20206 {
20207 destmem =
20208 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
20209 srcmem =
20210 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
20211 }
20212 emit_move_insn (destmem, srcmem);
20213 }
20214 }
20215 else
20216 {
20217 rtx tmpreg[4];
20218 gcc_assert (unroll <= 4);
20219 for (i = 0; i < unroll; i++)
20220 {
20221 tmpreg[i] = gen_reg_rtx (mode);
20222 if (i)
20223 {
20224 srcmem =
20225 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
20226 }
20227 emit_move_insn (tmpreg[i], srcmem);
20228 }
20229 for (i = 0; i < unroll; i++)
20230 {
20231 if (i)
20232 {
20233 destmem =
20234 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
20235 }
20236 emit_move_insn (destmem, tmpreg[i]);
20237 }
20238 }
20239 }
20240 else
20241 for (i = 0; i < unroll; i++)
20242 {
20243 if (i)
20244 destmem =
20245 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
20246 emit_move_insn (destmem, value);
20247 }
20248
20249 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
20250 true, OPTAB_LIB_WIDEN);
20251 if (tmp != iter)
20252 emit_move_insn (iter, tmp);
20253
20254 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
20255 true, top_label);
20256 if (expected_size != -1)
20257 {
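	  /* EXPECTED_SIZE is converted from bytes to expected loop iterations
	     below; the backward branch is then predicted taken with probability
	     roughly (iterations - 1) / iterations of REG_BR_PROB_BASE.  */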
20258 expected_size /= GET_MODE_SIZE (mode) * unroll;
20259 if (expected_size == 0)
20260 predict_jump (0);
20261 else if (expected_size > REG_BR_PROB_BASE)
20262 predict_jump (REG_BR_PROB_BASE - 1);
20263 else
20264 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
20265 }
20266 else
20267 predict_jump (REG_BR_PROB_BASE * 80 / 100);
20268 iter = ix86_zero_extend_to_Pmode (iter);
20269 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
20270 true, OPTAB_LIB_WIDEN);
20271 if (tmp != destptr)
20272 emit_move_insn (destptr, tmp);
20273 if (srcptr)
20274 {
20275 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
20276 true, OPTAB_LIB_WIDEN);
20277 if (tmp != srcptr)
20278 emit_move_insn (srcptr, tmp);
20279 }
20280 emit_label (out_label);
20281 }
20282
20283 /* Output a "rep; mov" instruction.
20284 Arguments have the same meaning as for the previous function. */
20285 static void
20286 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
20287 rtx destptr, rtx srcptr,
20288 rtx count,
20289 enum machine_mode mode)
20290 {
20291 rtx destexp;
20292 rtx srcexp;
20293 rtx countreg;
20294 HOST_WIDE_INT rounded_count;
20295
20296 /* If the size is known and a multiple of 4, it is cheaper to use rep movsl than rep movsb. */
20297 if (mode == QImode && CONST_INT_P (count)
20298 && !(INTVAL (count) & 3))
20299 mode = SImode;
20300
20301 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
20302 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
20303 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
20304 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
20305 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
20306 if (mode != QImode)
20307 {
20308 destexp = gen_rtx_ASHIFT (Pmode, countreg,
20309 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
20310 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
20311 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
20312 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
20313 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
20314 }
20315 else
20316 {
20317 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
20318 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
20319 }
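  /* DESTEXP and SRCEXP describe the values the destination and source
     pointers hold after the copy (base plus total bytes moved); the
     rep_mov pattern uses them to express the pointer updates.  */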
20320 if (CONST_INT_P (count))
20321 {
20322 rounded_count = (INTVAL (count)
20323 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
20324 destmem = shallow_copy_rtx (destmem);
20325 srcmem = shallow_copy_rtx (srcmem);
20326 set_mem_size (destmem, rounded_count);
20327 set_mem_size (srcmem, rounded_count);
20328 }
20329 else
20330 {
20331 if (MEM_SIZE_KNOWN_P (destmem))
20332 clear_mem_size (destmem);
20333 if (MEM_SIZE_KNOWN_P (srcmem))
20334 clear_mem_size (srcmem);
20335 }
20336 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
20337 destexp, srcexp));
20338 }
20339
20340 /* Output a "rep; stos" instruction.
20341 Arguments have the same meaning as for the previous function. */
20342 static void
20343 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
20344 rtx count, enum machine_mode mode,
20345 rtx orig_value)
20346 {
20347 rtx destexp;
20348 rtx countreg;
20349 HOST_WIDE_INT rounded_count;
20350
20351 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
20352 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
20353 value = force_reg (mode, gen_lowpart (mode, value));
20354 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
20355 if (mode != QImode)
20356 {
20357 destexp = gen_rtx_ASHIFT (Pmode, countreg,
20358 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
20359 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
20360 }
20361 else
20362 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
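  /* As in expand_movmem_via_rep_mov above, DESTEXP is the value the
     destination pointer holds after the store; the rep_stos pattern
     uses it to express the pointer update.  */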
20363 if (orig_value == const0_rtx && CONST_INT_P (count))
20364 {
20365 rounded_count = (INTVAL (count)
20366 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
20367 destmem = shallow_copy_rtx (destmem);
20368 set_mem_size (destmem, rounded_count);
20369 }
20370 else if (MEM_SIZE_KNOWN_P (destmem))
20371 clear_mem_size (destmem);
20372 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
20373 }
20374
20375 static void
20376 emit_strmov (rtx destmem, rtx srcmem,
20377 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
20378 {
20379 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
20380 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
20381 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20382 }
20383
20384 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
20385 static void
20386 expand_movmem_epilogue (rtx destmem, rtx srcmem,
20387 rtx destptr, rtx srcptr, rtx count, int max_size)
20388 {
20389 rtx src, dest;
20390 if (CONST_INT_P (count))
20391 {
20392 HOST_WIDE_INT countval = INTVAL (count);
20393 int offset = 0;
20394
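      /* With a constant count we emit a straight-line sequence: test each
	 bit of COUNTVAL from the largest chunk down and move that many
	 bytes, advancing OFFSET as we go.  */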
20395 if ((countval & 0x10) && max_size > 16)
20396 {
20397 if (TARGET_64BIT)
20398 {
20399 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
20400 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
20401 }
20402 else
20403 gcc_unreachable ();
20404 offset += 16;
20405 }
20406 if ((countval & 0x08) && max_size > 8)
20407 {
20408 if (TARGET_64BIT)
20409 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
20410 else
20411 {
20412 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
20413 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
20414 }
20415 offset += 8;
20416 }
20417 if ((countval & 0x04) && max_size > 4)
20418 {
20419 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
20420 offset += 4;
20421 }
20422 if ((countval & 0x02) && max_size > 2)
20423 {
20424 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
20425 offset += 2;
20426 }
20427 if ((countval & 0x01) && max_size > 1)
20428 {
20429 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
20430 offset += 1;
20431 }
20432 return;
20433 }
20434 if (max_size > 8)
20435 {
20436 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
20437 count, 1, OPTAB_DIRECT);
20438 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
20439 count, QImode, 1, 4);
20440 return;
20441 }
20442
20443 /* When single-instruction stringops are available, they cheaply advance
20444 the dest and src pointers for us. Otherwise we save code size by
20445 maintaining an offset register (zero is readily available from the
20446 preceding rep operation) and using x86 addressing modes. */
20447 if (TARGET_SINGLE_STRINGOP)
20448 {
20449 if (max_size > 4)
20450 {
20451 rtx label = ix86_expand_aligntest (count, 4, true);
20452 src = change_address (srcmem, SImode, srcptr);
20453 dest = change_address (destmem, SImode, destptr);
20454 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20455 emit_label (label);
20456 LABEL_NUSES (label) = 1;
20457 }
20458 if (max_size > 2)
20459 {
20460 rtx label = ix86_expand_aligntest (count, 2, true);
20461 src = change_address (srcmem, HImode, srcptr);
20462 dest = change_address (destmem, HImode, destptr);
20463 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20464 emit_label (label);
20465 LABEL_NUSES (label) = 1;
20466 }
20467 if (max_size > 1)
20468 {
20469 rtx label = ix86_expand_aligntest (count, 1, true);
20470 src = change_address (srcmem, QImode, srcptr);
20471 dest = change_address (destmem, QImode, destptr);
20472 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20473 emit_label (label);
20474 LABEL_NUSES (label) = 1;
20475 }
20476 }
20477 else
20478 {
20479 rtx offset = force_reg (Pmode, const0_rtx);
20480 rtx tmp;
20481
20482 if (max_size > 4)
20483 {
20484 rtx label = ix86_expand_aligntest (count, 4, true);
20485 src = change_address (srcmem, SImode, srcptr);
20486 dest = change_address (destmem, SImode, destptr);
20487 emit_move_insn (dest, src);
20488 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
20489 true, OPTAB_LIB_WIDEN);
20490 if (tmp != offset)
20491 emit_move_insn (offset, tmp);
20492 emit_label (label);
20493 LABEL_NUSES (label) = 1;
20494 }
20495 if (max_size > 2)
20496 {
20497 rtx label = ix86_expand_aligntest (count, 2, true);
20498 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
20499 src = change_address (srcmem, HImode, tmp);
20500 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
20501 dest = change_address (destmem, HImode, tmp);
20502 emit_move_insn (dest, src);
20503 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
20504 true, OPTAB_LIB_WIDEN);
20505 if (tmp != offset)
20506 emit_move_insn (offset, tmp);
20507 emit_label (label);
20508 LABEL_NUSES (label) = 1;
20509 }
20510 if (max_size > 1)
20511 {
20512 rtx label = ix86_expand_aligntest (count, 1, true);
20513 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
20514 src = change_address (srcmem, QImode, tmp);
20515 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
20516 dest = change_address (destmem, QImode, tmp);
20517 emit_move_insn (dest, src);
20518 emit_label (label);
20519 LABEL_NUSES (label) = 1;
20520 }
20521 }
20522 }
20523
20524 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
20525 static void
20526 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
20527 rtx count, int max_size)
20528 {
20529 count =
20530 expand_simple_binop (counter_mode (count), AND, count,
20531 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
20532 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
20533 gen_lowpart (QImode, value), count, QImode,
20534 1, max_size / 2);
20535 }
20536
20537 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
20538 static void
20539 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
20540 {
20541 rtx dest;
20542
20543 if (CONST_INT_P (count))
20544 {
20545 HOST_WIDE_INT countval = INTVAL (count);
20546 int offset = 0;
20547
20548 if ((countval & 0x10) && max_size > 16)
20549 {
20550 if (TARGET_64BIT)
20551 {
20552 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
20553 emit_insn (gen_strset (destptr, dest, value));
20554 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
20555 emit_insn (gen_strset (destptr, dest, value));
20556 }
20557 else
20558 gcc_unreachable ();
20559 offset += 16;
20560 }
20561 if ((countval & 0x08) && max_size > 8)
20562 {
20563 if (TARGET_64BIT)
20564 {
20565 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
20566 emit_insn (gen_strset (destptr, dest, value));
20567 }
20568 else
20569 {
20570 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
20571 emit_insn (gen_strset (destptr, dest, value));
20572 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
20573 emit_insn (gen_strset (destptr, dest, value));
20574 }
20575 offset += 8;
20576 }
20577 if ((countval & 0x04) && max_size > 4)
20578 {
20579 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
20580 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
20581 offset += 4;
20582 }
20583 if ((countval & 0x02) && max_size > 2)
20584 {
20585 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
20586 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20587 offset += 2;
20588 }
20589 if ((countval & 0x01) && max_size > 1)
20590 {
20591 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
20592 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20593 offset += 1;
20594 }
20595 return;
20596 }
20597 if (max_size > 32)
20598 {
20599 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
20600 return;
20601 }
20602 if (max_size > 16)
20603 {
20604 rtx label = ix86_expand_aligntest (count, 16, true);
20605 if (TARGET_64BIT)
20606 {
20607 dest = change_address (destmem, DImode, destptr);
20608 emit_insn (gen_strset (destptr, dest, value));
20609 emit_insn (gen_strset (destptr, dest, value));
20610 }
20611 else
20612 {
20613 dest = change_address (destmem, SImode, destptr);
20614 emit_insn (gen_strset (destptr, dest, value));
20615 emit_insn (gen_strset (destptr, dest, value));
20616 emit_insn (gen_strset (destptr, dest, value));
20617 emit_insn (gen_strset (destptr, dest, value));
20618 }
20619 emit_label (label);
20620 LABEL_NUSES (label) = 1;
20621 }
20622 if (max_size > 8)
20623 {
20624 rtx label = ix86_expand_aligntest (count, 8, true);
20625 if (TARGET_64BIT)
20626 {
20627 dest = change_address (destmem, DImode, destptr);
20628 emit_insn (gen_strset (destptr, dest, value));
20629 }
20630 else
20631 {
20632 dest = change_address (destmem, SImode, destptr);
20633 emit_insn (gen_strset (destptr, dest, value));
20634 emit_insn (gen_strset (destptr, dest, value));
20635 }
20636 emit_label (label);
20637 LABEL_NUSES (label) = 1;
20638 }
20639 if (max_size > 4)
20640 {
20641 rtx label = ix86_expand_aligntest (count, 4, true);
20642 dest = change_address (destmem, SImode, destptr);
20643 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
20644 emit_label (label);
20645 LABEL_NUSES (label) = 1;
20646 }
20647 if (max_size > 2)
20648 {
20649 rtx label = ix86_expand_aligntest (count, 2, true);
20650 dest = change_address (destmem, HImode, destptr);
20651 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20652 emit_label (label);
20653 LABEL_NUSES (label) = 1;
20654 }
20655 if (max_size > 1)
20656 {
20657 rtx label = ix86_expand_aligntest (count, 1, true);
20658 dest = change_address (destmem, QImode, destptr);
20659 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20660 emit_label (label);
20661 LABEL_NUSES (label) = 1;
20662 }
20663 }
20664
20665 /* Copy enough bytes from SRC to DEST to align DEST, known to be aligned
20666 to ALIGN, to DESIRED_ALIGNMENT. */
20667 static void
20668 expand_movmem_prologue (rtx destmem, rtx srcmem,
20669 rtx destptr, rtx srcptr, rtx count,
20670 int align, int desired_alignment)
20671 {
20672 if (align <= 1 && desired_alignment > 1)
20673 {
20674 rtx label = ix86_expand_aligntest (destptr, 1, false);
20675 srcmem = change_address (srcmem, QImode, srcptr);
20676 destmem = change_address (destmem, QImode, destptr);
20677 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20678 ix86_adjust_counter (count, 1);
20679 emit_label (label);
20680 LABEL_NUSES (label) = 1;
20681 }
20682 if (align <= 2 && desired_alignment > 2)
20683 {
20684 rtx label = ix86_expand_aligntest (destptr, 2, false);
20685 srcmem = change_address (srcmem, HImode, srcptr);
20686 destmem = change_address (destmem, HImode, destptr);
20687 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20688 ix86_adjust_counter (count, 2);
20689 emit_label (label);
20690 LABEL_NUSES (label) = 1;
20691 }
20692 if (align <= 4 && desired_alignment > 4)
20693 {
20694 rtx label = ix86_expand_aligntest (destptr, 4, false);
20695 srcmem = change_address (srcmem, SImode, srcptr);
20696 destmem = change_address (destmem, SImode, destptr);
20697 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20698 ix86_adjust_counter (count, 4);
20699 emit_label (label);
20700 LABEL_NUSES (label) = 1;
20701 }
20702 gcc_assert (desired_alignment <= 8);
20703 }
20704
20705 /* Copy enough bytes from SRC (via *SRCP) to DST to align DST to
20706 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be copied. */
20707 static rtx
20708 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
20709 int desired_align, int align_bytes)
20710 {
20711 rtx src = *srcp;
20712 rtx orig_dst = dst;
20713 rtx orig_src = src;
20714 int off = 0;
20715 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
20716 if (src_align_bytes >= 0)
20717 src_align_bytes = desired_align - src_align_bytes;
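  /* If known, SRC_ALIGN_BYTES is now how many bytes must be copied
     before SRC reaches DESIRED_ALIGN; its low bits are compared with
     ALIGN_BYTES below to see how much alignment SRC gains while DST
     is being aligned.  */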
20718 if (align_bytes & 1)
20719 {
20720 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20721 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
20722 off = 1;
20723 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20724 }
20725 if (align_bytes & 2)
20726 {
20727 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20728 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
20729 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20730 set_mem_align (dst, 2 * BITS_PER_UNIT);
20731 if (src_align_bytes >= 0
20732 && (src_align_bytes & 1) == (align_bytes & 1)
20733 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
20734 set_mem_align (src, 2 * BITS_PER_UNIT);
20735 off = 2;
20736 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20737 }
20738 if (align_bytes & 4)
20739 {
20740 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20741 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
20742 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20743 set_mem_align (dst, 4 * BITS_PER_UNIT);
20744 if (src_align_bytes >= 0)
20745 {
20746 unsigned int src_align = 0;
20747 if ((src_align_bytes & 3) == (align_bytes & 3))
20748 src_align = 4;
20749 else if ((src_align_bytes & 1) == (align_bytes & 1))
20750 src_align = 2;
20751 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20752 set_mem_align (src, src_align * BITS_PER_UNIT);
20753 }
20754 off = 4;
20755 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20756 }
20757 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20758 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
20759 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20760 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20761 if (src_align_bytes >= 0)
20762 {
20763 unsigned int src_align = 0;
20764 if ((src_align_bytes & 7) == (align_bytes & 7))
20765 src_align = 8;
20766 else if ((src_align_bytes & 3) == (align_bytes & 3))
20767 src_align = 4;
20768 else if ((src_align_bytes & 1) == (align_bytes & 1))
20769 src_align = 2;
20770 if (src_align > (unsigned int) desired_align)
20771 src_align = desired_align;
20772 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20773 set_mem_align (src, src_align * BITS_PER_UNIT);
20774 }
20775 if (MEM_SIZE_KNOWN_P (orig_dst))
20776 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
20777 if (MEM_SIZE_KNOWN_P (orig_src))
20778 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
20779 *srcp = src;
20780 return dst;
20781 }
20782
20783 /* Store enough of VALUE at DEST to align DEST, known to be aligned to
20784 ALIGN, to DESIRED_ALIGNMENT. */
20785 static void
20786 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
20787 int align, int desired_alignment)
20788 {
20789 if (align <= 1 && desired_alignment > 1)
20790 {
20791 rtx label = ix86_expand_aligntest (destptr, 1, false);
20792 destmem = change_address (destmem, QImode, destptr);
20793 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
20794 ix86_adjust_counter (count, 1);
20795 emit_label (label);
20796 LABEL_NUSES (label) = 1;
20797 }
20798 if (align <= 2 && desired_alignment > 2)
20799 {
20800 rtx label = ix86_expand_aligntest (destptr, 2, false);
20801 destmem = change_address (destmem, HImode, destptr);
20802 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
20803 ix86_adjust_counter (count, 2);
20804 emit_label (label);
20805 LABEL_NUSES (label) = 1;
20806 }
20807 if (align <= 4 && desired_alignment > 4)
20808 {
20809 rtx label = ix86_expand_aligntest (destptr, 4, false);
20810 destmem = change_address (destmem, SImode, destptr);
20811 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
20812 ix86_adjust_counter (count, 4);
20813 emit_label (label);
20814 LABEL_NUSES (label) = 1;
20815 }
20816 gcc_assert (desired_alignment <= 8);
20817 }
20818
20819 /* Store enough of VALUE at DST to align DST to DESIRED_ALIGN.
20820 ALIGN_BYTES is how many bytes need to be stored. */
20821 static rtx
20822 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
20823 int desired_align, int align_bytes)
20824 {
20825 int off = 0;
20826 rtx orig_dst = dst;
20827 if (align_bytes & 1)
20828 {
20829 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20830 off = 1;
20831 emit_insn (gen_strset (destreg, dst,
20832 gen_lowpart (QImode, value)));
20833 }
20834 if (align_bytes & 2)
20835 {
20836 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20837 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20838 set_mem_align (dst, 2 * BITS_PER_UNIT);
20839 off = 2;
20840 emit_insn (gen_strset (destreg, dst,
20841 gen_lowpart (HImode, value)));
20842 }
20843 if (align_bytes & 4)
20844 {
20845 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20846 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20847 set_mem_align (dst, 4 * BITS_PER_UNIT);
20848 off = 4;
20849 emit_insn (gen_strset (destreg, dst,
20850 gen_lowpart (SImode, value)));
20851 }
20852 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20853 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20854 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20855 if (MEM_SIZE_KNOWN_P (orig_dst))
20856 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
20857 return dst;
20858 }
20859
20860 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
20861 static enum stringop_alg
20862 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
20863 int *dynamic_check)
20864 {
20865 const struct stringop_algs * algs;
20866 bool optimize_for_speed;
20867 /* Algorithms using the rep prefix want at least edi and ecx;
20868 additionally, memset wants eax and memcpy wants esi. Don't
20869 consider such algorithms if the user has appropriated those
20870 registers for their own purposes. */
20871 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
20872 || (memset
20873 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
20874
20875 #define ALG_USABLE_P(alg) (rep_prefix_usable \
20876 || (alg != rep_prefix_1_byte \
20877 && alg != rep_prefix_4_byte \
20878 && alg != rep_prefix_8_byte))
20879 const struct processor_costs *cost;
20880
20881 /* Even if the string operation call is cold, we still might spend a lot
20882 of time processing large blocks. */
20883 if (optimize_function_for_size_p (cfun)
20884 || (optimize_insn_for_size_p ()
20885 && expected_size != -1 && expected_size < 256))
20886 optimize_for_speed = false;
20887 else
20888 optimize_for_speed = true;
20889
20890 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
20891
20892 *dynamic_check = -1;
20893 if (memset)
20894 algs = &cost->memset[TARGET_64BIT != 0];
20895 else
20896 algs = &cost->memcpy[TARGET_64BIT != 0];
20897 if (ix86_stringop_alg != no_stringop && ALG_USABLE_P (ix86_stringop_alg))
20898 return ix86_stringop_alg;
20899 /* rep; movq or rep; movl is the smallest variant. */
20900 else if (!optimize_for_speed)
20901 {
20902 if (!count || (count & 3))
20903 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
20904 else
20905 return rep_prefix_usable ? rep_prefix_4_byte : loop;
20906 }
20907 /* Very tiny blocks are best handled via the loop; REP is expensive
20908 to set up. */
20909 else if (expected_size != -1 && expected_size < 4)
20910 return loop_1_byte;
20911 else if (expected_size != -1)
20912 {
20913 unsigned int i;
20914 enum stringop_alg alg = libcall;
20915 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20916 {
20917 /* We get here if the algorithms that were not libcall-based
20918 were rep-prefix based and we are unable to use rep prefixes
20919 based on global register usage. Break out of the loop and
20920 use the heuristic below. */
20921 if (algs->size[i].max == 0)
20922 break;
20923 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
20924 {
20925 enum stringop_alg candidate = algs->size[i].alg;
20926
20927 if (candidate != libcall && ALG_USABLE_P (candidate))
20928 alg = candidate;
20929 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
20930 last non-libcall inline algorithm. */
20931 if (TARGET_INLINE_ALL_STRINGOPS)
20932 {
20933 /* When the current size is best copied by a libcall, but we
20934 are still forced to inline, run the heuristic below that
20935 will pick code for medium-sized blocks. */
20936 if (alg != libcall)
20937 return alg;
20938 break;
20939 }
20940 else if (ALG_USABLE_P (candidate))
20941 return candidate;
20942 }
20943 }
20944 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
20945 }
20946 /* When asked to inline the call anyway, try to pick a meaningful choice.
20947 We look for the maximal size of block that is faster to copy by hand
20948 and handle blocks of at most that size, guessing that the average size
20949 will be roughly half of the maximum.
20950
20951 If this turns out to be bad, we might simply specify the preferred
20952 choice in ix86_costs. */
20953 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20954 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
20955 {
20956 int max = -1;
20957 enum stringop_alg alg;
20958 int i;
20959 bool any_alg_usable_p = true;
20960
20961 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20962 {
20963 enum stringop_alg candidate = algs->size[i].alg;
20964 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
20965
20966 if (candidate != libcall && candidate
20967 && ALG_USABLE_P (candidate))
20968 max = algs->size[i].max;
20969 }
20970 /* If there aren't any usable algorithms, then recursing on
20971 smaller sizes isn't going to find anything. Just return the
20972 simple byte-at-a-time copy loop. */
20973 if (!any_alg_usable_p)
20974 {
20975 /* Pick something reasonable. */
20976 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20977 *dynamic_check = 128;
20978 return loop_1_byte;
20979 }
20980 if (max == -1)
20981 max = 4096;
20982 alg = decide_alg (count, max / 2, memset, dynamic_check);
20983 gcc_assert (*dynamic_check == -1);
20984 gcc_assert (alg != libcall);
20985 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20986 *dynamic_check = max;
20987 return alg;
20988 }
20989 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
20990 #undef ALG_USABLE_P
20991 }
20992
20993 /* Decide on alignment. We know that the operand is already aligned to ALIGN
20994 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
20995 static int
20996 decide_alignment (int align,
20997 enum stringop_alg alg,
20998 int expected_size)
20999 {
21000 int desired_align = 0;
21001 switch (alg)
21002 {
21003 case no_stringop:
21004 gcc_unreachable ();
21005 case loop:
21006 case unrolled_loop:
21007 desired_align = GET_MODE_SIZE (Pmode);
21008 break;
21009 case rep_prefix_8_byte:
21010 desired_align = 8;
21011 break;
21012 case rep_prefix_4_byte:
21013 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
21014 copying a whole cache line at once. */
21015 if (TARGET_PENTIUMPRO)
21016 desired_align = 8;
21017 else
21018 desired_align = 4;
21019 break;
21020 case rep_prefix_1_byte:
21021 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
21022 copying a whole cache line at once. */
21023 if (TARGET_PENTIUMPRO)
21024 desired_align = 8;
21025 else
21026 desired_align = 1;
21027 break;
21028 case loop_1_byte:
21029 desired_align = 1;
21030 break;
21031 case libcall:
21032 return 0;
21033 }
21034
21035 if (optimize_size)
21036 desired_align = 1;
21037 if (desired_align < align)
21038 desired_align = align;
21039 if (expected_size != -1 && expected_size < 4)
21040 desired_align = align;
21041 return desired_align;
21042 }
21043
21044 /* Return the smallest power of 2 greater than VAL. */
21045 static int
21046 smallest_pow2_greater_than (int val)
21047 {
21048 int ret = 1;
21049 while (ret <= val)
21050 ret <<= 1;
21051 return ret;
21052 }
21053
21054 /* Expand string move (memcpy) operation. Use i386 string operations
21055 when profitable. expand_setmem contains similar code. The code
21056 depends upon architecture, block size and alignment, but always has
21057 the same overall structure:
21058
21059 1) Prologue guard: a conditional that jumps ahead to the epilogue for
21060 small blocks that can be handled by the epilogue alone. This is faster,
21061 but it is also needed for correctness, since the prologue assumes the
21062 block is larger than the desired alignment.
21063
21064 Optional dynamic check for size and libcall for large
21065 blocks is emitted here too, with -minline-stringops-dynamically.
21066
21067 2) Prologue: copy first few bytes in order to get destination
21068 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
21069 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
21070 copied. We emit either a jump tree on power of two sized
21071 blocks, or a byte loop.
21072
21073 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
21074 with specified algorithm.
21075
21076 4) Epilogue: code copying tail of the block that is too small to be
21077 handled by main body (or up to size guarded by prologue guard). */
21078
21079 bool
21080 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
21081 rtx expected_align_exp, rtx expected_size_exp)
21082 {
21083 rtx destreg;
21084 rtx srcreg;
21085 rtx label = NULL;
21086 rtx tmp;
21087 rtx jump_around_label = NULL;
21088 HOST_WIDE_INT align = 1;
21089 unsigned HOST_WIDE_INT count = 0;
21090 HOST_WIDE_INT expected_size = -1;
21091 int size_needed = 0, epilogue_size_needed;
21092 int desired_align = 0, align_bytes = 0;
21093 enum stringop_alg alg;
21094 int dynamic_check;
21095 bool need_zero_guard = false;
21096
21097 if (CONST_INT_P (align_exp))
21098 align = INTVAL (align_exp);
21099 /* i386 can do misaligned accesses at a reasonable extra cost. */
21100 if (CONST_INT_P (expected_align_exp)
21101 && INTVAL (expected_align_exp) > align)
21102 align = INTVAL (expected_align_exp);
21103 /* ALIGN is the minimum of destination and source alignment, but we care here
21104 just about destination alignment. */
21105 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
21106 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
21107
21108 if (CONST_INT_P (count_exp))
21109 count = expected_size = INTVAL (count_exp);
21110 if (CONST_INT_P (expected_size_exp) && count == 0)
21111 expected_size = INTVAL (expected_size_exp);
21112
21113 /* Make sure we don't need to care about overflow later on. */
21114 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
21115 return false;
21116
21117 /* Step 0: Decide on preferred algorithm, desired alignment and
21118 size of chunks to be copied by main loop. */
21119
21120 alg = decide_alg (count, expected_size, false, &dynamic_check);
21121 desired_align = decide_alignment (align, alg, expected_size);
21122
21123 if (!TARGET_ALIGN_STRINGOPS)
21124 align = desired_align;
21125
21126 if (alg == libcall)
21127 return false;
21128 gcc_assert (alg != no_stringop);
21129 if (!count)
21130 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
21131 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
21132 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
21133 switch (alg)
21134 {
21135 case libcall:
21136 case no_stringop:
21137 gcc_unreachable ();
21138 case loop:
21139 need_zero_guard = true;
21140 size_needed = GET_MODE_SIZE (Pmode);
21141 break;
21142 case unrolled_loop:
21143 need_zero_guard = true;
21144 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
21145 break;
21146 case rep_prefix_8_byte:
21147 size_needed = 8;
21148 break;
21149 case rep_prefix_4_byte:
21150 size_needed = 4;
21151 break;
21152 case rep_prefix_1_byte:
21153 size_needed = 1;
21154 break;
21155 case loop_1_byte:
21156 need_zero_guard = true;
21157 size_needed = 1;
21158 break;
21159 }
21160
21161 epilogue_size_needed = size_needed;
21162
21163 /* Step 1: Prologue guard. */
21164
21165 /* Alignment code needs count to be in a register. */
21166 if (CONST_INT_P (count_exp) && desired_align > align)
21167 {
21168 if (INTVAL (count_exp) > desired_align
21169 && INTVAL (count_exp) > size_needed)
21170 {
21171 align_bytes
21172 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
21173 if (align_bytes <= 0)
21174 align_bytes = 0;
21175 else
21176 align_bytes = desired_align - align_bytes;
21177 }
21178 if (align_bytes == 0)
21179 count_exp = force_reg (counter_mode (count_exp), count_exp);
21180 }
21181 gcc_assert (desired_align >= 1 && align >= 1);
21182
21183 /* Ensure that alignment prologue won't copy past end of block. */
21184 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
21185 {
21186 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
21187 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
21188 Make sure it is a power of 2. */
21189 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
21190
21191 if (count)
21192 {
21193 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
21194 {
21195 /* If main algorithm works on QImode, no epilogue is needed.
21196 For small sizes just don't align anything. */
21197 if (size_needed == 1)
21198 desired_align = align;
21199 else
21200 goto epilogue;
21201 }
21202 }
21203 else
21204 {
21205 label = gen_label_rtx ();
21206 emit_cmp_and_jump_insns (count_exp,
21207 GEN_INT (epilogue_size_needed),
21208 LTU, 0, counter_mode (count_exp), 1, label);
21209 if (expected_size == -1 || expected_size < epilogue_size_needed)
21210 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21211 else
21212 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21213 }
21214 }
21215
21216 /* Emit code to decide on runtime whether library call or inline should be
21217 used. */
21218 if (dynamic_check != -1)
21219 {
21220 if (CONST_INT_P (count_exp))
21221 {
21222 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
21223 {
21224 emit_block_move_via_libcall (dst, src, count_exp, false);
21225 count_exp = const0_rtx;
21226 goto epilogue;
21227 }
21228 }
21229 else
21230 {
21231 rtx hot_label = gen_label_rtx ();
21232 jump_around_label = gen_label_rtx ();
21233 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
21234 LEU, 0, GET_MODE (count_exp), 1, hot_label);
21235 predict_jump (REG_BR_PROB_BASE * 90 / 100);
21236 emit_block_move_via_libcall (dst, src, count_exp, false);
21237 emit_jump (jump_around_label);
21238 emit_label (hot_label);
21239 }
21240 }
21241
21242 /* Step 2: Alignment prologue. */
21243
21244 if (desired_align > align)
21245 {
21246 if (align_bytes == 0)
21247 {
21248 /* Except for the first move in the epilogue, we no longer know
21249 the constant offset in the aliasing info. It doesn't seem worth
21250 the pain to maintain it for the first move, so throw away
21251 the info early. */
21252 src = change_address (src, BLKmode, srcreg);
21253 dst = change_address (dst, BLKmode, destreg);
21254 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
21255 desired_align);
21256 }
21257 else
21258 {
21259 /* If we know how many bytes need to be stored before dst is
21260 sufficiently aligned, maintain aliasing info accurately. */
21261 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
21262 desired_align, align_bytes);
21263 count_exp = plus_constant (count_exp, -align_bytes);
21264 count -= align_bytes;
21265 }
21266 if (need_zero_guard
21267 && (count < (unsigned HOST_WIDE_INT) size_needed
21268 || (align_bytes == 0
21269 && count < ((unsigned HOST_WIDE_INT) size_needed
21270 + desired_align - align))))
21271 {
21272 /* It is possible that we copied enough so the main loop will not
21273 execute. */
21274 gcc_assert (size_needed > 1);
21275 if (label == NULL_RTX)
21276 label = gen_label_rtx ();
21277 emit_cmp_and_jump_insns (count_exp,
21278 GEN_INT (size_needed),
21279 LTU, 0, counter_mode (count_exp), 1, label);
21280 if (expected_size == -1
21281 || expected_size < (desired_align - align) / 2 + size_needed)
21282 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21283 else
21284 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21285 }
21286 }
21287 if (label && size_needed == 1)
21288 {
21289 emit_label (label);
21290 LABEL_NUSES (label) = 1;
21291 label = NULL;
21292 epilogue_size_needed = 1;
21293 }
21294 else if (label == NULL_RTX)
21295 epilogue_size_needed = size_needed;
21296
21297 /* Step 3: Main loop. */
21298
21299 switch (alg)
21300 {
21301 case libcall:
21302 case no_stringop:
21303 gcc_unreachable ();
21304 case loop_1_byte:
21305 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
21306 count_exp, QImode, 1, expected_size);
21307 break;
21308 case loop:
21309 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
21310 count_exp, Pmode, 1, expected_size);
21311 break;
21312 case unrolled_loop:
21313 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have
21314 enough registers for 4 temporaries anyway. */
21315 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
21316 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
21317 expected_size);
21318 break;
21319 case rep_prefix_8_byte:
21320 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
21321 DImode);
21322 break;
21323 case rep_prefix_4_byte:
21324 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
21325 SImode);
21326 break;
21327 case rep_prefix_1_byte:
21328 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
21329 QImode);
21330 break;
21331 }
21332 /* Properly adjust the offsets of the src and dest memory for aliasing. */
21333 if (CONST_INT_P (count_exp))
21334 {
21335 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
21336 (count / size_needed) * size_needed);
21337 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
21338 (count / size_needed) * size_needed);
21339 }
21340 else
21341 {
21342 src = change_address (src, BLKmode, srcreg);
21343 dst = change_address (dst, BLKmode, destreg);
21344 }
21345
21346 /* Step 4: Epilogue to copy the remaining bytes. */
21347 epilogue:
21348 if (label)
21349 {
21350 /* When the main loop is done, COUNT_EXP might hold the original count,
21351 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
21352 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
21353 bytes. Compensate if needed. */
21354
21355 if (size_needed < epilogue_size_needed)
21356 {
21357 tmp =
21358 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
21359 GEN_INT (size_needed - 1), count_exp, 1,
21360 OPTAB_DIRECT);
21361 if (tmp != count_exp)
21362 emit_move_insn (count_exp, tmp);
21363 }
21364 emit_label (label);
21365 LABEL_NUSES (label) = 1;
21366 }
21367
21368 if (count_exp != const0_rtx && epilogue_size_needed > 1)
21369 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
21370 epilogue_size_needed);
21371 if (jump_around_label)
21372 emit_label (jump_around_label);
21373 return true;
21374 }
21375
21376 /* Helper function for memset. For the QImode value 0xXY produce
21377 0xXYXY...XY of the width specified by MODE. This is essentially
21378 a multiplication by 0x01010101, but we can do slightly better than
21379 synth_mult by unwinding the sequence by hand on CPUs with
21380 slow multiply. */
21381 static rtx
21382 promote_duplicated_reg (enum machine_mode mode, rtx val)
21383 {
21384 enum machine_mode valmode = GET_MODE (val);
21385 rtx tmp;
21386 int nops = mode == DImode ? 3 : 2;
21387
21388 gcc_assert (mode == SImode || mode == DImode);
21389 if (val == const0_rtx)
21390 return copy_to_mode_reg (mode, const0_rtx);
21391 if (CONST_INT_P (val))
21392 {
21393 HOST_WIDE_INT v = INTVAL (val) & 255;
21394
21395 v |= v << 8;
21396 v |= v << 16;
21397 if (mode == DImode)
21398 v |= (v << 16) << 16;
21399 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
21400 }
21401
21402 if (valmode == VOIDmode)
21403 valmode = QImode;
21404 if (valmode != QImode)
21405 val = gen_lowpart (QImode, val);
21406 if (mode == QImode)
21407 return val;
21408 if (!TARGET_PARTIAL_REG_STALL)
21409 nops--;
21410 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
21411 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
21412 <= (ix86_cost->shift_const + ix86_cost->add) * nops
21413 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
21414 {
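      /* A multiply is cheaper here: build the 0x01...01 constant by
	 promoting const1_rtx and multiply the zero-extended byte by it.  */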
21415 rtx reg = convert_modes (mode, QImode, val, true);
21416 tmp = promote_duplicated_reg (mode, const1_rtx);
21417 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
21418 OPTAB_DIRECT);
21419 }
21420 else
21421 {
21422 rtx reg = convert_modes (mode, QImode, val, true);
21423
21424 if (!TARGET_PARTIAL_REG_STALL)
21425 if (mode == SImode)
21426 emit_insn (gen_movsi_insv_1 (reg, reg));
21427 else
21428 emit_insn (gen_movdi_insv_1 (reg, reg));
21429 else
21430 {
21431 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
21432 NULL, 1, OPTAB_DIRECT);
21433 reg =
21434 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
21435 }
21436 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
21437 NULL, 1, OPTAB_DIRECT);
21438 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
21439 if (mode == SImode)
21440 return reg;
21441 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
21442 NULL, 1, OPTAB_DIRECT);
21443 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
21444 return reg;
21445 }
21446 }
21447
21448 /* Duplicate value VAL using promote_duplicated_reg into the maximal size
21449 that will be needed by the main loop copying SIZE_NEEDED chunks and by
21450 the prologue raising the alignment from ALIGN to DESIRED_ALIGN. */
21451 static rtx
21452 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
21453 {
21454 rtx promoted_val;
21455
21456 if (TARGET_64BIT
21457 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
21458 promoted_val = promote_duplicated_reg (DImode, val);
21459 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
21460 promoted_val = promote_duplicated_reg (SImode, val);
21461 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
21462 promoted_val = promote_duplicated_reg (HImode, val);
21463 else
21464 promoted_val = val;
21465
21466 return promoted_val;
21467 }
21468
21469 /* Expand string set operation (memset). Use i386 string operations
21470 when profitable. See the expand_movmem comment for an explanation of
21471 the individual steps performed. */
21472 bool
21473 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
21474 rtx expected_align_exp, rtx expected_size_exp)
21475 {
21476 rtx destreg;
21477 rtx label = NULL;
21478 rtx tmp;
21479 rtx jump_around_label = NULL;
21480 HOST_WIDE_INT align = 1;
21481 unsigned HOST_WIDE_INT count = 0;
21482 HOST_WIDE_INT expected_size = -1;
21483 int size_needed = 0, epilogue_size_needed;
21484 int desired_align = 0, align_bytes = 0;
21485 enum stringop_alg alg;
21486 rtx promoted_val = NULL;
21487 bool force_loopy_epilogue = false;
21488 int dynamic_check;
21489 bool need_zero_guard = false;
21490
21491 if (CONST_INT_P (align_exp))
21492 align = INTVAL (align_exp);
21493 /* i386 can do misaligned accesses at a reasonable extra cost. */
21494 if (CONST_INT_P (expected_align_exp)
21495 && INTVAL (expected_align_exp) > align)
21496 align = INTVAL (expected_align_exp);
21497 if (CONST_INT_P (count_exp))
21498 count = expected_size = INTVAL (count_exp);
21499 if (CONST_INT_P (expected_size_exp) && count == 0)
21500 expected_size = INTVAL (expected_size_exp);
21501
21502 /* Make sure we don't need to care about overflow later on. */
21503 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
21504 return false;
21505
21506 /* Step 0: Decide on preferred algorithm, desired alignment and
21507 size of chunks to be copied by main loop. */
21508
21509 alg = decide_alg (count, expected_size, true, &dynamic_check);
21510 desired_align = decide_alignment (align, alg, expected_size);
21511
21512 if (!TARGET_ALIGN_STRINGOPS)
21513 align = desired_align;
21514
21515 if (alg == libcall)
21516 return false;
21517 gcc_assert (alg != no_stringop);
21518 if (!count)
21519 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
21520 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
21521 switch (alg)
21522 {
21523 case libcall:
21524 case no_stringop:
21525 gcc_unreachable ();
21526 case loop:
21527 need_zero_guard = true;
21528 size_needed = GET_MODE_SIZE (Pmode);
21529 break;
21530 case unrolled_loop:
21531 need_zero_guard = true;
21532 size_needed = GET_MODE_SIZE (Pmode) * 4;
21533 break;
21534 case rep_prefix_8_byte:
21535 size_needed = 8;
21536 break;
21537 case rep_prefix_4_byte:
21538 size_needed = 4;
21539 break;
21540 case rep_prefix_1_byte:
21541 size_needed = 1;
21542 break;
21543 case loop_1_byte:
21544 need_zero_guard = true;
21545 size_needed = 1;
21546 break;
21547 }
21548 epilogue_size_needed = size_needed;
21549
21550 /* Step 1: Prologue guard. */
21551
21552 /* Alignment code needs count to be in a register. */
21553 if (CONST_INT_P (count_exp) && desired_align > align)
21554 {
21555 if (INTVAL (count_exp) > desired_align
21556 && INTVAL (count_exp) > size_needed)
21557 {
21558 align_bytes
21559 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
21560 if (align_bytes <= 0)
21561 align_bytes = 0;
21562 else
21563 align_bytes = desired_align - align_bytes;
21564 }
21565 if (align_bytes == 0)
21566 {
21567 enum machine_mode mode = SImode;
21568 if (TARGET_64BIT && (count & ~0xffffffff))
21569 mode = DImode;
21570 count_exp = force_reg (mode, count_exp);
21571 }
21572 }
21573 /* Do the cheap promotion to allow better CSE across the
21574 main loop and epilogue (i.e. one load of the big constant in
21575 front of all the code). */
21576 if (CONST_INT_P (val_exp))
21577 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
21578 desired_align, align);
21579 /* Ensure that alignment prologue won't copy past end of block. */
21580 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
21581 {
21582 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
21583 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
21584 Make sure it is a power of 2. */
21585 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
21586
21587 /* To improve performance of small blocks, we jump around the VAL
21588 promoting code. This means that if the promoted VAL is not constant,
21589 we might not use it in the epilogue and have to use the byte
21590 loop variant. */
21591 if (epilogue_size_needed > 2 && !promoted_val)
21592 force_loopy_epilogue = true;
21593 if (count)
21594 {
21595 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
21596 {
21597 /* If main algorithm works on QImode, no epilogue is needed.
21598 For small sizes just don't align anything. */
21599 if (size_needed == 1)
21600 desired_align = align;
21601 else
21602 goto epilogue;
21603 }
21604 }
21605 else
21606 {
21607 label = gen_label_rtx ();
21608 emit_cmp_and_jump_insns (count_exp,
21609 GEN_INT (epilogue_size_needed),
21610 LTU, 0, counter_mode (count_exp), 1, label);
21611 if (expected_size == -1 || expected_size <= epilogue_size_needed)
21612 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21613 else
21614 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21615 }
21616 }
21617 if (dynamic_check != -1)
21618 {
21619 rtx hot_label = gen_label_rtx ();
21620 jump_around_label = gen_label_rtx ();
21621 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
21622 LEU, 0, counter_mode (count_exp), 1, hot_label);
21623 predict_jump (REG_BR_PROB_BASE * 90 / 100);
21624 set_storage_via_libcall (dst, count_exp, val_exp, false);
21625 emit_jump (jump_around_label);
21626 emit_label (hot_label);
21627 }
21628
21629 /* Step 2: Alignment prologue. */
21630
21631 /* Do the expensive promotion once we have branched off the small blocks. */
21632 if (!promoted_val)
21633 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
21634 desired_align, align);
21635 gcc_assert (desired_align >= 1 && align >= 1);
21636
21637 if (desired_align > align)
21638 {
21639 if (align_bytes == 0)
21640 {
21641 /* Except for the first move in the epilogue, we no longer know
21642 the constant offset in the aliasing info. It doesn't seem worth
21643 the pain to maintain it for the first move, so throw away
21644 the info early. */
21645 dst = change_address (dst, BLKmode, destreg);
21646 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
21647 desired_align);
21648 }
21649 else
21650 {
21651 /* If we know how many bytes need to be stored before dst is
21652 sufficiently aligned, maintain aliasing info accurately. */
21653 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
21654 desired_align, align_bytes);
21655 count_exp = plus_constant (count_exp, -align_bytes);
21656 count -= align_bytes;
21657 }
21658 if (need_zero_guard
21659 && (count < (unsigned HOST_WIDE_INT) size_needed
21660 || (align_bytes == 0
21661 && count < ((unsigned HOST_WIDE_INT) size_needed
21662 + desired_align - align))))
21663 {
21664 /* It is possible that we copied enough so the main loop will not
21665 execute. */
21666 gcc_assert (size_needed > 1);
21667 if (label == NULL_RTX)
21668 label = gen_label_rtx ();
21669 emit_cmp_and_jump_insns (count_exp,
21670 GEN_INT (size_needed),
21671 LTU, 0, counter_mode (count_exp), 1, label);
21672 if (expected_size == -1
21673 || expected_size < (desired_align - align) / 2 + size_needed)
21674 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21675 else
21676 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21677 }
21678 }
21679 if (label && size_needed == 1)
21680 {
21681 emit_label (label);
21682 LABEL_NUSES (label) = 1;
21683 label = NULL;
21684 promoted_val = val_exp;
21685 epilogue_size_needed = 1;
21686 }
21687 else if (label == NULL_RTX)
21688 epilogue_size_needed = size_needed;
21689
21690 /* Step 3: Main loop. */
21691
21692 switch (alg)
21693 {
21694 case libcall:
21695 case no_stringop:
21696 gcc_unreachable ();
21697 case loop_1_byte:
21698 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21699 count_exp, QImode, 1, expected_size);
21700 break;
21701 case loop:
21702 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21703 count_exp, Pmode, 1, expected_size);
21704 break;
21705 case unrolled_loop:
21706 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21707 count_exp, Pmode, 4, expected_size);
21708 break;
21709 case rep_prefix_8_byte:
21710 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21711 DImode, val_exp);
21712 break;
21713 case rep_prefix_4_byte:
21714 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21715 SImode, val_exp);
21716 break;
21717 case rep_prefix_1_byte:
21718 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21719 QImode, val_exp);
21720 break;
21721 }
21722 /* Properly adjust the offset of the dest memory for aliasing. */
21723 if (CONST_INT_P (count_exp))
21724 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
21725 (count / size_needed) * size_needed);
21726 else
21727 dst = change_address (dst, BLKmode, destreg);
21728
21729 /* Step 4: Epilogue to copy the remaining bytes. */
21730
21731 if (label)
21732 {
21733 /* When the main loop is done, COUNT_EXP might hold the original count,
21734 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
21735 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
21736 bytes. Compensate if needed. */
21737
21738 if (size_needed < epilogue_size_needed)
21739 {
21740 tmp =
21741 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
21742 GEN_INT (size_needed - 1), count_exp, 1,
21743 OPTAB_DIRECT);
21744 if (tmp != count_exp)
21745 emit_move_insn (count_exp, tmp);
21746 }
21747 emit_label (label);
21748 LABEL_NUSES (label) = 1;
21749 }
21750 epilogue:
21751 if (count_exp != const0_rtx && epilogue_size_needed > 1)
21752 {
21753 if (force_loopy_epilogue)
21754 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
21755 epilogue_size_needed);
21756 else
21757 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
21758 epilogue_size_needed);
21759 }
21760 if (jump_around_label)
21761 emit_label (jump_around_label);
21762 return true;
21763 }
21764
21765 /* Expand the appropriate insns for doing strlen if not just doing
21766 repnz; scasb
21767
21768 out = result, initialized with the start address
21769 align_rtx = alignment of the address.
21770 scratch = scratch register, initialized with the start address when
21771 not aligned, otherwise undefined.
21772
21773 This is just the body. It needs the initializations mentioned above and
21774 some address computing at the end. These things are done in i386.md. */
21775
21776 static void
21777 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
21778 {
21779 int align;
21780 rtx tmp;
21781 rtx align_2_label = NULL_RTX;
21782 rtx align_3_label = NULL_RTX;
21783 rtx align_4_label = gen_label_rtx ();
21784 rtx end_0_label = gen_label_rtx ();
21785 rtx mem;
21786 rtx tmpreg = gen_reg_rtx (SImode);
21787 rtx scratch = gen_reg_rtx (SImode);
21788 rtx cmp;
21789
21790 align = 0;
21791 if (CONST_INT_P (align_rtx))
21792 align = INTVAL (align_rtx);
21793
21794 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
21795
21796 /* Is there a known alignment and is it less than 4? */
21797 if (align < 4)
21798 {
21799 rtx scratch1 = gen_reg_rtx (Pmode);
21800 emit_move_insn (scratch1, out);
21801 /* Is there a known alignment and is it not 2? */
21802 if (align != 2)
21803 {
21804 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
21805 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
21806
21807 /* Leave just the 3 lower bits. */
21808 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
21809 NULL_RTX, 0, OPTAB_WIDEN);
21810
21811 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21812 Pmode, 1, align_4_label);
21813 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
21814 Pmode, 1, align_2_label);
21815 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
21816 Pmode, 1, align_3_label);
21817 }
21818 else
21819 {
21820 /* Since the alignment is 2, we have to check 2 or 0 bytes;
21821 check whether the pointer is aligned to a 4-byte boundary. */
21822
21823 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
21824 NULL_RTX, 0, OPTAB_WIDEN);
21825
21826 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21827 Pmode, 1, align_4_label);
21828 }
21829
21830 mem = change_address (src, QImode, out);
21831
21832 /* Now compare the bytes. */
21833
21834 /* Compare the first 1..3 unaligned bytes one byte at a time. */
21835 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
21836 QImode, 1, end_0_label);
21837
21838 /* Increment the address. */
21839 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21840
21841 /* Not needed with an alignment of 2 */
21842 if (align != 2)
21843 {
21844 emit_label (align_2_label);
21845
21846 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21847 end_0_label);
21848
21849 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21850
21851 emit_label (align_3_label);
21852 }
21853
21854 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21855 end_0_label);
21856
21857 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21858 }
21859
21860 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
21861 align this loop: doing so only makes the program larger and does not
21862 speed it up. */
21863 emit_label (align_4_label);
21864
21865 mem = change_address (src, SImode, out);
21866 emit_move_insn (scratch, mem);
21867 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
21868
21869 /* This formula yields a nonzero result iff one of the bytes is zero.
21870 This saves three branches inside the loop and many cycles. */
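/* As a rough illustration of the identity used below (not part of the
expansion): with the loaded word s, TMPREG becomes
(s - 0x01010101) & ~s & 0x80808080.
For s = 0x12340078 (zero byte in bits 15..8) this is
0x1132ff77 & 0xedcbff87 & 0x80808080 = 0x00008000,
i.e. nonzero with the set bit marking the zero byte, while for
s = 0x12345678 the result is 0. */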
21871
21872 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
21873 emit_insn (gen_one_cmplsi2 (scratch, scratch));
21874 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
21875 emit_insn (gen_andsi3 (tmpreg, tmpreg,
21876 gen_int_mode (0x80808080, SImode)));
21877 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
21878 align_4_label);
21879
21880 if (TARGET_CMOVE)
21881 {
21882 rtx reg = gen_reg_rtx (SImode);
21883 rtx reg2 = gen_reg_rtx (Pmode);
21884 emit_move_insn (reg, tmpreg);
21885 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
21886
21887 /* If zero is not in the first two bytes, move two bytes forward. */
21888 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21889 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21890 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21891 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
21892 gen_rtx_IF_THEN_ELSE (SImode, tmp,
21893 reg,
21894 tmpreg)));
21895 /* Emit lea manually to avoid clobbering of flags. */
21896 emit_insn (gen_rtx_SET (SImode, reg2,
21897 gen_rtx_PLUS (Pmode, out, const2_rtx)));
21898
21899 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21900 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21901 emit_insn (gen_rtx_SET (VOIDmode, out,
21902 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
21903 reg2,
21904 out)));
21905 }
21906 else
21907 {
21908 rtx end_2_label = gen_label_rtx ();
21909 /* Is zero in the first two bytes? */
21910
21911 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21912 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21913 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
21914 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21915 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
21916 pc_rtx);
21917 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
21918 JUMP_LABEL (tmp) = end_2_label;
21919
21920 /* Not in the first two. Move two bytes forward. */
21921 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
21922 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
21923
21924 emit_label (end_2_label);
21925
21926 }
21927
21928 /* Avoid branch in fixing the byte. */
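/* A sketch of why this works (assuming the code above behaves as described):
after the loop OUT points 4 bytes past the start of the word containing the
zero, or 6 bytes past it when the zero was not in the first two bytes and
OUT was advanced. The doubling below copies bit 7 of TMPREG's low byte --
set exactly when the zero is the first byte of the remaining pair -- into
the carry flag, so OUT - 3 - carry lands on the zero byte itself: e.g. a
zero at byte 0 yields (start + 4) - 3 - 1 = start, and a zero at byte 3
yields (start + 6) - 3 - 0 = start + 3. */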
21929 tmpreg = gen_lowpart (QImode, tmpreg);
21930 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
21931 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
21932 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
21933 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
21934
21935 emit_label (end_0_label);
21936 }
21937
21938 /* Expand strlen. */
21939
21940 bool
21941 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
21942 {
21943 rtx addr, scratch1, scratch2, scratch3, scratch4;
21944
21945 /* The generic case of the strlen expander is long. Avoid its
21946 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
21947
21948 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21949 && !TARGET_INLINE_ALL_STRINGOPS
21950 && !optimize_insn_for_size_p ()
21951 && (!CONST_INT_P (align) || INTVAL (align) < 4))
21952 return false;
21953
21954 addr = force_reg (Pmode, XEXP (src, 0));
21955 scratch1 = gen_reg_rtx (Pmode);
21956
21957 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21958 && !optimize_insn_for_size_p ())
21959 {
21960 /* Well it seems that some optimizer does not combine a call like
21961 foo(strlen(bar), strlen(bar));
21962 when the move and the subtraction are done here. It does calculate
21963 the length just once when these instructions are done inside
21964 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
21965 often used and I use one fewer register for the lifetime of
21966 output_strlen_unroll() this is better. */
21967
21968 emit_move_insn (out, addr);
21969
21970 ix86_expand_strlensi_unroll_1 (out, src, align);
21971
21972 /* strlensi_unroll_1 returns the address of the zero at the end of
21973 the string, like memchr(), so compute the length by subtracting
21974 the start address. */
21975 emit_insn (ix86_gen_sub3 (out, out, addr));
21976 }
21977 else
21978 {
21979 rtx unspec;
21980
21981 /* Can't use this if the user has appropriated eax, ecx, or edi. */
21982 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
21983 return false;
21984
21985 scratch2 = gen_reg_rtx (Pmode);
21986 scratch3 = gen_reg_rtx (Pmode);
21987 scratch4 = force_reg (Pmode, constm1_rtx);
21988
21989 emit_move_insn (scratch3, addr);
21990 eoschar = force_reg (QImode, eoschar);
21991
21992 src = replace_equiv_address_nv (src, scratch3);
21993
21994 /* If .md starts supporting :P, this can be done in .md. */
21995 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
21996 scratch4), UNSPEC_SCAS);
21997 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
21998 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
21999 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
22000 }
22001 return true;
22002 }
22003
22004 /* For a given symbol (function), construct code to compute the address of
22005 its PLT entry in the large x86-64 PIC model. */
22006 rtx
22007 construct_plt_address (rtx symbol)
22008 {
22009 rtx tmp = gen_reg_rtx (Pmode);
22010 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
22011
22012 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
22013 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
22014
22015 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
22016 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
22017 return tmp;
22018 }
22019
22020 rtx
22021 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
22022 rtx callarg2,
22023 rtx pop, bool sibcall)
22024 {
22025 /* We need to represent that SI and DI registers are clobbered
22026 by SYSV calls. */
22027 static int clobbered_registers[] = {
22028 XMM6_REG, XMM7_REG, XMM8_REG,
22029 XMM9_REG, XMM10_REG, XMM11_REG,
22030 XMM12_REG, XMM13_REG, XMM14_REG,
22031 XMM15_REG, SI_REG, DI_REG
22032 };
22033 rtx vec[ARRAY_SIZE (clobbered_registers) + 3];
22034 rtx use = NULL, call;
22035 unsigned int vec_len;
22036
22037 if (pop == const0_rtx)
22038 pop = NULL;
22039 gcc_assert (!TARGET_64BIT || !pop);
22040
22041 if (TARGET_MACHO && !TARGET_64BIT)
22042 {
22043 #if TARGET_MACHO
22044 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
22045 fnaddr = machopic_indirect_call_target (fnaddr);
22046 #endif
22047 }
22048 else
22049 {
22050 /* Static functions and indirect calls don't need the pic register. */
22051 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
22052 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
22053 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
22054 use_reg (&use, pic_offset_table_rtx);
22055 }
22056
22057 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
22058 {
22059 rtx al = gen_rtx_REG (QImode, AX_REG);
22060 emit_move_insn (al, callarg2);
22061 use_reg (&use, al);
22062 }
22063
22064 if (ix86_cmodel == CM_LARGE_PIC
22065 && MEM_P (fnaddr)
22066 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
22067 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
22068 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
22069 else if (sibcall
22070 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
22071 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
22072 {
22073 fnaddr = XEXP (fnaddr, 0);
22074 if (GET_MODE (fnaddr) != Pmode)
22075 fnaddr = convert_to_mode (Pmode, fnaddr, 1);
22076 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (Pmode, fnaddr));
22077 }
22078
22079 vec_len = 0;
22080 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
22081 if (retval)
22082 call = gen_rtx_SET (VOIDmode, retval, call);
22083 vec[vec_len++] = call;
22084
22085 if (pop)
22086 {
22087 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
22088 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
22089 vec[vec_len++] = pop;
22090 }
22091
22092 if (TARGET_64BIT_MS_ABI
22093 && (!callarg2 || INTVAL (callarg2) != -2))
22094 {
22095 unsigned i;
22096
22097 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
22098 UNSPEC_MS_TO_SYSV_CALL);
22099
22100 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
22101 vec[vec_len++]
22102 = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
22103 ? TImode : DImode,
22104 gen_rtx_REG (SSE_REGNO_P (clobbered_registers[i])
22105 ? TImode : DImode,
22106 clobbered_registers[i]));
22107 }
22108
22109 /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
22110 if (TARGET_VZEROUPPER)
22111 {
22112 int avx256;
22113 if (cfun->machine->callee_pass_avx256_p)
22114 {
22115 if (cfun->machine->callee_return_avx256_p)
22116 avx256 = callee_return_pass_avx256;
22117 else
22118 avx256 = callee_pass_avx256;
22119 }
22120 else if (cfun->machine->callee_return_avx256_p)
22121 avx256 = callee_return_avx256;
22122 else
22123 avx256 = call_no_avx256;
22124
22125 if (reload_completed)
22126 emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
22127 else
22128 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode,
22129 gen_rtvec (1, GEN_INT (avx256)),
22130 UNSPEC_CALL_NEEDS_VZEROUPPER);
22131 }
22132
22133 if (vec_len > 1)
22134 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
22135 call = emit_call_insn (call);
22136 if (use)
22137 CALL_INSN_FUNCTION_USAGE (call) = use;
22138
22139 return call;
22140 }
22141
22142 void
22143 ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
22144 {
22145 rtx pat = PATTERN (insn);
22146 rtvec vec = XVEC (pat, 0);
22147 int len = GET_NUM_ELEM (vec) - 1;
22148
22149 /* Strip off the last entry of the parallel. */
22150 gcc_assert (GET_CODE (RTVEC_ELT (vec, len)) == UNSPEC);
22151 gcc_assert (XINT (RTVEC_ELT (vec, len), 1) == UNSPEC_CALL_NEEDS_VZEROUPPER);
22152 if (len == 1)
22153 pat = RTVEC_ELT (vec, 0);
22154 else
22155 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (len, &RTVEC_ELT (vec, 0)));
22156
22157 emit_insn (gen_avx_vzeroupper (vzeroupper));
22158 emit_call_insn (pat);
22159 }
22160
22161 /* Output the assembly for a call instruction. */
22162
22163 const char *
22164 ix86_output_call_insn (rtx insn, rtx call_op)
22165 {
22166 bool direct_p = constant_call_address_operand (call_op, Pmode);
22167 bool seh_nop_p = false;
22168 const char *xasm;
22169
22170 if (SIBLING_CALL_P (insn))
22171 {
22172 if (direct_p)
22173 xasm = "jmp\t%P0";
22174 /* SEH epilogue detection requires the indirect branch case
22175 to include REX.W. */
22176 else if (TARGET_SEH)
22177 xasm = "rex.W jmp %A0";
22178 else
22179 xasm = "jmp\t%A0";
22180
22181 output_asm_insn (xasm, &call_op);
22182 return "";
22183 }
22184
22185 /* SEH unwinding can require an extra nop to be emitted in several
22186 circumstances. Determine if we have one of those. */
22187 if (TARGET_SEH)
22188 {
22189 rtx i;
22190
22191 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
22192 {
22193 /* If we get to another real insn, we don't need the nop. */
22194 if (INSN_P (i))
22195 break;
22196
22197 /* If we get to the epilogue note, prevent a catch region from
22198 being adjacent to the standard epilogue sequence. If non-
22199 call-exceptions, we'll have done this during epilogue emission. */
22200 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
22201 && !flag_non_call_exceptions
22202 && !can_throw_internal (insn))
22203 {
22204 seh_nop_p = true;
22205 break;
22206 }
22207 }
22208
22209 /* If we didn't find a real insn following the call, prevent the
22210 unwinder from looking into the next function. */
22211 if (i == NULL)
22212 seh_nop_p = true;
22213 }
22214
22215 if (direct_p)
22216 xasm = "call\t%P0";
22217 else
22218 xasm = "call\t%A0";
22219
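/* For illustration: the direct form typically comes out as "call foo"
(with any PIC decoration added by the %P operand modifier), while the
indirect form prints the AT&T '*' prefix via %A, e.g. "call *%eax" or
"call *24(%esp)". */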
22220 output_asm_insn (xasm, &call_op);
22221
22222 if (seh_nop_p)
22223 return "nop";
22224
22225 return "";
22226 }
22227 \f
22228 /* Clear stack slot assignments remembered from previous functions.
22229 This is called from INIT_EXPANDERS once before RTL is emitted for each
22230 function. */
22231
22232 static struct machine_function *
22233 ix86_init_machine_status (void)
22234 {
22235 struct machine_function *f;
22236
22237 f = ggc_alloc_cleared_machine_function ();
22238 f->use_fast_prologue_epilogue_nregs = -1;
22239 f->tls_descriptor_call_expanded_p = 0;
22240 f->call_abi = ix86_abi;
22241
22242 return f;
22243 }
22244
22245 /* Return a MEM corresponding to a stack slot with mode MODE.
22246 Allocate a new slot if necessary.
22247
22248 The RTL for a function can have several slots available: N is
22249 which slot to use. */
22250
22251 rtx
22252 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
22253 {
22254 struct stack_local_entry *s;
22255
22256 gcc_assert (n < MAX_386_STACK_LOCALS);
22257
22258 /* Virtual slot is valid only before vregs are instantiated. */
22259 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
22260
22261 for (s = ix86_stack_locals; s; s = s->next)
22262 if (s->mode == mode && s->n == n)
22263 return validize_mem (copy_rtx (s->rtl));
22264
22265 s = ggc_alloc_stack_local_entry ();
22266 s->n = n;
22267 s->mode = mode;
22268 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
22269
22270 s->next = ix86_stack_locals;
22271 ix86_stack_locals = s;
22272 return validize_mem (s->rtl);
22273 }
22274 \f
22275 /* Calculate the length of the memory address in the instruction encoding.
22276 Includes addr32 prefix, does not include the one-byte modrm, opcode,
22277 or other prefixes. */
22278
22279 int
22280 memory_address_length (rtx addr)
22281 {
22282 struct ix86_address parts;
22283 rtx base, index, disp;
22284 int len;
22285 int ok;
22286
22287 if (GET_CODE (addr) == PRE_DEC
22288 || GET_CODE (addr) == POST_INC
22289 || GET_CODE (addr) == PRE_MODIFY
22290 || GET_CODE (addr) == POST_MODIFY)
22291 return 0;
22292
22293 ok = ix86_decompose_address (addr, &parts);
22294 gcc_assert (ok);
22295
22296 if (parts.base && GET_CODE (parts.base) == SUBREG)
22297 parts.base = SUBREG_REG (parts.base);
22298 if (parts.index && GET_CODE (parts.index) == SUBREG)
22299 parts.index = SUBREG_REG (parts.index);
22300
22301 base = parts.base;
22302 index = parts.index;
22303 disp = parts.disp;
22304
22305 /* Add length of addr32 prefix. */
22306 len = (GET_CODE (addr) == ZERO_EXTEND
22307 || GET_CODE (addr) == AND);
22308
22309 /* Rule of thumb:
22310 - esp as the base always wants an index,
22311 - ebp as the base always wants a displacement,
22312 - r12 as the base always wants an index,
22313 - r13 as the base always wants a displacement. */
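/* A few illustrative results of the rules above (remember that the modrm
and opcode bytes are not counted here): (%ebp) or (%r13) costs 1 for the
zero displacement byte; 4(%esp) costs 2 (SIB byte plus disp8); a bare
disp32 costs 4, plus 1 in 64-bit mode when a SIB byte must be used to
avoid the %rip-relative encoding. */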
22314
22315 /* Register Indirect. */
22316 if (base && !index && !disp)
22317 {
22318 /* esp (for its index) and ebp (for its displacement) need
22319 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
22320 code. */
22321 if (REG_P (addr)
22322 && (addr == arg_pointer_rtx
22323 || addr == frame_pointer_rtx
22324 || REGNO (addr) == SP_REG
22325 || REGNO (addr) == BP_REG
22326 || REGNO (addr) == R12_REG
22327 || REGNO (addr) == R13_REG))
22328 len = 1;
22329 }
22330
22331 /* Direct Addressing. In 64-bit mode, mod 00 r/m 5
22332 is not disp32 but disp32(%rip), so for a plain disp32
22333 a SIB byte is needed, unless print_operand_address
22334 optimizes it into disp32(%rip) or (%rip) is implied
22335 by an UNSPEC. */
22336 else if (disp && !base && !index)
22337 {
22338 len = 4;
22339 if (TARGET_64BIT)
22340 {
22341 rtx symbol = disp;
22342
22343 if (GET_CODE (disp) == CONST)
22344 symbol = XEXP (disp, 0);
22345 if (GET_CODE (symbol) == PLUS
22346 && CONST_INT_P (XEXP (symbol, 1)))
22347 symbol = XEXP (symbol, 0);
22348
22349 if (GET_CODE (symbol) != LABEL_REF
22350 && (GET_CODE (symbol) != SYMBOL_REF
22351 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
22352 && (GET_CODE (symbol) != UNSPEC
22353 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
22354 && XINT (symbol, 1) != UNSPEC_PCREL
22355 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
22356 len += 1;
22357 }
22358 }
22359
22360 else
22361 {
22362 /* Find the length of the displacement constant. */
22363 if (disp)
22364 {
22365 if (base && satisfies_constraint_K (disp))
22366 len = 1;
22367 else
22368 len = 4;
22369 }
22370 /* ebp always wants a displacement. Similarly r13. */
22371 else if (base && REG_P (base)
22372 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
22373 len = 1;
22374
22375 /* An index requires the two-byte modrm form.... */
22376 if (index
22377 /* ...like esp (or r12), which always wants an index. */
22378 || base == arg_pointer_rtx
22379 || base == frame_pointer_rtx
22380 || (base && REG_P (base)
22381 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
22382 len += 1;
22383 }
22384
22385 switch (parts.seg)
22386 {
22387 case SEG_FS:
22388 case SEG_GS:
22389 len += 1;
22390 break;
22391 default:
22392 break;
22393 }
22394
22395 return len;
22396 }
22397
22398 /* Compute default value for "length_immediate" attribute. When SHORTFORM
22399 is set, expect that the insn has an 8-bit immediate alternative. */
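/* For illustration: with SHORTFORM set, "add $100, %eax" fits the
sign-extended 8-bit immediate form, so this returns 1, while
"add $1000, %eax" needs the full immediate and returns 4. DImode
immediates also return 4, since they are encoded sign-extended. */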
22400 int
22401 ix86_attr_length_immediate_default (rtx insn, bool shortform)
22402 {
22403 int len = 0;
22404 int i;
22405 extract_insn_cached (insn);
22406 for (i = recog_data.n_operands - 1; i >= 0; --i)
22407 if (CONSTANT_P (recog_data.operand[i]))
22408 {
22409 enum attr_mode mode = get_attr_mode (insn);
22410
22411 gcc_assert (!len);
22412 if (shortform && CONST_INT_P (recog_data.operand[i]))
22413 {
22414 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
22415 switch (mode)
22416 {
22417 case MODE_QI:
22418 len = 1;
22419 continue;
22420 case MODE_HI:
22421 ival = trunc_int_for_mode (ival, HImode);
22422 break;
22423 case MODE_SI:
22424 ival = trunc_int_for_mode (ival, SImode);
22425 break;
22426 default:
22427 break;
22428 }
22429 if (IN_RANGE (ival, -128, 127))
22430 {
22431 len = 1;
22432 continue;
22433 }
22434 }
22435 switch (mode)
22436 {
22437 case MODE_QI:
22438 len = 1;
22439 break;
22440 case MODE_HI:
22441 len = 2;
22442 break;
22443 case MODE_SI:
22444 len = 4;
22445 break;
22446 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
22447 case MODE_DI:
22448 len = 4;
22449 break;
22450 default:
22451 fatal_insn ("unknown insn mode", insn);
22452 }
22453 }
22454 return len;
22455 }
22456 /* Compute default value for "length_address" attribute. */
22457 int
22458 ix86_attr_length_address_default (rtx insn)
22459 {
22460 int i;
22461
22462 if (get_attr_type (insn) == TYPE_LEA)
22463 {
22464 rtx set = PATTERN (insn), addr;
22465
22466 if (GET_CODE (set) == PARALLEL)
22467 set = XVECEXP (set, 0, 0);
22468
22469 gcc_assert (GET_CODE (set) == SET);
22470
22471 addr = SET_SRC (set);
22472 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
22473 {
22474 if (GET_CODE (addr) == ZERO_EXTEND)
22475 addr = XEXP (addr, 0);
22476 if (GET_CODE (addr) == SUBREG)
22477 addr = SUBREG_REG (addr);
22478 }
22479
22480 return memory_address_length (addr);
22481 }
22482
22483 extract_insn_cached (insn);
22484 for (i = recog_data.n_operands - 1; i >= 0; --i)
22485 if (MEM_P (recog_data.operand[i]))
22486 {
22487 constrain_operands_cached (reload_completed);
22488 if (which_alternative != -1)
22489 {
22490 const char *constraints = recog_data.constraints[i];
22491 int alt = which_alternative;
22492
22493 while (*constraints == '=' || *constraints == '+')
22494 constraints++;
22495 while (alt-- > 0)
22496 while (*constraints++ != ',')
22497 ;
22498 /* Skip ignored operands. */
22499 if (*constraints == 'X')
22500 continue;
22501 }
22502 return memory_address_length (XEXP (recog_data.operand[i], 0));
22503 }
22504 return 0;
22505 }
22506
22507 /* Compute default value for "length_vex" attribute. It includes
22508 a 2- or 3-byte VEX prefix and 1 opcode byte. */
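/* For illustration: in 64-bit code "vaddps %xmm1, %xmm2, %xmm0" needs only
the 2-byte VEX prefix, so this returns 2 + 1; an insn that sets VEX.W, uses
a DImode general register (REX.W), or whose memory operand mentions an
extended register (REX.X/REX.B) returns 3 + 1. */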
22509
22510 int
22511 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
22512 {
22513 int i;
22514
22515 /* Only the 0f opcode can use the 2-byte VEX prefix; using the VEX.W
22516 bit requires the 3-byte VEX prefix. */
22517 if (!has_0f_opcode || has_vex_w)
22518 return 3 + 1;
22519
22520 /* We can always use 2 byte VEX prefix in 32bit. */
22521 if (!TARGET_64BIT)
22522 return 2 + 1;
22523
22524 extract_insn_cached (insn);
22525
22526 for (i = recog_data.n_operands - 1; i >= 0; --i)
22527 if (REG_P (recog_data.operand[i]))
22528 {
22529 /* REX.W bit uses 3 byte VEX prefix. */
22530 if (GET_MODE (recog_data.operand[i]) == DImode
22531 && GENERAL_REG_P (recog_data.operand[i]))
22532 return 3 + 1;
22533 }
22534 else
22535 {
22536 /* REX.X or REX.B bits use 3 byte VEX prefix. */
22537 if (MEM_P (recog_data.operand[i])
22538 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
22539 return 3 + 1;
22540 }
22541
22542 return 2 + 1;
22543 }
22544 \f
22545 /* Return the maximum number of instructions a cpu can issue. */
22546
22547 static int
22548 ix86_issue_rate (void)
22549 {
22550 switch (ix86_tune)
22551 {
22552 case PROCESSOR_PENTIUM:
22553 case PROCESSOR_ATOM:
22554 case PROCESSOR_K6:
22555 return 2;
22556
22557 case PROCESSOR_PENTIUMPRO:
22558 case PROCESSOR_PENTIUM4:
22559 case PROCESSOR_CORE2_32:
22560 case PROCESSOR_CORE2_64:
22561 case PROCESSOR_COREI7_32:
22562 case PROCESSOR_COREI7_64:
22563 case PROCESSOR_ATHLON:
22564 case PROCESSOR_K8:
22565 case PROCESSOR_AMDFAM10:
22566 case PROCESSOR_NOCONA:
22567 case PROCESSOR_GENERIC32:
22568 case PROCESSOR_GENERIC64:
22569 case PROCESSOR_BDVER1:
22570 case PROCESSOR_BDVER2:
22571 case PROCESSOR_BTVER1:
22572 return 3;
22573
22574 default:
22575 return 1;
22576 }
22577 }
22578
22579 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads the flags
22580 set by DEP_INSN but nothing else set by DEP_INSN. */
22581
22582 static bool
22583 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
22584 {
22585 rtx set, set2;
22586
22587 /* Simplify the test for uninteresting insns. */
22588 if (insn_type != TYPE_SETCC
22589 && insn_type != TYPE_ICMOV
22590 && insn_type != TYPE_FCMOV
22591 && insn_type != TYPE_IBR)
22592 return false;
22593
22594 if ((set = single_set (dep_insn)) != 0)
22595 {
22596 set = SET_DEST (set);
22597 set2 = NULL_RTX;
22598 }
22599 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
22600 && XVECLEN (PATTERN (dep_insn), 0) == 2
22601 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
22602 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
22603 {
22604 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
22605 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
22606 }
22607 else
22608 return false;
22609
22610 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
22611 return false;
22612
22613 /* This test is true if the dependent insn reads the flags but
22614 not any other potentially set register. */
22615 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
22616 return false;
22617
22618 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
22619 return false;
22620
22621 return true;
22622 }
22623
22624 /* Return true iff USE_INSN has a memory address with operands set by
22625 SET_INSN. */
22626
22627 bool
22628 ix86_agi_dependent (rtx set_insn, rtx use_insn)
22629 {
22630 int i;
22631 extract_insn_cached (use_insn);
22632 for (i = recog_data.n_operands - 1; i >= 0; --i)
22633 if (MEM_P (recog_data.operand[i]))
22634 {
22635 rtx addr = XEXP (recog_data.operand[i], 0);
22636 return modified_in_p (addr, set_insn) != 0;
22637 }
22638 return false;
22639 }
22640
22641 static int
22642 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
22643 {
22644 enum attr_type insn_type, dep_insn_type;
22645 enum attr_memory memory;
22646 rtx set, set2;
22647 int dep_insn_code_number;
22648
22649 /* Anti and output dependencies have zero cost on all CPUs. */
22650 if (REG_NOTE_KIND (link) != 0)
22651 return 0;
22652
22653 dep_insn_code_number = recog_memoized (dep_insn);
22654
22655 /* If we can't recognize the insns, we can't really do anything. */
22656 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
22657 return cost;
22658
22659 insn_type = get_attr_type (insn);
22660 dep_insn_type = get_attr_type (dep_insn);
22661
22662 switch (ix86_tune)
22663 {
22664 case PROCESSOR_PENTIUM:
22665 /* Address Generation Interlock adds a cycle of latency. */
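/* For illustration: a sequence such as "movl %eax, %esi" immediately
followed by "movl (%esi), %ecx" pays this extra cycle, because the
load's address depends on a register written by the previous insn. */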
22666 if (insn_type == TYPE_LEA)
22667 {
22668 rtx addr = PATTERN (insn);
22669
22670 if (GET_CODE (addr) == PARALLEL)
22671 addr = XVECEXP (addr, 0, 0);
22672
22673 gcc_assert (GET_CODE (addr) == SET);
22674
22675 addr = SET_SRC (addr);
22676 if (modified_in_p (addr, dep_insn))
22677 cost += 1;
22678 }
22679 else if (ix86_agi_dependent (dep_insn, insn))
22680 cost += 1;
22681
22682 /* ??? Compares pair with jump/setcc. */
22683 if (ix86_flags_dependent (insn, dep_insn, insn_type))
22684 cost = 0;
22685
22686 /* Floating point stores require value to be ready one cycle earlier. */
22687 if (insn_type == TYPE_FMOV
22688 && get_attr_memory (insn) == MEMORY_STORE
22689 && !ix86_agi_dependent (dep_insn, insn))
22690 cost += 1;
22691 break;
22692
22693 case PROCESSOR_PENTIUMPRO:
22694 memory = get_attr_memory (insn);
22695
22696 /* INT->FP conversion is expensive. */
22697 if (get_attr_fp_int_src (dep_insn))
22698 cost += 5;
22699
22700 /* There is one cycle extra latency between an FP op and a store. */
22701 if (insn_type == TYPE_FMOV
22702 && (set = single_set (dep_insn)) != NULL_RTX
22703 && (set2 = single_set (insn)) != NULL_RTX
22704 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
22705 && MEM_P (SET_DEST (set2)))
22706 cost += 1;
22707
22708 /* Show ability of reorder buffer to hide latency of load by executing
22709 in parallel with previous instruction in case
22710 previous instruction is not needed to compute the address. */
22711 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22712 && !ix86_agi_dependent (dep_insn, insn))
22713 {
22714 /* Claim moves to take one cycle, as the core can issue one load
22715 at a time and the next load can start a cycle later. */
22716 if (dep_insn_type == TYPE_IMOV
22717 || dep_insn_type == TYPE_FMOV)
22718 cost = 1;
22719 else if (cost > 1)
22720 cost--;
22721 }
22722 break;
22723
22724 case PROCESSOR_K6:
22725 memory = get_attr_memory (insn);
22726
22727 /* The esp dependency is resolved before the instruction is really
22728 finished. */
22729 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
22730 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
22731 return 1;
22732
22733 /* INT->FP conversion is expensive. */
22734 if (get_attr_fp_int_src (dep_insn))
22735 cost += 5;
22736
22737 /* Show ability of reorder buffer to hide latency of load by executing
22738 in parallel with previous instruction in case
22739 previous instruction is not needed to compute the address. */
22740 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22741 && !ix86_agi_dependent (dep_insn, insn))
22742 {
22743 /* Claim moves to take one cycle, as the core can issue one load
22744 at a time and the next load can start a cycle later. */
22745 if (dep_insn_type == TYPE_IMOV
22746 || dep_insn_type == TYPE_FMOV)
22747 cost = 1;
22748 else if (cost > 2)
22749 cost -= 2;
22750 else
22751 cost = 1;
22752 }
22753 break;
22754
22755 case PROCESSOR_ATHLON:
22756 case PROCESSOR_K8:
22757 case PROCESSOR_AMDFAM10:
22758 case PROCESSOR_BDVER1:
22759 case PROCESSOR_BDVER2:
22760 case PROCESSOR_BTVER1:
22761 case PROCESSOR_ATOM:
22762 case PROCESSOR_GENERIC32:
22763 case PROCESSOR_GENERIC64:
22764 memory = get_attr_memory (insn);
22765
22766 /* Show ability of reorder buffer to hide latency of load by executing
22767 in parallel with previous instruction in case
22768 previous instruction is not needed to compute the address. */
22769 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22770 && !ix86_agi_dependent (dep_insn, insn))
22771 {
22772 enum attr_unit unit = get_attr_unit (insn);
22773 int loadcost = 3;
22774
22775 /* Because of the difference between the length of integer and
22776 floating unit pipeline preparation stages, the memory operands
22777 for floating point are cheaper.
22778
22779 ??? For Athlon the difference is most probably 2. */
22780 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
22781 loadcost = 3;
22782 else
22783 loadcost = TARGET_ATHLON ? 2 : 0;
22784
22785 if (cost >= loadcost)
22786 cost -= loadcost;
22787 else
22788 cost = 0;
22789 }
22790
22791 default:
22792 break;
22793 }
22794
22795 return cost;
22796 }
22797
22798 /* How many alternative schedules to try. This should be as wide as the
22799 scheduling freedom in the DFA, but no wider. Making this value too
22800 large results in extra work for the scheduler. */
22801
22802 static int
22803 ia32_multipass_dfa_lookahead (void)
22804 {
22805 switch (ix86_tune)
22806 {
22807 case PROCESSOR_PENTIUM:
22808 return 2;
22809
22810 case PROCESSOR_PENTIUMPRO:
22811 case PROCESSOR_K6:
22812 return 1;
22813
22814 case PROCESSOR_CORE2_32:
22815 case PROCESSOR_CORE2_64:
22816 case PROCESSOR_COREI7_32:
22817 case PROCESSOR_COREI7_64:
22818 /* Generally, we want haifa-sched:max_issue() to look ahead as far as
22819 the number of instructions that can be issued in one cycle, i.e.,
22820 issue_rate. I wonder why the tuning for many CPUs does not do this. */
22821 return ix86_issue_rate ();
22822
22823 default:
22824 return 0;
22825 }
22826 }
22827
22828 \f
22829
22830 /* Model decoder of Core 2/i7.
22831 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
22832 track the instruction fetch block boundaries and make sure that long
22833 (9+ bytes) instructions are assigned to D0. */
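/* For illustration, with the parameter values set below: three 6-byte insns
cannot all be issued in one cycle because 18 bytes exceed the 16-byte
ifetch block, so the filter masks the third one out of the ready list;
and a 9-byte insn can only be taken as the first insn of a cycle, since
it is too long for a secondary decoder. */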
22834
22835 /* Maximum length of an insn that can be handled by
22836 a secondary decoder unit. '8' for Core 2/i7. */
22837 static int core2i7_secondary_decoder_max_insn_size;
22838
22839 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
22840 '16' for Core 2/i7. */
22841 static int core2i7_ifetch_block_size;
22842
22843 /* Maximum number of instructions decoder can handle per cycle.
22844 '6' for Core 2/i7. */
22845 static int core2i7_ifetch_block_max_insns;
22846
22847 typedef struct ix86_first_cycle_multipass_data_ *
22848 ix86_first_cycle_multipass_data_t;
22849 typedef const struct ix86_first_cycle_multipass_data_ *
22850 const_ix86_first_cycle_multipass_data_t;
22851
22852 /* A variable to store target state across calls to max_issue within
22853 one cycle. */
22854 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
22855 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
22856
22857 /* Initialize DATA. */
22858 static void
22859 core2i7_first_cycle_multipass_init (void *_data)
22860 {
22861 ix86_first_cycle_multipass_data_t data
22862 = (ix86_first_cycle_multipass_data_t) _data;
22863
22864 data->ifetch_block_len = 0;
22865 data->ifetch_block_n_insns = 0;
22866 data->ready_try_change = NULL;
22867 data->ready_try_change_size = 0;
22868 }
22869
22870 /* Advancing the cycle; reset ifetch block counts. */
22871 static void
22872 core2i7_dfa_post_advance_cycle (void)
22873 {
22874 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
22875
22876 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22877
22878 data->ifetch_block_len = 0;
22879 data->ifetch_block_n_insns = 0;
22880 }
22881
22882 static int min_insn_size (rtx);
22883
22884 /* Filter out insns from ready_try that the core will not be able to issue
22885 on current cycle due to decoder. */
22886 static void
22887 core2i7_first_cycle_multipass_filter_ready_try
22888 (const_ix86_first_cycle_multipass_data_t data,
22889 char *ready_try, int n_ready, bool first_cycle_insn_p)
22890 {
22891 while (n_ready--)
22892 {
22893 rtx insn;
22894 int insn_size;
22895
22896 if (ready_try[n_ready])
22897 continue;
22898
22899 insn = get_ready_element (n_ready);
22900 insn_size = min_insn_size (insn);
22901
22902 if (/* If this insn is too long for a secondary decoder ... */
22903 (!first_cycle_insn_p
22904 && insn_size > core2i7_secondary_decoder_max_insn_size)
22905 /* ... or it would not fit into the ifetch block ... */
22906 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
22907 /* ... or the decoder is full already ... */
22908 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
22909 /* ... mask the insn out. */
22910 {
22911 ready_try[n_ready] = 1;
22912
22913 if (data->ready_try_change)
22914 SET_BIT (data->ready_try_change, n_ready);
22915 }
22916 }
22917 }
22918
22919 /* Prepare for a new round of multipass lookahead scheduling. */
22920 static void
22921 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
22922 bool first_cycle_insn_p)
22923 {
22924 ix86_first_cycle_multipass_data_t data
22925 = (ix86_first_cycle_multipass_data_t) _data;
22926 const_ix86_first_cycle_multipass_data_t prev_data
22927 = ix86_first_cycle_multipass_data;
22928
22929 /* Restore the state from the end of the previous round. */
22930 data->ifetch_block_len = prev_data->ifetch_block_len;
22931 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
22932
22933 /* Filter instructions that cannot be issued on current cycle due to
22934 decoder restrictions. */
22935 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22936 first_cycle_insn_p);
22937 }
22938
22939 /* INSN is being issued in current solution. Account for its impact on
22940 the decoder model. */
22941 static void
22942 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
22943 rtx insn, const void *_prev_data)
22944 {
22945 ix86_first_cycle_multipass_data_t data
22946 = (ix86_first_cycle_multipass_data_t) _data;
22947 const_ix86_first_cycle_multipass_data_t prev_data
22948 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
22949
22950 int insn_size = min_insn_size (insn);
22951
22952 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
22953 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
22954 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
22955 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22956
22957 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
22958 if (!data->ready_try_change)
22959 {
22960 data->ready_try_change = sbitmap_alloc (n_ready);
22961 data->ready_try_change_size = n_ready;
22962 }
22963 else if (data->ready_try_change_size < n_ready)
22964 {
22965 data->ready_try_change = sbitmap_resize (data->ready_try_change,
22966 n_ready, 0);
22967 data->ready_try_change_size = n_ready;
22968 }
22969 sbitmap_zero (data->ready_try_change);
22970
22971 /* Filter out insns from ready_try that the core will not be able to issue
22972 on current cycle due to decoder. */
22973 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22974 false);
22975 }
22976
22977 /* Revert the effect on ready_try. */
22978 static void
22979 core2i7_first_cycle_multipass_backtrack (const void *_data,
22980 char *ready_try,
22981 int n_ready ATTRIBUTE_UNUSED)
22982 {
22983 const_ix86_first_cycle_multipass_data_t data
22984 = (const_ix86_first_cycle_multipass_data_t) _data;
22985 unsigned int i = 0;
22986 sbitmap_iterator sbi;
22987
22988 gcc_assert (sbitmap_last_set_bit (data->ready_try_change) < n_ready);
22989 EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
22990 {
22991 ready_try[i] = 0;
22992 }
22993 }
22994
22995 /* Save the result of multipass lookahead scheduling for the next round. */
22996 static void
22997 core2i7_first_cycle_multipass_end (const void *_data)
22998 {
22999 const_ix86_first_cycle_multipass_data_t data
23000 = (const_ix86_first_cycle_multipass_data_t) _data;
23001 ix86_first_cycle_multipass_data_t next_data
23002 = ix86_first_cycle_multipass_data;
23003
23004 if (data != NULL)
23005 {
23006 next_data->ifetch_block_len = data->ifetch_block_len;
23007 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
23008 }
23009 }
23010
23011 /* Deallocate target data. */
23012 static void
23013 core2i7_first_cycle_multipass_fini (void *_data)
23014 {
23015 ix86_first_cycle_multipass_data_t data
23016 = (ix86_first_cycle_multipass_data_t) _data;
23017
23018 if (data->ready_try_change)
23019 {
23020 sbitmap_free (data->ready_try_change);
23021 data->ready_try_change = NULL;
23022 data->ready_try_change_size = 0;
23023 }
23024 }
23025
23026 /* Prepare for scheduling pass. */
23027 static void
23028 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
23029 int verbose ATTRIBUTE_UNUSED,
23030 int max_uid ATTRIBUTE_UNUSED)
23031 {
23032 /* Install scheduling hooks for current CPU. Some of these hooks are used
23033 in time-critical parts of the scheduler, so we only set them up when
23034 they are actually used. */
23035 switch (ix86_tune)
23036 {
23037 case PROCESSOR_CORE2_32:
23038 case PROCESSOR_CORE2_64:
23039 case PROCESSOR_COREI7_32:
23040 case PROCESSOR_COREI7_64:
23041 targetm.sched.dfa_post_advance_cycle
23042 = core2i7_dfa_post_advance_cycle;
23043 targetm.sched.first_cycle_multipass_init
23044 = core2i7_first_cycle_multipass_init;
23045 targetm.sched.first_cycle_multipass_begin
23046 = core2i7_first_cycle_multipass_begin;
23047 targetm.sched.first_cycle_multipass_issue
23048 = core2i7_first_cycle_multipass_issue;
23049 targetm.sched.first_cycle_multipass_backtrack
23050 = core2i7_first_cycle_multipass_backtrack;
23051 targetm.sched.first_cycle_multipass_end
23052 = core2i7_first_cycle_multipass_end;
23053 targetm.sched.first_cycle_multipass_fini
23054 = core2i7_first_cycle_multipass_fini;
23055
23056 /* Set decoder parameters. */
23057 core2i7_secondary_decoder_max_insn_size = 8;
23058 core2i7_ifetch_block_size = 16;
23059 core2i7_ifetch_block_max_insns = 6;
23060 break;
23061
23062 default:
23063 targetm.sched.dfa_post_advance_cycle = NULL;
23064 targetm.sched.first_cycle_multipass_init = NULL;
23065 targetm.sched.first_cycle_multipass_begin = NULL;
23066 targetm.sched.first_cycle_multipass_issue = NULL;
23067 targetm.sched.first_cycle_multipass_backtrack = NULL;
23068 targetm.sched.first_cycle_multipass_end = NULL;
23069 targetm.sched.first_cycle_multipass_fini = NULL;
23070 break;
23071 }
23072 }
23073
23074 \f
23075 /* Compute the alignment given to a constant that is being placed in memory.
23076 EXP is the constant and ALIGN is the alignment that the object would
23077 ordinarily have.
23078 The value of this function is used instead of that alignment to align
23079 the object. */
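/* For illustration: a "double" constant that would otherwise get only 32-bit
alignment is bumped to 64 bits, a 128-bit vector constant to 128 bits, and
(when not optimizing for size) a string constant of at least 31 bytes to
word alignment. */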
23080
23081 int
23082 ix86_constant_alignment (tree exp, int align)
23083 {
23084 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
23085 || TREE_CODE (exp) == INTEGER_CST)
23086 {
23087 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
23088 return 64;
23089 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
23090 return 128;
23091 }
23092 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
23093 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
23094 return BITS_PER_WORD;
23095
23096 return align;
23097 }
23098
23099 /* Compute the alignment for a static variable.
23100 TYPE is the data type, and ALIGN is the alignment that
23101 the object would ordinarily have. The value of this function is used
23102 instead of that alignment to align the object. */
23103
23104 int
23105 ix86_data_alignment (tree type, int align)
23106 {
23107 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
23108
23109 if (AGGREGATE_TYPE_P (type)
23110 && TYPE_SIZE (type)
23111 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
23112 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
23113 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
23114 && align < max_align)
23115 align = max_align;
23116
23117 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
23118 to a 16-byte boundary. */
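/* For example, under this rule a file-scope "char buf[32]" ends up with
128-bit alignment even though char itself only needs byte alignment,
which allows aligned SSE accesses to the array. */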
23119 if (TARGET_64BIT)
23120 {
23121 if (AGGREGATE_TYPE_P (type)
23122 && TYPE_SIZE (type)
23123 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
23124 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
23125 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
23126 return 128;
23127 }
23128
23129 if (TREE_CODE (type) == ARRAY_TYPE)
23130 {
23131 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
23132 return 64;
23133 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
23134 return 128;
23135 }
23136 else if (TREE_CODE (type) == COMPLEX_TYPE)
23137 {
23138
23139 if (TYPE_MODE (type) == DCmode && align < 64)
23140 return 64;
23141 if ((TYPE_MODE (type) == XCmode
23142 || TYPE_MODE (type) == TCmode) && align < 128)
23143 return 128;
23144 }
23145 else if ((TREE_CODE (type) == RECORD_TYPE
23146 || TREE_CODE (type) == UNION_TYPE
23147 || TREE_CODE (type) == QUAL_UNION_TYPE)
23148 && TYPE_FIELDS (type))
23149 {
23150 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
23151 return 64;
23152 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
23153 return 128;
23154 }
23155 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
23156 || TREE_CODE (type) == INTEGER_TYPE)
23157 {
23158 if (TYPE_MODE (type) == DFmode && align < 64)
23159 return 64;
23160 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
23161 return 128;
23162 }
23163
23164 return align;
23165 }
23166
23167 /* Compute the alignment for a local variable or a stack slot. EXP is
23168 the data type or decl itself, MODE is the widest mode available and
23169 ALIGN is the alignment that the object would ordinarily have. The
23170 value of this macro is used instead of that alignment to align the
23171 object. */
23172
23173 unsigned int
23174 ix86_local_alignment (tree exp, enum machine_mode mode,
23175 unsigned int align)
23176 {
23177 tree type, decl;
23178
23179 if (exp && DECL_P (exp))
23180 {
23181 type = TREE_TYPE (exp);
23182 decl = exp;
23183 }
23184 else
23185 {
23186 type = exp;
23187 decl = NULL;
23188 }
23189
23190 /* Don't do dynamic stack realignment for long long objects with
23191 -mpreferred-stack-boundary=2. */
23192 if (!TARGET_64BIT
23193 && align == 64
23194 && ix86_preferred_stack_boundary < 64
23195 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
23196 && (!type || !TYPE_USER_ALIGN (type))
23197 && (!decl || !DECL_USER_ALIGN (decl)))
23198 align = 32;
23199
23200 /* If TYPE is NULL, we are allocating a stack slot for caller-save
23201 register in MODE. We will return the largest alignment of XF
23202 and DF. */
23203 if (!type)
23204 {
23205 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
23206 align = GET_MODE_ALIGNMENT (DFmode);
23207 return align;
23208 }
23209
23210 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
23211 to a 16-byte boundary. The exact wording is:
23212
23213 An array uses the same alignment as its elements, except that a local or
23214 global array variable of length at least 16 bytes or
23215 a C99 variable-length array variable always has alignment of at least 16 bytes.
23216
23217 This was added to allow use of aligned SSE instructions on arrays. The
23218 rule is meant for static storage (where the compiler cannot do the analysis
23219 by itself). We follow it for automatic variables only when convenient:
23220 we fully control everything in the function being compiled, and functions
23221 from other units cannot rely on the alignment.
23222
23223 Exclude the va_list type. It is the common case of a local array where
23224 we cannot benefit from the alignment. */
23225 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
23226 && TARGET_SSE)
23227 {
23228 if (AGGREGATE_TYPE_P (type)
23229 && (va_list_type_node == NULL_TREE
23230 || (TYPE_MAIN_VARIANT (type)
23231 != TYPE_MAIN_VARIANT (va_list_type_node)))
23232 && TYPE_SIZE (type)
23233 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
23234 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
23235 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
23236 return 128;
23237 }
23238 if (TREE_CODE (type) == ARRAY_TYPE)
23239 {
23240 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
23241 return 64;
23242 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
23243 return 128;
23244 }
23245 else if (TREE_CODE (type) == COMPLEX_TYPE)
23246 {
23247 if (TYPE_MODE (type) == DCmode && align < 64)
23248 return 64;
23249 if ((TYPE_MODE (type) == XCmode
23250 || TYPE_MODE (type) == TCmode) && align < 128)
23251 return 128;
23252 }
23253 else if ((TREE_CODE (type) == RECORD_TYPE
23254 || TREE_CODE (type) == UNION_TYPE
23255 || TREE_CODE (type) == QUAL_UNION_TYPE)
23256 && TYPE_FIELDS (type))
23257 {
23258 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
23259 return 64;
23260 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
23261 return 128;
23262 }
23263 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
23264 || TREE_CODE (type) == INTEGER_TYPE)
23265 {
23266
23267 if (TYPE_MODE (type) == DFmode && align < 64)
23268 return 64;
23269 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
23270 return 128;
23271 }
23272 return align;
23273 }
23274
23275 /* Compute the minimum required alignment for dynamic stack realignment
23276 purposes for a local variable, parameter or a stack slot. EXP is
23277 the data type or decl itself, MODE is its mode and ALIGN is the
23278 alignment that the object would ordinarily have. */
23279
23280 unsigned int
23281 ix86_minimum_alignment (tree exp, enum machine_mode mode,
23282 unsigned int align)
23283 {
23284 tree type, decl;
23285
23286 if (exp && DECL_P (exp))
23287 {
23288 type = TREE_TYPE (exp);
23289 decl = exp;
23290 }
23291 else
23292 {
23293 type = exp;
23294 decl = NULL;
23295 }
23296
23297 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
23298 return align;
23299
23300 /* Don't do dynamic stack realignment for long long objects with
23301 -mpreferred-stack-boundary=2. */
23302 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
23303 && (!type || !TYPE_USER_ALIGN (type))
23304 && (!decl || !DECL_USER_ALIGN (decl)))
23305 return 32;
23306
23307 return align;
23308 }
23309 \f
23310 /* Find a location for the static chain incoming to a nested function.
23311 This is a register, unless all free registers are used by arguments. */
23312
23313 static rtx
23314 ix86_static_chain (const_tree fndecl, bool incoming_p)
23315 {
23316 unsigned regno;
23317
23318 if (!DECL_STATIC_CHAIN (fndecl))
23319 return NULL;
23320
23321 if (TARGET_64BIT)
23322 {
23323 /* We always use R10 in 64-bit mode. */
23324 regno = R10_REG;
23325 }
23326 else
23327 {
23328 tree fntype;
23329 unsigned int ccvt;
23330
23331 /* By default in 32-bit mode we use ECX to pass the static chain. */
23332 regno = CX_REG;
23333
23334 fntype = TREE_TYPE (fndecl);
23335 ccvt = ix86_get_callcvt (fntype);
23336 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
23337 {
23338 /* Fastcall functions use ecx/edx for arguments, which leaves
23339 us with EAX for the static chain.
23340 Thiscall functions use ecx for arguments, which also
23341 leaves us with EAX for the static chain. */
23342 regno = AX_REG;
23343 }
23344 else if (ix86_function_regparm (fntype, fndecl) == 3)
23345 {
23346 /* For regparm 3, we have no free call-clobbered registers in
23347 which to store the static chain. In order to implement this,
23348 we have the trampoline push the static chain to the stack.
23349 However, we can't push a value below the return address when
23350 we call the nested function directly, so we have to use an
23351 alternate entry point. For this we use ESI, and have the
23352 alternate entry point push ESI, so that things appear the
23353 same once we're executing the nested function. */
23354 if (incoming_p)
23355 {
23356 if (fndecl == current_function_decl)
23357 ix86_static_chain_on_stack = true;
23358 return gen_frame_mem (SImode,
23359 plus_constant (arg_pointer_rtx, -8));
23360 }
23361 regno = SI_REG;
23362 }
23363 }
23364
23365 return gen_rtx_REG (Pmode, regno);
23366 }
23367
23368 /* Emit RTL insns to initialize the variable parts of a trampoline.
23369 FNDECL is the decl of the target address; M_TRAMP is a MEM for
23370 the trampoline, and CHAIN_VALUE is an RTX for the static chain
23371 to be passed to the target function. */
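/* As a rough sketch, the 64-bit trampoline laid out below (the movabs case,
i.e. neither x32 nor a zero-extendable 32-bit address) is, in bytes:
49 bb <fnaddr, 8 bytes>    movabs $fnaddr, %r11
49 ba <chain, 8 bytes>     movabs $chain, %r10
49 ff e3 90                jmp *%r11; nop
The 32-bit variant instead emits "mov $chain, %ecx" (or %eax, or a push,
depending on where the static chain lives) followed by a "jmp rel32" to
the target. */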
23372
23373 static void
23374 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
23375 {
23376 rtx mem, fnaddr;
23377 int opcode;
23378 int offset = 0;
23379
23380 fnaddr = XEXP (DECL_RTL (fndecl), 0);
23381
23382 if (TARGET_64BIT)
23383 {
23384 int size;
23385
23386 /* Load the function address to r11. Try to load address using
23387 the shorter movl instead of movabs. We may want to support
23388 movq for kernel mode, but kernel does not use trampolines at
23389 the moment. */
23390 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
23391 {
23392 fnaddr = copy_to_mode_reg (DImode, fnaddr);
23393
23394 mem = adjust_address (m_tramp, HImode, offset);
23395 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
23396
23397 mem = adjust_address (m_tramp, SImode, offset + 2);
23398 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
23399 offset += 6;
23400 }
23401 else
23402 {
23403 mem = adjust_address (m_tramp, HImode, offset);
23404 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
23405
23406 mem = adjust_address (m_tramp, DImode, offset + 2);
23407 emit_move_insn (mem, fnaddr);
23408 offset += 10;
23409 }
23410
23411 /* Load static chain using movabs to r10. Use the
23412 shorter movl instead of movabs for x32. */
23413 if (TARGET_X32)
23414 {
23415 opcode = 0xba41;
23416 size = 6;
23417 }
23418 else
23419 {
23420 opcode = 0xba49;
23421 size = 10;
23422 }
23423
23424 mem = adjust_address (m_tramp, HImode, offset);
23425 emit_move_insn (mem, gen_int_mode (opcode, HImode));
23426
23427 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
23428 emit_move_insn (mem, chain_value);
23429 offset += size;
23430
23431 /* Jump to r11; the last (unused) byte is a nop, only there to
23432 pad the write out to a single 32-bit store. */
23433 mem = adjust_address (m_tramp, SImode, offset);
23434 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
23435 offset += 4;
23436 }
23437 else
23438 {
23439 rtx disp, chain;
23440
23441 /* Depending on the static chain location, either load a register
23442 with a constant, or push the constant to the stack. All of the
23443 instructions are the same size. */
23444 chain = ix86_static_chain (fndecl, true);
23445 if (REG_P (chain))
23446 {
23447 switch (REGNO (chain))
23448 {
23449 case AX_REG:
23450 opcode = 0xb8; break;
23451 case CX_REG:
23452 opcode = 0xb9; break;
23453 default:
23454 gcc_unreachable ();
23455 }
23456 }
23457 else
23458 opcode = 0x68;
23459
23460 mem = adjust_address (m_tramp, QImode, offset);
23461 emit_move_insn (mem, gen_int_mode (opcode, QImode));
23462
23463 mem = adjust_address (m_tramp, SImode, offset + 1);
23464 emit_move_insn (mem, chain_value);
23465 offset += 5;
23466
23467 mem = adjust_address (m_tramp, QImode, offset);
23468 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
23469
23470 mem = adjust_address (m_tramp, SImode, offset + 1);
23471
23472 /* Compute offset from the end of the jmp to the target function.
23473 In the case in which the trampoline stores the static chain on
23474 the stack, we need to skip the first insn which pushes the
23475 (call-saved) register static chain; this push is 1 byte. */
23476 offset += 5;
23477 disp = expand_binop (SImode, sub_optab, fnaddr,
23478 plus_constant (XEXP (m_tramp, 0),
23479 offset - (MEM_P (chain) ? 1 : 0)),
23480 NULL_RTX, 1, OPTAB_DIRECT);
23481 emit_move_insn (mem, disp);
23482 }
23483
23484 gcc_assert (offset <= TRAMPOLINE_SIZE);
23485
23486 #ifdef HAVE_ENABLE_EXECUTE_STACK
23487 #ifdef CHECK_EXECUTE_STACK_ENABLED
23488 if (CHECK_EXECUTE_STACK_ENABLED)
23489 #endif
23490 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
23491 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
23492 #endif
23493 }
23494 \f
23495 /* The following file contains several enumerations and data structures
23496 built from the definitions in i386-builtin-types.def. */
23497
23498 #include "i386-builtin-types.inc"
23499
23500 /* Table for the ix86 builtin non-function types. */
23501 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
23502
23503 /* Retrieve an element from the above table, building some of
23504 the types lazily. */
23505
23506 static tree
23507 ix86_get_builtin_type (enum ix86_builtin_type tcode)
23508 {
23509 unsigned int index;
23510 tree type, itype;
23511
23512 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
23513
23514 type = ix86_builtin_type_tab[(int) tcode];
23515 if (type != NULL)
23516 return type;
23517
23518 gcc_assert (tcode > IX86_BT_LAST_PRIM);
23519 if (tcode <= IX86_BT_LAST_VECT)
23520 {
23521 enum machine_mode mode;
23522
23523 index = tcode - IX86_BT_LAST_PRIM - 1;
23524 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
23525 mode = ix86_builtin_type_vect_mode[index];
23526
23527 type = build_vector_type_for_mode (itype, mode);
23528 }
23529 else
23530 {
23531 int quals;
23532
23533 index = tcode - IX86_BT_LAST_VECT - 1;
23534 if (tcode <= IX86_BT_LAST_PTR)
23535 quals = TYPE_UNQUALIFIED;
23536 else
23537 quals = TYPE_QUAL_CONST;
23538
23539 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
23540 if (quals != TYPE_UNQUALIFIED)
23541 itype = build_qualified_type (itype, quals);
23542
23543 type = build_pointer_type (itype);
23544 }
23545
23546 ix86_builtin_type_tab[(int) tcode] = type;
23547 return type;
23548 }
23549
23550 /* Table for the ix86 builtin function types. */
23551 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
23552
23553 /* Retrieve an element from the above table, building some of
23554 the types lazily. */
23555
23556 static tree
23557 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
23558 {
23559 tree type;
23560
23561 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
23562
23563 type = ix86_builtin_func_type_tab[(int) tcode];
23564 if (type != NULL)
23565 return type;
23566
23567 if (tcode <= IX86_BT_LAST_FUNC)
23568 {
23569 unsigned start = ix86_builtin_func_start[(int) tcode];
23570 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
23571 tree rtype, atype, args = void_list_node;
23572 unsigned i;
23573
23574 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
23575 for (i = after - 1; i > start; --i)
23576 {
23577 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
23578 args = tree_cons (NULL, atype, args);
23579 }
23580
23581 type = build_function_type (rtype, args);
23582 }
23583 else
23584 {
23585 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
23586 enum ix86_builtin_func_type icode;
23587
23588 icode = ix86_builtin_func_alias_base[index];
23589 type = ix86_get_builtin_func_type (icode);
23590 }
23591
23592 ix86_builtin_func_type_tab[(int) tcode] = type;
23593 return type;
23594 }
23595
23596
23597 /* Codes for all the SSE/MMX builtins. */
23598 enum ix86_builtins
23599 {
23600 IX86_BUILTIN_ADDPS,
23601 IX86_BUILTIN_ADDSS,
23602 IX86_BUILTIN_DIVPS,
23603 IX86_BUILTIN_DIVSS,
23604 IX86_BUILTIN_MULPS,
23605 IX86_BUILTIN_MULSS,
23606 IX86_BUILTIN_SUBPS,
23607 IX86_BUILTIN_SUBSS,
23608
23609 IX86_BUILTIN_CMPEQPS,
23610 IX86_BUILTIN_CMPLTPS,
23611 IX86_BUILTIN_CMPLEPS,
23612 IX86_BUILTIN_CMPGTPS,
23613 IX86_BUILTIN_CMPGEPS,
23614 IX86_BUILTIN_CMPNEQPS,
23615 IX86_BUILTIN_CMPNLTPS,
23616 IX86_BUILTIN_CMPNLEPS,
23617 IX86_BUILTIN_CMPNGTPS,
23618 IX86_BUILTIN_CMPNGEPS,
23619 IX86_BUILTIN_CMPORDPS,
23620 IX86_BUILTIN_CMPUNORDPS,
23621 IX86_BUILTIN_CMPEQSS,
23622 IX86_BUILTIN_CMPLTSS,
23623 IX86_BUILTIN_CMPLESS,
23624 IX86_BUILTIN_CMPNEQSS,
23625 IX86_BUILTIN_CMPNLTSS,
23626 IX86_BUILTIN_CMPNLESS,
23627 IX86_BUILTIN_CMPNGTSS,
23628 IX86_BUILTIN_CMPNGESS,
23629 IX86_BUILTIN_CMPORDSS,
23630 IX86_BUILTIN_CMPUNORDSS,
23631
23632 IX86_BUILTIN_COMIEQSS,
23633 IX86_BUILTIN_COMILTSS,
23634 IX86_BUILTIN_COMILESS,
23635 IX86_BUILTIN_COMIGTSS,
23636 IX86_BUILTIN_COMIGESS,
23637 IX86_BUILTIN_COMINEQSS,
23638 IX86_BUILTIN_UCOMIEQSS,
23639 IX86_BUILTIN_UCOMILTSS,
23640 IX86_BUILTIN_UCOMILESS,
23641 IX86_BUILTIN_UCOMIGTSS,
23642 IX86_BUILTIN_UCOMIGESS,
23643 IX86_BUILTIN_UCOMINEQSS,
23644
23645 IX86_BUILTIN_CVTPI2PS,
23646 IX86_BUILTIN_CVTPS2PI,
23647 IX86_BUILTIN_CVTSI2SS,
23648 IX86_BUILTIN_CVTSI642SS,
23649 IX86_BUILTIN_CVTSS2SI,
23650 IX86_BUILTIN_CVTSS2SI64,
23651 IX86_BUILTIN_CVTTPS2PI,
23652 IX86_BUILTIN_CVTTSS2SI,
23653 IX86_BUILTIN_CVTTSS2SI64,
23654
23655 IX86_BUILTIN_MAXPS,
23656 IX86_BUILTIN_MAXSS,
23657 IX86_BUILTIN_MINPS,
23658 IX86_BUILTIN_MINSS,
23659
23660 IX86_BUILTIN_LOADUPS,
23661 IX86_BUILTIN_STOREUPS,
23662 IX86_BUILTIN_MOVSS,
23663
23664 IX86_BUILTIN_MOVHLPS,
23665 IX86_BUILTIN_MOVLHPS,
23666 IX86_BUILTIN_LOADHPS,
23667 IX86_BUILTIN_LOADLPS,
23668 IX86_BUILTIN_STOREHPS,
23669 IX86_BUILTIN_STORELPS,
23670
23671 IX86_BUILTIN_MASKMOVQ,
23672 IX86_BUILTIN_MOVMSKPS,
23673 IX86_BUILTIN_PMOVMSKB,
23674
23675 IX86_BUILTIN_MOVNTPS,
23676 IX86_BUILTIN_MOVNTQ,
23677
23678 IX86_BUILTIN_LOADDQU,
23679 IX86_BUILTIN_STOREDQU,
23680
23681 IX86_BUILTIN_PACKSSWB,
23682 IX86_BUILTIN_PACKSSDW,
23683 IX86_BUILTIN_PACKUSWB,
23684
23685 IX86_BUILTIN_PADDB,
23686 IX86_BUILTIN_PADDW,
23687 IX86_BUILTIN_PADDD,
23688 IX86_BUILTIN_PADDQ,
23689 IX86_BUILTIN_PADDSB,
23690 IX86_BUILTIN_PADDSW,
23691 IX86_BUILTIN_PADDUSB,
23692 IX86_BUILTIN_PADDUSW,
23693 IX86_BUILTIN_PSUBB,
23694 IX86_BUILTIN_PSUBW,
23695 IX86_BUILTIN_PSUBD,
23696 IX86_BUILTIN_PSUBQ,
23697 IX86_BUILTIN_PSUBSB,
23698 IX86_BUILTIN_PSUBSW,
23699 IX86_BUILTIN_PSUBUSB,
23700 IX86_BUILTIN_PSUBUSW,
23701
23702 IX86_BUILTIN_PAND,
23703 IX86_BUILTIN_PANDN,
23704 IX86_BUILTIN_POR,
23705 IX86_BUILTIN_PXOR,
23706
23707 IX86_BUILTIN_PAVGB,
23708 IX86_BUILTIN_PAVGW,
23709
23710 IX86_BUILTIN_PCMPEQB,
23711 IX86_BUILTIN_PCMPEQW,
23712 IX86_BUILTIN_PCMPEQD,
23713 IX86_BUILTIN_PCMPGTB,
23714 IX86_BUILTIN_PCMPGTW,
23715 IX86_BUILTIN_PCMPGTD,
23716
23717 IX86_BUILTIN_PMADDWD,
23718
23719 IX86_BUILTIN_PMAXSW,
23720 IX86_BUILTIN_PMAXUB,
23721 IX86_BUILTIN_PMINSW,
23722 IX86_BUILTIN_PMINUB,
23723
23724 IX86_BUILTIN_PMULHUW,
23725 IX86_BUILTIN_PMULHW,
23726 IX86_BUILTIN_PMULLW,
23727
23728 IX86_BUILTIN_PSADBW,
23729 IX86_BUILTIN_PSHUFW,
23730
23731 IX86_BUILTIN_PSLLW,
23732 IX86_BUILTIN_PSLLD,
23733 IX86_BUILTIN_PSLLQ,
23734 IX86_BUILTIN_PSRAW,
23735 IX86_BUILTIN_PSRAD,
23736 IX86_BUILTIN_PSRLW,
23737 IX86_BUILTIN_PSRLD,
23738 IX86_BUILTIN_PSRLQ,
23739 IX86_BUILTIN_PSLLWI,
23740 IX86_BUILTIN_PSLLDI,
23741 IX86_BUILTIN_PSLLQI,
23742 IX86_BUILTIN_PSRAWI,
23743 IX86_BUILTIN_PSRADI,
23744 IX86_BUILTIN_PSRLWI,
23745 IX86_BUILTIN_PSRLDI,
23746 IX86_BUILTIN_PSRLQI,
23747
23748 IX86_BUILTIN_PUNPCKHBW,
23749 IX86_BUILTIN_PUNPCKHWD,
23750 IX86_BUILTIN_PUNPCKHDQ,
23751 IX86_BUILTIN_PUNPCKLBW,
23752 IX86_BUILTIN_PUNPCKLWD,
23753 IX86_BUILTIN_PUNPCKLDQ,
23754
23755 IX86_BUILTIN_SHUFPS,
23756
23757 IX86_BUILTIN_RCPPS,
23758 IX86_BUILTIN_RCPSS,
23759 IX86_BUILTIN_RSQRTPS,
23760 IX86_BUILTIN_RSQRTPS_NR,
23761 IX86_BUILTIN_RSQRTSS,
23762 IX86_BUILTIN_RSQRTF,
23763 IX86_BUILTIN_SQRTPS,
23764 IX86_BUILTIN_SQRTPS_NR,
23765 IX86_BUILTIN_SQRTSS,
23766
23767 IX86_BUILTIN_UNPCKHPS,
23768 IX86_BUILTIN_UNPCKLPS,
23769
23770 IX86_BUILTIN_ANDPS,
23771 IX86_BUILTIN_ANDNPS,
23772 IX86_BUILTIN_ORPS,
23773 IX86_BUILTIN_XORPS,
23774
23775 IX86_BUILTIN_EMMS,
23776 IX86_BUILTIN_LDMXCSR,
23777 IX86_BUILTIN_STMXCSR,
23778 IX86_BUILTIN_SFENCE,
23779
23780 /* 3DNow! Original */
23781 IX86_BUILTIN_FEMMS,
23782 IX86_BUILTIN_PAVGUSB,
23783 IX86_BUILTIN_PF2ID,
23784 IX86_BUILTIN_PFACC,
23785 IX86_BUILTIN_PFADD,
23786 IX86_BUILTIN_PFCMPEQ,
23787 IX86_BUILTIN_PFCMPGE,
23788 IX86_BUILTIN_PFCMPGT,
23789 IX86_BUILTIN_PFMAX,
23790 IX86_BUILTIN_PFMIN,
23791 IX86_BUILTIN_PFMUL,
23792 IX86_BUILTIN_PFRCP,
23793 IX86_BUILTIN_PFRCPIT1,
23794 IX86_BUILTIN_PFRCPIT2,
23795 IX86_BUILTIN_PFRSQIT1,
23796 IX86_BUILTIN_PFRSQRT,
23797 IX86_BUILTIN_PFSUB,
23798 IX86_BUILTIN_PFSUBR,
23799 IX86_BUILTIN_PI2FD,
23800 IX86_BUILTIN_PMULHRW,
23801
23802 /* 3DNow! Athlon Extensions */
23803 IX86_BUILTIN_PF2IW,
23804 IX86_BUILTIN_PFNACC,
23805 IX86_BUILTIN_PFPNACC,
23806 IX86_BUILTIN_PI2FW,
23807 IX86_BUILTIN_PSWAPDSI,
23808 IX86_BUILTIN_PSWAPDSF,
23809
23810 /* SSE2 */
23811 IX86_BUILTIN_ADDPD,
23812 IX86_BUILTIN_ADDSD,
23813 IX86_BUILTIN_DIVPD,
23814 IX86_BUILTIN_DIVSD,
23815 IX86_BUILTIN_MULPD,
23816 IX86_BUILTIN_MULSD,
23817 IX86_BUILTIN_SUBPD,
23818 IX86_BUILTIN_SUBSD,
23819
23820 IX86_BUILTIN_CMPEQPD,
23821 IX86_BUILTIN_CMPLTPD,
23822 IX86_BUILTIN_CMPLEPD,
23823 IX86_BUILTIN_CMPGTPD,
23824 IX86_BUILTIN_CMPGEPD,
23825 IX86_BUILTIN_CMPNEQPD,
23826 IX86_BUILTIN_CMPNLTPD,
23827 IX86_BUILTIN_CMPNLEPD,
23828 IX86_BUILTIN_CMPNGTPD,
23829 IX86_BUILTIN_CMPNGEPD,
23830 IX86_BUILTIN_CMPORDPD,
23831 IX86_BUILTIN_CMPUNORDPD,
23832 IX86_BUILTIN_CMPEQSD,
23833 IX86_BUILTIN_CMPLTSD,
23834 IX86_BUILTIN_CMPLESD,
23835 IX86_BUILTIN_CMPNEQSD,
23836 IX86_BUILTIN_CMPNLTSD,
23837 IX86_BUILTIN_CMPNLESD,
23838 IX86_BUILTIN_CMPORDSD,
23839 IX86_BUILTIN_CMPUNORDSD,
23840
23841 IX86_BUILTIN_COMIEQSD,
23842 IX86_BUILTIN_COMILTSD,
23843 IX86_BUILTIN_COMILESD,
23844 IX86_BUILTIN_COMIGTSD,
23845 IX86_BUILTIN_COMIGESD,
23846 IX86_BUILTIN_COMINEQSD,
23847 IX86_BUILTIN_UCOMIEQSD,
23848 IX86_BUILTIN_UCOMILTSD,
23849 IX86_BUILTIN_UCOMILESD,
23850 IX86_BUILTIN_UCOMIGTSD,
23851 IX86_BUILTIN_UCOMIGESD,
23852 IX86_BUILTIN_UCOMINEQSD,
23853
23854 IX86_BUILTIN_MAXPD,
23855 IX86_BUILTIN_MAXSD,
23856 IX86_BUILTIN_MINPD,
23857 IX86_BUILTIN_MINSD,
23858
23859 IX86_BUILTIN_ANDPD,
23860 IX86_BUILTIN_ANDNPD,
23861 IX86_BUILTIN_ORPD,
23862 IX86_BUILTIN_XORPD,
23863
23864 IX86_BUILTIN_SQRTPD,
23865 IX86_BUILTIN_SQRTSD,
23866
23867 IX86_BUILTIN_UNPCKHPD,
23868 IX86_BUILTIN_UNPCKLPD,
23869
23870 IX86_BUILTIN_SHUFPD,
23871
23872 IX86_BUILTIN_LOADUPD,
23873 IX86_BUILTIN_STOREUPD,
23874 IX86_BUILTIN_MOVSD,
23875
23876 IX86_BUILTIN_LOADHPD,
23877 IX86_BUILTIN_LOADLPD,
23878
23879 IX86_BUILTIN_CVTDQ2PD,
23880 IX86_BUILTIN_CVTDQ2PS,
23881
23882 IX86_BUILTIN_CVTPD2DQ,
23883 IX86_BUILTIN_CVTPD2PI,
23884 IX86_BUILTIN_CVTPD2PS,
23885 IX86_BUILTIN_CVTTPD2DQ,
23886 IX86_BUILTIN_CVTTPD2PI,
23887
23888 IX86_BUILTIN_CVTPI2PD,
23889 IX86_BUILTIN_CVTSI2SD,
23890 IX86_BUILTIN_CVTSI642SD,
23891
23892 IX86_BUILTIN_CVTSD2SI,
23893 IX86_BUILTIN_CVTSD2SI64,
23894 IX86_BUILTIN_CVTSD2SS,
23895 IX86_BUILTIN_CVTSS2SD,
23896 IX86_BUILTIN_CVTTSD2SI,
23897 IX86_BUILTIN_CVTTSD2SI64,
23898
23899 IX86_BUILTIN_CVTPS2DQ,
23900 IX86_BUILTIN_CVTPS2PD,
23901 IX86_BUILTIN_CVTTPS2DQ,
23902
23903 IX86_BUILTIN_MOVNTI,
23904 IX86_BUILTIN_MOVNTPD,
23905 IX86_BUILTIN_MOVNTDQ,
23906
23907 IX86_BUILTIN_MOVQ128,
23908
23909 /* SSE2 MMX */
23910 IX86_BUILTIN_MASKMOVDQU,
23911 IX86_BUILTIN_MOVMSKPD,
23912 IX86_BUILTIN_PMOVMSKB128,
23913
23914 IX86_BUILTIN_PACKSSWB128,
23915 IX86_BUILTIN_PACKSSDW128,
23916 IX86_BUILTIN_PACKUSWB128,
23917
23918 IX86_BUILTIN_PADDB128,
23919 IX86_BUILTIN_PADDW128,
23920 IX86_BUILTIN_PADDD128,
23921 IX86_BUILTIN_PADDQ128,
23922 IX86_BUILTIN_PADDSB128,
23923 IX86_BUILTIN_PADDSW128,
23924 IX86_BUILTIN_PADDUSB128,
23925 IX86_BUILTIN_PADDUSW128,
23926 IX86_BUILTIN_PSUBB128,
23927 IX86_BUILTIN_PSUBW128,
23928 IX86_BUILTIN_PSUBD128,
23929 IX86_BUILTIN_PSUBQ128,
23930 IX86_BUILTIN_PSUBSB128,
23931 IX86_BUILTIN_PSUBSW128,
23932 IX86_BUILTIN_PSUBUSB128,
23933 IX86_BUILTIN_PSUBUSW128,
23934
23935 IX86_BUILTIN_PAND128,
23936 IX86_BUILTIN_PANDN128,
23937 IX86_BUILTIN_POR128,
23938 IX86_BUILTIN_PXOR128,
23939
23940 IX86_BUILTIN_PAVGB128,
23941 IX86_BUILTIN_PAVGW128,
23942
23943 IX86_BUILTIN_PCMPEQB128,
23944 IX86_BUILTIN_PCMPEQW128,
23945 IX86_BUILTIN_PCMPEQD128,
23946 IX86_BUILTIN_PCMPGTB128,
23947 IX86_BUILTIN_PCMPGTW128,
23948 IX86_BUILTIN_PCMPGTD128,
23949
23950 IX86_BUILTIN_PMADDWD128,
23951
23952 IX86_BUILTIN_PMAXSW128,
23953 IX86_BUILTIN_PMAXUB128,
23954 IX86_BUILTIN_PMINSW128,
23955 IX86_BUILTIN_PMINUB128,
23956
23957 IX86_BUILTIN_PMULUDQ,
23958 IX86_BUILTIN_PMULUDQ128,
23959 IX86_BUILTIN_PMULHUW128,
23960 IX86_BUILTIN_PMULHW128,
23961 IX86_BUILTIN_PMULLW128,
23962
23963 IX86_BUILTIN_PSADBW128,
23964 IX86_BUILTIN_PSHUFHW,
23965 IX86_BUILTIN_PSHUFLW,
23966 IX86_BUILTIN_PSHUFD,
23967
23968 IX86_BUILTIN_PSLLDQI128,
23969 IX86_BUILTIN_PSLLWI128,
23970 IX86_BUILTIN_PSLLDI128,
23971 IX86_BUILTIN_PSLLQI128,
23972 IX86_BUILTIN_PSRAWI128,
23973 IX86_BUILTIN_PSRADI128,
23974 IX86_BUILTIN_PSRLDQI128,
23975 IX86_BUILTIN_PSRLWI128,
23976 IX86_BUILTIN_PSRLDI128,
23977 IX86_BUILTIN_PSRLQI128,
23978
23979 IX86_BUILTIN_PSLLDQ128,
23980 IX86_BUILTIN_PSLLW128,
23981 IX86_BUILTIN_PSLLD128,
23982 IX86_BUILTIN_PSLLQ128,
23983 IX86_BUILTIN_PSRAW128,
23984 IX86_BUILTIN_PSRAD128,
23985 IX86_BUILTIN_PSRLW128,
23986 IX86_BUILTIN_PSRLD128,
23987 IX86_BUILTIN_PSRLQ128,
23988
23989 IX86_BUILTIN_PUNPCKHBW128,
23990 IX86_BUILTIN_PUNPCKHWD128,
23991 IX86_BUILTIN_PUNPCKHDQ128,
23992 IX86_BUILTIN_PUNPCKHQDQ128,
23993 IX86_BUILTIN_PUNPCKLBW128,
23994 IX86_BUILTIN_PUNPCKLWD128,
23995 IX86_BUILTIN_PUNPCKLDQ128,
23996 IX86_BUILTIN_PUNPCKLQDQ128,
23997
23998 IX86_BUILTIN_CLFLUSH,
23999 IX86_BUILTIN_MFENCE,
24000 IX86_BUILTIN_LFENCE,
24001 IX86_BUILTIN_PAUSE,
24002
24003 IX86_BUILTIN_BSRSI,
24004 IX86_BUILTIN_BSRDI,
24005 IX86_BUILTIN_RDPMC,
24006 IX86_BUILTIN_RDTSC,
24007 IX86_BUILTIN_RDTSCP,
24008 IX86_BUILTIN_ROLQI,
24009 IX86_BUILTIN_ROLHI,
24010 IX86_BUILTIN_RORQI,
24011 IX86_BUILTIN_RORHI,
24012
24013 /* SSE3. */
24014 IX86_BUILTIN_ADDSUBPS,
24015 IX86_BUILTIN_HADDPS,
24016 IX86_BUILTIN_HSUBPS,
24017 IX86_BUILTIN_MOVSHDUP,
24018 IX86_BUILTIN_MOVSLDUP,
24019 IX86_BUILTIN_ADDSUBPD,
24020 IX86_BUILTIN_HADDPD,
24021 IX86_BUILTIN_HSUBPD,
24022 IX86_BUILTIN_LDDQU,
24023
24024 IX86_BUILTIN_MONITOR,
24025 IX86_BUILTIN_MWAIT,
24026
24027 /* SSSE3. */
24028 IX86_BUILTIN_PHADDW,
24029 IX86_BUILTIN_PHADDD,
24030 IX86_BUILTIN_PHADDSW,
24031 IX86_BUILTIN_PHSUBW,
24032 IX86_BUILTIN_PHSUBD,
24033 IX86_BUILTIN_PHSUBSW,
24034 IX86_BUILTIN_PMADDUBSW,
24035 IX86_BUILTIN_PMULHRSW,
24036 IX86_BUILTIN_PSHUFB,
24037 IX86_BUILTIN_PSIGNB,
24038 IX86_BUILTIN_PSIGNW,
24039 IX86_BUILTIN_PSIGND,
24040 IX86_BUILTIN_PALIGNR,
24041 IX86_BUILTIN_PABSB,
24042 IX86_BUILTIN_PABSW,
24043 IX86_BUILTIN_PABSD,
24044
24045 IX86_BUILTIN_PHADDW128,
24046 IX86_BUILTIN_PHADDD128,
24047 IX86_BUILTIN_PHADDSW128,
24048 IX86_BUILTIN_PHSUBW128,
24049 IX86_BUILTIN_PHSUBD128,
24050 IX86_BUILTIN_PHSUBSW128,
24051 IX86_BUILTIN_PMADDUBSW128,
24052 IX86_BUILTIN_PMULHRSW128,
24053 IX86_BUILTIN_PSHUFB128,
24054 IX86_BUILTIN_PSIGNB128,
24055 IX86_BUILTIN_PSIGNW128,
24056 IX86_BUILTIN_PSIGND128,
24057 IX86_BUILTIN_PALIGNR128,
24058 IX86_BUILTIN_PABSB128,
24059 IX86_BUILTIN_PABSW128,
24060 IX86_BUILTIN_PABSD128,
24061
24062 /* AMDFAM10 - SSE4A New Instructions. */
24063 IX86_BUILTIN_MOVNTSD,
24064 IX86_BUILTIN_MOVNTSS,
24065 IX86_BUILTIN_EXTRQI,
24066 IX86_BUILTIN_EXTRQ,
24067 IX86_BUILTIN_INSERTQI,
24068 IX86_BUILTIN_INSERTQ,
24069
24070 /* SSE4.1. */
24071 IX86_BUILTIN_BLENDPD,
24072 IX86_BUILTIN_BLENDPS,
24073 IX86_BUILTIN_BLENDVPD,
24074 IX86_BUILTIN_BLENDVPS,
24075 IX86_BUILTIN_PBLENDVB128,
24076 IX86_BUILTIN_PBLENDW128,
24077
24078 IX86_BUILTIN_DPPD,
24079 IX86_BUILTIN_DPPS,
24080
24081 IX86_BUILTIN_INSERTPS128,
24082
24083 IX86_BUILTIN_MOVNTDQA,
24084 IX86_BUILTIN_MPSADBW128,
24085 IX86_BUILTIN_PACKUSDW128,
24086 IX86_BUILTIN_PCMPEQQ,
24087 IX86_BUILTIN_PHMINPOSUW128,
24088
24089 IX86_BUILTIN_PMAXSB128,
24090 IX86_BUILTIN_PMAXSD128,
24091 IX86_BUILTIN_PMAXUD128,
24092 IX86_BUILTIN_PMAXUW128,
24093
24094 IX86_BUILTIN_PMINSB128,
24095 IX86_BUILTIN_PMINSD128,
24096 IX86_BUILTIN_PMINUD128,
24097 IX86_BUILTIN_PMINUW128,
24098
24099 IX86_BUILTIN_PMOVSXBW128,
24100 IX86_BUILTIN_PMOVSXBD128,
24101 IX86_BUILTIN_PMOVSXBQ128,
24102 IX86_BUILTIN_PMOVSXWD128,
24103 IX86_BUILTIN_PMOVSXWQ128,
24104 IX86_BUILTIN_PMOVSXDQ128,
24105
24106 IX86_BUILTIN_PMOVZXBW128,
24107 IX86_BUILTIN_PMOVZXBD128,
24108 IX86_BUILTIN_PMOVZXBQ128,
24109 IX86_BUILTIN_PMOVZXWD128,
24110 IX86_BUILTIN_PMOVZXWQ128,
24111 IX86_BUILTIN_PMOVZXDQ128,
24112
24113 IX86_BUILTIN_PMULDQ128,
24114 IX86_BUILTIN_PMULLD128,
24115
24116 IX86_BUILTIN_ROUNDPD,
24117 IX86_BUILTIN_ROUNDPS,
24118 IX86_BUILTIN_ROUNDSD,
24119 IX86_BUILTIN_ROUNDSS,
24120
24121 IX86_BUILTIN_FLOORPD,
24122 IX86_BUILTIN_CEILPD,
24123 IX86_BUILTIN_TRUNCPD,
24124 IX86_BUILTIN_RINTPD,
24125 IX86_BUILTIN_ROUNDPD_AZ,
24126 IX86_BUILTIN_FLOORPS,
24127 IX86_BUILTIN_CEILPS,
24128 IX86_BUILTIN_TRUNCPS,
24129 IX86_BUILTIN_RINTPS,
24130 IX86_BUILTIN_ROUNDPS_AZ,
24131
24132 IX86_BUILTIN_PTESTZ,
24133 IX86_BUILTIN_PTESTC,
24134 IX86_BUILTIN_PTESTNZC,
24135
24136 IX86_BUILTIN_VEC_INIT_V2SI,
24137 IX86_BUILTIN_VEC_INIT_V4HI,
24138 IX86_BUILTIN_VEC_INIT_V8QI,
24139 IX86_BUILTIN_VEC_EXT_V2DF,
24140 IX86_BUILTIN_VEC_EXT_V2DI,
24141 IX86_BUILTIN_VEC_EXT_V4SF,
24142 IX86_BUILTIN_VEC_EXT_V4SI,
24143 IX86_BUILTIN_VEC_EXT_V8HI,
24144 IX86_BUILTIN_VEC_EXT_V2SI,
24145 IX86_BUILTIN_VEC_EXT_V4HI,
24146 IX86_BUILTIN_VEC_EXT_V16QI,
24147 IX86_BUILTIN_VEC_SET_V2DI,
24148 IX86_BUILTIN_VEC_SET_V4SF,
24149 IX86_BUILTIN_VEC_SET_V4SI,
24150 IX86_BUILTIN_VEC_SET_V8HI,
24151 IX86_BUILTIN_VEC_SET_V4HI,
24152 IX86_BUILTIN_VEC_SET_V16QI,
24153
24154 IX86_BUILTIN_VEC_PACK_SFIX,
24155
24156 /* SSE4.2. */
24157 IX86_BUILTIN_CRC32QI,
24158 IX86_BUILTIN_CRC32HI,
24159 IX86_BUILTIN_CRC32SI,
24160 IX86_BUILTIN_CRC32DI,
24161
24162 IX86_BUILTIN_PCMPESTRI128,
24163 IX86_BUILTIN_PCMPESTRM128,
24164 IX86_BUILTIN_PCMPESTRA128,
24165 IX86_BUILTIN_PCMPESTRC128,
24166 IX86_BUILTIN_PCMPESTRO128,
24167 IX86_BUILTIN_PCMPESTRS128,
24168 IX86_BUILTIN_PCMPESTRZ128,
24169 IX86_BUILTIN_PCMPISTRI128,
24170 IX86_BUILTIN_PCMPISTRM128,
24171 IX86_BUILTIN_PCMPISTRA128,
24172 IX86_BUILTIN_PCMPISTRC128,
24173 IX86_BUILTIN_PCMPISTRO128,
24174 IX86_BUILTIN_PCMPISTRS128,
24175 IX86_BUILTIN_PCMPISTRZ128,
24176
24177 IX86_BUILTIN_PCMPGTQ,
24178
24179 /* AES instructions */
24180 IX86_BUILTIN_AESENC128,
24181 IX86_BUILTIN_AESENCLAST128,
24182 IX86_BUILTIN_AESDEC128,
24183 IX86_BUILTIN_AESDECLAST128,
24184 IX86_BUILTIN_AESIMC128,
24185 IX86_BUILTIN_AESKEYGENASSIST128,
24186
24187 /* PCLMUL instruction */
24188 IX86_BUILTIN_PCLMULQDQ128,
24189
24190 /* AVX */
24191 IX86_BUILTIN_ADDPD256,
24192 IX86_BUILTIN_ADDPS256,
24193 IX86_BUILTIN_ADDSUBPD256,
24194 IX86_BUILTIN_ADDSUBPS256,
24195 IX86_BUILTIN_ANDPD256,
24196 IX86_BUILTIN_ANDPS256,
24197 IX86_BUILTIN_ANDNPD256,
24198 IX86_BUILTIN_ANDNPS256,
24199 IX86_BUILTIN_BLENDPD256,
24200 IX86_BUILTIN_BLENDPS256,
24201 IX86_BUILTIN_BLENDVPD256,
24202 IX86_BUILTIN_BLENDVPS256,
24203 IX86_BUILTIN_DIVPD256,
24204 IX86_BUILTIN_DIVPS256,
24205 IX86_BUILTIN_DPPS256,
24206 IX86_BUILTIN_HADDPD256,
24207 IX86_BUILTIN_HADDPS256,
24208 IX86_BUILTIN_HSUBPD256,
24209 IX86_BUILTIN_HSUBPS256,
24210 IX86_BUILTIN_MAXPD256,
24211 IX86_BUILTIN_MAXPS256,
24212 IX86_BUILTIN_MINPD256,
24213 IX86_BUILTIN_MINPS256,
24214 IX86_BUILTIN_MULPD256,
24215 IX86_BUILTIN_MULPS256,
24216 IX86_BUILTIN_ORPD256,
24217 IX86_BUILTIN_ORPS256,
24218 IX86_BUILTIN_SHUFPD256,
24219 IX86_BUILTIN_SHUFPS256,
24220 IX86_BUILTIN_SUBPD256,
24221 IX86_BUILTIN_SUBPS256,
24222 IX86_BUILTIN_XORPD256,
24223 IX86_BUILTIN_XORPS256,
24224 IX86_BUILTIN_CMPSD,
24225 IX86_BUILTIN_CMPSS,
24226 IX86_BUILTIN_CMPPD,
24227 IX86_BUILTIN_CMPPS,
24228 IX86_BUILTIN_CMPPD256,
24229 IX86_BUILTIN_CMPPS256,
24230 IX86_BUILTIN_CVTDQ2PD256,
24231 IX86_BUILTIN_CVTDQ2PS256,
24232 IX86_BUILTIN_CVTPD2PS256,
24233 IX86_BUILTIN_CVTPS2DQ256,
24234 IX86_BUILTIN_CVTPS2PD256,
24235 IX86_BUILTIN_CVTTPD2DQ256,
24236 IX86_BUILTIN_CVTPD2DQ256,
24237 IX86_BUILTIN_CVTTPS2DQ256,
24238 IX86_BUILTIN_EXTRACTF128PD256,
24239 IX86_BUILTIN_EXTRACTF128PS256,
24240 IX86_BUILTIN_EXTRACTF128SI256,
24241 IX86_BUILTIN_VZEROALL,
24242 IX86_BUILTIN_VZEROUPPER,
24243 IX86_BUILTIN_VPERMILVARPD,
24244 IX86_BUILTIN_VPERMILVARPS,
24245 IX86_BUILTIN_VPERMILVARPD256,
24246 IX86_BUILTIN_VPERMILVARPS256,
24247 IX86_BUILTIN_VPERMILPD,
24248 IX86_BUILTIN_VPERMILPS,
24249 IX86_BUILTIN_VPERMILPD256,
24250 IX86_BUILTIN_VPERMILPS256,
24251 IX86_BUILTIN_VPERMIL2PD,
24252 IX86_BUILTIN_VPERMIL2PS,
24253 IX86_BUILTIN_VPERMIL2PD256,
24254 IX86_BUILTIN_VPERMIL2PS256,
24255 IX86_BUILTIN_VPERM2F128PD256,
24256 IX86_BUILTIN_VPERM2F128PS256,
24257 IX86_BUILTIN_VPERM2F128SI256,
24258 IX86_BUILTIN_VBROADCASTSS,
24259 IX86_BUILTIN_VBROADCASTSD256,
24260 IX86_BUILTIN_VBROADCASTSS256,
24261 IX86_BUILTIN_VBROADCASTPD256,
24262 IX86_BUILTIN_VBROADCASTPS256,
24263 IX86_BUILTIN_VINSERTF128PD256,
24264 IX86_BUILTIN_VINSERTF128PS256,
24265 IX86_BUILTIN_VINSERTF128SI256,
24266 IX86_BUILTIN_LOADUPD256,
24267 IX86_BUILTIN_LOADUPS256,
24268 IX86_BUILTIN_STOREUPD256,
24269 IX86_BUILTIN_STOREUPS256,
24270 IX86_BUILTIN_LDDQU256,
24271 IX86_BUILTIN_MOVNTDQ256,
24272 IX86_BUILTIN_MOVNTPD256,
24273 IX86_BUILTIN_MOVNTPS256,
24274 IX86_BUILTIN_LOADDQU256,
24275 IX86_BUILTIN_STOREDQU256,
24276 IX86_BUILTIN_MASKLOADPD,
24277 IX86_BUILTIN_MASKLOADPS,
24278 IX86_BUILTIN_MASKSTOREPD,
24279 IX86_BUILTIN_MASKSTOREPS,
24280 IX86_BUILTIN_MASKLOADPD256,
24281 IX86_BUILTIN_MASKLOADPS256,
24282 IX86_BUILTIN_MASKSTOREPD256,
24283 IX86_BUILTIN_MASKSTOREPS256,
24284 IX86_BUILTIN_MOVSHDUP256,
24285 IX86_BUILTIN_MOVSLDUP256,
24286 IX86_BUILTIN_MOVDDUP256,
24287
24288 IX86_BUILTIN_SQRTPD256,
24289 IX86_BUILTIN_SQRTPS256,
24290 IX86_BUILTIN_SQRTPS_NR256,
24291 IX86_BUILTIN_RSQRTPS256,
24292 IX86_BUILTIN_RSQRTPS_NR256,
24293
24294 IX86_BUILTIN_RCPPS256,
24295
24296 IX86_BUILTIN_ROUNDPD256,
24297 IX86_BUILTIN_ROUNDPS256,
24298
24299 IX86_BUILTIN_FLOORPD256,
24300 IX86_BUILTIN_CEILPD256,
24301 IX86_BUILTIN_TRUNCPD256,
24302 IX86_BUILTIN_RINTPD256,
24303 IX86_BUILTIN_ROUNDPD_AZ256,
24304 IX86_BUILTIN_FLOORPS256,
24305 IX86_BUILTIN_CEILPS256,
24306 IX86_BUILTIN_TRUNCPS256,
24307 IX86_BUILTIN_RINTPS256,
24308 IX86_BUILTIN_ROUNDPS_AZ256,
24309
24310 IX86_BUILTIN_UNPCKHPD256,
24311 IX86_BUILTIN_UNPCKLPD256,
24312 IX86_BUILTIN_UNPCKHPS256,
24313 IX86_BUILTIN_UNPCKLPS256,
24314
24315 IX86_BUILTIN_SI256_SI,
24316 IX86_BUILTIN_PS256_PS,
24317 IX86_BUILTIN_PD256_PD,
24318 IX86_BUILTIN_SI_SI256,
24319 IX86_BUILTIN_PS_PS256,
24320 IX86_BUILTIN_PD_PD256,
24321
24322 IX86_BUILTIN_VTESTZPD,
24323 IX86_BUILTIN_VTESTCPD,
24324 IX86_BUILTIN_VTESTNZCPD,
24325 IX86_BUILTIN_VTESTZPS,
24326 IX86_BUILTIN_VTESTCPS,
24327 IX86_BUILTIN_VTESTNZCPS,
24328 IX86_BUILTIN_VTESTZPD256,
24329 IX86_BUILTIN_VTESTCPD256,
24330 IX86_BUILTIN_VTESTNZCPD256,
24331 IX86_BUILTIN_VTESTZPS256,
24332 IX86_BUILTIN_VTESTCPS256,
24333 IX86_BUILTIN_VTESTNZCPS256,
24334 IX86_BUILTIN_PTESTZ256,
24335 IX86_BUILTIN_PTESTC256,
24336 IX86_BUILTIN_PTESTNZC256,
24337
24338 IX86_BUILTIN_MOVMSKPD256,
24339 IX86_BUILTIN_MOVMSKPS256,
24340
24341 /* AVX2 */
24342 IX86_BUILTIN_MPSADBW256,
24343 IX86_BUILTIN_PABSB256,
24344 IX86_BUILTIN_PABSW256,
24345 IX86_BUILTIN_PABSD256,
24346 IX86_BUILTIN_PACKSSDW256,
24347 IX86_BUILTIN_PACKSSWB256,
24348 IX86_BUILTIN_PACKUSDW256,
24349 IX86_BUILTIN_PACKUSWB256,
24350 IX86_BUILTIN_PADDB256,
24351 IX86_BUILTIN_PADDW256,
24352 IX86_BUILTIN_PADDD256,
24353 IX86_BUILTIN_PADDQ256,
24354 IX86_BUILTIN_PADDSB256,
24355 IX86_BUILTIN_PADDSW256,
24356 IX86_BUILTIN_PADDUSB256,
24357 IX86_BUILTIN_PADDUSW256,
24358 IX86_BUILTIN_PALIGNR256,
24359 IX86_BUILTIN_AND256I,
24360 IX86_BUILTIN_ANDNOT256I,
24361 IX86_BUILTIN_PAVGB256,
24362 IX86_BUILTIN_PAVGW256,
24363 IX86_BUILTIN_PBLENDVB256,
24364 IX86_BUILTIN_PBLENDVW256,
24365 IX86_BUILTIN_PCMPEQB256,
24366 IX86_BUILTIN_PCMPEQW256,
24367 IX86_BUILTIN_PCMPEQD256,
24368 IX86_BUILTIN_PCMPEQQ256,
24369 IX86_BUILTIN_PCMPGTB256,
24370 IX86_BUILTIN_PCMPGTW256,
24371 IX86_BUILTIN_PCMPGTD256,
24372 IX86_BUILTIN_PCMPGTQ256,
24373 IX86_BUILTIN_PHADDW256,
24374 IX86_BUILTIN_PHADDD256,
24375 IX86_BUILTIN_PHADDSW256,
24376 IX86_BUILTIN_PHSUBW256,
24377 IX86_BUILTIN_PHSUBD256,
24378 IX86_BUILTIN_PHSUBSW256,
24379 IX86_BUILTIN_PMADDUBSW256,
24380 IX86_BUILTIN_PMADDWD256,
24381 IX86_BUILTIN_PMAXSB256,
24382 IX86_BUILTIN_PMAXSW256,
24383 IX86_BUILTIN_PMAXSD256,
24384 IX86_BUILTIN_PMAXUB256,
24385 IX86_BUILTIN_PMAXUW256,
24386 IX86_BUILTIN_PMAXUD256,
24387 IX86_BUILTIN_PMINSB256,
24388 IX86_BUILTIN_PMINSW256,
24389 IX86_BUILTIN_PMINSD256,
24390 IX86_BUILTIN_PMINUB256,
24391 IX86_BUILTIN_PMINUW256,
24392 IX86_BUILTIN_PMINUD256,
24393 IX86_BUILTIN_PMOVMSKB256,
24394 IX86_BUILTIN_PMOVSXBW256,
24395 IX86_BUILTIN_PMOVSXBD256,
24396 IX86_BUILTIN_PMOVSXBQ256,
24397 IX86_BUILTIN_PMOVSXWD256,
24398 IX86_BUILTIN_PMOVSXWQ256,
24399 IX86_BUILTIN_PMOVSXDQ256,
24400 IX86_BUILTIN_PMOVZXBW256,
24401 IX86_BUILTIN_PMOVZXBD256,
24402 IX86_BUILTIN_PMOVZXBQ256,
24403 IX86_BUILTIN_PMOVZXWD256,
24404 IX86_BUILTIN_PMOVZXWQ256,
24405 IX86_BUILTIN_PMOVZXDQ256,
24406 IX86_BUILTIN_PMULDQ256,
24407 IX86_BUILTIN_PMULHRSW256,
24408 IX86_BUILTIN_PMULHUW256,
24409 IX86_BUILTIN_PMULHW256,
24410 IX86_BUILTIN_PMULLW256,
24411 IX86_BUILTIN_PMULLD256,
24412 IX86_BUILTIN_PMULUDQ256,
24413 IX86_BUILTIN_POR256,
24414 IX86_BUILTIN_PSADBW256,
24415 IX86_BUILTIN_PSHUFB256,
24416 IX86_BUILTIN_PSHUFD256,
24417 IX86_BUILTIN_PSHUFHW256,
24418 IX86_BUILTIN_PSHUFLW256,
24419 IX86_BUILTIN_PSIGNB256,
24420 IX86_BUILTIN_PSIGNW256,
24421 IX86_BUILTIN_PSIGND256,
24422 IX86_BUILTIN_PSLLDQI256,
24423 IX86_BUILTIN_PSLLWI256,
24424 IX86_BUILTIN_PSLLW256,
24425 IX86_BUILTIN_PSLLDI256,
24426 IX86_BUILTIN_PSLLD256,
24427 IX86_BUILTIN_PSLLQI256,
24428 IX86_BUILTIN_PSLLQ256,
24429 IX86_BUILTIN_PSRAWI256,
24430 IX86_BUILTIN_PSRAW256,
24431 IX86_BUILTIN_PSRADI256,
24432 IX86_BUILTIN_PSRAD256,
24433 IX86_BUILTIN_PSRLDQI256,
24434 IX86_BUILTIN_PSRLWI256,
24435 IX86_BUILTIN_PSRLW256,
24436 IX86_BUILTIN_PSRLDI256,
24437 IX86_BUILTIN_PSRLD256,
24438 IX86_BUILTIN_PSRLQI256,
24439 IX86_BUILTIN_PSRLQ256,
24440 IX86_BUILTIN_PSUBB256,
24441 IX86_BUILTIN_PSUBW256,
24442 IX86_BUILTIN_PSUBD256,
24443 IX86_BUILTIN_PSUBQ256,
24444 IX86_BUILTIN_PSUBSB256,
24445 IX86_BUILTIN_PSUBSW256,
24446 IX86_BUILTIN_PSUBUSB256,
24447 IX86_BUILTIN_PSUBUSW256,
24448 IX86_BUILTIN_PUNPCKHBW256,
24449 IX86_BUILTIN_PUNPCKHWD256,
24450 IX86_BUILTIN_PUNPCKHDQ256,
24451 IX86_BUILTIN_PUNPCKHQDQ256,
24452 IX86_BUILTIN_PUNPCKLBW256,
24453 IX86_BUILTIN_PUNPCKLWD256,
24454 IX86_BUILTIN_PUNPCKLDQ256,
24455 IX86_BUILTIN_PUNPCKLQDQ256,
24456 IX86_BUILTIN_PXOR256,
24457 IX86_BUILTIN_MOVNTDQA256,
24458 IX86_BUILTIN_VBROADCASTSS_PS,
24459 IX86_BUILTIN_VBROADCASTSS_PS256,
24460 IX86_BUILTIN_VBROADCASTSD_PD256,
24461 IX86_BUILTIN_VBROADCASTSI256,
24462 IX86_BUILTIN_PBLENDD256,
24463 IX86_BUILTIN_PBLENDD128,
24464 IX86_BUILTIN_PBROADCASTB256,
24465 IX86_BUILTIN_PBROADCASTW256,
24466 IX86_BUILTIN_PBROADCASTD256,
24467 IX86_BUILTIN_PBROADCASTQ256,
24468 IX86_BUILTIN_PBROADCASTB128,
24469 IX86_BUILTIN_PBROADCASTW128,
24470 IX86_BUILTIN_PBROADCASTD128,
24471 IX86_BUILTIN_PBROADCASTQ128,
24472 IX86_BUILTIN_VPERMVARSI256,
24473 IX86_BUILTIN_VPERMDF256,
24474 IX86_BUILTIN_VPERMVARSF256,
24475 IX86_BUILTIN_VPERMDI256,
24476 IX86_BUILTIN_VPERMTI256,
24477 IX86_BUILTIN_VEXTRACT128I256,
24478 IX86_BUILTIN_VINSERT128I256,
24479 IX86_BUILTIN_MASKLOADD,
24480 IX86_BUILTIN_MASKLOADQ,
24481 IX86_BUILTIN_MASKLOADD256,
24482 IX86_BUILTIN_MASKLOADQ256,
24483 IX86_BUILTIN_MASKSTORED,
24484 IX86_BUILTIN_MASKSTOREQ,
24485 IX86_BUILTIN_MASKSTORED256,
24486 IX86_BUILTIN_MASKSTOREQ256,
24487 IX86_BUILTIN_PSLLVV4DI,
24488 IX86_BUILTIN_PSLLVV2DI,
24489 IX86_BUILTIN_PSLLVV8SI,
24490 IX86_BUILTIN_PSLLVV4SI,
24491 IX86_BUILTIN_PSRAVV8SI,
24492 IX86_BUILTIN_PSRAVV4SI,
24493 IX86_BUILTIN_PSRLVV4DI,
24494 IX86_BUILTIN_PSRLVV2DI,
24495 IX86_BUILTIN_PSRLVV8SI,
24496 IX86_BUILTIN_PSRLVV4SI,
24497
24498 IX86_BUILTIN_GATHERSIV2DF,
24499 IX86_BUILTIN_GATHERSIV4DF,
24500 IX86_BUILTIN_GATHERDIV2DF,
24501 IX86_BUILTIN_GATHERDIV4DF,
24502 IX86_BUILTIN_GATHERSIV4SF,
24503 IX86_BUILTIN_GATHERSIV8SF,
24504 IX86_BUILTIN_GATHERDIV4SF,
24505 IX86_BUILTIN_GATHERDIV8SF,
24506 IX86_BUILTIN_GATHERSIV2DI,
24507 IX86_BUILTIN_GATHERSIV4DI,
24508 IX86_BUILTIN_GATHERDIV2DI,
24509 IX86_BUILTIN_GATHERDIV4DI,
24510 IX86_BUILTIN_GATHERSIV4SI,
24511 IX86_BUILTIN_GATHERSIV8SI,
24512 IX86_BUILTIN_GATHERDIV4SI,
24513 IX86_BUILTIN_GATHERDIV8SI,
24514
24515 /* TFmode support builtins. */
24516 IX86_BUILTIN_INFQ,
24517 IX86_BUILTIN_HUGE_VALQ,
24518 IX86_BUILTIN_FABSQ,
24519 IX86_BUILTIN_COPYSIGNQ,
24520
24521 /* Vectorizer support builtins. */
24522 IX86_BUILTIN_CPYSGNPS,
24523 IX86_BUILTIN_CPYSGNPD,
24524 IX86_BUILTIN_CPYSGNPS256,
24525 IX86_BUILTIN_CPYSGNPD256,
24526
24527 IX86_BUILTIN_CVTUDQ2PS,
24528
24529 IX86_BUILTIN_VEC_PERM_V2DF,
24530 IX86_BUILTIN_VEC_PERM_V4SF,
24531 IX86_BUILTIN_VEC_PERM_V2DI,
24532 IX86_BUILTIN_VEC_PERM_V4SI,
24533 IX86_BUILTIN_VEC_PERM_V8HI,
24534 IX86_BUILTIN_VEC_PERM_V16QI,
24535 IX86_BUILTIN_VEC_PERM_V2DI_U,
24536 IX86_BUILTIN_VEC_PERM_V4SI_U,
24537 IX86_BUILTIN_VEC_PERM_V8HI_U,
24538 IX86_BUILTIN_VEC_PERM_V16QI_U,
24539 IX86_BUILTIN_VEC_PERM_V4DF,
24540 IX86_BUILTIN_VEC_PERM_V8SF,
24541
24542 /* FMA4 instructions. */
24543 IX86_BUILTIN_VFMADDSS,
24544 IX86_BUILTIN_VFMADDSD,
24545 IX86_BUILTIN_VFMADDPS,
24546 IX86_BUILTIN_VFMADDPD,
24547 IX86_BUILTIN_VFMADDPS256,
24548 IX86_BUILTIN_VFMADDPD256,
24549 IX86_BUILTIN_VFMADDSUBPS,
24550 IX86_BUILTIN_VFMADDSUBPD,
24551 IX86_BUILTIN_VFMADDSUBPS256,
24552 IX86_BUILTIN_VFMADDSUBPD256,
24553
24554 /* FMA3 instructions. */
24555 IX86_BUILTIN_VFMADDSS3,
24556 IX86_BUILTIN_VFMADDSD3,
24557
24558 /* XOP instructions. */
24559 IX86_BUILTIN_VPCMOV,
24560 IX86_BUILTIN_VPCMOV_V2DI,
24561 IX86_BUILTIN_VPCMOV_V4SI,
24562 IX86_BUILTIN_VPCMOV_V8HI,
24563 IX86_BUILTIN_VPCMOV_V16QI,
24564 IX86_BUILTIN_VPCMOV_V4SF,
24565 IX86_BUILTIN_VPCMOV_V2DF,
24566 IX86_BUILTIN_VPCMOV256,
24567 IX86_BUILTIN_VPCMOV_V4DI256,
24568 IX86_BUILTIN_VPCMOV_V8SI256,
24569 IX86_BUILTIN_VPCMOV_V16HI256,
24570 IX86_BUILTIN_VPCMOV_V32QI256,
24571 IX86_BUILTIN_VPCMOV_V8SF256,
24572 IX86_BUILTIN_VPCMOV_V4DF256,
24573
24574 IX86_BUILTIN_VPPERM,
24575
24576 IX86_BUILTIN_VPMACSSWW,
24577 IX86_BUILTIN_VPMACSWW,
24578 IX86_BUILTIN_VPMACSSWD,
24579 IX86_BUILTIN_VPMACSWD,
24580 IX86_BUILTIN_VPMACSSDD,
24581 IX86_BUILTIN_VPMACSDD,
24582 IX86_BUILTIN_VPMACSSDQL,
24583 IX86_BUILTIN_VPMACSSDQH,
24584 IX86_BUILTIN_VPMACSDQL,
24585 IX86_BUILTIN_VPMACSDQH,
24586 IX86_BUILTIN_VPMADCSSWD,
24587 IX86_BUILTIN_VPMADCSWD,
24588
24589 IX86_BUILTIN_VPHADDBW,
24590 IX86_BUILTIN_VPHADDBD,
24591 IX86_BUILTIN_VPHADDBQ,
24592 IX86_BUILTIN_VPHADDWD,
24593 IX86_BUILTIN_VPHADDWQ,
24594 IX86_BUILTIN_VPHADDDQ,
24595 IX86_BUILTIN_VPHADDUBW,
24596 IX86_BUILTIN_VPHADDUBD,
24597 IX86_BUILTIN_VPHADDUBQ,
24598 IX86_BUILTIN_VPHADDUWD,
24599 IX86_BUILTIN_VPHADDUWQ,
24600 IX86_BUILTIN_VPHADDUDQ,
24601 IX86_BUILTIN_VPHSUBBW,
24602 IX86_BUILTIN_VPHSUBWD,
24603 IX86_BUILTIN_VPHSUBDQ,
24604
24605 IX86_BUILTIN_VPROTB,
24606 IX86_BUILTIN_VPROTW,
24607 IX86_BUILTIN_VPROTD,
24608 IX86_BUILTIN_VPROTQ,
24609 IX86_BUILTIN_VPROTB_IMM,
24610 IX86_BUILTIN_VPROTW_IMM,
24611 IX86_BUILTIN_VPROTD_IMM,
24612 IX86_BUILTIN_VPROTQ_IMM,
24613
24614 IX86_BUILTIN_VPSHLB,
24615 IX86_BUILTIN_VPSHLW,
24616 IX86_BUILTIN_VPSHLD,
24617 IX86_BUILTIN_VPSHLQ,
24618 IX86_BUILTIN_VPSHAB,
24619 IX86_BUILTIN_VPSHAW,
24620 IX86_BUILTIN_VPSHAD,
24621 IX86_BUILTIN_VPSHAQ,
24622
24623 IX86_BUILTIN_VFRCZSS,
24624 IX86_BUILTIN_VFRCZSD,
24625 IX86_BUILTIN_VFRCZPS,
24626 IX86_BUILTIN_VFRCZPD,
24627 IX86_BUILTIN_VFRCZPS256,
24628 IX86_BUILTIN_VFRCZPD256,
24629
24630 IX86_BUILTIN_VPCOMEQUB,
24631 IX86_BUILTIN_VPCOMNEUB,
24632 IX86_BUILTIN_VPCOMLTUB,
24633 IX86_BUILTIN_VPCOMLEUB,
24634 IX86_BUILTIN_VPCOMGTUB,
24635 IX86_BUILTIN_VPCOMGEUB,
24636 IX86_BUILTIN_VPCOMFALSEUB,
24637 IX86_BUILTIN_VPCOMTRUEUB,
24638
24639 IX86_BUILTIN_VPCOMEQUW,
24640 IX86_BUILTIN_VPCOMNEUW,
24641 IX86_BUILTIN_VPCOMLTUW,
24642 IX86_BUILTIN_VPCOMLEUW,
24643 IX86_BUILTIN_VPCOMGTUW,
24644 IX86_BUILTIN_VPCOMGEUW,
24645 IX86_BUILTIN_VPCOMFALSEUW,
24646 IX86_BUILTIN_VPCOMTRUEUW,
24647
24648 IX86_BUILTIN_VPCOMEQUD,
24649 IX86_BUILTIN_VPCOMNEUD,
24650 IX86_BUILTIN_VPCOMLTUD,
24651 IX86_BUILTIN_VPCOMLEUD,
24652 IX86_BUILTIN_VPCOMGTUD,
24653 IX86_BUILTIN_VPCOMGEUD,
24654 IX86_BUILTIN_VPCOMFALSEUD,
24655 IX86_BUILTIN_VPCOMTRUEUD,
24656
24657 IX86_BUILTIN_VPCOMEQUQ,
24658 IX86_BUILTIN_VPCOMNEUQ,
24659 IX86_BUILTIN_VPCOMLTUQ,
24660 IX86_BUILTIN_VPCOMLEUQ,
24661 IX86_BUILTIN_VPCOMGTUQ,
24662 IX86_BUILTIN_VPCOMGEUQ,
24663 IX86_BUILTIN_VPCOMFALSEUQ,
24664 IX86_BUILTIN_VPCOMTRUEUQ,
24665
24666 IX86_BUILTIN_VPCOMEQB,
24667 IX86_BUILTIN_VPCOMNEB,
24668 IX86_BUILTIN_VPCOMLTB,
24669 IX86_BUILTIN_VPCOMLEB,
24670 IX86_BUILTIN_VPCOMGTB,
24671 IX86_BUILTIN_VPCOMGEB,
24672 IX86_BUILTIN_VPCOMFALSEB,
24673 IX86_BUILTIN_VPCOMTRUEB,
24674
24675 IX86_BUILTIN_VPCOMEQW,
24676 IX86_BUILTIN_VPCOMNEW,
24677 IX86_BUILTIN_VPCOMLTW,
24678 IX86_BUILTIN_VPCOMLEW,
24679 IX86_BUILTIN_VPCOMGTW,
24680 IX86_BUILTIN_VPCOMGEW,
24681 IX86_BUILTIN_VPCOMFALSEW,
24682 IX86_BUILTIN_VPCOMTRUEW,
24683
24684 IX86_BUILTIN_VPCOMEQD,
24685 IX86_BUILTIN_VPCOMNED,
24686 IX86_BUILTIN_VPCOMLTD,
24687 IX86_BUILTIN_VPCOMLED,
24688 IX86_BUILTIN_VPCOMGTD,
24689 IX86_BUILTIN_VPCOMGED,
24690 IX86_BUILTIN_VPCOMFALSED,
24691 IX86_BUILTIN_VPCOMTRUED,
24692
24693 IX86_BUILTIN_VPCOMEQQ,
24694 IX86_BUILTIN_VPCOMNEQ,
24695 IX86_BUILTIN_VPCOMLTQ,
24696 IX86_BUILTIN_VPCOMLEQ,
24697 IX86_BUILTIN_VPCOMGTQ,
24698 IX86_BUILTIN_VPCOMGEQ,
24699 IX86_BUILTIN_VPCOMFALSEQ,
24700 IX86_BUILTIN_VPCOMTRUEQ,
24701
24702 /* LWP instructions. */
24703 IX86_BUILTIN_LLWPCB,
24704 IX86_BUILTIN_SLWPCB,
24705 IX86_BUILTIN_LWPVAL32,
24706 IX86_BUILTIN_LWPVAL64,
24707 IX86_BUILTIN_LWPINS32,
24708 IX86_BUILTIN_LWPINS64,
24709
24710 IX86_BUILTIN_CLZS,
24711
24712 /* BMI instructions. */
24713 IX86_BUILTIN_BEXTR32,
24714 IX86_BUILTIN_BEXTR64,
24715 IX86_BUILTIN_CTZS,
24716
24717 /* TBM instructions. */
24718 IX86_BUILTIN_BEXTRI32,
24719 IX86_BUILTIN_BEXTRI64,
24720
24721 /* BMI2 instructions. */
24722 IX86_BUILTIN_BZHI32,
24723 IX86_BUILTIN_BZHI64,
24724 IX86_BUILTIN_PDEP32,
24725 IX86_BUILTIN_PDEP64,
24726 IX86_BUILTIN_PEXT32,
24727 IX86_BUILTIN_PEXT64,
24728
24729 /* FSGSBASE instructions. */
24730 IX86_BUILTIN_RDFSBASE32,
24731 IX86_BUILTIN_RDFSBASE64,
24732 IX86_BUILTIN_RDGSBASE32,
24733 IX86_BUILTIN_RDGSBASE64,
24734 IX86_BUILTIN_WRFSBASE32,
24735 IX86_BUILTIN_WRFSBASE64,
24736 IX86_BUILTIN_WRGSBASE32,
24737 IX86_BUILTIN_WRGSBASE64,
24738
24739 /* RDRND instructions. */
24740 IX86_BUILTIN_RDRAND16_STEP,
24741 IX86_BUILTIN_RDRAND32_STEP,
24742 IX86_BUILTIN_RDRAND64_STEP,
24743
24744 /* F16C instructions. */
24745 IX86_BUILTIN_CVTPH2PS,
24746 IX86_BUILTIN_CVTPH2PS256,
24747 IX86_BUILTIN_CVTPS2PH,
24748 IX86_BUILTIN_CVTPS2PH256,
24749
24750 /* CFString built-in for Darwin. */
24751 IX86_BUILTIN_CFSTRING,
24752
24753 IX86_BUILTIN_MAX
24754 };
24755
24756 /* Table for the ix86 builtin decls. */
24757 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
24758
24759 /* Table of all of the builtin functions that are possible with different ISAs
24760    but are waiting to be built until a function is declared to use that
24761    ISA. */
24762 struct builtin_isa {
24763 const char *name; /* function name */
24764 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
24765 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
24766 bool const_p; /* true if the declaration is constant */
24767 bool set_and_not_built_p;
24768 };
24769
24770 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
24771
24772
24773 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save MASK,
24774    the set of isa_flags for this builtin, in the ix86_builtins_isa array.
24775    Store the function decl in the ix86_builtins array. Return the function
24776    decl, or NULL_TREE if the builtin was not added.
24777
24778    If the front end has a special hook for builtin functions, delay adding
24779    builtin functions that aren't in the current ISA until the ISA is changed
24780    with function-specific optimization. Doing so can save about 300K for the
24781    default compiler. When the builtin is expanded, check at that time whether
24782    it is valid.
24783
24784    If the front end doesn't have a special hook, record all builtins, even
24785    those whose instruction set isn't in the current ISA, in case the user
24786    uses function-specific options for a different ISA, so that we don't get
24787    scope errors if a builtin is added in the middle of a function scope. */
24788
24789 static inline tree
24790 def_builtin (HOST_WIDE_INT mask, const char *name,
24791 enum ix86_builtin_func_type tcode,
24792 enum ix86_builtins code)
24793 {
24794 tree decl = NULL_TREE;
24795
24796 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
24797 {
24798 ix86_builtins_isa[(int) code].isa = mask;
24799
24800 mask &= ~OPTION_MASK_ISA_64BIT;
24801 if (mask == 0
24802 || (mask & ix86_isa_flags) != 0
24803 || (lang_hooks.builtin_function
24804 == lang_hooks.builtin_function_ext_scope))
24806 {
24807 tree type = ix86_get_builtin_func_type (tcode);
24808 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
24809 NULL, NULL_TREE);
24810 ix86_builtins[(int) code] = decl;
24811 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
24812 }
24813 else
24814 {
24815 ix86_builtins[(int) code] = NULL_TREE;
24816 ix86_builtins_isa[(int) code].tcode = tcode;
24817 ix86_builtins_isa[(int) code].name = name;
24818 ix86_builtins_isa[(int) code].const_p = false;
24819 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
24820 }
24821 }
24822
24823 return decl;
24824 }
24825
24826 /* Like def_builtin, but also marks the function decl "const". */
24827
24828 static inline tree
24829 def_builtin_const (HOST_WIDE_INT mask, const char *name,
24830 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
24831 {
24832 tree decl = def_builtin (mask, name, tcode, code);
24833 if (decl)
24834 TREE_READONLY (decl) = 1;
24835 else
24836 ix86_builtins_isa[(int) code].const_p = true;
24837
24838 return decl;
24839 }
24840
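/* A minimal usage sketch of the two helpers above (the mask, name, type and
   code in this call are merely illustrative of how the builtin
   initialization routines later in this file invoke them):

     def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_addps",
                        V4SF_FTYPE_V4SF_V4SF, IX86_BUILTIN_ADDPS);

   With the required ISA enabled, the decl is created immediately and marked
   TREE_READONLY; otherwise only the name/type/ISA triple is recorded in
   ix86_builtins_isa (with const_p noted) and the decl is materialized later
   by ix86_add_new_builtins once the ISA becomes available.  */
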
24841 /* Add any new builtin functions for a given ISA that may not have been
24842    declared yet. This saves a bit of space compared to adding all of the
24843    declarations to the tree up front, even when they are never used. */
24844
24845 static void
24846 ix86_add_new_builtins (HOST_WIDE_INT isa)
24847 {
24848 int i;
24849
24850 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
24851 {
24852 if ((ix86_builtins_isa[i].isa & isa) != 0
24853 && ix86_builtins_isa[i].set_and_not_built_p)
24854 {
24855 tree decl, type;
24856
24857 /* Don't define the builtin again. */
24858 ix86_builtins_isa[i].set_and_not_built_p = false;
24859
24860 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
24861 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
24862 type, i, BUILT_IN_MD, NULL,
24863 NULL_TREE);
24864
24865 ix86_builtins[i] = decl;
24866 if (ix86_builtins_isa[i].const_p)
24867 TREE_READONLY (decl) = 1;
24868 }
24869 }
24870 }
24871
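/* Hedged illustration of the delayed-declaration scheme (the function body
   is only a sketch; the attribute spelling follows the documented
   function-specific target option syntax):

     __attribute__((target ("avx")))
     void
     use_avx (void)
     {
       __builtin_ia32_vzeroupper ();
     }

   When the unit is otherwise compiled without -mavx, def_builtin merely
   records the AVX builtins in ix86_builtins_isa; once the target attribute
   switches ix86_isa_flags, the option-handling code is expected to call
   ix86_add_new_builtins so the recorded decls are finally built and the
   call above resolves.  */
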
24872 /* Bits for builtin_description.flag. */
24873
24874 /* Set when we don't support the comparison natively, and should
24875    swap the comparison operands in order to support it. */
24876 #define BUILTIN_DESC_SWAP_OPERANDS 1
24877
24878 struct builtin_description
24879 {
24880 const HOST_WIDE_INT mask;
24881 const enum insn_code icode;
24882 const char *const name;
24883 const enum ix86_builtins code;
24884 const enum rtx_code comparison;
24885 const int flag;
24886 };
24887
24888 static const struct builtin_description bdesc_comi[] =
24889 {
24890 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
24891 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
24892 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
24893 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
24894 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
24895 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
24896 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
24897 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
24898 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
24899 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
24900 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
24901 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
24902 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
24903 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
24904 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
24905 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
24906 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
24907 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
24908 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
24909 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
24910 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
24911 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
24912 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
24913 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
24914 };
24915
24916 static const struct builtin_description bdesc_pcmpestr[] =
24917 {
24918 /* SSE4.2 */
24919 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
24920 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
24921 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
24922 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
24923 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
24924 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
24925 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
24926 };
24927
24928 static const struct builtin_description bdesc_pcmpistr[] =
24929 {
24930 /* SSE4.2 */
24931 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
24932 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
24933 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
24934 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
24935 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
24936 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
24937 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
24938 };
24939
24940 /* Special builtins with variable number of arguments. */
24941 static const struct builtin_description bdesc_special_args[] =
24942 {
24943 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
24944 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
24945 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
24946
24947 /* MMX */
24948 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
24949
24950 /* 3DNow! */
24951 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
24952
24953 /* SSE */
24954 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
24955 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
24956 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
24957
24958 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
24959 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
24960 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
24961 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
24962
24963 /* SSE or 3DNow!A */
24964 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
24965 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
24966
24967 /* SSE2 */
24968 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
24969 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
24970 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
24971 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
24972 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
24973 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
24974 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
24975 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
24976 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
24977
24978 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
24979 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
24980
24981 /* SSE3 */
24982 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
24983
24984 /* SSE4.1 */
24985 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
24986
24987 /* SSE4A */
24988 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
24989 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
24990
24991 /* AVX */
24992 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
24993 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
24994
24995 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
24996 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
24997 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
24998 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
24999 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
25000
25001 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
25002 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
25003 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
25004 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
25005 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
25006 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
25007 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
25008
25009 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
25010 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
25011 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
25012
25013 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
25014 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
25015 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
25016 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
25017 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
25018 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
25019 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
25020 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
25021
25022 /* AVX2 */
25023 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
25024 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
25025 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
25026 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
25027 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
25028 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
25029 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
25030 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
25031 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
25032
25033 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
25034 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
25035 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
25036 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
25037 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
25038 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
25039
25040 /* FSGSBASE */
25041 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
25042 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
25043 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
25044 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
25045 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
25046 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
25047 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
25048 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
25049 };
25050
25051 /* Builtins with variable number of arguments. */
25052 static const struct builtin_description bdesc_args[] =
25053 {
25054 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
25055 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
25056 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
25057 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
25058 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
25059 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
25060 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
25061
25062 /* MMX */
25063 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25064 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25065 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25066 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25067 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25068 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25069
25070 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25071 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25072 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25073 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25074 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25075 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25076 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25077 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25078
25079 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25080 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25081
25082 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25083 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25084 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25085 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25086
25087 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25088 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25089 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25090 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25091 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25092 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25093
25094 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25095 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25096 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25097 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25098 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25099 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25100
25101 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
25102 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
25103 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
25104
25105 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
25106
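/* Shift builtins: the *_SI_COUNT prototypes take the shift count as a plain
   integer (psllwi and friends), while the forms whose last operand is a
   vector take the count in an MMX register (psllw and friends).  */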
25107 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
25108 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
25109 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
25110 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
25111 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
25112 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
25113
25114 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
25115 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
25116 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
25117 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
25118 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
25119 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
25120
25121 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
25122 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
25123 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
25124 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
25125
25126 /* 3DNow! */
25127 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
25128 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
25129 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
25130 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
25131
25132 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25133 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25134 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25135 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
25136 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
25137 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
25138 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25139 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25140 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25141 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25142 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25143 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25144 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25145 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25146 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25147
25148 /* 3DNow!A */
25149 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
25150 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
25151 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
25152 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
25153 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25154 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
25155
25156 /* SSE */
25157 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
25158 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
25159 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
25160 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
25161 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
25162 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
25163 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
25164 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
25165 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
25166 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
25167 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
25168 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
25169
25170 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
25171
25172 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25173 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25174 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25175 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25176 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25177 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25178 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25179 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25180
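/* Compare builtins: the rtx code in the fifth column selects the condition
   generated by the maskcmp pattern; the *_SWAP prototypes swap the two
   operands first, which is how the cmpgt/cmpge (and cmpngt/cmpnge) variants
   are built on top of the LT/LE and UNGE/UNGT codes.  */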
25181 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
25182 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
25183 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
25184 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
25185 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
25186 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
25187 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
25188 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
25189 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
25190 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
25191 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
25192 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
25193 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
25194 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
25195 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
25196 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
25197 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
25198 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
25199 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
25200 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
25201 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
25202 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
25203
25204 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25205 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25206 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25207 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25208
25209 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25210 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25211 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25212 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25213
25214 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25215
25216 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25217 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25218 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25219 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25220 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25221
25222 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
25223 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
25224 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
25225
25226 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
25227
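/* *_VEC_MERGE: the scalar (ss) form computes element 0 only and merges the
   result back into the upper elements of the source operand.  */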
25228 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
25229 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
25230 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
25231
25232 /* SSE MMX or 3DNow!A */
25233 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25234 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25235 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25236
25237 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25238 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25239 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25240 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25241
25242 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
25243 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
25244
25245 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
25246
25247 /* SSE2 */
25248 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
25249
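/* CODE_FOR_nothing: there is no single insn pattern behind these vec_perm
   builtins; they are expanded through special-case code instead.  */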
25250 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
25251 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
25252 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
25253 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
25254 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
25255 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
25256 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
25257 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
25258 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
25259 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
25260 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
25261 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },
25262
25263 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
25264 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
25265 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
25266 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
25267 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
25268 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
25269
25270 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
25271 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
25272 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
25273 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
25274 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
25275
25276 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
25277
25278 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
25279 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
25280 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
25281 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
25282
25283 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
25284 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
25285 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
25286
25287 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25288 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25289 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25290 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25291 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25292 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25293 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25294 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25295
25296 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
25297 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
25298 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
25299 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
25300 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
25301 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
25302 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
25303 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
25304 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
25305 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
25306 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
25307 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
25308 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
25309 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
25310 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
25311 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
25312 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
25313 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
25314 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
25315 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
25316
25317 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25318 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25319 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25320 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25321
25322 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25323 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25324 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25325 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25326
25327 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25328
25329 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25330 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25331 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25332
25333 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
25334
25335 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25336 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25337 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25338 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25339 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25340 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25341 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25342 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25343
25344 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25345 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25346 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25347 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25348 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25349 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25350 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25351 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25352
25353 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25354 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25355
25356 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25357 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25358 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25359 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25360
25361 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25362 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25363
25364 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25365 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25366 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25367 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25368 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25369 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25370
25371 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25372 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25373 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25374 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25375
25376 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25377 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25378 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25379 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25380 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25381 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25382 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25383 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25384
25385 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
25386 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
25387 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
25388
25389 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25390 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
25391
25392 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
25393 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
25394
25395 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
25396
25397 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
25398 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
25399 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
25400 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
25401
25402 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
25403 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
25404 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
25405 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
25406 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
25407 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
25408 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
25409
25410 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
25411 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
25412 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
25413 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
25414 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
25415 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
25416 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
25417
25418 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
25419 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
25420 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
25421 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
25422
25423 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
25424 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
25425 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
25426
25427 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
25428
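/* A zero in the name column means the user-level builtin is declared
   elsewhere; the entry here only supplies the expansion information.  */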
25429 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
25430 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
25431
25432 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
25433
25434 /* SSE2 MMX */
25435 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
25436 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
25437
25438 /* SSE3 */
25439 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
25440 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
25441
25442 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25443 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25444 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25445 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25446 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
25447 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25448
25449 /* SSSE3 */
25450 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
25451 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
25452 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
25453 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
25454 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
25455 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
25456
25457 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25458 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25459 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25460 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25461 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25462 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25463 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25464 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25465 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25466 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25467 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25468 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25469 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
25470 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
25471 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25472 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25473 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25474 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25475 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25476 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
25477 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25478 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
25479 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25480 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
25481
25482 /* SSSE3 palignr */
25483 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
25484 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
25485
25486 /* SSE4.1 */
25487 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
25488 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
25489 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
25490 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
25491 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
25492 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
25493 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
25494 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
25495 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
25496 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
25497
25498 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
25499 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
25500 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
25501 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
25502 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
25503 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
25504 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
25505 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
25506 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
25507 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
25508 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
25509 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
25510 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
25511
25512 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
25513 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25514 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25515 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25516 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25517 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25518 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25519 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25520 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25521 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25522 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
25523 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25524
25525 /* SSE4.1 rounding and ptest */
25526 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
25527 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
25528 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
25529 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
25530
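/* For floor/ceil/trunc/rint the comparison column is reused to carry the
   ROUND_* rounding-mode immediate, cast through enum rtx_code.  */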
25531 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
25532 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
25533 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
25534 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
25535
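/* The _az variants presumably stand for rounding halfway cases away from
   zero (round () semantics); they expand via the generic roundv2df2 and
   roundv4sf2 patterns rather than the SSE4.1 insn with an explicit
   rounding immediate.  */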
25536 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
25537
25538 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
25539 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
25540 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
25541 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
25542
25543 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
25544
25545 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
25546 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
25547 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
25548
25549 /* SSE4.2 */
25550 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25551 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
25552 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
25553 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
25554 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
25555
25556 /* SSE4A */
25557 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
25558 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
25559 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
25560 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25561
25562 /* AES */
25563 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
25564 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
25565
25566 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25567 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25568 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25569 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25570
25571 /* PCLMUL */
25572 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
25573
25574 /* AVX */
25575 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25576 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25577 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25578 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25579 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25580 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25581 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25582 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25583 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25584 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25585 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25586 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25587 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25588 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25589 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25590 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25591 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25592 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25593 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25594 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25595 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25596 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25597 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25598 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25599 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25600 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25601
25602 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
25603 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
25604 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
25605 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
25606
25607 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
25608 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
25609 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
25610 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
25611 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
25612 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
25613 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
25614 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
25615 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
25616 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
25617 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
25618 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
25619 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
25620 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
25621 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
25622 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
25623 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
25624 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
25625 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
25626 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
25627 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
25628 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
25629 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
25630 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
25631 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
25632 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
25633 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
25634 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
25635 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
25636 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
25637 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
25638 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
25639 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
25640 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
25641
25642 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
25643 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
25644 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
25645
25646 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
25647 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
25648 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
25649 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
25650 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
25651
25652 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
25653
25654 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
25655 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
25656
25657 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
25658 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
25659 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
25660 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
25661
25662 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
25663
25664 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
25665 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
25666 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
25667 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
25668
25669 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
25670
25671 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25672 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25673 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25674 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25675
25676 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
25677 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
25678 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
25679 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
25680 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
25681 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
25682
25683 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
25684 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
25685 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
25686 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
25687 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
25688 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
25689 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
25690 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
25691 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
25692 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
25693 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
25694 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
25695 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
25696 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
25697 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
25698
25699 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
25700 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
25701
25702 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25703 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
25704
25705 /* AVX2 */
25706 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
25707 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
25708 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
25709 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
25710 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
25711 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
25712 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
25713 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
25714 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
25715 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25716 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
25717 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
25718 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
25719 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25720 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
25721 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25722 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv4di, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
25723 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
25724 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
25725 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
25726 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25727 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
25728 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
25729 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
25730 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25731 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
25732 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
25733 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
25734 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25735 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
25736 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
25737 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25738 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
25739 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25740 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25741 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
25742 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25743 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
25744 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
25745 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
25746 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25747 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
25748 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
25749 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25750 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
25751 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
25752 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25753 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
25754 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
25755 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25756 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
25757 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
25758 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
25759 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
25760 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
25761 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
25762 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
25763 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
25764 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
25765 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
25766 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
25767 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
25768 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
25769 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
25770 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mulv4siv4di3 , "__builtin_ia32_pmuldq256" , IX86_BUILTIN_PMULDQ256 , UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
25771 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25772 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25773 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25774 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25775 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
25776 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulv4siv4di3 , "__builtin_ia32_pmuludq256" , IX86_BUILTIN_PMULUDQ256 , UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
25777 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
25778 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
25779 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
25780 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
25781 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
25782 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
25783 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
25784 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25785 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
25786 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlqv4di3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
25787 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
25788 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
25789 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
25790 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
25791 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
25792 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
25793 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
25794 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
25795 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
25796 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
25797 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrqv4di3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
25798 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
25799 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
25800 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
25801 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
25802 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
25803 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
25804 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
25805 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25806 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
25807 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
25808 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
25809 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25810 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
25811 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25812 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
25813 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25814 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
25815 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
25816 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
25817 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
25818 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
25819 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
25820 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
25821 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
25822 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
25823 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
25824 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
25825 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
25826 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
25827 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
25828 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
25829 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
25830 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
25831 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
25832 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
25833 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
25834 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
25835 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
25836 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
25837 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
25838 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
25839 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
25840 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
25841 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
25842 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
25843 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25844 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
25845 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25846 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
25847 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25848 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
25849 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25850 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
25851 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25852
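  /* LZCNT */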
25853 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
25854
25855 /* BMI */
25856 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
25857 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
25858 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
25859
25860 /* TBM */
25861 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
25862 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
25863
25864 /* F16C */
25865 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
25866 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
25867 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
25868 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
25869
25870 /* BMI2 */
25871 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
25872 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
25873 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
25874 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
25875 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
25876 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
25877 };
25878
25879 /* FMA4, FMA and XOP. */
25880 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
25881 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
25882 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
25883 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
25884 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
25885 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
25886 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
25887 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
25888 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
25889 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
25890 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
25891 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
25892 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
25893 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
25894 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
25895 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
25896 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
25897 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
25898 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
25899 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
25900 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
25901 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
25902 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
25903 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
25904 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
25905 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
25906 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
25907 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
25908 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
25909 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
25910 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
25911 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
25912 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
25913 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
25914 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
25915 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
25916 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
25917 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
25918 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
25919 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
25920 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
25921 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
25922 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
25923 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
25924 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
25925 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
25926 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
25927 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
25928 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
25929 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
25930 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
25931 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
25932
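  /* FMA4, FMA and XOP multi-argument builtins.  Each entry below gives the
     ISA option mask, the insn code, the builtin name, the ix86 builtin
     enumerator, the rtx comparison code used by the compare forms (UNKNOWN
     otherwise) and, cast to int, one of the MULTI_ARG_* function-type
     aliases defined above.  The table is walked when the builtins are
     registered and again when they are expanded (see
     ix86_expand_multi_arg_builtin later in this file).  */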
25933 static const struct builtin_description bdesc_multi_arg[] =
25934 {
25935 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
25936 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
25937 UNKNOWN, (int)MULTI_ARG_3_SF },
25938 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
25939 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
25940 UNKNOWN, (int)MULTI_ARG_3_DF },
25941
25942 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
25943 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
25944 UNKNOWN, (int)MULTI_ARG_3_SF },
25945 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
25946 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
25947 UNKNOWN, (int)MULTI_ARG_3_DF },
25948
25949 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
25950 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
25951 UNKNOWN, (int)MULTI_ARG_3_SF },
25952 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
25953 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
25954 UNKNOWN, (int)MULTI_ARG_3_DF },
25955 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
25956 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
25957 UNKNOWN, (int)MULTI_ARG_3_SF2 },
25958 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
25959 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
25960 UNKNOWN, (int)MULTI_ARG_3_DF2 },
25961
25962 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
25963 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
25964 UNKNOWN, (int)MULTI_ARG_3_SF },
25965 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
25966 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
25967 UNKNOWN, (int)MULTI_ARG_3_DF },
25968 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
25969 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
25970 UNKNOWN, (int)MULTI_ARG_3_SF2 },
25971 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
25972 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
25973 UNKNOWN, (int)MULTI_ARG_3_DF2 },
25974
25975 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
25976 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
25977 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
25978 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
25979 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
25980 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
25981 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
25982
25983 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
25984 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
25985 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
25986 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
25987 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
25988 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
25989 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
25990
25991 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
25992
25993 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
25994 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
25995 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
25996 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
25997 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
25998 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
25999 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
26000 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
26001 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
26002 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
26003 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
26004 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
26005
26006 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
26007 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
26008 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
26009 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
26010 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
26011 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
26012 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
26013 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
26014 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
26015 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
26016 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
26017 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
26018 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
26019 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
26020 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
26021 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
26022
26023 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
26024 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
26025 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
26026 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
26027 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
26028 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
26029
26030 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
26031 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
26032 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
26033 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
26034 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
26035 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
26036 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
26037 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
26038 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
26039 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
26040 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
26041 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
26042 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
26043 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
26044 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
26045
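  /* XOP vector compares.  The rtx comparison code in each entry selects the
     predicate; note that the "neq" spellings reuse the IX86_BUILTIN_VPCOMNE*
     enumerators, so they are aliases of the corresponding "ne" builtins
     rather than separate builtins.  */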
26046 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
26047 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
26048 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
26049 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
26050 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
26051 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
26052 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
26053
26054 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
26055 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
26056 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
26057 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
26058 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
26059 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
26060 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
26061
26062 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
26063 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
26064 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
26065 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
26066 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
26067 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
26068 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
26069
26070 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
26071 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
26072 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
26073 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
26074 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
26075 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
26076 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
26077
26078 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
26079 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
26080 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
26081 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
26082 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
26083 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
26084 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
26085
26086 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
26087 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
26088 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
26089 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
26090 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
26091 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
26092 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
26093
26094 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
26095 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
26096 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
26097 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
26098 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
26099 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
26100 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
26101
26102 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
26103 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
26104 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
26105 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
26106 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
26107 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
26108 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
26109
26110 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
26111 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
26112 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
26113 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
26114 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
26115 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
26116 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
26117 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
26118
26119 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
26120 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
26121 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
26122 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
26123 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
26124 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
26125 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
26126 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
26127
26128 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
26129 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
26130 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
26131 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
26132
26133 };
26134
26135 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
26136    not in the current target ISA, so that the user can compile particular
26137    modules with target-specific options that differ from the command-line
26138    options. */
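/* For example, a function declared with __attribute__((target ("xop"))) can
   use the XOP builtins even if the rest of the translation unit is compiled
   without -mxop.  */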
26139 static void
26140 ix86_init_mmx_sse_builtins (void)
26141 {
26142 const struct builtin_description * d;
26143 enum ix86_builtin_func_type ftype;
26144 size_t i;
26145
26146 /* Add all special builtins with variable number of operands. */
26147 for (i = 0, d = bdesc_special_args;
26148 i < ARRAY_SIZE (bdesc_special_args);
26149 i++, d++)
26150 {
26151 if (d->name == 0)
26152 continue;
26153
26154 ftype = (enum ix86_builtin_func_type) d->flag;
26155 def_builtin (d->mask, d->name, ftype, d->code);
26156 }
26157
26158 /* Add all builtins with variable number of operands. */
26159 for (i = 0, d = bdesc_args;
26160 i < ARRAY_SIZE (bdesc_args);
26161 i++, d++)
26162 {
26163 if (d->name == 0)
26164 continue;
26165
26166 ftype = (enum ix86_builtin_func_type) d->flag;
26167 def_builtin_const (d->mask, d->name, ftype, d->code);
26168 }
26169
26170 /* pcmpestr[im] insns. */
26171 for (i = 0, d = bdesc_pcmpestr;
26172 i < ARRAY_SIZE (bdesc_pcmpestr);
26173 i++, d++)
26174 {
26175 if (d->code == IX86_BUILTIN_PCMPESTRM128)
26176 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
26177 else
26178 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
26179 def_builtin_const (d->mask, d->name, ftype, d->code);
26180 }
26181
26182 /* pcmpistr[im] insns. */
26183 for (i = 0, d = bdesc_pcmpistr;
26184 i < ARRAY_SIZE (bdesc_pcmpistr);
26185 i++, d++)
26186 {
26187 if (d->code == IX86_BUILTIN_PCMPISTRM128)
26188 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
26189 else
26190 ftype = INT_FTYPE_V16QI_V16QI_INT;
26191 def_builtin_const (d->mask, d->name, ftype, d->code);
26192 }
26193
26194 /* comi/ucomi insns. */
26195 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
26196 {
26197 if (d->mask == OPTION_MASK_ISA_SSE2)
26198 ftype = INT_FTYPE_V2DF_V2DF;
26199 else
26200 ftype = INT_FTYPE_V4SF_V4SF;
26201 def_builtin_const (d->mask, d->name, ftype, d->code);
26202 }
26203
26204 /* SSE */
26205 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
26206 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
26207 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
26208 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
26209
26210 /* SSE or 3DNow!A */
26211 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
26212 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
26213 IX86_BUILTIN_MASKMOVQ);
26214
26215 /* SSE2 */
26216 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
26217 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
26218
26219 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
26220 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
26221 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
26222 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
26223
26224 /* SSE3. */
26225 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
26226 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
26227 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
26228 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
26229
26230 /* AES */
26231 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
26232 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
26233 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
26234 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
26235 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
26236 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
26237 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
26238 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
26239 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
26240 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
26241 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
26242 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
26243
26244 /* PCLMUL */
26245 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
26246 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
26247
26248 /* RDRND */
26249 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
26250 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
26251 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
26252 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
26253 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
26254 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
26255 IX86_BUILTIN_RDRAND64_STEP);
26256
26257 /* AVX2 */
26258 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
26259 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
26260 IX86_BUILTIN_GATHERSIV2DF);
26261
26262 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
26263 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
26264 IX86_BUILTIN_GATHERSIV4DF);
26265
26266 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
26267 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
26268 IX86_BUILTIN_GATHERDIV2DF);
26269
26270 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
26271 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
26272 IX86_BUILTIN_GATHERDIV4DF);
26273
26274 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
26275 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
26276 IX86_BUILTIN_GATHERSIV4SF);
26277
26278 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
26279 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
26280 IX86_BUILTIN_GATHERSIV8SF);
26281
26282 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
26283 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
26284 IX86_BUILTIN_GATHERDIV4SF);
26285
26286 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
26287 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
26288 IX86_BUILTIN_GATHERDIV8SF);
26289
26290 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
26291 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
26292 IX86_BUILTIN_GATHERSIV2DI);
26293
26294 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
26295 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
26296 IX86_BUILTIN_GATHERSIV4DI);
26297
26298 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
26299 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
26300 IX86_BUILTIN_GATHERDIV2DI);
26301
26302 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
26303 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
26304 IX86_BUILTIN_GATHERDIV4DI);
26305
26306 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
26307 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
26308 IX86_BUILTIN_GATHERSIV4SI);
26309
26310 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
26311 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
26312 IX86_BUILTIN_GATHERSIV8SI);
26313
26314 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
26315 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
26316 IX86_BUILTIN_GATHERDIV4SI);
26317
26318 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
26319 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
26320 IX86_BUILTIN_GATHERDIV8SI);
26321
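/* The AVX2 gather intrinsics in avx2intrin.h (e.g. _mm_i32gather_pd) are
   expected to be thin wrappers around these builtins; the trailing INT
   operand is the gather scale (1, 2, 4 or 8).  */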
26322 /* MMX access to the vec_init patterns. */
26323 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
26324 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
26325
26326 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
26327 V4HI_FTYPE_HI_HI_HI_HI,
26328 IX86_BUILTIN_VEC_INIT_V4HI);
26329
26330 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
26331 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
26332 IX86_BUILTIN_VEC_INIT_V8QI);
26333
26334 /* Access to the vec_extract patterns. */
26335 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
26336 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
26337 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
26338 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
26339 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
26340 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
26341 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
26342 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
26343 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
26344 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
26345
26346 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
26347 "__builtin_ia32_vec_ext_v4hi",
26348 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
26349
26350 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
26351 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
26352
26353 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
26354 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
26355
26356 /* Access to the vec_set patterns. */
26357 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
26358 "__builtin_ia32_vec_set_v2di",
26359 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
26360
26361 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
26362 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
26363
26364 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
26365 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
26366
26367 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
26368 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
26369
26370 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
26371 "__builtin_ia32_vec_set_v4hi",
26372 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
26373
26374 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
26375 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
26376
26377 /* Add FMA4 and XOP multi-argument instructions. */
26378 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
26379 {
26380 if (d->name == 0)
26381 continue;
26382
26383 ftype = (enum ix86_builtin_func_type) d->flag;
26384 def_builtin_const (d->mask, d->name, ftype, d->code);
26385 }
26386 }
26387
26388 /* Internal helper for ix86_init_builtins: registers the ms_abi and sysv_abi va_list builtins used on 64-bit targets. */
26389
26390 static void
26391 ix86_init_builtins_va_builtins_abi (void)
26392 {
26393 tree ms_va_ref, sysv_va_ref;
26394 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
26395 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
26396 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
26397 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
26398
26399 if (!TARGET_64BIT)
26400 return;
26401 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
26402 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
26403 ms_va_ref = build_reference_type (ms_va_list_type_node);
26404 sysv_va_ref =
26405 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
26406
26407 fnvoid_va_end_ms =
26408 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
26409 fnvoid_va_start_ms =
26410 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
26411 fnvoid_va_end_sysv =
26412 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
26413 fnvoid_va_start_sysv =
26414 build_varargs_function_type_list (void_type_node, sysv_va_ref,
26415 NULL_TREE);
26416 fnvoid_va_copy_ms =
26417 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
26418 NULL_TREE);
26419 fnvoid_va_copy_sysv =
26420 build_function_type_list (void_type_node, sysv_va_ref,
26421 sysv_va_ref, NULL_TREE);
26422
26423 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
26424 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
26425 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
26426 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
26427 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
26428 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
26429 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
26430 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
26431 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
26432 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
26433 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
26434 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
26435 }
26436
26437 static void
26438 ix86_init_builtin_types (void)
26439 {
26440 tree float128_type_node, float80_type_node;
26441
26442 /* The __float80 type. */
26443 float80_type_node = long_double_type_node;
26444 if (TYPE_MODE (float80_type_node) != XFmode)
26445 {
26446 /* long double is not the 80-bit extended type here, so build a distinct type for __float80. */
26447 float80_type_node = make_node (REAL_TYPE);
26448
26449 TYPE_PRECISION (float80_type_node) = 80;
26450 layout_type (float80_type_node);
26451 }
26452 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
26453
26454 /* The __float128 type. */
26455 float128_type_node = make_node (REAL_TYPE);
26456 TYPE_PRECISION (float128_type_node) = 128;
26457 layout_type (float128_type_node);
26458 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
26459
26460 /* This macro is built by i386-builtin-types.awk. */
26461 DEFINE_BUILTIN_PRIMITIVE_TYPES;
26462 }
26463
26464 static void
26465 ix86_init_builtins (void)
26466 {
26467 tree t;
26468
26469 ix86_init_builtin_types ();
26470
26471 /* TFmode support builtins. */
26472 def_builtin_const (0, "__builtin_infq",
26473 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
26474 def_builtin_const (0, "__builtin_huge_valq",
26475 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
26476
26477 /* We will expand these to normal library calls if SSE2 isn't available,
26478    since they are used by libgcc. */
26479 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
26480 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
26481 BUILT_IN_MD, "__fabstf2", NULL_TREE);
26482 TREE_READONLY (t) = 1;
26483 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
26484
26485 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
26486 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
26487 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
26488 TREE_READONLY (t) = 1;
26489 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
26490
26491 ix86_init_mmx_sse_builtins ();
26492
26493 if (TARGET_LP64)
26494 ix86_init_builtins_va_builtins_abi ();
26495
26496 #ifdef SUBTARGET_INIT_BUILTINS
26497 SUBTARGET_INIT_BUILTINS;
26498 #endif
26499 }
26500
26501 /* Return the ix86 builtin for CODE. */
26502
26503 static tree
26504 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
26505 {
26506 if (code >= IX86_BUILTIN_MAX)
26507 return error_mark_node;
26508
26509 return ix86_builtins[code];
26510 }
26511
26512 /* Errors in the source file can cause expand_expr to return const0_rtx
26513 where we expect a vector. To avoid crashing, use one of the vector
26514 clear instructions. */
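/* For example, with MODE == V4SImode a stray const0_rtx argument becomes
   CONST0_RTX (V4SImode), i.e. an all-zero vector constant.  */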
26515 static rtx
26516 safe_vector_operand (rtx x, enum machine_mode mode)
26517 {
26518 if (x == const0_rtx)
26519 x = CONST0_RTX (mode);
26520 return x;
26521 }
26522
26523 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
26524
26525 static rtx
26526 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
26527 {
26528 rtx pat;
26529 tree arg0 = CALL_EXPR_ARG (exp, 0);
26530 tree arg1 = CALL_EXPR_ARG (exp, 1);
26531 rtx op0 = expand_normal (arg0);
26532 rtx op1 = expand_normal (arg1);
26533 enum machine_mode tmode = insn_data[icode].operand[0].mode;
26534 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
26535 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
26536
26537 if (VECTOR_MODE_P (mode0))
26538 op0 = safe_vector_operand (op0, mode0);
26539 if (VECTOR_MODE_P (mode1))
26540 op1 = safe_vector_operand (op1, mode1);
26541
26542 if (optimize || !target
26543 || GET_MODE (target) != tmode
26544 || !insn_data[icode].operand[0].predicate (target, tmode))
26545 target = gen_reg_rtx (tmode);
26546
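  /* If the pattern wants a TImode second operand but the argument is a
     plain SImode value, load it into the low element of a V4SI register
     and view the result as TImode.  */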
26547 if (GET_MODE (op1) == SImode && mode1 == TImode)
26548 {
26549 rtx x = gen_reg_rtx (V4SImode);
26550 emit_insn (gen_sse2_loadd (x, op1));
26551 op1 = gen_lowpart (TImode, x);
26552 }
26553
26554 if (!insn_data[icode].operand[1].predicate (op0, mode0))
26555 op0 = copy_to_mode_reg (mode0, op0);
26556 if (!insn_data[icode].operand[2].predicate (op1, mode1))
26557 op1 = copy_to_mode_reg (mode1, op1);
26558
26559 pat = GEN_FCN (icode) (target, op0, op1);
26560 if (! pat)
26561 return 0;
26562
26563 emit_insn (pat);
26564
26565 return target;
26566 }
26567
26568 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
26569
26570 static rtx
26571 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
26572 enum ix86_builtin_func_type m_type,
26573 enum rtx_code sub_code)
26574 {
26575 rtx pat;
26576 int i;
26577 int nargs;
26578 bool comparison_p = false;
26579 bool tf_p = false;
26580 bool last_arg_constant = false;
26581 int num_memory = 0;
26582 struct {
26583 rtx op;
26584 enum machine_mode mode;
26585 } args[4];
26586
26587 enum machine_mode tmode = insn_data[icode].operand[0].mode;
26588
26589 switch (m_type)
26590 {
26591 case MULTI_ARG_4_DF2_DI_I:
26592 case MULTI_ARG_4_DF2_DI_I1:
26593 case MULTI_ARG_4_SF2_SI_I:
26594 case MULTI_ARG_4_SF2_SI_I1:
26595 nargs = 4;
26596 last_arg_constant = true;
26597 break;
26598
26599 case MULTI_ARG_3_SF:
26600 case MULTI_ARG_3_DF:
26601 case MULTI_ARG_3_SF2:
26602 case MULTI_ARG_3_DF2:
26603 case MULTI_ARG_3_DI:
26604 case MULTI_ARG_3_SI:
26605 case MULTI_ARG_3_SI_DI:
26606 case MULTI_ARG_3_HI:
26607 case MULTI_ARG_3_HI_SI:
26608 case MULTI_ARG_3_QI:
26609 case MULTI_ARG_3_DI2:
26610 case MULTI_ARG_3_SI2:
26611 case MULTI_ARG_3_HI2:
26612 case MULTI_ARG_3_QI2:
26613 nargs = 3;
26614 break;
26615
26616 case MULTI_ARG_2_SF:
26617 case MULTI_ARG_2_DF:
26618 case MULTI_ARG_2_DI:
26619 case MULTI_ARG_2_SI:
26620 case MULTI_ARG_2_HI:
26621 case MULTI_ARG_2_QI:
26622 nargs = 2;
26623 break;
26624
26625 case MULTI_ARG_2_DI_IMM:
26626 case MULTI_ARG_2_SI_IMM:
26627 case MULTI_ARG_2_HI_IMM:
26628 case MULTI_ARG_2_QI_IMM:
26629 nargs = 2;
26630 last_arg_constant = true;
26631 break;
26632
26633 case MULTI_ARG_1_SF:
26634 case MULTI_ARG_1_DF:
26635 case MULTI_ARG_1_SF2:
26636 case MULTI_ARG_1_DF2:
26637 case MULTI_ARG_1_DI:
26638 case MULTI_ARG_1_SI:
26639 case MULTI_ARG_1_HI:
26640 case MULTI_ARG_1_QI:
26641 case MULTI_ARG_1_SI_DI:
26642 case MULTI_ARG_1_HI_DI:
26643 case MULTI_ARG_1_HI_SI:
26644 case MULTI_ARG_1_QI_DI:
26645 case MULTI_ARG_1_QI_SI:
26646 case MULTI_ARG_1_QI_HI:
26647 nargs = 1;
26648 break;
26649
26650 case MULTI_ARG_2_DI_CMP:
26651 case MULTI_ARG_2_SI_CMP:
26652 case MULTI_ARG_2_HI_CMP:
26653 case MULTI_ARG_2_QI_CMP:
26654 nargs = 2;
26655 comparison_p = true;
26656 break;
26657
26658 case MULTI_ARG_2_SF_TF:
26659 case MULTI_ARG_2_DF_TF:
26660 case MULTI_ARG_2_DI_TF:
26661 case MULTI_ARG_2_SI_TF:
26662 case MULTI_ARG_2_HI_TF:
26663 case MULTI_ARG_2_QI_TF:
26664 nargs = 2;
26665 tf_p = true;
26666 break;
26667
26668 default:
26669 gcc_unreachable ();
26670 }
26671
26672 if (optimize || !target
26673 || GET_MODE (target) != tmode
26674 || !insn_data[icode].operand[0].predicate (target, tmode))
26675 target = gen_reg_rtx (tmode);
26676
26677 gcc_assert (nargs <= 4);
26678
26679 for (i = 0; i < nargs; i++)
26680 {
26681 tree arg = CALL_EXPR_ARG (exp, i);
26682 rtx op = expand_normal (arg);
26683 int adjust = (comparison_p) ? 1 : 0;
26684 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
26685
26686 if (last_arg_constant && i == nargs - 1)
26687 {
26688 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
26689 {
26690 enum insn_code new_icode = icode;
26691 switch (icode)
26692 {
26693 case CODE_FOR_xop_vpermil2v2df3:
26694 case CODE_FOR_xop_vpermil2v4sf3:
26695 case CODE_FOR_xop_vpermil2v4df3:
26696 case CODE_FOR_xop_vpermil2v8sf3:
26697 error ("the last argument must be a 2-bit immediate");
26698 return gen_reg_rtx (tmode);
26699 case CODE_FOR_xop_rotlv2di3:
26700 new_icode = CODE_FOR_rotlv2di3;
26701 goto xop_rotl;
26702 case CODE_FOR_xop_rotlv4si3:
26703 new_icode = CODE_FOR_rotlv4si3;
26704 goto xop_rotl;
26705 case CODE_FOR_xop_rotlv8hi3:
26706 new_icode = CODE_FOR_rotlv8hi3;
26707 goto xop_rotl;
26708 case CODE_FOR_xop_rotlv16qi3:
26709 new_icode = CODE_FOR_rotlv16qi3;
26710 xop_rotl:
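	      /* A constant rotate count is reduced modulo the element
		 width (e.g. for V4SImode a count of 33 becomes 1), which
		 the rotl patterns then accept as an immediate; a variable
		 count instead falls through to the standard rotl pattern
		 via NEW_ICODE.  */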
26711 if (CONST_INT_P (op))
26712 {
26713 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
26714 op = GEN_INT (INTVAL (op) & mask);
26715 gcc_checking_assert
26716 (insn_data[icode].operand[i + 1].predicate (op, mode));
26717 }
26718 else
26719 {
26720 gcc_checking_assert
26721 (nargs == 2
26722 && insn_data[new_icode].operand[0].mode == tmode
26723 && insn_data[new_icode].operand[1].mode == tmode
26724 && insn_data[new_icode].operand[2].mode == mode
26725 && insn_data[new_icode].operand[0].predicate
26726 == insn_data[icode].operand[0].predicate
26727 && insn_data[new_icode].operand[1].predicate
26728 == insn_data[icode].operand[1].predicate);
26729 icode = new_icode;
26730 goto non_constant;
26731 }
26732 break;
26733 default:
26734 gcc_unreachable ();
26735 }
26736 }
26737 }
26738 else
26739 {
26740 non_constant:
26741 if (VECTOR_MODE_P (mode))
26742 op = safe_vector_operand (op, mode);
26743
26744 /* If we aren't optimizing, only allow one memory operand to be
26745 generated. */
26746 if (memory_operand (op, mode))
26747 num_memory++;
26748
26749 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
26750
26751 if (optimize
26752 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
26753 || num_memory > 1)
26754 op = force_reg (mode, op);
26755 }
26756
26757 args[i].op = op;
26758 args[i].mode = mode;
26759 }
26760
26761 switch (nargs)
26762 {
26763 case 1:
26764 pat = GEN_FCN (icode) (target, args[0].op);
26765 break;
26766
26767 case 2:
26768 if (tf_p)
26769 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
26770 GEN_INT ((int)sub_code));
26771 else if (! comparison_p)
26772 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
26773 else
26774 {
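	    /* The compare patterns take the comparison rtx itself (built
	       from SUB_CODE) as an explicit operand ahead of the two
	       inputs.  */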
26775 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
26776 args[0].op,
26777 args[1].op);
26778
26779 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
26780 }
26781 break;
26782
26783 case 3:
26784 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
26785 break;
26786
26787 case 4:
26788 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
26789 break;
26790
26791 default:
26792 gcc_unreachable ();
26793 }
26794
26795 if (! pat)
26796 return 0;
26797
26798 emit_insn (pat);
26799 return target;
26800 }
26801
26802 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
26803 insns with vec_merge. */
26804
26805 static rtx
26806 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
26807 rtx target)
26808 {
26809 rtx pat;
26810 tree arg0 = CALL_EXPR_ARG (exp, 0);
26811 rtx op1, op0 = expand_normal (arg0);
26812 enum machine_mode tmode = insn_data[icode].operand[0].mode;
26813 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
26814
26815 if (optimize || !target
26816 || GET_MODE (target) != tmode
26817 || !insn_data[icode].operand[0].predicate (target, tmode))
26818 target = gen_reg_rtx (tmode);
26819
26820 if (VECTOR_MODE_P (mode0))
26821 op0 = safe_vector_operand (op0, mode0);
26822
26823 if ((optimize && !register_operand (op0, mode0))
26824 || !insn_data[icode].operand[1].predicate (op0, mode0))
26825 op0 = copy_to_mode_reg (mode0, op0);
26826
26827 op1 = op0;
26828 if (!insn_data[icode].operand[2].predicate (op1, mode0))
26829 op1 = copy_to_mode_reg (mode0, op1);
26830
26831 pat = GEN_FCN (icode) (target, op0, op1);
26832 if (! pat)
26833 return 0;
26834 emit_insn (pat);
26835 return target;
26836 }
26837
26838 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
26839
26840 static rtx
26841 ix86_expand_sse_compare (const struct builtin_description *d,
26842 tree exp, rtx target, bool swap)
26843 {
26844 rtx pat;
26845 tree arg0 = CALL_EXPR_ARG (exp, 0);
26846 tree arg1 = CALL_EXPR_ARG (exp, 1);
26847 rtx op0 = expand_normal (arg0);
26848 rtx op1 = expand_normal (arg1);
26849 rtx op2;
26850 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
26851 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
26852 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
26853 enum rtx_code comparison = d->comparison;
26854
26855 if (VECTOR_MODE_P (mode0))
26856 op0 = safe_vector_operand (op0, mode0);
26857 if (VECTOR_MODE_P (mode1))
26858 op1 = safe_vector_operand (op1, mode1);
26859
26860 /* Swap operands if we have a comparison that isn't available in
26861 hardware. */
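/* For example, a greater-than builtin is emitted as an LT comparison with
   the operands exchanged, since the SSE compare instructions only encode
   the EQ/LT/LE (and their negated/unordered) forms directly.  */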
26862 if (swap)
26863 {
26864 rtx tmp = gen_reg_rtx (mode1);
26865 emit_move_insn (tmp, op1);
26866 op1 = op0;
26867 op0 = tmp;
26868 }
26869
26870 if (optimize || !target
26871 || GET_MODE (target) != tmode
26872 || !insn_data[d->icode].operand[0].predicate (target, tmode))
26873 target = gen_reg_rtx (tmode);
26874
26875 if ((optimize && !register_operand (op0, mode0))
26876 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
26877 op0 = copy_to_mode_reg (mode0, op0);
26878 if ((optimize && !register_operand (op1, mode1))
26879 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
26880 op1 = copy_to_mode_reg (mode1, op1);
26881
26882 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
26883 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
26884 if (! pat)
26885 return 0;
26886 emit_insn (pat);
26887 return target;
26888 }
26889
26890 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
26891
26892 static rtx
26893 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
26894 rtx target)
26895 {
26896 rtx pat;
26897 tree arg0 = CALL_EXPR_ARG (exp, 0);
26898 tree arg1 = CALL_EXPR_ARG (exp, 1);
26899 rtx op0 = expand_normal (arg0);
26900 rtx op1 = expand_normal (arg1);
26901 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
26902 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
26903 enum rtx_code comparison = d->comparison;
26904
26905 if (VECTOR_MODE_P (mode0))
26906 op0 = safe_vector_operand (op0, mode0);
26907 if (VECTOR_MODE_P (mode1))
26908 op1 = safe_vector_operand (op1, mode1);
26909
26910 /* Swap operands if we have a comparison that isn't available in
26911 hardware. */
26912 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
26913 {
26914 rtx tmp = op1;
26915 op1 = op0;
26916 op0 = tmp;
26917 }
26918
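  /* Build the result as the low byte of a zeroed SImode pseudo: the
     STRICT_LOW_PART store below writes only that byte, so the full
     register is already zero-extended.  */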
26919 target = gen_reg_rtx (SImode);
26920 emit_move_insn (target, const0_rtx);
26921 target = gen_rtx_SUBREG (QImode, target, 0);
26922
26923 if ((optimize && !register_operand (op0, mode0))
26924 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
26925 op0 = copy_to_mode_reg (mode0, op0);
26926 if ((optimize && !register_operand (op1, mode1))
26927 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
26928 op1 = copy_to_mode_reg (mode1, op1);
26929
26930 pat = GEN_FCN (d->icode) (op0, op1);
26931 if (! pat)
26932 return 0;
26933 emit_insn (pat);
26934 emit_insn (gen_rtx_SET (VOIDmode,
26935 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26936 gen_rtx_fmt_ee (comparison, QImode,
26937 SET_DEST (pat),
26938 const0_rtx)));
26939
26940 return SUBREG_REG (target);
26941 }
26942
26943 /* Subroutine of ix86_expand_args_builtin to take care of round insns. */
26944
26945 static rtx
26946 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
26947 rtx target)
26948 {
26949 rtx pat;
26950 tree arg0 = CALL_EXPR_ARG (exp, 0);
26951 rtx op1, op0 = expand_normal (arg0);
26952 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
26953 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
26954
26955 if (optimize || target == 0
26956 || GET_MODE (target) != tmode
26957 || !insn_data[d->icode].operand[0].predicate (target, tmode))
26958 target = gen_reg_rtx (tmode);
26959
26960 if (VECTOR_MODE_P (mode0))
26961 op0 = safe_vector_operand (op0, mode0);
26962
26963 if ((optimize && !register_operand (op0, mode0))
26964 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
26965 op0 = copy_to_mode_reg (mode0, op0);
26966
26967 op1 = GEN_INT (d->comparison);
26968
26969 pat = GEN_FCN (d->icode) (target, op0, op1);
26970 if (! pat)
26971 return 0;
26972 emit_insn (pat);
26973 return target;
26974 }
26975
26976 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
26977
26978 static rtx
26979 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
26980 rtx target)
26981 {
26982 rtx pat;
26983 tree arg0 = CALL_EXPR_ARG (exp, 0);
26984 tree arg1 = CALL_EXPR_ARG (exp, 1);
26985 rtx op0 = expand_normal (arg0);
26986 rtx op1 = expand_normal (arg1);
26987 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
26988 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
26989 enum rtx_code comparison = d->comparison;
26990
26991 if (VECTOR_MODE_P (mode0))
26992 op0 = safe_vector_operand (op0, mode0);
26993 if (VECTOR_MODE_P (mode1))
26994 op1 = safe_vector_operand (op1, mode1);
26995
26996 target = gen_reg_rtx (SImode);
26997 emit_move_insn (target, const0_rtx);
26998 target = gen_rtx_SUBREG (QImode, target, 0);
26999
27000 if ((optimize && !register_operand (op0, mode0))
27001 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
27002 op0 = copy_to_mode_reg (mode0, op0);
27003 if ((optimize && !register_operand (op1, mode1))
27004 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
27005 op1 = copy_to_mode_reg (mode1, op1);
27006
27007 pat = GEN_FCN (d->icode) (op0, op1);
27008 if (! pat)
27009 return 0;
27010 emit_insn (pat);
27011 emit_insn (gen_rtx_SET (VOIDmode,
27012 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
27013 gen_rtx_fmt_ee (comparison, QImode,
27014 SET_DEST (pat),
27015 const0_rtx)));
27016
27017 return SUBREG_REG (target);
27018 }
27019
27020 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
27021
27022 static rtx
27023 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
27024 tree exp, rtx target)
27025 {
27026 rtx pat;
27027 tree arg0 = CALL_EXPR_ARG (exp, 0);
27028 tree arg1 = CALL_EXPR_ARG (exp, 1);
27029 tree arg2 = CALL_EXPR_ARG (exp, 2);
27030 tree arg3 = CALL_EXPR_ARG (exp, 3);
27031 tree arg4 = CALL_EXPR_ARG (exp, 4);
27032 rtx scratch0, scratch1;
27033 rtx op0 = expand_normal (arg0);
27034 rtx op1 = expand_normal (arg1);
27035 rtx op2 = expand_normal (arg2);
27036 rtx op3 = expand_normal (arg3);
27037 rtx op4 = expand_normal (arg4);
27038 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
27039
27040 tmode0 = insn_data[d->icode].operand[0].mode;
27041 tmode1 = insn_data[d->icode].operand[1].mode;
27042 modev2 = insn_data[d->icode].operand[2].mode;
27043 modei3 = insn_data[d->icode].operand[3].mode;
27044 modev4 = insn_data[d->icode].operand[4].mode;
27045 modei5 = insn_data[d->icode].operand[5].mode;
27046 modeimm = insn_data[d->icode].operand[6].mode;
27047
27048 if (VECTOR_MODE_P (modev2))
27049 op0 = safe_vector_operand (op0, modev2);
27050 if (VECTOR_MODE_P (modev4))
27051 op2 = safe_vector_operand (op2, modev4);
27052
27053 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
27054 op0 = copy_to_mode_reg (modev2, op0);
27055 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
27056 op1 = copy_to_mode_reg (modei3, op1);
27057 if ((optimize && !register_operand (op2, modev4))
27058 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
27059 op2 = copy_to_mode_reg (modev4, op2);
27060 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
27061 op3 = copy_to_mode_reg (modei5, op3);
27062
27063 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
27064 {
27065 error ("the fifth argument must be an 8-bit immediate");
27066 return const0_rtx;
27067 }
27068
27069 if (d->code == IX86_BUILTIN_PCMPESTRI128)
27070 {
27071 if (optimize || !target
27072 || GET_MODE (target) != tmode0
27073 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
27074 target = gen_reg_rtx (tmode0);
27075
27076 scratch1 = gen_reg_rtx (tmode1);
27077
27078 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
27079 }
27080 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
27081 {
27082 if (optimize || !target
27083 || GET_MODE (target) != tmode1
27084 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
27085 target = gen_reg_rtx (tmode1);
27086
27087 scratch0 = gen_reg_rtx (tmode0);
27088
27089 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
27090 }
27091 else
27092 {
27093 gcc_assert (d->flag);
27094
27095 scratch0 = gen_reg_rtx (tmode0);
27096 scratch1 = gen_reg_rtx (tmode1);
27097
27098 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
27099 }
27100
27101 if (! pat)
27102 return 0;
27103
27104 emit_insn (pat);
27105
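  /* For the flag-returning variants, D->FLAG holds the CC mode in which
     to read FLAGS_REG; the EQ test below extracts that condition as a
     0/1 value.  */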
27106 if (d->flag)
27107 {
27108 target = gen_reg_rtx (SImode);
27109 emit_move_insn (target, const0_rtx);
27110 target = gen_rtx_SUBREG (QImode, target, 0);
27111
27112 emit_insn
27113 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
27114 gen_rtx_fmt_ee (EQ, QImode,
27115 gen_rtx_REG ((enum machine_mode) d->flag,
27116 FLAGS_REG),
27117 const0_rtx)));
27118 return SUBREG_REG (target);
27119 }
27120 else
27121 return target;
27122 }
27123
27124
27125 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
27126
27127 static rtx
27128 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
27129 tree exp, rtx target)
27130 {
27131 rtx pat;
27132 tree arg0 = CALL_EXPR_ARG (exp, 0);
27133 tree arg1 = CALL_EXPR_ARG (exp, 1);
27134 tree arg2 = CALL_EXPR_ARG (exp, 2);
27135 rtx scratch0, scratch1;
27136 rtx op0 = expand_normal (arg0);
27137 rtx op1 = expand_normal (arg1);
27138 rtx op2 = expand_normal (arg2);
27139 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
27140
27141 tmode0 = insn_data[d->icode].operand[0].mode;
27142 tmode1 = insn_data[d->icode].operand[1].mode;
27143 modev2 = insn_data[d->icode].operand[2].mode;
27144 modev3 = insn_data[d->icode].operand[3].mode;
27145 modeimm = insn_data[d->icode].operand[4].mode;
27146
27147 if (VECTOR_MODE_P (modev2))
27148 op0 = safe_vector_operand (op0, modev2);
27149 if (VECTOR_MODE_P (modev3))
27150 op1 = safe_vector_operand (op1, modev3);
27151
27152 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
27153 op0 = copy_to_mode_reg (modev2, op0);
27154 if ((optimize && !register_operand (op1, modev3))
27155 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
27156 op1 = copy_to_mode_reg (modev3, op1);
27157
27158 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
27159 {
27160 error ("the third argument must be an 8-bit immediate");
27161 return const0_rtx;
27162 }
27163
27164 if (d->code == IX86_BUILTIN_PCMPISTRI128)
27165 {
27166 if (optimize || !target
27167 || GET_MODE (target) != tmode0
27168 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
27169 target = gen_reg_rtx (tmode0);
27170
27171 scratch1 = gen_reg_rtx (tmode1);
27172
27173 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
27174 }
27175 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
27176 {
27177 if (optimize || !target
27178 || GET_MODE (target) != tmode1
27179 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
27180 target = gen_reg_rtx (tmode1);
27181
27182 scratch0 = gen_reg_rtx (tmode0);
27183
27184 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
27185 }
27186 else
27187 {
27188 gcc_assert (d->flag);
27189
27190 scratch0 = gen_reg_rtx (tmode0);
27191 scratch1 = gen_reg_rtx (tmode1);
27192
27193 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
27194 }
27195
27196 if (! pat)
27197 return 0;
27198
27199 emit_insn (pat);
27200
27201 if (d->flag)
27202 {
27203 target = gen_reg_rtx (SImode);
27204 emit_move_insn (target, const0_rtx);
27205 target = gen_rtx_SUBREG (QImode, target, 0);
27206
27207 emit_insn
27208 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
27209 gen_rtx_fmt_ee (EQ, QImode,
27210 gen_rtx_REG ((enum machine_mode) d->flag,
27211 FLAGS_REG),
27212 const0_rtx)));
27213 return SUBREG_REG (target);
27214 }
27215 else
27216 return target;
27217 }
27218
27219 /* Subroutine of ix86_expand_builtin to take care of insns with
27220 variable number of operands. */
27221
27222 static rtx
27223 ix86_expand_args_builtin (const struct builtin_description *d,
27224 tree exp, rtx target)
27225 {
27226 rtx pat, real_target;
27227 unsigned int i, nargs;
27228 unsigned int nargs_constant = 0;
27229 int num_memory = 0;
27230 struct
27231 {
27232 rtx op;
27233 enum machine_mode mode;
27234 } args[4];
27235 bool last_arg_count = false;
27236 enum insn_code icode = d->icode;
27237 const struct insn_data_d *insn_p = &insn_data[icode];
27238 enum machine_mode tmode = insn_p->operand[0].mode;
27239 enum machine_mode rmode = VOIDmode;
27240 bool swap = false;
27241 enum rtx_code comparison = d->comparison;
27242
27243 switch ((enum ix86_builtin_func_type) d->flag)
27244 {
27245 case V2DF_FTYPE_V2DF_ROUND:
27246 case V4DF_FTYPE_V4DF_ROUND:
27247 case V4SF_FTYPE_V4SF_ROUND:
27248 case V8SF_FTYPE_V8SF_ROUND:
27249 return ix86_expand_sse_round (d, exp, target);
27250 case INT_FTYPE_V8SF_V8SF_PTEST:
27251 case INT_FTYPE_V4DI_V4DI_PTEST:
27252 case INT_FTYPE_V4DF_V4DF_PTEST:
27253 case INT_FTYPE_V4SF_V4SF_PTEST:
27254 case INT_FTYPE_V2DI_V2DI_PTEST:
27255 case INT_FTYPE_V2DF_V2DF_PTEST:
27256 return ix86_expand_sse_ptest (d, exp, target);
27257 case FLOAT128_FTYPE_FLOAT128:
27258 case FLOAT_FTYPE_FLOAT:
27259 case INT_FTYPE_INT:
27260 case UINT64_FTYPE_INT:
27261 case UINT16_FTYPE_UINT16:
27262 case INT64_FTYPE_INT64:
27263 case INT64_FTYPE_V4SF:
27264 case INT64_FTYPE_V2DF:
27265 case INT_FTYPE_V16QI:
27266 case INT_FTYPE_V8QI:
27267 case INT_FTYPE_V8SF:
27268 case INT_FTYPE_V4DF:
27269 case INT_FTYPE_V4SF:
27270 case INT_FTYPE_V2DF:
27271 case INT_FTYPE_V32QI:
27272 case V16QI_FTYPE_V16QI:
27273 case V8SI_FTYPE_V8SF:
27274 case V8SI_FTYPE_V4SI:
27275 case V8HI_FTYPE_V8HI:
27276 case V8HI_FTYPE_V16QI:
27277 case V8QI_FTYPE_V8QI:
27278 case V8SF_FTYPE_V8SF:
27279 case V8SF_FTYPE_V8SI:
27280 case V8SF_FTYPE_V4SF:
27281 case V8SF_FTYPE_V8HI:
27282 case V4SI_FTYPE_V4SI:
27283 case V4SI_FTYPE_V16QI:
27284 case V4SI_FTYPE_V4SF:
27285 case V4SI_FTYPE_V8SI:
27286 case V4SI_FTYPE_V8HI:
27287 case V4SI_FTYPE_V4DF:
27288 case V4SI_FTYPE_V2DF:
27289 case V4HI_FTYPE_V4HI:
27290 case V4DF_FTYPE_V4DF:
27291 case V4DF_FTYPE_V4SI:
27292 case V4DF_FTYPE_V4SF:
27293 case V4DF_FTYPE_V2DF:
27294 case V4SF_FTYPE_V4SF:
27295 case V4SF_FTYPE_V4SI:
27296 case V4SF_FTYPE_V8SF:
27297 case V4SF_FTYPE_V4DF:
27298 case V4SF_FTYPE_V8HI:
27299 case V4SF_FTYPE_V2DF:
27300 case V2DI_FTYPE_V2DI:
27301 case V2DI_FTYPE_V16QI:
27302 case V2DI_FTYPE_V8HI:
27303 case V2DI_FTYPE_V4SI:
27304 case V2DF_FTYPE_V2DF:
27305 case V2DF_FTYPE_V4SI:
27306 case V2DF_FTYPE_V4DF:
27307 case V2DF_FTYPE_V4SF:
27308 case V2DF_FTYPE_V2SI:
27309 case V2SI_FTYPE_V2SI:
27310 case V2SI_FTYPE_V4SF:
27311 case V2SI_FTYPE_V2SF:
27312 case V2SI_FTYPE_V2DF:
27313 case V2SF_FTYPE_V2SF:
27314 case V2SF_FTYPE_V2SI:
27315 case V32QI_FTYPE_V32QI:
27316 case V32QI_FTYPE_V16QI:
27317 case V16HI_FTYPE_V16HI:
27318 case V16HI_FTYPE_V8HI:
27319 case V8SI_FTYPE_V8SI:
27320 case V16HI_FTYPE_V16QI:
27321 case V8SI_FTYPE_V16QI:
27322 case V4DI_FTYPE_V16QI:
27323 case V8SI_FTYPE_V8HI:
27324 case V4DI_FTYPE_V8HI:
27325 case V4DI_FTYPE_V4SI:
27326 case V4DI_FTYPE_V2DI:
27327 nargs = 1;
27328 break;
27329 case V4SF_FTYPE_V4SF_VEC_MERGE:
27330 case V2DF_FTYPE_V2DF_VEC_MERGE:
27331 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
27332 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
27333 case V16QI_FTYPE_V16QI_V16QI:
27334 case V16QI_FTYPE_V8HI_V8HI:
27335 case V8QI_FTYPE_V8QI_V8QI:
27336 case V8QI_FTYPE_V4HI_V4HI:
27337 case V8HI_FTYPE_V8HI_V8HI:
27338 case V8HI_FTYPE_V16QI_V16QI:
27339 case V8HI_FTYPE_V4SI_V4SI:
27340 case V8SF_FTYPE_V8SF_V8SF:
27341 case V8SF_FTYPE_V8SF_V8SI:
27342 case V4SI_FTYPE_V4SI_V4SI:
27343 case V4SI_FTYPE_V8HI_V8HI:
27344 case V4SI_FTYPE_V4SF_V4SF:
27345 case V4SI_FTYPE_V2DF_V2DF:
27346 case V4HI_FTYPE_V4HI_V4HI:
27347 case V4HI_FTYPE_V8QI_V8QI:
27348 case V4HI_FTYPE_V2SI_V2SI:
27349 case V4DF_FTYPE_V4DF_V4DF:
27350 case V4DF_FTYPE_V4DF_V4DI:
27351 case V4SF_FTYPE_V4SF_V4SF:
27352 case V4SF_FTYPE_V4SF_V4SI:
27353 case V4SF_FTYPE_V4SF_V2SI:
27354 case V4SF_FTYPE_V4SF_V2DF:
27355 case V4SF_FTYPE_V4SF_DI:
27356 case V4SF_FTYPE_V4SF_SI:
27357 case V2DI_FTYPE_V2DI_V2DI:
27358 case V2DI_FTYPE_V16QI_V16QI:
27359 case V2DI_FTYPE_V4SI_V4SI:
27360 case V2DI_FTYPE_V2DI_V16QI:
27361 case V2DI_FTYPE_V2DF_V2DF:
27362 case V2SI_FTYPE_V2SI_V2SI:
27363 case V2SI_FTYPE_V4HI_V4HI:
27364 case V2SI_FTYPE_V2SF_V2SF:
27365 case V2DF_FTYPE_V2DF_V2DF:
27366 case V2DF_FTYPE_V2DF_V4SF:
27367 case V2DF_FTYPE_V2DF_V2DI:
27368 case V2DF_FTYPE_V2DF_DI:
27369 case V2DF_FTYPE_V2DF_SI:
27370 case V2SF_FTYPE_V2SF_V2SF:
27371 case V1DI_FTYPE_V1DI_V1DI:
27372 case V1DI_FTYPE_V8QI_V8QI:
27373 case V1DI_FTYPE_V2SI_V2SI:
27374 case V32QI_FTYPE_V16HI_V16HI:
27375 case V16HI_FTYPE_V8SI_V8SI:
27376 case V32QI_FTYPE_V32QI_V32QI:
27377 case V16HI_FTYPE_V32QI_V32QI:
27378 case V16HI_FTYPE_V16HI_V16HI:
27379 case V8SI_FTYPE_V8SI_V8SI:
27380 case V8SI_FTYPE_V16HI_V16HI:
27381 case V4DI_FTYPE_V4DI_V4DI:
27382 case V4DI_FTYPE_V8SI_V8SI:
27383 if (comparison == UNKNOWN)
27384 return ix86_expand_binop_builtin (icode, exp, target);
27385 nargs = 2;
27386 break;
27387 case V4SF_FTYPE_V4SF_V4SF_SWAP:
27388 case V2DF_FTYPE_V2DF_V2DF_SWAP:
27389 gcc_assert (comparison != UNKNOWN);
27390 nargs = 2;
27391 swap = true;
27392 break;
27393 case V16HI_FTYPE_V16HI_V8HI_COUNT:
27394 case V16HI_FTYPE_V16HI_SI_COUNT:
27395 case V8SI_FTYPE_V8SI_V4SI_COUNT:
27396 case V8SI_FTYPE_V8SI_SI_COUNT:
27397 case V4DI_FTYPE_V4DI_V2DI_COUNT:
27398 case V4DI_FTYPE_V4DI_INT_COUNT:
27399 case V8HI_FTYPE_V8HI_V8HI_COUNT:
27400 case V8HI_FTYPE_V8HI_SI_COUNT:
27401 case V4SI_FTYPE_V4SI_V4SI_COUNT:
27402 case V4SI_FTYPE_V4SI_SI_COUNT:
27403 case V4HI_FTYPE_V4HI_V4HI_COUNT:
27404 case V4HI_FTYPE_V4HI_SI_COUNT:
27405 case V2DI_FTYPE_V2DI_V2DI_COUNT:
27406 case V2DI_FTYPE_V2DI_SI_COUNT:
27407 case V2SI_FTYPE_V2SI_V2SI_COUNT:
27408 case V2SI_FTYPE_V2SI_SI_COUNT:
27409 case V1DI_FTYPE_V1DI_V1DI_COUNT:
27410 case V1DI_FTYPE_V1DI_SI_COUNT:
27411 nargs = 2;
27412 last_arg_count = true;
27413 break;
27414 case UINT64_FTYPE_UINT64_UINT64:
27415 case UINT_FTYPE_UINT_UINT:
27416 case UINT_FTYPE_UINT_USHORT:
27417 case UINT_FTYPE_UINT_UCHAR:
27418 case UINT16_FTYPE_UINT16_INT:
27419 case UINT8_FTYPE_UINT8_INT:
27420 nargs = 2;
27421 break;
27422 case V2DI_FTYPE_V2DI_INT_CONVERT:
27423 nargs = 2;
27424 rmode = V1TImode;
27425 nargs_constant = 1;
27426 break;
27427 case V8HI_FTYPE_V8HI_INT:
27428 case V8HI_FTYPE_V8SF_INT:
27429 case V8HI_FTYPE_V4SF_INT:
27430 case V8SF_FTYPE_V8SF_INT:
27431 case V4SI_FTYPE_V4SI_INT:
27432 case V4SI_FTYPE_V8SI_INT:
27433 case V4HI_FTYPE_V4HI_INT:
27434 case V4DF_FTYPE_V4DF_INT:
27435 case V4SF_FTYPE_V4SF_INT:
27436 case V4SF_FTYPE_V8SF_INT:
27437 case V2DI_FTYPE_V2DI_INT:
27438 case V2DF_FTYPE_V2DF_INT:
27439 case V2DF_FTYPE_V4DF_INT:
27440 case V16HI_FTYPE_V16HI_INT:
27441 case V8SI_FTYPE_V8SI_INT:
27442 case V4DI_FTYPE_V4DI_INT:
27443 case V2DI_FTYPE_V4DI_INT:
27444 nargs = 2;
27445 nargs_constant = 1;
27446 break;
27447 case V16QI_FTYPE_V16QI_V16QI_V16QI:
27448 case V8SF_FTYPE_V8SF_V8SF_V8SF:
27449 case V4DF_FTYPE_V4DF_V4DF_V4DF:
27450 case V4SF_FTYPE_V4SF_V4SF_V4SF:
27451 case V2DF_FTYPE_V2DF_V2DF_V2DF:
27452 case V32QI_FTYPE_V32QI_V32QI_V32QI:
27453 nargs = 3;
27454 break;
27455 case V32QI_FTYPE_V32QI_V32QI_INT:
27456 case V16HI_FTYPE_V16HI_V16HI_INT:
27457 case V16QI_FTYPE_V16QI_V16QI_INT:
27458 case V4DI_FTYPE_V4DI_V4DI_INT:
27459 case V8HI_FTYPE_V8HI_V8HI_INT:
27460 case V8SI_FTYPE_V8SI_V8SI_INT:
27461 case V8SI_FTYPE_V8SI_V4SI_INT:
27462 case V8SF_FTYPE_V8SF_V8SF_INT:
27463 case V8SF_FTYPE_V8SF_V4SF_INT:
27464 case V4SI_FTYPE_V4SI_V4SI_INT:
27465 case V4DF_FTYPE_V4DF_V4DF_INT:
27466 case V4DF_FTYPE_V4DF_V2DF_INT:
27467 case V4SF_FTYPE_V4SF_V4SF_INT:
27468 case V2DI_FTYPE_V2DI_V2DI_INT:
27469 case V4DI_FTYPE_V4DI_V2DI_INT:
27470 case V2DF_FTYPE_V2DF_V2DF_INT:
27471 nargs = 3;
27472 nargs_constant = 1;
27473 break;
27474 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
27475 nargs = 3;
27476 rmode = V4DImode;
27477 nargs_constant = 1;
27478 break;
27479 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
27480 nargs = 3;
27481 rmode = V2DImode;
27482 nargs_constant = 1;
27483 break;
27484 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
27485 nargs = 3;
27486 rmode = DImode;
27487 nargs_constant = 1;
27488 break;
27489 case V2DI_FTYPE_V2DI_UINT_UINT:
27490 nargs = 3;
27491 nargs_constant = 2;
27492 break;
27493 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
27494 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
27495 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
27496 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
27497 nargs = 4;
27498 nargs_constant = 1;
27499 break;
27500 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
27501 nargs = 4;
27502 nargs_constant = 2;
27503 break;
27504 default:
27505 gcc_unreachable ();
27506 }
27507
27508 gcc_assert (nargs <= ARRAY_SIZE (args));
27509
27510 if (comparison != UNKNOWN)
27511 {
27512 gcc_assert (nargs == 2);
27513 return ix86_expand_sse_compare (d, exp, target, swap);
27514 }
27515
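  /* For the *_CONVERT cases RMODE may differ from the insn's result mode
     TMODE; then allocate TARGET in RMODE and give the insn a TMODE subreg
     of it to write into.  */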
27516 if (rmode == VOIDmode || rmode == tmode)
27517 {
27518 if (optimize
27519 || target == 0
27520 || GET_MODE (target) != tmode
27521 || !insn_p->operand[0].predicate (target, tmode))
27522 target = gen_reg_rtx (tmode);
27523 real_target = target;
27524 }
27525 else
27526 {
27527 target = gen_reg_rtx (rmode);
27528 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
27529 }
27530
27531 for (i = 0; i < nargs; i++)
27532 {
27533 tree arg = CALL_EXPR_ARG (exp, i);
27534 rtx op = expand_normal (arg);
27535 enum machine_mode mode = insn_p->operand[i + 1].mode;
27536 bool match = insn_p->operand[i + 1].predicate (op, mode);
27537
27538 if (last_arg_count && (i + 1) == nargs)
27539 {
27540 /* SIMD shift insns take either an 8-bit immediate or a
27541    register as the count argument, but the builtin functions
27542    take an int.  If the count doesn't match, put it in a register. */
27543 if (!match)
27544 {
27545 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
27546 if (!insn_p->operand[i + 1].predicate (op, mode))
27547 op = copy_to_reg (op);
27548 }
27549 }
27550 else if ((nargs - i) <= nargs_constant)
27551 {
27552 if (!match)
27553 switch (icode)
27554 {
27555 case CODE_FOR_avx2_inserti128:
27556 case CODE_FOR_avx2_extracti128:
27557 error ("the last argument must be an 1-bit immediate");
27558 return const0_rtx;
27559
27560 case CODE_FOR_sse4_1_roundpd:
27561 case CODE_FOR_sse4_1_roundps:
27562 case CODE_FOR_sse4_1_roundsd:
27563 case CODE_FOR_sse4_1_roundss:
27564 case CODE_FOR_sse4_1_blendps:
27565 case CODE_FOR_avx_blendpd256:
27566 case CODE_FOR_avx_vpermilv4df:
27567 case CODE_FOR_avx_roundpd256:
27568 case CODE_FOR_avx_roundps256:
27569 error ("the last argument must be a 4-bit immediate");
27570 return const0_rtx;
27571
27572 case CODE_FOR_sse4_1_blendpd:
27573 case CODE_FOR_avx_vpermilv2df:
27574 case CODE_FOR_xop_vpermil2v2df3:
27575 case CODE_FOR_xop_vpermil2v4sf3:
27576 case CODE_FOR_xop_vpermil2v4df3:
27577 case CODE_FOR_xop_vpermil2v8sf3:
27578 error ("the last argument must be a 2-bit immediate");
27579 return const0_rtx;
27580
27581 case CODE_FOR_avx_vextractf128v4df:
27582 case CODE_FOR_avx_vextractf128v8sf:
27583 case CODE_FOR_avx_vextractf128v8si:
27584 case CODE_FOR_avx_vinsertf128v4df:
27585 case CODE_FOR_avx_vinsertf128v8sf:
27586 case CODE_FOR_avx_vinsertf128v8si:
27587 error ("the last argument must be a 1-bit immediate");
27588 return const0_rtx;
27589
27590 case CODE_FOR_avx_vmcmpv2df3:
27591 case CODE_FOR_avx_vmcmpv4sf3:
27592 case CODE_FOR_avx_cmpv2df3:
27593 case CODE_FOR_avx_cmpv4sf3:
27594 case CODE_FOR_avx_cmpv4df3:
27595 case CODE_FOR_avx_cmpv8sf3:
27596 error ("the last argument must be a 5-bit immediate");
27597 return const0_rtx;
27598
27599 default:
27600 switch (nargs_constant)
27601 {
27602 case 2:
27603 if ((nargs - i) == nargs_constant)
27604 {
27605 error ("the next to last argument must be an 8-bit immediate");
27606 break;
27607 }
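		      /* FALLTHRU */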
27608 case 1:
27609 error ("the last argument must be an 8-bit immediate");
27610 break;
27611 default:
27612 gcc_unreachable ();
27613 }
27614 return const0_rtx;
27615 }
27616 }
27617 else
27618 {
27619 if (VECTOR_MODE_P (mode))
27620 op = safe_vector_operand (op, mode);
27621
27622 /* If we aren't optimizing, only allow one memory operand to
27623 be generated. */
27624 if (memory_operand (op, mode))
27625 num_memory++;
27626
27627 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
27628 {
27629 if (optimize || !match || num_memory > 1)
27630 op = copy_to_mode_reg (mode, op);
27631 }
27632 else
27633 {
27634 op = copy_to_reg (op);
27635 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
27636 }
27637 }
27638
27639 args[i].op = op;
27640 args[i].mode = mode;
27641 }
27642
27643 switch (nargs)
27644 {
27645 case 1:
27646 pat = GEN_FCN (icode) (real_target, args[0].op);
27647 break;
27648 case 2:
27649 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
27650 break;
27651 case 3:
27652 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
27653 args[2].op);
27654 break;
27655 case 4:
27656 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
27657 args[2].op, args[3].op);
27658 break;
27659 default:
27660 gcc_unreachable ();
27661 }
27662
27663 if (! pat)
27664 return 0;
27665
27666 emit_insn (pat);
27667 return target;
27668 }
27669
27670 /* Subroutine of ix86_expand_builtin to take care of special insns
27671 with variable number of operands. */
27672
27673 static rtx
27674 ix86_expand_special_args_builtin (const struct builtin_description *d,
27675 tree exp, rtx target)
27676 {
27677 tree arg;
27678 rtx pat, op;
27679 unsigned int i, nargs, arg_adjust, memory;
27680 struct
27681 {
27682 rtx op;
27683 enum machine_mode mode;
27684 } args[3];
27685 enum insn_code icode = d->icode;
27686 bool last_arg_constant = false;
27687 const struct insn_data_d *insn_p = &insn_data[icode];
27688 enum machine_mode tmode = insn_p->operand[0].mode;
27689 enum { load, store } klass;
27690
27691 switch ((enum ix86_builtin_func_type) d->flag)
27692 {
27693 case VOID_FTYPE_VOID:
27694 if (icode == CODE_FOR_avx_vzeroupper)
27695 target = GEN_INT (vzeroupper_intrinsic);
27696 emit_insn (GEN_FCN (icode) (target));
27697 return 0;
27698 case VOID_FTYPE_UINT64:
27699 case VOID_FTYPE_UNSIGNED:
27700 nargs = 0;
27701 klass = store;
27702 memory = 0;
27703 break;
27705 case UINT64_FTYPE_VOID:
27706 case UNSIGNED_FTYPE_VOID:
27707 nargs = 0;
27708 klass = load;
27709 memory = 0;
27710 break;
27711 case UINT64_FTYPE_PUNSIGNED:
27712 case V2DI_FTYPE_PV2DI:
27713 case V4DI_FTYPE_PV4DI:
27714 case V32QI_FTYPE_PCCHAR:
27715 case V16QI_FTYPE_PCCHAR:
27716 case V8SF_FTYPE_PCV4SF:
27717 case V8SF_FTYPE_PCFLOAT:
27718 case V4SF_FTYPE_PCFLOAT:
27719 case V4DF_FTYPE_PCV2DF:
27720 case V4DF_FTYPE_PCDOUBLE:
27721 case V2DF_FTYPE_PCDOUBLE:
27722 case VOID_FTYPE_PVOID:
27723 nargs = 1;
27724 klass = load;
27725 memory = 0;
27726 break;
27727 case VOID_FTYPE_PV2SF_V4SF:
27728 case VOID_FTYPE_PV4DI_V4DI:
27729 case VOID_FTYPE_PV2DI_V2DI:
27730 case VOID_FTYPE_PCHAR_V32QI:
27731 case VOID_FTYPE_PCHAR_V16QI:
27732 case VOID_FTYPE_PFLOAT_V8SF:
27733 case VOID_FTYPE_PFLOAT_V4SF:
27734 case VOID_FTYPE_PDOUBLE_V4DF:
27735 case VOID_FTYPE_PDOUBLE_V2DF:
27736 case VOID_FTYPE_PULONGLONG_ULONGLONG:
27737 case VOID_FTYPE_PINT_INT:
27738 nargs = 1;
27739 klass = store;
27740 /* Reserve memory operand for target. */
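       /* Setting MEMORY to ARRAY_SIZE (args) means no loop index below can
	  match it, so none of the remaining arguments is treated as a memory
	  reference; the only memory operand is the store destination built
	  from the first call argument.  */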
27741 memory = ARRAY_SIZE (args);
27742 break;
27743 case V4SF_FTYPE_V4SF_PCV2SF:
27744 case V2DF_FTYPE_V2DF_PCDOUBLE:
27745 nargs = 2;
27746 klass = load;
27747 memory = 1;
27748 break;
27749 case V8SF_FTYPE_PCV8SF_V8SI:
27750 case V4DF_FTYPE_PCV4DF_V4DI:
27751 case V4SF_FTYPE_PCV4SF_V4SI:
27752 case V2DF_FTYPE_PCV2DF_V2DI:
27753 case V8SI_FTYPE_PCV8SI_V8SI:
27754 case V4DI_FTYPE_PCV4DI_V4DI:
27755 case V4SI_FTYPE_PCV4SI_V4SI:
27756 case V2DI_FTYPE_PCV2DI_V2DI:
27757 nargs = 2;
27758 klass = load;
27759 memory = 0;
27760 break;
27761 case VOID_FTYPE_PV8SF_V8SI_V8SF:
27762 case VOID_FTYPE_PV4DF_V4DI_V4DF:
27763 case VOID_FTYPE_PV4SF_V4SI_V4SF:
27764 case VOID_FTYPE_PV2DF_V2DI_V2DF:
27765 case VOID_FTYPE_PV8SI_V8SI_V8SI:
27766 case VOID_FTYPE_PV4DI_V4DI_V4DI:
27767 case VOID_FTYPE_PV4SI_V4SI_V4SI:
27768 case VOID_FTYPE_PV2DI_V2DI_V2DI:
27769 nargs = 2;
27770 klass = store;
27771 /* Reserve memory operand for target. */
27772 memory = ARRAY_SIZE (args);
27773 break;
27774 case VOID_FTYPE_UINT_UINT_UINT:
27775 case VOID_FTYPE_UINT64_UINT_UINT:
27776 case UCHAR_FTYPE_UINT_UINT_UINT:
27777 case UCHAR_FTYPE_UINT64_UINT_UINT:
27778 nargs = 3;
27779 klass = load;
27780 memory = ARRAY_SIZE (args);
27781 last_arg_constant = true;
27782 break;
27783 default:
27784 gcc_unreachable ();
27785 }
27786
27787 gcc_assert (nargs <= ARRAY_SIZE (args));
27788
27789 if (klass == store)
27790 {
27791 arg = CALL_EXPR_ARG (exp, 0);
27792 op = expand_normal (arg);
27793 gcc_assert (target == 0);
27794 if (memory)
27795 {
27796 if (GET_MODE (op) != Pmode)
27797 op = convert_to_mode (Pmode, op, 1);
27798 target = gen_rtx_MEM (tmode, force_reg (Pmode, op));
27799 }
27800 else
27801 target = force_reg (tmode, op);
27802 arg_adjust = 1;
27803 }
27804 else
27805 {
27806 arg_adjust = 0;
27807 if (optimize
27808 || target == 0
27809 || GET_MODE (target) != tmode
27810 || !insn_p->operand[0].predicate (target, tmode))
27811 target = gen_reg_rtx (tmode);
27812 }
27813
27814 for (i = 0; i < nargs; i++)
27815 {
27816 enum machine_mode mode = insn_p->operand[i + 1].mode;
27817 bool match;
27818
27819 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
27820 op = expand_normal (arg);
27821 match = insn_p->operand[i + 1].predicate (op, mode);
27822
27823 if (last_arg_constant && (i + 1) == nargs)
27824 {
27825 if (!match)
27826 {
27827 if (icode == CODE_FOR_lwp_lwpvalsi3
27828 || icode == CODE_FOR_lwp_lwpinssi3
27829 || icode == CODE_FOR_lwp_lwpvaldi3
27830 || icode == CODE_FOR_lwp_lwpinsdi3)
27831 error ("the last argument must be a 32-bit immediate");
27832 else
27833 error ("the last argument must be an 8-bit immediate");
27834 return const0_rtx;
27835 }
27836 }
27837 else
27838 {
27839 if (i == memory)
27840 {
27841 /* This must be the memory operand. */
27842 if (GET_MODE (op) != Pmode)
27843 op = convert_to_mode (Pmode, op, 1);
27844 op = gen_rtx_MEM (mode, force_reg (Pmode, op));
27845 gcc_assert (GET_MODE (op) == mode
27846 || GET_MODE (op) == VOIDmode);
27847 }
27848 else
27849 {
 27850 	      /* This must be a register operand. */
27851 if (VECTOR_MODE_P (mode))
27852 op = safe_vector_operand (op, mode);
27853
27854 gcc_assert (GET_MODE (op) == mode
27855 || GET_MODE (op) == VOIDmode);
27856 op = copy_to_mode_reg (mode, op);
27857 }
27858 }
27859
27860 args[i].op = op;
27861 args[i].mode = mode;
27862 }
27863
27864 switch (nargs)
27865 {
27866 case 0:
27867 pat = GEN_FCN (icode) (target);
27868 break;
27869 case 1:
27870 pat = GEN_FCN (icode) (target, args[0].op);
27871 break;
27872 case 2:
27873 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
27874 break;
27875 case 3:
27876 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
27877 break;
27878 default:
27879 gcc_unreachable ();
27880 }
27881
27882 if (! pat)
27883 return 0;
27884 emit_insn (pat);
27885 return klass == store ? 0 : target;
27886 }
27887
27888 /* Return the integer constant in ARG. Constrain it to be in the range
27889 of the subparts of VEC_TYPE; issue an error if not. */
27890
27891 static int
27892 get_element_number (tree vec_type, tree arg)
27893 {
27894 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
27895
27896 if (!host_integerp (arg, 1)
27897 || (elt = tree_low_cst (arg, 1), elt > max))
27898 {
27899 error ("selector must be an integer constant in the range 0..%wi", max);
27900 return 0;
27901 }
27902
27903 return elt;
27904 }
27905
27906 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
27907 ix86_expand_vector_init. We DO have language-level syntax for this, in
27908 the form of (type){ init-list }. Except that since we can't place emms
27909 instructions from inside the compiler, we can't allow the use of MMX
27910 registers unless the user explicitly asks for it. So we do *not* define
27911 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
 27912    we have builtins invoked by mmintrin.h that give us license to emit
27913 these sorts of instructions. */
27914
27915 static rtx
27916 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
27917 {
27918 enum machine_mode tmode = TYPE_MODE (type);
27919 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
27920 int i, n_elt = GET_MODE_NUNITS (tmode);
27921 rtvec v = rtvec_alloc (n_elt);
27922
27923 gcc_assert (VECTOR_MODE_P (tmode));
27924 gcc_assert (call_expr_nargs (exp) == n_elt);
27925
27926 for (i = 0; i < n_elt; ++i)
27927 {
27928 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
27929 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
27930 }
27931
27932 if (!target || !register_operand (target, tmode))
27933 target = gen_reg_rtx (tmode);
27934
27935 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
27936 return target;
27937 }
27938
27939 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
27940 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
27941 had a language-level syntax for referencing vector elements. */
27942
27943 static rtx
27944 ix86_expand_vec_ext_builtin (tree exp, rtx target)
27945 {
27946 enum machine_mode tmode, mode0;
27947 tree arg0, arg1;
27948 int elt;
27949 rtx op0;
27950
27951 arg0 = CALL_EXPR_ARG (exp, 0);
27952 arg1 = CALL_EXPR_ARG (exp, 1);
27953
27954 op0 = expand_normal (arg0);
27955 elt = get_element_number (TREE_TYPE (arg0), arg1);
27956
27957 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
27958 mode0 = TYPE_MODE (TREE_TYPE (arg0));
27959 gcc_assert (VECTOR_MODE_P (mode0));
27960
27961 op0 = force_reg (mode0, op0);
27962
27963 if (optimize || !target || !register_operand (target, tmode))
27964 target = gen_reg_rtx (tmode);
27965
27966 ix86_expand_vector_extract (true, target, op0, elt);
27967
27968 return target;
27969 }
27970
27971 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
27972 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
27973 a language-level syntax for referencing vector elements. */
27974
27975 static rtx
27976 ix86_expand_vec_set_builtin (tree exp)
27977 {
27978 enum machine_mode tmode, mode1;
27979 tree arg0, arg1, arg2;
27980 int elt;
27981 rtx op0, op1, target;
27982
27983 arg0 = CALL_EXPR_ARG (exp, 0);
27984 arg1 = CALL_EXPR_ARG (exp, 1);
27985 arg2 = CALL_EXPR_ARG (exp, 2);
27986
27987 tmode = TYPE_MODE (TREE_TYPE (arg0));
27988 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
27989 gcc_assert (VECTOR_MODE_P (tmode));
27990
27991 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
27992 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
27993 elt = get_element_number (TREE_TYPE (arg0), arg2);
27994
27995 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
27996 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
27997
27998 op0 = force_reg (tmode, op0);
27999 op1 = force_reg (mode1, op1);
28000
28001 /* OP0 is the source of these builtin functions and shouldn't be
28002 modified. Create a copy, use it and return it as target. */
28003 target = gen_reg_rtx (tmode);
28004 emit_move_insn (target, op0);
28005 ix86_expand_vector_set (true, target, op1, elt);
28006
28007 return target;
28008 }
28009
28010 /* Expand an expression EXP that calls a built-in function,
28011 with result going to TARGET if that's convenient
28012 (and in mode MODE if that's convenient).
28013 SUBTARGET may be used as the target for computing one of EXP's operands.
28014 IGNORE is nonzero if the value is to be ignored. */
28015
28016 static rtx
28017 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
28018 enum machine_mode mode ATTRIBUTE_UNUSED,
28019 int ignore ATTRIBUTE_UNUSED)
28020 {
28021 const struct builtin_description *d;
28022 size_t i;
28023 enum insn_code icode;
28024 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
28025 tree arg0, arg1, arg2, arg3, arg4;
28026 rtx op0, op1, op2, op3, op4, pat;
28027 enum machine_mode mode0, mode1, mode2, mode3, mode4;
28028 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
28029
28030 /* Determine whether the builtin function is available under the current ISA.
28031 Originally the builtin was not created if it wasn't applicable to the
28032 current ISA based on the command line switches. With function specific
28033 options, we need to check in the context of the function making the call
28034 whether it is supported. */
28035 if (ix86_builtins_isa[fcode].isa
28036 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
28037 {
28038 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
28039 NULL, (enum fpmath_unit) 0, false);
28040
28041 if (!opts)
28042 error ("%qE needs unknown isa option", fndecl);
28043 else
28044 {
28045 gcc_assert (opts != NULL);
28046 error ("%qE needs isa option %s", fndecl, opts);
28047 free (opts);
28048 }
28049 return const0_rtx;
28050 }
28051
28052 switch (fcode)
28053 {
28054 case IX86_BUILTIN_MASKMOVQ:
28055 case IX86_BUILTIN_MASKMOVDQU:
28056 icode = (fcode == IX86_BUILTIN_MASKMOVQ
28057 ? CODE_FOR_mmx_maskmovq
28058 : CODE_FOR_sse2_maskmovdqu);
28059 /* Note the arg order is different from the operand order. */
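       /* The builtin arguments come in as (data, mask, address) while the
	  insn operands are (mem, data, mask), so ARG0 below is really the
	  third call argument.  */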
28060 arg1 = CALL_EXPR_ARG (exp, 0);
28061 arg2 = CALL_EXPR_ARG (exp, 1);
28062 arg0 = CALL_EXPR_ARG (exp, 2);
28063 op0 = expand_normal (arg0);
28064 op1 = expand_normal (arg1);
28065 op2 = expand_normal (arg2);
28066 mode0 = insn_data[icode].operand[0].mode;
28067 mode1 = insn_data[icode].operand[1].mode;
28068 mode2 = insn_data[icode].operand[2].mode;
28069
28070 if (GET_MODE (op0) != Pmode)
28071 op0 = convert_to_mode (Pmode, op0, 1);
28072 op0 = gen_rtx_MEM (mode1, force_reg (Pmode, op0));
28073
28074 if (!insn_data[icode].operand[0].predicate (op0, mode0))
28075 op0 = copy_to_mode_reg (mode0, op0);
28076 if (!insn_data[icode].operand[1].predicate (op1, mode1))
28077 op1 = copy_to_mode_reg (mode1, op1);
28078 if (!insn_data[icode].operand[2].predicate (op2, mode2))
28079 op2 = copy_to_mode_reg (mode2, op2);
28080 pat = GEN_FCN (icode) (op0, op1, op2);
28081 if (! pat)
28082 return 0;
28083 emit_insn (pat);
28084 return 0;
28085
28086 case IX86_BUILTIN_LDMXCSR:
28087 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
28088 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
28089 emit_move_insn (target, op0);
28090 emit_insn (gen_sse_ldmxcsr (target));
28091 return 0;
28092
28093 case IX86_BUILTIN_STMXCSR:
28094 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
28095 emit_insn (gen_sse_stmxcsr (target));
28096 return copy_to_mode_reg (SImode, target);
28097
28098 case IX86_BUILTIN_CLFLUSH:
28099 arg0 = CALL_EXPR_ARG (exp, 0);
28100 op0 = expand_normal (arg0);
28101 icode = CODE_FOR_sse2_clflush;
28102 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
28103 {
28104 if (GET_MODE (op0) != Pmode)
28105 op0 = convert_to_mode (Pmode, op0, 1);
28106 op0 = force_reg (Pmode, op0);
28107 }
28108
28109 emit_insn (gen_sse2_clflush (op0));
28110 return 0;
28111
28112 case IX86_BUILTIN_MONITOR:
28113 arg0 = CALL_EXPR_ARG (exp, 0);
28114 arg1 = CALL_EXPR_ARG (exp, 1);
28115 arg2 = CALL_EXPR_ARG (exp, 2);
28116 op0 = expand_normal (arg0);
28117 op1 = expand_normal (arg1);
28118 op2 = expand_normal (arg2);
28119 if (!REG_P (op0))
28120 {
28121 if (GET_MODE (op0) != Pmode)
28122 op0 = convert_to_mode (Pmode, op0, 1);
28123 op0 = force_reg (Pmode, op0);
28124 }
28125 if (!REG_P (op1))
28126 op1 = copy_to_mode_reg (SImode, op1);
28127 if (!REG_P (op2))
28128 op2 = copy_to_mode_reg (SImode, op2);
28129 emit_insn (ix86_gen_monitor (op0, op1, op2));
28130 return 0;
28131
28132 case IX86_BUILTIN_MWAIT:
28133 arg0 = CALL_EXPR_ARG (exp, 0);
28134 arg1 = CALL_EXPR_ARG (exp, 1);
28135 op0 = expand_normal (arg0);
28136 op1 = expand_normal (arg1);
28137 if (!REG_P (op0))
28138 op0 = copy_to_mode_reg (SImode, op0);
28139 if (!REG_P (op1))
28140 op1 = copy_to_mode_reg (SImode, op1);
28141 emit_insn (gen_sse3_mwait (op0, op1));
28142 return 0;
28143
28144 case IX86_BUILTIN_VEC_INIT_V2SI:
28145 case IX86_BUILTIN_VEC_INIT_V4HI:
28146 case IX86_BUILTIN_VEC_INIT_V8QI:
28147 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
28148
28149 case IX86_BUILTIN_VEC_EXT_V2DF:
28150 case IX86_BUILTIN_VEC_EXT_V2DI:
28151 case IX86_BUILTIN_VEC_EXT_V4SF:
28152 case IX86_BUILTIN_VEC_EXT_V4SI:
28153 case IX86_BUILTIN_VEC_EXT_V8HI:
28154 case IX86_BUILTIN_VEC_EXT_V2SI:
28155 case IX86_BUILTIN_VEC_EXT_V4HI:
28156 case IX86_BUILTIN_VEC_EXT_V16QI:
28157 return ix86_expand_vec_ext_builtin (exp, target);
28158
28159 case IX86_BUILTIN_VEC_SET_V2DI:
28160 case IX86_BUILTIN_VEC_SET_V4SF:
28161 case IX86_BUILTIN_VEC_SET_V4SI:
28162 case IX86_BUILTIN_VEC_SET_V8HI:
28163 case IX86_BUILTIN_VEC_SET_V4HI:
28164 case IX86_BUILTIN_VEC_SET_V16QI:
28165 return ix86_expand_vec_set_builtin (exp);
28166
28167 case IX86_BUILTIN_VEC_PERM_V2DF:
28168 case IX86_BUILTIN_VEC_PERM_V4SF:
28169 case IX86_BUILTIN_VEC_PERM_V2DI:
28170 case IX86_BUILTIN_VEC_PERM_V4SI:
28171 case IX86_BUILTIN_VEC_PERM_V8HI:
28172 case IX86_BUILTIN_VEC_PERM_V16QI:
28173 case IX86_BUILTIN_VEC_PERM_V2DI_U:
28174 case IX86_BUILTIN_VEC_PERM_V4SI_U:
28175 case IX86_BUILTIN_VEC_PERM_V8HI_U:
28176 case IX86_BUILTIN_VEC_PERM_V16QI_U:
28177 case IX86_BUILTIN_VEC_PERM_V4DF:
28178 case IX86_BUILTIN_VEC_PERM_V8SF:
28179 return ix86_expand_vec_perm_builtin (exp);
28180
28181 case IX86_BUILTIN_INFQ:
28182 case IX86_BUILTIN_HUGE_VALQ:
28183 {
28184 REAL_VALUE_TYPE inf;
28185 rtx tmp;
28186
28187 real_inf (&inf);
28188 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
28189
28190 tmp = validize_mem (force_const_mem (mode, tmp));
28191
28192 if (target == 0)
28193 target = gen_reg_rtx (mode);
28194
28195 emit_move_insn (target, tmp);
28196 return target;
28197 }
28198
28199 case IX86_BUILTIN_LLWPCB:
28200 arg0 = CALL_EXPR_ARG (exp, 0);
28201 op0 = expand_normal (arg0);
28202 icode = CODE_FOR_lwp_llwpcb;
28203 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
28204 {
28205 if (GET_MODE (op0) != Pmode)
28206 op0 = convert_to_mode (Pmode, op0, 1);
28207 op0 = force_reg (Pmode, op0);
28208 }
28209 emit_insn (gen_lwp_llwpcb (op0));
28210 return 0;
28211
28212 case IX86_BUILTIN_SLWPCB:
28213 icode = CODE_FOR_lwp_slwpcb;
28214 if (!target
28215 || !insn_data[icode].operand[0].predicate (target, Pmode))
28216 target = gen_reg_rtx (Pmode);
28217 emit_insn (gen_lwp_slwpcb (target));
28218 return target;
28219
28220 case IX86_BUILTIN_BEXTRI32:
28221 case IX86_BUILTIN_BEXTRI64:
28222 arg0 = CALL_EXPR_ARG (exp, 0);
28223 arg1 = CALL_EXPR_ARG (exp, 1);
28224 op0 = expand_normal (arg0);
28225 op1 = expand_normal (arg1);
28226 icode = (fcode == IX86_BUILTIN_BEXTRI32
28227 ? CODE_FOR_tbm_bextri_si
28228 : CODE_FOR_tbm_bextri_di);
28229 if (!CONST_INT_P (op1))
28230 {
28231 error ("last argument must be an immediate");
28232 return const0_rtx;
28233 }
28234 else
28235 {
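	  /* The BEXTRI control word packs the bit-field length in bits 15:8
	     and the starting bit index in bits 7:0; split it into the two
	     operands the insn pattern expects.  */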
28236 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
28237 unsigned char lsb_index = INTVAL (op1) & 0xFF;
28238 op1 = GEN_INT (length);
28239 op2 = GEN_INT (lsb_index);
28240 pat = GEN_FCN (icode) (target, op0, op1, op2);
28241 if (pat)
28242 emit_insn (pat);
28243 return target;
28244 }
28245
28246 case IX86_BUILTIN_RDRAND16_STEP:
28247 icode = CODE_FOR_rdrandhi_1;
28248 mode0 = HImode;
28249 goto rdrand_step;
28250
28251 case IX86_BUILTIN_RDRAND32_STEP:
28252 icode = CODE_FOR_rdrandsi_1;
28253 mode0 = SImode;
28254 goto rdrand_step;
28255
28256 case IX86_BUILTIN_RDRAND64_STEP:
28257 icode = CODE_FOR_rdranddi_1;
28258 mode0 = DImode;
28259
28260 rdrand_step:
28261 op0 = gen_reg_rtx (mode0);
28262 emit_insn (GEN_FCN (icode) (op0));
28263
28264 arg0 = CALL_EXPR_ARG (exp, 0);
28265 op1 = expand_normal (arg0);
28266 if (!address_operand (op1, VOIDmode))
28267 {
28268 op1 = convert_memory_address (Pmode, op1);
28269 op1 = copy_addr_to_reg (op1);
28270 }
28271 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
28272
28273 op1 = gen_reg_rtx (SImode);
28274 emit_move_insn (op1, CONST1_RTX (SImode));
28275
28276 /* Emit SImode conditional move. */
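       /* RDRAND is specified to clear both the carry flag and the
	  destination register on failure, so selecting the (zero) result
	  when the carry is clear and the constant 1 otherwise yields the
	  documented 0/1 status.  */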
28277 if (mode0 == HImode)
28278 {
28279 op2 = gen_reg_rtx (SImode);
28280 emit_insn (gen_zero_extendhisi2 (op2, op0));
28281 }
28282 else if (mode0 == SImode)
28283 op2 = op0;
28284 else
28285 op2 = gen_rtx_SUBREG (SImode, op0, 0);
28286
28287 if (target == 0)
28288 target = gen_reg_rtx (SImode);
28289
28290 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
28291 const0_rtx);
28292 emit_insn (gen_rtx_SET (VOIDmode, target,
28293 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
28294 return target;
28295
28296 case IX86_BUILTIN_GATHERSIV2DF:
28297 icode = CODE_FOR_avx2_gathersiv2df;
28298 goto gather_gen;
28299 case IX86_BUILTIN_GATHERSIV4DF:
28300 icode = CODE_FOR_avx2_gathersiv4df;
28301 goto gather_gen;
28302 case IX86_BUILTIN_GATHERDIV2DF:
28303 icode = CODE_FOR_avx2_gatherdiv2df;
28304 goto gather_gen;
28305 case IX86_BUILTIN_GATHERDIV4DF:
28306 icode = CODE_FOR_avx2_gatherdiv4df;
28307 goto gather_gen;
28308 case IX86_BUILTIN_GATHERSIV4SF:
28309 icode = CODE_FOR_avx2_gathersiv4sf;
28310 goto gather_gen;
28311 case IX86_BUILTIN_GATHERSIV8SF:
28312 icode = CODE_FOR_avx2_gathersiv8sf;
28313 goto gather_gen;
28314 case IX86_BUILTIN_GATHERDIV4SF:
28315 icode = CODE_FOR_avx2_gatherdiv4sf;
28316 goto gather_gen;
28317 case IX86_BUILTIN_GATHERDIV8SF:
28318 icode = CODE_FOR_avx2_gatherdiv4sf256;
28319 goto gather_gen;
28320 case IX86_BUILTIN_GATHERSIV2DI:
28321 icode = CODE_FOR_avx2_gathersiv2di;
28322 goto gather_gen;
28323 case IX86_BUILTIN_GATHERSIV4DI:
28324 icode = CODE_FOR_avx2_gathersiv4di;
28325 goto gather_gen;
28326 case IX86_BUILTIN_GATHERDIV2DI:
28327 icode = CODE_FOR_avx2_gatherdiv2di;
28328 goto gather_gen;
28329 case IX86_BUILTIN_GATHERDIV4DI:
28330 icode = CODE_FOR_avx2_gatherdiv4di;
28331 goto gather_gen;
28332 case IX86_BUILTIN_GATHERSIV4SI:
28333 icode = CODE_FOR_avx2_gathersiv4si;
28334 goto gather_gen;
28335 case IX86_BUILTIN_GATHERSIV8SI:
28336 icode = CODE_FOR_avx2_gathersiv8si;
28337 goto gather_gen;
28338 case IX86_BUILTIN_GATHERDIV4SI:
28339 icode = CODE_FOR_avx2_gatherdiv4si;
28340 goto gather_gen;
28341 case IX86_BUILTIN_GATHERDIV8SI:
28342 icode = CODE_FOR_avx2_gatherdiv4si256;
28343
28344 gather_gen:
28345 arg0 = CALL_EXPR_ARG (exp, 0);
28346 arg1 = CALL_EXPR_ARG (exp, 1);
28347 arg2 = CALL_EXPR_ARG (exp, 2);
28348 arg3 = CALL_EXPR_ARG (exp, 3);
28349 arg4 = CALL_EXPR_ARG (exp, 4);
28350 op0 = expand_normal (arg0);
28351 op1 = expand_normal (arg1);
28352 op2 = expand_normal (arg2);
28353 op3 = expand_normal (arg3);
28354 op4 = expand_normal (arg4);
28355 /* Note the arg order is different from the operand order. */
28356 mode0 = insn_data[icode].operand[1].mode;
28357 mode1 = insn_data[icode].operand[2].mode;
28358 mode2 = insn_data[icode].operand[3].mode;
28359 mode3 = insn_data[icode].operand[4].mode;
28360 mode4 = insn_data[icode].operand[5].mode;
28361
28362 if (target == NULL_RTX)
28363 target = gen_reg_rtx (insn_data[icode].operand[0].mode);
28364
 28365       /* Force the memory operand to use only a base register here.  We
 28366 	 don't want to do this for the memory operands of other builtin
 28367 	 functions. */
28368 if (GET_MODE (op1) != Pmode)
28369 op1 = convert_to_mode (Pmode, op1, 1);
28370 op1 = force_reg (Pmode, op1);
28371 op1 = gen_rtx_MEM (mode1, op1);
28372
28373 if (!insn_data[icode].operand[1].predicate (op0, mode0))
28374 op0 = copy_to_mode_reg (mode0, op0);
28375 if (!insn_data[icode].operand[2].predicate (op1, mode1))
28376 op1 = copy_to_mode_reg (mode1, op1);
28377 if (!insn_data[icode].operand[3].predicate (op2, mode2))
28378 op2 = copy_to_mode_reg (mode2, op2);
28379 if (!insn_data[icode].operand[4].predicate (op3, mode3))
28380 op3 = copy_to_mode_reg (mode3, op3);
28381 if (!insn_data[icode].operand[5].predicate (op4, mode4))
28382 {
28383 error ("last argument must be scale 1, 2, 4, 8");
28384 return const0_rtx;
28385 }
28386 pat = GEN_FCN (icode) (target, op0, op1, op2, op3, op4);
28387 if (! pat)
28388 return const0_rtx;
28389 emit_insn (pat);
28390 return target;
28391
28392 default:
28393 break;
28394 }
28395
28396 for (i = 0, d = bdesc_special_args;
28397 i < ARRAY_SIZE (bdesc_special_args);
28398 i++, d++)
28399 if (d->code == fcode)
28400 return ix86_expand_special_args_builtin (d, exp, target);
28401
28402 for (i = 0, d = bdesc_args;
28403 i < ARRAY_SIZE (bdesc_args);
28404 i++, d++)
28405 if (d->code == fcode)
28406 switch (fcode)
28407 {
28408 case IX86_BUILTIN_FABSQ:
28409 case IX86_BUILTIN_COPYSIGNQ:
28410 if (!TARGET_SSE2)
28411 /* Emit a normal call if SSE2 isn't available. */
28412 return expand_call (exp, target, ignore);
28413 default:
28414 return ix86_expand_args_builtin (d, exp, target);
28415 }
28416
28417 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
28418 if (d->code == fcode)
28419 return ix86_expand_sse_comi (d, exp, target);
28420
28421 for (i = 0, d = bdesc_pcmpestr;
28422 i < ARRAY_SIZE (bdesc_pcmpestr);
28423 i++, d++)
28424 if (d->code == fcode)
28425 return ix86_expand_sse_pcmpestr (d, exp, target);
28426
28427 for (i = 0, d = bdesc_pcmpistr;
28428 i < ARRAY_SIZE (bdesc_pcmpistr);
28429 i++, d++)
28430 if (d->code == fcode)
28431 return ix86_expand_sse_pcmpistr (d, exp, target);
28432
28433 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
28434 if (d->code == fcode)
28435 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
28436 (enum ix86_builtin_func_type)
28437 d->flag, d->comparison);
28438
28439 gcc_unreachable ();
28440 }
28441
28442 /* Returns a function decl for a vectorized version of the builtin function
28443 with builtin function code FN and the result vector type TYPE, or NULL_TREE
28444 if it is not available. */
28445
28446 static tree
28447 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
28448 tree type_in)
28449 {
28450 enum machine_mode in_mode, out_mode;
28451 int in_n, out_n;
28452 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
28453
28454 if (TREE_CODE (type_out) != VECTOR_TYPE
28455 || TREE_CODE (type_in) != VECTOR_TYPE
28456 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
28457 return NULL_TREE;
28458
28459 out_mode = TYPE_MODE (TREE_TYPE (type_out));
28460 out_n = TYPE_VECTOR_SUBPARTS (type_out);
28461 in_mode = TYPE_MODE (TREE_TYPE (type_in));
28462 in_n = TYPE_VECTOR_SUBPARTS (type_in);
28463
28464 switch (fn)
28465 {
28466 case BUILT_IN_SQRT:
28467 if (out_mode == DFmode && in_mode == DFmode)
28468 {
28469 if (out_n == 2 && in_n == 2)
28470 return ix86_builtins[IX86_BUILTIN_SQRTPD];
28471 else if (out_n == 4 && in_n == 4)
28472 return ix86_builtins[IX86_BUILTIN_SQRTPD256];
28473 }
28474 break;
28475
28476 case BUILT_IN_SQRTF:
28477 if (out_mode == SFmode && in_mode == SFmode)
28478 {
28479 if (out_n == 4 && in_n == 4)
28480 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
28481 else if (out_n == 8 && in_n == 8)
28482 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
28483 }
28484 break;
28485
28486 case BUILT_IN_LRINT:
28487 if (out_mode == SImode && out_n == 4
28488 && in_mode == DFmode && in_n == 2)
28489 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
28490 break;
28491
28492 case BUILT_IN_LRINTF:
28493 if (out_mode == SImode && in_mode == SFmode)
28494 {
28495 if (out_n == 4 && in_n == 4)
28496 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
28497 else if (out_n == 8 && in_n == 8)
28498 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
28499 }
28500 break;
28501
28502 case BUILT_IN_COPYSIGN:
28503 if (out_mode == DFmode && in_mode == DFmode)
28504 {
28505 if (out_n == 2 && in_n == 2)
28506 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
28507 else if (out_n == 4 && in_n == 4)
28508 return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
28509 }
28510 break;
28511
28512 case BUILT_IN_COPYSIGNF:
28513 if (out_mode == SFmode && in_mode == SFmode)
28514 {
28515 if (out_n == 4 && in_n == 4)
28516 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
28517 else if (out_n == 8 && in_n == 8)
28518 return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
28519 }
28520 break;
28521
28522 case BUILT_IN_FLOOR:
28523 /* The round insn does not trap on denormals. */
28524 if (flag_trapping_math || !TARGET_ROUND)
28525 break;
28526
28527 if (out_mode == DFmode && in_mode == DFmode)
28528 {
28529 if (out_n == 2 && in_n == 2)
28530 return ix86_builtins[IX86_BUILTIN_FLOORPD];
28531 else if (out_n == 4 && in_n == 4)
28532 return ix86_builtins[IX86_BUILTIN_FLOORPD256];
28533 }
28534 break;
28535
28536 case BUILT_IN_FLOORF:
28537 /* The round insn does not trap on denormals. */
28538 if (flag_trapping_math || !TARGET_ROUND)
28539 break;
28540
28541 if (out_mode == SFmode && in_mode == SFmode)
28542 {
28543 if (out_n == 4 && in_n == 4)
28544 return ix86_builtins[IX86_BUILTIN_FLOORPS];
28545 else if (out_n == 8 && in_n == 8)
28546 return ix86_builtins[IX86_BUILTIN_FLOORPS256];
28547 }
28548 break;
28549
28550 case BUILT_IN_CEIL:
28551 /* The round insn does not trap on denormals. */
28552 if (flag_trapping_math || !TARGET_ROUND)
28553 break;
28554
28555 if (out_mode == DFmode && in_mode == DFmode)
28556 {
28557 if (out_n == 2 && in_n == 2)
28558 return ix86_builtins[IX86_BUILTIN_CEILPD];
28559 else if (out_n == 4 && in_n == 4)
28560 return ix86_builtins[IX86_BUILTIN_CEILPD256];
28561 }
28562 break;
28563
28564 case BUILT_IN_CEILF:
28565 /* The round insn does not trap on denormals. */
28566 if (flag_trapping_math || !TARGET_ROUND)
28567 break;
28568
28569 if (out_mode == SFmode && in_mode == SFmode)
28570 {
28571 if (out_n == 4 && in_n == 4)
28572 return ix86_builtins[IX86_BUILTIN_CEILPS];
28573 else if (out_n == 8 && in_n == 8)
28574 return ix86_builtins[IX86_BUILTIN_CEILPS256];
28575 }
28576 break;
28577
28578 case BUILT_IN_TRUNC:
28579 /* The round insn does not trap on denormals. */
28580 if (flag_trapping_math || !TARGET_ROUND)
28581 break;
28582
28583 if (out_mode == DFmode && in_mode == DFmode)
28584 {
28585 if (out_n == 2 && in_n == 2)
28586 return ix86_builtins[IX86_BUILTIN_TRUNCPD];
28587 else if (out_n == 4 && in_n == 4)
28588 return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
28589 }
28590 break;
28591
28592 case BUILT_IN_TRUNCF:
28593 /* The round insn does not trap on denormals. */
28594 if (flag_trapping_math || !TARGET_ROUND)
28595 break;
28596
28597 if (out_mode == SFmode && in_mode == SFmode)
28598 {
28599 if (out_n == 4 && in_n == 4)
28600 return ix86_builtins[IX86_BUILTIN_TRUNCPS];
28601 else if (out_n == 8 && in_n == 8)
28602 return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
28603 }
28604 break;
28605
28606 case BUILT_IN_RINT:
28607 /* The round insn does not trap on denormals. */
28608 if (flag_trapping_math || !TARGET_ROUND)
28609 break;
28610
28611 if (out_mode == DFmode && in_mode == DFmode)
28612 {
28613 if (out_n == 2 && in_n == 2)
28614 return ix86_builtins[IX86_BUILTIN_RINTPD];
28615 else if (out_n == 4 && in_n == 4)
28616 return ix86_builtins[IX86_BUILTIN_RINTPD256];
28617 }
28618 break;
28619
28620 case BUILT_IN_RINTF:
28621 /* The round insn does not trap on denormals. */
28622 if (flag_trapping_math || !TARGET_ROUND)
28623 break;
28624
28625 if (out_mode == SFmode && in_mode == SFmode)
28626 {
28627 if (out_n == 4 && in_n == 4)
28628 return ix86_builtins[IX86_BUILTIN_RINTPS];
28629 else if (out_n == 8 && in_n == 8)
28630 return ix86_builtins[IX86_BUILTIN_RINTPS256];
28631 }
28632 break;
28633
28634 case BUILT_IN_ROUND:
28635 /* The round insn does not trap on denormals. */
28636 if (flag_trapping_math || !TARGET_ROUND)
28637 break;
28638
28639 if (out_mode == DFmode && in_mode == DFmode)
28640 {
28641 if (out_n == 2 && in_n == 2)
28642 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ];
28643 else if (out_n == 4 && in_n == 4)
28644 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ256];
28645 }
28646 break;
28647
28648 case BUILT_IN_ROUNDF:
28649 /* The round insn does not trap on denormals. */
28650 if (flag_trapping_math || !TARGET_ROUND)
28651 break;
28652
28653 if (out_mode == SFmode && in_mode == SFmode)
28654 {
28655 if (out_n == 4 && in_n == 4)
28656 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ];
28657 else if (out_n == 8 && in_n == 8)
28658 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ256];
28659 }
28660 break;
28661
28662 case BUILT_IN_FMA:
28663 if (out_mode == DFmode && in_mode == DFmode)
28664 {
28665 if (out_n == 2 && in_n == 2)
28666 return ix86_builtins[IX86_BUILTIN_VFMADDPD];
28667 if (out_n == 4 && in_n == 4)
28668 return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
28669 }
28670 break;
28671
28672 case BUILT_IN_FMAF:
28673 if (out_mode == SFmode && in_mode == SFmode)
28674 {
28675 if (out_n == 4 && in_n == 4)
28676 return ix86_builtins[IX86_BUILTIN_VFMADDPS];
28677 if (out_n == 8 && in_n == 8)
28678 return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
28679 }
28680 break;
28681
28682 default:
28683 break;
28684 }
28685
28686 /* Dispatch to a handler for a vectorization library. */
28687 if (ix86_veclib_handler)
28688 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
28689 type_in);
28690
28691 return NULL_TREE;
28692 }
28693
28694 /* Handler for an SVML-style interface to
28695 a library with vectorized intrinsics. */
28696
28697 static tree
28698 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
28699 {
28700 char name[20];
28701 tree fntype, new_fndecl, args;
28702 unsigned arity;
28703 const char *bname;
28704 enum machine_mode el_mode, in_mode;
28705 int n, in_n;
28706
 28707   /* SVML is suitable for unsafe math only. */
28708 if (!flag_unsafe_math_optimizations)
28709 return NULL_TREE;
28710
28711 el_mode = TYPE_MODE (TREE_TYPE (type_out));
28712 n = TYPE_VECTOR_SUBPARTS (type_out);
28713 in_mode = TYPE_MODE (TREE_TYPE (type_in));
28714 in_n = TYPE_VECTOR_SUBPARTS (type_in);
28715 if (el_mode != in_mode
28716 || n != in_n)
28717 return NULL_TREE;
28718
28719 switch (fn)
28720 {
28721 case BUILT_IN_EXP:
28722 case BUILT_IN_LOG:
28723 case BUILT_IN_LOG10:
28724 case BUILT_IN_POW:
28725 case BUILT_IN_TANH:
28726 case BUILT_IN_TAN:
28727 case BUILT_IN_ATAN:
28728 case BUILT_IN_ATAN2:
28729 case BUILT_IN_ATANH:
28730 case BUILT_IN_CBRT:
28731 case BUILT_IN_SINH:
28732 case BUILT_IN_SIN:
28733 case BUILT_IN_ASINH:
28734 case BUILT_IN_ASIN:
28735 case BUILT_IN_COSH:
28736 case BUILT_IN_COS:
28737 case BUILT_IN_ACOSH:
28738 case BUILT_IN_ACOS:
28739 if (el_mode != DFmode || n != 2)
28740 return NULL_TREE;
28741 break;
28742
28743 case BUILT_IN_EXPF:
28744 case BUILT_IN_LOGF:
28745 case BUILT_IN_LOG10F:
28746 case BUILT_IN_POWF:
28747 case BUILT_IN_TANHF:
28748 case BUILT_IN_TANF:
28749 case BUILT_IN_ATANF:
28750 case BUILT_IN_ATAN2F:
28751 case BUILT_IN_ATANHF:
28752 case BUILT_IN_CBRTF:
28753 case BUILT_IN_SINHF:
28754 case BUILT_IN_SINF:
28755 case BUILT_IN_ASINHF:
28756 case BUILT_IN_ASINF:
28757 case BUILT_IN_COSHF:
28758 case BUILT_IN_COSF:
28759 case BUILT_IN_ACOSHF:
28760 case BUILT_IN_ACOSF:
28761 if (el_mode != SFmode || n != 4)
28762 return NULL_TREE;
28763 break;
28764
28765 default:
28766 return NULL_TREE;
28767 }
28768
28769 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
28770
28771 if (fn == BUILT_IN_LOGF)
28772 strcpy (name, "vmlsLn4");
28773 else if (fn == BUILT_IN_LOG)
28774 strcpy (name, "vmldLn2");
28775 else if (n == 4)
28776 {
28777 sprintf (name, "vmls%s", bname+10);
28778 name[strlen (name)-1] = '4';
28779 }
28780 else
28781 sprintf (name, "vmld%s2", bname+10);
28782
28783 /* Convert to uppercase. */
28784 name[4] &= ~0x20;
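   /* For example, BUILT_IN_SIN on a 2-element double vector becomes
      "vmldSin2", and BUILT_IN_SINF on a 4-element float vector becomes
      "vmlsSin4".  */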
28785
28786 arity = 0;
28787 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
28788 args = TREE_CHAIN (args))
28789 arity++;
28790
28791 if (arity == 1)
28792 fntype = build_function_type_list (type_out, type_in, NULL);
28793 else
28794 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
28795
28796 /* Build a function declaration for the vectorized function. */
28797 new_fndecl = build_decl (BUILTINS_LOCATION,
28798 FUNCTION_DECL, get_identifier (name), fntype);
28799 TREE_PUBLIC (new_fndecl) = 1;
28800 DECL_EXTERNAL (new_fndecl) = 1;
28801 DECL_IS_NOVOPS (new_fndecl) = 1;
28802 TREE_READONLY (new_fndecl) = 1;
28803
28804 return new_fndecl;
28805 }
28806
28807 /* Handler for an ACML-style interface to
28808 a library with vectorized intrinsics. */
28809
28810 static tree
28811 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
28812 {
28813 char name[20] = "__vr.._";
28814 tree fntype, new_fndecl, args;
28815 unsigned arity;
28816 const char *bname;
28817 enum machine_mode el_mode, in_mode;
28818 int n, in_n;
28819
 28820   /* ACML is 64-bit only and suitable for unsafe math only, as it
 28821      does not correctly support parts of IEEE with the required
 28822      precision, such as denormals. */
28823 if (!TARGET_64BIT
28824 || !flag_unsafe_math_optimizations)
28825 return NULL_TREE;
28826
28827 el_mode = TYPE_MODE (TREE_TYPE (type_out));
28828 n = TYPE_VECTOR_SUBPARTS (type_out);
28829 in_mode = TYPE_MODE (TREE_TYPE (type_in));
28830 in_n = TYPE_VECTOR_SUBPARTS (type_in);
28831 if (el_mode != in_mode
28832 || n != in_n)
28833 return NULL_TREE;
28834
28835 switch (fn)
28836 {
28837 case BUILT_IN_SIN:
28838 case BUILT_IN_COS:
28839 case BUILT_IN_EXP:
28840 case BUILT_IN_LOG:
28841 case BUILT_IN_LOG2:
28842 case BUILT_IN_LOG10:
28843 name[4] = 'd';
28844 name[5] = '2';
28845 if (el_mode != DFmode
28846 || n != 2)
28847 return NULL_TREE;
28848 break;
28849
28850 case BUILT_IN_SINF:
28851 case BUILT_IN_COSF:
28852 case BUILT_IN_EXPF:
28853 case BUILT_IN_POWF:
28854 case BUILT_IN_LOGF:
28855 case BUILT_IN_LOG2F:
28856 case BUILT_IN_LOG10F:
28857 name[4] = 's';
28858 name[5] = '4';
28859 if (el_mode != SFmode
28860 || n != 4)
28861 return NULL_TREE;
28862 break;
28863
28864 default:
28865 return NULL_TREE;
28866 }
28867
28868 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
28869 sprintf (name + 7, "%s", bname+10);
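   /* For example, BUILT_IN_SIN maps to "__vrd2_sin" and BUILT_IN_SINF
      maps to "__vrs4_sinf".  */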
28870
28871 arity = 0;
28872 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
28873 args = TREE_CHAIN (args))
28874 arity++;
28875
28876 if (arity == 1)
28877 fntype = build_function_type_list (type_out, type_in, NULL);
28878 else
28879 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
28880
28881 /* Build a function declaration for the vectorized function. */
28882 new_fndecl = build_decl (BUILTINS_LOCATION,
28883 FUNCTION_DECL, get_identifier (name), fntype);
28884 TREE_PUBLIC (new_fndecl) = 1;
28885 DECL_EXTERNAL (new_fndecl) = 1;
28886 DECL_IS_NOVOPS (new_fndecl) = 1;
28887 TREE_READONLY (new_fndecl) = 1;
28888
28889 return new_fndecl;
28890 }
28891
28892
28893 /* Returns a decl of a function that implements conversion of an integer vector
28894 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
28895 are the types involved when converting according to CODE.
28896 Return NULL_TREE if it is not available. */
28897
28898 static tree
28899 ix86_vectorize_builtin_conversion (unsigned int code,
28900 tree dest_type, tree src_type)
28901 {
28902 if (! TARGET_SSE2)
28903 return NULL_TREE;
28904
28905 switch (code)
28906 {
28907 case FLOAT_EXPR:
28908 switch (TYPE_MODE (src_type))
28909 {
28910 case V4SImode:
28911 switch (TYPE_MODE (dest_type))
28912 {
28913 case V4SFmode:
28914 return (TYPE_UNSIGNED (src_type)
28915 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
28916 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
28917 case V4DFmode:
28918 return (TYPE_UNSIGNED (src_type)
28919 ? NULL_TREE
28920 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
28921 default:
28922 return NULL_TREE;
28923 }
28924 break;
28925 case V8SImode:
28926 switch (TYPE_MODE (dest_type))
28927 {
28928 case V8SFmode:
28929 return (TYPE_UNSIGNED (src_type)
28930 ? NULL_TREE
28931 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS256]);
28932 default:
28933 return NULL_TREE;
28934 }
28935 break;
28936 default:
28937 return NULL_TREE;
28938 }
28939
28940 case FIX_TRUNC_EXPR:
28941 switch (TYPE_MODE (dest_type))
28942 {
28943 case V4SImode:
28944 switch (TYPE_MODE (src_type))
28945 {
28946 case V4SFmode:
28947 return (TYPE_UNSIGNED (dest_type)
28948 ? NULL_TREE
28949 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
28950 case V4DFmode:
28951 return (TYPE_UNSIGNED (dest_type)
28952 ? NULL_TREE
28953 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
28954 default:
28955 return NULL_TREE;
28956 }
28957 break;
28958
28959 case V8SImode:
28960 switch (TYPE_MODE (src_type))
28961 {
28962 case V8SFmode:
28963 return (TYPE_UNSIGNED (dest_type)
28964 ? NULL_TREE
28965 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
28966 default:
28967 return NULL_TREE;
28968 }
28969 break;
28970
28971 default:
28972 return NULL_TREE;
28973 }
28974
28975 default:
28976 return NULL_TREE;
28977 }
28978
28979 return NULL_TREE;
28980 }
28981
28982 /* Returns a code for a target-specific builtin that implements
28983 reciprocal of the function, or NULL_TREE if not available. */
28984
28985 static tree
28986 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
28987 bool sqrt ATTRIBUTE_UNUSED)
28988 {
28989 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
28990 && flag_finite_math_only && !flag_trapping_math
28991 && flag_unsafe_math_optimizations))
28992 return NULL_TREE;
28993
28994 if (md_fn)
28995 /* Machine dependent builtins. */
28996 switch (fn)
28997 {
28998 /* Vectorized version of sqrt to rsqrt conversion. */
28999 case IX86_BUILTIN_SQRTPS_NR:
29000 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
29001
29002 case IX86_BUILTIN_SQRTPS_NR256:
29003 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
29004
29005 default:
29006 return NULL_TREE;
29007 }
29008 else
29009 /* Normal builtins. */
29010 switch (fn)
29011 {
29012 /* Sqrt to rsqrt conversion. */
29013 case BUILT_IN_SQRTF:
29014 return ix86_builtins[IX86_BUILTIN_RSQRTF];
29015
29016 default:
29017 return NULL_TREE;
29018 }
29019 }
29020 \f
29021 /* Helper for avx_vpermilps256_operand et al. This is also used by
29022 the expansion functions to turn the parallel back into a mask.
29023 The return value is 0 for no match and the imm8+1 for a match. */
29024
29025 int
29026 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
29027 {
29028 unsigned i, nelt = GET_MODE_NUNITS (mode);
29029 unsigned mask = 0;
29030 unsigned char ipar[8];
29031
29032 if (XVECLEN (par, 0) != (int) nelt)
29033 return 0;
29034
29035 /* Validate that all of the elements are constants, and not totally
29036 out of range. Copy the data into an integral array to make the
29037 subsequent checks easier. */
29038 for (i = 0; i < nelt; ++i)
29039 {
29040 rtx er = XVECEXP (par, 0, i);
29041 unsigned HOST_WIDE_INT ei;
29042
29043 if (!CONST_INT_P (er))
29044 return 0;
29045 ei = INTVAL (er);
29046 if (ei >= nelt)
29047 return 0;
29048 ipar[i] = ei;
29049 }
29050
29051 switch (mode)
29052 {
29053 case V4DFmode:
29054 /* In the 256-bit DFmode case, we can only move elements within
29055 a 128-bit lane. */
29056 for (i = 0; i < 2; ++i)
29057 {
29058 if (ipar[i] >= 2)
29059 return 0;
29060 mask |= ipar[i] << i;
29061 }
29062 for (i = 2; i < 4; ++i)
29063 {
29064 if (ipar[i] < 2)
29065 return 0;
29066 mask |= (ipar[i] - 2) << i;
29067 }
29068 break;
29069
29070 case V8SFmode:
29071 /* In the 256-bit SFmode case, we have full freedom of movement
29072 within the low 128-bit lane, but the high 128-bit lane must
29073 mirror the exact same pattern. */
29074 for (i = 0; i < 4; ++i)
29075 if (ipar[i] + 4 != ipar[i + 4])
29076 return 0;
29077 nelt = 4;
29078 /* FALLTHRU */
29079
29080 case V2DFmode:
29081 case V4SFmode:
 29082       /* In the 128-bit case, we have full freedom in the placement of
29083 the elements from the source operand. */
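       /* For example, in V4SFmode the parallel (1 0 3 2) yields the
	  2-bits-per-element immediate 0xb1, returned below as 0xb2.  */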
29084 for (i = 0; i < nelt; ++i)
29085 mask |= ipar[i] << (i * (nelt / 2));
29086 break;
29087
29088 default:
29089 gcc_unreachable ();
29090 }
29091
29092 /* Make sure success has a non-zero value by adding one. */
29093 return mask + 1;
29094 }
29095
29096 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
29097 the expansion functions to turn the parallel back into a mask.
29098 The return value is 0 for no match and the imm8+1 for a match. */
29099
29100 int
29101 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
29102 {
29103 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
29104 unsigned mask = 0;
29105 unsigned char ipar[8];
29106
29107 if (XVECLEN (par, 0) != (int) nelt)
29108 return 0;
29109
29110 /* Validate that all of the elements are constants, and not totally
29111 out of range. Copy the data into an integral array to make the
29112 subsequent checks easier. */
29113 for (i = 0; i < nelt; ++i)
29114 {
29115 rtx er = XVECEXP (par, 0, i);
29116 unsigned HOST_WIDE_INT ei;
29117
29118 if (!CONST_INT_P (er))
29119 return 0;
29120 ei = INTVAL (er);
29121 if (ei >= 2 * nelt)
29122 return 0;
29123 ipar[i] = ei;
29124 }
29125
29126 /* Validate that the halves of the permute are halves. */
29127 for (i = 0; i < nelt2 - 1; ++i)
29128 if (ipar[i] + 1 != ipar[i + 1])
29129 return 0;
29130 for (i = nelt2; i < nelt - 1; ++i)
29131 if (ipar[i] + 1 != ipar[i + 1])
29132 return 0;
29133
29134 /* Reconstruct the mask. */
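   /* For example, in V4DFmode the parallel (2 3 0 1) reconstructs the
      immediate 0x01 (returned as 0x02), i.e. lane 1 selected for the
      low half of the result and lane 0 for the high half.  */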
29135 for (i = 0; i < 2; ++i)
29136 {
29137 unsigned e = ipar[i * nelt2];
29138 if (e % nelt2)
29139 return 0;
29140 e /= nelt2;
29141 mask |= e << (i * 4);
29142 }
29143
29144 /* Make sure success has a non-zero value by adding one. */
29145 return mask + 1;
29146 }
29147 \f
29148
29149 /* Store OPERAND to the memory after reload is completed. This means
29150 that we can't easily use assign_stack_local. */
29151 rtx
29152 ix86_force_to_memory (enum machine_mode mode, rtx operand)
29153 {
29154 rtx result;
29155
29156 gcc_assert (reload_completed);
29157 if (ix86_using_red_zone ())
29158 {
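       /* The 64-bit SysV ABI guarantees a 128-byte red zone below the
	  stack pointer, so we can store there without first adjusting
	  the stack pointer.  */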
29159 result = gen_rtx_MEM (mode,
29160 gen_rtx_PLUS (Pmode,
29161 stack_pointer_rtx,
29162 GEN_INT (-RED_ZONE_SIZE)));
29163 emit_move_insn (result, operand);
29164 }
29165 else if (TARGET_64BIT)
29166 {
29167 switch (mode)
29168 {
29169 case HImode:
29170 case SImode:
29171 operand = gen_lowpart (DImode, operand);
29172 /* FALLTHRU */
29173 case DImode:
29174 emit_insn (
29175 gen_rtx_SET (VOIDmode,
29176 gen_rtx_MEM (DImode,
29177 gen_rtx_PRE_DEC (DImode,
29178 stack_pointer_rtx)),
29179 operand));
29180 break;
29181 default:
29182 gcc_unreachable ();
29183 }
29184 result = gen_rtx_MEM (mode, stack_pointer_rtx);
29185 }
29186 else
29187 {
29188 switch (mode)
29189 {
29190 case DImode:
29191 {
29192 rtx operands[2];
29193 split_double_mode (mode, &operand, 1, operands, operands + 1);
29194 emit_insn (
29195 gen_rtx_SET (VOIDmode,
29196 gen_rtx_MEM (SImode,
29197 gen_rtx_PRE_DEC (Pmode,
29198 stack_pointer_rtx)),
29199 operands[1]));
29200 emit_insn (
29201 gen_rtx_SET (VOIDmode,
29202 gen_rtx_MEM (SImode,
29203 gen_rtx_PRE_DEC (Pmode,
29204 stack_pointer_rtx)),
29205 operands[0]));
29206 }
29207 break;
29208 case HImode:
29209 /* Store HImodes as SImodes. */
29210 operand = gen_lowpart (SImode, operand);
29211 /* FALLTHRU */
29212 case SImode:
29213 emit_insn (
29214 gen_rtx_SET (VOIDmode,
29215 gen_rtx_MEM (GET_MODE (operand),
29216 gen_rtx_PRE_DEC (SImode,
29217 stack_pointer_rtx)),
29218 operand));
29219 break;
29220 default:
29221 gcc_unreachable ();
29222 }
29223 result = gen_rtx_MEM (mode, stack_pointer_rtx);
29224 }
29225 return result;
29226 }
29227
29228 /* Free operand from the memory. */
29229 void
29230 ix86_free_from_memory (enum machine_mode mode)
29231 {
29232 if (!ix86_using_red_zone ())
29233 {
29234 int size;
29235
29236 if (mode == DImode || TARGET_64BIT)
29237 size = 8;
29238 else
29239 size = 4;
29240 /* Use LEA to deallocate stack space. In peephole2 it will be converted
 29241 	 to a pop or add instruction if registers are available. */
29242 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
29243 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
29244 GEN_INT (size))));
29245 }
29246 }
29247
29248 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
29249
29250 Put float CONST_DOUBLE in the constant pool instead of fp regs.
29251 QImode must go into class Q_REGS.
29252 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
29253 movdf to do mem-to-mem moves through integer regs. */
29254
29255 static reg_class_t
29256 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
29257 {
29258 enum machine_mode mode = GET_MODE (x);
29259
29260 /* We're only allowed to return a subclass of CLASS. Many of the
29261 following checks fail for NO_REGS, so eliminate that early. */
29262 if (regclass == NO_REGS)
29263 return NO_REGS;
29264
29265 /* All classes can load zeros. */
29266 if (x == CONST0_RTX (mode))
29267 return regclass;
29268
29269 /* Force constants into memory if we are loading a (nonzero) constant into
29270 an MMX or SSE register. This is because there are no MMX/SSE instructions
29271 to load from a constant. */
29272 if (CONSTANT_P (x)
29273 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
29274 return NO_REGS;
29275
29276 /* Prefer SSE regs only, if we can use them for math. */
29277 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
29278 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
29279
29280 /* Floating-point constants need more complex checks. */
29281 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
29282 {
29283 /* General regs can load everything. */
29284 if (reg_class_subset_p (regclass, GENERAL_REGS))
29285 return regclass;
29286
29287 /* Floats can load 0 and 1 plus some others. Note that we eliminated
29288 zero above. We only want to wind up preferring 80387 registers if
29289 we plan on doing computation with them. */
29290 if (TARGET_80387
29291 && standard_80387_constant_p (x) > 0)
29292 {
29293 /* Limit class to non-sse. */
29294 if (regclass == FLOAT_SSE_REGS)
29295 return FLOAT_REGS;
29296 if (regclass == FP_TOP_SSE_REGS)
29297 return FP_TOP_REG;
29298 if (regclass == FP_SECOND_SSE_REGS)
29299 return FP_SECOND_REG;
29300 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
29301 return regclass;
29302 }
29303
29304 return NO_REGS;
29305 }
29306
29307 /* Generally when we see PLUS here, it's the function invariant
29308 (plus soft-fp const_int). Which can only be computed into general
29309 regs. */
29310 if (GET_CODE (x) == PLUS)
29311 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
29312
29313 /* QImode constants are easy to load, but non-constant QImode data
29314 must go into Q_REGS. */
29315 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
29316 {
29317 if (reg_class_subset_p (regclass, Q_REGS))
29318 return regclass;
29319 if (reg_class_subset_p (Q_REGS, regclass))
29320 return Q_REGS;
29321 return NO_REGS;
29322 }
29323
29324 return regclass;
29325 }
29326
29327 /* Discourage putting floating-point values in SSE registers unless
29328 SSE math is being used, and likewise for the 387 registers. */
29329 static reg_class_t
29330 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
29331 {
29332 enum machine_mode mode = GET_MODE (x);
29333
29334 /* Restrict the output reload class to the register bank that we are doing
29335 math on. If we would like not to return a subset of CLASS, reject this
29336 alternative: if reload cannot do this, it will still use its choice. */
29337 mode = GET_MODE (x);
29338 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
29339 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
29340
29341 if (X87_FLOAT_MODE_P (mode))
29342 {
29343 if (regclass == FP_TOP_SSE_REGS)
29344 return FP_TOP_REG;
29345 else if (regclass == FP_SECOND_SSE_REGS)
29346 return FP_SECOND_REG;
29347 else
29348 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
29349 }
29350
29351 return regclass;
29352 }
29353
29354 static reg_class_t
29355 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
29356 enum machine_mode mode, secondary_reload_info *sri)
29357 {
29358 /* Double-word spills from general registers to non-offsettable memory
29359 references (zero-extended addresses) require special handling. */
29360 if (TARGET_64BIT
29361 && MEM_P (x)
29362 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
29363 && rclass == GENERAL_REGS
29364 && !offsettable_memref_p (x))
29365 {
29366 sri->icode = (in_p
29367 ? CODE_FOR_reload_noff_load
29368 : CODE_FOR_reload_noff_store);
29369 /* Add the cost of moving address to a temporary. */
29370 sri->extra_cost = 1;
29371
29372 return NO_REGS;
29373 }
29374
29375 /* QImode spills from non-QI registers require
29376 intermediate register on 32bit targets. */
29377 if (!TARGET_64BIT
29378 && !in_p && mode == QImode
29379 && (rclass == GENERAL_REGS
29380 || rclass == LEGACY_REGS
29381 || rclass == INDEX_REGS))
29382 {
29383 int regno;
29384
29385 if (REG_P (x))
29386 regno = REGNO (x);
29387 else
29388 regno = -1;
29389
29390 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
29391 regno = true_regnum (x);
29392
29393 /* Return Q_REGS if the operand is in memory. */
29394 if (regno == -1)
29395 return Q_REGS;
29396 }
29397
29398 /* This condition handles corner case where an expression involving
29399 pointers gets vectorized. We're trying to use the address of a
29400 stack slot as a vector initializer.
29401
29402 (set (reg:V2DI 74 [ vect_cst_.2 ])
29403 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
29404
29405 Eventually frame gets turned into sp+offset like this:
29406
29407 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
29408 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
29409 (const_int 392 [0x188]))))
29410
29411 That later gets turned into:
29412
29413 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
29414 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
29415 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
29416
29417 We'll have the following reload recorded:
29418
29419 Reload 0: reload_in (DI) =
29420 (plus:DI (reg/f:DI 7 sp)
29421 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
29422 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
29423 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
29424 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
29425 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
29426 reload_reg_rtx: (reg:V2DI 22 xmm1)
29427
29428 Which isn't going to work since SSE instructions can't handle scalar
29429 additions. Returning GENERAL_REGS forces the addition into integer
29430 register and reload can handle subsequent reloads without problems. */
29431
29432 if (in_p && GET_CODE (x) == PLUS
29433 && SSE_CLASS_P (rclass)
29434 && SCALAR_INT_MODE_P (mode))
29435 return GENERAL_REGS;
29436
29437 return NO_REGS;
29438 }
29439
29440 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
29441
29442 static bool
29443 ix86_class_likely_spilled_p (reg_class_t rclass)
29444 {
29445 switch (rclass)
29446 {
29447 case AREG:
29448 case DREG:
29449 case CREG:
29450 case BREG:
29451 case AD_REGS:
29452 case SIREG:
29453 case DIREG:
29454 case SSE_FIRST_REG:
29455 case FP_TOP_REG:
29456 case FP_SECOND_REG:
29457 return true;
29458
29459 default:
29460 break;
29461 }
29462
29463 return false;
29464 }
29465
29466 /* If we are copying between general and FP registers, we need a memory
29467 location. The same is true for SSE and MMX registers.
29468
29469 To optimize register_move_cost performance, allow inline variant.
29470
29471 The macro can't work reliably when one of the CLASSES is a class containing
29472 registers from multiple units (SSE, MMX, integer). We avoid this by never
29473 combining those units in a single alternative in the machine description.
29474 Ensure that this constraint holds to avoid unexpected surprises.
29475
29476 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
29477 enforce these sanity checks. */
29478
29479 static inline bool
29480 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
29481 enum machine_mode mode, int strict)
29482 {
29483 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
29484 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
29485 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
29486 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
29487 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
29488 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
29489 {
29490 gcc_assert (!strict);
29491 return true;
29492 }
29493
29494 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
29495 return true;
29496
29497 /* ??? This is a lie. We do have moves between mmx/general, and between
29498 mmx/sse2. But by saying we need secondary memory we discourage the
29499 register allocator from using the mmx registers unless needed. */
29500 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
29501 return true;
29502
29503 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
29504 {
29505 /* SSE1 doesn't have any direct moves from other classes. */
29506 if (!TARGET_SSE2)
29507 return true;
29508
29509 /* If the target says that inter-unit moves are more expensive
29510 than moving through memory, then don't generate them. */
29511 if (!TARGET_INTER_UNIT_MOVES)
29512 return true;
29513
29514 /* Between SSE and general, we have moves no larger than word size. */
29515 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
29516 return true;
29517 }
29518
29519 return false;
29520 }
29521
29522 bool
29523 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
29524 enum machine_mode mode, int strict)
29525 {
29526 return inline_secondary_memory_needed (class1, class2, mode, strict);
29527 }
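
/* For example (illustrative only): with the checks above, copying a
   DImode value between GENERAL_REGS and SSE_REGS on a 32-bit target
   reports that secondary memory is needed, because direct SSE<->integer
   moves are limited to at most word size (and to TARGET_INTER_UNIT_MOVES
   targets), so the copy is routed through a stack slot instead.  */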
29528
29529 /* Implement the TARGET_CLASS_MAX_NREGS hook.
29530
29531 On the 80386, this is the size of MODE in words,
29532 except in the FP regs, where a single reg is always enough. */
29533
29534 static unsigned char
29535 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
29536 {
29537 if (MAYBE_INTEGER_CLASS_P (rclass))
29538 {
29539 if (mode == XFmode)
29540 return (TARGET_64BIT ? 2 : 3);
29541 else if (mode == XCmode)
29542 return (TARGET_64BIT ? 4 : 6);
29543 else
29544 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
29545 }
29546 else
29547 {
29548 if (COMPLEX_MODE_P (mode))
29549 return 2;
29550 else
29551 return 1;
29552 }
29553 }
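
/* A few illustrative values from the hook above (assuming the usual
   mode sizes):  DFmode in GENERAL_REGS needs 2 registers on a 32-bit
   target and 1 on a 64-bit target; XFmode in GENERAL_REGS needs 3 and
   2 respectively; any scalar mode in FLOAT_REGS or SSE_REGS needs just
   1 register, and a complex mode needs 2.  */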
29554
29555 /* Return true if the registers in REGCLASS cannot represent the change from
29556 modes FROM to TO. */
29557
29558 bool
29559 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
29560 enum reg_class regclass)
29561 {
29562 if (from == to)
29563 return false;
29564
29565 /* x87 registers can't do subreg at all, as all values are reformatted
29566 to extended precision. */
29567 if (MAYBE_FLOAT_CLASS_P (regclass))
29568 return true;
29569
29570 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
29571 {
29572 /* Vector registers do not support QI or HImode loads. If we don't
29573 disallow a change to these modes, reload will assume it's ok to
29574 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
29575 the vec_dupv4hi pattern. */
29576 if (GET_MODE_SIZE (from) < 4)
29577 return true;
29578
29579 /* Vector registers do not support subreg with nonzero offsets, which
29580 are otherwise valid for integer registers. Since we can't see
29581 whether we have a nonzero offset from here, prohibit all
29582 nonparadoxical subregs changing size. */
29583 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
29584 return true;
29585 }
29586
29587 return false;
29588 }
29589
29590 /* Return the cost of moving data of mode M between a
29591 register and memory. A value of 2 is the default; this cost is
29592 relative to those in `REGISTER_MOVE_COST'.
29593
29594 This function is used extensively by register_move_cost, which is used to
29595 build tables at startup, so keep it inline.
29596 When IN is 2, return the maximum of the in and out move costs.
29597
29598 If moving between registers and memory is more expensive than
29599 between two registers, you should define this macro to express the
29600 relative cost.
29601
29602 Also model the increased cost of moving QImode registers in
29603 non-Q_REGS classes.
29604 */
29605 static inline int
29606 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
29607 int in)
29608 {
29609 int cost;
29610 if (FLOAT_CLASS_P (regclass))
29611 {
29612 int index;
29613 switch (mode)
29614 {
29615 case SFmode:
29616 index = 0;
29617 break;
29618 case DFmode:
29619 index = 1;
29620 break;
29621 case XFmode:
29622 index = 2;
29623 break;
29624 default:
29625 return 100;
29626 }
29627 if (in == 2)
29628 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
29629 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
29630 }
29631 if (SSE_CLASS_P (regclass))
29632 {
29633 int index;
29634 switch (GET_MODE_SIZE (mode))
29635 {
29636 case 4:
29637 index = 0;
29638 break;
29639 case 8:
29640 index = 1;
29641 break;
29642 case 16:
29643 index = 2;
29644 break;
29645 default:
29646 return 100;
29647 }
29648 if (in == 2)
29649 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
29650 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
29651 }
29652 if (MMX_CLASS_P (regclass))
29653 {
29654 int index;
29655 switch (GET_MODE_SIZE (mode))
29656 {
29657 case 4:
29658 index = 0;
29659 break;
29660 case 8:
29661 index = 1;
29662 break;
29663 default:
29664 return 100;
29665 }
29666 if (in == 2)
29667 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
29668 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
29669 }
29670 switch (GET_MODE_SIZE (mode))
29671 {
29672 case 1:
29673 if (Q_CLASS_P (regclass) || TARGET_64BIT)
29674 {
29675 if (!in)
29676 return ix86_cost->int_store[0];
29677 if (TARGET_PARTIAL_REG_DEPENDENCY
29678 && optimize_function_for_speed_p (cfun))
29679 cost = ix86_cost->movzbl_load;
29680 else
29681 cost = ix86_cost->int_load[0];
29682 if (in == 2)
29683 return MAX (cost, ix86_cost->int_store[0]);
29684 return cost;
29685 }
29686 else
29687 {
29688 if (in == 2)
29689 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
29690 if (in)
29691 return ix86_cost->movzbl_load;
29692 else
29693 return ix86_cost->int_store[0] + 4;
29694 }
29695 break;
29696 case 2:
29697 if (in == 2)
29698 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
29699 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
29700 default:
29701 /* Compute the number of word-sized moves needed. TFmode is moved as XFmode. */
29702 if (mode == TFmode)
29703 mode = XFmode;
29704 if (in == 2)
29705 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
29706 else if (in)
29707 cost = ix86_cost->int_load[2];
29708 else
29709 cost = ix86_cost->int_store[2];
29710 return (cost * (((int) GET_MODE_SIZE (mode)
29711 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
29712 }
29713 }
29714
29715 static int
29716 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
29717 bool in)
29718 {
29719 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
29720 }
29721
29722
29723 /* Return the cost of moving data from a register in class CLASS1 to
29724 one in class CLASS2.
29725
29726 It is not required that the cost always equal 2 when FROM is the same as TO;
29727 on some machines it is expensive to move between registers if they are not
29728 general registers. */
29729
29730 static int
29731 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
29732 reg_class_t class2_i)
29733 {
29734 enum reg_class class1 = (enum reg_class) class1_i;
29735 enum reg_class class2 = (enum reg_class) class2_i;
29736
29737 /* If we require secondary memory, compute the cost of the store followed
29738 by the load. To avoid bad register allocation choices, we need this
29739 to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
29740
29741 if (inline_secondary_memory_needed (class1, class2, mode, 0))
29742 {
29743 int cost = 1;
29744
29745 cost += inline_memory_move_cost (mode, class1, 2);
29746 cost += inline_memory_move_cost (mode, class2, 2);
29747
29748 /* When copying from a general purpose register we may emit multiple
29749 stores followed by a single load, causing a memory size mismatch stall.
29750 Count this as an arbitrarily high cost of 20. */
29751 if (targetm.class_max_nregs (class1, mode)
29752 > targetm.class_max_nregs (class2, mode))
29753 cost += 20;
29754
29755 /* In the case of FP/MMX moves, the registers actually overlap, and we
29756 have to switch modes in order to treat them differently. */
29757 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
29758 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
29759 cost += 20;
29760
29761 return cost;
29762 }
29763
29764 /* Moves between SSE/MMX and integer unit are expensive. */
29765 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
29766 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
29767
29768 /* ??? By keeping the returned value relatively high, we limit the number
29769 of moves between integer and MMX/SSE registers for all targets.
29770 Additionally, a high value prevents a problem with x86_modes_tieable_p(),
29771 where integer modes in MMX/SSE registers are not tieable
29772 because of missing QImode and HImode moves to, from or between
29773 MMX/SSE registers. */
29774 return MAX (8, ix86_cost->mmxsse_to_integer);
29775
29776 if (MAYBE_FLOAT_CLASS_P (class1))
29777 return ix86_cost->fp_move;
29778 if (MAYBE_SSE_CLASS_P (class1))
29779 return ix86_cost->sse_move;
29780 if (MAYBE_MMX_CLASS_P (class1))
29781 return ix86_cost->mmx_move;
29782 return 2;
29783 }
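
/* A rough worked example for ix86_register_move_cost (the concrete
   numbers depend on the active cost table):  moving a DFmode value
   between FLOAT_REGS and SSE_REGS needs secondary memory, so the cost
   is about 1 + max (fp_load, fp_store) + max (sse_load, sse_store),
   plus 20 more when an MMX/x87 overlap forces a mode switch.  A plain
   GENERAL_REGS to GENERAL_REGS copy stays at the default cost of 2.  */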
29784
29785 /* Return TRUE if hard register REGNO can hold a value of machine-mode
29786 MODE. */
29787
29788 bool
29789 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
29790 {
29791 /* The flags register, and only the flags register, can hold CCmode values. */
29792 if (CC_REGNO_P (regno))
29793 return GET_MODE_CLASS (mode) == MODE_CC;
29794 if (GET_MODE_CLASS (mode) == MODE_CC
29795 || GET_MODE_CLASS (mode) == MODE_RANDOM
29796 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
29797 return false;
29798 if (FP_REGNO_P (regno))
29799 return VALID_FP_MODE_P (mode);
29800 if (SSE_REGNO_P (regno))
29801 {
29802 /* We implement the move patterns for all vector modes into and
29803 out of SSE registers, even when no operation instructions
29804 are available. OImode move is available only when AVX is
29805 enabled. */
29806 return ((TARGET_AVX && mode == OImode)
29807 || VALID_AVX256_REG_MODE (mode)
29808 || VALID_SSE_REG_MODE (mode)
29809 || VALID_SSE2_REG_MODE (mode)
29810 || VALID_MMX_REG_MODE (mode)
29811 || VALID_MMX_REG_MODE_3DNOW (mode));
29812 }
29813 if (MMX_REGNO_P (regno))
29814 {
29815 /* We implement the move patterns for 3DNOW modes even in MMX mode,
29816 so if the register is available at all, then we can move data of
29817 the given mode into or out of it. */
29818 return (VALID_MMX_REG_MODE (mode)
29819 || VALID_MMX_REG_MODE_3DNOW (mode));
29820 }
29821
29822 if (mode == QImode)
29823 {
29824 /* Take care with QImode values - they can live in non-QI regs,
29825 but then they cause partial register stalls. */
29826 if (regno <= BX_REG || TARGET_64BIT)
29827 return true;
29828 if (!TARGET_PARTIAL_REG_STALL)
29829 return true;
29830 return !can_create_pseudo_p ();
29831 }
29832 /* We handle both integer and floats in the general purpose registers. */
29833 else if (VALID_INT_MODE_P (mode))
29834 return true;
29835 else if (VALID_FP_MODE_P (mode))
29836 return true;
29837 else if (VALID_DFP_MODE_P (mode))
29838 return true;
29839 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
29840 on to use that value in smaller contexts, this can easily force a
29841 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
29842 supporting DImode, allow it. */
29843 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
29844 return true;
29845
29846 return false;
29847 }
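
/* Some illustrative outcomes of the check above:  CCmode is accepted
   only in the flags register; V4SFmode is accepted in any SSE register;
   QImode in %esi or %edi is accepted in 64-bit mode, and on 32-bit
   targets only when partial register stalls are not an issue (or once
   pseudos can no longer be created); and the ordinary integer modes
   are accepted in the general purpose registers.  */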
29848
29849 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
29850 tieable integer mode. */
29851
29852 static bool
29853 ix86_tieable_integer_mode_p (enum machine_mode mode)
29854 {
29855 switch (mode)
29856 {
29857 case HImode:
29858 case SImode:
29859 return true;
29860
29861 case QImode:
29862 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
29863
29864 case DImode:
29865 return TARGET_64BIT;
29866
29867 default:
29868 return false;
29869 }
29870 }
29871
29872 /* Return true if MODE1 is accessible in a register that can hold MODE2
29873 without copying. That is, all register classes that can hold MODE2
29874 can also hold MODE1. */
29875
29876 bool
29877 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
29878 {
29879 if (mode1 == mode2)
29880 return true;
29881
29882 if (ix86_tieable_integer_mode_p (mode1)
29883 && ix86_tieable_integer_mode_p (mode2))
29884 return true;
29885
29886 /* MODE2 being XFmode implies fp stack or general regs, which means we
29887 can tie any smaller floating point modes to it. Note that we do not
29888 tie this with TFmode. */
29889 if (mode2 == XFmode)
29890 return mode1 == SFmode || mode1 == DFmode;
29891
29892 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
29893 that we can tie it with SFmode. */
29894 if (mode2 == DFmode)
29895 return mode1 == SFmode;
29896
29897 /* If MODE2 is only appropriate for an SSE register, then tie with
29898 any other mode acceptable to SSE registers. */
29899 if (GET_MODE_SIZE (mode2) == 16
29900 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
29901 return (GET_MODE_SIZE (mode1) == 16
29902 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
29903
29904 /* If MODE2 is appropriate for an MMX register, then tie
29905 with any other mode acceptable to MMX registers. */
29906 if (GET_MODE_SIZE (mode2) == 8
29907 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
29908 return (GET_MODE_SIZE (mode1) == 8
29909 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
29910
29911 return false;
29912 }
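
/* Note that the relation implemented above is deliberately asymmetric.
   For example, ix86_modes_tieable_p (SFmode, DFmode) is true, while
   ix86_modes_tieable_p (DFmode, SFmode) is false; HImode and SImode
   tie in either order.  */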
29913
29914 /* Compute a (partial) cost for rtx X. Return true if the complete
29915 cost has been computed, and false if subexpressions should be
29916 scanned. In either case, *TOTAL contains the cost result. */
29917
29918 static bool
29919 ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total,
29920 bool speed)
29921 {
29922 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
29923 enum machine_mode mode = GET_MODE (x);
29924 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
29925
29926 switch (code)
29927 {
29928 case CONST_INT:
29929 case CONST:
29930 case LABEL_REF:
29931 case SYMBOL_REF:
29932 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
29933 *total = 3;
29934 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
29935 *total = 2;
29936 else if (flag_pic && SYMBOLIC_CONST (x)
29937 && (!TARGET_64BIT
29938 || (GET_CODE (x) != LABEL_REF
29939 && (GET_CODE (x) != SYMBOL_REF
29940 || !SYMBOL_REF_LOCAL_P (x)))))
29941 *total = 1;
29942 else
29943 *total = 0;
29944 return true;
29945
29946 case CONST_DOUBLE:
29947 if (mode == VOIDmode)
29948 *total = 0;
29949 else
29950 switch (standard_80387_constant_p (x))
29951 {
29952 case 1: /* 0.0 */
29953 *total = 1;
29954 break;
29955 default: /* Other constants */
29956 *total = 2;
29957 break;
29958 case 0:
29959 case -1:
29960 /* Start with (MEM (SYMBOL_REF)), since that's where
29961 it'll probably end up. Add a penalty for size. */
29962 *total = (COSTS_N_INSNS (1)
29963 + (flag_pic != 0 && !TARGET_64BIT)
29964 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
29965 break;
29966 }
29967 return true;
29968
29969 case ZERO_EXTEND:
29970 /* Zero extension is often completely free on x86_64, so make
29971 it as cheap as possible. */
29972 if (TARGET_64BIT && mode == DImode
29973 && GET_MODE (XEXP (x, 0)) == SImode)
29974 *total = 1;
29975 else if (TARGET_ZERO_EXTEND_WITH_AND)
29976 *total = cost->add;
29977 else
29978 *total = cost->movzx;
29979 return false;
29980
29981 case SIGN_EXTEND:
29982 *total = cost->movsx;
29983 return false;
29984
29985 case ASHIFT:
29986 if (CONST_INT_P (XEXP (x, 1))
29987 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
29988 {
29989 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
29990 if (value == 1)
29991 {
29992 *total = cost->add;
29993 return false;
29994 }
29995 if ((value == 2 || value == 3)
29996 && cost->lea <= cost->shift_const)
29997 {
29998 *total = cost->lea;
29999 return false;
30000 }
30001 }
30002 /* FALLTHRU */
30003
30004 case ROTATE:
30005 case ASHIFTRT:
30006 case LSHIFTRT:
30007 case ROTATERT:
30008 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
30009 {
30010 if (CONST_INT_P (XEXP (x, 1)))
30011 {
30012 if (INTVAL (XEXP (x, 1)) > 32)
30013 *total = cost->shift_const + COSTS_N_INSNS (2);
30014 else
30015 *total = cost->shift_const * 2;
30016 }
30017 else
30018 {
30019 if (GET_CODE (XEXP (x, 1)) == AND)
30020 *total = cost->shift_var * 2;
30021 else
30022 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
30023 }
30024 }
30025 else
30026 {
30027 if (CONST_INT_P (XEXP (x, 1)))
30028 *total = cost->shift_const;
30029 else
30030 *total = cost->shift_var;
30031 }
30032 return false;
30033
30034 case FMA:
30035 {
30036 rtx sub;
30037
30038 gcc_assert (FLOAT_MODE_P (mode));
30039 gcc_assert (TARGET_FMA || TARGET_FMA4);
30040
30041 /* ??? SSE scalar/vector cost should be used here. */
30042 /* ??? Bald assumption that fma has the same cost as fmul. */
30043 *total = cost->fmul;
30044 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
30045
30046 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
30047 sub = XEXP (x, 0);
30048 if (GET_CODE (sub) == NEG)
30049 sub = XEXP (sub, 0);
30050 *total += rtx_cost (sub, FMA, 0, speed);
30051
30052 sub = XEXP (x, 2);
30053 if (GET_CODE (sub) == NEG)
30054 sub = XEXP (sub, 0);
30055 *total += rtx_cost (sub, FMA, 2, speed);
30056 return true;
30057 }
30058
30059 case MULT:
30060 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
30061 {
30062 /* ??? SSE scalar cost should be used here. */
30063 *total = cost->fmul;
30064 return false;
30065 }
30066 else if (X87_FLOAT_MODE_P (mode))
30067 {
30068 *total = cost->fmul;
30069 return false;
30070 }
30071 else if (FLOAT_MODE_P (mode))
30072 {
30073 /* ??? SSE vector cost should be used here. */
30074 *total = cost->fmul;
30075 return false;
30076 }
30077 else
30078 {
30079 rtx op0 = XEXP (x, 0);
30080 rtx op1 = XEXP (x, 1);
30081 int nbits;
30082 if (CONST_INT_P (XEXP (x, 1)))
30083 {
30084 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
30085 for (nbits = 0; value != 0; value &= value - 1)
30086 nbits++;
30087 }
30088 else
30089 /* This is arbitrary. */
30090 nbits = 7;
30091
30092 /* Compute costs correctly for widening multiplication. */
30093 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
30094 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
30095 == GET_MODE_SIZE (mode))
30096 {
30097 int is_mulwiden = 0;
30098 enum machine_mode inner_mode = GET_MODE (op0);
30099
30100 if (GET_CODE (op0) == GET_CODE (op1))
30101 is_mulwiden = 1, op1 = XEXP (op1, 0);
30102 else if (CONST_INT_P (op1))
30103 {
30104 if (GET_CODE (op0) == SIGN_EXTEND)
30105 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
30106 == INTVAL (op1);
30107 else
30108 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
30109 }
30110
30111 if (is_mulwiden)
30112 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
30113 }
30114
30115 *total = (cost->mult_init[MODE_INDEX (mode)]
30116 + nbits * cost->mult_bit
30117 + rtx_cost (op0, outer_code, opno, speed)
30118 + rtx_cost (op1, outer_code, opno, speed));
30119
30120 return true;
30121 }
30122
30123 case DIV:
30124 case UDIV:
30125 case MOD:
30126 case UMOD:
30127 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
30128 /* ??? SSE cost should be used here. */
30129 *total = cost->fdiv;
30130 else if (X87_FLOAT_MODE_P (mode))
30131 *total = cost->fdiv;
30132 else if (FLOAT_MODE_P (mode))
30133 /* ??? SSE vector cost should be used here. */
30134 *total = cost->fdiv;
30135 else
30136 *total = cost->divide[MODE_INDEX (mode)];
30137 return false;
30138
30139 case PLUS:
30140 if (GET_MODE_CLASS (mode) == MODE_INT
30141 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
30142 {
30143 if (GET_CODE (XEXP (x, 0)) == PLUS
30144 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
30145 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
30146 && CONSTANT_P (XEXP (x, 1)))
30147 {
30148 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
30149 if (val == 2 || val == 4 || val == 8)
30150 {
30151 *total = cost->lea;
30152 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
30153 outer_code, opno, speed);
30154 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
30155 outer_code, opno, speed);
30156 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
30157 return true;
30158 }
30159 }
30160 else if (GET_CODE (XEXP (x, 0)) == MULT
30161 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
30162 {
30163 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
30164 if (val == 2 || val == 4 || val == 8)
30165 {
30166 *total = cost->lea;
30167 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
30168 outer_code, opno, speed);
30169 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
30170 return true;
30171 }
30172 }
30173 else if (GET_CODE (XEXP (x, 0)) == PLUS)
30174 {
30175 *total = cost->lea;
30176 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
30177 outer_code, opno, speed);
30178 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
30179 outer_code, opno, speed);
30180 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
30181 return true;
30182 }
30183 }
30184 /* FALLTHRU */
30185
30186 case MINUS:
30187 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
30188 {
30189 /* ??? SSE cost should be used here. */
30190 *total = cost->fadd;
30191 return false;
30192 }
30193 else if (X87_FLOAT_MODE_P (mode))
30194 {
30195 *total = cost->fadd;
30196 return false;
30197 }
30198 else if (FLOAT_MODE_P (mode))
30199 {
30200 /* ??? SSE vector cost should be used here. */
30201 *total = cost->fadd;
30202 return false;
30203 }
30204 /* FALLTHRU */
30205
30206 case AND:
30207 case IOR:
30208 case XOR:
30209 if (!TARGET_64BIT && mode == DImode)
30210 {
30211 *total = (cost->add * 2
30212 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
30213 << (GET_MODE (XEXP (x, 0)) != DImode))
30214 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
30215 << (GET_MODE (XEXP (x, 1)) != DImode)));
30216 return true;
30217 }
30218 /* FALLTHRU */
30219
30220 case NEG:
30221 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
30222 {
30223 /* ??? SSE cost should be used here. */
30224 *total = cost->fchs;
30225 return false;
30226 }
30227 else if (X87_FLOAT_MODE_P (mode))
30228 {
30229 *total = cost->fchs;
30230 return false;
30231 }
30232 else if (FLOAT_MODE_P (mode))
30233 {
30234 /* ??? SSE vector cost should be used here. */
30235 *total = cost->fchs;
30236 return false;
30237 }
30238 /* FALLTHRU */
30239
30240 case NOT:
30241 if (!TARGET_64BIT && mode == DImode)
30242 *total = cost->add * 2;
30243 else
30244 *total = cost->add;
30245 return false;
30246
30247 case COMPARE:
30248 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
30249 && XEXP (XEXP (x, 0), 1) == const1_rtx
30250 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
30251 && XEXP (x, 1) == const0_rtx)
30252 {
30253 /* This kind of construct is implemented using test[bwl].
30254 Treat it as if we had an AND. */
30255 *total = (cost->add
30256 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
30257 + rtx_cost (const1_rtx, outer_code, opno, speed));
30258 return true;
30259 }
30260 return false;
30261
30262 case FLOAT_EXTEND:
30263 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
30264 *total = 0;
30265 return false;
30266
30267 case ABS:
30268 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
30269 /* ??? SSE cost should be used here. */
30270 *total = cost->fabs;
30271 else if (X87_FLOAT_MODE_P (mode))
30272 *total = cost->fabs;
30273 else if (FLOAT_MODE_P (mode))
30274 /* ??? SSE vector cost should be used here. */
30275 *total = cost->fabs;
30276 return false;
30277
30278 case SQRT:
30279 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
30280 /* ??? SSE cost should be used here. */
30281 *total = cost->fsqrt;
30282 else if (X87_FLOAT_MODE_P (mode))
30283 *total = cost->fsqrt;
30284 else if (FLOAT_MODE_P (mode))
30285 /* ??? SSE vector cost should be used here. */
30286 *total = cost->fsqrt;
30287 return false;
30288
30289 case UNSPEC:
30290 if (XINT (x, 1) == UNSPEC_TP)
30291 *total = 0;
30292 return false;
30293
30294 case VEC_SELECT:
30295 case VEC_CONCAT:
30296 case VEC_MERGE:
30297 case VEC_DUPLICATE:
30298 /* ??? Assume all of these vector manipulation patterns are
30299 recognizable, in which case they all pretty much have the
30300 same cost. */
30301 *total = COSTS_N_INSNS (1);
30302 return true;
30303
30304 default:
30305 return false;
30306 }
30307 }
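
/* Two small examples of how the cost function above behaves (assuming
   the default cost tables):  (ashift:SI (reg) (const_int 2)) may be
   costed as an LEA when cost->lea <= cost->shift_const, matching the
   "lea 0(,%reg,4)" form, and (plus:SI (mult:SI (reg) (const_int 4))
   (reg)) is costed as a single LEA plus the cost of its operands.  */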
30308
30309 #if TARGET_MACHO
30310
30311 static int current_machopic_label_num;
30312
30313 /* Given a symbol name and its associated stub, write out the
30314 definition of the stub. */
30315
30316 void
30317 machopic_output_stub (FILE *file, const char *symb, const char *stub)
30318 {
30319 unsigned int length;
30320 char *binder_name, *symbol_name, lazy_ptr_name[32];
30321 int label = ++current_machopic_label_num;
30322
30323 /* For 64-bit we shouldn't get here. */
30324 gcc_assert (!TARGET_64BIT);
30325
30326 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
30327 symb = targetm.strip_name_encoding (symb);
30328
30329 length = strlen (stub);
30330 binder_name = XALLOCAVEC (char, length + 32);
30331 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
30332
30333 length = strlen (symb);
30334 symbol_name = XALLOCAVEC (char, length + 32);
30335 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
30336
30337 sprintf (lazy_ptr_name, "L%d$lz", label);
30338
30339 if (MACHOPIC_ATT_STUB)
30340 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
30341 else if (MACHOPIC_PURE)
30342 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
30343 else
30344 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
30345
30346 fprintf (file, "%s:\n", stub);
30347 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
30348
30349 if (MACHOPIC_ATT_STUB)
30350 {
30351 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
30352 }
30353 else if (MACHOPIC_PURE)
30354 {
30355 /* PIC stub. */
30356 /* 25-byte PIC stub using "CALL get_pc_thunk". */
30357 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
30358 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
30359 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
30360 label, lazy_ptr_name, label);
30361 fprintf (file, "\tjmp\t*%%ecx\n");
30362 }
30363 else
30364 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
30365
30366 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
30367 it needs no stub-binding-helper. */
30368 if (MACHOPIC_ATT_STUB)
30369 return;
30370
30371 fprintf (file, "%s:\n", binder_name);
30372
30373 if (MACHOPIC_PURE)
30374 {
30375 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
30376 fprintf (file, "\tpushl\t%%ecx\n");
30377 }
30378 else
30379 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
30380
30381 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
30382
30383 /* N.B. Keep the correspondence of these
30384 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
30385 old-pic/new-pic/non-pic stubs; altering this will break
30386 compatibility with existing dylibs. */
30387 if (MACHOPIC_PURE)
30388 {
30389 /* 25-byte PIC stub using "CALL get_pc_thunk". */
30390 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
30391 }
30392 else
30393 /* 16-byte -mdynamic-no-pic stub. */
30394 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
30395
30396 fprintf (file, "%s:\n", lazy_ptr_name);
30397 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
30398 fprintf (file, ASM_LONG "%s\n", binder_name);
30399 }
30400 #endif /* TARGET_MACHO */
30401
30402 /* Order the registers for register allocator. */
30403
30404 void
30405 x86_order_regs_for_local_alloc (void)
30406 {
30407 int pos = 0;
30408 int i;
30409
30410 /* First allocate the local general purpose registers. */
30411 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
30412 if (GENERAL_REGNO_P (i) && call_used_regs[i])
30413 reg_alloc_order [pos++] = i;
30414
30415 /* Global general purpose registers. */
30416 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
30417 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
30418 reg_alloc_order [pos++] = i;
30419
30420 /* x87 registers come first in case we are doing FP math
30421 using them. */
30422 if (!TARGET_SSE_MATH)
30423 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
30424 reg_alloc_order [pos++] = i;
30425
30426 /* SSE registers. */
30427 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
30428 reg_alloc_order [pos++] = i;
30429 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
30430 reg_alloc_order [pos++] = i;
30431
30432 /* x87 registers. */
30433 if (TARGET_SSE_MATH)
30434 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
30435 reg_alloc_order [pos++] = i;
30436
30437 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
30438 reg_alloc_order [pos++] = i;
30439
30440 /* Initialize the rest of the array, as we do not allocate some registers
30441 at all. */
30442 while (pos < FIRST_PSEUDO_REGISTER)
30443 reg_alloc_order [pos++] = 0;
30444 }
30445
30446 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
30447 in struct attribute_spec handler. */
30448 static tree
30449 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
30450 tree args,
30451 int flags ATTRIBUTE_UNUSED,
30452 bool *no_add_attrs)
30453 {
30454 if (TREE_CODE (*node) != FUNCTION_TYPE
30455 && TREE_CODE (*node) != METHOD_TYPE
30456 && TREE_CODE (*node) != FIELD_DECL
30457 && TREE_CODE (*node) != TYPE_DECL)
30458 {
30459 warning (OPT_Wattributes, "%qE attribute only applies to functions",
30460 name);
30461 *no_add_attrs = true;
30462 return NULL_TREE;
30463 }
30464 if (TARGET_64BIT)
30465 {
30466 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
30467 name);
30468 *no_add_attrs = true;
30469 return NULL_TREE;
30470 }
30471 if (is_attribute_p ("callee_pop_aggregate_return", name))
30472 {
30473 tree cst;
30474
30475 cst = TREE_VALUE (args);
30476 if (TREE_CODE (cst) != INTEGER_CST)
30477 {
30478 warning (OPT_Wattributes,
30479 "%qE attribute requires an integer constant argument",
30480 name);
30481 *no_add_attrs = true;
30482 }
30483 else if (compare_tree_int (cst, 0) != 0
30484 && compare_tree_int (cst, 1) != 0)
30485 {
30486 warning (OPT_Wattributes,
30487 "argument to %qE attribute is neither zero, nor one",
30488 name);
30489 *no_add_attrs = true;
30490 }
30491
30492 return NULL_TREE;
30493 }
30494
30495 return NULL_TREE;
30496 }
30497
30498 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
30499 struct attribute_spec.handler. */
30500 static tree
30501 ix86_handle_abi_attribute (tree *node, tree name,
30502 tree args ATTRIBUTE_UNUSED,
30503 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
30504 {
30505 if (TREE_CODE (*node) != FUNCTION_TYPE
30506 && TREE_CODE (*node) != METHOD_TYPE
30507 && TREE_CODE (*node) != FIELD_DECL
30508 && TREE_CODE (*node) != TYPE_DECL)
30509 {
30510 warning (OPT_Wattributes, "%qE attribute only applies to functions",
30511 name);
30512 *no_add_attrs = true;
30513 return NULL_TREE;
30514 }
30515
30516 /* Can combine regparm with all attributes but fastcall. */
30517 if (is_attribute_p ("ms_abi", name))
30518 {
30519 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
30520 {
30521 error ("ms_abi and sysv_abi attributes are not compatible");
30522 }
30523
30524 return NULL_TREE;
30525 }
30526 else if (is_attribute_p ("sysv_abi", name))
30527 {
30528 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
30529 {
30530 error ("ms_abi and sysv_abi attributes are not compatible");
30531 }
30532
30533 return NULL_TREE;
30534 }
30535
30536 return NULL_TREE;
30537 }
30538
30539 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
30540 struct attribute_spec.handler. */
30541 static tree
30542 ix86_handle_struct_attribute (tree *node, tree name,
30543 tree args ATTRIBUTE_UNUSED,
30544 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
30545 {
30546 tree *type = NULL;
30547 if (DECL_P (*node))
30548 {
30549 if (TREE_CODE (*node) == TYPE_DECL)
30550 type = &TREE_TYPE (*node);
30551 }
30552 else
30553 type = node;
30554
30555 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
30556 || TREE_CODE (*type) == UNION_TYPE)))
30557 {
30558 warning (OPT_Wattributes, "%qE attribute ignored",
30559 name);
30560 *no_add_attrs = true;
30561 }
30562
30563 else if ((is_attribute_p ("ms_struct", name)
30564 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
30565 || ((is_attribute_p ("gcc_struct", name)
30566 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
30567 {
30568 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
30569 name);
30570 *no_add_attrs = true;
30571 }
30572
30573 return NULL_TREE;
30574 }
30575
30576 static tree
30577 ix86_handle_fndecl_attribute (tree *node, tree name,
30578 tree args ATTRIBUTE_UNUSED,
30579 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
30580 {
30581 if (TREE_CODE (*node) != FUNCTION_DECL)
30582 {
30583 warning (OPT_Wattributes, "%qE attribute only applies to functions",
30584 name);
30585 *no_add_attrs = true;
30586 }
30587 return NULL_TREE;
30588 }
30589
30590 static bool
30591 ix86_ms_bitfield_layout_p (const_tree record_type)
30592 {
30593 return ((TARGET_MS_BITFIELD_LAYOUT
30594 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
30595 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
30596 }
30597
30598 /* Returns an expression indicating where the this parameter is
30599 located on entry to the FUNCTION. */
30600
30601 static rtx
30602 x86_this_parameter (tree function)
30603 {
30604 tree type = TREE_TYPE (function);
30605 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
30606 int nregs;
30607
30608 if (TARGET_64BIT)
30609 {
30610 const int *parm_regs;
30611
30612 if (ix86_function_type_abi (type) == MS_ABI)
30613 parm_regs = x86_64_ms_abi_int_parameter_registers;
30614 else
30615 parm_regs = x86_64_int_parameter_registers;
30616 return gen_rtx_REG (DImode, parm_regs[aggr]);
30617 }
30618
30619 nregs = ix86_function_regparm (type, function);
30620
30621 if (nregs > 0 && !stdarg_p (type))
30622 {
30623 int regno;
30624 unsigned int ccvt = ix86_get_callcvt (type);
30625
30626 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
30627 regno = aggr ? DX_REG : CX_REG;
30628 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
30629 {
30630 regno = CX_REG;
30631 if (aggr)
30632 return gen_rtx_MEM (SImode,
30633 plus_constant (stack_pointer_rtx, 4));
30634 }
30635 else
30636 {
30637 regno = AX_REG;
30638 if (aggr)
30639 {
30640 regno = DX_REG;
30641 if (nregs == 1)
30642 return gen_rtx_MEM (SImode,
30643 plus_constant (stack_pointer_rtx, 4));
30644 }
30645 }
30646 return gen_rtx_REG (SImode, regno);
30647 }
30648
30649 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
30650 }
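
/* Illustrative results of x86_this_parameter (assuming the usual
   calling conventions):  a 64-bit SysV method finds "this" in %rdi, or
   in %rsi when a hidden aggregate-return pointer occupies %rdi; a
   32-bit fastcall method finds it in %ecx (%edx with an aggregate
   return); and a plain 32-bit cdecl method finds it on the stack at
   4(%esp), or at 8(%esp) past the aggregate-return pointer.  */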
30651
30652 /* Determine whether x86_output_mi_thunk can succeed. */
30653
30654 static bool
30655 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
30656 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
30657 HOST_WIDE_INT vcall_offset, const_tree function)
30658 {
30659 /* 64-bit can handle anything. */
30660 if (TARGET_64BIT)
30661 return true;
30662
30663 /* For 32-bit, everything's fine if we have one free register. */
30664 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
30665 return true;
30666
30667 /* Need a free register for vcall_offset. */
30668 if (vcall_offset)
30669 return false;
30670
30671 /* Need a free register for GOT references. */
30672 if (flag_pic && !targetm.binds_local_p (function))
30673 return false;
30674
30675 /* Otherwise ok. */
30676 return true;
30677 }
30678
30679 /* Output the assembler code for a thunk function. THUNK_DECL is the
30680 declaration for the thunk function itself, FUNCTION is the decl for
30681 the target function. DELTA is an immediate constant offset to be
30682 added to THIS. If VCALL_OFFSET is nonzero, the word at
30683 *(*this + vcall_offset) should be added to THIS. */
30684
30685 static void
30686 x86_output_mi_thunk (FILE *file,
30687 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
30688 HOST_WIDE_INT vcall_offset, tree function)
30689 {
30690 rtx this_param = x86_this_parameter (function);
30691 rtx this_reg, tmp, fnaddr;
30692
30693 emit_note (NOTE_INSN_PROLOGUE_END);
30694
30695 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
30696 pull it in now and let DELTA benefit. */
30697 if (REG_P (this_param))
30698 this_reg = this_param;
30699 else if (vcall_offset)
30700 {
30701 /* Put the this parameter into %eax. */
30702 this_reg = gen_rtx_REG (Pmode, AX_REG);
30703 emit_move_insn (this_reg, this_param);
30704 }
30705 else
30706 this_reg = NULL_RTX;
30707
30708 /* Adjust the this parameter by a fixed constant. */
30709 if (delta)
30710 {
30711 rtx delta_rtx = GEN_INT (delta);
30712 rtx delta_dst = this_reg ? this_reg : this_param;
30713
30714 if (TARGET_64BIT)
30715 {
30716 if (!x86_64_general_operand (delta_rtx, Pmode))
30717 {
30718 tmp = gen_rtx_REG (Pmode, R10_REG);
30719 emit_move_insn (tmp, delta_rtx);
30720 delta_rtx = tmp;
30721 }
30722 }
30723
30724 emit_insn (ix86_gen_add3 (delta_dst, delta_dst, delta_rtx));
30725 }
30726
30727 /* Adjust the this parameter by a value stored in the vtable. */
30728 if (vcall_offset)
30729 {
30730 rtx vcall_addr, vcall_mem, this_mem;
30731 unsigned int tmp_regno;
30732
30733 if (TARGET_64BIT)
30734 tmp_regno = R10_REG;
30735 else
30736 {
30737 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
30738 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
30739 tmp_regno = AX_REG;
30740 else
30741 tmp_regno = CX_REG;
30742 }
30743 tmp = gen_rtx_REG (Pmode, tmp_regno);
30744
30745 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
30746 if (Pmode != ptr_mode)
30747 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
30748 emit_move_insn (tmp, this_mem);
30749
30750 /* Adjust the this parameter. */
30751 vcall_addr = plus_constant (tmp, vcall_offset);
30752 if (TARGET_64BIT
30753 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
30754 {
30755 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
30756 emit_move_insn (tmp2, GEN_INT (vcall_offset));
30757 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
30758 }
30759
30760 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
30761 if (Pmode != ptr_mode)
30762 emit_insn (gen_addsi_1_zext (this_reg,
30763 gen_rtx_REG (ptr_mode,
30764 REGNO (this_reg)),
30765 vcall_mem));
30766 else
30767 emit_insn (ix86_gen_add3 (this_reg, this_reg, vcall_mem));
30768 }
30769
30770 /* If necessary, drop THIS back to its stack slot. */
30771 if (this_reg && this_reg != this_param)
30772 emit_move_insn (this_param, this_reg);
30773
30774 fnaddr = XEXP (DECL_RTL (function), 0);
30775 if (TARGET_64BIT)
30776 {
30777 if (!flag_pic || targetm.binds_local_p (function)
30778 || cfun->machine->call_abi == MS_ABI)
30779 ;
30780 else
30781 {
30782 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
30783 tmp = gen_rtx_CONST (Pmode, tmp);
30784 fnaddr = gen_rtx_MEM (Pmode, tmp);
30785 }
30786 }
30787 else
30788 {
30789 if (!flag_pic || targetm.binds_local_p (function))
30790 ;
30791 #if TARGET_MACHO
30792 else if (TARGET_MACHO)
30793 {
30794 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
30795 fnaddr = XEXP (fnaddr, 0);
30796 }
30797 #endif /* TARGET_MACHO */
30798 else
30799 {
30800 tmp = gen_rtx_REG (Pmode, CX_REG);
30801 output_set_got (tmp, NULL_RTX);
30802
30803 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
30804 fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
30805 fnaddr = gen_rtx_MEM (Pmode, fnaddr);
30806 }
30807 }
30808
30809 /* Our sibling call patterns do not allow memories, because we have no
30810 predicate that can distinguish between frame and non-frame memory.
30811 For our purposes here, we can get away with (ab)using a jump pattern,
30812 because we're going to do no optimization. */
30813 if (MEM_P (fnaddr))
30814 emit_jump_insn (gen_indirect_jump (fnaddr));
30815 else
30816 {
30817 tmp = gen_rtx_MEM (QImode, fnaddr);
30818 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
30819 tmp = emit_call_insn (tmp);
30820 SIBLING_CALL_P (tmp) = 1;
30821 }
30822 emit_barrier ();
30823
30824 /* Emit just enough of rest_of_compilation to get the insns emitted.
30825 Note that use_thunk calls assemble_start_function et al. */
30826 tmp = get_insns ();
30827 insn_locators_alloc ();
30828 shorten_branches (tmp);
30829 final_start_function (tmp, file, 1);
30830 final (tmp, file, 1);
30831 final_end_function ();
30832 }
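
/* For reference, the 64-bit non-PIC code produced by the thunk emitter
   above looks roughly like this (with hypothetical DELTA 16 and
   VCALL_OFFSET 24):

       addq  $16, %rdi          # this += DELTA
       movq  (%rdi), %r10       # r10 = vptr
       addq  24(%r10), %rdi     # this += *(vptr + VCALL_OFFSET)
       jmp   target             # tail call the real method

   The 32-bit variant uses %ecx or %eax for the temporary, as selected
   by the calling convention checks above.  */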
30833
30834 static void
30835 x86_file_start (void)
30836 {
30837 default_file_start ();
30838 #if TARGET_MACHO
30839 darwin_file_start ();
30840 #endif
30841 if (X86_FILE_START_VERSION_DIRECTIVE)
30842 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
30843 if (X86_FILE_START_FLTUSED)
30844 fputs ("\t.global\t__fltused\n", asm_out_file);
30845 if (ix86_asm_dialect == ASM_INTEL)
30846 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
30847 }
30848
30849 int
30850 x86_field_alignment (tree field, int computed)
30851 {
30852 enum machine_mode mode;
30853 tree type = TREE_TYPE (field);
30854
30855 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
30856 return computed;
30857 mode = TYPE_MODE (strip_array_types (type));
30858 if (mode == DFmode || mode == DCmode
30859 || GET_MODE_CLASS (mode) == MODE_INT
30860 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
30861 return MIN (32, computed);
30862 return computed;
30863 }
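
/* In other words (illustration only): without -malign-double, a double
   or long long field in a 32-bit struct is capped at 4-byte alignment,
   matching the traditional i386 System V layout, while 64-bit targets
   keep the natural 8-byte alignment.  */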
30864
30865 /* Output assembler code to FILE to increment profiler label # LABELNO
30866 for profiling a function entry. */
30867 void
30868 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
30869 {
30870 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
30871 : MCOUNT_NAME);
30872
30873 if (TARGET_64BIT)
30874 {
30875 #ifndef NO_PROFILE_COUNTERS
30876 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
30877 #endif
30878
30879 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
30880 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
30881 else
30882 fprintf (file, "\tcall\t%s\n", mcount_name);
30883 }
30884 else if (flag_pic)
30885 {
30886 #ifndef NO_PROFILE_COUNTERS
30887 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
30888 LPREFIX, labelno);
30889 #endif
30890 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
30891 }
30892 else
30893 {
30894 #ifndef NO_PROFILE_COUNTERS
30895 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
30896 LPREFIX, labelno);
30897 #endif
30898 fprintf (file, "\tcall\t%s\n", mcount_name);
30899 }
30900 }
30901
30902 /* We don't have exact information about the insn sizes, but we may assume
30903 quite safely that we are informed about all 1-byte insns and memory
30904 address sizes. This is enough to eliminate unnecessary padding in
30905 99% of cases. */
30906
30907 static int
30908 min_insn_size (rtx insn)
30909 {
30910 int l = 0, len;
30911
30912 if (!INSN_P (insn) || !active_insn_p (insn))
30913 return 0;
30914
30915 /* Discard alignments we've emitted and jump table data. */
30916 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
30917 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
30918 return 0;
30919 if (JUMP_TABLE_DATA_P (insn))
30920 return 0;
30921
30922 /* Important case - calls are always 5 bytes.
30923 It is common to have many calls in a row. */
30924 if (CALL_P (insn)
30925 && symbolic_reference_mentioned_p (PATTERN (insn))
30926 && !SIBLING_CALL_P (insn))
30927 return 5;
30928 len = get_attr_length (insn);
30929 if (len <= 1)
30930 return 1;
30931
30932 /* For normal instructions we rely on get_attr_length being exact,
30933 with a few exceptions. */
30934 if (!JUMP_P (insn))
30935 {
30936 enum attr_type type = get_attr_type (insn);
30937
30938 switch (type)
30939 {
30940 case TYPE_MULTI:
30941 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
30942 || asm_noperands (PATTERN (insn)) >= 0)
30943 return 0;
30944 break;
30945 case TYPE_OTHER:
30946 case TYPE_FCMP:
30947 break;
30948 default:
30949 /* Otherwise trust get_attr_length. */
30950 return len;
30951 }
30952
30953 l = get_attr_length_address (insn);
30954 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
30955 l = 4;
30956 }
30957 if (l)
30958 return 1+l;
30959 else
30960 return 2;
30961 }
30962
30963 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
30964
30965 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
30966 16-byte window. */
30967
30968 static void
30969 ix86_avoid_jump_mispredicts (void)
30970 {
30971 rtx insn, start = get_insns ();
30972 int nbytes = 0, njumps = 0;
30973 int isjump = 0;
30974
30975 /* Look for all minimal intervals of instructions containing 4 jumps.
30976 The intervals are bounded by START and INSN.  NBYTES is the total
30977 size of the instructions in the interval, including INSN but not
30978 including START.  When NBYTES is smaller than 16, it is possible
30979 that the ends of START and INSN fall in the same 16-byte page.
30980
30981 The smallest offset in the page at which INSN can start is the case
30982 where START ends at offset 0.  The offset of INSN is then
30983 NBYTES - sizeof (INSN).  We emit a p2align to the 16-byte window with
30984 max skip 15 - NBYTES + sizeof (INSN).  */
30985 for (insn = start; insn; insn = NEXT_INSN (insn))
30986 {
30987 int min_size;
30988
30989 if (LABEL_P (insn))
30990 {
30991 int align = label_to_alignment (insn);
30992 int max_skip = label_to_max_skip (insn);
30993
30994 if (max_skip > 15)
30995 max_skip = 15;
30996 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
30997 already in the current 16 byte page, because otherwise
30998 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
30999 bytes to reach 16 byte boundary. */
31000 if (align <= 0
31001 || (align <= 3 && max_skip != (1 << align) - 1))
31002 max_skip = 0;
31003 if (dump_file)
31004 fprintf (dump_file, "Label %i with max_skip %i\n",
31005 INSN_UID (insn), max_skip);
31006 if (max_skip)
31007 {
31008 while (nbytes + max_skip >= 16)
31009 {
31010 start = NEXT_INSN (start);
31011 if ((JUMP_P (start)
31012 && GET_CODE (PATTERN (start)) != ADDR_VEC
31013 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
31014 || CALL_P (start))
31015 njumps--, isjump = 1;
31016 else
31017 isjump = 0;
31018 nbytes -= min_insn_size (start);
31019 }
31020 }
31021 continue;
31022 }
31023
31024 min_size = min_insn_size (insn);
31025 nbytes += min_size;
31026 if (dump_file)
31027 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
31028 INSN_UID (insn), min_size);
31029 if ((JUMP_P (insn)
31030 && GET_CODE (PATTERN (insn)) != ADDR_VEC
31031 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
31032 || CALL_P (insn))
31033 njumps++;
31034 else
31035 continue;
31036
31037 while (njumps > 3)
31038 {
31039 start = NEXT_INSN (start);
31040 if ((JUMP_P (start)
31041 && GET_CODE (PATTERN (start)) != ADDR_VEC
31042 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
31043 || CALL_P (start))
31044 njumps--, isjump = 1;
31045 else
31046 isjump = 0;
31047 nbytes -= min_insn_size (start);
31048 }
31049 gcc_assert (njumps >= 0);
31050 if (dump_file)
31051 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
31052 INSN_UID (start), INSN_UID (insn), nbytes);
31053
31054 if (njumps == 3 && isjump && nbytes < 16)
31055 {
31056 int padsize = 15 - nbytes + min_insn_size (insn);
31057
31058 if (dump_file)
31059 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
31060 INSN_UID (insn), padsize);
31061 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
31062 }
31063 }
31064 }
31065 #endif
31066
31067 /* AMD Athlon works faster
31068 when RET is not the destination of a conditional jump or directly preceded
31069 by another jump instruction. We avoid the penalty by inserting a NOP just
31070 before the RET instruction in such cases. */
31071 static void
31072 ix86_pad_returns (void)
31073 {
31074 edge e;
31075 edge_iterator ei;
31076
31077 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
31078 {
31079 basic_block bb = e->src;
31080 rtx ret = BB_END (bb);
31081 rtx prev;
31082 bool replace = false;
31083
31084 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
31085 || optimize_bb_for_size_p (bb))
31086 continue;
31087 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
31088 if (active_insn_p (prev) || LABEL_P (prev))
31089 break;
31090 if (prev && LABEL_P (prev))
31091 {
31092 edge e;
31093 edge_iterator ei;
31094
31095 FOR_EACH_EDGE (e, ei, bb->preds)
31096 if (EDGE_FREQUENCY (e) && e->src->index >= 0
31097 && !(e->flags & EDGE_FALLTHRU))
31098 replace = true;
31099 }
31100 if (!replace)
31101 {
31102 prev = prev_active_insn (ret);
31103 if (prev
31104 && ((JUMP_P (prev) && any_condjump_p (prev))
31105 || CALL_P (prev)))
31106 replace = true;
31107 /* Empty functions get a branch mispredict even when
31108 the jump destination is not visible to us. */
31109 if (!prev && !optimize_function_for_size_p (cfun))
31110 replace = true;
31111 }
31112 if (replace)
31113 {
31114 emit_jump_insn_before (gen_return_internal_long (), ret);
31115 delete_insn (ret);
31116 }
31117 }
31118 }
31119
31120 /* Count the minimum number of instructions in BB. Return 4 if the
31121 number of instructions >= 4. */
31122
31123 static int
31124 ix86_count_insn_bb (basic_block bb)
31125 {
31126 rtx insn;
31127 int insn_count = 0;
31128
31129 /* Count number of instructions in this block. Return 4 if the number
31130 of instructions >= 4. */
31131 FOR_BB_INSNS (bb, insn)
31132 {
31133 /* This only happens in exit blocks. */
31134 if (JUMP_P (insn)
31135 && ANY_RETURN_P (PATTERN (insn)))
31136 break;
31137
31138 if (NONDEBUG_INSN_P (insn)
31139 && GET_CODE (PATTERN (insn)) != USE
31140 && GET_CODE (PATTERN (insn)) != CLOBBER)
31141 {
31142 insn_count++;
31143 if (insn_count >= 4)
31144 return insn_count;
31145 }
31146 }
31147
31148 return insn_count;
31149 }
31150
31151
31152 /* Count the minimum number of instructions in code path in BB.
31153 Return 4 if the number of instructions >= 4. */
31154
31155 static int
31156 ix86_count_insn (basic_block bb)
31157 {
31158 edge e;
31159 edge_iterator ei;
31160 int min_prev_count;
31161
31162 /* Only bother counting instructions along paths with no
31163 more than 2 basic blocks between entry and exit. Given
31164 that BB has an edge to exit, determine if a predecessor
31165 of BB has an edge from entry. If so, compute the number
31166 of instructions in the predecessor block. If there
31167 happen to be multiple such blocks, compute the minimum. */
31168 min_prev_count = 4;
31169 FOR_EACH_EDGE (e, ei, bb->preds)
31170 {
31171 edge prev_e;
31172 edge_iterator prev_ei;
31173
31174 if (e->src == ENTRY_BLOCK_PTR)
31175 {
31176 min_prev_count = 0;
31177 break;
31178 }
31179 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
31180 {
31181 if (prev_e->src == ENTRY_BLOCK_PTR)
31182 {
31183 int count = ix86_count_insn_bb (e->src);
31184 if (count < min_prev_count)
31185 min_prev_count = count;
31186 break;
31187 }
31188 }
31189 }
31190
31191 if (min_prev_count < 4)
31192 min_prev_count += ix86_count_insn_bb (bb);
31193
31194 return min_prev_count;
31195 }
31196
31197 /* Pad short functions to 4 instructions. */
31198
31199 static void
31200 ix86_pad_short_function (void)
31201 {
31202 edge e;
31203 edge_iterator ei;
31204
31205 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
31206 {
31207 rtx ret = BB_END (e->src);
31208 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
31209 {
31210 int insn_count = ix86_count_insn (e->src);
31211
31212 /* Pad short function. */
31213 if (insn_count < 4)
31214 {
31215 rtx insn = ret;
31216
31217 /* Find epilogue. */
31218 while (insn
31219 && (!NOTE_P (insn)
31220 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
31221 insn = PREV_INSN (insn);
31222
31223 if (!insn)
31224 insn = ret;
31225
31226 /* Two NOPs count as one instruction. */
31227 insn_count = 2 * (4 - insn_count);
31228 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
31229 }
31230 }
31231 }
31232 }
31233
31234 /* Implement machine specific optimizations. We implement padding of returns
31235 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
31236 static void
31237 ix86_reorg (void)
31238 {
31239 /* We are freeing block_for_insn in the toplev to keep compatibility
31240 with old MDEP_REORGS that are not CFG based. Recompute it now. */
31241 compute_bb_for_insn ();
31242
31243 /* Run the vzeroupper optimization if needed. */
31244 if (TARGET_VZEROUPPER)
31245 move_or_delete_vzeroupper ();
31246
31247 if (optimize && optimize_function_for_speed_p (cfun))
31248 {
31249 if (TARGET_PAD_SHORT_FUNCTION)
31250 ix86_pad_short_function ();
31251 else if (TARGET_PAD_RETURNS)
31252 ix86_pad_returns ();
31253 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
31254 if (TARGET_FOUR_JUMP_LIMIT)
31255 ix86_avoid_jump_mispredicts ();
31256 #endif
31257 }
31258 }
31259
31260 /* Return nonzero when a QImode register that must be represented via a REX
31261 prefix is used. */
31262 bool
31263 x86_extended_QIreg_mentioned_p (rtx insn)
31264 {
31265 int i;
31266 extract_insn_cached (insn);
31267 for (i = 0; i < recog_data.n_operands; i++)
31268 if (REG_P (recog_data.operand[i])
31269 && REGNO (recog_data.operand[i]) > BX_REG)
31270 return true;
31271 return false;
31272 }
31273
31274 /* Return nonzero when P points to a register encoded via a REX prefix.
31275 Called via for_each_rtx. */
31276 static int
31277 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
31278 {
31279 unsigned int regno;
31280 if (!REG_P (*p))
31281 return 0;
31282 regno = REGNO (*p);
31283 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
31284 }
31285
31286 /* Return true when INSN mentions a register that must be encoded using a REX
31287 prefix. */
31288 bool
31289 x86_extended_reg_mentioned_p (rtx insn)
31290 {
31291 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
31292 extended_reg_mentioned_1, NULL);
31293 }
31294
31295 /* If profitable, negate (without causing overflow) integer constant
31296 of mode MODE at location LOC. Return true in this case. */
31297 bool
31298 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
31299 {
31300 HOST_WIDE_INT val;
31301
31302 if (!CONST_INT_P (*loc))
31303 return false;
31304
31305 switch (mode)
31306 {
31307 case DImode:
31308 /* DImode x86_64 constants must fit in 32 bits. */
31309 gcc_assert (x86_64_immediate_operand (*loc, mode));
31310
31311 mode = SImode;
31312 break;
31313
31314 case SImode:
31315 case HImode:
31316 case QImode:
31317 break;
31318
31319 default:
31320 gcc_unreachable ();
31321 }
31322
31323 /* Avoid overflows. */
31324 if (mode_signbit_p (mode, *loc))
31325 return false;
31326
31327 val = INTVAL (*loc);
31328
31329 /* Make things pretty by using `subl $4,%eax' rather than `addl $-4,%eax'.
31330 Exception: -128 encodes smaller than 128, so swap the sign and the op. */
31331 if ((val < 0 && val != -128)
31332 || val == 128)
31333 {
31334 *loc = GEN_INT (-val);
31335 return true;
31336 }
31337
31338 return false;
31339 }
31340
31341 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
31342 optabs would emit if we didn't have TFmode patterns. */
31343
31344 void
31345 x86_emit_floatuns (rtx operands[2])
31346 {
31347 rtx neglab, donelab, i0, i1, f0, in, out;
31348 enum machine_mode mode, inmode;
31349
31350 inmode = GET_MODE (operands[1]);
31351 gcc_assert (inmode == SImode || inmode == DImode);
31352
31353 out = operands[0];
31354 in = force_reg (inmode, operands[1]);
31355 mode = GET_MODE (out);
31356 neglab = gen_label_rtx ();
31357 donelab = gen_label_rtx ();
31358 f0 = gen_reg_rtx (mode);
31359
31360 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
31361
31362 expand_float (out, in, 0);
31363
31364 emit_jump_insn (gen_jump (donelab));
31365 emit_barrier ();
31366
31367 emit_label (neglab);
31368
31369 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
31370 1, OPTAB_DIRECT);
31371 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
31372 1, OPTAB_DIRECT);
31373 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
31374
31375 expand_float (f0, i0, 0);
31376
31377 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
31378
31379 emit_label (donelab);
31380 }
31381 \f
31382 /* AVX does not support 32-byte integer vector operations,
31383 thus the longest vector we are faced with is V16QImode. */
31384 #define MAX_VECT_LEN 16
31385
31386 struct expand_vec_perm_d
31387 {
31388 rtx target, op0, op1;
31389 unsigned char perm[MAX_VECT_LEN];
31390 enum machine_mode vmode;
31391 unsigned char nelt;
31392 bool testing_p;
31393 };
31394
31395 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
31396 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
31397
31398 /* Get a vector mode of the same size as the original but with elements
31399 twice as wide. This is only guaranteed to apply to integral vectors. */
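/* For example, for V16QImode this returns V8HImode: still 16 bytes wide,
   but with half as many elements, each twice as wide. */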
31400
31401 static inline enum machine_mode
31402 get_mode_wider_vector (enum machine_mode o)
31403 {
31404 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
31405 enum machine_mode n = GET_MODE_WIDER_MODE (o);
31406 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
31407 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
31408 return n;
31409 }
31410
31411 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
31412 with all elements equal to VAR. Return true if successful. */
31413
31414 static bool
31415 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
31416 rtx target, rtx val)
31417 {
31418 bool ok;
31419
31420 switch (mode)
31421 {
31422 case V2SImode:
31423 case V2SFmode:
31424 if (!mmx_ok)
31425 return false;
31426 /* FALLTHRU */
31427
31428 case V4DFmode:
31429 case V4DImode:
31430 case V8SFmode:
31431 case V8SImode:
31432 case V2DFmode:
31433 case V2DImode:
31434 case V4SFmode:
31435 case V4SImode:
31436 {
31437 rtx insn, dup;
31438
31439 /* First attempt to recognize VAL as-is. */
31440 dup = gen_rtx_VEC_DUPLICATE (mode, val);
31441 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
31442 if (recog_memoized (insn) < 0)
31443 {
31444 rtx seq;
31445 /* If that fails, force VAL into a register. */
31446
31447 start_sequence ();
31448 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
31449 seq = get_insns ();
31450 end_sequence ();
31451 if (seq)
31452 emit_insn_before (seq, insn);
31453
31454 ok = recog_memoized (insn) >= 0;
31455 gcc_assert (ok);
31456 }
31457 }
31458 return true;
31459
31460 case V4HImode:
31461 if (!mmx_ok)
31462 return false;
31463 if (TARGET_SSE || TARGET_3DNOW_A)
31464 {
31465 rtx x;
31466
31467 val = gen_lowpart (SImode, val);
31468 x = gen_rtx_TRUNCATE (HImode, val);
31469 x = gen_rtx_VEC_DUPLICATE (mode, x);
31470 emit_insn (gen_rtx_SET (VOIDmode, target, x));
31471 return true;
31472 }
31473 goto widen;
31474
31475 case V8QImode:
31476 if (!mmx_ok)
31477 return false;
31478 goto widen;
31479
31480 case V8HImode:
31481 if (TARGET_SSE2)
31482 {
31483 struct expand_vec_perm_d dperm;
31484 rtx tmp1, tmp2;
31485
31486 permute:
31487 memset (&dperm, 0, sizeof (dperm));
31488 dperm.target = target;
31489 dperm.vmode = mode;
31490 dperm.nelt = GET_MODE_NUNITS (mode);
31491 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
31492
31493 /* Extend to SImode using a paradoxical SUBREG. */
31494 tmp1 = gen_reg_rtx (SImode);
31495 emit_move_insn (tmp1, gen_lowpart (SImode, val));
31496
31497 /* Insert the SImode value as low element of a V4SImode vector. */
31498 tmp2 = gen_lowpart (V4SImode, dperm.op0);
31499 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
31500
31501 ok = (expand_vec_perm_1 (&dperm)
31502 || expand_vec_perm_broadcast_1 (&dperm));
31503 gcc_assert (ok);
31504 return ok;
31505 }
31506 goto widen;
31507
31508 case V16QImode:
31509 if (TARGET_SSE2)
31510 goto permute;
31511 goto widen;
31512
31513 widen:
31514 /* Replicate the value once into the next wider mode and recurse. */
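/* E.g. a V8QImode broadcast of the byte X first forms the HImode value
   (X << 8) | X and then broadcasts that across V4HImode, halving the
   element count on each recursion. */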
31515 {
31516 enum machine_mode smode, wsmode, wvmode;
31517 rtx x;
31518
31519 smode = GET_MODE_INNER (mode);
31520 wvmode = get_mode_wider_vector (mode);
31521 wsmode = GET_MODE_INNER (wvmode);
31522
31523 val = convert_modes (wsmode, smode, val, true);
31524 x = expand_simple_binop (wsmode, ASHIFT, val,
31525 GEN_INT (GET_MODE_BITSIZE (smode)),
31526 NULL_RTX, 1, OPTAB_LIB_WIDEN);
31527 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
31528
31529 x = gen_lowpart (wvmode, target);
31530 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
31531 gcc_assert (ok);
31532 return ok;
31533 }
31534
31535 case V16HImode:
31536 case V32QImode:
31537 {
31538 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
31539 rtx x = gen_reg_rtx (hvmode);
31540
31541 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
31542 gcc_assert (ok);
31543
31544 x = gen_rtx_VEC_CONCAT (mode, x, x);
31545 emit_insn (gen_rtx_SET (VOIDmode, target, x));
31546 }
31547 return true;
31548
31549 default:
31550 return false;
31551 }
31552 }
31553
31554 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
31555 whose ONE_VAR element is VAR, and other elements are zero. Return true
31556 if successful. */
31557
31558 static bool
31559 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
31560 rtx target, rtx var, int one_var)
31561 {
31562 enum machine_mode vsimode;
31563 rtx new_target;
31564 rtx x, tmp;
31565 bool use_vector_set = false;
31566
31567 switch (mode)
31568 {
31569 case V2DImode:
31570 /* For SSE4.1, we normally use vector set. But if the second
31571 element is zero and inter-unit moves are OK, we use movq
31572 instead. */
31573 use_vector_set = (TARGET_64BIT
31574 && TARGET_SSE4_1
31575 && !(TARGET_INTER_UNIT_MOVES
31576 && one_var == 0));
31577 break;
31578 case V16QImode:
31579 case V4SImode:
31580 case V4SFmode:
31581 use_vector_set = TARGET_SSE4_1;
31582 break;
31583 case V8HImode:
31584 use_vector_set = TARGET_SSE2;
31585 break;
31586 case V4HImode:
31587 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
31588 break;
31589 case V32QImode:
31590 case V16HImode:
31591 case V8SImode:
31592 case V8SFmode:
31593 case V4DFmode:
31594 use_vector_set = TARGET_AVX;
31595 break;
31596 case V4DImode:
31597 /* Use ix86_expand_vector_set in 64bit mode only. */
31598 use_vector_set = TARGET_AVX && TARGET_64BIT;
31599 break;
31600 default:
31601 break;
31602 }
31603
31604 if (use_vector_set)
31605 {
31606 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
31607 var = force_reg (GET_MODE_INNER (mode), var);
31608 ix86_expand_vector_set (mmx_ok, target, var, one_var);
31609 return true;
31610 }
31611
31612 switch (mode)
31613 {
31614 case V2SFmode:
31615 case V2SImode:
31616 if (!mmx_ok)
31617 return false;
31618 /* FALLTHRU */
31619
31620 case V2DFmode:
31621 case V2DImode:
31622 if (one_var != 0)
31623 return false;
31624 var = force_reg (GET_MODE_INNER (mode), var);
31625 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
31626 emit_insn (gen_rtx_SET (VOIDmode, target, x));
31627 return true;
31628
31629 case V4SFmode:
31630 case V4SImode:
31631 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
31632 new_target = gen_reg_rtx (mode);
31633 else
31634 new_target = target;
31635 var = force_reg (GET_MODE_INNER (mode), var);
31636 x = gen_rtx_VEC_DUPLICATE (mode, var);
31637 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
31638 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
31639 if (one_var != 0)
31640 {
31641 /* We need to shuffle the value to the correct position, so
31642 create a new pseudo to store the intermediate result. */
31643
31644 /* With SSE2, we can use the integer shuffle insns. */
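/* The vec_merge above left the variable value in element 0; the pshufd
   control below moves it to element ONE_VAR and fills the other slots
   from the zero in element 1. E.g. for one_var == 2 the selectors are
   (1, 1, 0, 1), giving (0, 0, X, 0). */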
31645 if (mode != V4SFmode && TARGET_SSE2)
31646 {
31647 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
31648 const1_rtx,
31649 GEN_INT (one_var == 1 ? 0 : 1),
31650 GEN_INT (one_var == 2 ? 0 : 1),
31651 GEN_INT (one_var == 3 ? 0 : 1)));
31652 if (target != new_target)
31653 emit_move_insn (target, new_target);
31654 return true;
31655 }
31656
31657 /* Otherwise convert the intermediate result to V4SFmode and
31658 use the SSE1 shuffle instructions. */
31659 if (mode != V4SFmode)
31660 {
31661 tmp = gen_reg_rtx (V4SFmode);
31662 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
31663 }
31664 else
31665 tmp = new_target;
31666
31667 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
31668 const1_rtx,
31669 GEN_INT (one_var == 1 ? 0 : 1),
31670 GEN_INT (one_var == 2 ? 0+4 : 1+4),
31671 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
31672
31673 if (mode != V4SFmode)
31674 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
31675 else if (tmp != target)
31676 emit_move_insn (target, tmp);
31677 }
31678 else if (target != new_target)
31679 emit_move_insn (target, new_target);
31680 return true;
31681
31682 case V8HImode:
31683 case V16QImode:
31684 vsimode = V4SImode;
31685 goto widen;
31686 case V4HImode:
31687 case V8QImode:
31688 if (!mmx_ok)
31689 return false;
31690 vsimode = V2SImode;
31691 goto widen;
31692 widen:
31693 if (one_var != 0)
31694 return false;
31695
31696 /* Zero extend the variable element to SImode and recurse. */
31697 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
31698
31699 x = gen_reg_rtx (vsimode);
31700 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
31701 var, one_var))
31702 gcc_unreachable ();
31703
31704 emit_move_insn (target, gen_lowpart (mode, x));
31705 return true;
31706
31707 default:
31708 return false;
31709 }
31710 }
31711
31712 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
31713 consisting of the values in VALS. It is known that all elements
31714 except ONE_VAR are constants. Return true if successful. */
31715
31716 static bool
31717 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
31718 rtx target, rtx vals, int one_var)
31719 {
31720 rtx var = XVECEXP (vals, 0, one_var);
31721 enum machine_mode wmode;
31722 rtx const_vec, x;
31723
31724 const_vec = copy_rtx (vals);
31725 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
31726 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
31727
31728 switch (mode)
31729 {
31730 case V2DFmode:
31731 case V2DImode:
31732 case V2SFmode:
31733 case V2SImode:
31734 /* For the two element vectors, it's just as easy to use
31735 the general case. */
31736 return false;
31737
31738 case V4DImode:
31739 /* Use ix86_expand_vector_set in 64bit mode only. */
31740 if (!TARGET_64BIT)
31741 return false;
31742 case V4DFmode:
31743 case V8SFmode:
31744 case V8SImode:
31745 case V16HImode:
31746 case V32QImode:
31747 case V4SFmode:
31748 case V4SImode:
31749 case V8HImode:
31750 case V4HImode:
31751 break;
31752
31753 case V16QImode:
31754 if (TARGET_SSE4_1)
31755 break;
31756 wmode = V8HImode;
31757 goto widen;
31758 case V8QImode:
31759 wmode = V4HImode;
31760 goto widen;
31761 widen:
31762 /* There's no way to set one QImode entry easily. Combine
31763 the variable value with its adjacent constant value, and
31764 promote to an HImode set. */
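/* For instance, with one_var == 3 the variable byte is shifted into the
   high half of an HImode value, combined with the constant byte at index 2,
   and the pair is stored as HImode element 1 of the wider vector. */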
31765 x = XVECEXP (vals, 0, one_var ^ 1);
31766 if (one_var & 1)
31767 {
31768 var = convert_modes (HImode, QImode, var, true);
31769 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
31770 NULL_RTX, 1, OPTAB_LIB_WIDEN);
31771 x = GEN_INT (INTVAL (x) & 0xff);
31772 }
31773 else
31774 {
31775 var = convert_modes (HImode, QImode, var, true);
31776 x = gen_int_mode (INTVAL (x) << 8, HImode);
31777 }
31778 if (x != const0_rtx)
31779 var = expand_simple_binop (HImode, IOR, var, x, var,
31780 1, OPTAB_LIB_WIDEN);
31781
31782 x = gen_reg_rtx (wmode);
31783 emit_move_insn (x, gen_lowpart (wmode, const_vec));
31784 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
31785
31786 emit_move_insn (target, gen_lowpart (mode, x));
31787 return true;
31788
31789 default:
31790 return false;
31791 }
31792
31793 emit_move_insn (target, const_vec);
31794 ix86_expand_vector_set (mmx_ok, target, var, one_var);
31795 return true;
31796 }
31797
31798 /* A subroutine of ix86_expand_vector_init_general. Use vector
31799 concatenate to handle the most general case: all values variable,
31800 and none identical. */
31801
31802 static void
31803 ix86_expand_vector_init_concat (enum machine_mode mode,
31804 rtx target, rtx *ops, int n)
31805 {
31806 enum machine_mode cmode, hmode = VOIDmode;
31807 rtx first[8], second[4];
31808 rtvec v;
31809 int i, j;
31810
31811 switch (n)
31812 {
31813 case 2:
31814 switch (mode)
31815 {
31816 case V8SImode:
31817 cmode = V4SImode;
31818 break;
31819 case V8SFmode:
31820 cmode = V4SFmode;
31821 break;
31822 case V4DImode:
31823 cmode = V2DImode;
31824 break;
31825 case V4DFmode:
31826 cmode = V2DFmode;
31827 break;
31828 case V4SImode:
31829 cmode = V2SImode;
31830 break;
31831 case V4SFmode:
31832 cmode = V2SFmode;
31833 break;
31834 case V2DImode:
31835 cmode = DImode;
31836 break;
31837 case V2SImode:
31838 cmode = SImode;
31839 break;
31840 case V2DFmode:
31841 cmode = DFmode;
31842 break;
31843 case V2SFmode:
31844 cmode = SFmode;
31845 break;
31846 default:
31847 gcc_unreachable ();
31848 }
31849
31850 if (!register_operand (ops[1], cmode))
31851 ops[1] = force_reg (cmode, ops[1]);
31852 if (!register_operand (ops[0], cmode))
31853 ops[0] = force_reg (cmode, ops[0]);
31854 emit_insn (gen_rtx_SET (VOIDmode, target,
31855 gen_rtx_VEC_CONCAT (mode, ops[0],
31856 ops[1])));
31857 break;
31858
31859 case 4:
31860 switch (mode)
31861 {
31862 case V4DImode:
31863 cmode = V2DImode;
31864 break;
31865 case V4DFmode:
31866 cmode = V2DFmode;
31867 break;
31868 case V4SImode:
31869 cmode = V2SImode;
31870 break;
31871 case V4SFmode:
31872 cmode = V2SFmode;
31873 break;
31874 default:
31875 gcc_unreachable ();
31876 }
31877 goto half;
31878
31879 case 8:
31880 switch (mode)
31881 {
31882 case V8SImode:
31883 cmode = V2SImode;
31884 hmode = V4SImode;
31885 break;
31886 case V8SFmode:
31887 cmode = V2SFmode;
31888 hmode = V4SFmode;
31889 break;
31890 default:
31891 gcc_unreachable ();
31892 }
31893 goto half;
31894
31895 half:
31896 /* FIXME: We process inputs backward to help RA. PR 36222. */
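/* E.g. for V8SFmode the eight scalars are first paired into four V2SFmode
   registers, those into two V4SFmode halves, and the halves are then
   concatenated into the V8SFmode target. */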
31897 i = n - 1;
31898 j = (n >> 1) - 1;
31899 for (; i > 0; i -= 2, j--)
31900 {
31901 first[j] = gen_reg_rtx (cmode);
31902 v = gen_rtvec (2, ops[i - 1], ops[i]);
31903 ix86_expand_vector_init (false, first[j],
31904 gen_rtx_PARALLEL (cmode, v));
31905 }
31906
31907 n >>= 1;
31908 if (n > 2)
31909 {
31910 gcc_assert (hmode != VOIDmode);
31911 for (i = j = 0; i < n; i += 2, j++)
31912 {
31913 second[j] = gen_reg_rtx (hmode);
31914 ix86_expand_vector_init_concat (hmode, second [j],
31915 &first [i], 2);
31916 }
31917 n >>= 1;
31918 ix86_expand_vector_init_concat (mode, target, second, n);
31919 }
31920 else
31921 ix86_expand_vector_init_concat (mode, target, first, n);
31922 break;
31923
31924 default:
31925 gcc_unreachable ();
31926 }
31927 }
31928
31929 /* A subroutine of ix86_expand_vector_init_general. Use vector
31930 interleave to handle the most general case: all values variable,
31931 and none identical. */
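/* For V8HImode, for instance, each pair of scalars is loaded into elements
   0 and 1 of its own vector; those vectors are then interleaved as V4SImode
   and finally as V2DImode to assemble the full result. */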
31932
31933 static void
31934 ix86_expand_vector_init_interleave (enum machine_mode mode,
31935 rtx target, rtx *ops, int n)
31936 {
31937 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
31938 int i, j;
31939 rtx op0, op1;
31940 rtx (*gen_load_even) (rtx, rtx, rtx);
31941 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
31942 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
31943
31944 switch (mode)
31945 {
31946 case V8HImode:
31947 gen_load_even = gen_vec_setv8hi;
31948 gen_interleave_first_low = gen_vec_interleave_lowv4si;
31949 gen_interleave_second_low = gen_vec_interleave_lowv2di;
31950 inner_mode = HImode;
31951 first_imode = V4SImode;
31952 second_imode = V2DImode;
31953 third_imode = VOIDmode;
31954 break;
31955 case V16QImode:
31956 gen_load_even = gen_vec_setv16qi;
31957 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
31958 gen_interleave_second_low = gen_vec_interleave_lowv4si;
31959 inner_mode = QImode;
31960 first_imode = V8HImode;
31961 second_imode = V4SImode;
31962 third_imode = V2DImode;
31963 break;
31964 default:
31965 gcc_unreachable ();
31966 }
31967
31968 for (i = 0; i < n; i++)
31969 {
31970 /* Extend the odd element to SImode using a paradoxical SUBREG. */
31971 op0 = gen_reg_rtx (SImode);
31972 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
31973
31974 /* Insert the SImode value as low element of V4SImode vector. */
31975 op1 = gen_reg_rtx (V4SImode);
31976 op0 = gen_rtx_VEC_MERGE (V4SImode,
31977 gen_rtx_VEC_DUPLICATE (V4SImode,
31978 op0),
31979 CONST0_RTX (V4SImode),
31980 const1_rtx);
31981 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
31982
31983 /* Cast the V4SImode vector back to a vector in the original mode. */
31984 op0 = gen_reg_rtx (mode);
31985 emit_move_insn (op0, gen_lowpart (mode, op1));
31986
31987 /* Load even elements into the second position. */
31988 emit_insn (gen_load_even (op0,
31989 force_reg (inner_mode,
31990 ops [i + i + 1]),
31991 const1_rtx));
31992
31993 /* Cast vector to FIRST_IMODE vector. */
31994 ops[i] = gen_reg_rtx (first_imode);
31995 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
31996 }
31997
31998 /* Interleave low FIRST_IMODE vectors. */
31999 for (i = j = 0; i < n; i += 2, j++)
32000 {
32001 op0 = gen_reg_rtx (first_imode);
32002 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
32003
32004 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
32005 ops[j] = gen_reg_rtx (second_imode);
32006 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
32007 }
32008
32009 /* Interleave low SECOND_IMODE vectors. */
32010 switch (second_imode)
32011 {
32012 case V4SImode:
32013 for (i = j = 0; i < n / 2; i += 2, j++)
32014 {
32015 op0 = gen_reg_rtx (second_imode);
32016 emit_insn (gen_interleave_second_low (op0, ops[i],
32017 ops[i + 1]));
32018
32019 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
32020 vector. */
32021 ops[j] = gen_reg_rtx (third_imode);
32022 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
32023 }
32024 second_imode = V2DImode;
32025 gen_interleave_second_low = gen_vec_interleave_lowv2di;
32026 /* FALLTHRU */
32027
32028 case V2DImode:
32029 op0 = gen_reg_rtx (second_imode);
32030 emit_insn (gen_interleave_second_low (op0, ops[0],
32031 ops[1]));
32032
32033 /* Cast the SECOND_IMODE vector back to a vector in the original
32034 mode. */
32035 emit_insn (gen_rtx_SET (VOIDmode, target,
32036 gen_lowpart (mode, op0)));
32037 break;
32038
32039 default:
32040 gcc_unreachable ();
32041 }
32042 }
32043
32044 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
32045 all values variable, and none identical. */
32046
32047 static void
32048 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
32049 rtx target, rtx vals)
32050 {
32051 rtx ops[32], op0, op1;
32052 enum machine_mode half_mode = VOIDmode;
32053 int n, i;
32054
32055 switch (mode)
32056 {
32057 case V2SFmode:
32058 case V2SImode:
32059 if (!mmx_ok && !TARGET_SSE)
32060 break;
32061 /* FALLTHRU */
32062
32063 case V8SFmode:
32064 case V8SImode:
32065 case V4DFmode:
32066 case V4DImode:
32067 case V4SFmode:
32068 case V4SImode:
32069 case V2DFmode:
32070 case V2DImode:
32071 n = GET_MODE_NUNITS (mode);
32072 for (i = 0; i < n; i++)
32073 ops[i] = XVECEXP (vals, 0, i);
32074 ix86_expand_vector_init_concat (mode, target, ops, n);
32075 return;
32076
32077 case V32QImode:
32078 half_mode = V16QImode;
32079 goto half;
32080
32081 case V16HImode:
32082 half_mode = V8HImode;
32083 goto half;
32084
32085 half:
32086 n = GET_MODE_NUNITS (mode);
32087 for (i = 0; i < n; i++)
32088 ops[i] = XVECEXP (vals, 0, i);
32089 op0 = gen_reg_rtx (half_mode);
32090 op1 = gen_reg_rtx (half_mode);
32091 ix86_expand_vector_init_interleave (half_mode, op0, ops,
32092 n >> 2);
32093 ix86_expand_vector_init_interleave (half_mode, op1,
32094 &ops [n >> 1], n >> 2);
32095 emit_insn (gen_rtx_SET (VOIDmode, target,
32096 gen_rtx_VEC_CONCAT (mode, op0, op1)));
32097 return;
32098
32099 case V16QImode:
32100 if (!TARGET_SSE4_1)
32101 break;
32102 /* FALLTHRU */
32103
32104 case V8HImode:
32105 if (!TARGET_SSE2)
32106 break;
32107
32108 /* Don't use ix86_expand_vector_init_interleave if we can't
32109 move from GPR to SSE register directly. */
32110 if (!TARGET_INTER_UNIT_MOVES)
32111 break;
32112
32113 n = GET_MODE_NUNITS (mode);
32114 for (i = 0; i < n; i++)
32115 ops[i] = XVECEXP (vals, 0, i);
32116 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
32117 return;
32118
32119 case V4HImode:
32120 case V8QImode:
32121 break;
32122
32123 default:
32124 gcc_unreachable ();
32125 }
32126
32127 {
32128 int i, j, n_elts, n_words, n_elt_per_word;
32129 enum machine_mode inner_mode;
32130 rtx words[4], shift;
32131
32132 inner_mode = GET_MODE_INNER (mode);
32133 n_elts = GET_MODE_NUNITS (mode);
32134 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
32135 n_elt_per_word = n_elts / n_words;
32136 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
32137
32138 for (i = 0; i < n_words; ++i)
32139 {
32140 rtx word = NULL_RTX;
32141
32142 for (j = 0; j < n_elt_per_word; ++j)
32143 {
32144 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
32145 elt = convert_modes (word_mode, inner_mode, elt, true);
32146
32147 if (j == 0)
32148 word = elt;
32149 else
32150 {
32151 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
32152 word, 1, OPTAB_LIB_WIDEN);
32153 word = expand_simple_binop (word_mode, IOR, word, elt,
32154 word, 1, OPTAB_LIB_WIDEN);
32155 }
32156 }
32157
32158 words[i] = word;
32159 }
32160
32161 if (n_words == 1)
32162 emit_move_insn (target, gen_lowpart (mode, words[0]));
32163 else if (n_words == 2)
32164 {
32165 rtx tmp = gen_reg_rtx (mode);
32166 emit_clobber (tmp);
32167 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
32168 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
32169 emit_move_insn (target, tmp);
32170 }
32171 else if (n_words == 4)
32172 {
32173 rtx tmp = gen_reg_rtx (V4SImode);
32174 gcc_assert (word_mode == SImode);
32175 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
32176 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
32177 emit_move_insn (target, gen_lowpart (mode, tmp));
32178 }
32179 else
32180 gcc_unreachable ();
32181 }
32182 }
32183
32184 /* Initialize vector TARGET via VALS. Suppress the use of MMX
32185 instructions unless MMX_OK is true. */
32186
32187 void
32188 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
32189 {
32190 enum machine_mode mode = GET_MODE (target);
32191 enum machine_mode inner_mode = GET_MODE_INNER (mode);
32192 int n_elts = GET_MODE_NUNITS (mode);
32193 int n_var = 0, one_var = -1;
32194 bool all_same = true, all_const_zero = true;
32195 int i;
32196 rtx x;
32197
32198 for (i = 0; i < n_elts; ++i)
32199 {
32200 x = XVECEXP (vals, 0, i);
32201 if (!(CONST_INT_P (x)
32202 || GET_CODE (x) == CONST_DOUBLE
32203 || GET_CODE (x) == CONST_FIXED))
32204 n_var++, one_var = i;
32205 else if (x != CONST0_RTX (inner_mode))
32206 all_const_zero = false;
32207 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
32208 all_same = false;
32209 }
32210
32211 /* Constants are best loaded from the constant pool. */
32212 if (n_var == 0)
32213 {
32214 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
32215 return;
32216 }
32217
32218 /* If all values are identical, broadcast the value. */
32219 if (all_same
32220 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
32221 XVECEXP (vals, 0, 0)))
32222 return;
32223
32224 /* Values where only one field is non-constant are best loaded from
32225 the pool and overwritten via move later. */
32226 if (n_var == 1)
32227 {
32228 if (all_const_zero
32229 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
32230 XVECEXP (vals, 0, one_var),
32231 one_var))
32232 return;
32233
32234 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
32235 return;
32236 }
32237
32238 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
32239 }
32240
32241 void
32242 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
32243 {
32244 enum machine_mode mode = GET_MODE (target);
32245 enum machine_mode inner_mode = GET_MODE_INNER (mode);
32246 enum machine_mode half_mode;
32247 bool use_vec_merge = false;
32248 rtx tmp;
32249 static rtx (*gen_extract[6][2]) (rtx, rtx)
32250 = {
32251 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
32252 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
32253 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
32254 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
32255 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
32256 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
32257 };
32258 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
32259 = {
32260 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
32261 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
32262 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
32263 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
32264 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
32265 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
32266 };
32267 int i, j, n;
32268
32269 switch (mode)
32270 {
32271 case V2SFmode:
32272 case V2SImode:
32273 if (mmx_ok)
32274 {
32275 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
32276 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
32277 if (elt == 0)
32278 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
32279 else
32280 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
32281 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
32282 return;
32283 }
32284 break;
32285
32286 case V2DImode:
32287 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
32288 if (use_vec_merge)
32289 break;
32290
32291 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
32292 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
32293 if (elt == 0)
32294 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
32295 else
32296 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
32297 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
32298 return;
32299
32300 case V2DFmode:
32301 {
32302 rtx op0, op1;
32303
32304 /* For the two element vectors, we implement a VEC_CONCAT with
32305 the extraction of the other element. */
32306
32307 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
32308 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
32309
32310 if (elt == 0)
32311 op0 = val, op1 = tmp;
32312 else
32313 op0 = tmp, op1 = val;
32314
32315 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
32316 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
32317 }
32318 return;
32319
32320 case V4SFmode:
32321 use_vec_merge = TARGET_SSE4_1;
32322 if (use_vec_merge)
32323 break;
32324
32325 switch (elt)
32326 {
32327 case 0:
32328 use_vec_merge = true;
32329 break;
32330
32331 case 1:
32332 /* tmp = target = A B C D */
32333 tmp = copy_to_reg (target);
32334 /* target = A A B B */
32335 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
32336 /* target = X A B B */
32337 ix86_expand_vector_set (false, target, val, 0);
32338 /* target = A X C D */
32339 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
32340 const1_rtx, const0_rtx,
32341 GEN_INT (2+4), GEN_INT (3+4)));
32342 return;
32343
32344 case 2:
32345 /* tmp = target = A B C D */
32346 tmp = copy_to_reg (target);
32347 /* tmp = X B C D */
32348 ix86_expand_vector_set (false, tmp, val, 0);
32349 /* target = A B X D */
32350 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
32351 const0_rtx, const1_rtx,
32352 GEN_INT (0+4), GEN_INT (3+4)));
32353 return;
32354
32355 case 3:
32356 /* tmp = target = A B C D */
32357 tmp = copy_to_reg (target);
32358 /* tmp = X B C D */
32359 ix86_expand_vector_set (false, tmp, val, 0);
32360 /* target = A B C X */
32361 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
32362 const0_rtx, const1_rtx,
32363 GEN_INT (2+4), GEN_INT (0+4)));
32364 return;
32365
32366 default:
32367 gcc_unreachable ();
32368 }
32369 break;
32370
32371 case V4SImode:
32372 use_vec_merge = TARGET_SSE4_1;
32373 if (use_vec_merge)
32374 break;
32375
32376 /* Element 0 handled by vec_merge below. */
32377 if (elt == 0)
32378 {
32379 use_vec_merge = true;
32380 break;
32381 }
32382
32383 if (TARGET_SSE2)
32384 {
32385 /* With SSE2, use integer shuffles to swap element 0 and ELT,
32386 store into element 0, then shuffle them back. */
32387
32388 rtx order[4];
32389
32390 order[0] = GEN_INT (elt);
32391 order[1] = const1_rtx;
32392 order[2] = const2_rtx;
32393 order[3] = GEN_INT (3);
32394 order[elt] = const0_rtx;
32395
32396 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
32397 order[1], order[2], order[3]));
32398
32399 ix86_expand_vector_set (false, target, val, 0);
32400
32401 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
32402 order[1], order[2], order[3]));
32403 }
32404 else
32405 {
32406 /* For SSE1, we have to reuse the V4SF code. */
32407 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
32408 gen_lowpart (SFmode, val), elt);
32409 }
32410 return;
32411
32412 case V8HImode:
32413 use_vec_merge = TARGET_SSE2;
32414 break;
32415 case V4HImode:
32416 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
32417 break;
32418
32419 case V16QImode:
32420 use_vec_merge = TARGET_SSE4_1;
32421 break;
32422
32423 case V8QImode:
32424 break;
32425
32426 case V32QImode:
32427 half_mode = V16QImode;
32428 j = 0;
32429 n = 16;
32430 goto half;
32431
32432 case V16HImode:
32433 half_mode = V8HImode;
32434 j = 1;
32435 n = 8;
32436 goto half;
32437
32438 case V8SImode:
32439 half_mode = V4SImode;
32440 j = 2;
32441 n = 4;
32442 goto half;
32443
32444 case V4DImode:
32445 half_mode = V2DImode;
32446 j = 3;
32447 n = 2;
32448 goto half;
32449
32450 case V8SFmode:
32451 half_mode = V4SFmode;
32452 j = 4;
32453 n = 4;
32454 goto half;
32455
32456 case V4DFmode:
32457 half_mode = V2DFmode;
32458 j = 5;
32459 n = 2;
32460 goto half;
32461
32462 half:
32463 /* Compute offset. */
32464 i = elt / n;
32465 elt %= n;
32466
32467 gcc_assert (i <= 1);
32468
32469 /* Extract the half. */
32470 tmp = gen_reg_rtx (half_mode);
32471 emit_insn (gen_extract[j][i] (tmp, target));
32472
32473 /* Put val in tmp at elt. */
32474 ix86_expand_vector_set (false, tmp, val, elt);
32475
32476 /* Put it back. */
32477 emit_insn (gen_insert[j][i] (target, target, tmp));
32478 return;
32479
32480 default:
32481 break;
32482 }
32483
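/* Either emit a vec_merge of a broadcast VAL with TARGET, keyed by a
   one-hot immediate selecting element ELT, or fall back to spilling the
   vector to a stack slot, storing VAL into the element's location and
   reloading the whole vector. */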
32484 if (use_vec_merge)
32485 {
32486 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
32487 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
32488 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
32489 }
32490 else
32491 {
32492 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
32493
32494 emit_move_insn (mem, target);
32495
32496 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
32497 emit_move_insn (tmp, val);
32498
32499 emit_move_insn (target, mem);
32500 }
32501 }
32502
32503 void
32504 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
32505 {
32506 enum machine_mode mode = GET_MODE (vec);
32507 enum machine_mode inner_mode = GET_MODE_INNER (mode);
32508 bool use_vec_extr = false;
32509 rtx tmp;
32510
32511 switch (mode)
32512 {
32513 case V2SImode:
32514 case V2SFmode:
32515 if (!mmx_ok)
32516 break;
32517 /* FALLTHRU */
32518
32519 case V2DFmode:
32520 case V2DImode:
32521 use_vec_extr = true;
32522 break;
32523
32524 case V4SFmode:
32525 use_vec_extr = TARGET_SSE4_1;
32526 if (use_vec_extr)
32527 break;
32528
32529 switch (elt)
32530 {
32531 case 0:
32532 tmp = vec;
32533 break;
32534
32535 case 1:
32536 case 3:
32537 tmp = gen_reg_rtx (mode);
32538 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
32539 GEN_INT (elt), GEN_INT (elt),
32540 GEN_INT (elt+4), GEN_INT (elt+4)));
32541 break;
32542
32543 case 2:
32544 tmp = gen_reg_rtx (mode);
32545 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
32546 break;
32547
32548 default:
32549 gcc_unreachable ();
32550 }
32551 vec = tmp;
32552 use_vec_extr = true;
32553 elt = 0;
32554 break;
32555
32556 case V4SImode:
32557 use_vec_extr = TARGET_SSE4_1;
32558 if (use_vec_extr)
32559 break;
32560
32561 if (TARGET_SSE2)
32562 {
32563 switch (elt)
32564 {
32565 case 0:
32566 tmp = vec;
32567 break;
32568
32569 case 1:
32570 case 3:
32571 tmp = gen_reg_rtx (mode);
32572 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
32573 GEN_INT (elt), GEN_INT (elt),
32574 GEN_INT (elt), GEN_INT (elt)));
32575 break;
32576
32577 case 2:
32578 tmp = gen_reg_rtx (mode);
32579 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
32580 break;
32581
32582 default:
32583 gcc_unreachable ();
32584 }
32585 vec = tmp;
32586 use_vec_extr = true;
32587 elt = 0;
32588 }
32589 else
32590 {
32591 /* For SSE1, we have to reuse the V4SF code. */
32592 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
32593 gen_lowpart (V4SFmode, vec), elt);
32594 return;
32595 }
32596 break;
32597
32598 case V8HImode:
32599 use_vec_extr = TARGET_SSE2;
32600 break;
32601 case V4HImode:
32602 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
32603 break;
32604
32605 case V16QImode:
32606 use_vec_extr = TARGET_SSE4_1;
32607 break;
32608
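/* For the 256-bit AVX modes below, extract the 128-bit lane containing
   ELT and recurse on it with the element index reduced modulo the lane's
   element count. */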
32609 case V8SFmode:
32610 if (TARGET_AVX)
32611 {
32612 tmp = gen_reg_rtx (V4SFmode);
32613 if (elt < 4)
32614 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
32615 else
32616 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
32617 ix86_expand_vector_extract (false, target, tmp, elt & 3);
32618 return;
32619 }
32620 break;
32621
32622 case V4DFmode:
32623 if (TARGET_AVX)
32624 {
32625 tmp = gen_reg_rtx (V2DFmode);
32626 if (elt < 2)
32627 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
32628 else
32629 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
32630 ix86_expand_vector_extract (false, target, tmp, elt & 1);
32631 return;
32632 }
32633 break;
32634
32635 case V32QImode:
32636 if (TARGET_AVX)
32637 {
32638 tmp = gen_reg_rtx (V16QImode);
32639 if (elt < 16)
32640 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
32641 else
32642 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
32643 ix86_expand_vector_extract (false, target, tmp, elt & 15);
32644 return;
32645 }
32646 break;
32647
32648 case V16HImode:
32649 if (TARGET_AVX)
32650 {
32651 tmp = gen_reg_rtx (V8HImode);
32652 if (elt < 8)
32653 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
32654 else
32655 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
32656 ix86_expand_vector_extract (false, target, tmp, elt & 7);
32657 return;
32658 }
32659 break;
32660
32661 case V8SImode:
32662 if (TARGET_AVX)
32663 {
32664 tmp = gen_reg_rtx (V4SImode);
32665 if (elt < 4)
32666 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
32667 else
32668 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
32669 ix86_expand_vector_extract (false, target, tmp, elt & 3);
32670 return;
32671 }
32672 break;
32673
32674 case V4DImode:
32675 if (TARGET_AVX)
32676 {
32677 tmp = gen_reg_rtx (V2DImode);
32678 if (elt < 2)
32679 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
32680 else
32681 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
32682 ix86_expand_vector_extract (false, target, tmp, elt & 1);
32683 return;
32684 }
32685 break;
32686
32687 case V8QImode:
32688 /* ??? Could extract the appropriate HImode element and shift. */
32689 default:
32690 break;
32691 }
32692
32693 if (use_vec_extr)
32694 {
32695 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
32696 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
32697
32698 /* Let the rtl optimizers know about the zero extension performed. */
32699 if (inner_mode == QImode || inner_mode == HImode)
32700 {
32701 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
32702 target = gen_lowpart (SImode, target);
32703 }
32704
32705 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
32706 }
32707 else
32708 {
32709 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
32710
32711 emit_move_insn (mem, vec);
32712
32713 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
32714 emit_move_insn (target, tmp);
32715 }
32716 }
32717
32718 /* Expand a vector reduction. FN is the binary pattern to reduce;
32719 DEST is the destination; IN is the input vector. */
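/* For V4SFmode, for example, movhlps folds the upper half onto the lower
   half, FN combines the two, a shufps then broadcasts element 1 of that
   result, and the final FN leaves the reduction of all four elements in
   element 0 of DEST. */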
32720
32721 void
32722 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
32723 {
32724 rtx tmp1, tmp2, tmp3, tmp4, tmp5;
32725 enum machine_mode mode = GET_MODE (in);
32726 int i;
32727
32728 tmp1 = gen_reg_rtx (mode);
32729 tmp2 = gen_reg_rtx (mode);
32730 tmp3 = gen_reg_rtx (mode);
32731
32732 switch (mode)
32733 {
32734 case V4SFmode:
32735 emit_insn (gen_sse_movhlps (tmp1, in, in));
32736 emit_insn (fn (tmp2, tmp1, in));
32737 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
32738 const1_rtx, const1_rtx,
32739 GEN_INT (1+4), GEN_INT (1+4)));
32740 break;
32741 case V8SFmode:
32742 tmp4 = gen_reg_rtx (mode);
32743 tmp5 = gen_reg_rtx (mode);
32744 emit_insn (gen_avx_vperm2f128v8sf3 (tmp4, in, in, const1_rtx));
32745 emit_insn (fn (tmp5, tmp4, in));
32746 emit_insn (gen_avx_shufps256 (tmp1, tmp5, tmp5, GEN_INT (2+12)));
32747 emit_insn (fn (tmp2, tmp1, tmp5));
32748 emit_insn (gen_avx_shufps256 (tmp3, tmp2, tmp2, const1_rtx));
32749 break;
32750 case V4DFmode:
32751 emit_insn (gen_avx_vperm2f128v4df3 (tmp1, in, in, const1_rtx));
32752 emit_insn (fn (tmp2, tmp1, in));
32753 emit_insn (gen_avx_shufpd256 (tmp3, tmp2, tmp2, const1_rtx));
32754 break;
32755 case V32QImode:
32756 case V16HImode:
32757 case V8SImode:
32758 case V4DImode:
32759 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, tmp1),
32760 gen_lowpart (V4DImode, in),
32761 gen_lowpart (V4DImode, in),
32762 const1_rtx));
32763 tmp4 = in;
32764 tmp5 = tmp1;
32765 for (i = 64; i >= GET_MODE_BITSIZE (GET_MODE_INNER (mode)); i >>= 1)
32766 {
32767 if (i != 64)
32768 {
32769 tmp2 = gen_reg_rtx (mode);
32770 tmp3 = gen_reg_rtx (mode);
32771 }
32772 emit_insn (fn (tmp2, tmp4, tmp5));
32773 emit_insn (gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, tmp3),
32774 gen_lowpart (V2TImode, tmp2),
32775 GEN_INT (i)));
32776 tmp4 = tmp2;
32777 tmp5 = tmp3;
32778 }
32779 break;
32780 default:
32781 gcc_unreachable ();
32782 }
32783 emit_insn (fn (dest, tmp2, tmp3));
32784 }
32785 \f
32786 /* Target hook for scalar_mode_supported_p. */
32787 static bool
32788 ix86_scalar_mode_supported_p (enum machine_mode mode)
32789 {
32790 if (DECIMAL_FLOAT_MODE_P (mode))
32791 return default_decimal_float_supported_p ();
32792 else if (mode == TFmode)
32793 return true;
32794 else
32795 return default_scalar_mode_supported_p (mode);
32796 }
32797
32798 /* Implements target hook vector_mode_supported_p. */
32799 static bool
32800 ix86_vector_mode_supported_p (enum machine_mode mode)
32801 {
32802 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
32803 return true;
32804 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
32805 return true;
32806 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
32807 return true;
32808 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
32809 return true;
32810 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
32811 return true;
32812 return false;
32813 }
32814
32815 /* Target hook for c_mode_for_suffix. */
32816 static enum machine_mode
32817 ix86_c_mode_for_suffix (char suffix)
32818 {
32819 if (suffix == 'q')
32820 return TFmode;
32821 if (suffix == 'w')
32822 return XFmode;
32823
32824 return VOIDmode;
32825 }
32826
32827 /* Worker function for TARGET_MD_ASM_CLOBBERS.
32828
32829 We do this in the new i386 backend to maintain source compatibility
32830 with the old cc0-based compiler. */
32831
32832 static tree
32833 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
32834 tree inputs ATTRIBUTE_UNUSED,
32835 tree clobbers)
32836 {
32837 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
32838 clobbers);
32839 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
32840 clobbers);
32841 return clobbers;
32842 }
32843
32844 /* Implement the TARGET_ENCODE_SECTION_INFO target hook. */
32845
32846 static void ATTRIBUTE_UNUSED
32847 ix86_encode_section_info (tree decl, rtx rtl, int first)
32848 {
32849 default_encode_section_info (decl, rtl, first);
32850
32851 if (TREE_CODE (decl) == VAR_DECL
32852 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
32853 && ix86_in_large_data_p (decl))
32854 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
32855 }
32856
32857 /* Worker function for REVERSE_CONDITION. */
32858
32859 enum rtx_code
32860 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
32861 {
32862 return (mode != CCFPmode && mode != CCFPUmode
32863 ? reverse_condition (code)
32864 : reverse_condition_maybe_unordered (code));
32865 }
32866
32867 /* Output code to perform an x87 FP register move, from OPERANDS[1]
32868 to OPERANDS[0]. */
32869
32870 const char *
32871 output_387_reg_move (rtx insn, rtx *operands)
32872 {
32873 if (REG_P (operands[0]))
32874 {
32875 if (REG_P (operands[1])
32876 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
32877 {
32878 if (REGNO (operands[0]) == FIRST_STACK_REG)
32879 return output_387_ffreep (operands, 0);
32880 return "fstp\t%y0";
32881 }
32882 if (STACK_TOP_P (operands[0]))
32883 return "fld%Z1\t%y1";
32884 return "fst\t%y0";
32885 }
32886 else if (MEM_P (operands[0]))
32887 {
32888 gcc_assert (REG_P (operands[1]));
32889 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
32890 return "fstp%Z0\t%y0";
32891 else
32892 {
32893 /* There is no non-popping store to memory for XFmode.
32894 So if we need one, follow the store with a load. */
32895 if (GET_MODE (operands[0]) == XFmode)
32896 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
32897 else
32898 return "fst%Z0\t%y0";
32899 }
32900 }
32901 else
32902 gcc_unreachable();
32903 }
32904
32905 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
32906 the FP status register is set. */
32907
32908 void
32909 ix86_emit_fp_unordered_jump (rtx label)
32910 {
32911 rtx reg = gen_reg_rtx (HImode);
32912 rtx temp;
32913
32914 emit_insn (gen_x86_fnstsw_1 (reg));
32915
32916 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
32917 {
32918 emit_insn (gen_x86_sahf_1 (reg));
32919
32920 temp = gen_rtx_REG (CCmode, FLAGS_REG);
32921 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
32922 }
32923 else
32924 {
32925 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
32926
32927 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
32928 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
32929 }
32930
32931 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
32932 gen_rtx_LABEL_REF (VOIDmode, label),
32933 pc_rtx);
32934 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
32935
32936 emit_jump_insn (temp);
32937 predict_jump (REG_BR_PROB_BASE * 10 / 100);
32938 }
32939
32940 /* Output code to perform a log1p XFmode calculation. */
32941
32942 void ix86_emit_i387_log1p (rtx op0, rtx op1)
32943 {
32944 rtx label1 = gen_label_rtx ();
32945 rtx label2 = gen_label_rtx ();
32946
32947 rtx tmp = gen_reg_rtx (XFmode);
32948 rtx tmp2 = gen_reg_rtx (XFmode);
32949 rtx test;
32950
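/* fyl2xp1 is only specified for |op1| below 1 - sqrt(2)/2 (about
   0.2928932); larger magnitudes take the label1 path and compute
   fyl2x on 1.0 + op1 instead. */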
32951 emit_insn (gen_absxf2 (tmp, op1));
32952 test = gen_rtx_GE (VOIDmode, tmp,
32953 CONST_DOUBLE_FROM_REAL_VALUE (
32954 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
32955 XFmode));
32956 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
32957
32958 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
32959 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
32960 emit_jump (label2);
32961
32962 emit_label (label1);
32963 emit_move_insn (tmp, CONST1_RTX (XFmode));
32964 emit_insn (gen_addxf3 (tmp, op1, tmp));
32965 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
32966 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
32967
32968 emit_label (label2);
32969 }
32970
32971 /* Output x87 code to compute round (OP1), storing the result in OP0. */
32972 void ix86_emit_i387_round (rtx op0, rtx op1)
32973 {
32974 enum machine_mode inmode = GET_MODE (op1);
32975 enum machine_mode outmode = GET_MODE (op0);
32976 rtx e1, e2, res, tmp, tmp1, half;
32977 rtx scratch = gen_reg_rtx (HImode);
32978 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
32979 rtx jump_label = gen_label_rtx ();
32980 rtx insn;
32981 rtx (*gen_abs) (rtx, rtx);
32982 rtx (*gen_neg) (rtx, rtx);
32983
32984 switch (inmode)
32985 {
32986 case SFmode:
32987 gen_abs = gen_abssf2;
32988 break;
32989 case DFmode:
32990 gen_abs = gen_absdf2;
32991 break;
32992 case XFmode:
32993 gen_abs = gen_absxf2;
32994 break;
32995 default:
32996 gcc_unreachable ();
32997 }
32998
32999 switch (outmode)
33000 {
33001 case SFmode:
33002 gen_neg = gen_negsf2;
33003 break;
33004 case DFmode:
33005 gen_neg = gen_negdf2;
33006 break;
33007 case XFmode:
33008 gen_neg = gen_negxf2;
33009 break;
33010 case HImode:
33011 gen_neg = gen_neghi2;
33012 break;
33013 case SImode:
33014 gen_neg = gen_negsi2;
33015 break;
33016 case DImode:
33017 gen_neg = gen_negdi2;
33018 break;
33019 default:
33020 gcc_unreachable ();
33021 }
33022
33023 e1 = gen_reg_rtx (inmode);
33024 e2 = gen_reg_rtx (inmode);
33025 res = gen_reg_rtx (outmode);
33026
33027 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
33028
33029 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
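/* E.g. round (2.5) = floor (2.5 + 0.5) = 3.0 and round (-2.5)
   = -floor (2.5 + 0.5) = -3.0, i.e. halfway cases round away from zero. */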
33030
33031 /* scratch = fxam(op1) */
33032 emit_insn (gen_rtx_SET (VOIDmode, scratch,
33033 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
33034 UNSPEC_FXAM)));
33035 /* e1 = fabs(op1) */
33036 emit_insn (gen_abs (e1, op1));
33037
33038 /* e2 = e1 + 0.5 */
33039 half = force_reg (inmode, half);
33040 emit_insn (gen_rtx_SET (VOIDmode, e2,
33041 gen_rtx_PLUS (inmode, e1, half)));
33042
33043 /* res = floor(e2) */
33044 if (inmode != XFmode)
33045 {
33046 tmp1 = gen_reg_rtx (XFmode);
33047
33048 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
33049 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
33050 }
33051 else
33052 tmp1 = e2;
33053
33054 switch (outmode)
33055 {
33056 case SFmode:
33057 case DFmode:
33058 {
33059 rtx tmp0 = gen_reg_rtx (XFmode);
33060
33061 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
33062
33063 emit_insn (gen_rtx_SET (VOIDmode, res,
33064 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
33065 UNSPEC_TRUNC_NOOP)));
33066 }
33067 break;
33068 case XFmode:
33069 emit_insn (gen_frndintxf2_floor (res, tmp1));
33070 break;
33071 case HImode:
33072 emit_insn (gen_lfloorxfhi2 (res, tmp1));
33073 break;
33074 case SImode:
33075 emit_insn (gen_lfloorxfsi2 (res, tmp1));
33076 break;
33077 case DImode:
33078 emit_insn (gen_lfloorxfdi2 (res, tmp1));
33079 break;
33080 default:
33081 gcc_unreachable ();
33082 }
33083
33084 /* flags = signbit(a) */
33085 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
33086
33087 /* if (flags) then res = -res */
33088 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
33089 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
33090 gen_rtx_LABEL_REF (VOIDmode, jump_label),
33091 pc_rtx);
33092 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
33093 predict_jump (REG_BR_PROB_BASE * 50 / 100);
33094 JUMP_LABEL (insn) = jump_label;
33095
33096 emit_insn (gen_neg (res, res));
33097
33098 emit_label (jump_label);
33099 LABEL_NUSES (jump_label) = 1;
33100
33101 emit_move_insn (op0, res);
33102 }
33103
33104 /* Output code to perform a Newton-Raphson approximation of a single precision
33105 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
33106
33107 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
33108 {
33109 rtx x0, x1, e0, e1;
33110
33111 x0 = gen_reg_rtx (mode);
33112 e0 = gen_reg_rtx (mode);
33113 e1 = gen_reg_rtx (mode);
33114 x1 = gen_reg_rtx (mode);
33115
33116 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
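/* This is one Newton-Raphson step for the reciprocal: with the initial
   estimate x0 = rcp(b), x1 = x0 * (2 - b * x0) = (x0 + x0) - b * x0 * x0,
   which roughly doubles the number of correct bits of the rcpss estimate. */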
33117
33118 /* x0 = rcp(b) estimate */
33119 emit_insn (gen_rtx_SET (VOIDmode, x0,
33120 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
33121 UNSPEC_RCP)));
33122 /* e0 = x0 * b */
33123 emit_insn (gen_rtx_SET (VOIDmode, e0,
33124 gen_rtx_MULT (mode, x0, b)));
33125
33126 /* e0 = x0 * e0 */
33127 emit_insn (gen_rtx_SET (VOIDmode, e0,
33128 gen_rtx_MULT (mode, x0, e0)));
33129
33130 /* e1 = x0 + x0 */
33131 emit_insn (gen_rtx_SET (VOIDmode, e1,
33132 gen_rtx_PLUS (mode, x0, x0)));
33133
33134 /* x1 = e1 - e0 */
33135 emit_insn (gen_rtx_SET (VOIDmode, x1,
33136 gen_rtx_MINUS (mode, e1, e0)));
33137
33138 /* res = a * x1 */
33139 emit_insn (gen_rtx_SET (VOIDmode, res,
33140 gen_rtx_MULT (mode, a, x1)));
33141 }
33142
33143 /* Output code to perform a Newton-Raphson approximation of a
33144 single precision floating point [reciprocal] square root. */
33145
33146 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
33147 bool recip)
33148 {
33149 rtx x0, e0, e1, e2, e3, mthree, mhalf;
33150 REAL_VALUE_TYPE r;
33151
33152 x0 = gen_reg_rtx (mode);
33153 e0 = gen_reg_rtx (mode);
33154 e1 = gen_reg_rtx (mode);
33155 e2 = gen_reg_rtx (mode);
33156 e3 = gen_reg_rtx (mode);
33157
33158 real_from_integer (&r, VOIDmode, -3, -1, 0);
33159 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
33160
33161 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
33162 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
33163
33164 if (VECTOR_MODE_P (mode))
33165 {
33166 mthree = ix86_build_const_vector (mode, true, mthree);
33167 mhalf = ix86_build_const_vector (mode, true, mhalf);
33168 }
33169
33170 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
33171 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
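/* This is the Newton-Raphson step x1 = 0.5 * x0 * (3 - a * x0 * x0) for
   1/sqrt(a), with the signs folded so the code computes
   -0.5 * x0 * (a * x0 * x0 - 3); multiplying by a first turns the refined
   reciprocal square root into sqrt(a) itself. */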
33172
33173 /* x0 = rsqrt(a) estimate */
33174 emit_insn (gen_rtx_SET (VOIDmode, x0,
33175 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
33176 UNSPEC_RSQRT)));
33177
33178 /* If a == 0.0, mask out the infinite rsqrt estimate to avoid a NaN for sqrt (0.0). */
33179 if (!recip)
33180 {
33181 rtx zero, mask;
33182
33183 zero = gen_reg_rtx (mode);
33184 mask = gen_reg_rtx (mode);
33185
33186 zero = force_reg (mode, CONST0_RTX(mode));
33187 emit_insn (gen_rtx_SET (VOIDmode, mask,
33188 gen_rtx_NE (mode, zero, a)));
33189
33190 emit_insn (gen_rtx_SET (VOIDmode, x0,
33191 gen_rtx_AND (mode, x0, mask)));
33192 }
33193
33194 /* e0 = x0 * a */
33195 emit_insn (gen_rtx_SET (VOIDmode, e0,
33196 gen_rtx_MULT (mode, x0, a)));
33197 /* e1 = e0 * x0 */
33198 emit_insn (gen_rtx_SET (VOIDmode, e1,
33199 gen_rtx_MULT (mode, e0, x0)));
33200
33201 /* e2 = e1 - 3. */
33202 mthree = force_reg (mode, mthree);
33203 emit_insn (gen_rtx_SET (VOIDmode, e2,
33204 gen_rtx_PLUS (mode, e1, mthree)));
33205
33206 mhalf = force_reg (mode, mhalf);
33207 if (recip)
33208 /* e3 = -.5 * x0 */
33209 emit_insn (gen_rtx_SET (VOIDmode, e3,
33210 gen_rtx_MULT (mode, x0, mhalf)));
33211 else
33212 /* e3 = -.5 * e0 */
33213 emit_insn (gen_rtx_SET (VOIDmode, e3,
33214 gen_rtx_MULT (mode, e0, mhalf)));
33215 /* ret = e2 * e3 */
33216 emit_insn (gen_rtx_SET (VOIDmode, res,
33217 gen_rtx_MULT (mode, e2, e3)));
33218 }
33219
33220 #ifdef TARGET_SOLARIS
33221 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
33222
33223 static void
33224 i386_solaris_elf_named_section (const char *name, unsigned int flags,
33225 tree decl)
33226 {
33227 /* With Binutils 2.15, the "@unwind" marker must be specified on
33228 every occurrence of the ".eh_frame" section, not just the first
33229 one. */
33230 if (TARGET_64BIT
33231 && strcmp (name, ".eh_frame") == 0)
33232 {
33233 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
33234 flags & SECTION_WRITE ? "aw" : "a");
33235 return;
33236 }
33237
33238 #ifndef USE_GAS
33239 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
33240 {
33241 solaris_elf_asm_comdat_section (name, flags, decl);
33242 return;
33243 }
33244 #endif
33245
33246 default_elf_asm_named_section (name, flags, decl);
33247 }
33248 #endif /* TARGET_SOLARIS */
33249
33250 /* Return the mangling of TYPE if it is an extended fundamental type. */
33251
33252 static const char *
33253 ix86_mangle_type (const_tree type)
33254 {
33255 type = TYPE_MAIN_VARIANT (type);
33256
33257 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
33258 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
33259 return NULL;
33260
33261 switch (TYPE_MODE (type))
33262 {
33263 case TFmode:
33264 /* __float128 is "g". */
33265 return "g";
33266 case XFmode:
33267 /* "long double" or __float80 is "e". */
33268 return "e";
33269 default:
33270 return NULL;
33271 }
33272 }
33273
33274 /* For 32-bit code we can save PIC register setup by using
33275 __stack_chk_fail_local hidden function instead of calling
33276 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
33277 register, so it is better to call __stack_chk_fail directly. */
33278
33279 static tree ATTRIBUTE_UNUSED
33280 ix86_stack_protect_fail (void)
33281 {
33282 return TARGET_64BIT
33283 ? default_external_stack_protect_fail ()
33284 : default_hidden_stack_protect_fail ();
33285 }
33286
33287 /* Select a format to encode pointers in exception handling data. CODE
33288 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
33289 true if the symbol may be affected by dynamic relocations.
33290
33291 ??? All x86 object file formats are capable of representing this.
33292 After all, the relocation needed is the same as for the call insn.
33293 Whether or not a particular assembler allows us to enter such, I
33294 guess we'll have to see. */
33295 int
33296 asm_preferred_eh_data_format (int code, int global)
33297 {
33298 if (flag_pic)
33299 {
33300 int type = DW_EH_PE_sdata8;
33301 if (!TARGET_64BIT
33302 || ix86_cmodel == CM_SMALL_PIC
33303 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
33304 type = DW_EH_PE_sdata4;
33305 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
33306 }
33307 if (ix86_cmodel == CM_SMALL
33308 || (ix86_cmodel == CM_MEDIUM && code))
33309 return DW_EH_PE_udata4;
33310 return DW_EH_PE_absptr;
33311 }
33312 \f
33313 /* Expand copysign from SIGN to the positive value ABS_VALUE
33314 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
33315 the sign-bit. */
33316 static void
33317 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
33318 {
33319 enum machine_mode mode = GET_MODE (sign);
33320 rtx sgn = gen_reg_rtx (mode);
33321 if (mask == NULL_RTX)
33322 {
33323 enum machine_mode vmode;
33324
33325 if (mode == SFmode)
33326 vmode = V4SFmode;
33327 else if (mode == DFmode)
33328 vmode = V2DFmode;
33329 else
33330 vmode = mode;
33331
33332 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
33333 if (!VECTOR_MODE_P (mode))
33334 {
33335 /* We need to generate a scalar mode mask in this case. */
33336 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
33337 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
33338 mask = gen_reg_rtx (mode);
33339 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
33340 }
33341 }
33342 else
33343 mask = gen_rtx_NOT (mode, mask);
33344 emit_insn (gen_rtx_SET (VOIDmode, sgn,
33345 gen_rtx_AND (mode, mask, sign)));
33346 emit_insn (gen_rtx_SET (VOIDmode, result,
33347 gen_rtx_IOR (mode, abs_value, sgn)));
33348 }
33349
33350 /* Expand fabs (OP0) and return a new rtx that holds the result. The
33351 mask for masking out the sign-bit is stored in *SMASK, if that is
33352 non-null. */
33353 static rtx
33354 ix86_expand_sse_fabs (rtx op0, rtx *smask)
33355 {
33356 enum machine_mode vmode, mode = GET_MODE (op0);
33357 rtx xa, mask;
33358
33359 xa = gen_reg_rtx (mode);
33360 if (mode == SFmode)
33361 vmode = V4SFmode;
33362 else if (mode == DFmode)
33363 vmode = V2DFmode;
33364 else
33365 vmode = mode;
33366 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
33367 if (!VECTOR_MODE_P (mode))
33368 {
33369 /* We need to generate a scalar mode mask in this case. */
33370 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
33371 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
33372 mask = gen_reg_rtx (mode);
33373 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
33374 }
33375 emit_insn (gen_rtx_SET (VOIDmode, xa,
33376 gen_rtx_AND (mode, op0, mask)));
33377
33378 if (smask)
33379 *smask = mask;
33380
33381 return xa;
33382 }
33383
33384 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
33385 swapping the operands if SWAP_OPERANDS is true. The expanded
33386 code is a forward jump to a newly created label in case the
33387 comparison is true. The generated label rtx is returned. */
33388 static rtx
33389 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
33390 bool swap_operands)
33391 {
33392 rtx label, tmp;
33393
33394 if (swap_operands)
33395 {
33396 tmp = op0;
33397 op0 = op1;
33398 op1 = tmp;
33399 }
33400
33401 label = gen_label_rtx ();
33402 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
33403 emit_insn (gen_rtx_SET (VOIDmode, tmp,
33404 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
33405 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
33406 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
33407 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
33408 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
33409 JUMP_LABEL (tmp) = label;
33410
33411 return label;
33412 }
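/* Note on how the rounding expanders below use this helper: the C-level
   test "if (!isless (xa, TWO52)) goto label" is emitted as a single UNLE
   comparison with the operands reversed, i.e. jump when TWO52 is unordered
   with or less than or equal to xa, which is exactly !(xa < TWO52).  */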
33413
33414 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
33415 using comparison code CODE. Operands are swapped for the comparison if
33416 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
33417 static rtx
33418 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
33419 bool swap_operands)
33420 {
33421 rtx (*insn)(rtx, rtx, rtx, rtx);
33422 enum machine_mode mode = GET_MODE (op0);
33423 rtx mask = gen_reg_rtx (mode);
33424
33425 if (swap_operands)
33426 {
33427 rtx tmp = op0;
33428 op0 = op1;
33429 op1 = tmp;
33430 }
33431
33432 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
33433
33434 emit_insn (insn (mask, op0, op1,
33435 gen_rtx_fmt_ee (code, mode, op0, op1)));
33436 return mask;
33437 }
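/* Note for the callers below: the setcc_{df,sf}_sse patterns used here
   produce an all-ones or all-zeros bit pattern per element, so ANDing the
   mask with a floating-point constant yields either that constant or +0.0.
   Rough scalar model (illustrative only):

     double one = 1.0;
     uint64_t m = cmp ? ~(uint64_t) 0 : 0, b;
     memcpy (&b, &one, 8);
     b &= m;                       // bit pattern of 1.0, or all zeros
     memcpy (&one, &b, 8);         // now 1.0 or +0.0

   which is how the rounding expanders build their 0.0 / +-1.0 compensation
   terms.  */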
33438
33439 /* Generate and return a rtx of mode MODE for 2**n where n is the number
33440 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
33441 static rtx
33442 ix86_gen_TWO52 (enum machine_mode mode)
33443 {
33444 REAL_VALUE_TYPE TWO52r;
33445 rtx TWO52;
33446
33447 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
33448 TWO52 = const_double_from_real_value (TWO52r, mode);
33449 TWO52 = force_reg (mode, TWO52);
33450
33451 return TWO52;
33452 }
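/* Illustrative use of the constant built above: for any double x with
   0 <= x < 2**52, the expression (x + 0x1p52) - 0x1p52 yields x rounded to
   an integral value in the current rounding mode, because after the
   addition no mantissa bits are left for the fraction.  E.g. under
   round-to-nearest-even, (2.75 + 0x1p52) - 0x1p52 == 3.0.  */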
33453
33454 /* Expand SSE sequence for computing lround from OP1 storing
33455 into OP0. */
33456 void
33457 ix86_expand_lround (rtx op0, rtx op1)
33458 {
33459 /* C code for the stuff we're doing below:
33460 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
33461 return (long)tmp;
33462 */
33463 enum machine_mode mode = GET_MODE (op1);
33464 const struct real_format *fmt;
33465 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
33466 rtx adj;
33467
33468 /* load nextafter (0.5, 0.0) */
33469 fmt = REAL_MODE_FORMAT (mode);
33470 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
33471 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
33472
33473 /* adj = copysign (0.5, op1) */
33474 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
33475 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
33476
33477 /* adj = op1 + adj */
33478 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
33479
33480 /* op0 = (imode)adj */
33481 expand_fix (op0, adj, 0);
33482 }
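/* Why the code above loads nextafter (0.5, 0.0) instead of 0.5: for the
   largest double strictly below 0.5 (0x1.fffffffffffffp-2), adding 0.5
   would round up to 1.0 and lround would wrongly return 1; adding the
   predecessor of 0.5 keeps every such value below 1.0.  (Illustrative
   rationale; the emitted sequence is exactly the one commented above.)  */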
33483
33484 /* Expand SSE2 sequence for computing lfloor or lceil (depending on
33485 DO_FLOOR) from OP1 storing into OP0. */
33486 void
33487 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
33488 {
33489 /* C code for the stuff we're doing below (for do_floor):
33490 xi = (long)op1;
33491 xi -= (double)xi > op1 ? 1 : 0;
33492 return xi;
33493 */
33494 enum machine_mode fmode = GET_MODE (op1);
33495 enum machine_mode imode = GET_MODE (op0);
33496 rtx ireg, freg, label, tmp;
33497
33498 /* reg = (long)op1 */
33499 ireg = gen_reg_rtx (imode);
33500 expand_fix (ireg, op1, 0);
33501
33502 /* freg = (double)reg */
33503 freg = gen_reg_rtx (fmode);
33504 expand_float (freg, ireg, 0);
33505
33506 /* ireg = (freg > op1) ? ireg - 1 : ireg */
33507 label = ix86_expand_sse_compare_and_jump (UNLE,
33508 freg, op1, !do_floor);
33509 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
33510 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
33511 emit_move_insn (ireg, tmp);
33512
33513 emit_label (label);
33514 LABEL_NUSES (label) = 1;
33515
33516 emit_move_insn (op0, ireg);
33517 }
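/* For the !DO_FLOOR (lceil) case the sequence above is the mirror image,
   roughly (illustrative C model only):
     xi = (long) op1;
     xi += (double) xi < op1 ? 1 : 0;
     return xi;
   The comparison is still a single UNLE jump, just with swapped operands,
   and the PLUS replaces the MINUS.  */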
33518
33519 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
33520 result in OPERAND0. */
33521 void
33522 ix86_expand_rint (rtx operand0, rtx operand1)
33523 {
33524 /* C code for the stuff we're doing below:
33525 xa = fabs (operand1);
33526 if (!isless (xa, 2**52))
33527 return operand1;
33528 xa = xa + 2**52 - 2**52;
33529 return copysign (xa, operand1);
33530 */
33531 enum machine_mode mode = GET_MODE (operand0);
33532 rtx res, xa, label, TWO52, mask;
33533
33534 res = gen_reg_rtx (mode);
33535 emit_move_insn (res, operand1);
33536
33537 /* xa = abs (operand1) */
33538 xa = ix86_expand_sse_fabs (res, &mask);
33539
33540 /* if (!isless (xa, TWO52)) goto label; */
33541 TWO52 = ix86_gen_TWO52 (mode);
33542 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
33543
33544 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
33545 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
33546
33547 ix86_sse_copysign_to_positive (res, xa, res, mask);
33548
33549 emit_label (label);
33550 LABEL_NUSES (label) = 1;
33551
33552 emit_move_insn (operand0, res);
33553 }
33554
33555 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
33556 into OPERAND0, without relying on DImode truncation via cvttsd2siq (only available on 64-bit targets). */
33557 void
33558 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
33559 {
33560 /* C code for the stuff we expand below.
33561 double xa = fabs (x), x2;
33562 if (!isless (xa, TWO52))
33563 return x;
33564 xa = xa + TWO52 - TWO52;
33565 x2 = copysign (xa, x);
33566 Compensate. Floor:
33567 if (x2 > x)
33568 x2 -= 1;
33569 Compensate. Ceil:
33570 if (x2 < x)
33571 x2 -= -1;
33572 return x2;
33573 */
33574 enum machine_mode mode = GET_MODE (operand0);
33575 rtx xa, TWO52, tmp, label, one, res, mask;
33576
33577 TWO52 = ix86_gen_TWO52 (mode);
33578
33579 /* Temporary for holding the result, initialized to the input
33580 operand to ease control flow. */
33581 res = gen_reg_rtx (mode);
33582 emit_move_insn (res, operand1);
33583
33584 /* xa = abs (operand1) */
33585 xa = ix86_expand_sse_fabs (res, &mask);
33586
33587 /* if (!isless (xa, TWO52)) goto label; */
33588 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
33589
33590 /* xa = xa + TWO52 - TWO52; */
33591 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
33592 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
33593
33594 /* xa = copysign (xa, operand1) */
33595 ix86_sse_copysign_to_positive (xa, xa, res, mask);
33596
33597 /* generate 1.0 or -1.0 */
33598 one = force_reg (mode,
33599 const_double_from_real_value (do_floor
33600 ? dconst1 : dconstm1, mode));
33601
33602 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
33603 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
33604 emit_insn (gen_rtx_SET (VOIDmode, tmp,
33605 gen_rtx_AND (mode, one, tmp)));
33606 /* We always need to subtract here to preserve signed zero. */
33607 tmp = expand_simple_binop (mode, MINUS,
33608 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
33609 emit_move_insn (res, tmp);
33610
33611 emit_label (label);
33612 LABEL_NUSES (label) = 1;
33613
33614 emit_move_insn (operand0, res);
33615 }
33616
33617 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
33618 into OPERAND0. */
33619 void
33620 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
33621 {
33622 /* C code for the stuff we expand below.
33623 double xa = fabs (x), x2;
33624 if (!isless (xa, TWO52))
33625 return x;
33626 x2 = (double)(long)x;
33627 Compensate. Floor:
33628 if (x2 > x)
33629 x2 -= 1;
33630 Compensate. Ceil:
33631 if (x2 < x)
33632 x2 += 1;
33633 if (HONOR_SIGNED_ZEROS (mode))
33634 return copysign (x2, x);
33635 return x2;
33636 */
33637 enum machine_mode mode = GET_MODE (operand0);
33638 rtx xa, xi, TWO52, tmp, label, one, res, mask;
33639
33640 TWO52 = ix86_gen_TWO52 (mode);
33641
33642 /* Temporary for holding the result, initialized to the input
33643 operand to ease control flow. */
33644 res = gen_reg_rtx (mode);
33645 emit_move_insn (res, operand1);
33646
33647 /* xa = abs (operand1) */
33648 xa = ix86_expand_sse_fabs (res, &mask);
33649
33650 /* if (!isless (xa, TWO52)) goto label; */
33651 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
33652
33653 /* xa = (double)(long)x */
33654 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
33655 expand_fix (xi, res, 0);
33656 expand_float (xa, xi, 0);
33657
33658 /* generate 1.0 */
33659 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
33660
33661 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
33662 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
33663 emit_insn (gen_rtx_SET (VOIDmode, tmp,
33664 gen_rtx_AND (mode, one, tmp)));
33665 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
33666 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
33667 emit_move_insn (res, tmp);
33668
33669 if (HONOR_SIGNED_ZEROS (mode))
33670 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
33671
33672 emit_label (label);
33673 LABEL_NUSES (label) = 1;
33674
33675 emit_move_insn (operand0, res);
33676 }
33677
33678 /* Expand SSE sequence for computing round from OPERAND1 storing
33679 into OPERAND0. This sequence works without relying on DImode truncation
33680 via cvttsd2siq, which is only available on 64-bit targets. */
33681 void
33682 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
33683 {
33684 /* C code for the stuff we expand below.
33685 double xa = fabs (x), xa2, x2;
33686 if (!isless (xa, TWO52))
33687 return x;
33688 Using the absolute value and copying back sign makes
33689 -0.0 -> -0.0 correct.
33690 xa2 = xa + TWO52 - TWO52;
33691 Compensate.
33692 dxa = xa2 - xa;
33693 if (dxa <= -0.5)
33694 xa2 += 1;
33695 else if (dxa > 0.5)
33696 xa2 -= 1;
33697 x2 = copysign (xa2, x);
33698 return x2;
33699 */
33700 enum machine_mode mode = GET_MODE (operand0);
33701 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
33702
33703 TWO52 = ix86_gen_TWO52 (mode);
33704
33705 /* Temporary for holding the result, initialized to the input
33706 operand to ease control flow. */
33707 res = gen_reg_rtx (mode);
33708 emit_move_insn (res, operand1);
33709
33710 /* xa = abs (operand1) */
33711 xa = ix86_expand_sse_fabs (res, &mask);
33712
33713 /* if (!isless (xa, TWO52)) goto label; */
33714 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
33715
33716 /* xa2 = xa + TWO52 - TWO52; */
33717 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
33718 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
33719
33720 /* dxa = xa2 - xa; */
33721 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
33722
33723 /* generate 0.5, 1.0 and -0.5 */
33724 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
33725 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
33726 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
33727 0, OPTAB_DIRECT);
33728
33729 /* Compensate. */
33730 tmp = gen_reg_rtx (mode);
33731 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
33732 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
33733 emit_insn (gen_rtx_SET (VOIDmode, tmp,
33734 gen_rtx_AND (mode, one, tmp)));
33735 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
33736 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
33737 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
33738 emit_insn (gen_rtx_SET (VOIDmode, tmp,
33739 gen_rtx_AND (mode, one, tmp)));
33740 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
33741
33742 /* res = copysign (xa2, operand1) */
33743 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
33744
33745 emit_label (label);
33746 LABEL_NUSES (label) = 1;
33747
33748 emit_move_insn (operand0, res);
33749 }
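/* Worked example of the compensation above (illustrative): for x = 2.5,
   xa2 = (2.5 + 2**52) - 2**52 evaluates to 2.0 under round-to-nearest-even,
   so dxa = -0.5; the "dxa <= -0.5" test then adds 1.0, giving 3.0, which
   matches round()'s round-half-away-from-zero semantics.  */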
33750
33751 /* Expand SSE sequence for computing trunc from OPERAND1 storing
33752 into OPERAND0. */
33753 void
33754 ix86_expand_trunc (rtx operand0, rtx operand1)
33755 {
33756 /* C code for the SSE variant we expand below.
33757 double xa = fabs (x), x2;
33758 if (!isless (xa, TWO52))
33759 return x;
33760 x2 = (double)(long)x;
33761 if (HONOR_SIGNED_ZEROS (mode))
33762 return copysign (x2, x);
33763 return x2;
33764 */
33765 enum machine_mode mode = GET_MODE (operand0);
33766 rtx xa, xi, TWO52, label, res, mask;
33767
33768 TWO52 = ix86_gen_TWO52 (mode);
33769
33770 /* Temporary for holding the result, initialized to the input
33771 operand to ease control flow. */
33772 res = gen_reg_rtx (mode);
33773 emit_move_insn (res, operand1);
33774
33775 /* xa = abs (operand1) */
33776 xa = ix86_expand_sse_fabs (res, &mask);
33777
33778 /* if (!isless (xa, TWO52)) goto label; */
33779 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
33780
33781 /* x = (double)(long)x */
33782 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
33783 expand_fix (xi, res, 0);
33784 expand_float (res, xi, 0);
33785
33786 if (HONOR_SIGNED_ZEROS (mode))
33787 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
33788
33789 emit_label (label);
33790 LABEL_NUSES (label) = 1;
33791
33792 emit_move_insn (operand0, res);
33793 }
33794
33795 /* Expand SSE sequence for computing trunc from OPERAND1 storing
33796 into OPERAND0, without relying on DImode truncation via cvttsd2siq (only available on 64-bit targets). */
33797 void
33798 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
33799 {
33800 enum machine_mode mode = GET_MODE (operand0);
33801 rtx xa, mask, TWO52, label, one, res, smask, tmp;
33802
33803 /* C code for the SSE variant we expand below.
33804 double xa = fabs (x), xa2, x2;
33805 if (!isless (xa, TWO52))
33806 return x;
33807 xa2 = xa + TWO52 - TWO52;
33808 Compensate:
33809 if (xa2 > xa)
33810 xa2 -= 1.0;
33811 x2 = copysign (xa2, x);
33812 return x2;
33813 */
33814
33815 TWO52 = ix86_gen_TWO52 (mode);
33816
33817 /* Temporary for holding the result, initialized to the input
33818 operand to ease control flow. */
33819 res = gen_reg_rtx (mode);
33820 emit_move_insn (res, operand1);
33821
33822 /* xa = abs (operand1) */
33823 xa = ix86_expand_sse_fabs (res, &smask);
33824
33825 /* if (!isless (xa, TWO52)) goto label; */
33826 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
33827
33828 /* res = xa + TWO52 - TWO52; */
33829 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
33830 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
33831 emit_move_insn (res, tmp);
33832
33833 /* generate 1.0 */
33834 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
33835
33836 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
33837 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
33838 emit_insn (gen_rtx_SET (VOIDmode, mask,
33839 gen_rtx_AND (mode, mask, one)));
33840 tmp = expand_simple_binop (mode, MINUS,
33841 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
33842 emit_move_insn (res, tmp);
33843
33844 /* res = copysign (res, operand1) */
33845 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
33846
33847 emit_label (label);
33848 LABEL_NUSES (label) = 1;
33849
33850 emit_move_insn (operand0, res);
33851 }
33852
33853 /* Expand SSE sequence for computing round from OPERAND1 storing
33854 into OPERAND0. */
33855 void
33856 ix86_expand_round (rtx operand0, rtx operand1)
33857 {
33858 /* C code for the stuff we're doing below:
33859 double xa = fabs (x);
33860 if (!isless (xa, TWO52))
33861 return x;
33862 xa = (double)(long)(xa + nextafter (0.5, 0.0));
33863 return copysign (xa, x);
33864 */
33865 enum machine_mode mode = GET_MODE (operand0);
33866 rtx res, TWO52, xa, label, xi, half, mask;
33867 const struct real_format *fmt;
33868 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
33869
33870 /* Temporary for holding the result, initialized to the input
33871 operand to ease control flow. */
33872 res = gen_reg_rtx (mode);
33873 emit_move_insn (res, operand1);
33874
33875 TWO52 = ix86_gen_TWO52 (mode);
33876 xa = ix86_expand_sse_fabs (res, &mask);
33877 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
33878
33879 /* load nextafter (0.5, 0.0) */
33880 fmt = REAL_MODE_FORMAT (mode);
33881 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
33882 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
33883
33884 /* xa = xa + 0.5 */
33885 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
33886 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
33887
33888 /* xa = (double)(int64_t)xa */
33889 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
33890 expand_fix (xi, xa, 0);
33891 expand_float (xa, xi, 0);
33892
33893 /* res = copysign (xa, operand1) */
33894 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
33895
33896 emit_label (label);
33897 LABEL_NUSES (label) = 1;
33898
33899 emit_move_insn (operand0, res);
33900 }
33901
33902 /* Expand SSE sequence for computing round
33903 from OP1 storing into OP0 using sse4 round insn. */
33904 void
33905 ix86_expand_round_sse4 (rtx op0, rtx op1)
33906 {
33907 enum machine_mode mode = GET_MODE (op0);
33908 rtx e1, e2, res, half;
33909 const struct real_format *fmt;
33910 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
33911 rtx (*gen_copysign) (rtx, rtx, rtx);
33912 rtx (*gen_round) (rtx, rtx, rtx);
33913
33914 switch (mode)
33915 {
33916 case SFmode:
33917 gen_copysign = gen_copysignsf3;
33918 gen_round = gen_sse4_1_roundsf2;
33919 break;
33920 case DFmode:
33921 gen_copysign = gen_copysigndf3;
33922 gen_round = gen_sse4_1_rounddf2;
33923 break;
33924 default:
33925 gcc_unreachable ();
33926 }
33927
33928 /* round (a) = trunc (a + copysign (0.5, a)) */
33929
33930 /* load nextafter (0.5, 0.0) */
33931 fmt = REAL_MODE_FORMAT (mode);
33932 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
33933 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
33934 half = const_double_from_real_value (pred_half, mode);
33935
33936 /* e1 = copysign (0.5, op1) */
33937 e1 = gen_reg_rtx (mode);
33938 emit_insn (gen_copysign (e1, half, op1));
33939
33940 /* e2 = op1 + e1 */
33941 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
33942
33943 /* res = trunc (e2) */
33944 res = gen_reg_rtx (mode);
33945 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
33946
33947 emit_move_insn (op0, res);
33948 }
33949 \f
33950
33951 /* Table of valid machine attributes. */
33952 static const struct attribute_spec ix86_attribute_table[] =
33953 {
33954 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
33955 affects_type_identity } */
33956 /* Stdcall attribute says callee is responsible for popping arguments
33957 if they are not variable. */
33958 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
33959 true },
33960 /* Fastcall attribute says callee is responsible for popping arguments
33961 if they are not variable. */
33962 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
33963 true },
33964 /* Thiscall attribute says callee is responsible for popping arguments
33965 if they are not variable. */
33966 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
33967 true },
33968 /* Cdecl attribute says the callee is a normal C declaration. */
33969 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
33970 true },
33971 /* Regparm attribute specifies how many integer arguments are to be
33972 passed in registers. */
33973 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
33974 true },
33975 /* Sseregparm attribute says we are using x86_64 calling conventions
33976 for FP arguments. */
33977 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
33978 true },
33979 /* force_align_arg_pointer says this function realigns the stack at entry. */
33980 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
33981 false, true, true, ix86_handle_cconv_attribute, false },
33982 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
33983 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
33984 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
33985 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
33986 false },
33987 #endif
33988 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
33989 false },
33990 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
33991 false },
33992 #ifdef SUBTARGET_ATTRIBUTE_TABLE
33993 SUBTARGET_ATTRIBUTE_TABLE,
33994 #endif
33995 /* ms_abi and sysv_abi calling convention function attributes. */
33996 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
33997 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
33998 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
33999 false },
34000 { "callee_pop_aggregate_return", 1, 1, false, true, true,
34001 ix86_handle_callee_pop_aggregate_return, true },
34002 /* End element. */
34003 { NULL, 0, 0, false, false, false, NULL, false }
34004 };
34005
34006 /* Implement targetm.vectorize.builtin_vectorization_cost. */
34007 static int
34008 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
34009 tree vectype ATTRIBUTE_UNUSED,
34010 int misalign ATTRIBUTE_UNUSED)
34011 {
34012 switch (type_of_cost)
34013 {
34014 case scalar_stmt:
34015 return ix86_cost->scalar_stmt_cost;
34016
34017 case scalar_load:
34018 return ix86_cost->scalar_load_cost;
34019
34020 case scalar_store:
34021 return ix86_cost->scalar_store_cost;
34022
34023 case vector_stmt:
34024 return ix86_cost->vec_stmt_cost;
34025
34026 case vector_load:
34027 return ix86_cost->vec_align_load_cost;
34028
34029 case vector_store:
34030 return ix86_cost->vec_store_cost;
34031
34032 case vec_to_scalar:
34033 return ix86_cost->vec_to_scalar_cost;
34034
34035 case scalar_to_vec:
34036 return ix86_cost->scalar_to_vec_cost;
34037
34038 case unaligned_load:
34039 case unaligned_store:
34040 return ix86_cost->vec_unalign_load_cost;
34041
34042 case cond_branch_taken:
34043 return ix86_cost->cond_taken_branch_cost;
34044
34045 case cond_branch_not_taken:
34046 return ix86_cost->cond_not_taken_branch_cost;
34047
34048 case vec_perm:
34049 return 1;
34050
34051 default:
34052 gcc_unreachable ();
34053 }
34054 }
34055
34056
34057 /* Implement targetm.vectorize.builtin_vec_perm. */
34058
34059 static tree
34060 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
34061 {
34062 tree itype = TREE_TYPE (vec_type);
34063 bool u = TYPE_UNSIGNED (itype);
34064 enum machine_mode vmode = TYPE_MODE (vec_type);
34065 enum ix86_builtins fcode;
34066 bool ok = TARGET_SSE2;
34067
34068 switch (vmode)
34069 {
34070 case V4DFmode:
34071 ok = TARGET_AVX;
34072 fcode = IX86_BUILTIN_VEC_PERM_V4DF;
34073 goto get_di;
34074 case V2DFmode:
34075 fcode = IX86_BUILTIN_VEC_PERM_V2DF;
34076 get_di:
34077 itype = ix86_get_builtin_type (IX86_BT_DI);
34078 break;
34079
34080 case V8SFmode:
34081 ok = TARGET_AVX;
34082 fcode = IX86_BUILTIN_VEC_PERM_V8SF;
34083 goto get_si;
34084 case V4SFmode:
34085 ok = TARGET_SSE;
34086 fcode = IX86_BUILTIN_VEC_PERM_V4SF;
34087 get_si:
34088 itype = ix86_get_builtin_type (IX86_BT_SI);
34089 break;
34090
34091 case V2DImode:
34092 fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
34093 break;
34094 case V4SImode:
34095 fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
34096 break;
34097 case V8HImode:
34098 fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
34099 break;
34100 case V16QImode:
34101 fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
34102 break;
34103 default:
34104 ok = false;
34105 break;
34106 }
34107
34108 if (!ok)
34109 return NULL_TREE;
34110
34111 *mask_type = itype;
34112 return ix86_builtins[(int) fcode];
34113 }
34114
34115 /* Return a vector mode with twice as many elements as VMODE. */
34116 /* ??? Consider moving this to a table generated by genmodes.c. */
34117
34118 static enum machine_mode
34119 doublesize_vector_mode (enum machine_mode vmode)
34120 {
34121 switch (vmode)
34122 {
34123 case V2SFmode: return V4SFmode;
34124 case V1DImode: return V2DImode;
34125 case V2SImode: return V4SImode;
34126 case V4HImode: return V8HImode;
34127 case V8QImode: return V16QImode;
34128
34129 case V2DFmode: return V4DFmode;
34130 case V4SFmode: return V8SFmode;
34131 case V2DImode: return V4DImode;
34132 case V4SImode: return V8SImode;
34133 case V8HImode: return V16HImode;
34134 case V16QImode: return V32QImode;
34135
34136 case V4DFmode: return V8DFmode;
34137 case V8SFmode: return V16SFmode;
34138 case V4DImode: return V8DImode;
34139 case V8SImode: return V16SImode;
34140 case V16HImode: return V32HImode;
34141 case V32QImode: return V64QImode;
34142
34143 default:
34144 gcc_unreachable ();
34145 }
34146 }
34147
34148 /* Construct (set target (vec_select op0 (parallel perm))) and
34149 return true if that's a valid instruction in the active ISA. */
34150
34151 static bool
34152 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
34153 {
34154 rtx rperm[MAX_VECT_LEN], x;
34155 unsigned i;
34156
34157 for (i = 0; i < nelt; ++i)
34158 rperm[i] = GEN_INT (perm[i]);
34159
34160 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
34161 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
34162 x = gen_rtx_SET (VOIDmode, target, x);
34163
34164 x = emit_insn (x);
34165 if (recog_memoized (x) < 0)
34166 {
34167 remove_insn (x);
34168 return false;
34169 }
34170 return true;
34171 }
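/* Illustrative reading of the RTL built above (explanatory only): for a
   V4SFmode target with PERM = { 2, 3, 0, 1 } the generated insn computes
     target[i] = op0[perm[i]], i.e. { op0[2], op0[3], op0[0], op0[1] },
   and the recog_memoized check discards selectors for which the active
   ISA has no single-instruction pattern.  */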
34172
34173 /* Similar, but generate a vec_concat from op0 and op1 as well. */
34174
34175 static bool
34176 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
34177 const unsigned char *perm, unsigned nelt)
34178 {
34179 enum machine_mode v2mode;
34180 rtx x;
34181
34182 v2mode = doublesize_vector_mode (GET_MODE (op0));
34183 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
34184 return expand_vselect (target, x, perm, nelt);
34185 }
34186
34187 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
34188 in terms of blendp[sd] / pblendw / pblendvb. */
34189
34190 static bool
34191 expand_vec_perm_blend (struct expand_vec_perm_d *d)
34192 {
34193 enum machine_mode vmode = d->vmode;
34194 unsigned i, mask, nelt = d->nelt;
34195 rtx target, op0, op1, x;
34196
34197 if (!TARGET_SSE4_1 || d->op0 == d->op1)
34198 return false;
34199 if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
34200 return false;
34201
34202 /* This is a blend, not a permute. Elements must stay in their
34203 respective lanes. */
34204 for (i = 0; i < nelt; ++i)
34205 {
34206 unsigned e = d->perm[i];
34207 if (!(e == i || e == i + nelt))
34208 return false;
34209 }
34210
34211 if (d->testing_p)
34212 return true;
34213
34214 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
34215 decision should be extracted elsewhere, so that we only try that
34216 sequence once all budget==3 options have been tried. */
34217
34218 /* For bytes, see if bytes move in pairs so we can use pblendw with
34219 an immediate argument, rather than pblendvb with a vector argument. */
34220 if (vmode == V16QImode)
34221 {
34222 bool pblendw_ok = true;
34223 for (i = 0; i < 16 && pblendw_ok; i += 2)
34224 pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
34225
34226 if (!pblendw_ok)
34227 {
34228 rtx rperm[16], vperm;
34229
34230 for (i = 0; i < nelt; ++i)
34231 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
34232
34233 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
34234 vperm = force_reg (V16QImode, vperm);
34235
34236 emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
34237 return true;
34238 }
34239 }
34240
34241 target = d->target;
34242 op0 = d->op0;
34243 op1 = d->op1;
34244 mask = 0;
34245
34246 switch (vmode)
34247 {
34248 case V4DFmode:
34249 case V8SFmode:
34250 case V2DFmode:
34251 case V4SFmode:
34252 case V8HImode:
34253 for (i = 0; i < nelt; ++i)
34254 mask |= (d->perm[i] >= nelt) << i;
34255 break;
34256
34257 case V2DImode:
34258 for (i = 0; i < 2; ++i)
34259 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
34260 goto do_subreg;
34261
34262 case V4SImode:
34263 for (i = 0; i < 4; ++i)
34264 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
34265 goto do_subreg;
34266
34267 case V16QImode:
34268 for (i = 0; i < 8; ++i)
34269 mask |= (d->perm[i * 2] >= 16) << i;
34270
34271 do_subreg:
34272 vmode = V8HImode;
34273 target = gen_lowpart (vmode, target);
34274 op0 = gen_lowpart (vmode, op0);
34275 op1 = gen_lowpart (vmode, op1);
34276 break;
34277
34278 default:
34279 gcc_unreachable ();
34280 }
34281
34282 /* This matches five different patterns with the different modes. */
34283 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
34284 x = gen_rtx_SET (VOIDmode, target, x);
34285 emit_insn (x);
34286
34287 return true;
34288 }
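/* Illustrative example of the mask computed above: a V2DImode blend that
   takes element 0 from op0 and element 1 from op1 (perm = { 0, 3 }) gets
   mask 0xf0, because each 64-bit element is widened to four 16-bit lanes
   of the V8HImode pblendw that is actually emitted.  */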
34289
34290 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
34291 in terms of the variable form of vpermilps.
34292
34293 Note that we will have already failed the immediate input vpermilps,
34294 which requires that the high and low part shuffle be identical; the
34295 variable form doesn't require that. */
34296
34297 static bool
34298 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
34299 {
34300 rtx rperm[8], vperm;
34301 unsigned i;
34302
34303 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
34304 return false;
34305
34306 /* We can only permute within the 128-bit lane. */
34307 for (i = 0; i < 8; ++i)
34308 {
34309 unsigned e = d->perm[i];
34310 if (i < 4 ? e >= 4 : e < 4)
34311 return false;
34312 }
34313
34314 if (d->testing_p)
34315 return true;
34316
34317 for (i = 0; i < 8; ++i)
34318 {
34319 unsigned e = d->perm[i];
34320
34321 /* Within each 128-bit lane, the elements of op0 are numbered
34322 from 0 and the elements of op1 are numbered from 4. */
34323 if (e >= 8 + 4)
34324 e -= 8;
34325 else if (e >= 4)
34326 e -= 4;
34327
34328 rperm[i] = GEN_INT (e);
34329 }
34330
34331 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
34332 vperm = force_reg (V8SImode, vperm);
34333 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
34334
34335 return true;
34336 }
34337
34338 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
34339 in terms of pshufb or vpperm. */
34340
34341 static bool
34342 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
34343 {
34344 unsigned i, nelt, eltsz;
34345 rtx rperm[16], vperm, target, op0, op1;
34346
34347 if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
34348 return false;
34349 if (GET_MODE_SIZE (d->vmode) != 16)
34350 return false;
34351
34352 if (d->testing_p)
34353 return true;
34354
34355 nelt = d->nelt;
34356 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
34357
34358 for (i = 0; i < nelt; ++i)
34359 {
34360 unsigned j, e = d->perm[i];
34361 for (j = 0; j < eltsz; ++j)
34362 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
34363 }
34364
34365 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
34366 vperm = force_reg (V16QImode, vperm);
34367
34368 target = gen_lowpart (V16QImode, d->target);
34369 op0 = gen_lowpart (V16QImode, d->op0);
34370 if (d->op0 == d->op1)
34371 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
34372 else
34373 {
34374 op1 = gen_lowpart (V16QImode, d->op1);
34375 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
34376 }
34377
34378 return true;
34379 }
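/* Illustrative expansion of the control vector built above: a V4SFmode
   permutation { 1, 0, 3, 2 } (eltsz == 4) becomes the V16QImode selector
     { 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 }
   i.e. each element index is expanded into its eltsz consecutive byte
   indices before being fed to pshufb or vpperm.  */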
34380
34381 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
34382 in a single instruction. */
34383
34384 static bool
34385 expand_vec_perm_1 (struct expand_vec_perm_d *d)
34386 {
34387 unsigned i, nelt = d->nelt;
34388 unsigned char perm2[MAX_VECT_LEN];
34389
34390 /* Check plain VEC_SELECT first, because AVX has instructions that could
34391 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
34392 input where SEL+CONCAT may not. */
34393 if (d->op0 == d->op1)
34394 {
34395 int mask = nelt - 1;
34396
34397 for (i = 0; i < nelt; i++)
34398 perm2[i] = d->perm[i] & mask;
34399
34400 if (expand_vselect (d->target, d->op0, perm2, nelt))
34401 return true;
34402
34403 /* There are plenty of patterns in sse.md that are written for
34404 SEL+CONCAT and are not replicated for a single op. Perhaps
34405 that should be changed, to avoid the nastiness here. */
34406
34407 /* Recognize interleave style patterns, which means incrementing
34408 every other permutation operand. */
34409 for (i = 0; i < nelt; i += 2)
34410 {
34411 perm2[i] = d->perm[i] & mask;
34412 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
34413 }
34414 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
34415 return true;
34416
34417 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
34418 if (nelt >= 4)
34419 {
34420 for (i = 0; i < nelt; i += 4)
34421 {
34422 perm2[i + 0] = d->perm[i + 0] & mask;
34423 perm2[i + 1] = d->perm[i + 1] & mask;
34424 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
34425 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
34426 }
34427
34428 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
34429 return true;
34430 }
34431 }
34432
34433 /* Finally, try the fully general two operand permute. */
34434 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
34435 return true;
34436
34437 /* Recognize interleave style patterns with reversed operands. */
34438 if (d->op0 != d->op1)
34439 {
34440 for (i = 0; i < nelt; ++i)
34441 {
34442 unsigned e = d->perm[i];
34443 if (e >= nelt)
34444 e -= nelt;
34445 else
34446 e += nelt;
34447 perm2[i] = e;
34448 }
34449
34450 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
34451 return true;
34452 }
34453
34454 /* Try the SSE4.1 blend variable merge instructions. */
34455 if (expand_vec_perm_blend (d))
34456 return true;
34457
34458 /* Try one of the AVX vpermil variable permutations. */
34459 if (expand_vec_perm_vpermil (d))
34460 return true;
34461
34462 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
34463 if (expand_vec_perm_pshufb (d))
34464 return true;
34465
34466 return false;
34467 }
34468
34469 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
34470 in terms of a pair of pshuflw + pshufhw instructions. */
34471
34472 static bool
34473 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
34474 {
34475 unsigned char perm2[MAX_VECT_LEN];
34476 unsigned i;
34477 bool ok;
34478
34479 if (d->vmode != V8HImode || d->op0 != d->op1)
34480 return false;
34481
34482 /* The two permutations only operate in 64-bit lanes. */
34483 for (i = 0; i < 4; ++i)
34484 if (d->perm[i] >= 4)
34485 return false;
34486 for (i = 4; i < 8; ++i)
34487 if (d->perm[i] < 4)
34488 return false;
34489
34490 if (d->testing_p)
34491 return true;
34492
34493 /* Emit the pshuflw. */
34494 memcpy (perm2, d->perm, 4);
34495 for (i = 4; i < 8; ++i)
34496 perm2[i] = i;
34497 ok = expand_vselect (d->target, d->op0, perm2, 8);
34498 gcc_assert (ok);
34499
34500 /* Emit the pshufhw. */
34501 memcpy (perm2 + 4, d->perm + 4, 4);
34502 for (i = 0; i < 4; ++i)
34503 perm2[i] = i;
34504 ok = expand_vselect (d->target, d->target, perm2, 8);
34505 gcc_assert (ok);
34506
34507 return true;
34508 }
34509
34510 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
34511 the permutation using the SSSE3 palignr instruction. This succeeds
34512 when all of the elements in PERM fit within one vector and we merely
34513 need to shift them down so that a single vector permutation has a
34514 chance to succeed. */
34515
34516 static bool
34517 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
34518 {
34519 unsigned i, nelt = d->nelt;
34520 unsigned min, max;
34521 bool in_order, ok;
34522 rtx shift;
34523
34524 /* Even with AVX, palignr only operates on 128-bit vectors. */
34525 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
34526 return false;
34527
34528 min = nelt, max = 0;
34529 for (i = 0; i < nelt; ++i)
34530 {
34531 unsigned e = d->perm[i];
34532 if (e < min)
34533 min = e;
34534 if (e > max)
34535 max = e;
34536 }
34537 if (min == 0 || max - min >= nelt)
34538 return false;
34539
34540 /* Given that we have SSSE3, we know we'll be able to implement the
34541 single operand permutation after the palignr with pshufb. */
34542 if (d->testing_p)
34543 return true;
34544
34545 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
34546 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
34547 gen_lowpart (TImode, d->op1),
34548 gen_lowpart (TImode, d->op0), shift));
34549
34550 d->op0 = d->op1 = d->target;
34551
34552 in_order = true;
34553 for (i = 0; i < nelt; ++i)
34554 {
34555 unsigned e = d->perm[i] - min;
34556 if (e != i)
34557 in_order = false;
34558 d->perm[i] = e;
34559 }
34560
34561 /* Test for the degenerate case where the alignment by itself
34562 produces the desired permutation. */
34563 if (in_order)
34564 return true;
34565
34566 ok = expand_vec_perm_1 (d);
34567 gcc_assert (ok);
34568
34569 return ok;
34570 }
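/* Illustrative example: a V4SImode permutation { 2, 3, 4, 5 } has min == 2,
   so the code above emits palignr with a 2-element (64-bit) shift; the
   residual permutation is then the identity { 0, 1, 2, 3 } and the
   in_order early return succeeds without a second insn.  */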
34571
34572 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
34573 a two vector permutation into a single vector permutation by using
34574 an interleave operation to merge the vectors. */
34575
34576 static bool
34577 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
34578 {
34579 struct expand_vec_perm_d dremap, dfinal;
34580 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
34581 unsigned contents, h1, h2, h3, h4;
34582 unsigned char remap[2 * MAX_VECT_LEN];
34583 rtx seq;
34584 bool ok;
34585
34586 if (d->op0 == d->op1)
34587 return false;
34588
34589 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
34590 lanes. We can use similar techniques with the vperm2f128 instruction,
34591 but it requires slightly different logic. */
34592 if (GET_MODE_SIZE (d->vmode) != 16)
34593 return false;
34594
34595 /* Examine from whence the elements come. */
34596 contents = 0;
34597 for (i = 0; i < nelt; ++i)
34598 contents |= 1u << d->perm[i];
34599
34600 /* Split the two input vectors into 4 halves. */
34601 h1 = (1u << nelt2) - 1;
34602 h2 = h1 << nelt2;
34603 h3 = h2 << nelt2;
34604 h4 = h3 << nelt2;
34605
34606 memset (remap, 0xff, sizeof (remap));
34607 dremap = *d;
34608
34609 /* If all of the elements come from the low halves, use interleave low;
34610 similarly for interleave high. If the elements come from mis-matched
34611 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
34612 if ((contents & (h1 | h3)) == contents)
34613 {
34614 for (i = 0; i < nelt2; ++i)
34615 {
34616 remap[i] = i * 2;
34617 remap[i + nelt] = i * 2 + 1;
34618 dremap.perm[i * 2] = i;
34619 dremap.perm[i * 2 + 1] = i + nelt;
34620 }
34621 }
34622 else if ((contents & (h2 | h4)) == contents)
34623 {
34624 for (i = 0; i < nelt2; ++i)
34625 {
34626 remap[i + nelt2] = i * 2;
34627 remap[i + nelt + nelt2] = i * 2 + 1;
34628 dremap.perm[i * 2] = i + nelt2;
34629 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
34630 }
34631 }
34632 else if ((contents & (h1 | h4)) == contents)
34633 {
34634 for (i = 0; i < nelt2; ++i)
34635 {
34636 remap[i] = i;
34637 remap[i + nelt + nelt2] = i + nelt2;
34638 dremap.perm[i] = i;
34639 dremap.perm[i + nelt2] = i + nelt + nelt2;
34640 }
34641 if (nelt != 4)
34642 {
34643 dremap.vmode = V2DImode;
34644 dremap.nelt = 2;
34645 dremap.perm[0] = 0;
34646 dremap.perm[1] = 3;
34647 }
34648 }
34649 else if ((contents & (h2 | h3)) == contents)
34650 {
34651 for (i = 0; i < nelt2; ++i)
34652 {
34653 remap[i + nelt2] = i;
34654 remap[i + nelt] = i + nelt2;
34655 dremap.perm[i] = i + nelt2;
34656 dremap.perm[i + nelt2] = i + nelt;
34657 }
34658 if (nelt != 4)
34659 {
34660 dremap.vmode = V2DImode;
34661 dremap.nelt = 2;
34662 dremap.perm[0] = 1;
34663 dremap.perm[1] = 2;
34664 }
34665 }
34666 else
34667 return false;
34668
34669 /* Use the remapping array set up above to move the elements from their
34670 swizzled locations into their final destinations. */
34671 dfinal = *d;
34672 for (i = 0; i < nelt; ++i)
34673 {
34674 unsigned e = remap[d->perm[i]];
34675 gcc_assert (e < nelt);
34676 dfinal.perm[i] = e;
34677 }
34678 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
34679 dfinal.op1 = dfinal.op0;
34680 dremap.target = dfinal.op0;
34681
34682 /* Test if the final remap can be done with a single insn. For V4SFmode or
34683 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
34684 start_sequence ();
34685 ok = expand_vec_perm_1 (&dfinal);
34686 seq = get_insns ();
34687 end_sequence ();
34688
34689 if (!ok)
34690 return false;
34691
34692 if (dremap.vmode != dfinal.vmode)
34693 {
34694 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
34695 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
34696 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
34697 }
34698
34699 ok = expand_vec_perm_1 (&dremap);
34700 gcc_assert (ok);
34701
34702 emit_insn (seq);
34703 return true;
34704 }
34705
34706 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
34707 permutation with two pshufb insns and an ior. We should have already
34708 failed all two instruction sequences. */
34709
34710 static bool
34711 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
34712 {
34713 rtx rperm[2][16], vperm, l, h, op, m128;
34714 unsigned int i, nelt, eltsz;
34715
34716 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
34717 return false;
34718 gcc_assert (d->op0 != d->op1);
34719
34720 nelt = d->nelt;
34721 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
34722
34723 /* Generate two permutation masks. If the required element is within
34724 the given vector it is shuffled into the proper lane. If the required
34725 element is in the other vector, force a zero into the lane by setting
34726 bit 7 in the permutation mask. */
34727 m128 = GEN_INT (-128);
34728 for (i = 0; i < nelt; ++i)
34729 {
34730 unsigned j, e = d->perm[i];
34731 unsigned which = (e >= nelt);
34732 if (e >= nelt)
34733 e -= nelt;
34734
34735 for (j = 0; j < eltsz; ++j)
34736 {
34737 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
34738 rperm[1-which][i*eltsz + j] = m128;
34739 }
34740 }
34741
34742 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
34743 vperm = force_reg (V16QImode, vperm);
34744
34745 l = gen_reg_rtx (V16QImode);
34746 op = gen_lowpart (V16QImode, d->op0);
34747 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
34748
34749 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
34750 vperm = force_reg (V16QImode, vperm);
34751
34752 h = gen_reg_rtx (V16QImode);
34753 op = gen_lowpart (V16QImode, d->op1);
34754 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
34755
34756 op = gen_lowpart (V16QImode, d->target);
34757 emit_insn (gen_iorv16qi3 (op, l, h));
34758
34759 return true;
34760 }
34761
34762 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
34763 and extract-odd permutations. */
34764
34765 static bool
34766 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
34767 {
34768 rtx t1, t2, t3;
34769
34770 switch (d->vmode)
34771 {
34772 case V4DFmode:
34773 t1 = gen_reg_rtx (V4DFmode);
34774 t2 = gen_reg_rtx (V4DFmode);
34775
34776 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
34777 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
34778 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
34779
34780 /* Now an unpck[lh]pd will produce the result required. */
34781 if (odd)
34782 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
34783 else
34784 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
34785 emit_insn (t3);
34786 break;
34787
34788 case V8SFmode:
34789 {
34790 int mask = odd ? 0xdd : 0x88;
34791
34792 t1 = gen_reg_rtx (V8SFmode);
34793 t2 = gen_reg_rtx (V8SFmode);
34794 t3 = gen_reg_rtx (V8SFmode);
34795
34796 /* Shuffle within the 128-bit lanes to produce:
34797 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
34798 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
34799 GEN_INT (mask)));
34800
34801 /* Shuffle the lanes around to produce:
34802 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
34803 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
34804 GEN_INT (0x3)));
34805
34806 /* Shuffle within the 128-bit lanes to produce:
34807 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
34808 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
34809
34810 /* Shuffle within the 128-bit lanes to produce:
34811 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
34812 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
34813
34814 /* Shuffle the lanes around to produce:
34815 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
34816 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
34817 GEN_INT (0x20)));
34818 }
34819 break;
34820
34821 case V2DFmode:
34822 case V4SFmode:
34823 case V2DImode:
34824 case V4SImode:
34825 /* These are always directly implementable by expand_vec_perm_1. */
34826 gcc_unreachable ();
34827
34828 case V8HImode:
34829 if (TARGET_SSSE3)
34830 return expand_vec_perm_pshufb2 (d);
34831 else
34832 {
34833 /* We need 2*log2(N)-1 operations to achieve odd/even
34834 with interleave. */
34835 t1 = gen_reg_rtx (V8HImode);
34836 t2 = gen_reg_rtx (V8HImode);
34837 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
34838 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
34839 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
34840 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
34841 if (odd)
34842 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
34843 else
34844 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
34845 emit_insn (t3);
34846 }
34847 break;
34848
34849 case V16QImode:
34850 if (TARGET_SSSE3)
34851 return expand_vec_perm_pshufb2 (d);
34852 else
34853 {
34854 t1 = gen_reg_rtx (V16QImode);
34855 t2 = gen_reg_rtx (V16QImode);
34856 t3 = gen_reg_rtx (V16QImode);
34857 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
34858 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
34859 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
34860 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
34861 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
34862 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
34863 if (odd)
34864 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
34865 else
34866 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
34867 emit_insn (t3);
34868 }
34869 break;
34870
34871 default:
34872 gcc_unreachable ();
34873 }
34874
34875 return true;
34876 }
34877
34878 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
34879 extract-even and extract-odd permutations. */
34880
34881 static bool
34882 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
34883 {
34884 unsigned i, odd, nelt = d->nelt;
34885
34886 odd = d->perm[0];
34887 if (odd != 0 && odd != 1)
34888 return false;
34889
34890 for (i = 1; i < nelt; ++i)
34891 if (d->perm[i] != 2 * i + odd)
34892 return false;
34893
34894 return expand_vec_perm_even_odd_1 (d, odd);
34895 }
34896
34897 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
34898 permutations. We assume that expand_vec_perm_1 has already failed. */
34899
34900 static bool
34901 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
34902 {
34903 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
34904 enum machine_mode vmode = d->vmode;
34905 unsigned char perm2[4];
34906 rtx op0 = d->op0;
34907 bool ok;
34908
34909 switch (vmode)
34910 {
34911 case V4DFmode:
34912 case V8SFmode:
34913 /* These are special-cased in sse.md so that we can optionally
34914 use the vbroadcast instruction. They expand to two insns
34915 if the input happens to be in a register. */
34916 gcc_unreachable ();
34917
34918 case V2DFmode:
34919 case V2DImode:
34920 case V4SFmode:
34921 case V4SImode:
34922 /* These are always implementable using standard shuffle patterns. */
34923 gcc_unreachable ();
34924
34925 case V8HImode:
34926 case V16QImode:
34927 /* These can be implemented via interleave. We save one insn by
34928 stopping once we have promoted to V4SImode and then use pshufd. */
34929 do
34930 {
34931 optab otab = vec_interleave_low_optab;
34932
34933 if (elt >= nelt2)
34934 {
34935 otab = vec_interleave_high_optab;
34936 elt -= nelt2;
34937 }
34938 nelt2 /= 2;
34939
34940 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
34941 vmode = get_mode_wider_vector (vmode);
34942 op0 = gen_lowpart (vmode, op0);
34943 }
34944 while (vmode != V4SImode);
34945
34946 memset (perm2, elt, 4);
34947 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
34948 gcc_assert (ok);
34949 return true;
34950
34951 default:
34952 gcc_unreachable ();
34953 }
34954 }
34955
34956 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
34957 broadcast permutations. */
34958
34959 static bool
34960 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
34961 {
34962 unsigned i, elt, nelt = d->nelt;
34963
34964 if (d->op0 != d->op1)
34965 return false;
34966
34967 elt = d->perm[0];
34968 for (i = 1; i < nelt; ++i)
34969 if (d->perm[i] != elt)
34970 return false;
34971
34972 return expand_vec_perm_broadcast_1 (d);
34973 }
34974
34975 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
34976 With all of the interface bits taken care of, perform the expansion
34977 in D and return true on success. */
34978
34979 static bool
34980 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
34981 {
34982 /* Try a single instruction expansion. */
34983 if (expand_vec_perm_1 (d))
34984 return true;
34985
34986 /* Try sequences of two instructions. */
34987
34988 if (expand_vec_perm_pshuflw_pshufhw (d))
34989 return true;
34990
34991 if (expand_vec_perm_palignr (d))
34992 return true;
34993
34994 if (expand_vec_perm_interleave2 (d))
34995 return true;
34996
34997 if (expand_vec_perm_broadcast (d))
34998 return true;
34999
35000 /* Try sequences of three instructions. */
35001
35002 if (expand_vec_perm_pshufb2 (d))
35003 return true;
35004
35005 /* ??? Look for narrow permutations whose element orderings would
35006 allow the promotion to a wider mode. */
35007
35008 /* ??? Look for sequences of interleave or a wider permute that place
35009 the data into the correct lanes for a half-vector shuffle like
35010 pshuf[lh]w or vpermilps. */
35011
35012 /* ??? Look for sequences of interleave that produce the desired results.
35013 The combinatorics of punpck[lh] get pretty ugly... */
35014
35015 if (expand_vec_perm_even_odd (d))
35016 return true;
35017
35018 return false;
35019 }
35020
35021 /* Extract the values from the vector CST into the permutation array in D.
35022 Return 0 on error, 1 if all values from the permutation come from the
35023 first vector, 2 if all values from the second vector, and 3 otherwise. */
35024
35025 static int
35026 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
35027 {
35028 tree list = TREE_VECTOR_CST_ELTS (cst);
35029 unsigned i, nelt = d->nelt;
35030 int ret = 0;
35031
35032 for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
35033 {
35034 unsigned HOST_WIDE_INT e;
35035
35036 if (!host_integerp (TREE_VALUE (list), 1))
35037 return 0;
35038 e = tree_low_cst (TREE_VALUE (list), 1);
35039 if (e >= 2 * nelt)
35040 return 0;
35041
35042 ret |= (e < nelt ? 1 : 2);
35043 d->perm[i] = e;
35044 }
35045 gcc_assert (list == NULL);
35046
35047 /* If all elements come from the second vector, fold them onto the first. */
35048 if (ret == 2)
35049 for (i = 0; i < nelt; ++i)
35050 d->perm[i] -= nelt;
35051
35052 return ret;
35053 }
35054
35055 static rtx
35056 ix86_expand_vec_perm_builtin (tree exp)
35057 {
35058 struct expand_vec_perm_d d;
35059 tree arg0, arg1, arg2;
35060
35061 arg0 = CALL_EXPR_ARG (exp, 0);
35062 arg1 = CALL_EXPR_ARG (exp, 1);
35063 arg2 = CALL_EXPR_ARG (exp, 2);
35064
35065 d.vmode = TYPE_MODE (TREE_TYPE (arg0));
35066 d.nelt = GET_MODE_NUNITS (d.vmode);
35067 d.testing_p = false;
35068 gcc_assert (VECTOR_MODE_P (d.vmode));
35069
35070 if (TREE_CODE (arg2) != VECTOR_CST)
35071 {
35072 error_at (EXPR_LOCATION (exp),
35073 "vector permutation requires vector constant");
35074 goto exit_error;
35075 }
35076
35077 switch (extract_vec_perm_cst (&d, arg2))
35078 {
35079 default:
35080 gcc_unreachable();
35081
35082 case 0:
35083 error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
35084 goto exit_error;
35085
35086 case 3:
35087 if (!operand_equal_p (arg0, arg1, 0))
35088 {
35089 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
35090 d.op0 = force_reg (d.vmode, d.op0);
35091 d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
35092 d.op1 = force_reg (d.vmode, d.op1);
35093 break;
35094 }
35095
35096 /* The elements of PERM do not suggest that only the first operand
35097 is used, but both operands are identical. Allow easier matching
35098 of the permutation by folding the permutation into the single
35099 input vector. */
35100 {
35101 unsigned i, nelt = d.nelt;
35102 for (i = 0; i < nelt; ++i)
35103 if (d.perm[i] >= nelt)
35104 d.perm[i] -= nelt;
35105 }
35106 /* FALLTHRU */
35107
35108 case 1:
35109 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
35110 d.op0 = force_reg (d.vmode, d.op0);
35111 d.op1 = d.op0;
35112 break;
35113
35114 case 2:
35115 d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
35116 d.op0 = force_reg (d.vmode, d.op0);
35117 d.op1 = d.op0;
35118 break;
35119 }
35120
35121 d.target = gen_reg_rtx (d.vmode);
35122 if (ix86_expand_vec_perm_builtin_1 (&d))
35123 return d.target;
35124
35125 /* For compiler-generated permutations we should never get here, because
35126 the compiler should also be checking the ok hook. But since this is a
35127 builtin the user has access to, don't abort. */
35128 switch (d.nelt)
35129 {
35130 case 2:
35131 sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
35132 break;
35133 case 4:
35134 sorry ("vector permutation (%d %d %d %d)",
35135 d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
35136 break;
35137 case 8:
35138 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
35139 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
35140 d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
35141 break;
35142 case 16:
35143 sorry ("vector permutation "
35144 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
35145 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
35146 d.perm[4], d.perm[5], d.perm[6], d.perm[7],
35147 d.perm[8], d.perm[9], d.perm[10], d.perm[11],
35148 d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
35149 break;
35150 default:
35151 gcc_unreachable ();
35152 }
35153 exit_error:
35154 return CONST0_RTX (d.vmode);
35155 }
35156
35157 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
35158
35159 static bool
35160 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
35161 {
35162 struct expand_vec_perm_d d;
35163 int vec_mask;
35164 bool ret, one_vec;
35165
35166 d.vmode = TYPE_MODE (vec_type);
35167 d.nelt = GET_MODE_NUNITS (d.vmode);
35168 d.testing_p = true;
35169
35170 /* Given sufficient ISA support we can just return true here
35171 for selected vector modes. */
35172 if (GET_MODE_SIZE (d.vmode) == 16)
35173 {
35174 /* All implementable with a single vpperm insn. */
35175 if (TARGET_XOP)
35176 return true;
35177 /* All implementable with 2 pshufb + 1 ior. */
35178 if (TARGET_SSSE3)
35179 return true;
35180 /* All implementable with shufpd or unpck[lh]pd. */
35181 if (d.nelt == 2)
35182 return true;
35183 }
35184
35185 vec_mask = extract_vec_perm_cst (&d, mask);
35186
35187 /* This hook cannot be called in response to something that the
35188 user does (unlike the builtin expander), so we should never see
35189 an error generated from the extract. */
35190 gcc_assert (vec_mask > 0 && vec_mask <= 3);
35191 one_vec = (vec_mask != 3);
35192
35193 /* Implementable with shufps or pshufd. */
35194 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
35195 return true;
35196
35197 /* Otherwise we have to go through the motions and see if we can
35198 figure out how to generate the requested permutation. */
35199 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
35200 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
35201 if (!one_vec)
35202 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
35203
35204 start_sequence ();
35205 ret = ix86_expand_vec_perm_builtin_1 (&d);
35206 end_sequence ();
35207
35208 return ret;
35209 }
35210
35211 void
35212 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
35213 {
35214 struct expand_vec_perm_d d;
35215 unsigned i, nelt;
35216
35217 d.target = targ;
35218 d.op0 = op0;
35219 d.op1 = op1;
35220 d.vmode = GET_MODE (targ);
35221 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
35222 d.testing_p = false;
35223
35224 for (i = 0; i < nelt; ++i)
35225 d.perm[i] = i * 2 + odd;
35226
35227 /* We'll either be able to implement the permutation directly... */
35228 if (expand_vec_perm_1 (&d))
35229 return;
35230
35231 /* ... or we use the special-case patterns. */
35232 expand_vec_perm_even_odd_1 (&d, odd);
35233 }
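
/* For illustration: with a V4SImode target and ODD == 1 the loop above
   builds d.perm = { 1, 3, 5, 7 }, i.e. the odd elements of the
   eight-element concatenation OP0:OP1.  */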
35234
35235 /* Expand an insert into a vector register through pinsr insn.
35236 Return true if successful. */
35237
35238 bool
35239 ix86_expand_pinsr (rtx *operands)
35240 {
35241 rtx dst = operands[0];
35242 rtx src = operands[3];
35243
35244 unsigned int size = INTVAL (operands[1]);
35245 unsigned int pos = INTVAL (operands[2]);
35246
35247 if (GET_CODE (dst) == SUBREG)
35248 {
35249 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
35250 dst = SUBREG_REG (dst);
35251 }
35252
35253 if (GET_CODE (src) == SUBREG)
35254 src = SUBREG_REG (src);
35255
35256 switch (GET_MODE (dst))
35257 {
35258 case V16QImode:
35259 case V8HImode:
35260 case V4SImode:
35261 case V2DImode:
35262 {
35263 enum machine_mode srcmode, dstmode;
35264 rtx (*pinsr)(rtx, rtx, rtx, rtx);
35265
35266 srcmode = mode_for_size (size, MODE_INT, 0);
35267
35268 switch (srcmode)
35269 {
35270 case QImode:
35271 if (!TARGET_SSE4_1)
35272 return false;
35273 dstmode = V16QImode;
35274 pinsr = gen_sse4_1_pinsrb;
35275 break;
35276
35277 case HImode:
35278 if (!TARGET_SSE2)
35279 return false;
35280 dstmode = V8HImode;
35281 pinsr = gen_sse2_pinsrw;
35282 break;
35283
35284 case SImode:
35285 if (!TARGET_SSE4_1)
35286 return false;
35287 dstmode = V4SImode;
35288 pinsr = gen_sse4_1_pinsrd;
35289 break;
35290
35291 case DImode:
35292 gcc_assert (TARGET_64BIT);
35293 if (!TARGET_SSE4_1)
35294 return false;
35295 dstmode = V2DImode;
35296 pinsr = gen_sse4_1_pinsrq;
35297 break;
35298
35299 default:
35300 return false;
35301 }
35302
35303 dst = gen_lowpart (dstmode, dst);
35304 src = gen_lowpart (srcmode, src);
35305
35306 pos /= size;
35307
35308 emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
35309 return true;
35310 }
35311
35312 default:
35313 return false;
35314 }
35315 }
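
/* A worked example, for illustration: inserting a 16-bit field at bit
   position 32 of a V8HImode destination gives SIZE == 16 and POS == 32,
   so srcmode is HImode, dstmode is V8HImode, POS /= SIZE yields element
   index 2, and the emitted insn is sse2_pinsrw with selector
   GEN_INT (1 << 2).  */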
35316 \f
35317 /* This function returns the calling ABI specific va_list type node.
35318 It returns the FNDECL specific va_list type. */
35319
35320 static tree
35321 ix86_fn_abi_va_list (tree fndecl)
35322 {
35323 if (!TARGET_64BIT)
35324 return va_list_type_node;
35325 gcc_assert (fndecl != NULL_TREE);
35326
35327 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
35328 return ms_va_list_type_node;
35329 else
35330 return sysv_va_list_type_node;
35331 }
35332
35333 /* Returns the canonical va_list type specified by TYPE. If there
35334 is no valid TYPE provided, it returns NULL_TREE. */
35335
35336 static tree
35337 ix86_canonical_va_list_type (tree type)
35338 {
35339 tree wtype, htype;
35340
35341 /* Resolve references and pointers to va_list type. */
35342 if (TREE_CODE (type) == MEM_REF)
35343 type = TREE_TYPE (type);
35344 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
35345 type = TREE_TYPE (type);
35346 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
35347 type = TREE_TYPE (type);
35348
35349 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
35350 {
35351 wtype = va_list_type_node;
35352 gcc_assert (wtype != NULL_TREE);
35353 htype = type;
35354 if (TREE_CODE (wtype) == ARRAY_TYPE)
35355 {
35356 /* If va_list is an array type, the argument may have decayed
35357 to a pointer type, e.g. by being passed to another function.
35358 In that case, unwrap both types so that we can compare the
35359 underlying records. */
35360 if (TREE_CODE (htype) == ARRAY_TYPE
35361 || POINTER_TYPE_P (htype))
35362 {
35363 wtype = TREE_TYPE (wtype);
35364 htype = TREE_TYPE (htype);
35365 }
35366 }
35367 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
35368 return va_list_type_node;
35369 wtype = sysv_va_list_type_node;
35370 gcc_assert (wtype != NULL_TREE);
35371 htype = type;
35372 if (TREE_CODE (wtype) == ARRAY_TYPE)
35373 {
35374 /* If va_list is an array type, the argument may have decayed
35375 to a pointer type, e.g. by being passed to another function.
35376 In that case, unwrap both types so that we can compare the
35377 underlying records. */
35378 if (TREE_CODE (htype) == ARRAY_TYPE
35379 || POINTER_TYPE_P (htype))
35380 {
35381 wtype = TREE_TYPE (wtype);
35382 htype = TREE_TYPE (htype);
35383 }
35384 }
35385 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
35386 return sysv_va_list_type_node;
35387 wtype = ms_va_list_type_node;
35388 gcc_assert (wtype != NULL_TREE);
35389 htype = type;
35390 if (TREE_CODE (wtype) == ARRAY_TYPE)
35391 {
35392 /* If va_list is an array type, the argument may have decayed
35393 to a pointer type, e.g. by being passed to another function.
35394 In that case, unwrap both types so that we can compare the
35395 underlying records. */
35396 if (TREE_CODE (htype) == ARRAY_TYPE
35397 || POINTER_TYPE_P (htype))
35398 {
35399 wtype = TREE_TYPE (wtype);
35400 htype = TREE_TYPE (htype);
35401 }
35402 }
35403 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
35404 return ms_va_list_type_node;
35405 return NULL_TREE;
35406 }
35407 return std_canonical_va_list_type (type);
35408 }
35409
35410 /* Iterate through the target-specific builtin types for va_list.
35411 IDX denotes the iterator, *PTREE is set to the result type of
35412 the va_list builtin, and *PNAME to its internal type.
35413 Returns zero if there is no element for this index, otherwise
35414 IDX should be increased upon the next call.
35415 Note, do not iterate a base builtin's name like __builtin_va_list.
35416 Used from c_common_nodes_and_builtins. */
35417
35418 static int
35419 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
35420 {
35421 if (TARGET_64BIT)
35422 {
35423 switch (idx)
35424 {
35425 default:
35426 break;
35427
35428 case 0:
35429 *ptree = ms_va_list_type_node;
35430 *pname = "__builtin_ms_va_list";
35431 return 1;
35432
35433 case 1:
35434 *ptree = sysv_va_list_type_node;
35435 *pname = "__builtin_sysv_va_list";
35436 return 1;
35437 }
35438 }
35439
35440 return 0;
35441 }
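
/* Usage note, for illustration: the caller iterates IDX = 0, 1, ... and
   stops at the first zero return, so on 64-bit targets this registers
   "__builtin_ms_va_list" and "__builtin_sysv_va_list", while 32-bit
   targets register nothing extra.  */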
35442
35443 #undef TARGET_SCHED_DISPATCH
35444 #define TARGET_SCHED_DISPATCH has_dispatch
35445 #undef TARGET_SCHED_DISPATCH_DO
35446 #define TARGET_SCHED_DISPATCH_DO do_dispatch
35447 #undef TARGET_SCHED_REASSOCIATION_WIDTH
35448 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
35449
35450 /* The size of the dispatch window is the total number of bytes of
35451 object code allowed in a window. */
35452 #define DISPATCH_WINDOW_SIZE 16
35453
35454 /* Number of dispatch windows considered for scheduling. */
35455 #define MAX_DISPATCH_WINDOWS 3
35456
35457 /* Maximum number of instructions in a window. */
35458 #define MAX_INSN 4
35459
35460 /* Maximum number of immediate operands in a window. */
35461 #define MAX_IMM 4
35462
35463 /* Maximum number of immediate bits allowed in a window. */
35464 #define MAX_IMM_SIZE 128
35465
35466 /* Maximum number of 32 bit immediates allowed in a window. */
35467 #define MAX_IMM_32 4
35468
35469 /* Maximum number of 64 bit immediates allowed in a window. */
35470 #define MAX_IMM_64 2
35471
35472 /* Maximum total of loads or prefetches allowed in a window. */
35473 #define MAX_LOAD 2
35474
35475 /* Maximum total of stores allowed in a window. */
35476 #define MAX_STORE 1
35477
35478 #undef BIG
35479 #define BIG 100
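
/* Note, for illustration: the per-window limits above allow at most
   MAX_INSN (4) instructions and MAX_IMM (4) immediate operands per
   window, and the 48-byte total enforced in process_end_window and
   add_to_dispatch_window equals MAX_DISPATCH_WINDOWS
   * DISPATCH_WINDOW_SIZE (3 * 16).  */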
35480
35481
35482 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
35483 enum dispatch_group {
35484 disp_no_group = 0,
35485 disp_load,
35486 disp_store,
35487 disp_load_store,
35488 disp_prefetch,
35489 disp_imm,
35490 disp_imm_32,
35491 disp_imm_64,
35492 disp_branch,
35493 disp_cmp,
35494 disp_jcc,
35495 disp_last
35496 };
35497
35498 /* Number of allowable groups in a dispatch window. It is an array
35499 indexed by the dispatch_group enum. 100 is used as a big number,
35500 because the number of these kinds of operations does not have any
35501 effect on the dispatch window, but we need them for other reasons in
35502 the table. */
35503 static unsigned int num_allowable_groups[disp_last] = {
35504 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
35505 };
35506
35507 char group_name[disp_last + 1][16] = {
35508 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
35509 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
35510 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
35511 };
35512
35513 /* Instruction path. */
35514 enum insn_path {
35515 no_path = 0,
35516 path_single, /* Single micro op. */
35517 path_double, /* Double micro op. */
35518 path_multi, /* Instructions with more than 2 micro ops. */
35519 last_path
35520 };
35521
35522 /* sched_insn_info describes one instruction scheduled in the basic
35523 block: the insn itself together with its dispatch group, micro-op
35524 path, byte length and immediate bytes.
35525
35526 Windows are allocated for each basic block and are linked
35527 together. */
35528 typedef struct sched_insn_info_s {
35529 rtx insn;
35530 enum dispatch_group group;
35531 enum insn_path path;
35532 int byte_len;
35533 int imm_bytes;
35534 } sched_insn_info;
35535
35536 /* Linked list of dispatch windows. This is a two way list of
35537 dispatch windows of a basic block. It contains information about
35538 the number of uops in the window and the total number of
35539 instructions and of bytes in the object code for this dispatch
35540 window. */
35541 typedef struct dispatch_windows_s {
35542 int num_insn; /* Number of insns in the window. */
35543 int num_uops; /* Number of uops in the window. */
35544 int window_size; /* Number of bytes in the window. */
35545 int window_num; /* Window number, either 0 or 1. */
35546 int num_imm; /* Number of immediates in the window. */
35547 int num_imm_32; /* Number of 32 bit immediates in the window. */
35548 int num_imm_64; /* Number of 64 bit immediates in the window. */
35549 int imm_size; /* Total bytes of immediates in the window. */
35550 int num_loads; /* Total memory loads in the window. */
35551 int num_stores; /* Total memory stores in the window. */
35552 int violation; /* Violation exists in window. */
35553 sched_insn_info *window; /* Pointer to the window. */
35554 struct dispatch_windows_s *next;
35555 struct dispatch_windows_s *prev;
35556 } dispatch_windows;
35557
35558 /* Immediate values used in an insn. */
35559 typedef struct imm_info_s
35560 {
35561 int imm;
35562 int imm32;
35563 int imm64;
35564 } imm_info;
35565
35566 static dispatch_windows *dispatch_window_list;
35567 static dispatch_windows *dispatch_window_list1;
35568
35569 /* Get dispatch group of insn. */
35570
35571 static enum dispatch_group
35572 get_mem_group (rtx insn)
35573 {
35574 enum attr_memory memory;
35575
35576 if (INSN_CODE (insn) < 0)
35577 return disp_no_group;
35578 memory = get_attr_memory (insn);
35579 if (memory == MEMORY_STORE)
35580 return disp_store;
35581
35582 if (memory == MEMORY_LOAD)
35583 return disp_load;
35584
35585 if (memory == MEMORY_BOTH)
35586 return disp_load_store;
35587
35588 return disp_no_group;
35589 }
35590
35591 /* Return true if insn is a compare instruction. */
35592
35593 static bool
35594 is_cmp (rtx insn)
35595 {
35596 enum attr_type type;
35597
35598 type = get_attr_type (insn);
35599 return (type == TYPE_TEST
35600 || type == TYPE_ICMP
35601 || type == TYPE_FCMP
35602 || GET_CODE (PATTERN (insn)) == COMPARE);
35603 }
35604
35605 /* Return true if a dispatch violation was encountered. */
35606
35607 static bool
35608 dispatch_violation (void)
35609 {
35610 if (dispatch_window_list->next)
35611 return dispatch_window_list->next->violation;
35612 return dispatch_window_list->violation;
35613 }
35614
35615 /* Return true if insn is a branch instruction. */
35616
35617 static bool
35618 is_branch (rtx insn)
35619 {
35620 return (CALL_P (insn) || JUMP_P (insn));
35621 }
35622
35623 /* Return true if insn is a prefetch instruction. */
35624
35625 static bool
35626 is_prefetch (rtx insn)
35627 {
35628 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
35629 }
35630
35631 /* This function initializes a dispatch window and the list container holding a
35632 pointer to the window. */
35633
35634 static void
35635 init_window (int window_num)
35636 {
35637 int i;
35638 dispatch_windows *new_list;
35639
35640 if (window_num == 0)
35641 new_list = dispatch_window_list;
35642 else
35643 new_list = dispatch_window_list1;
35644
35645 new_list->num_insn = 0;
35646 new_list->num_uops = 0;
35647 new_list->window_size = 0;
35648 new_list->next = NULL;
35649 new_list->prev = NULL;
35650 new_list->window_num = window_num;
35651 new_list->num_imm = 0;
35652 new_list->num_imm_32 = 0;
35653 new_list->num_imm_64 = 0;
35654 new_list->imm_size = 0;
35655 new_list->num_loads = 0;
35656 new_list->num_stores = 0;
35657 new_list->violation = false;
35658
35659 for (i = 0; i < MAX_INSN; i++)
35660 {
35661 new_list->window[i].insn = NULL;
35662 new_list->window[i].group = disp_no_group;
35663 new_list->window[i].path = no_path;
35664 new_list->window[i].byte_len = 0;
35665 new_list->window[i].imm_bytes = 0;
35666 }
35667 return;
35668 }
35669
35670 /* This function allocates and initializes a dispatch window and the
35671 list container holding a pointer to the window. */
35672
35673 static dispatch_windows *
35674 allocate_window (void)
35675 {
35676 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
35677 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
35678
35679 return new_list;
35680 }
35681
35682 /* This routine initializes the dispatch scheduling information. It
35683 initiates building dispatch scheduler tables and constructs the
35684 first dispatch window. */
35685
35686 static void
35687 init_dispatch_sched (void)
35688 {
35689 /* Allocate a dispatch list and a window. */
35690 dispatch_window_list = allocate_window ();
35691 dispatch_window_list1 = allocate_window ();
35692 init_window (0);
35693 init_window (1);
35694 }
35695
35696 /* This function returns true if a branch is detected. End of a basic block
35697 does not have to be a branch, but here we assume only branches end a
35698 window. */
35699
35700 static bool
35701 is_end_basic_block (enum dispatch_group group)
35702 {
35703 return group == disp_branch;
35704 }
35705
35706 /* This function is called when the end of a window processing is reached. */
35707
35708 static void
35709 process_end_window (void)
35710 {
35711 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
35712 if (dispatch_window_list->next)
35713 {
35714 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
35715 gcc_assert (dispatch_window_list->window_size
35716 + dispatch_window_list1->window_size <= 48);
35717 init_window (1);
35718 }
35719 init_window (0);
35720 }
35721
35722 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
35723 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
35724 for 48 bytes of instructions. Note that these windows are not dispatch
35725 windows whose size is DISPATCH_WINDOW_SIZE. */
35726
35727 static dispatch_windows *
35728 allocate_next_window (int window_num)
35729 {
35730 if (window_num == 0)
35731 {
35732 if (dispatch_window_list->next)
35733 init_window (1);
35734 init_window (0);
35735 return dispatch_window_list;
35736 }
35737
35738 dispatch_window_list->next = dispatch_window_list1;
35739 dispatch_window_list1->prev = dispatch_window_list;
35740
35741 return dispatch_window_list1;
35742 }
35743
35744 /* Increment the number of immediate operands of an instruction. */
35745
35746 static int
35747 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
35748 {
35749 if (*in_rtx == 0)
35750 return 0;
35751
35752 switch ( GET_CODE (*in_rtx))
35753 {
35754 case CONST:
35755 case SYMBOL_REF:
35756 case CONST_INT:
35757 (imm_values->imm)++;
35758 if (x86_64_immediate_operand (*in_rtx, SImode))
35759 (imm_values->imm32)++;
35760 else
35761 (imm_values->imm64)++;
35762 break;
35763
35764 case CONST_DOUBLE:
35765 (imm_values->imm)++;
35766 (imm_values->imm64)++;
35767 break;
35768
35769 case CODE_LABEL:
35770 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
35771 {
35772 (imm_values->imm)++;
35773 (imm_values->imm32)++;
35774 }
35775 break;
35776
35777 default:
35778 break;
35779 }
35780
35781 return 0;
35782 }
35783
35784 /* Compute number of immediate operands of an instruction. */
35785
35786 static void
35787 find_constant (rtx in_rtx, imm_info *imm_values)
35788 {
35789 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
35790 (rtx_function) find_constant_1, (void *) imm_values);
35791 }
35792
35793 /* Return total size of immediate operands of an instruction along with number
35794 of corresponding immediate operands. It initializes its parameters to zero
35795 before calling FIND_CONSTANT.
35796 INSN is the input instruction. IMM is the total of immediates.
35797 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
35798 bit immediates. */
35799
35800 static int
35801 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
35802 {
35803 imm_info imm_values = {0, 0, 0};
35804
35805 find_constant (insn, &imm_values);
35806 *imm = imm_values.imm;
35807 *imm32 = imm_values.imm32;
35808 *imm64 = imm_values.imm64;
35809 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
35810 }
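
/* A worked example, for illustration: an insn with one constant accepted
   by x86_64_immediate_operand in SImode and one CONST_DOUBLE gives
   *IMM == 2, *IMM32 == 1, *IMM64 == 1 and a return value of
   1*4 + 1*8 == 12 bytes of immediate data.  */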
35811
35812 /* Return true if INSN has at least one immediate operand. */
35814
35815 static bool
35816 has_immediate (rtx insn)
35817 {
35818 int num_imm_operand;
35819 int num_imm32_operand;
35820 int num_imm64_operand;
35821
35822 if (insn)
35823 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
35824 &num_imm64_operand);
35825 return false;
35826 }
35827
35828 /* Return the decode path (single, double or multi micro-op) of INSN. */
35829
35830 static enum insn_path
35831 get_insn_path (rtx insn)
35832 {
35833 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
35834
35835 if ((int)path == 0)
35836 return path_single;
35837
35838 if ((int)path == 1)
35839 return path_double;
35840
35841 return path_multi;
35842 }
35843
35844 /* Return insn dispatch group. */
35845
35846 static enum dispatch_group
35847 get_insn_group (rtx insn)
35848 {
35849 enum dispatch_group group = get_mem_group (insn);
35850 if (group)
35851 return group;
35852
35853 if (is_branch (insn))
35854 return disp_branch;
35855
35856 if (is_cmp (insn))
35857 return disp_cmp;
35858
35859 if (has_immediate (insn))
35860 return disp_imm;
35861
35862 if (is_prefetch (insn))
35863 return disp_prefetch;
35864
35865 return disp_no_group;
35866 }
35867
35868 /* Count number of GROUP restricted instructions in a dispatch
35869 window WINDOW_LIST. */
35870
35871 static int
35872 count_num_restricted (rtx insn, dispatch_windows *window_list)
35873 {
35874 enum dispatch_group group = get_insn_group (insn);
35875 int imm_size;
35876 int num_imm_operand;
35877 int num_imm32_operand;
35878 int num_imm64_operand;
35879
35880 if (group == disp_no_group)
35881 return 0;
35882
35883 if (group == disp_imm)
35884 {
35885 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
35886 &num_imm64_operand);
35887 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
35888 || num_imm_operand + window_list->num_imm > MAX_IMM
35889 || (num_imm32_operand > 0
35890 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
35891 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
35892 || (num_imm64_operand > 0
35893 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
35894 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
35895 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
35896 && num_imm64_operand > 0
35897 && ((window_list->num_imm_64 > 0
35898 && window_list->num_insn >= 2)
35899 || window_list->num_insn >= 3)))
35900 return BIG;
35901
35902 return 1;
35903 }
35904
35905 if ((group == disp_load_store
35906 && (window_list->num_loads >= MAX_LOAD
35907 || window_list->num_stores >= MAX_STORE))
35908 || ((group == disp_load
35909 || group == disp_prefetch)
35910 && window_list->num_loads >= MAX_LOAD)
35911 || (group == disp_store
35912 && window_list->num_stores >= MAX_STORE))
35913 return BIG;
35914
35915 return 1;
35916 }
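
/* An example, for illustration: if WINDOW_LIST already holds one 64-bit
   immediate (num_imm_64 == 1) and INSN carries a single 32-bit
   immediate, num_imm_64 * 2 + num_imm32_operand == 3 stays within
   MAX_IMM_32, so the insn counts as 1 rather than BIG; each 64-bit
   immediate effectively occupies two 32-bit slots.  */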
35917
35918 /* This function returns true if insn satisfies dispatch rules on the
35919 last window scheduled. */
35920
35921 static bool
35922 fits_dispatch_window (rtx insn)
35923 {
35924 dispatch_windows *window_list = dispatch_window_list;
35925 dispatch_windows *window_list_next = dispatch_window_list->next;
35926 unsigned int num_restrict;
35927 enum dispatch_group group = get_insn_group (insn);
35928 enum insn_path path = get_insn_path (insn);
35929 int sum;
35930
35931 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
35932 instructions should be given the lowest priority in the
35933 scheduling process in the Haifa scheduler to make sure they will be
35934 scheduled in the same dispatch window as the reference to them. */
35935 if (group == disp_jcc || group == disp_cmp)
35936 return false;
35937
35938 /* Check nonrestricted. */
35939 if (group == disp_no_group || group == disp_branch)
35940 return true;
35941
35942 /* Get last dispatch window. */
35943 if (window_list_next)
35944 window_list = window_list_next;
35945
35946 if (window_list->window_num == 1)
35947 {
35948 sum = window_list->prev->window_size + window_list->window_size;
35949
35950 if (sum == 32
35951 || (min_insn_size (insn) + sum) >= 48)
35952 /* Window 1 is full. Go for next window. */
35953 return true;
35954 }
35955
35956 num_restrict = count_num_restricted (insn, window_list);
35957
35958 if (num_restrict > num_allowable_groups[group])
35959 return false;
35960
35961 /* See if it fits in the first window. */
35962 if (window_list->window_num == 0)
35963 {
35964 /* The first window should have only single and double path
35965 uops. */
35966 if (path == path_double
35967 && (window_list->num_uops + 2) > MAX_INSN)
35968 return false;
35969 else if (path != path_single)
35970 return false;
35971 }
35972 return true;
35973 }
35974
35975 /* Add an instruction INSN with NUM_UOPS micro-operations to the
35976 dispatch window WINDOW_LIST. */
35977
35978 static void
35979 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
35980 {
35981 int byte_len = min_insn_size (insn);
35982 int num_insn = window_list->num_insn;
35983 int imm_size;
35984 sched_insn_info *window = window_list->window;
35985 enum dispatch_group group = get_insn_group (insn);
35986 enum insn_path path = get_insn_path (insn);
35987 int num_imm_operand;
35988 int num_imm32_operand;
35989 int num_imm64_operand;
35990
35991 if (!window_list->violation && group != disp_cmp
35992 && !fits_dispatch_window (insn))
35993 window_list->violation = true;
35994
35995 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
35996 &num_imm64_operand);
35997
35998 /* Initialize window with new instruction. */
35999 window[num_insn].insn = insn;
36000 window[num_insn].byte_len = byte_len;
36001 window[num_insn].group = group;
36002 window[num_insn].path = path;
36003 window[num_insn].imm_bytes = imm_size;
36004
36005 window_list->window_size += byte_len;
36006 window_list->num_insn = num_insn + 1;
36007 window_list->num_uops = window_list->num_uops + num_uops;
36008 window_list->imm_size += imm_size;
36009 window_list->num_imm += num_imm_operand;
36010 window_list->num_imm_32 += num_imm32_operand;
36011 window_list->num_imm_64 += num_imm64_operand;
36012
36013 if (group == disp_store)
36014 window_list->num_stores += 1;
36015 else if (group == disp_load
36016 || group == disp_prefetch)
36017 window_list->num_loads += 1;
36018 else if (group == disp_load_store)
36019 {
36020 window_list->num_stores += 1;
36021 window_list->num_loads += 1;
36022 }
36023 }
36024
36025 /* Adds a scheduled instruction, INSN, to the current dispatch window.
36026 If the total bytes of instructions or the number of instructions in
36027 the window exceed the allowable limits, it allocates a new window. */
36028
36029 static void
36030 add_to_dispatch_window (rtx insn)
36031 {
36032 int byte_len;
36033 dispatch_windows *window_list;
36034 dispatch_windows *next_list;
36035 dispatch_windows *window0_list;
36036 enum insn_path path;
36037 enum dispatch_group insn_group;
36038 bool insn_fits;
36039 int num_insn;
36040 int num_uops;
36041 int window_num;
36042 int insn_num_uops;
36043 int sum;
36044
36045 if (INSN_CODE (insn) < 0)
36046 return;
36047
36048 byte_len = min_insn_size (insn);
36049 window_list = dispatch_window_list;
36050 next_list = window_list->next;
36051 path = get_insn_path (insn);
36052 insn_group = get_insn_group (insn);
36053
36054 /* Get the last dispatch window. */
36055 if (next_list)
36056 window_list = dispatch_window_list->next;
36057
36058 if (path == path_single)
36059 insn_num_uops = 1;
36060 else if (path == path_double)
36061 insn_num_uops = 2;
36062 else
36063 insn_num_uops = (int) path;
36064
36065 /* If the current window is full, get a new window.
36066 Window number zero is full if MAX_INSN uops are scheduled in it.
36067 Window number one is full if the combined byte count of windows
36068 zero and one is 32, if adding the bytes of the new instruction
36069 makes the total greater than 48, or if it already has MAX_INSN
36070 instructions in it. */
36071 num_insn = window_list->num_insn;
36072 num_uops = window_list->num_uops;
36073 window_num = window_list->window_num;
36074 insn_fits = fits_dispatch_window (insn);
36075
36076 if (num_insn >= MAX_INSN
36077 || num_uops + insn_num_uops > MAX_INSN
36078 || !(insn_fits))
36079 {
36080 window_num = ~window_num & 1;
36081 window_list = allocate_next_window (window_num);
36082 }
36083
36084 if (window_num == 0)
36085 {
36086 add_insn_window (insn, window_list, insn_num_uops);
36087 if (window_list->num_insn >= MAX_INSN
36088 && insn_group == disp_branch)
36089 {
36090 process_end_window ();
36091 return;
36092 }
36093 }
36094 else if (window_num == 1)
36095 {
36096 window0_list = window_list->prev;
36097 sum = window0_list->window_size + window_list->window_size;
36098 if (sum == 32
36099 || (byte_len + sum) >= 48)
36100 {
36101 process_end_window ();
36102 window_list = dispatch_window_list;
36103 }
36104
36105 add_insn_window (insn, window_list, insn_num_uops);
36106 }
36107 else
36108 gcc_unreachable ();
36109
36110 if (is_end_basic_block (insn_group))
36111 {
36112 /* End of basic block is reached; do end-basic-block processing. */
36113 process_end_window ();
36114 return;
36115 }
36116 }
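
/* Note, for illustration: insn_num_uops above is 1 for path_single, 2 for
   path_double and otherwise the raw enum value (3 for path_multi), so a
   multi-uop insn is charged three uops against the window's MAX_INSN
   uop budget.  */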
36117
36118 /* Print the dispatch window, WINDOW_NUM, to FILE. */
36119
36120 DEBUG_FUNCTION static void
36121 debug_dispatch_window_file (FILE *file, int window_num)
36122 {
36123 dispatch_windows *list;
36124 int i;
36125
36126 if (window_num == 0)
36127 list = dispatch_window_list;
36128 else
36129 list = dispatch_window_list1;
36130
36131 fprintf (file, "Window #%d:\n", list->window_num);
36132 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
36133 list->num_insn, list->num_uops, list->window_size);
36134 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
36135 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
36136
36137 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
36138 list->num_stores);
36139 fprintf (file, " insn info:\n");
36140
36141 for (i = 0; i < MAX_INSN; i++)
36142 {
36143 if (!list->window[i].insn)
36144 break;
36145 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
36146 i, group_name[list->window[i].group],
36147 i, (void *)list->window[i].insn,
36148 i, list->window[i].path,
36149 i, list->window[i].byte_len,
36150 i, list->window[i].imm_bytes);
36151 }
36152 }
36153
36154 /* Print to stdout a dispatch window. */
36155
36156 DEBUG_FUNCTION void
36157 debug_dispatch_window (int window_num)
36158 {
36159 debug_dispatch_window_file (stdout, window_num);
36160 }
36161
36162 /* Print INSN dispatch information to FILE. */
36163
36164 DEBUG_FUNCTION static void
36165 debug_insn_dispatch_info_file (FILE *file, rtx insn)
36166 {
36167 int byte_len;
36168 enum insn_path path;
36169 enum dispatch_group group;
36170 int imm_size;
36171 int num_imm_operand;
36172 int num_imm32_operand;
36173 int num_imm64_operand;
36174
36175 if (INSN_CODE (insn) < 0)
36176 return;
36177
36178 byte_len = min_insn_size (insn);
36179 path = get_insn_path (insn);
36180 group = get_insn_group (insn);
36181 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
36182 &num_imm64_operand);
36183
36184 fprintf (file, " insn info:\n");
36185 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
36186 group_name[group], path, byte_len);
36187 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
36188 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
36189 }
36190
36191 /* Print to STDERR the status of the ready list with respect to
36192 dispatch windows. */
36193
36194 DEBUG_FUNCTION void
36195 debug_ready_dispatch (void)
36196 {
36197 int i;
36198 int no_ready = number_in_ready ();
36199
36200 fprintf (stdout, "Number of ready: %d\n", no_ready);
36201
36202 for (i = 0; i < no_ready; i++)
36203 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
36204 }
36205
36206 /* This routine is the driver of the dispatch scheduler. */
36207
36208 static void
36209 do_dispatch (rtx insn, int mode)
36210 {
36211 if (mode == DISPATCH_INIT)
36212 init_dispatch_sched ();
36213 else if (mode == ADD_TO_DISPATCH_WINDOW)
36214 add_to_dispatch_window (insn);
36215 }
36216
36217 /* Return TRUE if dispatch scheduling is enabled and the query selected by ACTION holds for INSN. */
36218
36219 static bool
36220 has_dispatch (rtx insn, int action)
36221 {
36222 if ((ix86_tune == PROCESSOR_BDVER1 || ix86_tune == PROCESSOR_BDVER2)
36223 && flag_dispatch_scheduler)
36224 switch (action)
36225 {
36226 default:
36227 return false;
36228
36229 case IS_DISPATCH_ON:
36230 return true;
36231 break;
36232
36233 case IS_CMP:
36234 return is_cmp (insn);
36235
36236 case DISPATCH_VIOLATION:
36237 return dispatch_violation ();
36238
36239 case FITS_DISPATCH_WINDOW:
36240 return fits_dispatch_window (insn);
36241 }
36242
36243 return false;
36244 }
36245
36246 /* Implementation of reassociation_width target hook used by
36247 reassoc phase to identify parallelism level in reassociated
36248 tree. The statement's tree_code is passed in OPC. The argument type
36249 is passed in MODE.
36250
36251 Currently parallel reassociation is enabled for Atom
36252 processors only and we set reassociation width to be 2
36253 because Atom may issue up to 2 instructions per cycle.
36254
36255 Return value should be fixed if parallel reassociation is
36256 enabled for other processors. */
36257
36258 static int
36259 ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
36260 enum machine_mode mode)
36261 {
36262 int res = 1;
36263
36264 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
36265 res = 2;
36266 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
36267 res = 2;
36268
36269 return res;
36270 }
36271
36272 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
36273 place emms and femms instructions. */
36274
36275 static enum machine_mode
36276 ix86_preferred_simd_mode (enum machine_mode mode)
36277 {
36278 if (!TARGET_SSE)
36279 return word_mode;
36280
36281 switch (mode)
36282 {
36283 case QImode:
36284 return TARGET_AVX2 ? V32QImode : V16QImode;
36285 case HImode:
36286 return TARGET_AVX2 ? V16HImode : V8HImode;
36287 case SImode:
36288 return TARGET_AVX2 ? V8SImode : V4SImode;
36289 case DImode:
36290 return TARGET_AVX2 ? V4DImode : V2DImode;
36291
36292 case SFmode:
36293 if (TARGET_AVX && !TARGET_PREFER_AVX128)
36294 return V8SFmode;
36295 else
36296 return V4SFmode;
36297
36298 case DFmode:
36299 if (!TARGET_VECTORIZE_DOUBLE)
36300 return word_mode;
36301 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
36302 return V4DFmode;
36303 else if (TARGET_SSE2)
36304 return V2DFmode;
36305 /* FALLTHRU */
36306
36307 default:
36308 return word_mode;
36309 }
36310 }
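
/* Examples, for illustration: SImode maps to V8SImode with AVX2 and to
   V4SImode otherwise; SFmode maps to V8SFmode when AVX is enabled and
   128-bit vectors are not preferred, else to V4SFmode; DFmode falls back
   to word_mode when double vectorization is disabled.  */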
36311
36312 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
36313 vectors. */
36314
36315 static unsigned int
36316 ix86_autovectorize_vector_sizes (void)
36317 {
36318 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
36319 }
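
/* Note, for illustration: the returned value is a bitmask of vector sizes
   in bytes, so 32 | 16 asks the vectorizer to try both 256-bit and
   128-bit vectors, while 0 means only the preferred SIMD mode's size is
   tried.  */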
36320
36321 /* Initialize the GCC target structure. */
36322 #undef TARGET_RETURN_IN_MEMORY
36323 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
36324
36325 #undef TARGET_LEGITIMIZE_ADDRESS
36326 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
36327
36328 #undef TARGET_ATTRIBUTE_TABLE
36329 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
36330 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
36331 # undef TARGET_MERGE_DECL_ATTRIBUTES
36332 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
36333 #endif
36334
36335 #undef TARGET_COMP_TYPE_ATTRIBUTES
36336 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
36337
36338 #undef TARGET_INIT_BUILTINS
36339 #define TARGET_INIT_BUILTINS ix86_init_builtins
36340 #undef TARGET_BUILTIN_DECL
36341 #define TARGET_BUILTIN_DECL ix86_builtin_decl
36342 #undef TARGET_EXPAND_BUILTIN
36343 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
36344
36345 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
36346 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
36347 ix86_builtin_vectorized_function
36348
36349 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
36350 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
36351
36352 #undef TARGET_BUILTIN_RECIPROCAL
36353 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
36354
36355 #undef TARGET_ASM_FUNCTION_EPILOGUE
36356 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
36357
36358 #undef TARGET_ENCODE_SECTION_INFO
36359 #ifndef SUBTARGET_ENCODE_SECTION_INFO
36360 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
36361 #else
36362 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
36363 #endif
36364
36365 #undef TARGET_ASM_OPEN_PAREN
36366 #define TARGET_ASM_OPEN_PAREN ""
36367 #undef TARGET_ASM_CLOSE_PAREN
36368 #define TARGET_ASM_CLOSE_PAREN ""
36369
36370 #undef TARGET_ASM_BYTE_OP
36371 #define TARGET_ASM_BYTE_OP ASM_BYTE
36372
36373 #undef TARGET_ASM_ALIGNED_HI_OP
36374 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
36375 #undef TARGET_ASM_ALIGNED_SI_OP
36376 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
36377 #ifdef ASM_QUAD
36378 #undef TARGET_ASM_ALIGNED_DI_OP
36379 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
36380 #endif
36381
36382 #undef TARGET_PROFILE_BEFORE_PROLOGUE
36383 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
36384
36385 #undef TARGET_ASM_UNALIGNED_HI_OP
36386 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
36387 #undef TARGET_ASM_UNALIGNED_SI_OP
36388 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
36389 #undef TARGET_ASM_UNALIGNED_DI_OP
36390 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
36391
36392 #undef TARGET_PRINT_OPERAND
36393 #define TARGET_PRINT_OPERAND ix86_print_operand
36394 #undef TARGET_PRINT_OPERAND_ADDRESS
36395 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
36396 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
36397 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
36398 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
36399 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
36400
36401 #undef TARGET_SCHED_INIT_GLOBAL
36402 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
36403 #undef TARGET_SCHED_ADJUST_COST
36404 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
36405 #undef TARGET_SCHED_ISSUE_RATE
36406 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
36407 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
36408 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
36409 ia32_multipass_dfa_lookahead
36410
36411 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
36412 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
36413
36414 #ifdef HAVE_AS_TLS
36415 #undef TARGET_HAVE_TLS
36416 #define TARGET_HAVE_TLS true
36417 #endif
36418 #undef TARGET_CANNOT_FORCE_CONST_MEM
36419 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
36420 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
36421 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
36422
36423 #undef TARGET_DELEGITIMIZE_ADDRESS
36424 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
36425
36426 #undef TARGET_MS_BITFIELD_LAYOUT_P
36427 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
36428
36429 #if TARGET_MACHO
36430 #undef TARGET_BINDS_LOCAL_P
36431 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
36432 #endif
36433 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
36434 #undef TARGET_BINDS_LOCAL_P
36435 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
36436 #endif
36437
36438 #undef TARGET_ASM_OUTPUT_MI_THUNK
36439 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
36440 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
36441 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
36442
36443 #undef TARGET_ASM_FILE_START
36444 #define TARGET_ASM_FILE_START x86_file_start
36445
36446 #undef TARGET_OPTION_OVERRIDE
36447 #define TARGET_OPTION_OVERRIDE ix86_option_override
36448
36449 #undef TARGET_REGISTER_MOVE_COST
36450 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
36451 #undef TARGET_MEMORY_MOVE_COST
36452 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
36453 #undef TARGET_RTX_COSTS
36454 #define TARGET_RTX_COSTS ix86_rtx_costs
36455 #undef TARGET_ADDRESS_COST
36456 #define TARGET_ADDRESS_COST ix86_address_cost
36457
36458 #undef TARGET_FIXED_CONDITION_CODE_REGS
36459 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
36460 #undef TARGET_CC_MODES_COMPATIBLE
36461 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
36462
36463 #undef TARGET_MACHINE_DEPENDENT_REORG
36464 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
36465
36466 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
36467 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
36468
36469 #undef TARGET_BUILD_BUILTIN_VA_LIST
36470 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
36471
36472 #undef TARGET_ENUM_VA_LIST_P
36473 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
36474
36475 #undef TARGET_FN_ABI_VA_LIST
36476 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
36477
36478 #undef TARGET_CANONICAL_VA_LIST_TYPE
36479 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
36480
36481 #undef TARGET_EXPAND_BUILTIN_VA_START
36482 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
36483
36484 #undef TARGET_MD_ASM_CLOBBERS
36485 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
36486
36487 #undef TARGET_PROMOTE_PROTOTYPES
36488 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
36489 #undef TARGET_STRUCT_VALUE_RTX
36490 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
36491 #undef TARGET_SETUP_INCOMING_VARARGS
36492 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
36493 #undef TARGET_MUST_PASS_IN_STACK
36494 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
36495 #undef TARGET_FUNCTION_ARG_ADVANCE
36496 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
36497 #undef TARGET_FUNCTION_ARG
36498 #define TARGET_FUNCTION_ARG ix86_function_arg
36499 #undef TARGET_FUNCTION_ARG_BOUNDARY
36500 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
36501 #undef TARGET_PASS_BY_REFERENCE
36502 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
36503 #undef TARGET_INTERNAL_ARG_POINTER
36504 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
36505 #undef TARGET_UPDATE_STACK_BOUNDARY
36506 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
36507 #undef TARGET_GET_DRAP_RTX
36508 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
36509 #undef TARGET_STRICT_ARGUMENT_NAMING
36510 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
36511 #undef TARGET_STATIC_CHAIN
36512 #define TARGET_STATIC_CHAIN ix86_static_chain
36513 #undef TARGET_TRAMPOLINE_INIT
36514 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
36515 #undef TARGET_RETURN_POPS_ARGS
36516 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
36517
36518 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
36519 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
36520
36521 #undef TARGET_SCALAR_MODE_SUPPORTED_P
36522 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
36523
36524 #undef TARGET_VECTOR_MODE_SUPPORTED_P
36525 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
36526
36527 #undef TARGET_C_MODE_FOR_SUFFIX
36528 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
36529
36530 #ifdef HAVE_AS_TLS
36531 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
36532 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
36533 #endif
36534
36535 #ifdef SUBTARGET_INSERT_ATTRIBUTES
36536 #undef TARGET_INSERT_ATTRIBUTES
36537 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
36538 #endif
36539
36540 #undef TARGET_MANGLE_TYPE
36541 #define TARGET_MANGLE_TYPE ix86_mangle_type
36542
36543 #ifndef TARGET_MACHO
36544 #undef TARGET_STACK_PROTECT_FAIL
36545 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
36546 #endif
36547
36548 #undef TARGET_FUNCTION_VALUE
36549 #define TARGET_FUNCTION_VALUE ix86_function_value
36550
36551 #undef TARGET_FUNCTION_VALUE_REGNO_P
36552 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
36553
36554 #undef TARGET_PROMOTE_FUNCTION_MODE
36555 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
36556
36557 #undef TARGET_SECONDARY_RELOAD
36558 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
36559
36560 #undef TARGET_CLASS_MAX_NREGS
36561 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
36562
36563 #undef TARGET_PREFERRED_RELOAD_CLASS
36564 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
36565 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
36566 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
36567 #undef TARGET_CLASS_LIKELY_SPILLED_P
36568 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
36569
36570 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
36571 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
36572 ix86_builtin_vectorization_cost
36573 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
36574 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
36575 ix86_vectorize_builtin_vec_perm
36576 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
36577 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
36578 ix86_vectorize_builtin_vec_perm_ok
36579 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
36580 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
36581 ix86_preferred_simd_mode
36582 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
36583 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
36584 ix86_autovectorize_vector_sizes
36585
36586 #undef TARGET_SET_CURRENT_FUNCTION
36587 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
36588
36589 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
36590 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
36591
36592 #undef TARGET_OPTION_SAVE
36593 #define TARGET_OPTION_SAVE ix86_function_specific_save
36594
36595 #undef TARGET_OPTION_RESTORE
36596 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
36597
36598 #undef TARGET_OPTION_PRINT
36599 #define TARGET_OPTION_PRINT ix86_function_specific_print
36600
36601 #undef TARGET_CAN_INLINE_P
36602 #define TARGET_CAN_INLINE_P ix86_can_inline_p
36603
36604 #undef TARGET_EXPAND_TO_RTL_HOOK
36605 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
36606
36607 #undef TARGET_LEGITIMATE_ADDRESS_P
36608 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
36609
36610 #undef TARGET_LEGITIMATE_CONSTANT_P
36611 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
36612
36613 #undef TARGET_FRAME_POINTER_REQUIRED
36614 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
36615
36616 #undef TARGET_CAN_ELIMINATE
36617 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
36618
36619 #undef TARGET_EXTRA_LIVE_ON_ENTRY
36620 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
36621
36622 #undef TARGET_ASM_CODE_END
36623 #define TARGET_ASM_CODE_END ix86_code_end
36624
36625 #undef TARGET_CONDITIONAL_REGISTER_USAGE
36626 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
36627
36628 #if TARGET_MACHO
36629 #undef TARGET_INIT_LIBFUNCS
36630 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
36631 #endif
36632
36633 struct gcc_target targetm = TARGET_INITIALIZER;
36634 \f
36635 #include "gt-i386.h"