Call copy_to_mode_reg instead of force_reg.
gcc/config/i386/i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
3 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
4 Free Software Foundation, Inc.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "output.h"
34 #include "insn-codes.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "except.h"
38 #include "function.h"
39 #include "recog.h"
40 #include "expr.h"
41 #include "optabs.h"
42 #include "diagnostic-core.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "common/common-target.h"
49 #include "langhooks.h"
50 #include "cgraph.h"
51 #include "gimple.h"
52 #include "dwarf2.h"
53 #include "df.h"
54 #include "tm-constrs.h"
55 #include "params.h"
56 #include "cselib.h"
57 #include "debug.h"
58 #include "sched-int.h"
59 #include "sbitmap.h"
60 #include "fibheap.h"
61 #include "opts.h"
62 #include "diagnostic.h"
63
64 enum upper_128bits_state
65 {
66 unknown = 0,
67 unused,
68 used
69 };
70
71 typedef struct block_info_def
72 {
73 /* State of the upper 128bits of AVX registers at exit. */
74 enum upper_128bits_state state;
75 /* TRUE if state of the upper 128bits of AVX registers is unchanged
76 in this block. */
77 bool unchanged;
78 /* TRUE if block has been processed. */
79 bool processed;
80 /* TRUE if block has been scanned. */
81 bool scanned;
82 /* Previous state of the upper 128bits of AVX registers at entry. */
83 enum upper_128bits_state prev;
84 } *block_info;
85
86 #define BLOCK_INFO(B) ((block_info) (B)->aux)
87
88 enum call_avx256_state
89 {
90 /* Callee returns 256bit AVX register. */
91 callee_return_avx256 = -1,
92 /* Callee returns and passes 256bit AVX register. */
93 callee_return_pass_avx256,
94 /* Callee passes 256bit AVX register. */
95 callee_pass_avx256,
 96 /* Callee doesn't return nor pass 256bit AVX register, or no
97 256bit AVX register in function return. */
98 call_no_avx256,
99 /* vzeroupper intrinsic. */
100 vzeroupper_intrinsic
101 };
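#if 0 /* Illustrative sketch added for exposition; not part of i386.c.  */
/* The backend encodes one of the call_avx256_state values above as the
   const_int inside the vzeroupper UNSPEC_VOLATILE pattern, roughly
     (unspec_volatile [(const_int N)] UNSPECV_VZEROUPPER).
   Reading it back mirrors what move_or_delete_vzeroupper_2 does below
   with INTVAL (XVECEXP (pat, 0, 0)):  */
static int
vzeroupper_avx256_state (rtx pat)
{
  gcc_assert (GET_CODE (pat) == UNSPEC_VOLATILE
	      && XINT (pat, 1) == UNSPECV_VZEROUPPER);
  return INTVAL (XVECEXP (pat, 0, 0));
}
#endif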
102
103 /* Check if a 256bit AVX register is referenced in stores. */
104
105 static void
106 check_avx256_stores (rtx dest, const_rtx set, void *data)
107 {
108 if ((REG_P (dest)
109 && VALID_AVX256_REG_MODE (GET_MODE (dest)))
110 || (GET_CODE (set) == SET
111 && REG_P (SET_SRC (set))
112 && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
113 {
114 enum upper_128bits_state *state
115 = (enum upper_128bits_state *) data;
116 *state = used;
117 }
118 }
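#if 0 /* Illustrative sketch added for exposition; not part of i386.c.  */
/* check_avx256_stores is a note_stores callback: note_stores walks the
   stores in an insn pattern and invokes the callback once per SET or
   CLOBBER destination.  Used on its own it would look like this,
   mirroring the call made in move_or_delete_vzeroupper_2 below:  */
static enum upper_128bits_state
avx256_state_after (rtx insn)
{
  enum upper_128bits_state state = unused;
  note_stores (PATTERN (insn), check_avx256_stores, &state);
  return state;
}
#endif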
119
120 /* Helper function for move_or_delete_vzeroupper_1. Look for vzeroupper
121 in basic block BB. Delete it if upper 128bit AVX registers are
 122 unused. If it isn't deleted, move it to just before a jump or call insn.
123
124 STATE is state of the upper 128bits of AVX registers at entry. */
125
126 static void
127 move_or_delete_vzeroupper_2 (basic_block bb,
128 enum upper_128bits_state state)
129 {
130 rtx insn, bb_end;
131 rtx vzeroupper_insn = NULL_RTX;
132 rtx pat;
133 int avx256;
134 bool unchanged;
135
136 if (BLOCK_INFO (bb)->unchanged)
137 {
138 if (dump_file)
139 fprintf (dump_file, " [bb %i] unchanged: upper 128bits: %d\n",
140 bb->index, state);
141
142 BLOCK_INFO (bb)->state = state;
143 return;
144 }
145
146 if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
147 {
148 if (dump_file)
149 fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
150 bb->index, BLOCK_INFO (bb)->state);
151 return;
152 }
153
154 BLOCK_INFO (bb)->prev = state;
155
156 if (dump_file)
157 fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
158 bb->index, state);
159
160 unchanged = true;
161
162 /* BB_END changes when it is deleted. */
163 bb_end = BB_END (bb);
164 insn = BB_HEAD (bb);
165 while (insn != bb_end)
166 {
167 insn = NEXT_INSN (insn);
168
169 if (!NONDEBUG_INSN_P (insn))
170 continue;
171
172 /* Move vzeroupper before jump/call. */
173 if (JUMP_P (insn) || CALL_P (insn))
174 {
175 if (!vzeroupper_insn)
176 continue;
177
178 if (PREV_INSN (insn) != vzeroupper_insn)
179 {
180 if (dump_file)
181 {
182 fprintf (dump_file, "Move vzeroupper after:\n");
183 print_rtl_single (dump_file, PREV_INSN (insn));
184 fprintf (dump_file, "before:\n");
185 print_rtl_single (dump_file, insn);
186 }
187 reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
188 PREV_INSN (insn));
189 }
190 vzeroupper_insn = NULL_RTX;
191 continue;
192 }
193
194 pat = PATTERN (insn);
195
196 /* Check insn for vzeroupper intrinsic. */
197 if (GET_CODE (pat) == UNSPEC_VOLATILE
198 && XINT (pat, 1) == UNSPECV_VZEROUPPER)
199 {
200 if (dump_file)
201 {
202 /* Found vzeroupper intrinsic. */
203 fprintf (dump_file, "Found vzeroupper:\n");
204 print_rtl_single (dump_file, insn);
205 }
206 }
207 else
208 {
209 /* Check insn for vzeroall intrinsic. */
210 if (GET_CODE (pat) == PARALLEL
211 && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
212 && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
213 {
214 state = unused;
215 unchanged = false;
216
217 /* Delete pending vzeroupper insertion. */
218 if (vzeroupper_insn)
219 {
220 delete_insn (vzeroupper_insn);
221 vzeroupper_insn = NULL_RTX;
222 }
223 }
224 else if (state != used)
225 {
226 note_stores (pat, check_avx256_stores, &state);
227 if (state == used)
228 unchanged = false;
229 }
230 continue;
231 }
232
233 /* Process vzeroupper intrinsic. */
234 avx256 = INTVAL (XVECEXP (pat, 0, 0));
235
236 if (state == unused)
237 {
238 /* Since the upper 128bits are cleared, callee must not pass
239 256bit AVX register. We only need to check if callee
240 returns 256bit AVX register. */
241 if (avx256 == callee_return_avx256)
242 {
243 state = used;
244 unchanged = false;
245 }
246
247 /* Remove unnecessary vzeroupper since upper 128bits are
248 cleared. */
249 if (dump_file)
250 {
251 fprintf (dump_file, "Delete redundant vzeroupper:\n");
252 print_rtl_single (dump_file, insn);
253 }
254 delete_insn (insn);
255 }
256 else
257 {
258 /* Set state to UNUSED if callee doesn't return 256bit AVX
259 register. */
260 if (avx256 != callee_return_pass_avx256)
261 state = unused;
262
263 if (avx256 == callee_return_pass_avx256
264 || avx256 == callee_pass_avx256)
265 {
266 /* Must remove vzeroupper since callee passes in 256bit
267 AVX register. */
268 if (dump_file)
269 {
270 fprintf (dump_file, "Delete callee pass vzeroupper:\n");
271 print_rtl_single (dump_file, insn);
272 }
273 delete_insn (insn);
274 }
275 else
276 {
277 vzeroupper_insn = insn;
278 unchanged = false;
279 }
280 }
281 }
282
283 BLOCK_INFO (bb)->state = state;
284 BLOCK_INFO (bb)->unchanged = unchanged;
285 BLOCK_INFO (bb)->scanned = true;
286
287 if (dump_file)
288 fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
289 bb->index, unchanged ? "unchanged" : "changed",
290 state);
291 }
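/* In short, the per-insn transfer function implemented above is
   (summary added for exposition): a vzeroall pattern forces STATE to
   "unused"; any store into a 256bit AVX register (or a set copying one)
   forces it to "used"; a vzeroupper is deleted when the upper halves are
   already clear or when the callee is passed 256bit AVX arguments, and
   is otherwise kept and later moved in front of the next jump or call.  */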
292
293 /* Helper function for move_or_delete_vzeroupper. Process vzeroupper
294 in BLOCK and check its predecessor blocks. Treat UNKNOWN state
 295 as UNUSED if UNKNOWN_IS_UNUSED is true. Return TRUE if the exit
296 state is changed. */
297
298 static bool
299 move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
300 {
301 edge e;
302 edge_iterator ei;
303 enum upper_128bits_state state, old_state, new_state;
304 bool seen_unknown;
305
306 if (dump_file)
307 fprintf (dump_file, " Process [bb %i]: status: %d\n",
308 block->index, BLOCK_INFO (block)->processed);
309
310 if (BLOCK_INFO (block)->processed)
311 return false;
312
313 state = unused;
314
315 /* Check all predecessor edges of this block. */
316 seen_unknown = false;
317 FOR_EACH_EDGE (e, ei, block->preds)
318 {
319 if (e->src == block)
320 continue;
321 switch (BLOCK_INFO (e->src)->state)
322 {
323 case unknown:
324 if (!unknown_is_unused)
325 seen_unknown = true;
326 case unused:
327 break;
328 case used:
329 state = used;
330 goto done;
331 }
332 }
333
334 if (seen_unknown)
335 state = unknown;
336
337 done:
338 old_state = BLOCK_INFO (block)->state;
339 move_or_delete_vzeroupper_2 (block, state);
340 new_state = BLOCK_INFO (block)->state;
341
342 if (state != unknown || new_state == used)
343 BLOCK_INFO (block)->processed = true;
344
345 /* Need to rescan if the upper 128bits of AVX registers are changed
346 to USED at exit. */
347 if (new_state != old_state)
348 {
349 if (new_state == used)
350 cfun->machine->rescan_vzeroupper_p = 1;
351 return true;
352 }
353 else
354 return false;
355 }
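#if 0 /* Illustrative sketch added for exposition; not part of i386.c.  */
/* The predecessor walk above is a tiny dataflow meet: "used" wins over
   everything, "unknown" wins over "unused" unless UNKNOWN_IS_UNUSED,
   and "unused" is the identity.  Written as a standalone function:  */
static enum upper_128bits_state
meet_upper_128bits_state (enum upper_128bits_state a,
			  enum upper_128bits_state b,
			  bool unknown_is_unused)
{
  if (a == used || b == used)
    return used;
  if (!unknown_is_unused && (a == unknown || b == unknown))
    return unknown;
  return unused;
}
#endif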
356
357 /* Go through the instruction stream looking for vzeroupper. Delete
358 it if upper 128bit AVX registers are unused. If it isn't deleted,
 359 move it to just before a jump or call insn. */
360
361 static void
362 move_or_delete_vzeroupper (void)
363 {
364 edge e;
365 edge_iterator ei;
366 basic_block bb;
367 fibheap_t worklist, pending, fibheap_swap;
368 sbitmap visited, in_worklist, in_pending, sbitmap_swap;
369 int *bb_order;
370 int *rc_order;
371 int i;
372
373 /* Set up block info for each basic block. */
374 alloc_aux_for_blocks (sizeof (struct block_info_def));
375
376 /* Process outgoing edges of entry point. */
377 if (dump_file)
378 fprintf (dump_file, "Process outgoing edges of entry point\n");
379
380 FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
381 {
382 move_or_delete_vzeroupper_2 (e->dest,
383 cfun->machine->caller_pass_avx256_p
384 ? used : unused);
385 BLOCK_INFO (e->dest)->processed = true;
386 }
387
388 /* Compute reverse completion order of depth first search of the CFG
389 so that the data-flow runs faster. */
390 rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
391 bb_order = XNEWVEC (int, last_basic_block);
392 pre_and_rev_post_order_compute (NULL, rc_order, false);
393 for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
394 bb_order[rc_order[i]] = i;
395 free (rc_order);
396
397 worklist = fibheap_new ();
398 pending = fibheap_new ();
399 visited = sbitmap_alloc (last_basic_block);
400 in_worklist = sbitmap_alloc (last_basic_block);
401 in_pending = sbitmap_alloc (last_basic_block);
402 sbitmap_zero (in_worklist);
403
404 /* Don't check outgoing edges of entry point. */
405 sbitmap_ones (in_pending);
406 FOR_EACH_BB (bb)
407 if (BLOCK_INFO (bb)->processed)
408 RESET_BIT (in_pending, bb->index);
409 else
410 {
411 move_or_delete_vzeroupper_1 (bb, false);
412 fibheap_insert (pending, bb_order[bb->index], bb);
413 }
414
415 if (dump_file)
416 fprintf (dump_file, "Check remaining basic blocks\n");
417
418 while (!fibheap_empty (pending))
419 {
420 fibheap_swap = pending;
421 pending = worklist;
422 worklist = fibheap_swap;
423 sbitmap_swap = in_pending;
424 in_pending = in_worklist;
425 in_worklist = sbitmap_swap;
426
427 sbitmap_zero (visited);
428
429 cfun->machine->rescan_vzeroupper_p = 0;
430
431 while (!fibheap_empty (worklist))
432 {
433 bb = (basic_block) fibheap_extract_min (worklist);
434 RESET_BIT (in_worklist, bb->index);
435 gcc_assert (!TEST_BIT (visited, bb->index));
436 if (!TEST_BIT (visited, bb->index))
437 {
438 edge_iterator ei;
439
440 SET_BIT (visited, bb->index);
441
442 if (move_or_delete_vzeroupper_1 (bb, false))
443 FOR_EACH_EDGE (e, ei, bb->succs)
444 {
445 if (e->dest == EXIT_BLOCK_PTR
446 || BLOCK_INFO (e->dest)->processed)
447 continue;
448
449 if (TEST_BIT (visited, e->dest->index))
450 {
451 if (!TEST_BIT (in_pending, e->dest->index))
452 {
453 /* Send E->DEST to next round. */
454 SET_BIT (in_pending, e->dest->index);
455 fibheap_insert (pending,
456 bb_order[e->dest->index],
457 e->dest);
458 }
459 }
460 else if (!TEST_BIT (in_worklist, e->dest->index))
461 {
462 /* Add E->DEST to current round. */
463 SET_BIT (in_worklist, e->dest->index);
464 fibheap_insert (worklist, bb_order[e->dest->index],
465 e->dest);
466 }
467 }
468 }
469 }
470
471 if (!cfun->machine->rescan_vzeroupper_p)
472 break;
473 }
474
475 free (bb_order);
476 fibheap_delete (worklist);
477 fibheap_delete (pending);
478 sbitmap_free (visited);
479 sbitmap_free (in_worklist);
480 sbitmap_free (in_pending);
481
482 if (dump_file)
483 fprintf (dump_file, "Process remaining basic blocks\n");
484
485 FOR_EACH_BB (bb)
486 move_or_delete_vzeroupper_1 (bb, true);
487
488 free_aux_for_blocks ();
489 }
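/* The PENDING/WORKLIST pair above is the usual two-phase iterative
   dataflow driver (summary added for exposition): blocks are visited in
   reverse completion order of a depth-first search; a block whose exit
   state changed pushes its unprocessed successors into the current round
   (WORKLIST) or, if already visited this round, into the next one
   (PENDING); iteration stops once a whole round finishes without setting
   rescan_vzeroupper_p.  */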
490
491 static rtx legitimize_dllimport_symbol (rtx, bool);
492
493 #ifndef CHECK_STACK_LIMIT
494 #define CHECK_STACK_LIMIT (-1)
495 #endif
496
497 /* Return index of given mode in mult and division cost tables. */
498 #define MODE_INDEX(mode) \
499 ((mode) == QImode ? 0 \
500 : (mode) == HImode ? 1 \
501 : (mode) == SImode ? 2 \
502 : (mode) == DImode ? 3 \
503 : 4)
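#if 0 /* Illustrative sketch added for exposition; not part of i386.c.
	 The global ix86_cost and the field name mult_init are assumed
	 from the surrounding cost machinery, not shown in this excerpt.  */
/* MODE_INDEX selects the QI/HI/SI/DI/other slot of the five-entry
   per-mode arrays in struct processor_costs, e.g.:  */
static int
example_imul_cost (enum machine_mode mode)
{
  return ix86_cost->mult_init[MODE_INDEX (mode)];
}
#endif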
504
505 /* Processor costs (relative to an add) */
506 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
507 #define COSTS_N_BYTES(N) ((N) * 2)
508
509 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
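/* The stringop tables near the end of each processor_costs entry below
   describe memcpy and memset expansion (summary added for exposition):
   each struct stringop_algs gives the algorithm used when the size is
   not known at compile time, followed by {max_size, algorithm} pairs for
   known sizes, terminated by max_size == -1; each table has a 32-bit and
   a 64-bit variant, which is why 32-bit-only CPUs use
   DUMMY_STRINGOP_ALGS for the second slot.  For example
     {libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}
   means: variable size -> libcall, up to 256 bytes -> rep movsl,
   anything larger -> libcall.  */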
510
511 const
512 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
513 COSTS_N_BYTES (2), /* cost of an add instruction */
514 COSTS_N_BYTES (3), /* cost of a lea instruction */
515 COSTS_N_BYTES (2), /* variable shift costs */
516 COSTS_N_BYTES (3), /* constant shift costs */
517 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
518 COSTS_N_BYTES (3), /* HI */
519 COSTS_N_BYTES (3), /* SI */
520 COSTS_N_BYTES (3), /* DI */
521 COSTS_N_BYTES (5)}, /* other */
522 0, /* cost of multiply per each bit set */
523 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
524 COSTS_N_BYTES (3), /* HI */
525 COSTS_N_BYTES (3), /* SI */
526 COSTS_N_BYTES (3), /* DI */
527 COSTS_N_BYTES (5)}, /* other */
528 COSTS_N_BYTES (3), /* cost of movsx */
529 COSTS_N_BYTES (3), /* cost of movzx */
530 0, /* "large" insn */
531 2, /* MOVE_RATIO */
532 2, /* cost for loading QImode using movzbl */
533 {2, 2, 2}, /* cost of loading integer registers
534 in QImode, HImode and SImode.
535 Relative to reg-reg move (2). */
536 {2, 2, 2}, /* cost of storing integer registers */
537 2, /* cost of reg,reg fld/fst */
538 {2, 2, 2}, /* cost of loading fp registers
539 in SFmode, DFmode and XFmode */
540 {2, 2, 2}, /* cost of storing fp registers
541 in SFmode, DFmode and XFmode */
542 3, /* cost of moving MMX register */
543 {3, 3}, /* cost of loading MMX registers
544 in SImode and DImode */
545 {3, 3}, /* cost of storing MMX registers
546 in SImode and DImode */
547 3, /* cost of moving SSE register */
548 {3, 3, 3}, /* cost of loading SSE registers
549 in SImode, DImode and TImode */
550 {3, 3, 3}, /* cost of storing SSE registers
551 in SImode, DImode and TImode */
552 3, /* MMX or SSE register to integer */
553 0, /* size of l1 cache */
554 0, /* size of l2 cache */
555 0, /* size of prefetch block */
556 0, /* number of parallel prefetches */
557 2, /* Branch cost */
558 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
559 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
560 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
561 COSTS_N_BYTES (2), /* cost of FABS instruction. */
562 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
563 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
564 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
565 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
566 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
567 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
568 1, /* scalar_stmt_cost. */
569 1, /* scalar load_cost. */
570 1, /* scalar_store_cost. */
571 1, /* vec_stmt_cost. */
572 1, /* vec_to_scalar_cost. */
573 1, /* scalar_to_vec_cost. */
574 1, /* vec_align_load_cost. */
575 1, /* vec_unalign_load_cost. */
576 1, /* vec_store_cost. */
577 1, /* cond_taken_branch_cost. */
578 1, /* cond_not_taken_branch_cost. */
579 };
580
581 /* Processor costs (relative to an add) */
582 static const
583 struct processor_costs i386_cost = { /* 386 specific costs */
584 COSTS_N_INSNS (1), /* cost of an add instruction */
585 COSTS_N_INSNS (1), /* cost of a lea instruction */
586 COSTS_N_INSNS (3), /* variable shift costs */
587 COSTS_N_INSNS (2), /* constant shift costs */
588 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
589 COSTS_N_INSNS (6), /* HI */
590 COSTS_N_INSNS (6), /* SI */
591 COSTS_N_INSNS (6), /* DI */
592 COSTS_N_INSNS (6)}, /* other */
593 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
594 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
595 COSTS_N_INSNS (23), /* HI */
596 COSTS_N_INSNS (23), /* SI */
597 COSTS_N_INSNS (23), /* DI */
598 COSTS_N_INSNS (23)}, /* other */
599 COSTS_N_INSNS (3), /* cost of movsx */
600 COSTS_N_INSNS (2), /* cost of movzx */
601 15, /* "large" insn */
602 3, /* MOVE_RATIO */
603 4, /* cost for loading QImode using movzbl */
604 {2, 4, 2}, /* cost of loading integer registers
605 in QImode, HImode and SImode.
606 Relative to reg-reg move (2). */
607 {2, 4, 2}, /* cost of storing integer registers */
608 2, /* cost of reg,reg fld/fst */
609 {8, 8, 8}, /* cost of loading fp registers
610 in SFmode, DFmode and XFmode */
611 {8, 8, 8}, /* cost of storing fp registers
612 in SFmode, DFmode and XFmode */
613 2, /* cost of moving MMX register */
614 {4, 8}, /* cost of loading MMX registers
615 in SImode and DImode */
616 {4, 8}, /* cost of storing MMX registers
617 in SImode and DImode */
618 2, /* cost of moving SSE register */
619 {4, 8, 16}, /* cost of loading SSE registers
620 in SImode, DImode and TImode */
621 {4, 8, 16}, /* cost of storing SSE registers
622 in SImode, DImode and TImode */
623 3, /* MMX or SSE register to integer */
624 0, /* size of l1 cache */
625 0, /* size of l2 cache */
626 0, /* size of prefetch block */
627 0, /* number of parallel prefetches */
628 1, /* Branch cost */
629 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
630 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
631 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
632 COSTS_N_INSNS (22), /* cost of FABS instruction. */
633 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
634 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
635 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
636 DUMMY_STRINGOP_ALGS},
637 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
638 DUMMY_STRINGOP_ALGS},
639 1, /* scalar_stmt_cost. */
640 1, /* scalar load_cost. */
641 1, /* scalar_store_cost. */
642 1, /* vec_stmt_cost. */
643 1, /* vec_to_scalar_cost. */
644 1, /* scalar_to_vec_cost. */
645 1, /* vec_align_load_cost. */
646 2, /* vec_unalign_load_cost. */
647 1, /* vec_store_cost. */
648 3, /* cond_taken_branch_cost. */
649 1, /* cond_not_taken_branch_cost. */
650 };
651
652 static const
653 struct processor_costs i486_cost = { /* 486 specific costs */
654 COSTS_N_INSNS (1), /* cost of an add instruction */
655 COSTS_N_INSNS (1), /* cost of a lea instruction */
656 COSTS_N_INSNS (3), /* variable shift costs */
657 COSTS_N_INSNS (2), /* constant shift costs */
658 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
659 COSTS_N_INSNS (12), /* HI */
660 COSTS_N_INSNS (12), /* SI */
661 COSTS_N_INSNS (12), /* DI */
662 COSTS_N_INSNS (12)}, /* other */
663 1, /* cost of multiply per each bit set */
664 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
665 COSTS_N_INSNS (40), /* HI */
666 COSTS_N_INSNS (40), /* SI */
667 COSTS_N_INSNS (40), /* DI */
668 COSTS_N_INSNS (40)}, /* other */
669 COSTS_N_INSNS (3), /* cost of movsx */
670 COSTS_N_INSNS (2), /* cost of movzx */
671 15, /* "large" insn */
672 3, /* MOVE_RATIO */
673 4, /* cost for loading QImode using movzbl */
674 {2, 4, 2}, /* cost of loading integer registers
675 in QImode, HImode and SImode.
676 Relative to reg-reg move (2). */
677 {2, 4, 2}, /* cost of storing integer registers */
678 2, /* cost of reg,reg fld/fst */
679 {8, 8, 8}, /* cost of loading fp registers
680 in SFmode, DFmode and XFmode */
681 {8, 8, 8}, /* cost of storing fp registers
682 in SFmode, DFmode and XFmode */
683 2, /* cost of moving MMX register */
684 {4, 8}, /* cost of loading MMX registers
685 in SImode and DImode */
686 {4, 8}, /* cost of storing MMX registers
687 in SImode and DImode */
688 2, /* cost of moving SSE register */
689 {4, 8, 16}, /* cost of loading SSE registers
690 in SImode, DImode and TImode */
691 {4, 8, 16}, /* cost of storing SSE registers
692 in SImode, DImode and TImode */
693 3, /* MMX or SSE register to integer */
694 4, /* size of l1 cache. 486 has 8kB cache
695 shared for code and data, so 4kB is
696 not really precise. */
697 4, /* size of l2 cache */
698 0, /* size of prefetch block */
699 0, /* number of parallel prefetches */
700 1, /* Branch cost */
701 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
702 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
703 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
704 COSTS_N_INSNS (3), /* cost of FABS instruction. */
705 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
706 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
707 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
708 DUMMY_STRINGOP_ALGS},
709 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
710 DUMMY_STRINGOP_ALGS},
711 1, /* scalar_stmt_cost. */
712 1, /* scalar load_cost. */
713 1, /* scalar_store_cost. */
714 1, /* vec_stmt_cost. */
715 1, /* vec_to_scalar_cost. */
716 1, /* scalar_to_vec_cost. */
717 1, /* vec_align_load_cost. */
718 2, /* vec_unalign_load_cost. */
719 1, /* vec_store_cost. */
720 3, /* cond_taken_branch_cost. */
721 1, /* cond_not_taken_branch_cost. */
722 };
723
724 static const
725 struct processor_costs pentium_cost = {
726 COSTS_N_INSNS (1), /* cost of an add instruction */
727 COSTS_N_INSNS (1), /* cost of a lea instruction */
728 COSTS_N_INSNS (4), /* variable shift costs */
729 COSTS_N_INSNS (1), /* constant shift costs */
730 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
731 COSTS_N_INSNS (11), /* HI */
732 COSTS_N_INSNS (11), /* SI */
733 COSTS_N_INSNS (11), /* DI */
734 COSTS_N_INSNS (11)}, /* other */
735 0, /* cost of multiply per each bit set */
736 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
737 COSTS_N_INSNS (25), /* HI */
738 COSTS_N_INSNS (25), /* SI */
739 COSTS_N_INSNS (25), /* DI */
740 COSTS_N_INSNS (25)}, /* other */
741 COSTS_N_INSNS (3), /* cost of movsx */
742 COSTS_N_INSNS (2), /* cost of movzx */
743 8, /* "large" insn */
744 6, /* MOVE_RATIO */
745 6, /* cost for loading QImode using movzbl */
746 {2, 4, 2}, /* cost of loading integer registers
747 in QImode, HImode and SImode.
748 Relative to reg-reg move (2). */
749 {2, 4, 2}, /* cost of storing integer registers */
750 2, /* cost of reg,reg fld/fst */
751 {2, 2, 6}, /* cost of loading fp registers
752 in SFmode, DFmode and XFmode */
753 {4, 4, 6}, /* cost of storing fp registers
754 in SFmode, DFmode and XFmode */
755 8, /* cost of moving MMX register */
756 {8, 8}, /* cost of loading MMX registers
757 in SImode and DImode */
758 {8, 8}, /* cost of storing MMX registers
759 in SImode and DImode */
760 2, /* cost of moving SSE register */
761 {4, 8, 16}, /* cost of loading SSE registers
762 in SImode, DImode and TImode */
763 {4, 8, 16}, /* cost of storing SSE registers
764 in SImode, DImode and TImode */
765 3, /* MMX or SSE register to integer */
766 8, /* size of l1 cache. */
767 8, /* size of l2 cache */
768 0, /* size of prefetch block */
769 0, /* number of parallel prefetches */
770 2, /* Branch cost */
771 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
772 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
773 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
774 COSTS_N_INSNS (1), /* cost of FABS instruction. */
775 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
776 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
777 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
778 DUMMY_STRINGOP_ALGS},
779 {{libcall, {{-1, rep_prefix_4_byte}}},
780 DUMMY_STRINGOP_ALGS},
781 1, /* scalar_stmt_cost. */
782 1, /* scalar load_cost. */
783 1, /* scalar_store_cost. */
784 1, /* vec_stmt_cost. */
785 1, /* vec_to_scalar_cost. */
786 1, /* scalar_to_vec_cost. */
787 1, /* vec_align_load_cost. */
788 2, /* vec_unalign_load_cost. */
789 1, /* vec_store_cost. */
790 3, /* cond_taken_branch_cost. */
791 1, /* cond_not_taken_branch_cost. */
792 };
793
794 static const
795 struct processor_costs pentiumpro_cost = {
796 COSTS_N_INSNS (1), /* cost of an add instruction */
797 COSTS_N_INSNS (1), /* cost of a lea instruction */
798 COSTS_N_INSNS (1), /* variable shift costs */
799 COSTS_N_INSNS (1), /* constant shift costs */
800 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
801 COSTS_N_INSNS (4), /* HI */
802 COSTS_N_INSNS (4), /* SI */
803 COSTS_N_INSNS (4), /* DI */
804 COSTS_N_INSNS (4)}, /* other */
805 0, /* cost of multiply per each bit set */
806 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
807 COSTS_N_INSNS (17), /* HI */
808 COSTS_N_INSNS (17), /* SI */
809 COSTS_N_INSNS (17), /* DI */
810 COSTS_N_INSNS (17)}, /* other */
811 COSTS_N_INSNS (1), /* cost of movsx */
812 COSTS_N_INSNS (1), /* cost of movzx */
813 8, /* "large" insn */
814 6, /* MOVE_RATIO */
815 2, /* cost for loading QImode using movzbl */
816 {4, 4, 4}, /* cost of loading integer registers
817 in QImode, HImode and SImode.
818 Relative to reg-reg move (2). */
819 {2, 2, 2}, /* cost of storing integer registers */
820 2, /* cost of reg,reg fld/fst */
821 {2, 2, 6}, /* cost of loading fp registers
822 in SFmode, DFmode and XFmode */
823 {4, 4, 6}, /* cost of storing fp registers
824 in SFmode, DFmode and XFmode */
825 2, /* cost of moving MMX register */
826 {2, 2}, /* cost of loading MMX registers
827 in SImode and DImode */
828 {2, 2}, /* cost of storing MMX registers
829 in SImode and DImode */
830 2, /* cost of moving SSE register */
831 {2, 2, 8}, /* cost of loading SSE registers
832 in SImode, DImode and TImode */
833 {2, 2, 8}, /* cost of storing SSE registers
834 in SImode, DImode and TImode */
835 3, /* MMX or SSE register to integer */
836 8, /* size of l1 cache. */
837 256, /* size of l2 cache */
838 32, /* size of prefetch block */
839 6, /* number of parallel prefetches */
840 2, /* Branch cost */
841 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
842 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
843 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
844 COSTS_N_INSNS (2), /* cost of FABS instruction. */
845 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
846 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
847 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
848 (we ensure the alignment). For small blocks inline loop is still a
849 noticeable win, for bigger blocks either rep movsl or rep movsb is
850 way to go. Rep movsb has apparently more expensive startup time in CPU,
851 but after 4K the difference is down in the noise. */
852 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
853 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
854 DUMMY_STRINGOP_ALGS},
855 {{rep_prefix_4_byte, {{1024, unrolled_loop},
856 {8192, rep_prefix_4_byte}, {-1, libcall}}},
857 DUMMY_STRINGOP_ALGS},
858 1, /* scalar_stmt_cost. */
859 1, /* scalar load_cost. */
860 1, /* scalar_store_cost. */
861 1, /* vec_stmt_cost. */
862 1, /* vec_to_scalar_cost. */
863 1, /* scalar_to_vec_cost. */
864 1, /* vec_align_load_cost. */
865 2, /* vec_unalign_load_cost. */
866 1, /* vec_store_cost. */
867 3, /* cond_taken_branch_cost. */
868 1, /* cond_not_taken_branch_cost. */
869 };
870
871 static const
872 struct processor_costs geode_cost = {
873 COSTS_N_INSNS (1), /* cost of an add instruction */
874 COSTS_N_INSNS (1), /* cost of a lea instruction */
875 COSTS_N_INSNS (2), /* variable shift costs */
876 COSTS_N_INSNS (1), /* constant shift costs */
877 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
878 COSTS_N_INSNS (4), /* HI */
879 COSTS_N_INSNS (7), /* SI */
880 COSTS_N_INSNS (7), /* DI */
881 COSTS_N_INSNS (7)}, /* other */
882 0, /* cost of multiply per each bit set */
883 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
884 COSTS_N_INSNS (23), /* HI */
885 COSTS_N_INSNS (39), /* SI */
886 COSTS_N_INSNS (39), /* DI */
887 COSTS_N_INSNS (39)}, /* other */
888 COSTS_N_INSNS (1), /* cost of movsx */
889 COSTS_N_INSNS (1), /* cost of movzx */
890 8, /* "large" insn */
891 4, /* MOVE_RATIO */
892 1, /* cost for loading QImode using movzbl */
893 {1, 1, 1}, /* cost of loading integer registers
894 in QImode, HImode and SImode.
895 Relative to reg-reg move (2). */
896 {1, 1, 1}, /* cost of storing integer registers */
897 1, /* cost of reg,reg fld/fst */
898 {1, 1, 1}, /* cost of loading fp registers
899 in SFmode, DFmode and XFmode */
900 {4, 6, 6}, /* cost of storing fp registers
901 in SFmode, DFmode and XFmode */
902
903 1, /* cost of moving MMX register */
904 {1, 1}, /* cost of loading MMX registers
905 in SImode and DImode */
906 {1, 1}, /* cost of storing MMX registers
907 in SImode and DImode */
908 1, /* cost of moving SSE register */
909 {1, 1, 1}, /* cost of loading SSE registers
910 in SImode, DImode and TImode */
911 {1, 1, 1}, /* cost of storing SSE registers
912 in SImode, DImode and TImode */
913 1, /* MMX or SSE register to integer */
914 64, /* size of l1 cache. */
915 128, /* size of l2 cache. */
916 32, /* size of prefetch block */
917 1, /* number of parallel prefetches */
918 1, /* Branch cost */
919 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
920 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
921 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
922 COSTS_N_INSNS (1), /* cost of FABS instruction. */
923 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
924 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
925 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
926 DUMMY_STRINGOP_ALGS},
927 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
928 DUMMY_STRINGOP_ALGS},
929 1, /* scalar_stmt_cost. */
930 1, /* scalar load_cost. */
931 1, /* scalar_store_cost. */
932 1, /* vec_stmt_cost. */
933 1, /* vec_to_scalar_cost. */
934 1, /* scalar_to_vec_cost. */
935 1, /* vec_align_load_cost. */
936 2, /* vec_unalign_load_cost. */
937 1, /* vec_store_cost. */
938 3, /* cond_taken_branch_cost. */
939 1, /* cond_not_taken_branch_cost. */
940 };
941
942 static const
943 struct processor_costs k6_cost = {
944 COSTS_N_INSNS (1), /* cost of an add instruction */
945 COSTS_N_INSNS (2), /* cost of a lea instruction */
946 COSTS_N_INSNS (1), /* variable shift costs */
947 COSTS_N_INSNS (1), /* constant shift costs */
948 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
949 COSTS_N_INSNS (3), /* HI */
950 COSTS_N_INSNS (3), /* SI */
951 COSTS_N_INSNS (3), /* DI */
952 COSTS_N_INSNS (3)}, /* other */
953 0, /* cost of multiply per each bit set */
954 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
955 COSTS_N_INSNS (18), /* HI */
956 COSTS_N_INSNS (18), /* SI */
957 COSTS_N_INSNS (18), /* DI */
958 COSTS_N_INSNS (18)}, /* other */
959 COSTS_N_INSNS (2), /* cost of movsx */
960 COSTS_N_INSNS (2), /* cost of movzx */
961 8, /* "large" insn */
962 4, /* MOVE_RATIO */
963 3, /* cost for loading QImode using movzbl */
964 {4, 5, 4}, /* cost of loading integer registers
965 in QImode, HImode and SImode.
966 Relative to reg-reg move (2). */
967 {2, 3, 2}, /* cost of storing integer registers */
968 4, /* cost of reg,reg fld/fst */
969 {6, 6, 6}, /* cost of loading fp registers
970 in SFmode, DFmode and XFmode */
971 {4, 4, 4}, /* cost of storing fp registers
972 in SFmode, DFmode and XFmode */
973 2, /* cost of moving MMX register */
974 {2, 2}, /* cost of loading MMX registers
975 in SImode and DImode */
976 {2, 2}, /* cost of storing MMX registers
977 in SImode and DImode */
978 2, /* cost of moving SSE register */
979 {2, 2, 8}, /* cost of loading SSE registers
980 in SImode, DImode and TImode */
981 {2, 2, 8}, /* cost of storing SSE registers
982 in SImode, DImode and TImode */
983 6, /* MMX or SSE register to integer */
984 32, /* size of l1 cache. */
985 32, /* size of l2 cache. Some models
986 have integrated l2 cache, but
987 optimizing for k6 is not important
988 enough to worry about that. */
989 32, /* size of prefetch block */
990 1, /* number of parallel prefetches */
991 1, /* Branch cost */
992 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
993 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
994 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
995 COSTS_N_INSNS (2), /* cost of FABS instruction. */
996 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
997 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
998 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
999 DUMMY_STRINGOP_ALGS},
1000 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
1001 DUMMY_STRINGOP_ALGS},
1002 1, /* scalar_stmt_cost. */
1003 1, /* scalar load_cost. */
1004 1, /* scalar_store_cost. */
1005 1, /* vec_stmt_cost. */
1006 1, /* vec_to_scalar_cost. */
1007 1, /* scalar_to_vec_cost. */
1008 1, /* vec_align_load_cost. */
1009 2, /* vec_unalign_load_cost. */
1010 1, /* vec_store_cost. */
1011 3, /* cond_taken_branch_cost. */
1012 1, /* cond_not_taken_branch_cost. */
1013 };
1014
1015 static const
1016 struct processor_costs athlon_cost = {
1017 COSTS_N_INSNS (1), /* cost of an add instruction */
1018 COSTS_N_INSNS (2), /* cost of a lea instruction */
1019 COSTS_N_INSNS (1), /* variable shift costs */
1020 COSTS_N_INSNS (1), /* constant shift costs */
1021 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
1022 COSTS_N_INSNS (5), /* HI */
1023 COSTS_N_INSNS (5), /* SI */
1024 COSTS_N_INSNS (5), /* DI */
1025 COSTS_N_INSNS (5)}, /* other */
1026 0, /* cost of multiply per each bit set */
1027 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1028 COSTS_N_INSNS (26), /* HI */
1029 COSTS_N_INSNS (42), /* SI */
1030 COSTS_N_INSNS (74), /* DI */
1031 COSTS_N_INSNS (74)}, /* other */
1032 COSTS_N_INSNS (1), /* cost of movsx */
1033 COSTS_N_INSNS (1), /* cost of movzx */
1034 8, /* "large" insn */
1035 9, /* MOVE_RATIO */
1036 4, /* cost for loading QImode using movzbl */
1037 {3, 4, 3}, /* cost of loading integer registers
1038 in QImode, HImode and SImode.
1039 Relative to reg-reg move (2). */
1040 {3, 4, 3}, /* cost of storing integer registers */
1041 4, /* cost of reg,reg fld/fst */
1042 {4, 4, 12}, /* cost of loading fp registers
1043 in SFmode, DFmode and XFmode */
1044 {6, 6, 8}, /* cost of storing fp registers
1045 in SFmode, DFmode and XFmode */
1046 2, /* cost of moving MMX register */
1047 {4, 4}, /* cost of loading MMX registers
1048 in SImode and DImode */
1049 {4, 4}, /* cost of storing MMX registers
1050 in SImode and DImode */
1051 2, /* cost of moving SSE register */
1052 {4, 4, 6}, /* cost of loading SSE registers
1053 in SImode, DImode and TImode */
1054 {4, 4, 5}, /* cost of storing SSE registers
1055 in SImode, DImode and TImode */
1056 5, /* MMX or SSE register to integer */
1057 64, /* size of l1 cache. */
1058 256, /* size of l2 cache. */
1059 64, /* size of prefetch block */
1060 6, /* number of parallel prefetches */
1061 5, /* Branch cost */
1062 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1063 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1064 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
1065 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1066 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1067 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1068 /* For some reason, Athlon deals better with REP prefix (relative to loops)
1069 compared to K8. Alignment becomes important after 8 bytes for memcpy and
1070 128 bytes for memset. */
1071 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
1072 DUMMY_STRINGOP_ALGS},
1073 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
1074 DUMMY_STRINGOP_ALGS},
1075 1, /* scalar_stmt_cost. */
1076 1, /* scalar load_cost. */
1077 1, /* scalar_store_cost. */
1078 1, /* vec_stmt_cost. */
1079 1, /* vec_to_scalar_cost. */
1080 1, /* scalar_to_vec_cost. */
1081 1, /* vec_align_load_cost. */
1082 2, /* vec_unalign_load_cost. */
1083 1, /* vec_store_cost. */
1084 3, /* cond_taken_branch_cost. */
1085 1, /* cond_not_taken_branch_cost. */
1086 };
1087
1088 static const
1089 struct processor_costs k8_cost = {
1090 COSTS_N_INSNS (1), /* cost of an add instruction */
1091 COSTS_N_INSNS (2), /* cost of a lea instruction */
1092 COSTS_N_INSNS (1), /* variable shift costs */
1093 COSTS_N_INSNS (1), /* constant shift costs */
1094 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1095 COSTS_N_INSNS (4), /* HI */
1096 COSTS_N_INSNS (3), /* SI */
1097 COSTS_N_INSNS (4), /* DI */
1098 COSTS_N_INSNS (5)}, /* other */
1099 0, /* cost of multiply per each bit set */
1100 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1101 COSTS_N_INSNS (26), /* HI */
1102 COSTS_N_INSNS (42), /* SI */
1103 COSTS_N_INSNS (74), /* DI */
1104 COSTS_N_INSNS (74)}, /* other */
1105 COSTS_N_INSNS (1), /* cost of movsx */
1106 COSTS_N_INSNS (1), /* cost of movzx */
1107 8, /* "large" insn */
1108 9, /* MOVE_RATIO */
1109 4, /* cost for loading QImode using movzbl */
1110 {3, 4, 3}, /* cost of loading integer registers
1111 in QImode, HImode and SImode.
1112 Relative to reg-reg move (2). */
1113 {3, 4, 3}, /* cost of storing integer registers */
1114 4, /* cost of reg,reg fld/fst */
1115 {4, 4, 12}, /* cost of loading fp registers
1116 in SFmode, DFmode and XFmode */
1117 {6, 6, 8}, /* cost of storing fp registers
1118 in SFmode, DFmode and XFmode */
1119 2, /* cost of moving MMX register */
1120 {3, 3}, /* cost of loading MMX registers
1121 in SImode and DImode */
1122 {4, 4}, /* cost of storing MMX registers
1123 in SImode and DImode */
1124 2, /* cost of moving SSE register */
1125 {4, 3, 6}, /* cost of loading SSE registers
1126 in SImode, DImode and TImode */
1127 {4, 4, 5}, /* cost of storing SSE registers
1128 in SImode, DImode and TImode */
1129 5, /* MMX or SSE register to integer */
1130 64, /* size of l1 cache. */
1131 512, /* size of l2 cache. */
1132 64, /* size of prefetch block */
1133 /* New AMD processors never drop prefetches; if they cannot be performed
1134 immediately, they are queued. We set number of simultaneous prefetches
1135 to a large constant to reflect this (it probably is not a good idea not
1136 to limit number of prefetches at all, as their execution also takes some
1137 time). */
1138 100, /* number of parallel prefetches */
1139 3, /* Branch cost */
1140 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1141 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1142 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1143 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1144 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1145 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1146 /* K8 has optimized REP instruction for medium sized blocks, but for very
1147 small blocks it is better to use loop. For large blocks, libcall can
1148 do nontemporary accesses and beat inline considerably. */
1149 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1150 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1151 {{libcall, {{8, loop}, {24, unrolled_loop},
1152 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1153 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1154 4, /* scalar_stmt_cost. */
1155 2, /* scalar load_cost. */
1156 2, /* scalar_store_cost. */
1157 5, /* vec_stmt_cost. */
1158 0, /* vec_to_scalar_cost. */
1159 2, /* scalar_to_vec_cost. */
1160 2, /* vec_align_load_cost. */
1161 3, /* vec_unalign_load_cost. */
1162 3, /* vec_store_cost. */
1163 3, /* cond_taken_branch_cost. */
1164 2, /* cond_not_taken_branch_cost. */
1165 };
1166
1167 struct processor_costs amdfam10_cost = {
1168 COSTS_N_INSNS (1), /* cost of an add instruction */
1169 COSTS_N_INSNS (2), /* cost of a lea instruction */
1170 COSTS_N_INSNS (1), /* variable shift costs */
1171 COSTS_N_INSNS (1), /* constant shift costs */
1172 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1173 COSTS_N_INSNS (4), /* HI */
1174 COSTS_N_INSNS (3), /* SI */
1175 COSTS_N_INSNS (4), /* DI */
1176 COSTS_N_INSNS (5)}, /* other */
1177 0, /* cost of multiply per each bit set */
1178 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1179 COSTS_N_INSNS (35), /* HI */
1180 COSTS_N_INSNS (51), /* SI */
1181 COSTS_N_INSNS (83), /* DI */
1182 COSTS_N_INSNS (83)}, /* other */
1183 COSTS_N_INSNS (1), /* cost of movsx */
1184 COSTS_N_INSNS (1), /* cost of movzx */
1185 8, /* "large" insn */
1186 9, /* MOVE_RATIO */
1187 4, /* cost for loading QImode using movzbl */
1188 {3, 4, 3}, /* cost of loading integer registers
1189 in QImode, HImode and SImode.
1190 Relative to reg-reg move (2). */
1191 {3, 4, 3}, /* cost of storing integer registers */
1192 4, /* cost of reg,reg fld/fst */
1193 {4, 4, 12}, /* cost of loading fp registers
1194 in SFmode, DFmode and XFmode */
1195 {6, 6, 8}, /* cost of storing fp registers
1196 in SFmode, DFmode and XFmode */
1197 2, /* cost of moving MMX register */
1198 {3, 3}, /* cost of loading MMX registers
1199 in SImode and DImode */
1200 {4, 4}, /* cost of storing MMX registers
1201 in SImode and DImode */
1202 2, /* cost of moving SSE register */
1203 {4, 4, 3}, /* cost of loading SSE registers
1204 in SImode, DImode and TImode */
1205 {4, 4, 5}, /* cost of storing SSE registers
1206 in SImode, DImode and TImode */
1207 3, /* MMX or SSE register to integer */
1208 /* On K8:
1209 MOVD reg64, xmmreg Double FSTORE 4
1210 MOVD reg32, xmmreg Double FSTORE 4
1211 On AMDFAM10:
1212 MOVD reg64, xmmreg Double FADD 3
1213 1/1 1/1
1214 MOVD reg32, xmmreg Double FADD 3
1215 1/1 1/1 */
1216 64, /* size of l1 cache. */
1217 512, /* size of l2 cache. */
1218 64, /* size of prefetch block */
1219 /* New AMD processors never drop prefetches; if they cannot be performed
1220 immediately, they are queued. We set number of simultaneous prefetches
1221 to a large constant to reflect this (it probably is not a good idea not
1222 to limit number of prefetches at all, as their execution also takes some
1223 time). */
1224 100, /* number of parallel prefetches */
1225 2, /* Branch cost */
1226 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1227 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1228 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1229 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1230 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1231 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1232
1233 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
1234 very small blocks it is better to use loop. For large blocks, libcall can
1235 do nontemporary accesses and beat inline considerably. */
1236 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1237 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1238 {{libcall, {{8, loop}, {24, unrolled_loop},
1239 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1240 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1241 4, /* scalar_stmt_cost. */
1242 2, /* scalar load_cost. */
1243 2, /* scalar_store_cost. */
1244 6, /* vec_stmt_cost. */
1245 0, /* vec_to_scalar_cost. */
1246 2, /* scalar_to_vec_cost. */
1247 2, /* vec_align_load_cost. */
1248 2, /* vec_unalign_load_cost. */
1249 2, /* vec_store_cost. */
1250 2, /* cond_taken_branch_cost. */
1251 1, /* cond_not_taken_branch_cost. */
1252 };
1253
1254 struct processor_costs bdver1_cost = {
1255 COSTS_N_INSNS (1), /* cost of an add instruction */
1256 COSTS_N_INSNS (1), /* cost of a lea instruction */
1257 COSTS_N_INSNS (1), /* variable shift costs */
1258 COSTS_N_INSNS (1), /* constant shift costs */
1259 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1260 COSTS_N_INSNS (4), /* HI */
1261 COSTS_N_INSNS (4), /* SI */
1262 COSTS_N_INSNS (6), /* DI */
1263 COSTS_N_INSNS (6)}, /* other */
1264 0, /* cost of multiply per each bit set */
1265 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1266 COSTS_N_INSNS (35), /* HI */
1267 COSTS_N_INSNS (51), /* SI */
1268 COSTS_N_INSNS (83), /* DI */
1269 COSTS_N_INSNS (83)}, /* other */
1270 COSTS_N_INSNS (1), /* cost of movsx */
1271 COSTS_N_INSNS (1), /* cost of movzx */
1272 8, /* "large" insn */
1273 9, /* MOVE_RATIO */
1274 4, /* cost for loading QImode using movzbl */
1275 {5, 5, 4}, /* cost of loading integer registers
1276 in QImode, HImode and SImode.
1277 Relative to reg-reg move (2). */
1278 {4, 4, 4}, /* cost of storing integer registers */
1279 2, /* cost of reg,reg fld/fst */
1280 {5, 5, 12}, /* cost of loading fp registers
1281 in SFmode, DFmode and XFmode */
1282 {4, 4, 8}, /* cost of storing fp registers
1283 in SFmode, DFmode and XFmode */
1284 2, /* cost of moving MMX register */
1285 {4, 4}, /* cost of loading MMX registers
1286 in SImode and DImode */
1287 {4, 4}, /* cost of storing MMX registers
1288 in SImode and DImode */
1289 2, /* cost of moving SSE register */
1290 {4, 4, 4}, /* cost of loading SSE registers
1291 in SImode, DImode and TImode */
1292 {4, 4, 4}, /* cost of storing SSE registers
1293 in SImode, DImode and TImode */
1294 2, /* MMX or SSE register to integer */
1295 /* On K8:
1296 MOVD reg64, xmmreg Double FSTORE 4
1297 MOVD reg32, xmmreg Double FSTORE 4
1298 On AMDFAM10:
1299 MOVD reg64, xmmreg Double FADD 3
1300 1/1 1/1
1301 MOVD reg32, xmmreg Double FADD 3
1302 1/1 1/1 */
1303 16, /* size of l1 cache. */
1304 2048, /* size of l2 cache. */
1305 64, /* size of prefetch block */
1306 /* New AMD processors never drop prefetches; if they cannot be performed
1307 immediately, they are queued. We set number of simultaneous prefetches
1308 to a large constant to reflect this (it probably is not a good idea not
1309 to limit number of prefetches at all, as their execution also takes some
1310 time). */
1311 100, /* number of parallel prefetches */
1312 2, /* Branch cost */
1313 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1314 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1315 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1316 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1317 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1318 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1319
1320 /* BDVER1 has optimized REP instruction for medium sized blocks, but for
1321 very small blocks it is better to use loop. For large blocks, libcall
1322 can do nontemporary accesses and beat inline considerably. */
1323 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1324 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1325 {{libcall, {{8, loop}, {24, unrolled_loop},
1326 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1327 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1328 6, /* scalar_stmt_cost. */
1329 4, /* scalar load_cost. */
1330 4, /* scalar_store_cost. */
1331 6, /* vec_stmt_cost. */
1332 0, /* vec_to_scalar_cost. */
1333 2, /* scalar_to_vec_cost. */
1334 4, /* vec_align_load_cost. */
1335 4, /* vec_unalign_load_cost. */
1336 4, /* vec_store_cost. */
1337 2, /* cond_taken_branch_cost. */
1338 1, /* cond_not_taken_branch_cost. */
1339 };
1340
1341 struct processor_costs bdver2_cost = {
1342 COSTS_N_INSNS (1), /* cost of an add instruction */
1343 COSTS_N_INSNS (1), /* cost of a lea instruction */
1344 COSTS_N_INSNS (1), /* variable shift costs */
1345 COSTS_N_INSNS (1), /* constant shift costs */
1346 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1347 COSTS_N_INSNS (4), /* HI */
1348 COSTS_N_INSNS (4), /* SI */
1349 COSTS_N_INSNS (6), /* DI */
1350 COSTS_N_INSNS (6)}, /* other */
1351 0, /* cost of multiply per each bit set */
1352 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1353 COSTS_N_INSNS (35), /* HI */
1354 COSTS_N_INSNS (51), /* SI */
1355 COSTS_N_INSNS (83), /* DI */
1356 COSTS_N_INSNS (83)}, /* other */
1357 COSTS_N_INSNS (1), /* cost of movsx */
1358 COSTS_N_INSNS (1), /* cost of movzx */
1359 8, /* "large" insn */
1360 9, /* MOVE_RATIO */
1361 4, /* cost for loading QImode using movzbl */
1362 {5, 5, 4}, /* cost of loading integer registers
1363 in QImode, HImode and SImode.
1364 Relative to reg-reg move (2). */
1365 {4, 4, 4}, /* cost of storing integer registers */
1366 2, /* cost of reg,reg fld/fst */
1367 {5, 5, 12}, /* cost of loading fp registers
1368 in SFmode, DFmode and XFmode */
1369 {4, 4, 8}, /* cost of storing fp registers
1370 in SFmode, DFmode and XFmode */
1371 2, /* cost of moving MMX register */
1372 {4, 4}, /* cost of loading MMX registers
1373 in SImode and DImode */
1374 {4, 4}, /* cost of storing MMX registers
1375 in SImode and DImode */
1376 2, /* cost of moving SSE register */
1377 {4, 4, 4}, /* cost of loading SSE registers
1378 in SImode, DImode and TImode */
1379 {4, 4, 4}, /* cost of storing SSE registers
1380 in SImode, DImode and TImode */
1381 2, /* MMX or SSE register to integer */
1382 /* On K8:
1383 MOVD reg64, xmmreg Double FSTORE 4
1384 MOVD reg32, xmmreg Double FSTORE 4
1385 On AMDFAM10:
1386 MOVD reg64, xmmreg Double FADD 3
1387 1/1 1/1
1388 MOVD reg32, xmmreg Double FADD 3
1389 1/1 1/1 */
1390 16, /* size of l1 cache. */
1391 2048, /* size of l2 cache. */
1392 64, /* size of prefetch block */
1393 /* New AMD processors never drop prefetches; if they cannot be performed
1394 immediately, they are queued. We set number of simultaneous prefetches
1395 to a large constant to reflect this (it probably is not a good idea not
1396 to limit number of prefetches at all, as their execution also takes some
1397 time). */
1398 100, /* number of parallel prefetches */
1399 2, /* Branch cost */
1400 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1401 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1402 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1403 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1404 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1405 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1406
1407 /* BDVER2 has optimized REP instruction for medium sized blocks, but for
1408 very small blocks it is better to use loop. For large blocks, libcall
1409 can do nontemporary accesses and beat inline considerably. */
1410 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1411 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1412 {{libcall, {{8, loop}, {24, unrolled_loop},
1413 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1414 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1415 6, /* scalar_stmt_cost. */
1416 4, /* scalar load_cost. */
1417 4, /* scalar_store_cost. */
1418 6, /* vec_stmt_cost. */
1419 0, /* vec_to_scalar_cost. */
1420 2, /* scalar_to_vec_cost. */
1421 4, /* vec_align_load_cost. */
1422 4, /* vec_unalign_load_cost. */
1423 4, /* vec_store_cost. */
1424 2, /* cond_taken_branch_cost. */
1425 1, /* cond_not_taken_branch_cost. */
1426 };
1427
1428 struct processor_costs btver1_cost = {
1429 COSTS_N_INSNS (1), /* cost of an add instruction */
1430 COSTS_N_INSNS (2), /* cost of a lea instruction */
1431 COSTS_N_INSNS (1), /* variable shift costs */
1432 COSTS_N_INSNS (1), /* constant shift costs */
1433 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1434 COSTS_N_INSNS (4), /* HI */
1435 COSTS_N_INSNS (3), /* SI */
1436 COSTS_N_INSNS (4), /* DI */
1437 COSTS_N_INSNS (5)}, /* other */
1438 0, /* cost of multiply per each bit set */
1439 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1440 COSTS_N_INSNS (35), /* HI */
1441 COSTS_N_INSNS (51), /* SI */
1442 COSTS_N_INSNS (83), /* DI */
1443 COSTS_N_INSNS (83)}, /* other */
1444 COSTS_N_INSNS (1), /* cost of movsx */
1445 COSTS_N_INSNS (1), /* cost of movzx */
1446 8, /* "large" insn */
1447 9, /* MOVE_RATIO */
1448 4, /* cost for loading QImode using movzbl */
1449 {3, 4, 3}, /* cost of loading integer registers
1450 in QImode, HImode and SImode.
1451 Relative to reg-reg move (2). */
1452 {3, 4, 3}, /* cost of storing integer registers */
1453 4, /* cost of reg,reg fld/fst */
1454 {4, 4, 12}, /* cost of loading fp registers
1455 in SFmode, DFmode and XFmode */
1456 {6, 6, 8}, /* cost of storing fp registers
1457 in SFmode, DFmode and XFmode */
1458 2, /* cost of moving MMX register */
1459 {3, 3}, /* cost of loading MMX registers
1460 in SImode and DImode */
1461 {4, 4}, /* cost of storing MMX registers
1462 in SImode and DImode */
1463 2, /* cost of moving SSE register */
1464 {4, 4, 3}, /* cost of loading SSE registers
1465 in SImode, DImode and TImode */
1466 {4, 4, 5}, /* cost of storing SSE registers
1467 in SImode, DImode and TImode */
1468 3, /* MMX or SSE register to integer */
1469 /* On K8:
1470 MOVD reg64, xmmreg Double FSTORE 4
1471 MOVD reg32, xmmreg Double FSTORE 4
1472 On AMDFAM10:
1473 MOVD reg64, xmmreg Double FADD 3
1474 1/1 1/1
1475 MOVD reg32, xmmreg Double FADD 3
1476 1/1 1/1 */
1477 32, /* size of l1 cache. */
1478 512, /* size of l2 cache. */
1479 64, /* size of prefetch block */
1480 100, /* number of parallel prefetches */
1481 2, /* Branch cost */
1482 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1483 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1484 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1485 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1486 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1487 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1488
1489 /* BTVER1 has optimized REP instruction for medium sized blocks, but for
1490 very small blocks it is better to use loop. For large blocks, libcall can
1491 do nontemporary accesses and beat inline considerably. */
1492 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1493 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1494 {{libcall, {{8, loop}, {24, unrolled_loop},
1495 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1496 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1497 4, /* scalar_stmt_cost. */
1498 2, /* scalar load_cost. */
1499 2, /* scalar_store_cost. */
1500 6, /* vec_stmt_cost. */
1501 0, /* vec_to_scalar_cost. */
1502 2, /* scalar_to_vec_cost. */
1503 2, /* vec_align_load_cost. */
1504 2, /* vec_unalign_load_cost. */
1505 2, /* vec_store_cost. */
1506 2, /* cond_taken_branch_cost. */
1507 1, /* cond_not_taken_branch_cost. */
1508 };
1509
1510 static const
1511 struct processor_costs pentium4_cost = {
1512 COSTS_N_INSNS (1), /* cost of an add instruction */
1513 COSTS_N_INSNS (3), /* cost of a lea instruction */
1514 COSTS_N_INSNS (4), /* variable shift costs */
1515 COSTS_N_INSNS (4), /* constant shift costs */
1516 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1517 COSTS_N_INSNS (15), /* HI */
1518 COSTS_N_INSNS (15), /* SI */
1519 COSTS_N_INSNS (15), /* DI */
1520 COSTS_N_INSNS (15)}, /* other */
1521 0, /* cost of multiply per each bit set */
1522 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1523 COSTS_N_INSNS (56), /* HI */
1524 COSTS_N_INSNS (56), /* SI */
1525 COSTS_N_INSNS (56), /* DI */
1526 COSTS_N_INSNS (56)}, /* other */
1527 COSTS_N_INSNS (1), /* cost of movsx */
1528 COSTS_N_INSNS (1), /* cost of movzx */
1529 16, /* "large" insn */
1530 6, /* MOVE_RATIO */
1531 2, /* cost for loading QImode using movzbl */
1532 {4, 5, 4}, /* cost of loading integer registers
1533 in QImode, HImode and SImode.
1534 Relative to reg-reg move (2). */
1535 {2, 3, 2}, /* cost of storing integer registers */
1536 2, /* cost of reg,reg fld/fst */
1537 {2, 2, 6}, /* cost of loading fp registers
1538 in SFmode, DFmode and XFmode */
1539 {4, 4, 6}, /* cost of storing fp registers
1540 in SFmode, DFmode and XFmode */
1541 2, /* cost of moving MMX register */
1542 {2, 2}, /* cost of loading MMX registers
1543 in SImode and DImode */
1544 {2, 2}, /* cost of storing MMX registers
1545 in SImode and DImode */
1546 12, /* cost of moving SSE register */
1547 {12, 12, 12}, /* cost of loading SSE registers
1548 in SImode, DImode and TImode */
1549 {2, 2, 8}, /* cost of storing SSE registers
1550 in SImode, DImode and TImode */
1551 10, /* MMX or SSE register to integer */
1552 8, /* size of l1 cache. */
1553 256, /* size of l2 cache. */
1554 64, /* size of prefetch block */
1555 6, /* number of parallel prefetches */
1556 2, /* Branch cost */
1557 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1558 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1559 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1560 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1561 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1562 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1563 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
1564 DUMMY_STRINGOP_ALGS},
1565 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
1566 {-1, libcall}}},
1567 DUMMY_STRINGOP_ALGS},
1568 1, /* scalar_stmt_cost. */
1569 1, /* scalar load_cost. */
1570 1, /* scalar_store_cost. */
1571 1, /* vec_stmt_cost. */
1572 1, /* vec_to_scalar_cost. */
1573 1, /* scalar_to_vec_cost. */
1574 1, /* vec_align_load_cost. */
1575 2, /* vec_unalign_load_cost. */
1576 1, /* vec_store_cost. */
1577 3, /* cond_taken_branch_cost. */
1578 1, /* cond_not_taken_branch_cost. */
1579 };
1580
1581 static const
1582 struct processor_costs nocona_cost = {
1583 COSTS_N_INSNS (1), /* cost of an add instruction */
1584 COSTS_N_INSNS (1), /* cost of a lea instruction */
1585 COSTS_N_INSNS (1), /* variable shift costs */
1586 COSTS_N_INSNS (1), /* constant shift costs */
1587 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1588 COSTS_N_INSNS (10), /* HI */
1589 COSTS_N_INSNS (10), /* SI */
1590 COSTS_N_INSNS (10), /* DI */
1591 COSTS_N_INSNS (10)}, /* other */
1592 0, /* cost of multiply per each bit set */
1593 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1594 COSTS_N_INSNS (66), /* HI */
1595 COSTS_N_INSNS (66), /* SI */
1596 COSTS_N_INSNS (66), /* DI */
1597 COSTS_N_INSNS (66)}, /* other */
1598 COSTS_N_INSNS (1), /* cost of movsx */
1599 COSTS_N_INSNS (1), /* cost of movzx */
1600 16, /* "large" insn */
1601 17, /* MOVE_RATIO */
1602 4, /* cost for loading QImode using movzbl */
1603 {4, 4, 4}, /* cost of loading integer registers
1604 in QImode, HImode and SImode.
1605 Relative to reg-reg move (2). */
1606 {4, 4, 4}, /* cost of storing integer registers */
1607 3, /* cost of reg,reg fld/fst */
1608 {12, 12, 12}, /* cost of loading fp registers
1609 in SFmode, DFmode and XFmode */
1610 {4, 4, 4}, /* cost of storing fp registers
1611 in SFmode, DFmode and XFmode */
1612 6, /* cost of moving MMX register */
1613 {12, 12}, /* cost of loading MMX registers
1614 in SImode and DImode */
1615 {12, 12}, /* cost of storing MMX registers
1616 in SImode and DImode */
1617 6, /* cost of moving SSE register */
1618 {12, 12, 12}, /* cost of loading SSE registers
1619 in SImode, DImode and TImode */
1620 {12, 12, 12}, /* cost of storing SSE registers
1621 in SImode, DImode and TImode */
1622 8, /* MMX or SSE register to integer */
1623 8, /* size of l1 cache. */
1624 1024, /* size of l2 cache. */
1625 128, /* size of prefetch block */
1626 8, /* number of parallel prefetches */
1627 1, /* Branch cost */
1628 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1629 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1630 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1631 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1632 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1633 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1634 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
1635 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
1636 {100000, unrolled_loop}, {-1, libcall}}}},
1637 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
1638 {-1, libcall}}},
1639 {libcall, {{24, loop}, {64, unrolled_loop},
1640 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1641 1, /* scalar_stmt_cost. */
1642 1, /* scalar load_cost. */
1643 1, /* scalar_store_cost. */
1644 1, /* vec_stmt_cost. */
1645 1, /* vec_to_scalar_cost. */
1646 1, /* scalar_to_vec_cost. */
1647 1, /* vec_align_load_cost. */
1648 2, /* vec_unalign_load_cost. */
1649 1, /* vec_store_cost. */
1650 3, /* cond_taken_branch_cost. */
1651 1, /* cond_not_taken_branch_cost. */
1652 };
1653
1654 static const
1655 struct processor_costs atom_cost = {
1656 COSTS_N_INSNS (1), /* cost of an add instruction */
1657 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1658 COSTS_N_INSNS (1), /* variable shift costs */
1659 COSTS_N_INSNS (1), /* constant shift costs */
1660 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1661 COSTS_N_INSNS (4), /* HI */
1662 COSTS_N_INSNS (3), /* SI */
1663 COSTS_N_INSNS (4), /* DI */
1664 COSTS_N_INSNS (2)}, /* other */
1665 0, /* cost of multiply per each bit set */
1666 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1667 COSTS_N_INSNS (26), /* HI */
1668 COSTS_N_INSNS (42), /* SI */
1669 COSTS_N_INSNS (74), /* DI */
1670 COSTS_N_INSNS (74)}, /* other */
1671 COSTS_N_INSNS (1), /* cost of movsx */
1672 COSTS_N_INSNS (1), /* cost of movzx */
1673 8, /* "large" insn */
1674 17, /* MOVE_RATIO */
1675 2, /* cost for loading QImode using movzbl */
1676 {4, 4, 4}, /* cost of loading integer registers
1677 in QImode, HImode and SImode.
1678 Relative to reg-reg move (2). */
1679 {4, 4, 4}, /* cost of storing integer registers */
1680 4, /* cost of reg,reg fld/fst */
1681 {12, 12, 12}, /* cost of loading fp registers
1682 in SFmode, DFmode and XFmode */
1683 {6, 6, 8}, /* cost of storing fp registers
1684 in SFmode, DFmode and XFmode */
1685 2, /* cost of moving MMX register */
1686 {8, 8}, /* cost of loading MMX registers
1687 in SImode and DImode */
1688 {8, 8}, /* cost of storing MMX registers
1689 in SImode and DImode */
1690 2, /* cost of moving SSE register */
1691 {8, 8, 8}, /* cost of loading SSE registers
1692 in SImode, DImode and TImode */
1693 {8, 8, 8}, /* cost of storing SSE registers
1694 in SImode, DImode and TImode */
1695 5, /* MMX or SSE register to integer */
1696 32, /* size of l1 cache. */
1697 256, /* size of l2 cache. */
1698 64, /* size of prefetch block */
1699 6, /* number of parallel prefetches */
1700 3, /* Branch cost */
1701 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1702 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1703 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1704 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1705 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1706 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1707 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1708 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1709 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1710 {{libcall, {{8, loop}, {15, unrolled_loop},
1711 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1712 {libcall, {{24, loop}, {32, unrolled_loop},
1713 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1714 1, /* scalar_stmt_cost. */
1715 1, /* scalar load_cost. */
1716 1, /* scalar_store_cost. */
1717 1, /* vec_stmt_cost. */
1718 1, /* vec_to_scalar_cost. */
1719 1, /* scalar_to_vec_cost. */
1720 1, /* vec_align_load_cost. */
1721 2, /* vec_unalign_load_cost. */
1722 1, /* vec_store_cost. */
1723 3, /* cond_taken_branch_cost. */
1724 1, /* cond_not_taken_branch_cost. */
1725 };
1726
1727 /* Generic64 should produce code tuned for Nocona and K8. */
1728 static const
1729 struct processor_costs generic64_cost = {
1730 COSTS_N_INSNS (1), /* cost of an add instruction */
1731   /* On all chips taken into consideration, lea takes 2 cycles or more.  With
1732      this cost, however, our current implementation of synth_mult results in
1733      use of unnecessary temporary registers, causing regressions on several
1734      SPECfp benchmarks.  */
1735 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1736 COSTS_N_INSNS (1), /* variable shift costs */
1737 COSTS_N_INSNS (1), /* constant shift costs */
1738 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1739 COSTS_N_INSNS (4), /* HI */
1740 COSTS_N_INSNS (3), /* SI */
1741 COSTS_N_INSNS (4), /* DI */
1742 COSTS_N_INSNS (2)}, /* other */
1743 0, /* cost of multiply per each bit set */
1744 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1745 COSTS_N_INSNS (26), /* HI */
1746 COSTS_N_INSNS (42), /* SI */
1747 COSTS_N_INSNS (74), /* DI */
1748 COSTS_N_INSNS (74)}, /* other */
1749 COSTS_N_INSNS (1), /* cost of movsx */
1750 COSTS_N_INSNS (1), /* cost of movzx */
1751 8, /* "large" insn */
1752 17, /* MOVE_RATIO */
1753 4, /* cost for loading QImode using movzbl */
1754 {4, 4, 4}, /* cost of loading integer registers
1755 in QImode, HImode and SImode.
1756 Relative to reg-reg move (2). */
1757 {4, 4, 4}, /* cost of storing integer registers */
1758 4, /* cost of reg,reg fld/fst */
1759 {12, 12, 12}, /* cost of loading fp registers
1760 in SFmode, DFmode and XFmode */
1761 {6, 6, 8}, /* cost of storing fp registers
1762 in SFmode, DFmode and XFmode */
1763 2, /* cost of moving MMX register */
1764 {8, 8}, /* cost of loading MMX registers
1765 in SImode and DImode */
1766 {8, 8}, /* cost of storing MMX registers
1767 in SImode and DImode */
1768 2, /* cost of moving SSE register */
1769 {8, 8, 8}, /* cost of loading SSE registers
1770 in SImode, DImode and TImode */
1771 {8, 8, 8}, /* cost of storing SSE registers
1772 in SImode, DImode and TImode */
1773 5, /* MMX or SSE register to integer */
1774 32, /* size of l1 cache. */
1775 512, /* size of l2 cache. */
1776 64, /* size of prefetch block */
1777 6, /* number of parallel prefetches */
1778   /* Benchmarking shows large regressions on the K8 sixtrack benchmark when this
1779      value is increased to the perhaps more appropriate value of 5.  */
1780 3, /* Branch cost */
1781 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1782 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1783 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1784 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1785 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1786 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1787 {DUMMY_STRINGOP_ALGS,
1788 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1789 {DUMMY_STRINGOP_ALGS,
1790 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1791 1, /* scalar_stmt_cost. */
1792 1, /* scalar load_cost. */
1793 1, /* scalar_store_cost. */
1794 1, /* vec_stmt_cost. */
1795 1, /* vec_to_scalar_cost. */
1796 1, /* scalar_to_vec_cost. */
1797 1, /* vec_align_load_cost. */
1798 2, /* vec_unalign_load_cost. */
1799 1, /* vec_store_cost. */
1800 3, /* cond_taken_branch_cost. */
1801 1, /* cond_not_taken_branch_cost. */
1802 };
1803
1804 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
1805 Athlon and K8. */
1806 static const
1807 struct processor_costs generic32_cost = {
1808 COSTS_N_INSNS (1), /* cost of an add instruction */
1809 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1810 COSTS_N_INSNS (1), /* variable shift costs */
1811 COSTS_N_INSNS (1), /* constant shift costs */
1812 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1813 COSTS_N_INSNS (4), /* HI */
1814 COSTS_N_INSNS (3), /* SI */
1815 COSTS_N_INSNS (4), /* DI */
1816 COSTS_N_INSNS (2)}, /* other */
1817 0, /* cost of multiply per each bit set */
1818 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1819 COSTS_N_INSNS (26), /* HI */
1820 COSTS_N_INSNS (42), /* SI */
1821 COSTS_N_INSNS (74), /* DI */
1822 COSTS_N_INSNS (74)}, /* other */
1823 COSTS_N_INSNS (1), /* cost of movsx */
1824 COSTS_N_INSNS (1), /* cost of movzx */
1825 8, /* "large" insn */
1826 17, /* MOVE_RATIO */
1827 4, /* cost for loading QImode using movzbl */
1828 {4, 4, 4}, /* cost of loading integer registers
1829 in QImode, HImode and SImode.
1830 Relative to reg-reg move (2). */
1831 {4, 4, 4}, /* cost of storing integer registers */
1832 4, /* cost of reg,reg fld/fst */
1833 {12, 12, 12}, /* cost of loading fp registers
1834 in SFmode, DFmode and XFmode */
1835 {6, 6, 8}, /* cost of storing fp registers
1836 in SFmode, DFmode and XFmode */
1837 2, /* cost of moving MMX register */
1838 {8, 8}, /* cost of loading MMX registers
1839 in SImode and DImode */
1840 {8, 8}, /* cost of storing MMX registers
1841 in SImode and DImode */
1842 2, /* cost of moving SSE register */
1843 {8, 8, 8}, /* cost of loading SSE registers
1844 in SImode, DImode and TImode */
1845 {8, 8, 8}, /* cost of storing SSE registers
1846 in SImode, DImode and TImode */
1847 5, /* MMX or SSE register to integer */
1848 32, /* size of l1 cache. */
1849 256, /* size of l2 cache. */
1850 64, /* size of prefetch block */
1851 6, /* number of parallel prefetches */
1852 3, /* Branch cost */
1853 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1854 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1855 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1856 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1857 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1858 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1859 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1860 DUMMY_STRINGOP_ALGS},
1861 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1862 DUMMY_STRINGOP_ALGS},
1863 1, /* scalar_stmt_cost. */
1864 1, /* scalar load_cost. */
1865 1, /* scalar_store_cost. */
1866 1, /* vec_stmt_cost. */
1867 1, /* vec_to_scalar_cost. */
1868 1, /* scalar_to_vec_cost. */
1869 1, /* vec_align_load_cost. */
1870 2, /* vec_unalign_load_cost. */
1871 1, /* vec_store_cost. */
1872 3, /* cond_taken_branch_cost. */
1873 1, /* cond_not_taken_branch_cost. */
1874 };
1875
1876 const struct processor_costs *ix86_cost = &pentium_cost;
1877
1878 /* Processor feature/optimization bitmasks. */
1879 #define m_386 (1<<PROCESSOR_I386)
1880 #define m_486 (1<<PROCESSOR_I486)
1881 #define m_PENT (1<<PROCESSOR_PENTIUM)
1882 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1883 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1884 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1885 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
1886 #define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
1887 #define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
1888 #define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
1889 #define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
1890 #define m_COREI7 (m_COREI7_32 | m_COREI7_64)
1891 #define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
1892 #define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
1893 #define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
1894 #define m_ATOM (1<<PROCESSOR_ATOM)
1895
1896 #define m_GEODE (1<<PROCESSOR_GEODE)
1897 #define m_K6 (1<<PROCESSOR_K6)
1898 #define m_K6_GEODE (m_K6 | m_GEODE)
1899 #define m_K8 (1<<PROCESSOR_K8)
1900 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1901 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1902 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1903 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1904 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
1905 #define m_BDVER (m_BDVER1 | m_BDVER2)
1906 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
1907 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1)
1908
1909 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1910 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1911
1912 /* Generic instruction choice should be common subset of supported CPUs
1913 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1914 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1915
1916 /* Feature tests against the various tunings. */
1917 unsigned char ix86_tune_features[X86_TUNE_LAST];
1918
1919 /* Feature tests against the various tunings used to create ix86_tune_features
1920 based on the processor mask. */
1921 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1922   /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1923      negatively, so enabling it for Generic64 seems like a good code size
1924      tradeoff.  We can't enable it for 32bit generic because it does not
1925      work well with PPro based chips.  */
1926 m_386 | m_CORE2I7_64 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC64,
1927
1928 /* X86_TUNE_PUSH_MEMORY */
1929 m_386 | m_P4_NOCONA | m_CORE2I7 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1930
1931 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1932 m_486 | m_PENT,
1933
1934 /* X86_TUNE_UNROLL_STRLEN */
1935 m_486 | m_PENT | m_PPRO | m_ATOM | m_CORE2I7 | m_K6 | m_AMD_MULTIPLE | m_GENERIC,
1936
1937   /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1938      on simulation results.  But after P4 was made, no performance benefit
1939      was observed with branch hints; they also increase the code size.
1940      As a result, icc never generates branch hints.  */
1941 0,
1942
1943 /* X86_TUNE_DOUBLE_WITH_ADD */
1944 ~m_386,
1945
1946 /* X86_TUNE_USE_SAHF */
1947 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC,
1948
1949 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1950 partial dependencies. */
1951 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1952
1953   /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1954      register stalls in the Generic32 compilation setting as well.  However,
1955      in the current implementation the partial register stalls are not
1956      eliminated very well - they can be introduced via subregs synthesized
1957      by combine and can happen in caller/callee saving sequences.  Because
1958      this option pays back little on PPro based chips and conflicts with the
1959      partial reg dependencies used by Athlon/P4 based chips, it is better to
1960      leave it off for generic32 for now.  */
1961 m_PPRO,
1962
1963 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1964 m_CORE2I7 | m_GENERIC,
1965
1966 /* X86_TUNE_USE_HIMODE_FIOP */
1967 m_386 | m_486 | m_K6_GEODE,
1968
1969 /* X86_TUNE_USE_SIMODE_FIOP */
1970 ~(m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC),
1971
1972 /* X86_TUNE_USE_MOV0 */
1973 m_K6,
1974
1975 /* X86_TUNE_USE_CLTD */
1976 ~(m_PENT | m_CORE2I7 | m_ATOM | m_K6 | m_GENERIC),
1977
1978 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1979 m_PENT4,
1980
1981 /* X86_TUNE_SPLIT_LONG_MOVES */
1982 m_PPRO,
1983
1984 /* X86_TUNE_READ_MODIFY_WRITE */
1985 ~m_PENT,
1986
1987 /* X86_TUNE_READ_MODIFY */
1988 ~(m_PENT | m_PPRO),
1989
1990 /* X86_TUNE_PROMOTE_QIMODE */
1991 m_386 | m_486 | m_PENT | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1992
1993 /* X86_TUNE_FAST_PREFIX */
1994 ~(m_386 | m_486 | m_PENT),
1995
1996 /* X86_TUNE_SINGLE_STRINGOP */
1997 m_386 | m_P4_NOCONA,
1998
1999 /* X86_TUNE_QIMODE_MATH */
2000 ~0,
2001
2002 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
2003 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
2004 might be considered for Generic32 if our scheme for avoiding partial
2005      stalls were more effective.  */
2006 ~m_PPRO,
2007
2008 /* X86_TUNE_PROMOTE_QI_REGS */
2009 0,
2010
2011 /* X86_TUNE_PROMOTE_HI_REGS */
2012 m_PPRO,
2013
2014 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
2015 over esp addition. */
2016 m_386 | m_486 | m_PENT | m_PPRO,
2017
2018 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
2019 over esp addition. */
2020 m_PENT,
2021
2022 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
2023 over esp subtraction. */
2024 m_386 | m_486 | m_PENT | m_K6_GEODE,
2025
2026 /* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred
2027 over esp subtraction. */
2028 m_PENT | m_K6_GEODE,
2029
2030 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
2031 for DFmode copies */
2032   ~(m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC),
2033
2034 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
2035 m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2036
2037   /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
2038      conflict here between PPro/Pentium4 based chips that treat 128bit
2039      SSE registers as single units and K8 based chips that divide SSE
2040      registers into two 64bit halves.  This knob promotes all store
2041      destinations to be 128bit to allow register renaming on 128bit SSE
2042      units, but usually results in one extra micro-op on 64bit SSE units.
2043      Experimental results show that disabling this option on P4 brings over
2044      a 20% SPECfp regression, while enabling it on K8 brings roughly a 2.4%
2045      regression that can be partly masked by careful scheduling of moves.  */
2046 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMDFAM10 | m_BDVER | m_GENERIC,
2047
2048 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
2049 m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER1,
2050
2051 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
2052 m_COREI7 | m_BDVER,
2053
2054 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
2055 m_BDVER ,
2056
2057   /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
2058      are resolved on SSE register parts instead of whole registers, so we may
2059      maintain just the lower part of scalar values in the proper format, leaving
2060      the upper part undefined.  */
2061 m_ATHLON_K8,
2062
2063 /* X86_TUNE_SSE_TYPELESS_STORES */
2064 m_AMD_MULTIPLE,
2065
2066 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
2067 m_PPRO | m_P4_NOCONA,
2068
2069 /* X86_TUNE_MEMORY_MISMATCH_STALL */
2070 m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2071
2072 /* X86_TUNE_PROLOGUE_USING_MOVE */
2073 m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
2074
2075 /* X86_TUNE_EPILOGUE_USING_MOVE */
2076 m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
2077
2078 /* X86_TUNE_SHIFT1 */
2079 ~m_486,
2080
2081 /* X86_TUNE_USE_FFREEP */
2082 m_AMD_MULTIPLE,
2083
2084 /* X86_TUNE_INTER_UNIT_MOVES */
2085 ~(m_AMD_MULTIPLE | m_GENERIC),
2086
2087 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
2088 ~(m_AMDFAM10 | m_BDVER ),
2089
2090 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
2091 than 4 branch instructions in the 16 byte window. */
2092 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2093
2094 /* X86_TUNE_SCHEDULE */
2095 m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
2096
2097 /* X86_TUNE_USE_BT */
2098 m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2099
2100 /* X86_TUNE_USE_INCDEC */
2101 ~(m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GENERIC),
2102
2103 /* X86_TUNE_PAD_RETURNS */
2104 m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC,
2105
2106   /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short function.  */
2107 m_ATOM,
2108
2109 /* X86_TUNE_EXT_80387_CONSTANTS */
2110 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC,
2111
2112 /* X86_TUNE_SHORTEN_X87_SSE */
2113 ~m_K8,
2114
2115 /* X86_TUNE_AVOID_VECTOR_DECODE */
2116 m_CORE2I7_64 | m_K8 | m_GENERIC64,
2117
2118   /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
2119      and SImode multiply, but the 386 and 486 do HImode multiply faster.  */
2120 ~(m_386 | m_486),
2121
2122 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
2123 vector path on AMD machines. */
2124 m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
2125
2126 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
2127 machines. */
2128 m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
2129
2130 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
2131 than a MOV. */
2132 m_PENT,
2133
2134 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
2135 but one byte longer. */
2136 m_PENT,
2137
2138   /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
2139      operand that cannot be represented using a modRM byte.  The XOR
2140      replacement is long decoded, so this split helps here as well.  */
2141 m_K6,
2142
2143 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
2144 from FP to FP. */
2145 m_CORE2I7 | m_AMDFAM10 | m_GENERIC,
2146
2147 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
2148 from integer to FP. */
2149 m_AMDFAM10,
2150
2151 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
2152 with a subsequent conditional jump instruction into a single
2153 compare-and-branch uop. */
2154 m_BDVER,
2155
2156 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
2157 will impact LEA instruction selection. */
2158 m_ATOM,
2159
2160 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
2161 instructions. */
2162 ~m_ATOM,
2163
2164   /* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching
2165      at -O3.  For the moment, the prefetching seems badly tuned for Intel
2166      chips.  */
2167 m_K6_GEODE | m_AMD_MULTIPLE,
2168
2169 /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
2170 the auto-vectorizer. */
2171 m_BDVER
2172 };
2173
2174 /* Feature tests against the various architecture variations. */
2175 unsigned char ix86_arch_features[X86_ARCH_LAST];
2176
2177 /* Feature tests against the various architecture variations, used to create
2178 ix86_arch_features based on the processor mask. */
2179 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2180 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
2181 ~(m_386 | m_486 | m_PENT | m_K6),
2182
2183 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2184 ~m_386,
2185
2186 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2187 ~(m_386 | m_486),
2188
2189 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2190 ~m_386,
2191
2192 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2193 ~m_386,
2194 };
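/* Illustrative sketch, not authoritative: once -march/-mtune are resolved,
   the two mask tables above are folded into the ix86_tune_features and
   ix86_arch_features byte arrays, roughly as

     ix86_tune_mask = 1u << ix86_tune;
     for (i = 0; i < X86_TUNE_LAST; i++)
       ix86_tune_features[i]
	 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);

   (and likewise for ix86_arch_features with ix86_arch), so each tuning
   test reduces to a simple array lookup at use sites.  */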
2195
2196 static const unsigned int x86_accumulate_outgoing_args
2197 = m_PPRO | m_P4_NOCONA | m_ATOM | m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC;
2198
2199 static const unsigned int x86_arch_always_fancy_math_387
2200 = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC;
2201
2202 static const unsigned int x86_avx256_split_unaligned_load
2203 = m_COREI7 | m_GENERIC;
2204
2205 static const unsigned int x86_avx256_split_unaligned_store
2206 = m_COREI7 | m_BDVER | m_GENERIC;
2207
2208 /* If the average insn count for a single function invocation is
2209    lower than this constant, emit fast (but longer) prologue and
2210    epilogue code.  */
2211 #define FAST_PROLOGUE_INSN_COUNT 20
2212
2213 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2214 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2215 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2216 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2217
2218 /* Array of the smallest class containing reg number REGNO, indexed by
2219 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2220
2221 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2222 {
2223 /* ax, dx, cx, bx */
2224 AREG, DREG, CREG, BREG,
2225 /* si, di, bp, sp */
2226 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2227 /* FP registers */
2228 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2229 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2230 /* arg pointer */
2231 NON_Q_REGS,
2232 /* flags, fpsr, fpcr, frame */
2233 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2234 /* SSE registers */
2235 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2236 SSE_REGS, SSE_REGS,
2237 /* MMX registers */
2238 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2239 MMX_REGS, MMX_REGS,
2240 /* REX registers */
2241 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2242 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2243 /* SSE REX registers */
2244 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2245 SSE_REGS, SSE_REGS,
2246 };
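/* Illustrative note, not authoritative: REGNO_REG_CLASS in i386.h indexes
   this array directly, so for example

     REGNO_REG_CLASS (AX_REG) == AREG
     REGNO_REG_CLASS (BP_REG) == NON_Q_REGS

   assuming AX_REG and BP_REG carry their usual hard register numbers
   0 and 6.  */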
2247
2248 /* The "default" register map used in 32bit mode. */
2249
2250 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2251 {
2252 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2253 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2254 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2255 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2256 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2257 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2258 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2259 };
2260
2261 /* The "default" register map used in 64bit mode. */
2262
2263 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2264 {
2265 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2266 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2267 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2268 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2269 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2270 8,9,10,11,12,13,14,15, /* extended integer registers */
2271 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2272 };
2273
2274 /* Define the register numbers to be used in Dwarf debugging information.
2275 The SVR4 reference port C compiler uses the following register numbers
2276 in its Dwarf output code:
2277 0 for %eax (gcc regno = 0)
2278 1 for %ecx (gcc regno = 2)
2279 2 for %edx (gcc regno = 1)
2280 3 for %ebx (gcc regno = 3)
2281 4 for %esp (gcc regno = 7)
2282 5 for %ebp (gcc regno = 6)
2283 6 for %esi (gcc regno = 4)
2284 7 for %edi (gcc regno = 5)
2285 The following three DWARF register numbers are never generated by
2286 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2287 believes these numbers have these meanings.
2288 8 for %eip (no gcc equivalent)
2289 9 for %eflags (gcc regno = 17)
2290 10 for %trapno (no gcc equivalent)
2291 It is not at all clear how we should number the FP stack registers
2292 for the x86 architecture. If the version of SDB on x86/svr4 were
2293 a bit less brain dead with respect to floating-point then we would
2294 have a precedent to follow with respect to DWARF register numbers
2295 for x86 FP registers, but the SDB on x86/svr4 is so completely
2296 broken with respect to FP registers that it is hardly worth thinking
2297 of it as something to strive for compatibility with.
2298 The version of x86/svr4 SDB I have at the moment does (partially)
2299 seem to believe that DWARF register number 11 is associated with
2300 the x86 register %st(0), but that's about all. Higher DWARF
2301 register numbers don't seem to be associated with anything in
2302 particular, and even for DWARF regno 11, SDB only seems to under-
2303 stand that it should say that a variable lives in %st(0) (when
2304 asked via an `=' command) if we said it was in DWARF regno 11,
2305 but SDB still prints garbage when asked for the value of the
2306 variable in question (via a `/' command).
2307 (Also note that the labels SDB prints for various FP stack regs
2308 when doing an `x' command are all wrong.)
2309 Note that these problems generally don't affect the native SVR4
2310 C compiler because it doesn't allow the use of -O with -g and
2311 because when it is *not* optimizing, it allocates a memory
2312 location for each floating-point variable, and the memory
2313 location is what gets described in the DWARF AT_location
2314 attribute for the variable in question.
2315 Regardless of the severe mental illness of the x86/svr4 SDB, we
2316 do something sensible here and we use the following DWARF
2317 register numbers. Note that these are all stack-top-relative
2318 numbers.
2319 11 for %st(0) (gcc regno = 8)
2320 12 for %st(1) (gcc regno = 9)
2321 13 for %st(2) (gcc regno = 10)
2322 14 for %st(3) (gcc regno = 11)
2323 15 for %st(4) (gcc regno = 12)
2324 16 for %st(5) (gcc regno = 13)
2325 17 for %st(6) (gcc regno = 14)
2326 18 for %st(7) (gcc regno = 15)
2327 */
2328 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2329 {
2330 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2331 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2332 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2333 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2334 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2335 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2336 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2337 };
2338
2339 /* Define parameter passing and return registers. */
2340
2341 static int const x86_64_int_parameter_registers[6] =
2342 {
2343 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2344 };
2345
2346 static int const x86_64_ms_abi_int_parameter_registers[4] =
2347 {
2348 CX_REG, DX_REG, R8_REG, R9_REG
2349 };
2350
2351 static int const x86_64_int_return_registers[4] =
2352 {
2353 AX_REG, DX_REG, DI_REG, SI_REG
2354 };
2355
2356 /* Define the structure for the machine field in struct function. */
2357
2358 struct GTY(()) stack_local_entry {
2359 unsigned short mode;
2360 unsigned short n;
2361 rtx rtl;
2362 struct stack_local_entry *next;
2363 };
2364
2365 /* Structure describing stack frame layout.
2366 Stack grows downward:
2367
2368 [arguments]
2369 <- ARG_POINTER
2370 saved pc
2371
2372 saved static chain if ix86_static_chain_on_stack
2373
2374 saved frame pointer if frame_pointer_needed
2375 <- HARD_FRAME_POINTER
2376 [saved regs]
2377 <- regs_save_offset
2378 [padding0]
2379
2380 [saved SSE regs]
2381 <- sse_regs_save_offset
2382 [padding1] |
2383 | <- FRAME_POINTER
2384 [va_arg registers] |
2385 |
2386 [frame] |
2387 |
2388 [padding2] | = to_allocate
2389 <- STACK_POINTER
2390 */
2391 struct ix86_frame
2392 {
2393 int nsseregs;
2394 int nregs;
2395 int va_arg_size;
2396 int red_zone_size;
2397 int outgoing_arguments_size;
2398 HOST_WIDE_INT frame;
2399
2400 /* The offsets relative to ARG_POINTER. */
2401 HOST_WIDE_INT frame_pointer_offset;
2402 HOST_WIDE_INT hard_frame_pointer_offset;
2403 HOST_WIDE_INT stack_pointer_offset;
2404 HOST_WIDE_INT hfp_save_offset;
2405 HOST_WIDE_INT reg_save_offset;
2406 HOST_WIDE_INT sse_reg_save_offset;
2407
2408 /* When save_regs_using_mov is set, emit prologue using
2409 move instead of push instructions. */
2410 bool save_regs_using_mov;
2411 };
2412
2413 /* Which cpu are we scheduling for. */
2414 enum attr_cpu ix86_schedule;
2415
2416 /* Which cpu are we optimizing for. */
2417 enum processor_type ix86_tune;
2418
2419 /* Which instruction set architecture to use. */
2420 enum processor_type ix86_arch;
2421
2422 /* True if the SSE prefetch instruction is not a NOOP.  */
2423 int x86_prefetch_sse;
2424
2425 /* -mstackrealign option */
2426 static const char ix86_force_align_arg_pointer_string[]
2427 = "force_align_arg_pointer";
2428
2429 static rtx (*ix86_gen_leave) (void);
2430 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2431 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2432 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2433 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2434 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2435 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2436 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2437 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2438 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2439
2440 /* Preferred alignment for stack boundary in bits. */
2441 unsigned int ix86_preferred_stack_boundary;
2442
2443 /* Alignment for incoming stack boundary in bits specified at
2444 command line. */
2445 static unsigned int ix86_user_incoming_stack_boundary;
2446
2447 /* Default alignment for incoming stack boundary in bits. */
2448 static unsigned int ix86_default_incoming_stack_boundary;
2449
2450 /* Alignment for incoming stack boundary in bits. */
2451 unsigned int ix86_incoming_stack_boundary;
2452
2453 /* Calling abi specific va_list type nodes. */
2454 static GTY(()) tree sysv_va_list_type_node;
2455 static GTY(()) tree ms_va_list_type_node;
2456
2457 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2458 char internal_label_prefix[16];
2459 int internal_label_prefix_len;
2460
2461 /* Fence to use after loop using movnt. */
2462 tree x86_mfence;
2463
2464 /* Register class used for passing a given 64bit part of the argument.
2465    These represent classes as documented by the psABI, with the exception
2466    of the SSESF and SSEDF classes, which are basically the SSE class, except
2467    that gcc uses SFmode or DFmode moves instead of DImode to avoid
2468    reformatting penalties.
2469    Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2470    whenever possible (the upper half then contains padding).  */
2471 enum x86_64_reg_class
2472 {
2473 X86_64_NO_CLASS,
2474 X86_64_INTEGER_CLASS,
2475 X86_64_INTEGERSI_CLASS,
2476 X86_64_SSE_CLASS,
2477 X86_64_SSESF_CLASS,
2478 X86_64_SSEDF_CLASS,
2479 X86_64_SSEUP_CLASS,
2480 X86_64_X87_CLASS,
2481 X86_64_X87UP_CLASS,
2482 X86_64_COMPLEX_X87_CLASS,
2483 X86_64_MEMORY_CLASS
2484 };
2485
2486 #define MAX_CLASSES 4
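/* Illustrative example, an assumption rather than a quote from the psABI:
   an argument of type

     struct pt { double d; long l; };

   occupies two eightbytes; the first would classify as X86_64_SSEDF_CLASS
   (passed in an SSE register, moved in DFmode) and the second as
   X86_64_INTEGER_CLASS (passed in the next free integer register).  */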
2487
2488 /* Table of constants used by fldpi, fldln2, etc.... */
2489 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2490 static bool ext_80387_constants_init = 0;
2491
2492 \f
2493 static struct machine_function * ix86_init_machine_status (void);
2494 static rtx ix86_function_value (const_tree, const_tree, bool);
2495 static bool ix86_function_value_regno_p (const unsigned int);
2496 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2497 const_tree);
2498 static rtx ix86_static_chain (const_tree, bool);
2499 static int ix86_function_regparm (const_tree, const_tree);
2500 static void ix86_compute_frame_layout (struct ix86_frame *);
2501 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2502 rtx, rtx, int);
2503 static void ix86_add_new_builtins (int);
2504 static rtx ix86_expand_vec_perm_builtin (tree);
2505 static tree ix86_canonical_va_list_type (tree);
2506 static void predict_jump (int);
2507 static unsigned int split_stack_prologue_scratch_regno (void);
2508 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2509
2510 enum ix86_function_specific_strings
2511 {
2512 IX86_FUNCTION_SPECIFIC_ARCH,
2513 IX86_FUNCTION_SPECIFIC_TUNE,
2514 IX86_FUNCTION_SPECIFIC_MAX
2515 };
2516
2517 static char *ix86_target_string (int, int, const char *, const char *,
2518 enum fpmath_unit, bool);
2519 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2520 static void ix86_function_specific_save (struct cl_target_option *);
2521 static void ix86_function_specific_restore (struct cl_target_option *);
2522 static void ix86_function_specific_print (FILE *, int,
2523 struct cl_target_option *);
2524 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2525 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2526 struct gcc_options *);
2527 static bool ix86_can_inline_p (tree, tree);
2528 static void ix86_set_current_function (tree);
2529 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2530
2531 static enum calling_abi ix86_function_abi (const_tree);
2532
2533 \f
2534 #ifndef SUBTARGET32_DEFAULT_CPU
2535 #define SUBTARGET32_DEFAULT_CPU "i386"
2536 #endif
2537
2538 /* The svr4 ABI for the i386 says that records and unions are returned
2539 in memory. */
2540 #ifndef DEFAULT_PCC_STRUCT_RETURN
2541 #define DEFAULT_PCC_STRUCT_RETURN 1
2542 #endif
2543
2544 /* Whether -mtune= or -march= were specified */
2545 static int ix86_tune_defaulted;
2546 static int ix86_arch_specified;
2547
2548 /* Vectorization library interface and handlers. */
2549 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2550
2551 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2552 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2553
2554 /* Processor target table, indexed by processor number */
2555 struct ptt
2556 {
2557 const struct processor_costs *cost; /* Processor costs */
2558 const int align_loop; /* Default alignments. */
2559 const int align_loop_max_skip;
2560 const int align_jump;
2561 const int align_jump_max_skip;
2562 const int align_func;
2563 };
2564
2565 static const struct ptt processor_target_table[PROCESSOR_max] =
2566 {
2567 {&i386_cost, 4, 3, 4, 3, 4},
2568 {&i486_cost, 16, 15, 16, 15, 16},
2569 {&pentium_cost, 16, 7, 16, 7, 16},
2570 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2571 {&geode_cost, 0, 0, 0, 0, 0},
2572 {&k6_cost, 32, 7, 32, 7, 32},
2573 {&athlon_cost, 16, 7, 16, 7, 16},
2574 {&pentium4_cost, 0, 0, 0, 0, 0},
2575 {&k8_cost, 16, 7, 16, 7, 16},
2576 {&nocona_cost, 0, 0, 0, 0, 0},
2577 /* Core 2 32-bit. */
2578 {&generic32_cost, 16, 10, 16, 10, 16},
2579 /* Core 2 64-bit. */
2580 {&generic64_cost, 16, 10, 16, 10, 16},
2581 /* Core i7 32-bit. */
2582 {&generic32_cost, 16, 10, 16, 10, 16},
2583 /* Core i7 64-bit. */
2584 {&generic64_cost, 16, 10, 16, 10, 16},
2585 {&generic32_cost, 16, 7, 16, 7, 16},
2586 {&generic64_cost, 16, 10, 16, 10, 16},
2587 {&amdfam10_cost, 32, 24, 32, 7, 32},
2588 {&bdver1_cost, 32, 24, 32, 7, 32},
2589 {&bdver2_cost, 32, 24, 32, 7, 32},
2590 {&btver1_cost, 32, 24, 32, 7, 32},
2591 {&atom_cost, 16, 7, 16, 7, 16}
2592 };
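/* Illustrative note, not authoritative: this table is indexed by
   enum processor_type, so its rows must stay in the same order as the
   PROCESSOR_* values.  Option handling later selects
   processor_target_table[ix86_tune].cost as the active cost model (a
   separate size-oriented table is used when optimizing for size) and
   takes the align_* fields as the defaults for -falign-loops,
   -falign-jumps and -falign-functions.  */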
2593
2594 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2595 {
2596 "generic",
2597 "i386",
2598 "i486",
2599 "pentium",
2600 "pentium-mmx",
2601 "pentiumpro",
2602 "pentium2",
2603 "pentium3",
2604 "pentium4",
2605 "pentium-m",
2606 "prescott",
2607 "nocona",
2608 "core2",
2609 "corei7",
2610 "atom",
2611 "geode",
2612 "k6",
2613 "k6-2",
2614 "k6-3",
2615 "athlon",
2616 "athlon-4",
2617 "k8",
2618 "amdfam10",
2619 "bdver1",
2620 "bdver2",
2621 "btver1"
2622 };
2623 \f
2624 /* Return true if a red-zone is in use. */
2625
2626 static inline bool
2627 ix86_using_red_zone (void)
2628 {
2629 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2630 }
2631 \f
2632 /* Return a string that documents the current -m options. The caller is
2633 responsible for freeing the string. */
2634
2635 static char *
2636 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2637 enum fpmath_unit fpmath, bool add_nl_p)
2638 {
2639 struct ix86_target_opts
2640 {
2641 const char *option; /* option string */
2642 int mask; /* isa mask options */
2643 };
2644
2645   /* This table is ordered so that options like -msse4.2 that imply
2646      preceding options are matched first.  */
2647 static struct ix86_target_opts isa_opts[] =
2648 {
2649 { "-m64", OPTION_MASK_ISA_64BIT },
2650 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2651 { "-mfma", OPTION_MASK_ISA_FMA },
2652 { "-mxop", OPTION_MASK_ISA_XOP },
2653 { "-mlwp", OPTION_MASK_ISA_LWP },
2654 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2655 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2656 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2657 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2658 { "-msse3", OPTION_MASK_ISA_SSE3 },
2659 { "-msse2", OPTION_MASK_ISA_SSE2 },
2660 { "-msse", OPTION_MASK_ISA_SSE },
2661 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2662 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2663 { "-mmmx", OPTION_MASK_ISA_MMX },
2664 { "-mabm", OPTION_MASK_ISA_ABM },
2665 { "-mbmi", OPTION_MASK_ISA_BMI },
2666 { "-mtbm", OPTION_MASK_ISA_TBM },
2667 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2668 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2669 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2670 { "-maes", OPTION_MASK_ISA_AES },
2671 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2672 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2673 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2674 { "-mf16c", OPTION_MASK_ISA_F16C },
2675 };
2676
2677 /* Flag options. */
2678 static struct ix86_target_opts flag_opts[] =
2679 {
2680 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2681 { "-m80387", MASK_80387 },
2682 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2683 { "-malign-double", MASK_ALIGN_DOUBLE },
2684 { "-mcld", MASK_CLD },
2685 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2686 { "-mieee-fp", MASK_IEEE_FP },
2687 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2688 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2689 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2690 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2691 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2692 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2693 { "-mno-red-zone", MASK_NO_RED_ZONE },
2694 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2695 { "-mrecip", MASK_RECIP },
2696 { "-mrtd", MASK_RTD },
2697 { "-msseregparm", MASK_SSEREGPARM },
2698 { "-mstack-arg-probe", MASK_STACK_PROBE },
2699 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2700 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2701 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2702 { "-mvzeroupper", MASK_VZEROUPPER },
2703 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2704 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2705 { "-mprefer-avx128", MASK_PREFER_AVX128},
2706 };
2707
2708 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2709
2710 char isa_other[40];
2711 char target_other[40];
2712 unsigned num = 0;
2713 unsigned i, j;
2714 char *ret;
2715 char *ptr;
2716 size_t len;
2717 size_t line_len;
2718 size_t sep_len;
2719
2720 memset (opts, '\0', sizeof (opts));
2721
2722 /* Add -march= option. */
2723 if (arch)
2724 {
2725 opts[num][0] = "-march=";
2726 opts[num++][1] = arch;
2727 }
2728
2729 /* Add -mtune= option. */
2730 if (tune)
2731 {
2732 opts[num][0] = "-mtune=";
2733 opts[num++][1] = tune;
2734 }
2735
2736   /* Pick out the ISA options.  */
2737 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2738 {
2739 if ((isa & isa_opts[i].mask) != 0)
2740 {
2741 opts[num++][0] = isa_opts[i].option;
2742 isa &= ~ isa_opts[i].mask;
2743 }
2744 }
2745
2746 if (isa && add_nl_p)
2747 {
2748 opts[num++][0] = isa_other;
2749 sprintf (isa_other, "(other isa: %#x)", isa);
2750 }
2751
2752 /* Add flag options. */
2753 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2754 {
2755 if ((flags & flag_opts[i].mask) != 0)
2756 {
2757 opts[num++][0] = flag_opts[i].option;
2758 flags &= ~ flag_opts[i].mask;
2759 }
2760 }
2761
2762 if (flags && add_nl_p)
2763 {
2764 opts[num++][0] = target_other;
2765 sprintf (target_other, "(other flags: %#x)", flags);
2766 }
2767
2768 /* Add -fpmath= option. */
2769 if (fpmath)
2770 {
2771 opts[num][0] = "-mfpmath=";
2772 switch ((int) fpmath)
2773 {
2774 case FPMATH_387:
2775 opts[num++][1] = "387";
2776 break;
2777
2778 case FPMATH_SSE:
2779 opts[num++][1] = "sse";
2780 break;
2781
2782 case FPMATH_387 | FPMATH_SSE:
2783 opts[num++][1] = "sse+387";
2784 break;
2785
2786 default:
2787 gcc_unreachable ();
2788 }
2789 }
2790
2791 /* Any options? */
2792 if (num == 0)
2793 return NULL;
2794
2795 gcc_assert (num < ARRAY_SIZE (opts));
2796
2797 /* Size the string. */
2798 len = 0;
2799 sep_len = (add_nl_p) ? 3 : 1;
2800 for (i = 0; i < num; i++)
2801 {
2802 len += sep_len;
2803 for (j = 0; j < 2; j++)
2804 if (opts[i][j])
2805 len += strlen (opts[i][j]);
2806 }
2807
2808 /* Build the string. */
2809 ret = ptr = (char *) xmalloc (len);
2810 line_len = 0;
2811
2812 for (i = 0; i < num; i++)
2813 {
2814 size_t len2[2];
2815
2816 for (j = 0; j < 2; j++)
2817 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2818
2819 if (i != 0)
2820 {
2821 *ptr++ = ' ';
2822 line_len++;
2823
2824 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2825 {
2826 *ptr++ = '\\';
2827 *ptr++ = '\n';
2828 line_len = 0;
2829 }
2830 }
2831
2832 for (j = 0; j < 2; j++)
2833 if (opts[i][j])
2834 {
2835 memcpy (ptr, opts[i][j], len2[j]);
2836 ptr += len2[j];
2837 line_len += len2[j];
2838 }
2839 }
2840
2841 *ptr = '\0';
2842 gcc_assert (ret + len >= ptr);
2843
2844 return ret;
2845 }
2846
2847 /* Return true if profiling code should be emitted before the
2848    prologue; otherwise return false.
2849    Note: For x86 with "hotfix" a sorry is issued.  */
2850 static bool
2851 ix86_profile_before_prologue (void)
2852 {
2853 return flag_fentry != 0;
2854 }
2855
2856 /* Function that is callable from the debugger to print the current
2857 options. */
2858 void
2859 ix86_debug_options (void)
2860 {
2861 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2862 ix86_arch_string, ix86_tune_string,
2863 ix86_fpmath, true);
2864
2865 if (opts)
2866 {
2867 fprintf (stderr, "%s\n\n", opts);
2868 free (opts);
2869 }
2870 else
2871 fputs ("<no options>\n\n", stderr);
2872
2873 return;
2874 }
2875 \f
2876 /* Override various settings based on options. If MAIN_ARGS_P, the
2877 options are from the command line, otherwise they are from
2878 attributes. */
2879
2880 static void
2881 ix86_option_override_internal (bool main_args_p)
2882 {
2883 int i;
2884 unsigned int ix86_arch_mask, ix86_tune_mask;
2885 const bool ix86_tune_specified = (ix86_tune_string != NULL);
2886 const char *prefix;
2887 const char *suffix;
2888 const char *sw;
2889
2890 enum pta_flags
2891 {
2892 PTA_SSE = 1 << 0,
2893 PTA_SSE2 = 1 << 1,
2894 PTA_SSE3 = 1 << 2,
2895 PTA_MMX = 1 << 3,
2896 PTA_PREFETCH_SSE = 1 << 4,
2897 PTA_3DNOW = 1 << 5,
2898 PTA_3DNOW_A = 1 << 6,
2899 PTA_64BIT = 1 << 7,
2900 PTA_SSSE3 = 1 << 8,
2901 PTA_CX16 = 1 << 9,
2902 PTA_POPCNT = 1 << 10,
2903 PTA_ABM = 1 << 11,
2904 PTA_SSE4A = 1 << 12,
2905 PTA_NO_SAHF = 1 << 13,
2906 PTA_SSE4_1 = 1 << 14,
2907 PTA_SSE4_2 = 1 << 15,
2908 PTA_AES = 1 << 16,
2909 PTA_PCLMUL = 1 << 17,
2910 PTA_AVX = 1 << 18,
2911 PTA_FMA = 1 << 19,
2912 PTA_MOVBE = 1 << 20,
2913 PTA_FMA4 = 1 << 21,
2914 PTA_XOP = 1 << 22,
2915 PTA_LWP = 1 << 23,
2916 PTA_FSGSBASE = 1 << 24,
2917 PTA_RDRND = 1 << 25,
2918 PTA_F16C = 1 << 26,
2919 PTA_BMI = 1 << 27,
2920 PTA_TBM = 1 << 28
2921 /* if this reaches 32, need to widen struct pta flags below */
2922 };
2923
2924 static struct pta
2925 {
2926 const char *const name; /* processor name or nickname. */
2927 const enum processor_type processor;
2928 const enum attr_cpu schedule;
2929 const unsigned /*enum pta_flags*/ flags;
2930 }
2931 const processor_alias_table[] =
2932 {
2933 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2934 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2935 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2936 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2937 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2938 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2939 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2940 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2941 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2942 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2943 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2944 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2945 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2946 PTA_MMX | PTA_SSE},
2947 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2948 PTA_MMX | PTA_SSE},
2949 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2950 PTA_MMX | PTA_SSE | PTA_SSE2},
2951 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2952 PTA_MMX |PTA_SSE | PTA_SSE2},
2953 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2954 PTA_MMX | PTA_SSE | PTA_SSE2},
2955 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2956 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2957 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2958 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2959 | PTA_CX16 | PTA_NO_SAHF},
2960 {"core2", PROCESSOR_CORE2_64, CPU_CORE2,
2961 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2962 | PTA_SSSE3 | PTA_CX16},
2963 {"corei7", PROCESSOR_COREI7_64, CPU_COREI7,
2964 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2965 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16},
2966 {"corei7-avx", PROCESSOR_COREI7_64, CPU_COREI7,
2967 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2968 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
2969 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL},
2970 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2971 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2972 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2973 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2974 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
2975 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2976 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2977 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2978 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2979 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2980 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2981 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2982 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2983 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2984 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2985 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2986 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2987 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2988 {"x86-64", PROCESSOR_K8, CPU_K8,
2989 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2990 {"k8", PROCESSOR_K8, CPU_K8,
2991 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2992 | PTA_SSE2 | PTA_NO_SAHF},
2993 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2994 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2995 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2996 {"opteron", PROCESSOR_K8, CPU_K8,
2997 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2998 | PTA_SSE2 | PTA_NO_SAHF},
2999 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3000 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3001 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3002 {"athlon64", PROCESSOR_K8, CPU_K8,
3003 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3004 | PTA_SSE2 | PTA_NO_SAHF},
3005 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3006 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3007 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3008 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3009 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3010 | PTA_SSE2 | PTA_NO_SAHF},
3011 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3012 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3013 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3014 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3015 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3016 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3017 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3018 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3019 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3020 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3021 | PTA_XOP | PTA_LWP},
3022 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3023 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3024 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3025 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3026 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3027 | PTA_FMA},
3028 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
3029 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3030 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16},
3031 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
3032 0 /* flags are only used for -march switch. */ },
3033 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
3034 PTA_64BIT /* flags are only used for -march switch. */ },
3035 };
3036
3037 int const pta_size = ARRAY_SIZE (processor_alias_table);
3038
3039 /* Set up prefix/suffix so the error messages refer to either the command
3040 line argument, or the attribute(target). */
3041 if (main_args_p)
3042 {
3043 prefix = "-m";
3044 suffix = "";
3045 sw = "switch";
3046 }
3047 else
3048 {
3049 prefix = "option(\"";
3050 suffix = "\")";
3051 sw = "attribute";
3052 }
3053
3054 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3055 SUBTARGET_OVERRIDE_OPTIONS;
3056 #endif
3057
3058 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3059 SUBSUBTARGET_OVERRIDE_OPTIONS;
3060 #endif
3061
3062 if (TARGET_X32)
3063 ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3064
3065 /* -fPIC is the default for x86_64. */
3066 if (TARGET_MACHO && TARGET_64BIT)
3067 flag_pic = 2;
3068
3069 /* Need to check -mtune=generic first. */
3070 if (ix86_tune_string)
3071 {
3072 if (!strcmp (ix86_tune_string, "generic")
3073 || !strcmp (ix86_tune_string, "i686")
3074 /* As special support for cross compilers we read -mtune=native
3075 as -mtune=generic. With native compilers we won't see the
3076 -mtune=native, as it was changed by the driver. */
3077 || !strcmp (ix86_tune_string, "native"))
3078 {
3079 if (TARGET_64BIT)
3080 ix86_tune_string = "generic64";
3081 else
3082 ix86_tune_string = "generic32";
3083 }
3084 /* If this call is for setting the option attribute, allow the
3085 generic32/generic64 that was previously set. */
3086 else if (!main_args_p
3087 && (!strcmp (ix86_tune_string, "generic32")
3088 || !strcmp (ix86_tune_string, "generic64")))
3089 ;
3090 else if (!strncmp (ix86_tune_string, "generic", 7))
3091 error ("bad value (%s) for %stune=%s %s",
3092 ix86_tune_string, prefix, suffix, sw);
3093 else if (!strcmp (ix86_tune_string, "x86-64"))
3094 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3095 "%stune=k8%s or %stune=generic%s instead as appropriate",
3096 prefix, suffix, prefix, suffix, prefix, suffix);
3097 }
3098 else
3099 {
3100 if (ix86_arch_string)
3101 ix86_tune_string = ix86_arch_string;
3102 if (!ix86_tune_string)
3103 {
3104 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3105 ix86_tune_defaulted = 1;
3106 }
3107
3108 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3109 need to use a sensible tune option. */
3110 if (!strcmp (ix86_tune_string, "generic")
3111 || !strcmp (ix86_tune_string, "x86-64")
3112 || !strcmp (ix86_tune_string, "i686"))
3113 {
3114 if (TARGET_64BIT)
3115 ix86_tune_string = "generic64";
3116 else
3117 ix86_tune_string = "generic32";
3118 }
3119 }
3120
3121 if (ix86_stringop_alg == rep_prefix_8_byte && !TARGET_64BIT)
3122 {
3123 /* rep; movq isn't available in 32-bit code. */
3124 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3125 ix86_stringop_alg = no_stringop;
3126 }
3127
3128 if (!ix86_arch_string)
3129 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3130 else
3131 ix86_arch_specified = 1;
3132
3133 if (!global_options_set.x_ix86_abi)
3134 ix86_abi = DEFAULT_ABI;
3135
3136 if (global_options_set.x_ix86_cmodel)
3137 {
3138 switch (ix86_cmodel)
3139 {
3140 case CM_SMALL:
3141 case CM_SMALL_PIC:
3142 if (flag_pic)
3143 ix86_cmodel = CM_SMALL_PIC;
3144 if (!TARGET_64BIT)
3145 error ("code model %qs not supported in the %s bit mode",
3146 "small", "32");
3147 break;
3148
3149 case CM_MEDIUM:
3150 case CM_MEDIUM_PIC:
3151 if (flag_pic)
3152 ix86_cmodel = CM_MEDIUM_PIC;
3153 if (!TARGET_64BIT)
3154 error ("code model %qs not supported in the %s bit mode",
3155 "medium", "32");
3156 else if (TARGET_X32)
3157 error ("code model %qs not supported in x32 mode",
3158 "medium");
3159 break;
3160
3161 case CM_LARGE:
3162 case CM_LARGE_PIC:
3163 if (flag_pic)
3164 ix86_cmodel = CM_LARGE_PIC;
3165 if (!TARGET_64BIT)
3166 error ("code model %qs not supported in the %s bit mode",
3167 "large", "32");
3168 else if (TARGET_X32)
3169 error ("code model %qs not supported in x32 mode",
3170 		   "large");
3171 break;
3172
3173 case CM_32:
3174 if (flag_pic)
3175 error ("code model %s does not support PIC mode", "32");
3176 if (TARGET_64BIT)
3177 error ("code model %qs not supported in the %s bit mode",
3178 "32", "64");
3179 break;
3180
3181 case CM_KERNEL:
3182 if (flag_pic)
3183 {
3184 error ("code model %s does not support PIC mode", "kernel");
3185 ix86_cmodel = CM_32;
3186 }
3187 if (!TARGET_64BIT)
3188 error ("code model %qs not supported in the %s bit mode",
3189 "kernel", "32");
3190 break;
3191
3192 default:
3193 gcc_unreachable ();
3194 }
3195 }
3196 else
3197 {
3198 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3199 use of rip-relative addressing. This eliminates fixups that
3200 would otherwise be needed if this object is to be placed in a
3201 DLL, and is essentially just as efficient as direct addressing. */
3202 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3203 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3204 else if (TARGET_64BIT)
3205 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3206 else
3207 ix86_cmodel = CM_32;
3208 }
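  /* Illustrative note (not part of the original sources): with no explicit
     -mcmodel= option the defaults chosen above are roughly

	 gcc -m64        foo.c   ->  CM_SMALL
	 gcc -m64 -fPIC  foo.c   ->  CM_SMALL_PIC
	 gcc -m32        foo.c   ->  CM_32

     and the 64-bit MS ABI always gets CM_SMALL_PIC so that rip-relative
     addressing can be used.  */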
3209 if (TARGET_MACHO && ix86_asm_dialect == ASM_INTEL)
3210 {
3211 error ("-masm=intel not supported in this configuration");
3212 ix86_asm_dialect = ASM_ATT;
3213 }
3214 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3215 sorry ("%i-bit mode not compiled in",
3216 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3217
3218 for (i = 0; i < pta_size; i++)
3219 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3220 {
3221 ix86_schedule = processor_alias_table[i].schedule;
3222 ix86_arch = processor_alias_table[i].processor;
3223 /* Default cpu tuning to the architecture. */
3224 ix86_tune = ix86_arch;
3225
3226 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3227 error ("CPU you selected does not support x86-64 "
3228 "instruction set");
3229
3230 if (processor_alias_table[i].flags & PTA_MMX
3231 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3232 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3233 if (processor_alias_table[i].flags & PTA_3DNOW
3234 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3235 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3236 if (processor_alias_table[i].flags & PTA_3DNOW_A
3237 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3238 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3239 if (processor_alias_table[i].flags & PTA_SSE
3240 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3241 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3242 if (processor_alias_table[i].flags & PTA_SSE2
3243 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3244 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3245 if (processor_alias_table[i].flags & PTA_SSE3
3246 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3247 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3248 if (processor_alias_table[i].flags & PTA_SSSE3
3249 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3250 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3251 if (processor_alias_table[i].flags & PTA_SSE4_1
3252 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3253 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3254 if (processor_alias_table[i].flags & PTA_SSE4_2
3255 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3256 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3257 if (processor_alias_table[i].flags & PTA_AVX
3258 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3259 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3260 if (processor_alias_table[i].flags & PTA_FMA
3261 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3262 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3263 if (processor_alias_table[i].flags & PTA_SSE4A
3264 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3265 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3266 if (processor_alias_table[i].flags & PTA_FMA4
3267 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3268 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3269 if (processor_alias_table[i].flags & PTA_XOP
3270 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3271 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3272 if (processor_alias_table[i].flags & PTA_LWP
3273 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3274 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3275 if (processor_alias_table[i].flags & PTA_ABM
3276 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3277 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3278 if (processor_alias_table[i].flags & PTA_BMI
3279 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3280 ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3281 if (processor_alias_table[i].flags & PTA_TBM
3282 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3283 ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3284 if (processor_alias_table[i].flags & PTA_CX16
3285 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3286 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3287 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3288 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3289 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3290 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3291 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3292 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3293 if (processor_alias_table[i].flags & PTA_MOVBE
3294 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3295 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3296 if (processor_alias_table[i].flags & PTA_AES
3297 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3298 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3299 if (processor_alias_table[i].flags & PTA_PCLMUL
3300 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3301 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3302 if (processor_alias_table[i].flags & PTA_FSGSBASE
3303 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3304 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3305 if (processor_alias_table[i].flags & PTA_RDRND
3306 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3307 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3308 if (processor_alias_table[i].flags & PTA_F16C
3309 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3310 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3311 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3312 x86_prefetch_sse = true;
3313
3314 break;
3315 }
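  /* Illustrative note (not part of the original sources): the loop above is
     what makes a plain

	 gcc -march=bdver1 foo.c

     enable MMX, SSE..SSE4.2, AVX, FMA4, XOP, LWP, etc., while

	 gcc -march=bdver1 -mno-avx foo.c

     keeps AVX disabled, because the explicit -mno-avx is recorded in
     ix86_isa_flags_explicit and therefore takes precedence over the PTA_*
     defaults of the selected -march entry.  */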
3316
3317 if (!strcmp (ix86_arch_string, "generic"))
3318 error ("generic CPU can be used only for %stune=%s %s",
3319 prefix, suffix, sw);
3320 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3321 error ("bad value (%s) for %sarch=%s %s",
3322 ix86_arch_string, prefix, suffix, sw);
3323
3324 ix86_arch_mask = 1u << ix86_arch;
3325 for (i = 0; i < X86_ARCH_LAST; ++i)
3326 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3327
3328 for (i = 0; i < pta_size; i++)
3329 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3330 {
3331 ix86_schedule = processor_alias_table[i].schedule;
3332 ix86_tune = processor_alias_table[i].processor;
3333 if (TARGET_64BIT)
3334 {
3335 if (!(processor_alias_table[i].flags & PTA_64BIT))
3336 {
3337 if (ix86_tune_defaulted)
3338 {
3339 ix86_tune_string = "x86-64";
3340 for (i = 0; i < pta_size; i++)
3341 if (! strcmp (ix86_tune_string,
3342 processor_alias_table[i].name))
3343 break;
3344 ix86_schedule = processor_alias_table[i].schedule;
3345 ix86_tune = processor_alias_table[i].processor;
3346 }
3347 else
3348 error ("CPU you selected does not support x86-64 "
3349 "instruction set");
3350 }
3351 }
3352 else
3353 {
3354 /* Adjust tuning when compiling for 32-bit ABI. */
3355 switch (ix86_tune)
3356 {
3357 case PROCESSOR_GENERIC64:
3358 ix86_tune = PROCESSOR_GENERIC32;
3359 ix86_schedule = CPU_PENTIUMPRO;
3360 break;
3361
3362 case PROCESSOR_CORE2_64:
3363 ix86_tune = PROCESSOR_CORE2_32;
3364 break;
3365
3366 case PROCESSOR_COREI7_64:
3367 ix86_tune = PROCESSOR_COREI7_32;
3368 break;
3369
3370 default:
3371 break;
3372 }
3373 }
3374 /* Intel CPUs have always interpreted SSE prefetch instructions as
3375 NOPs; so, we can enable SSE prefetch instructions even when
3376 -mtune (rather than -march) points us to a processor that has them.
3377 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3378 higher processors. */
3379 if (TARGET_CMOVE
3380 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3381 x86_prefetch_sse = true;
3382 break;
3383 }
3384
3385 if (ix86_tune_specified && i == pta_size)
3386 error ("bad value (%s) for %stune=%s %s",
3387 ix86_tune_string, prefix, suffix, sw);
3388
3389 ix86_tune_mask = 1u << ix86_tune;
3390 for (i = 0; i < X86_TUNE_LAST; ++i)
3391 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3392
3393 #ifndef USE_IX86_FRAME_POINTER
3394 #define USE_IX86_FRAME_POINTER 0
3395 #endif
3396
3397 #ifndef USE_X86_64_FRAME_POINTER
3398 #define USE_X86_64_FRAME_POINTER 0
3399 #endif
3400
3401 /* Set the default values for switches whose default depends on TARGET_64BIT
3402 in case they weren't overwritten by command line options. */
3403 if (TARGET_64BIT)
3404 {
3405 if (optimize > 1 && !global_options_set.x_flag_zee)
3406 flag_zee = 1;
3407 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3408 flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3409 if (flag_asynchronous_unwind_tables == 2)
3410 flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
3411 if (flag_pcc_struct_return == 2)
3412 flag_pcc_struct_return = 0;
3413 }
3414 else
3415 {
3416 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3417 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3418 if (flag_asynchronous_unwind_tables == 2)
3419 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3420 if (flag_pcc_struct_return == 2)
3421 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3422 }
3423
3424 if (optimize_size)
3425 ix86_cost = &ix86_size_cost;
3426 else
3427 ix86_cost = processor_target_table[ix86_tune].cost;
3428
3429 /* Arrange to set up i386_stack_locals for all functions. */
3430 init_machine_status = ix86_init_machine_status;
3431
3432 /* Validate -mregparm= value. */
3433 if (global_options_set.x_ix86_regparm)
3434 {
3435 if (TARGET_64BIT)
3436 warning (0, "-mregparm is ignored in 64-bit mode");
3437 if (ix86_regparm > REGPARM_MAX)
3438 {
3439 error ("-mregparm=%d is not between 0 and %d",
3440 ix86_regparm, REGPARM_MAX);
3441 ix86_regparm = 0;
3442 }
3443 }
3444 if (TARGET_64BIT)
3445 ix86_regparm = REGPARM_MAX;
3446
3447 /* Default align_* from the processor table. */
3448 if (align_loops == 0)
3449 {
3450 align_loops = processor_target_table[ix86_tune].align_loop;
3451 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3452 }
3453 if (align_jumps == 0)
3454 {
3455 align_jumps = processor_target_table[ix86_tune].align_jump;
3456 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3457 }
3458 if (align_functions == 0)
3459 {
3460 align_functions = processor_target_table[ix86_tune].align_func;
3461 }
3462
3463 /* Provide default for -mbranch-cost= value. */
3464 if (!global_options_set.x_ix86_branch_cost)
3465 ix86_branch_cost = ix86_cost->branch_cost;
3466
3467 if (TARGET_64BIT)
3468 {
3469 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3470
3471 /* Enable by default the SSE and MMX builtins. Do allow the user to
3472 explicitly disable any of these. In particular, disabling SSE and
3473 MMX for kernel code is extremely useful. */
3474 if (!ix86_arch_specified)
3475 ix86_isa_flags
3476 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3477 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3478
3479 if (TARGET_RTD)
3480 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3481 }
3482 else
3483 {
3484 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3485
3486 if (!ix86_arch_specified)
3487 ix86_isa_flags
3488 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3489
3490       /* The i386 ABI does not specify a red zone.  It still makes sense to use
3491 	 it when the programmer takes care to keep the stack from being destroyed.  */
3492 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3493 target_flags |= MASK_NO_RED_ZONE;
3494 }
3495
3496 /* Keep nonleaf frame pointers. */
3497 if (flag_omit_frame_pointer)
3498 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3499 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3500 flag_omit_frame_pointer = 1;
3501
3502 /* If we're doing fast math, we don't care about comparison order
3503 wrt NaNs. This lets us use a shorter comparison sequence. */
3504 if (flag_finite_math_only)
3505 target_flags &= ~MASK_IEEE_FP;
3506
3507 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3508 since the insns won't need emulation. */
3509 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3510 target_flags &= ~MASK_NO_FANCY_MATH_387;
3511
3512 /* Likewise, if the target doesn't have a 387, or we've specified
3513 software floating point, don't use 387 inline intrinsics. */
3514 if (!TARGET_80387)
3515 target_flags |= MASK_NO_FANCY_MATH_387;
3516
3517 /* Turn on MMX builtins for -msse. */
3518 if (TARGET_SSE)
3519 {
3520 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3521 x86_prefetch_sse = true;
3522 }
3523
3524 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3525 if (TARGET_SSE4_2 || TARGET_ABM)
3526 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3527
3528 /* Validate -mpreferred-stack-boundary= value or default it to
3529 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3530 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3531 if (global_options_set.x_ix86_preferred_stack_boundary_arg)
3532 {
3533 int min = (TARGET_64BIT ? 4 : 2);
3534 int max = (TARGET_SEH ? 4 : 12);
3535
3536 if (ix86_preferred_stack_boundary_arg < min
3537 || ix86_preferred_stack_boundary_arg > max)
3538 {
3539 if (min == max)
3540 error ("-mpreferred-stack-boundary is not supported "
3541 "for this target");
3542 else
3543 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3544 ix86_preferred_stack_boundary_arg, min, max);
3545 }
3546 else
3547 ix86_preferred_stack_boundary
3548 = (1 << ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
3549 }
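  /* Illustrative note (not part of the original sources): the option value is
     the log2 of the boundary in bytes, so e.g.

	 gcc -mpreferred-stack-boundary=4 foo.c

     requests a 2^4 = 16 byte (128 bit) stack boundary, matching the
     (1 << arg) * BITS_PER_UNIT computation above.  */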
3550
3551 /* Set the default value for -mstackrealign. */
3552 if (ix86_force_align_arg_pointer == -1)
3553 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3554
3555 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3556
3557 /* Validate -mincoming-stack-boundary= value or default it to
3558 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3559 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3560 if (global_options_set.x_ix86_incoming_stack_boundary_arg)
3561 {
3562 if (ix86_incoming_stack_boundary_arg < (TARGET_64BIT ? 4 : 2)
3563 || ix86_incoming_stack_boundary_arg > 12)
3564 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3565 ix86_incoming_stack_boundary_arg, TARGET_64BIT ? 4 : 2);
3566 else
3567 {
3568 ix86_user_incoming_stack_boundary
3569 = (1 << ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3570 ix86_incoming_stack_boundary
3571 = ix86_user_incoming_stack_boundary;
3572 }
3573 }
3574
3575 /* Accept -msseregparm only if at least SSE support is enabled. */
3576 if (TARGET_SSEREGPARM
3577 && ! TARGET_SSE)
3578 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3579
3580 if (global_options_set.x_ix86_fpmath)
3581 {
3582 if (ix86_fpmath & FPMATH_SSE)
3583 {
3584 if (!TARGET_SSE)
3585 {
3586 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3587 ix86_fpmath = FPMATH_387;
3588 }
3589 else if ((ix86_fpmath & FPMATH_387) && !TARGET_80387)
3590 {
3591 warning (0, "387 instruction set disabled, using SSE arithmetics");
3592 ix86_fpmath = FPMATH_SSE;
3593 }
3594 }
3595 }
3596 else
3597 ix86_fpmath = TARGET_FPMATH_DEFAULT;
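  /* Illustrative note (not part of the original sources): the checks above
     mean that, for example,

	 gcc -m32 -mno-sse -mfpmath=sse foo.c

     warns that the SSE instruction set is disabled and silently falls back
     to -mfpmath=387 rather than generating SSE arithmetic.  */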
3598
3599 /* If the i387 is disabled, then do not return values in it. */
3600 if (!TARGET_80387)
3601 target_flags &= ~MASK_FLOAT_RETURNS;
3602
3603 /* Use external vectorized library in vectorizing intrinsics. */
3604 if (global_options_set.x_ix86_veclibabi_type)
3605 switch (ix86_veclibabi_type)
3606 {
3607 case ix86_veclibabi_type_svml:
3608 ix86_veclib_handler = ix86_veclibabi_svml;
3609 break;
3610
3611 case ix86_veclibabi_type_acml:
3612 ix86_veclib_handler = ix86_veclibabi_acml;
3613 break;
3614
3615 default:
3616 gcc_unreachable ();
3617 }
3618
3619 if ((!USE_IX86_FRAME_POINTER
3620 || (x86_accumulate_outgoing_args & ix86_tune_mask))
3621 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3622 && !optimize_size)
3623 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3624
3625 /* ??? Unwind info is not correct around the CFG unless either a frame
3626 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3627 unwind info generation to be aware of the CFG and propagating states
3628 around edges. */
3629 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3630 || flag_exceptions || flag_non_call_exceptions)
3631 && flag_omit_frame_pointer
3632 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3633 {
3634 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3635 warning (0, "unwind tables currently require either a frame pointer "
3636 "or %saccumulate-outgoing-args%s for correctness",
3637 prefix, suffix);
3638 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3639 }
3640
3641 /* If stack probes are required, the space used for large function
3642 arguments on the stack must also be probed, so enable
3643 -maccumulate-outgoing-args so this happens in the prologue. */
3644 if (TARGET_STACK_PROBE
3645 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3646 {
3647 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3648 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3649 "for correctness", prefix, suffix);
3650 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3651 }
3652
3653   /* For sane SSE instruction set generation we need the fcomi instruction.
3654      It is safe to enable all CMOVE instructions.  Also, the RDRAND intrinsic
3655      expands to a sequence that includes a conditional move.  */
3656 if (TARGET_SSE || TARGET_RDRND)
3657 TARGET_CMOVE = 1;
3658
3659 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3660 {
3661 char *p;
3662 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3663 p = strchr (internal_label_prefix, 'X');
3664 internal_label_prefix_len = p - internal_label_prefix;
3665 *p = '\0';
3666 }
3667
3668   /* When a scheduling description is not available, disable the scheduler
3669      passes so they won't slow down the compilation and make x87 code slower.  */
3670 if (!TARGET_SCHEDULE)
3671 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3672
3673 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3674 ix86_cost->simultaneous_prefetches,
3675 global_options.x_param_values,
3676 global_options_set.x_param_values);
3677 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
3678 global_options.x_param_values,
3679 global_options_set.x_param_values);
3680 maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
3681 global_options.x_param_values,
3682 global_options_set.x_param_values);
3683 maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
3684 global_options.x_param_values,
3685 global_options_set.x_param_values);
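
  /* Illustrative note (not part of the original sources): maybe_set_param_value
     only installs these cost-model derived defaults when the user has not set
     the parameter explicitly, so e.g.

	 gcc --param l1-cache-size=64 foo.c

     still wins over ix86_cost->l1_cache_size.  */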
3686
3687   /* Enable software prefetching at -O3 for CPUs where prefetching is helpful.  */
3688 if (flag_prefetch_loop_arrays < 0
3689 && HAVE_prefetch
3690 && optimize >= 3
3691 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
3692 flag_prefetch_loop_arrays = 1;
3693
3694 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3695 can be optimized to ap = __builtin_next_arg (0). */
3696 if (!TARGET_64BIT && !flag_split_stack)
3697 targetm.expand_builtin_va_start = NULL;
3698
3699 if (TARGET_64BIT)
3700 {
3701 ix86_gen_leave = gen_leave_rex64;
3702 ix86_gen_add3 = gen_adddi3;
3703 ix86_gen_sub3 = gen_subdi3;
3704 ix86_gen_sub3_carry = gen_subdi3_carry;
3705 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3706 ix86_gen_monitor = gen_sse3_monitor64;
3707 ix86_gen_andsp = gen_anddi3;
3708 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
3709 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
3710 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
3711 }
3712 else
3713 {
3714 ix86_gen_leave = gen_leave;
3715 ix86_gen_add3 = gen_addsi3;
3716 ix86_gen_sub3 = gen_subsi3;
3717 ix86_gen_sub3_carry = gen_subsi3_carry;
3718 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3719 ix86_gen_monitor = gen_sse3_monitor;
3720 ix86_gen_andsp = gen_andsi3;
3721 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
3722 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
3723 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
3724 }
3725
3726 #ifdef USE_IX86_CLD
3727 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3728 if (!TARGET_64BIT)
3729 target_flags |= MASK_CLD & ~target_flags_explicit;
3730 #endif
3731
3732 if (!TARGET_64BIT && flag_pic)
3733 {
3734 if (flag_fentry > 0)
3735 sorry ("-mfentry isn%'t supported for 32-bit in combination "
3736 "with -fpic");
3737 flag_fentry = 0;
3738 }
3739 else if (TARGET_SEH)
3740 {
3741 if (flag_fentry == 0)
3742 sorry ("-mno-fentry isn%'t compatible with SEH");
3743 flag_fentry = 1;
3744 }
3745 else if (flag_fentry < 0)
3746 {
3747 #if defined(PROFILE_BEFORE_PROLOGUE)
3748 flag_fentry = 1;
3749 #else
3750 flag_fentry = 0;
3751 #endif
3752 }
3753
3754 if (TARGET_AVX)
3755 {
3756       /* When not optimizing for size, enable the vzeroupper optimization for
3757 	 TARGET_AVX with -fexpensive-optimizations, and split 32-byte
3758 	 AVX unaligned loads/stores.  */
3759 if (!optimize_size)
3760 {
3761 if (flag_expensive_optimizations
3762 && !(target_flags_explicit & MASK_VZEROUPPER))
3763 target_flags |= MASK_VZEROUPPER;
3764 if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
3765 && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
3766 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
3767 if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
3768 && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
3769 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
3770 /* Enable 128-bit AVX instruction generation for the auto-vectorizer. */
3771 if (TARGET_AVX128_OPTIMAL && !(target_flags_explicit & MASK_PREFER_AVX128))
3772 target_flags |= MASK_PREFER_AVX128;
3773 }
3774 }
3775 else
3776 {
3777 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
3778 target_flags &= ~MASK_VZEROUPPER;
3779 }
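  /* Illustrative note (not part of the original sources): the net effect of
     the block above is that something like

	 gcc -mavx -O2 foo.c

     (where -fexpensive-optimizations is on by default) also turns on
     -mvzeroupper, unless the user passed an explicit -mno-vzeroupper.  */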
3780
3781 /* Save the initial options in case the user does function specific
3782 options. */
3783 if (main_args_p)
3784 target_option_default_node = target_option_current_node
3785 = build_target_option_node ();
3786 }
3787
3788 /* Return TRUE if VAL is passed in register with 256bit AVX modes. */
3789
3790 static bool
3791 function_pass_avx256_p (const_rtx val)
3792 {
3793 if (!val)
3794 return false;
3795
3796 if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
3797 return true;
3798
3799 if (GET_CODE (val) == PARALLEL)
3800 {
3801 int i;
3802 rtx r;
3803
3804 for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
3805 {
3806 r = XVECEXP (val, 0, i);
3807 if (GET_CODE (r) == EXPR_LIST
3808 && XEXP (r, 0)
3809 && REG_P (XEXP (r, 0))
3810 && (GET_MODE (XEXP (r, 0)) == OImode
3811 || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
3812 return true;
3813 }
3814 }
3815
3816 return false;
3817 }
3818
3819 /* Implement the TARGET_OPTION_OVERRIDE hook. */
3820
3821 static void
3822 ix86_option_override (void)
3823 {
3824 ix86_option_override_internal (true);
3825 }
3826
3827 /* Update register usage after having seen the compiler flags. */
3828
3829 static void
3830 ix86_conditional_register_usage (void)
3831 {
3832 int i;
3833 unsigned int j;
3834
3835 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3836 {
3837 if (fixed_regs[i] > 1)
3838 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3839 if (call_used_regs[i] > 1)
3840 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3841 }
3842
3843 /* The PIC register, if it exists, is fixed. */
3844 j = PIC_OFFSET_TABLE_REGNUM;
3845 if (j != INVALID_REGNUM)
3846 fixed_regs[j] = call_used_regs[j] = 1;
3847
3848 /* The 64-bit MS_ABI changes the set of call-used registers. */
3849 if (TARGET_64BIT_MS_ABI)
3850 {
3851 call_used_regs[SI_REG] = 0;
3852 call_used_regs[DI_REG] = 0;
3853 call_used_regs[XMM6_REG] = 0;
3854 call_used_regs[XMM7_REG] = 0;
3855 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3856 call_used_regs[i] = 0;
3857 }
3858
3859 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3860 other call-clobbered regs for 64-bit. */
3861 if (TARGET_64BIT)
3862 {
3863 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3864
3865 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3866 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3867 && call_used_regs[i])
3868 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3869 }
3870
3871 /* If MMX is disabled, squash the registers. */
3872 if (! TARGET_MMX)
3873 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3874 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3875 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3876
3877 /* If SSE is disabled, squash the registers. */
3878 if (! TARGET_SSE)
3879 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3880 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3881 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3882
3883 /* If the FPU is disabled, squash the registers. */
3884 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3885 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3886 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3887 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3888
3889 /* If 32-bit, squash the 64-bit registers. */
3890 if (! TARGET_64BIT)
3891 {
3892 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3893 reg_names[i] = "";
3894 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3895 reg_names[i] = "";
3896 }
3897 }
3898
3899 \f
3900 /* Save the current options */
3901
3902 static void
3903 ix86_function_specific_save (struct cl_target_option *ptr)
3904 {
3905 ptr->arch = ix86_arch;
3906 ptr->schedule = ix86_schedule;
3907 ptr->tune = ix86_tune;
3908 ptr->branch_cost = ix86_branch_cost;
3909 ptr->tune_defaulted = ix86_tune_defaulted;
3910 ptr->arch_specified = ix86_arch_specified;
3911 ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3912 ptr->ix86_target_flags_explicit = target_flags_explicit;
3913
3914 /* The fields are char but the variables are not; make sure the
3915 values fit in the fields. */
3916 gcc_assert (ptr->arch == ix86_arch);
3917 gcc_assert (ptr->schedule == ix86_schedule);
3918 gcc_assert (ptr->tune == ix86_tune);
3919 gcc_assert (ptr->branch_cost == ix86_branch_cost);
3920 }
3921
3922 /* Restore the current options */
3923
3924 static void
3925 ix86_function_specific_restore (struct cl_target_option *ptr)
3926 {
3927 enum processor_type old_tune = ix86_tune;
3928 enum processor_type old_arch = ix86_arch;
3929 unsigned int ix86_arch_mask, ix86_tune_mask;
3930 int i;
3931
3932 ix86_arch = (enum processor_type) ptr->arch;
3933 ix86_schedule = (enum attr_cpu) ptr->schedule;
3934 ix86_tune = (enum processor_type) ptr->tune;
3935 ix86_branch_cost = ptr->branch_cost;
3936 ix86_tune_defaulted = ptr->tune_defaulted;
3937 ix86_arch_specified = ptr->arch_specified;
3938 ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
3939 target_flags_explicit = ptr->ix86_target_flags_explicit;
3940
3941 /* Recreate the arch feature tests if the arch changed */
3942 if (old_arch != ix86_arch)
3943 {
3944 ix86_arch_mask = 1u << ix86_arch;
3945 for (i = 0; i < X86_ARCH_LAST; ++i)
3946 ix86_arch_features[i]
3947 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3948 }
3949
3950 /* Recreate the tune optimization tests */
3951 if (old_tune != ix86_tune)
3952 {
3953 ix86_tune_mask = 1u << ix86_tune;
3954 for (i = 0; i < X86_TUNE_LAST; ++i)
3955 ix86_tune_features[i]
3956 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3957 }
3958 }
3959
3960 /* Print the current options */
3961
3962 static void
3963 ix86_function_specific_print (FILE *file, int indent,
3964 struct cl_target_option *ptr)
3965 {
3966 char *target_string
3967 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
3968 NULL, NULL, ptr->x_ix86_fpmath, false);
3969
3970 fprintf (file, "%*sarch = %d (%s)\n",
3971 indent, "",
3972 ptr->arch,
3973 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3974 ? cpu_names[ptr->arch]
3975 : "<unknown>"));
3976
3977 fprintf (file, "%*stune = %d (%s)\n",
3978 indent, "",
3979 ptr->tune,
3980 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3981 ? cpu_names[ptr->tune]
3982 : "<unknown>"));
3983
3984 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3985
3986 if (target_string)
3987 {
3988 fprintf (file, "%*s%s\n", indent, "", target_string);
3989 free (target_string);
3990 }
3991 }
3992
3993 \f
3994 /* Inner function to process the attribute((target(...))): take an argument
3995    and set the current options from it.  If we have a list, recursively go
3996    over the list.  */
3997
3998 static bool
3999 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4000 struct gcc_options *enum_opts_set)
4001 {
4002 char *next_optstr;
4003 bool ret = true;
4004
4005 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4006 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4007 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4008 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4009 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4010
4011 enum ix86_opt_type
4012 {
4013 ix86_opt_unknown,
4014 ix86_opt_yes,
4015 ix86_opt_no,
4016 ix86_opt_str,
4017 ix86_opt_enum,
4018 ix86_opt_isa
4019 };
4020
4021 static const struct
4022 {
4023 const char *string;
4024 size_t len;
4025 enum ix86_opt_type type;
4026 int opt;
4027 int mask;
4028 } attrs[] = {
4029 /* isa options */
4030 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4031 IX86_ATTR_ISA ("abm", OPT_mabm),
4032 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4033 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4034 IX86_ATTR_ISA ("aes", OPT_maes),
4035 IX86_ATTR_ISA ("avx", OPT_mavx),
4036 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4037 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4038 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4039 IX86_ATTR_ISA ("sse", OPT_msse),
4040 IX86_ATTR_ISA ("sse2", OPT_msse2),
4041 IX86_ATTR_ISA ("sse3", OPT_msse3),
4042 IX86_ATTR_ISA ("sse4", OPT_msse4),
4043 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4044 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4045 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4046 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4047 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4048 IX86_ATTR_ISA ("xop", OPT_mxop),
4049 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4050 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4051 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4052 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4053
4054 /* enum options */
4055 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4056
4057 /* string options */
4058 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4059 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4060
4061 /* flag options */
4062 IX86_ATTR_YES ("cld",
4063 OPT_mcld,
4064 MASK_CLD),
4065
4066 IX86_ATTR_NO ("fancy-math-387",
4067 OPT_mfancy_math_387,
4068 MASK_NO_FANCY_MATH_387),
4069
4070 IX86_ATTR_YES ("ieee-fp",
4071 OPT_mieee_fp,
4072 MASK_IEEE_FP),
4073
4074 IX86_ATTR_YES ("inline-all-stringops",
4075 OPT_minline_all_stringops,
4076 MASK_INLINE_ALL_STRINGOPS),
4077
4078 IX86_ATTR_YES ("inline-stringops-dynamically",
4079 OPT_minline_stringops_dynamically,
4080 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4081
4082 IX86_ATTR_NO ("align-stringops",
4083 OPT_mno_align_stringops,
4084 MASK_NO_ALIGN_STRINGOPS),
4085
4086 IX86_ATTR_YES ("recip",
4087 OPT_mrecip,
4088 MASK_RECIP),
4089
4090 };
4091
4092 /* If this is a list, recurse to get the options. */
4093 if (TREE_CODE (args) == TREE_LIST)
4094 {
4095 bool ret = true;
4096
4097 for (; args; args = TREE_CHAIN (args))
4098 if (TREE_VALUE (args)
4099 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4100 p_strings, enum_opts_set))
4101 ret = false;
4102
4103 return ret;
4104 }
4105
4106 else if (TREE_CODE (args) != STRING_CST)
4107 gcc_unreachable ();
4108
4109 /* Handle multiple arguments separated by commas. */
4110 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4111
4112 while (next_optstr && *next_optstr != '\0')
4113 {
4114 char *p = next_optstr;
4115 char *orig_p = p;
4116 char *comma = strchr (next_optstr, ',');
4117 const char *opt_string;
4118 size_t len, opt_len;
4119 int opt;
4120 bool opt_set_p;
4121 char ch;
4122 unsigned i;
4123 enum ix86_opt_type type = ix86_opt_unknown;
4124 int mask = 0;
4125
4126 if (comma)
4127 {
4128 *comma = '\0';
4129 len = comma - next_optstr;
4130 next_optstr = comma + 1;
4131 }
4132 else
4133 {
4134 len = strlen (p);
4135 next_optstr = NULL;
4136 }
4137
4138 /* Recognize no-xxx. */
4139 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4140 {
4141 opt_set_p = false;
4142 p += 3;
4143 len -= 3;
4144 }
4145 else
4146 opt_set_p = true;
4147
4148 /* Find the option. */
4149 ch = *p;
4150 opt = N_OPTS;
4151 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4152 {
4153 type = attrs[i].type;
4154 opt_len = attrs[i].len;
4155 if (ch == attrs[i].string[0]
4156 && ((type != ix86_opt_str && type != ix86_opt_enum)
4157 ? len == opt_len
4158 : len > opt_len)
4159 && memcmp (p, attrs[i].string, opt_len) == 0)
4160 {
4161 opt = attrs[i].opt;
4162 mask = attrs[i].mask;
4163 opt_string = attrs[i].string;
4164 break;
4165 }
4166 }
4167
4168 /* Process the option. */
4169 if (opt == N_OPTS)
4170 {
4171 error ("attribute(target(\"%s\")) is unknown", orig_p);
4172 ret = false;
4173 }
4174
4175 else if (type == ix86_opt_isa)
4176 {
4177 struct cl_decoded_option decoded;
4178
4179 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4180 ix86_handle_option (&global_options, &global_options_set,
4181 &decoded, input_location);
4182 }
4183
4184 else if (type == ix86_opt_yes || type == ix86_opt_no)
4185 {
4186 if (type == ix86_opt_no)
4187 opt_set_p = !opt_set_p;
4188
4189 if (opt_set_p)
4190 target_flags |= mask;
4191 else
4192 target_flags &= ~mask;
4193 }
4194
4195 else if (type == ix86_opt_str)
4196 {
4197 if (p_strings[opt])
4198 {
4199 error ("option(\"%s\") was already specified", opt_string);
4200 ret = false;
4201 }
4202 else
4203 p_strings[opt] = xstrdup (p + opt_len);
4204 }
4205
4206 else if (type == ix86_opt_enum)
4207 {
4208 bool arg_ok;
4209 int value;
4210
4211 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4212 if (arg_ok)
4213 set_option (&global_options, enum_opts_set, opt, value,
4214 p + opt_len, DK_UNSPECIFIED, input_location,
4215 global_dc);
4216 else
4217 {
4218 error ("attribute(target(\"%s\")) is unknown", orig_p);
4219 ret = false;
4220 }
4221 }
4222
4223 else
4224 gcc_unreachable ();
4225 }
4226
4227 return ret;
4228 }
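
/* Illustrative note (not part of the original sources): the parser above
   accepts comma-separated strings such as

       __attribute__((target ("sse4.1,no-avx,fpmath=sse,arch=core2")))

   where "sse4.1" and "no-avx" are ISA options, "fpmath=" is an enum option,
   and "arch="/"tune=" are string options from the attrs[] table.  */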
4229
4230 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4231
4232 tree
4233 ix86_valid_target_attribute_tree (tree args)
4234 {
4235 const char *orig_arch_string = ix86_arch_string;
4236 const char *orig_tune_string = ix86_tune_string;
4237 enum fpmath_unit orig_fpmath_set = global_options_set.x_ix86_fpmath;
4238 int orig_tune_defaulted = ix86_tune_defaulted;
4239 int orig_arch_specified = ix86_arch_specified;
4240 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4241 tree t = NULL_TREE;
4242 int i;
4243 struct cl_target_option *def
4244 = TREE_TARGET_OPTION (target_option_default_node);
4245 struct gcc_options enum_opts_set;
4246
4247 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4248
4249 /* Process each of the options on the chain. */
4250 if (! ix86_valid_target_attribute_inner_p (args, option_strings,
4251 &enum_opts_set))
4252 return NULL_TREE;
4253
4254 /* If the changed options are different from the default, rerun
4255 ix86_option_override_internal, and then save the options away.
4256      The string options are attribute options, and will be undone
4257 when we copy the save structure. */
4258 if (ix86_isa_flags != def->x_ix86_isa_flags
4259 || target_flags != def->x_target_flags
4260 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4261 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4262 || enum_opts_set.x_ix86_fpmath)
4263 {
4264 /* If we are using the default tune= or arch=, undo the string assigned,
4265 and use the default. */
4266 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4267 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4268 else if (!orig_arch_specified)
4269 ix86_arch_string = NULL;
4270
4271 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4272 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4273 else if (orig_tune_defaulted)
4274 ix86_tune_string = NULL;
4275
4276 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4277 if (enum_opts_set.x_ix86_fpmath)
4278 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4279 else if (!TARGET_64BIT && TARGET_SSE)
4280 {
4281 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4282 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4283 }
4284
4285 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4286 ix86_option_override_internal (false);
4287
4288 /* Add any builtin functions with the new isa if any. */
4289 ix86_add_new_builtins (ix86_isa_flags);
4290
4291 /* Save the current options unless we are validating options for
4292 #pragma. */
4293 t = build_target_option_node ();
4294
4295 ix86_arch_string = orig_arch_string;
4296 ix86_tune_string = orig_tune_string;
4297 global_options_set.x_ix86_fpmath = orig_fpmath_set;
4298
4299 /* Free up memory allocated to hold the strings */
4300 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4301 free (option_strings[i]);
4302 }
4303
4304 return t;
4305 }
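
/* Illustrative note (not part of the original sources): besides the function
   attribute, the same machinery is used (via the target pragma handler) for

       #pragma GCC target ("avx")

   which validates the option string and builds a target option node in the
   same way.  */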
4306
4307 /* Hook to validate attribute((target("string"))). */
4308
4309 static bool
4310 ix86_valid_target_attribute_p (tree fndecl,
4311 tree ARG_UNUSED (name),
4312 tree args,
4313 int ARG_UNUSED (flags))
4314 {
4315 struct cl_target_option cur_target;
4316 bool ret = true;
4317 tree old_optimize = build_optimization_node ();
4318 tree new_target, new_optimize;
4319 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4320
4321 /* If the function changed the optimization levels as well as setting target
4322 options, start with the optimizations specified. */
4323 if (func_optimize && func_optimize != old_optimize)
4324 cl_optimization_restore (&global_options,
4325 TREE_OPTIMIZATION (func_optimize));
4326
4327 /* The target attributes may also change some optimization flags, so update
4328 the optimization options if necessary. */
4329 cl_target_option_save (&cur_target, &global_options);
4330 new_target = ix86_valid_target_attribute_tree (args);
4331 new_optimize = build_optimization_node ();
4332
4333 if (!new_target)
4334 ret = false;
4335
4336 else if (fndecl)
4337 {
4338 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4339
4340 if (old_optimize != new_optimize)
4341 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4342 }
4343
4344 cl_target_option_restore (&global_options, &cur_target);
4345
4346 if (old_optimize != new_optimize)
4347 cl_optimization_restore (&global_options,
4348 TREE_OPTIMIZATION (old_optimize));
4349
4350 return ret;
4351 }
4352
4353 \f
4354 /* Hook to determine if one function can safely inline another. */
4355
4356 static bool
4357 ix86_can_inline_p (tree caller, tree callee)
4358 {
4359 bool ret = false;
4360 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4361 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4362
4363 /* If callee has no option attributes, then it is ok to inline. */
4364 if (!callee_tree)
4365 ret = true;
4366
4367 /* If caller has no option attributes, but callee does then it is not ok to
4368 inline. */
4369 else if (!caller_tree)
4370 ret = false;
4371
4372 else
4373 {
4374 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4375 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4376
4377       /* Callee's ISA options should be a subset of the caller's, i.e. an SSE4
4378 	 function can inline an SSE2 function but an SSE2 function can't inline
4379 	 an SSE4 function.  */
4380 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4381 != callee_opts->x_ix86_isa_flags)
4382 ret = false;
4383
4384 /* See if we have the same non-isa options. */
4385 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4386 ret = false;
4387
4388 /* See if arch, tune, etc. are the same. */
4389 else if (caller_opts->arch != callee_opts->arch)
4390 ret = false;
4391
4392 else if (caller_opts->tune != callee_opts->tune)
4393 ret = false;
4394
4395 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4396 ret = false;
4397
4398 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4399 ret = false;
4400
4401 else
4402 ret = true;
4403 }
4404
4405 return ret;
4406 }
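
/* Illustrative example (not from the original sources): given

       static inline __attribute__((target ("sse4.2")))
       int popcnt32 (int x) { return __builtin_popcount (x); }

       __attribute__((target ("sse2")))
       int caller (int x) { return popcnt32 (x); }

   the hook above rejects inlining popcnt32 into caller, because the callee's
   ISA flags (SSE4.2) are not a subset of the caller's (SSE2).  */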
4407
4408 \f
4409 /* Remember the last target of ix86_set_current_function. */
4410 static GTY(()) tree ix86_previous_fndecl;
4411
4412 /* Establish appropriate back-end context for processing the function
4413 FNDECL. The argument might be NULL to indicate processing at top
4414 level, outside of any function scope. */
4415 static void
4416 ix86_set_current_function (tree fndecl)
4417 {
4418 /* Only change the context if the function changes. This hook is called
4419 several times in the course of compiling a function, and we don't want to
4420 slow things down too much or call target_reinit when it isn't safe. */
4421 if (fndecl && fndecl != ix86_previous_fndecl)
4422 {
4423 tree old_tree = (ix86_previous_fndecl
4424 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4425 : NULL_TREE);
4426
4427 tree new_tree = (fndecl
4428 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4429 : NULL_TREE);
4430
4431 ix86_previous_fndecl = fndecl;
4432 if (old_tree == new_tree)
4433 ;
4434
4435 else if (new_tree)
4436 {
4437 cl_target_option_restore (&global_options,
4438 TREE_TARGET_OPTION (new_tree));
4439 target_reinit ();
4440 }
4441
4442 else if (old_tree)
4443 {
4444 struct cl_target_option *def
4445 = TREE_TARGET_OPTION (target_option_current_node);
4446
4447 cl_target_option_restore (&global_options, def);
4448 target_reinit ();
4449 }
4450 }
4451 }
4452
4453 \f
4454 /* Return true if this goes in large data/bss. */
4455
4456 static bool
4457 ix86_in_large_data_p (tree exp)
4458 {
4459 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4460 return false;
4461
4462 /* Functions are never large data. */
4463 if (TREE_CODE (exp) == FUNCTION_DECL)
4464 return false;
4465
4466 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4467 {
4468 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4469 if (strcmp (section, ".ldata") == 0
4470 || strcmp (section, ".lbss") == 0)
4471 return true;
4472 return false;
4473 }
4474 else
4475 {
4476 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4477
4478 /* If this is an incomplete type with size 0, then we can't put it
4479 in data because it might be too big when completed. */
4480 if (!size || size > ix86_section_threshold)
4481 return true;
4482 }
4483
4484 return false;
4485 }
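
/* Illustrative example (not from the original sources): when compiling with
   -mcmodel=medium, a definition such as

       static char big_buffer[1 << 20];

   exceeds ix86_section_threshold (-mlarge-data-threshold, 64K by default),
   so the function above reports it as large data and the section-selection
   code below places it in .lbss rather than the ordinary .bss section.  */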
4486
4487 /* Switch to the appropriate section for output of DECL.
4488 DECL is either a `VAR_DECL' node or a constant of some sort.
4489 RELOC indicates whether forming the initial value of DECL requires
4490 link-time relocations. */
4491
4492 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4493 ATTRIBUTE_UNUSED;
4494
4495 static section *
4496 x86_64_elf_select_section (tree decl, int reloc,
4497 unsigned HOST_WIDE_INT align)
4498 {
4499 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4500 && ix86_in_large_data_p (decl))
4501 {
4502 const char *sname = NULL;
4503 unsigned int flags = SECTION_WRITE;
4504 switch (categorize_decl_for_section (decl, reloc))
4505 {
4506 case SECCAT_DATA:
4507 sname = ".ldata";
4508 break;
4509 case SECCAT_DATA_REL:
4510 sname = ".ldata.rel";
4511 break;
4512 case SECCAT_DATA_REL_LOCAL:
4513 sname = ".ldata.rel.local";
4514 break;
4515 case SECCAT_DATA_REL_RO:
4516 sname = ".ldata.rel.ro";
4517 break;
4518 case SECCAT_DATA_REL_RO_LOCAL:
4519 sname = ".ldata.rel.ro.local";
4520 break;
4521 case SECCAT_BSS:
4522 sname = ".lbss";
4523 flags |= SECTION_BSS;
4524 break;
4525 case SECCAT_RODATA:
4526 case SECCAT_RODATA_MERGE_STR:
4527 case SECCAT_RODATA_MERGE_STR_INIT:
4528 case SECCAT_RODATA_MERGE_CONST:
4529 sname = ".lrodata";
4530 flags = 0;
4531 break;
4532 case SECCAT_SRODATA:
4533 case SECCAT_SDATA:
4534 case SECCAT_SBSS:
4535 gcc_unreachable ();
4536 case SECCAT_TEXT:
4537 case SECCAT_TDATA:
4538 case SECCAT_TBSS:
4539 	  /* We don't split these for the medium model.  Place them into
4540 	     default sections and hope for the best.  */
4541 break;
4542 }
4543 if (sname)
4544 {
4545 /* We might get called with string constants, but get_named_section
4546 doesn't like them as they are not DECLs. Also, we need to set
4547 flags in that case. */
4548 if (!DECL_P (decl))
4549 return get_section (sname, flags, NULL);
4550 return get_named_section (decl, sname, reloc);
4551 }
4552 }
4553 return default_elf_select_section (decl, reloc, align);
4554 }
4555
4556 /* Build up a unique section name, expressed as a
4557 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4558 RELOC indicates whether the initial value of EXP requires
4559 link-time relocations. */
4560
4561 static void ATTRIBUTE_UNUSED
4562 x86_64_elf_unique_section (tree decl, int reloc)
4563 {
4564 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4565 && ix86_in_large_data_p (decl))
4566 {
4567 const char *prefix = NULL;
4568 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4569 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4570
4571 switch (categorize_decl_for_section (decl, reloc))
4572 {
4573 case SECCAT_DATA:
4574 case SECCAT_DATA_REL:
4575 case SECCAT_DATA_REL_LOCAL:
4576 case SECCAT_DATA_REL_RO:
4577 case SECCAT_DATA_REL_RO_LOCAL:
4578 prefix = one_only ? ".ld" : ".ldata";
4579 break;
4580 case SECCAT_BSS:
4581 prefix = one_only ? ".lb" : ".lbss";
4582 break;
4583 case SECCAT_RODATA:
4584 case SECCAT_RODATA_MERGE_STR:
4585 case SECCAT_RODATA_MERGE_STR_INIT:
4586 case SECCAT_RODATA_MERGE_CONST:
4587 prefix = one_only ? ".lr" : ".lrodata";
4588 break;
4589 case SECCAT_SRODATA:
4590 case SECCAT_SDATA:
4591 case SECCAT_SBSS:
4592 gcc_unreachable ();
4593 case SECCAT_TEXT:
4594 case SECCAT_TDATA:
4595 case SECCAT_TBSS:
4596 	  /* We don't split these for the medium model.  Place them into
4597 	     default sections and hope for the best.  */
4598 break;
4599 }
4600 if (prefix)
4601 {
4602 const char *name, *linkonce;
4603 char *string;
4604
4605 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4606 name = targetm.strip_name_encoding (name);
4607
4608 /* If we're using one_only, then there needs to be a .gnu.linkonce
4609 prefix to the section name. */
4610 linkonce = one_only ? ".gnu.linkonce" : "";
4611
4612 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4613
4614 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4615 return;
4616 }
4617 }
4618 default_unique_section (decl, reloc);
4619 }
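
/* Illustrative note (not part of the original sources): for a large-data
   variable named "big" the code above produces section names such as

       .ldata.big              (ordinary definition)
       .gnu.linkonce.ld.big    (one-only definition without COMDAT groups)

   mirroring the default_unique_section naming, but with the large-data
   prefixes.  */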
4620
4621 #ifdef COMMON_ASM_OP
4622 /* This says how to output assembler code to declare an
4623 uninitialized external linkage data object.
4624
4625    For the medium model on x86-64 we need to use the .largecomm directive
4626    for large objects.  */
4627 void
4628 x86_elf_aligned_common (FILE *file,
4629 const char *name, unsigned HOST_WIDE_INT size,
4630 int align)
4631 {
4632 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4633 && size > (unsigned int)ix86_section_threshold)
4634 fputs (".largecomm\t", file);
4635 else
4636 fputs (COMMON_ASM_OP, file);
4637 assemble_name (file, name);
4638 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4639 size, align / BITS_PER_UNIT);
4640 }
4641 #endif
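
/* Illustrative note (not part of the original sources): for a 1 MiB common
   symbol with 32-byte alignment under -mcmodel=medium (size above
   -mlarge-data-threshold) the function above emits

       .largecomm	buf,1048576,32

   whereas a small object gets the usual COMMON_ASM_OP (.comm) directive
   instead.  */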
4642
4643 /* Utility function for targets to use in implementing
4644 ASM_OUTPUT_ALIGNED_BSS. */
4645
4646 void
4647 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4648 const char *name, unsigned HOST_WIDE_INT size,
4649 int align)
4650 {
4651 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4652 && size > (unsigned int)ix86_section_threshold)
4653 switch_to_section (get_named_section (decl, ".lbss", 0));
4654 else
4655 switch_to_section (bss_section);
4656 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4657 #ifdef ASM_DECLARE_OBJECT_NAME
4658 last_assemble_variable_decl = decl;
4659 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4660 #else
4661 /* Standard thing is just output label for the object. */
4662 ASM_OUTPUT_LABEL (file, name);
4663 #endif /* ASM_DECLARE_OBJECT_NAME */
4664 ASM_OUTPUT_SKIP (file, size ? size : 1);
4665 }
4666 \f
4667 /* Decide whether we must probe the stack before any space allocation
4668 on this target. It's essentially TARGET_STACK_PROBE except when
4669 -fstack-check causes the stack to be already probed differently. */
4670
4671 bool
4672 ix86_target_stack_probe (void)
4673 {
4674 /* Do not probe the stack twice if static stack checking is enabled. */
4675 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
4676 return false;
4677
4678 return TARGET_STACK_PROBE;
4679 }
4680 \f
4681 /* Decide whether we can make a sibling call to a function. DECL is the
4682 declaration of the function being targeted by the call and EXP is the
4683 CALL_EXPR representing the call. */
4684
4685 static bool
4686 ix86_function_ok_for_sibcall (tree decl, tree exp)
4687 {
4688 tree type, decl_or_type;
4689 rtx a, b;
4690
4691 /* If we are generating position-independent code, we cannot sibcall
4692 optimize any indirect call, or a direct call to a global function,
4693 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
4694 if (!TARGET_MACHO
4695 && !TARGET_64BIT
4696 && flag_pic
4697 && (!decl || !targetm.binds_local_p (decl)))
4698 return false;
4699
4700 /* If we need to align the outgoing stack, then sibcalling would
4701 unalign the stack, which may break the called function. */
4702 if (ix86_minimum_incoming_stack_boundary (true)
4703 < PREFERRED_STACK_BOUNDARY)
4704 return false;
4705
4706 if (decl)
4707 {
4708 decl_or_type = decl;
4709 type = TREE_TYPE (decl);
4710 }
4711 else
4712 {
4713 /* We're looking at the CALL_EXPR, we need the type of the function. */
4714 type = CALL_EXPR_FN (exp); /* pointer expression */
4715 type = TREE_TYPE (type); /* pointer type */
4716 type = TREE_TYPE (type); /* function type */
4717 decl_or_type = type;
4718 }
4719
4720 /* Check that the return value locations are the same. Like
4721 if we are returning floats on the 80387 register stack, we cannot
4722 make a sibcall from a function that doesn't return a float to a
4723 function that does or, conversely, from a function that does return
4724 a float to a function that doesn't; the necessary stack adjustment
4725 would not be executed. This is also the place we notice
4726 differences in the return value ABI. Note that it is ok for one
4727 of the functions to have void return type as long as the return
4728 value of the other is passed in a register. */
4729 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4730 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4731 cfun->decl, false);
4732 if (STACK_REG_P (a) || STACK_REG_P (b))
4733 {
4734 if (!rtx_equal_p (a, b))
4735 return false;
4736 }
4737 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4738 {
4739 /* Disable sibcall if we need to generate vzeroupper after
4740 callee returns. */
4741 if (TARGET_VZEROUPPER
4742 && cfun->machine->callee_return_avx256_p
4743 && !cfun->machine->caller_return_avx256_p)
4744 return false;
4745 }
4746 else if (!rtx_equal_p (a, b))
4747 return false;
4748
4749 if (TARGET_64BIT)
4750 {
4751 /* The SYSV ABI has more call-clobbered registers;
4752 disallow sibcalls from MS to SYSV. */
4753 if (cfun->machine->call_abi == MS_ABI
4754 && ix86_function_type_abi (type) == SYSV_ABI)
4755 return false;
4756 }
4757 else
4758 {
4759 /* If this call is indirect, we'll need to be able to use a
4760 call-clobbered register for the address of the target function.
4761 Make sure that all such registers are not used for passing
4762 parameters. Note that DLLIMPORT functions are indirect. */
4763 if (!decl
4764 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4765 {
4766 if (ix86_function_regparm (type, NULL) >= 3)
4767 {
4768 /* ??? Need to count the actual number of registers to be used,
4769 not the possible number of registers. Fix later. */
4770 return false;
4771 }
4772 }
4773 }
4774
4775 /* Otherwise okay. That also includes certain types of indirect calls. */
4776 return true;
4777 }
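
/* Illustrative example (not from the original sources): with -m32 -fPIC,

       extern int bar (int);
       int foo (int x) { return bar (x); }

   the tail call to bar is not converted into a sibcall, because bar may
   bind to a PLT entry and the PLT requires %ebx to be live (the first
   check in the function above).  */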
4778
4779 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4780 and "sseregparm" calling convention attributes;
4781 arguments as in struct attribute_spec.handler. */
4782
4783 static tree
4784 ix86_handle_cconv_attribute (tree *node, tree name,
4785 tree args,
4786 int flags ATTRIBUTE_UNUSED,
4787 bool *no_add_attrs)
4788 {
4789 if (TREE_CODE (*node) != FUNCTION_TYPE
4790 && TREE_CODE (*node) != METHOD_TYPE
4791 && TREE_CODE (*node) != FIELD_DECL
4792 && TREE_CODE (*node) != TYPE_DECL)
4793 {
4794 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4795 name);
4796 *no_add_attrs = true;
4797 return NULL_TREE;
4798 }
4799
4800 /* Can combine regparm with all attributes but fastcall, and thiscall. */
4801 if (is_attribute_p ("regparm", name))
4802 {
4803 tree cst;
4804
4805 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4806 {
4807 error ("fastcall and regparm attributes are not compatible");
4808 }
4809
4810 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4811 {
4812 	  error ("regparm and thiscall attributes are not compatible");
4813 }
4814
4815 cst = TREE_VALUE (args);
4816 if (TREE_CODE (cst) != INTEGER_CST)
4817 {
4818 warning (OPT_Wattributes,
4819 "%qE attribute requires an integer constant argument",
4820 name);
4821 *no_add_attrs = true;
4822 }
4823 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4824 {
4825 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4826 name, REGPARM_MAX);
4827 *no_add_attrs = true;
4828 }
4829
4830 return NULL_TREE;
4831 }
4832
4833 if (TARGET_64BIT)
4834 {
4835 /* Do not warn when emulating the MS ABI. */
4836 if ((TREE_CODE (*node) != FUNCTION_TYPE
4837 && TREE_CODE (*node) != METHOD_TYPE)
4838 || ix86_function_type_abi (*node) != MS_ABI)
4839 warning (OPT_Wattributes, "%qE attribute ignored",
4840 name);
4841 *no_add_attrs = true;
4842 return NULL_TREE;
4843 }
4844
4845 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4846 if (is_attribute_p ("fastcall", name))
4847 {
4848 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4849 {
4850 error ("fastcall and cdecl attributes are not compatible");
4851 }
4852 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4853 {
4854 error ("fastcall and stdcall attributes are not compatible");
4855 }
4856 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4857 {
4858 error ("fastcall and regparm attributes are not compatible");
4859 }
4860 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4861 {
4862 error ("fastcall and thiscall attributes are not compatible");
4863 }
4864 }
4865
4866 /* Can combine stdcall with fastcall (redundant), regparm and
4867 sseregparm. */
4868 else if (is_attribute_p ("stdcall", name))
4869 {
4870 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4871 {
4872 error ("stdcall and cdecl attributes are not compatible");
4873 }
4874 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4875 {
4876 error ("stdcall and fastcall attributes are not compatible");
4877 }
4878 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4879 {
4880 error ("stdcall and thiscall attributes are not compatible");
4881 }
4882 }
4883
4884 /* Can combine cdecl with regparm and sseregparm. */
4885 else if (is_attribute_p ("cdecl", name))
4886 {
4887 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4888 {
4889 error ("stdcall and cdecl attributes are not compatible");
4890 }
4891 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4892 {
4893 error ("fastcall and cdecl attributes are not compatible");
4894 }
4895 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4896 {
4897 error ("cdecl and thiscall attributes are not compatible");
4898 }
4899 }
4900 else if (is_attribute_p ("thiscall", name))
4901 {
4902 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
4903 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
4904 name);
4905 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4906 {
4907 error ("stdcall and thiscall attributes are not compatible");
4908 }
4909 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4910 {
4911 error ("fastcall and thiscall attributes are not compatible");
4912 }
4913 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4914 {
4915 error ("cdecl and thiscall attributes are not compatible");
4916 }
4917 }
4918
4919 /* Can combine sseregparm with all attributes. */
4920
4921 return NULL_TREE;
4922 }
4923
4924 /* This function determines from TYPE the calling-convention. */
4925
4926 unsigned int
4927 ix86_get_callcvt (const_tree type)
4928 {
4929 unsigned int ret = 0;
4930 bool is_stdarg;
4931 tree attrs;
4932
4933 if (TARGET_64BIT)
4934 return IX86_CALLCVT_CDECL;
4935
4936 attrs = TYPE_ATTRIBUTES (type);
4937 if (attrs != NULL_TREE)
4938 {
4939 if (lookup_attribute ("cdecl", attrs))
4940 ret |= IX86_CALLCVT_CDECL;
4941 else if (lookup_attribute ("stdcall", attrs))
4942 ret |= IX86_CALLCVT_STDCALL;
4943 else if (lookup_attribute ("fastcall", attrs))
4944 ret |= IX86_CALLCVT_FASTCALL;
4945 else if (lookup_attribute ("thiscall", attrs))
4946 ret |= IX86_CALLCVT_THISCALL;
4947
4948 /* Regparm isn't allowed for thiscall and fastcall. */
4949 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
4950 {
4951 if (lookup_attribute ("regparm", attrs))
4952 ret |= IX86_CALLCVT_REGPARM;
4953 if (lookup_attribute ("sseregparm", attrs))
4954 ret |= IX86_CALLCVT_SSEREGPARM;
4955 }
4956
4957 if (IX86_BASE_CALLCVT(ret) != 0)
4958 return ret;
4959 }
4960
4961 is_stdarg = stdarg_p (type);
4962 if (TARGET_RTD && !is_stdarg)
4963 return IX86_CALLCVT_STDCALL | ret;
4964
4965 if (ret != 0
4966 || is_stdarg
4967 || TREE_CODE (type) != METHOD_TYPE
4968 || ix86_function_type_abi (type) != MS_ABI)
4969 return IX86_CALLCVT_CDECL | ret;
4970
4971 return IX86_CALLCVT_THISCALL;
4972 }
4973
4974 /* Return 0 if the attributes for two types are incompatible, 1 if they
4975 are compatible, and 2 if they are nearly compatible (which causes a
4976 warning to be generated). */
4977
4978 static int
4979 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4980 {
4981 unsigned int ccvt1, ccvt2;
4982
4983 if (TREE_CODE (type1) != FUNCTION_TYPE
4984 && TREE_CODE (type1) != METHOD_TYPE)
4985 return 1;
4986
4987 ccvt1 = ix86_get_callcvt (type1);
4988 ccvt2 = ix86_get_callcvt (type2);
4989 if (ccvt1 != ccvt2)
4990 return 0;
4991 if (ix86_function_regparm (type1, NULL)
4992 != ix86_function_regparm (type2, NULL))
4993 return 0;
4994
4995 return 1;
4996 }
4997 \f
4998 /* Return the regparm value for a function with the indicated TYPE and DECL.
4999 DECL may be NULL when calling function indirectly
5000 or considering a libcall. */
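/* For instance (illustrative only), under the 32-bit ABI a declaration
   such as

     int __attribute__((regparm (3))) add3 (int a, int b, int c);

   makes the first three integer arguments arrive in EAX, EDX and ECX
   instead of on the stack, and this function returns that count (3).  */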
5001
5002 static int
5003 ix86_function_regparm (const_tree type, const_tree decl)
5004 {
5005 tree attr;
5006 int regparm;
5007 unsigned int ccvt;
5008
5009 if (TARGET_64BIT)
5010 return (ix86_function_type_abi (type) == SYSV_ABI
5011 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5012 ccvt = ix86_get_callcvt (type);
5013 regparm = ix86_regparm;
5014
5015 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5016 {
5017 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5018 if (attr)
5019 {
5020 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5021 return regparm;
5022 }
5023 }
5024 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5025 return 2;
5026 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5027 return 1;
5028
5029 /* Use register calling convention for local functions when possible. */
5030 if (decl
5031 && TREE_CODE (decl) == FUNCTION_DECL
5032 && optimize
5033 && !(profile_flag && !flag_fentry))
5034 {
5035 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5036 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5037 if (i && i->local && i->can_change_signature)
5038 {
5039 int local_regparm, globals = 0, regno;
5040
5041 /* Make sure no regparm register is taken by a
5042 fixed register variable. */
5043 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5044 if (fixed_regs[local_regparm])
5045 break;
5046
5047 /* We don't want to use regparm(3) for nested functions as
5048 these use a static chain pointer in the third argument. */
5049 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5050 local_regparm = 2;
5051
5052 /* In 32-bit mode save a register for the split stack. */
5053 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5054 local_regparm = 2;
5055
5056 /* Each fixed register usage increases register pressure,
5057 so fewer registers should be used for argument passing.
5058 This functionality can be overridden by an explicit
5059 regparm value. */
5060 for (regno = 0; regno <= DI_REG; regno++)
5061 if (fixed_regs[regno])
5062 globals++;
5063
5064 local_regparm
5065 = globals < local_regparm ? local_regparm - globals : 0;
5066
5067 if (local_regparm > regparm)
5068 regparm = local_regparm;
5069 }
5070 }
5071
5072 return regparm;
5073 }
5074
5075 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5076 DFmode (2) arguments in SSE registers for a function with the
5077 indicated TYPE and DECL. DECL may be NULL when calling function
5078 indirectly or considering a libcall. Otherwise return 0. */
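/* A small illustrative example (not from the sources): given

     double __attribute__((sseregparm)) scale (double x, double y);

   with SSE2 enabled, the SFmode/DFmode arguments are passed in SSE
   registers rather than on the stack, and this function returns 2.  */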
5079
5080 static int
5081 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5082 {
5083 gcc_assert (!TARGET_64BIT);
5084
5085 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5086 by the sseregparm attribute. */
5087 if (TARGET_SSEREGPARM
5088 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5089 {
5090 if (!TARGET_SSE)
5091 {
5092 if (warn)
5093 {
5094 if (decl)
5095 error ("calling %qD with attribute sseregparm without "
5096 "SSE/SSE2 enabled", decl);
5097 else
5098 error ("calling %qT with attribute sseregparm without "
5099 "SSE/SSE2 enabled", type);
5100 }
5101 return 0;
5102 }
5103
5104 return 2;
5105 }
5106
5107 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5108 (and DFmode for SSE2) arguments in SSE registers. */
5109 if (decl && TARGET_SSE_MATH && optimize
5110 && !(profile_flag && !flag_fentry))
5111 {
5112 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5113 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5114 if (i && i->local && i->can_change_signature)
5115 return TARGET_SSE2 ? 2 : 1;
5116 }
5117
5118 return 0;
5119 }
5120
5121 /* Return true if EAX is live at the start of the function. Used by
5122 ix86_expand_prologue to determine if we need special help before
5123 calling allocate_stack_worker. */
5124
5125 static bool
5126 ix86_eax_live_at_start_p (void)
5127 {
5128 /* Cheat. Don't bother working forward from ix86_function_regparm
5129 to the function type to whether an actual argument is located in
5130 eax. Instead just look at cfg info, which is still close enough
5131 to correct at this point. This gives false positives for broken
5132 functions that might use uninitialized data that happens to be
5133 allocated in eax, but who cares? */
5134 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
5135 }
5136
5137 static bool
5138 ix86_keep_aggregate_return_pointer (tree fntype)
5139 {
5140 tree attr;
5141
5142 if (!TARGET_64BIT)
5143 {
5144 attr = lookup_attribute ("callee_pop_aggregate_return",
5145 TYPE_ATTRIBUTES (fntype));
5146 if (attr)
5147 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5148
5149 /* For the 32-bit MS ABI the default is to keep the aggregate
5150 return pointer. */
5151 if (ix86_function_type_abi (fntype) == MS_ABI)
5152 return true;
5153 }
5154 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5155 }
5156
5157 /* Value is the number of bytes of arguments automatically
5158 popped when returning from a subroutine call.
5159 FUNDECL is the declaration node of the function (as a tree),
5160 FUNTYPE is the data type of the function (as a tree),
5161 or for a library call it is an identifier node for the subroutine name.
5162 SIZE is the number of bytes of arguments passed on the stack.
5163
5164 On the 80386, the RTD insn may be used to pop them if the number
5165 of args is fixed, but if the number is variable then the caller
5166 must pop them all. RTD can't be used for library calls now
5167 because the library is compiled with the Unix compiler.
5168 Use of RTD is a selectable option, since it is incompatible with
5169 standard Unix calling sequences. If the option is not selected,
5170 the caller must always pop the args.
5171
5172 The attribute stdcall is equivalent to RTD on a per module basis. */
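/* For example (illustrative only), a 32-bit function declared as

     int __attribute__((stdcall)) f (int a, int b);

   pops its own 8 bytes of arguments (the callee returns with "ret 8"),
   so this hook returns SIZE for it, whereas a plain cdecl function
   returns 0 and leaves the cleanup to the caller.  */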
5173
5174 static int
5175 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5176 {
5177 unsigned int ccvt;
5178
5179 /* None of the 64-bit ABIs pop arguments. */
5180 if (TARGET_64BIT)
5181 return 0;
5182
5183 ccvt = ix86_get_callcvt (funtype);
5184
5185 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5186 | IX86_CALLCVT_THISCALL)) != 0
5187 && ! stdarg_p (funtype))
5188 return size;
5189
5190 /* Lose any fake structure return argument if it is passed on the stack. */
5191 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5192 && !ix86_keep_aggregate_return_pointer (funtype))
5193 {
5194 int nregs = ix86_function_regparm (funtype, fundecl);
5195 if (nregs == 0)
5196 return GET_MODE_SIZE (Pmode);
5197 }
5198
5199 return 0;
5200 }
5201 \f
5202 /* Argument support functions. */
5203
5204 /* Return true when register may be used to pass function parameters. */
5205 bool
5206 ix86_function_arg_regno_p (int regno)
5207 {
5208 int i;
5209 const int *parm_regs;
5210
5211 if (!TARGET_64BIT)
5212 {
5213 if (TARGET_MACHO)
5214 return (regno < REGPARM_MAX
5215 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5216 else
5217 return (regno < REGPARM_MAX
5218 || (TARGET_MMX && MMX_REGNO_P (regno)
5219 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5220 || (TARGET_SSE && SSE_REGNO_P (regno)
5221 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5222 }
5223
5224 if (TARGET_MACHO)
5225 {
5226 if (SSE_REGNO_P (regno) && TARGET_SSE)
5227 return true;
5228 }
5229 else
5230 {
5231 if (TARGET_SSE && SSE_REGNO_P (regno)
5232 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5233 return true;
5234 }
5235
5236 /* TODO: The function should depend on current function ABI but
5237 builtins.c would need updating then. Therefore we use the
5238 default ABI. */
5239
5240 /* RAX is used as hidden argument to va_arg functions. */
5241 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5242 return true;
5243
5244 if (ix86_abi == MS_ABI)
5245 parm_regs = x86_64_ms_abi_int_parameter_registers;
5246 else
5247 parm_regs = x86_64_int_parameter_registers;
5248 for (i = 0; i < (ix86_abi == MS_ABI
5249 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5250 if (regno == parm_regs[i])
5251 return true;
5252 return false;
5253 }
5254
5255 /* Return true if we do not know how to pass TYPE solely in registers. */
5256
5257 static bool
5258 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5259 {
5260 if (must_pass_in_stack_var_size_or_pad (mode, type))
5261 return true;
5262
5263 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5264 The layout_type routine is crafty and tries to trick us into passing
5265 currently unsupported vector types on the stack by using TImode. */
5266 return (!TARGET_64BIT && mode == TImode
5267 && type && TREE_CODE (type) != VECTOR_TYPE);
5268 }
5269
5270 /* Return the size, in bytes, of the area reserved for arguments passed
5271 in registers for the function represented by FNDECL, depending on the
5272 ABI in use. */
5273 int
5274 ix86_reg_parm_stack_space (const_tree fndecl)
5275 {
5276 enum calling_abi call_abi = SYSV_ABI;
5277 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5278 call_abi = ix86_function_abi (fndecl);
5279 else
5280 call_abi = ix86_function_type_abi (fndecl);
5281 if (TARGET_64BIT && call_abi == MS_ABI)
5282 return 32;
5283 return 0;
5284 }
5285
5286 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
5287 call ABI used. */
5288 enum calling_abi
5289 ix86_function_type_abi (const_tree fntype)
5290 {
5291 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
5292 {
5293 enum calling_abi abi = ix86_abi;
5294 if (abi == SYSV_ABI)
5295 {
5296 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5297 abi = MS_ABI;
5298 }
5299 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5300 abi = SYSV_ABI;
5301 return abi;
5302 }
5303 return ix86_abi;
5304 }
5305
5306 static bool
5307 ix86_function_ms_hook_prologue (const_tree fn)
5308 {
5309 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5310 {
5311 if (decl_function_context (fn) != NULL_TREE)
5312 error_at (DECL_SOURCE_LOCATION (fn),
5313 "ms_hook_prologue is not compatible with nested function");
5314 else
5315 return true;
5316 }
5317 return false;
5318 }
5319
5320 static enum calling_abi
5321 ix86_function_abi (const_tree fndecl)
5322 {
5323 if (! fndecl)
5324 return ix86_abi;
5325 return ix86_function_type_abi (TREE_TYPE (fndecl));
5326 }
5327
5328 /* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the
5329 call ABI used. */
5330 enum calling_abi
5331 ix86_cfun_abi (void)
5332 {
5333 if (! cfun)
5334 return ix86_abi;
5335 return cfun->machine->call_abi;
5336 }
5337
5338 /* Write the extra assembler code needed to declare a function properly. */
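/* A function can request the hot-patch prologue emitted below with the
   ms_hook_prologue attribute, e.g. (illustrative only):

     void __attribute__((ms_hook_prologue)) hook_target (void);

   which pads the area before the label with 0xCC filler bytes and
   starts the function with a recognizable, patchable instruction
   sequence.  */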
5339
5340 void
5341 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5342 tree decl)
5343 {
5344 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5345
5346 if (is_ms_hook)
5347 {
5348 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5349 unsigned int filler_cc = 0xcccccccc;
5350
5351 for (i = 0; i < filler_count; i += 4)
5352 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5353 }
5354
5355 #ifdef SUBTARGET_ASM_UNWIND_INIT
5356 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
5357 #endif
5358
5359 ASM_OUTPUT_LABEL (asm_out_file, fname);
5360
5361 /* Output magic byte marker, if hot-patch attribute is set. */
5362 if (is_ms_hook)
5363 {
5364 if (TARGET_64BIT)
5365 {
5366 /* leaq [%rsp + 0], %rsp */
5367 asm_fprintf (asm_out_file, ASM_BYTE
5368 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5369 }
5370 else
5371 {
5372 /* movl.s %edi, %edi
5373 push %ebp
5374 movl.s %esp, %ebp */
5375 asm_fprintf (asm_out_file, ASM_BYTE
5376 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5377 }
5378 }
5379 }
5380
5381 /* regclass.c */
5382 extern void init_regs (void);
5383
5384 /* Implementation of the call ABI switching target hook. Set up the
5385 call register sets specific to FNDECL. See also
5386 ix86_conditional_register_usage for more details. */
5387 void
5388 ix86_call_abi_override (const_tree fndecl)
5389 {
5390 if (fndecl == NULL_TREE)
5391 cfun->machine->call_abi = ix86_abi;
5392 else
5393 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5394 }
5395
5396 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers.
5397 Avoid expensive re-initialization of init_regs each time we switch function
5398 context, since this is needed only during RTL expansion. */
5399 static void
5400 ix86_maybe_switch_abi (void)
5401 {
5402 if (TARGET_64BIT &&
5403 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5404 reinit_regs ();
5405 }
5406
5407 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5408 for a call to a function whose data type is FNTYPE.
5409 For a library call, FNTYPE is 0. */
5410
5411 void
5412 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5413 tree fntype, /* tree ptr for function decl */
5414 rtx libname, /* SYMBOL_REF of library name or 0 */
5415 tree fndecl,
5416 int caller)
5417 {
5418 struct cgraph_local_info *i;
5419 tree fnret_type;
5420
5421 memset (cum, 0, sizeof (*cum));
5422
5423 /* Initialize for the current callee. */
5424 if (caller)
5425 {
5426 cfun->machine->callee_pass_avx256_p = false;
5427 cfun->machine->callee_return_avx256_p = false;
5428 }
5429
5430 if (fndecl)
5431 {
5432 i = cgraph_local_info (fndecl);
5433 cum->call_abi = ix86_function_abi (fndecl);
5434 fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
5435 }
5436 else
5437 {
5438 i = NULL;
5439 cum->call_abi = ix86_function_type_abi (fntype);
5440 if (fntype)
5441 fnret_type = TREE_TYPE (fntype);
5442 else
5443 fnret_type = NULL;
5444 }
5445
5446 if (TARGET_VZEROUPPER && fnret_type)
5447 {
5448 rtx fnret_value = ix86_function_value (fnret_type, fntype,
5449 false);
5450 if (function_pass_avx256_p (fnret_value))
5451 {
5452 /* The return value of this function uses 256bit AVX modes. */
5453 if (caller)
5454 cfun->machine->callee_return_avx256_p = true;
5455 else
5456 cfun->machine->caller_return_avx256_p = true;
5457 }
5458 }
5459
5460 cum->caller = caller;
5461
5462 /* Set up the number of registers to use for passing arguments. */
5463
5464 if (TARGET_64BIT && cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5465 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5466 "or subtarget optimization implying it");
5467 cum->nregs = ix86_regparm;
5468 if (TARGET_64BIT)
5469 {
5470 cum->nregs = (cum->call_abi == SYSV_ABI
5471 ? X86_64_REGPARM_MAX
5472 : X86_64_MS_REGPARM_MAX);
5473 }
5474 if (TARGET_SSE)
5475 {
5476 cum->sse_nregs = SSE_REGPARM_MAX;
5477 if (TARGET_64BIT)
5478 {
5479 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5480 ? X86_64_SSE_REGPARM_MAX
5481 : X86_64_MS_SSE_REGPARM_MAX);
5482 }
5483 }
5484 if (TARGET_MMX)
5485 cum->mmx_nregs = MMX_REGPARM_MAX;
5486 cum->warn_avx = true;
5487 cum->warn_sse = true;
5488 cum->warn_mmx = true;
5489
5490 /* Because types might mismatch between caller and callee, we need to
5491 use the actual type of the function for local calls.
5492 FIXME: cgraph_analyze can be told to actually record if function uses
5493 va_start so for local functions maybe_vaarg can be made aggressive
5494 helping K&R code.
5495 FIXME: once the type system is fixed, we won't need this code anymore. */
5496 if (i && i->local && i->can_change_signature)
5497 fntype = TREE_TYPE (fndecl);
5498 cum->maybe_vaarg = (fntype
5499 ? (!prototype_p (fntype) || stdarg_p (fntype))
5500 : !libname);
5501
5502 if (!TARGET_64BIT)
5503 {
5504 /* If there are variable arguments, then we won't pass anything
5505 in registers in 32-bit mode. */
5506 if (stdarg_p (fntype))
5507 {
5508 cum->nregs = 0;
5509 cum->sse_nregs = 0;
5510 cum->mmx_nregs = 0;
5511 cum->warn_avx = 0;
5512 cum->warn_sse = 0;
5513 cum->warn_mmx = 0;
5514 return;
5515 }
5516
5517 /* Use ecx and edx registers if function has fastcall attribute,
5518 else look for regparm information. */
5519 if (fntype)
5520 {
5521 unsigned int ccvt = ix86_get_callcvt (fntype);
5522 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5523 {
5524 cum->nregs = 1;
5525 cum->fastcall = 1; /* Same first register as in fastcall. */
5526 }
5527 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5528 {
5529 cum->nregs = 2;
5530 cum->fastcall = 1;
5531 }
5532 else
5533 cum->nregs = ix86_function_regparm (fntype, fndecl);
5534 }
5535
5536 /* Set up the number of SSE registers used for passing SFmode
5537 and DFmode arguments. Warn for mismatching ABI. */
5538 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
5539 }
5540 }
5541
5542 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5543 But in the case of vector types, it is some vector mode.
5544
5545 When we have only some of our vector isa extensions enabled, then there
5546 are some modes for which vector_mode_supported_p is false. For these
5547 modes, the generic vector support in gcc will choose some non-vector mode
5548 in order to implement the type. By computing the natural mode, we'll
5549 select the proper ABI location for the operand and not depend on whatever
5550 the middle-end decides to do with these vector types.
5551
5552 The middle-end can't deal with vector types larger than 16 bytes. In
5553 that case, we return the original mode and warn about the ABI change
5554 if CUM isn't NULL. */
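/* For instance (illustrative only), given

     typedef int v4si __attribute__((vector_size (16)));

   the natural mode of v4si is V4SImode even when SSE is disabled and
   the middle-end would otherwise fall back to a non-vector mode, while
   a 32-byte vector type without AVX keeps TYPE_MODE and triggers the
   ABI-change warning below.  */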
5555
5556 static enum machine_mode
5557 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5558 {
5559 enum machine_mode mode = TYPE_MODE (type);
5560
5561 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5562 {
5563 HOST_WIDE_INT size = int_size_in_bytes (type);
5564 if ((size == 8 || size == 16 || size == 32)
5565 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5566 && TYPE_VECTOR_SUBPARTS (type) > 1)
5567 {
5568 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5569
5570 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5571 mode = MIN_MODE_VECTOR_FLOAT;
5572 else
5573 mode = MIN_MODE_VECTOR_INT;
5574
5575 /* Get the mode which has this inner mode and number of units. */
5576 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5577 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5578 && GET_MODE_INNER (mode) == innermode)
5579 {
5580 if (size == 32 && !TARGET_AVX)
5581 {
5582 static bool warnedavx;
5583
5584 if (cum
5585 && !warnedavx
5586 && cum->warn_avx)
5587 {
5588 warnedavx = true;
5589 warning (0, "AVX vector argument without AVX "
5590 "enabled changes the ABI");
5591 }
5592 return TYPE_MODE (type);
5593 }
5594 else
5595 return mode;
5596 }
5597
5598 gcc_unreachable ();
5599 }
5600 }
5601
5602 return mode;
5603 }
5604
5605 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5606 this may not agree with the mode that the type system has chosen for the
5607 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5608 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
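/* A rough sketch of the two shapes produced here (illustrative only;
   the register number is hypothetical): for ORIG_MODE == V4SFmode the
   result is simply (reg:V4SF xmm0), whereas for ORIG_MODE == BLKmode
   it becomes something like

     (parallel [(expr_list (reg:V4SF xmm0) (const_int 0))])

   so the middle-end still knows which hard register carries the
   value.  */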
5609
5610 static rtx
5611 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5612 unsigned int regno)
5613 {
5614 rtx tmp;
5615
5616 if (orig_mode != BLKmode)
5617 tmp = gen_rtx_REG (orig_mode, regno);
5618 else
5619 {
5620 tmp = gen_rtx_REG (mode, regno);
5621 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5622 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5623 }
5624
5625 return tmp;
5626 }
5627
5628 /* x86-64 register passing implementation. See the x86-64 ABI for details.
5629 The goal of this code is to classify each 8 bytes of the incoming argument
5630 by register class and assign registers accordingly. */
5631
5632 /* Return the union class of CLASS1 and CLASS2.
5633 See the x86-64 PS ABI for details. */
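/* For example (illustrative only), for the single eightbyte of

     struct s { float f; int i; };

   the SSE class of F is merged with the integer class of I and, by
   rule #4 below, the whole word ends up in an integer class, i.e. it
   is passed in a general-purpose register.  */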
5634
5635 static enum x86_64_reg_class
5636 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5637 {
5638 /* Rule #1: If both classes are equal, this is the resulting class. */
5639 if (class1 == class2)
5640 return class1;
5641
5642 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5643 the other class. */
5644 if (class1 == X86_64_NO_CLASS)
5645 return class2;
5646 if (class2 == X86_64_NO_CLASS)
5647 return class1;
5648
5649 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5650 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5651 return X86_64_MEMORY_CLASS;
5652
5653 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5654 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5655 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5656 return X86_64_INTEGERSI_CLASS;
5657 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5658 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5659 return X86_64_INTEGER_CLASS;
5660
5661 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5662 MEMORY is used. */
5663 if (class1 == X86_64_X87_CLASS
5664 || class1 == X86_64_X87UP_CLASS
5665 || class1 == X86_64_COMPLEX_X87_CLASS
5666 || class2 == X86_64_X87_CLASS
5667 || class2 == X86_64_X87UP_CLASS
5668 || class2 == X86_64_COMPLEX_X87_CLASS)
5669 return X86_64_MEMORY_CLASS;
5670
5671 /* Rule #6: Otherwise class SSE is used. */
5672 return X86_64_SSE_CLASS;
5673 }
5674
5675 /* Classify the argument of type TYPE and mode MODE.
5676 CLASSES will be filled by the register class used to pass each word
5677 of the operand. The number of words is returned. In case the parameter
5678 should be passed in memory, 0 is returned. As a special case for zero
5679 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5680
5681 BIT_OFFSET is used internally for handling records and specifies the
5682 offset in bits modulo 256 to avoid overflow cases.
5683
5684 See the x86-64 PS ABI for details.
5685 */
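/* As a worked example (illustrative only), for

     struct p { double x; long y; };

   the first eightbyte classifies as X86_64_SSEDF_CLASS and the second
   as X86_64_INTEGER_CLASS, so such a value is passed partly in an SSE
   register and partly in a general-purpose register, while a structure
   larger than 32 bytes returns 0 here and goes to memory.  */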
5686
5687 static int
5688 classify_argument (enum machine_mode mode, const_tree type,
5689 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5690 {
5691 HOST_WIDE_INT bytes =
5692 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5693 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5694
5695 /* Variable sized entities are always passed/returned in memory. */
5696 if (bytes < 0)
5697 return 0;
5698
5699 if (mode != VOIDmode
5700 && targetm.calls.must_pass_in_stack (mode, type))
5701 return 0;
5702
5703 if (type && AGGREGATE_TYPE_P (type))
5704 {
5705 int i;
5706 tree field;
5707 enum x86_64_reg_class subclasses[MAX_CLASSES];
5708
5709 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5710 if (bytes > 32)
5711 return 0;
5712
5713 for (i = 0; i < words; i++)
5714 classes[i] = X86_64_NO_CLASS;
5715
5716 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
5717 signal the memory class, so handle this as a special case. */
5718 if (!words)
5719 {
5720 classes[0] = X86_64_NO_CLASS;
5721 return 1;
5722 }
5723
5724 /* Classify each field of record and merge classes. */
5725 switch (TREE_CODE (type))
5726 {
5727 case RECORD_TYPE:
5728 /* And now merge the fields of structure. */
5729 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5730 {
5731 if (TREE_CODE (field) == FIELD_DECL)
5732 {
5733 int num;
5734
5735 if (TREE_TYPE (field) == error_mark_node)
5736 continue;
5737
5738 /* Bitfields are always classified as integer. Handle them
5739 early, since later code would consider them to be
5740 misaligned integers. */
5741 if (DECL_BIT_FIELD (field))
5742 {
5743 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5744 i < ((int_bit_position (field) + (bit_offset % 64))
5745 + tree_low_cst (DECL_SIZE (field), 0)
5746 + 63) / 8 / 8; i++)
5747 classes[i] =
5748 merge_classes (X86_64_INTEGER_CLASS,
5749 classes[i]);
5750 }
5751 else
5752 {
5753 int pos;
5754
5755 type = TREE_TYPE (field);
5756
5757 /* Flexible array member is ignored. */
5758 if (TYPE_MODE (type) == BLKmode
5759 && TREE_CODE (type) == ARRAY_TYPE
5760 && TYPE_SIZE (type) == NULL_TREE
5761 && TYPE_DOMAIN (type) != NULL_TREE
5762 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5763 == NULL_TREE))
5764 {
5765 static bool warned;
5766
5767 if (!warned && warn_psabi)
5768 {
5769 warned = true;
5770 inform (input_location,
5771 "the ABI of passing struct with"
5772 " a flexible array member has"
5773 " changed in GCC 4.4");
5774 }
5775 continue;
5776 }
5777 num = classify_argument (TYPE_MODE (type), type,
5778 subclasses,
5779 (int_bit_position (field)
5780 + bit_offset) % 256);
5781 if (!num)
5782 return 0;
5783 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5784 for (i = 0; i < num && (i + pos) < words; i++)
5785 classes[i + pos] =
5786 merge_classes (subclasses[i], classes[i + pos]);
5787 }
5788 }
5789 }
5790 break;
5791
5792 case ARRAY_TYPE:
5793 /* Arrays are handled as small records. */
5794 {
5795 int num;
5796 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5797 TREE_TYPE (type), subclasses, bit_offset);
5798 if (!num)
5799 return 0;
5800
5801 /* The partial classes are now full classes. */
5802 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5803 subclasses[0] = X86_64_SSE_CLASS;
5804 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5805 && !((bit_offset % 64) == 0 && bytes == 4))
5806 subclasses[0] = X86_64_INTEGER_CLASS;
5807
5808 for (i = 0; i < words; i++)
5809 classes[i] = subclasses[i % num];
5810
5811 break;
5812 }
5813 case UNION_TYPE:
5814 case QUAL_UNION_TYPE:
5815 /* Unions are similar to RECORD_TYPE but offset is always 0. */
5817 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5818 {
5819 if (TREE_CODE (field) == FIELD_DECL)
5820 {
5821 int num;
5822
5823 if (TREE_TYPE (field) == error_mark_node)
5824 continue;
5825
5826 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5827 TREE_TYPE (field), subclasses,
5828 bit_offset);
5829 if (!num)
5830 return 0;
5831 for (i = 0; i < num; i++)
5832 classes[i] = merge_classes (subclasses[i], classes[i]);
5833 }
5834 }
5835 break;
5836
5837 default:
5838 gcc_unreachable ();
5839 }
5840
5841 if (words > 2)
5842 {
5843 /* When the size is > 16 bytes, if the first class isn't
5844 X86_64_SSE_CLASS or any of the others isn't
5845 X86_64_SSEUP_CLASS, everything should be passed in
5846 memory. */
5847 if (classes[0] != X86_64_SSE_CLASS)
5848 return 0;
5849
5850 for (i = 1; i < words; i++)
5851 if (classes[i] != X86_64_SSEUP_CLASS)
5852 return 0;
5853 }
5854
5855 /* Final merger cleanup. */
5856 for (i = 0; i < words; i++)
5857 {
5858 /* If one class is MEMORY, everything should be passed in
5859 memory. */
5860 if (classes[i] == X86_64_MEMORY_CLASS)
5861 return 0;
5862
5863 /* X86_64_SSEUP_CLASS should always be preceded by
5864 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5865 if (classes[i] == X86_64_SSEUP_CLASS
5866 && classes[i - 1] != X86_64_SSE_CLASS
5867 && classes[i - 1] != X86_64_SSEUP_CLASS)
5868 {
5869 /* The first one should never be X86_64_SSEUP_CLASS. */
5870 gcc_assert (i != 0);
5871 classes[i] = X86_64_SSE_CLASS;
5872 }
5873
5874 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5875 everything should be passed in memory. */
5876 if (classes[i] == X86_64_X87UP_CLASS
5877 && (classes[i - 1] != X86_64_X87_CLASS))
5878 {
5879 static bool warned;
5880
5881 /* The first one should never be X86_64_X87UP_CLASS. */
5882 gcc_assert (i != 0);
5883 if (!warned && warn_psabi)
5884 {
5885 warned = true;
5886 inform (input_location,
5887 "the ABI of passing union with long double"
5888 " has changed in GCC 4.4");
5889 }
5890 return 0;
5891 }
5892 }
5893 return words;
5894 }
5895
5896 /* Compute the alignment needed. We align all types to natural boundaries,
5897 with the exception of XFmode that is aligned to 64bits. */
5898 if (mode != VOIDmode && mode != BLKmode)
5899 {
5900 int mode_alignment = GET_MODE_BITSIZE (mode);
5901
5902 if (mode == XFmode)
5903 mode_alignment = 128;
5904 else if (mode == XCmode)
5905 mode_alignment = 256;
5906 if (COMPLEX_MODE_P (mode))
5907 mode_alignment /= 2;
5908 /* Misaligned fields are always returned in memory. */
5909 if (bit_offset % mode_alignment)
5910 return 0;
5911 }
5912
5913 /* For V1xx modes, just use the base mode. */
5914 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
5915 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5916 mode = GET_MODE_INNER (mode);
5917
5918 /* Classification of atomic types. */
5919 switch (mode)
5920 {
5921 case SDmode:
5922 case DDmode:
5923 classes[0] = X86_64_SSE_CLASS;
5924 return 1;
5925 case TDmode:
5926 classes[0] = X86_64_SSE_CLASS;
5927 classes[1] = X86_64_SSEUP_CLASS;
5928 return 2;
5929 case DImode:
5930 case SImode:
5931 case HImode:
5932 case QImode:
5933 case CSImode:
5934 case CHImode:
5935 case CQImode:
5936 {
5937 int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
5938
5939 if (size <= 32)
5940 {
5941 classes[0] = X86_64_INTEGERSI_CLASS;
5942 return 1;
5943 }
5944 else if (size <= 64)
5945 {
5946 classes[0] = X86_64_INTEGER_CLASS;
5947 return 1;
5948 }
5949 else if (size <= 64+32)
5950 {
5951 classes[0] = X86_64_INTEGER_CLASS;
5952 classes[1] = X86_64_INTEGERSI_CLASS;
5953 return 2;
5954 }
5955 else if (size <= 64+64)
5956 {
5957 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5958 return 2;
5959 }
5960 else
5961 gcc_unreachable ();
5962 }
5963 case CDImode:
5964 case TImode:
5965 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5966 return 2;
5967 case COImode:
5968 case OImode:
5969 /* OImode shouldn't be used directly. */
5970 gcc_unreachable ();
5971 case CTImode:
5972 return 0;
5973 case SFmode:
5974 if (!(bit_offset % 64))
5975 classes[0] = X86_64_SSESF_CLASS;
5976 else
5977 classes[0] = X86_64_SSE_CLASS;
5978 return 1;
5979 case DFmode:
5980 classes[0] = X86_64_SSEDF_CLASS;
5981 return 1;
5982 case XFmode:
5983 classes[0] = X86_64_X87_CLASS;
5984 classes[1] = X86_64_X87UP_CLASS;
5985 return 2;
5986 case TFmode:
5987 classes[0] = X86_64_SSE_CLASS;
5988 classes[1] = X86_64_SSEUP_CLASS;
5989 return 2;
5990 case SCmode:
5991 classes[0] = X86_64_SSE_CLASS;
5992 if (!(bit_offset % 64))
5993 return 1;
5994 else
5995 {
5996 static bool warned;
5997
5998 if (!warned && warn_psabi)
5999 {
6000 warned = true;
6001 inform (input_location,
6002 "the ABI of passing structure with complex float"
6003 " member has changed in GCC 4.4");
6004 }
6005 classes[1] = X86_64_SSESF_CLASS;
6006 return 2;
6007 }
6008 case DCmode:
6009 classes[0] = X86_64_SSEDF_CLASS;
6010 classes[1] = X86_64_SSEDF_CLASS;
6011 return 2;
6012 case XCmode:
6013 classes[0] = X86_64_COMPLEX_X87_CLASS;
6014 return 1;
6015 case TCmode:
6016 /* This mode is larger than 16 bytes. */
6017 return 0;
6018 case V8SFmode:
6019 case V8SImode:
6020 case V32QImode:
6021 case V16HImode:
6022 case V4DFmode:
6023 case V4DImode:
6024 classes[0] = X86_64_SSE_CLASS;
6025 classes[1] = X86_64_SSEUP_CLASS;
6026 classes[2] = X86_64_SSEUP_CLASS;
6027 classes[3] = X86_64_SSEUP_CLASS;
6028 return 4;
6029 case V4SFmode:
6030 case V4SImode:
6031 case V16QImode:
6032 case V8HImode:
6033 case V2DFmode:
6034 case V2DImode:
6035 classes[0] = X86_64_SSE_CLASS;
6036 classes[1] = X86_64_SSEUP_CLASS;
6037 return 2;
6038 case V1TImode:
6039 case V1DImode:
6040 case V2SFmode:
6041 case V2SImode:
6042 case V4HImode:
6043 case V8QImode:
6044 classes[0] = X86_64_SSE_CLASS;
6045 return 1;
6046 case BLKmode:
6047 case VOIDmode:
6048 return 0;
6049 default:
6050 gcc_assert (VECTOR_MODE_P (mode));
6051
6052 if (bytes > 16)
6053 return 0;
6054
6055 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6056
6057 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6058 classes[0] = X86_64_INTEGERSI_CLASS;
6059 else
6060 classes[0] = X86_64_INTEGER_CLASS;
6061 classes[1] = X86_64_INTEGER_CLASS;
6062 return 1 + (bytes > 8);
6063 }
6064 }
6065
6066 /* Examine the argument and set the number of registers required in each
6067 class. Return 0 iff the parameter should be passed in memory. */
6068 static int
6069 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6070 int *int_nregs, int *sse_nregs)
6071 {
6072 enum x86_64_reg_class regclass[MAX_CLASSES];
6073 int n = classify_argument (mode, type, regclass, 0);
6074
6075 *int_nregs = 0;
6076 *sse_nregs = 0;
6077 if (!n)
6078 return 0;
6079 for (n--; n >= 0; n--)
6080 switch (regclass[n])
6081 {
6082 case X86_64_INTEGER_CLASS:
6083 case X86_64_INTEGERSI_CLASS:
6084 (*int_nregs)++;
6085 break;
6086 case X86_64_SSE_CLASS:
6087 case X86_64_SSESF_CLASS:
6088 case X86_64_SSEDF_CLASS:
6089 (*sse_nregs)++;
6090 break;
6091 case X86_64_NO_CLASS:
6092 case X86_64_SSEUP_CLASS:
6093 break;
6094 case X86_64_X87_CLASS:
6095 case X86_64_X87UP_CLASS:
6096 if (!in_return)
6097 return 0;
6098 break;
6099 case X86_64_COMPLEX_X87_CLASS:
6100 return in_return ? 2 : 0;
6101 case X86_64_MEMORY_CLASS:
6102 gcc_unreachable ();
6103 }
6104 return 1;
6105 }
6106
6107 /* Construct container for the argument used by GCC interface. See
6108 FUNCTION_ARG for the detailed description. */
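/* Continuing the example above (illustrative only; the specific hard
   registers are those of the first argument slot), for
   struct p { double x; long y; } this builds a PARALLEL roughly of the
   shape

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di) (const_int 8))])

   describing that the first eightbyte lives in an SSE register and the
   second in an integer register.  */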
6109
6110 static rtx
6111 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6112 const_tree type, int in_return, int nintregs, int nsseregs,
6113 const int *intreg, int sse_regno)
6114 {
6115 /* The following variables hold the static issued_error state. */
6116 static bool issued_sse_arg_error;
6117 static bool issued_sse_ret_error;
6118 static bool issued_x87_ret_error;
6119
6120 enum machine_mode tmpmode;
6121 int bytes =
6122 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6123 enum x86_64_reg_class regclass[MAX_CLASSES];
6124 int n;
6125 int i;
6126 int nexps = 0;
6127 int needed_sseregs, needed_intregs;
6128 rtx exp[MAX_CLASSES];
6129 rtx ret;
6130
6131 n = classify_argument (mode, type, regclass, 0);
6132 if (!n)
6133 return NULL;
6134 if (!examine_argument (mode, type, in_return, &needed_intregs,
6135 &needed_sseregs))
6136 return NULL;
6137 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6138 return NULL;
6139
6140 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6141 some less clueful developer tries to use floating-point anyway. */
6142 if (needed_sseregs && !TARGET_SSE)
6143 {
6144 if (in_return)
6145 {
6146 if (!issued_sse_ret_error)
6147 {
6148 error ("SSE register return with SSE disabled");
6149 issued_sse_ret_error = true;
6150 }
6151 }
6152 else if (!issued_sse_arg_error)
6153 {
6154 error ("SSE register argument with SSE disabled");
6155 issued_sse_arg_error = true;
6156 }
6157 return NULL;
6158 }
6159
6160 /* Likewise, error if the ABI requires us to return values in the
6161 x87 registers and the user specified -mno-80387. */
6162 if (!TARGET_80387 && in_return)
6163 for (i = 0; i < n; i++)
6164 if (regclass[i] == X86_64_X87_CLASS
6165 || regclass[i] == X86_64_X87UP_CLASS
6166 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6167 {
6168 if (!issued_x87_ret_error)
6169 {
6170 error ("x87 register return with x87 disabled");
6171 issued_x87_ret_error = true;
6172 }
6173 return NULL;
6174 }
6175
6176 /* First construct simple cases. Avoid SCmode, since we want to use
6177 a single register to pass this type. */
6178 if (n == 1 && mode != SCmode)
6179 switch (regclass[0])
6180 {
6181 case X86_64_INTEGER_CLASS:
6182 case X86_64_INTEGERSI_CLASS:
6183 return gen_rtx_REG (mode, intreg[0]);
6184 case X86_64_SSE_CLASS:
6185 case X86_64_SSESF_CLASS:
6186 case X86_64_SSEDF_CLASS:
6187 if (mode != BLKmode)
6188 return gen_reg_or_parallel (mode, orig_mode,
6189 SSE_REGNO (sse_regno));
6190 break;
6191 case X86_64_X87_CLASS:
6192 case X86_64_COMPLEX_X87_CLASS:
6193 return gen_rtx_REG (mode, FIRST_STACK_REG);
6194 case X86_64_NO_CLASS:
6195 /* Zero sized array, struct or class. */
6196 return NULL;
6197 default:
6198 gcc_unreachable ();
6199 }
6200 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
6201 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
6202 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6203 if (n == 4
6204 && regclass[0] == X86_64_SSE_CLASS
6205 && regclass[1] == X86_64_SSEUP_CLASS
6206 && regclass[2] == X86_64_SSEUP_CLASS
6207 && regclass[3] == X86_64_SSEUP_CLASS
6208 && mode != BLKmode)
6209 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6210
6211 if (n == 2
6212 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
6213 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6214 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
6215 && regclass[1] == X86_64_INTEGER_CLASS
6216 && (mode == CDImode || mode == TImode || mode == TFmode)
6217 && intreg[0] + 1 == intreg[1])
6218 return gen_rtx_REG (mode, intreg[0]);
6219
6220 /* Otherwise figure out the entries of the PARALLEL. */
6221 for (i = 0; i < n; i++)
6222 {
6223 int pos;
6224
6225 switch (regclass[i])
6226 {
6227 case X86_64_NO_CLASS:
6228 break;
6229 case X86_64_INTEGER_CLASS:
6230 case X86_64_INTEGERSI_CLASS:
6231 /* Merge TImodes on aligned occasions here too. */
6232 if (i * 8 + 8 > bytes)
6233 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6234 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6235 tmpmode = SImode;
6236 else
6237 tmpmode = DImode;
6238 /* We've requested 24 bytes we don't have a mode for. Use DImode. */
6239 if (tmpmode == BLKmode)
6240 tmpmode = DImode;
6241 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6242 gen_rtx_REG (tmpmode, *intreg),
6243 GEN_INT (i*8));
6244 intreg++;
6245 break;
6246 case X86_64_SSESF_CLASS:
6247 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6248 gen_rtx_REG (SFmode,
6249 SSE_REGNO (sse_regno)),
6250 GEN_INT (i*8));
6251 sse_regno++;
6252 break;
6253 case X86_64_SSEDF_CLASS:
6254 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6255 gen_rtx_REG (DFmode,
6256 SSE_REGNO (sse_regno)),
6257 GEN_INT (i*8));
6258 sse_regno++;
6259 break;
6260 case X86_64_SSE_CLASS:
6261 pos = i;
6262 switch (n)
6263 {
6264 case 1:
6265 tmpmode = DImode;
6266 break;
6267 case 2:
6268 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6269 {
6270 tmpmode = TImode;
6271 i++;
6272 }
6273 else
6274 tmpmode = DImode;
6275 break;
6276 case 4:
6277 gcc_assert (i == 0
6278 && regclass[1] == X86_64_SSEUP_CLASS
6279 && regclass[2] == X86_64_SSEUP_CLASS
6280 && regclass[3] == X86_64_SSEUP_CLASS);
6281 tmpmode = OImode;
6282 i += 3;
6283 break;
6284 default:
6285 gcc_unreachable ();
6286 }
6287 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6288 gen_rtx_REG (tmpmode,
6289 SSE_REGNO (sse_regno)),
6290 GEN_INT (pos*8));
6291 sse_regno++;
6292 break;
6293 default:
6294 gcc_unreachable ();
6295 }
6296 }
6297
6298 /* Empty aligned struct, union or class. */
6299 if (nexps == 0)
6300 return NULL;
6301
6302 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6303 for (i = 0; i < nexps; i++)
6304 XVECEXP (ret, 0, i) = exp [i];
6305 return ret;
6306 }
6307
6308 /* Update the data in CUM to advance over an argument of mode MODE
6309 and data type TYPE. (TYPE is null for libcalls where that information
6310 may not be available.) */
6311
6312 static void
6313 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6314 const_tree type, HOST_WIDE_INT bytes,
6315 HOST_WIDE_INT words)
6316 {
6317 switch (mode)
6318 {
6319 default:
6320 break;
6321
6322 case BLKmode:
6323 if (bytes < 0)
6324 break;
6325 /* FALLTHRU */
6326
6327 case DImode:
6328 case SImode:
6329 case HImode:
6330 case QImode:
6331 cum->words += words;
6332 cum->nregs -= words;
6333 cum->regno += words;
6334
6335 if (cum->nregs <= 0)
6336 {
6337 cum->nregs = 0;
6338 cum->regno = 0;
6339 }
6340 break;
6341
6342 case OImode:
6343 /* OImode shouldn't be used directly. */
6344 gcc_unreachable ();
6345
6346 case DFmode:
6347 if (cum->float_in_sse < 2)
6348 break;
6349 case SFmode:
6350 if (cum->float_in_sse < 1)
6351 break;
6352 /* FALLTHRU */
6353
6354 case V8SFmode:
6355 case V8SImode:
6356 case V32QImode:
6357 case V16HImode:
6358 case V4DFmode:
6359 case V4DImode:
6360 case TImode:
6361 case V16QImode:
6362 case V8HImode:
6363 case V4SImode:
6364 case V2DImode:
6365 case V4SFmode:
6366 case V2DFmode:
6367 if (!type || !AGGREGATE_TYPE_P (type))
6368 {
6369 cum->sse_words += words;
6370 cum->sse_nregs -= 1;
6371 cum->sse_regno += 1;
6372 if (cum->sse_nregs <= 0)
6373 {
6374 cum->sse_nregs = 0;
6375 cum->sse_regno = 0;
6376 }
6377 }
6378 break;
6379
6380 case V8QImode:
6381 case V4HImode:
6382 case V2SImode:
6383 case V2SFmode:
6384 case V1TImode:
6385 case V1DImode:
6386 if (!type || !AGGREGATE_TYPE_P (type))
6387 {
6388 cum->mmx_words += words;
6389 cum->mmx_nregs -= 1;
6390 cum->mmx_regno += 1;
6391 if (cum->mmx_nregs <= 0)
6392 {
6393 cum->mmx_nregs = 0;
6394 cum->mmx_regno = 0;
6395 }
6396 }
6397 break;
6398 }
6399 }
6400
6401 static void
6402 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6403 const_tree type, HOST_WIDE_INT words, bool named)
6404 {
6405 int int_nregs, sse_nregs;
6406
6407 /* Unnamed 256bit vector mode parameters are passed on stack. */
6408 if (!named && VALID_AVX256_REG_MODE (mode))
6409 return;
6410
6411 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6412 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6413 {
6414 cum->nregs -= int_nregs;
6415 cum->sse_nregs -= sse_nregs;
6416 cum->regno += int_nregs;
6417 cum->sse_regno += sse_nregs;
6418 }
6419 else
6420 {
6421 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6422 cum->words = (cum->words + align - 1) & ~(align - 1);
6423 cum->words += words;
6424 }
6425 }
6426
6427 static void
6428 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6429 HOST_WIDE_INT words)
6430 {
6431 /* Otherwise, this should be passed indirectly. */
6432 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6433
6434 cum->words += words;
6435 if (cum->nregs > 0)
6436 {
6437 cum->nregs -= 1;
6438 cum->regno += 1;
6439 }
6440 }
6441
6442 /* Update the data in CUM to advance over an argument of mode MODE and
6443 data type TYPE. (TYPE is null for libcalls where that information
6444 may not be available.) */
6445
6446 static void
6447 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
6448 const_tree type, bool named)
6449 {
6450 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6451 HOST_WIDE_INT bytes, words;
6452
6453 if (mode == BLKmode)
6454 bytes = int_size_in_bytes (type);
6455 else
6456 bytes = GET_MODE_SIZE (mode);
6457 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6458
6459 if (type)
6460 mode = type_natural_mode (type, NULL);
6461
6462 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6463 function_arg_advance_ms_64 (cum, bytes, words);
6464 else if (TARGET_64BIT)
6465 function_arg_advance_64 (cum, mode, type, words, named);
6466 else
6467 function_arg_advance_32 (cum, mode, type, bytes, words);
6468 }
6469
6470 /* Define where to put the arguments to a function.
6471 Value is zero to push the argument on the stack,
6472 or a hard register in which to store the argument.
6473
6474 MODE is the argument's machine mode.
6475 TYPE is the data type of the argument (as a tree).
6476 This is null for libcalls where that information may
6477 not be available.
6478 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6479 the preceding args and about the function being called.
6480 NAMED is nonzero if this argument is a named parameter
6481 (otherwise it is an extra parameter matching an ellipsis). */
6482
6483 static rtx
6484 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6485 enum machine_mode orig_mode, const_tree type,
6486 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6487 {
6488 static bool warnedsse, warnedmmx;
6489
6490 /* Avoid the AL settings for the Unix64 ABI. */
6491 if (mode == VOIDmode)
6492 return constm1_rtx;
6493
6494 switch (mode)
6495 {
6496 default:
6497 break;
6498
6499 case BLKmode:
6500 if (bytes < 0)
6501 break;
6502 /* FALLTHRU */
6503 case DImode:
6504 case SImode:
6505 case HImode:
6506 case QImode:
6507 if (words <= cum->nregs)
6508 {
6509 int regno = cum->regno;
6510
6511 /* Fastcall allocates the first two DWORD (SImode) or
6512 smaller arguments to ECX and EDX if it isn't an
6513 aggregate type. */
6514 if (cum->fastcall)
6515 {
6516 if (mode == BLKmode
6517 || mode == DImode
6518 || (type && AGGREGATE_TYPE_P (type)))
6519 break;
6520
6521 /* ECX not EAX is the first allocated register. */
6522 if (regno == AX_REG)
6523 regno = CX_REG;
6524 }
6525 return gen_rtx_REG (mode, regno);
6526 }
6527 break;
6528
6529 case DFmode:
6530 if (cum->float_in_sse < 2)
6531 break;
6532 case SFmode:
6533 if (cum->float_in_sse < 1)
6534 break;
6535 /* FALLTHRU */
6536 case TImode:
6537 /* In 32bit, we pass TImode in xmm registers. */
6538 case V16QImode:
6539 case V8HImode:
6540 case V4SImode:
6541 case V2DImode:
6542 case V4SFmode:
6543 case V2DFmode:
6544 if (!type || !AGGREGATE_TYPE_P (type))
6545 {
6546 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6547 {
6548 warnedsse = true;
6549 warning (0, "SSE vector argument without SSE enabled "
6550 "changes the ABI");
6551 }
6552 if (cum->sse_nregs)
6553 return gen_reg_or_parallel (mode, orig_mode,
6554 cum->sse_regno + FIRST_SSE_REG);
6555 }
6556 break;
6557
6558 case OImode:
6559 /* OImode shouldn't be used directly. */
6560 gcc_unreachable ();
6561
6562 case V8SFmode:
6563 case V8SImode:
6564 case V32QImode:
6565 case V16HImode:
6566 case V4DFmode:
6567 case V4DImode:
6568 if (!type || !AGGREGATE_TYPE_P (type))
6569 {
6570 if (cum->sse_nregs)
6571 return gen_reg_or_parallel (mode, orig_mode,
6572 cum->sse_regno + FIRST_SSE_REG);
6573 }
6574 break;
6575
6576 case V8QImode:
6577 case V4HImode:
6578 case V2SImode:
6579 case V2SFmode:
6580 case V1TImode:
6581 case V1DImode:
6582 if (!type || !AGGREGATE_TYPE_P (type))
6583 {
6584 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6585 {
6586 warnedmmx = true;
6587 warning (0, "MMX vector argument without MMX enabled "
6588 "changes the ABI");
6589 }
6590 if (cum->mmx_nregs)
6591 return gen_reg_or_parallel (mode, orig_mode,
6592 cum->mmx_regno + FIRST_MMX_REG);
6593 }
6594 break;
6595 }
6596
6597 return NULL_RTX;
6598 }
6599
6600 static rtx
6601 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6602 enum machine_mode orig_mode, const_tree type, bool named)
6603 {
6604 /* Handle a hidden AL argument containing number of registers
6605 for varargs x86-64 functions. */
6606 if (mode == VOIDmode)
6607 return GEN_INT (cum->maybe_vaarg
6608 ? (cum->sse_nregs < 0
6609 ? X86_64_SSE_REGPARM_MAX
6610 : cum->sse_regno)
6611 : -1);
6612
6613 switch (mode)
6614 {
6615 default:
6616 break;
6617
6618 case V8SFmode:
6619 case V8SImode:
6620 case V32QImode:
6621 case V16HImode:
6622 case V4DFmode:
6623 case V4DImode:
6624 /* Unnamed 256bit vector mode parameters are passed on stack. */
6625 if (!named)
6626 return NULL;
6627 break;
6628 }
6629
6630 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6631 cum->sse_nregs,
6632 &x86_64_int_parameter_registers [cum->regno],
6633 cum->sse_regno);
6634 }
6635
6636 static rtx
6637 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6638 enum machine_mode orig_mode, bool named,
6639 HOST_WIDE_INT bytes)
6640 {
6641 unsigned int regno;
6642
6643 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
6644 We use a value of -2 to specify that the current function call is MSABI. */
6645 if (mode == VOIDmode)
6646 return GEN_INT (-2);
6647
6648 /* If we've run out of registers, it goes on the stack. */
6649 if (cum->nregs == 0)
6650 return NULL_RTX;
6651
6652 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6653
6654 /* Only floating point modes are passed in anything but integer regs. */
6655 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6656 {
6657 if (named)
6658 regno = cum->regno + FIRST_SSE_REG;
6659 else
6660 {
6661 rtx t1, t2;
6662
6663 /* Unnamed floating parameters are passed in both the
6664 SSE and integer registers. */
6665 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6666 t2 = gen_rtx_REG (mode, regno);
6667 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6668 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6669 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
6670 }
6671 }
6672 /* Handle aggregate types passed in registers. */
6673 if (orig_mode == BLKmode)
6674 {
6675 if (bytes > 0 && bytes <= 8)
6676 mode = (bytes > 4 ? DImode : SImode);
6677 if (mode == BLKmode)
6678 mode = DImode;
6679 }
6680
6681 return gen_reg_or_parallel (mode, orig_mode, regno);
6682 }
6683
6684 /* Return where to put the arguments to a function.
6685 Return zero to push the argument on the stack, or a hard register in which to store the argument.
6686
6687 MODE is the argument's machine mode. TYPE is the data type of the
6688 argument. It is null for libcalls where that information may not be
6689 available. CUM gives information about the preceding args and about
6690 the function being called. NAMED is nonzero if this argument is a
6691 named parameter (otherwise it is an extra parameter matching an
6692 ellipsis). */
6693
6694 static rtx
6695 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
6696 const_tree type, bool named)
6697 {
6698 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6699 enum machine_mode mode = omode;
6700 HOST_WIDE_INT bytes, words;
6701 rtx arg;
6702
6703 if (mode == BLKmode)
6704 bytes = int_size_in_bytes (type);
6705 else
6706 bytes = GET_MODE_SIZE (mode);
6707 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6708
6709 /* To simplify the code below, represent vector types with a vector mode
6710 even if MMX/SSE are not active. */
6711 if (type && TREE_CODE (type) == VECTOR_TYPE)
6712 mode = type_natural_mode (type, cum);
6713
6714 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6715 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
6716 else if (TARGET_64BIT)
6717 arg = function_arg_64 (cum, mode, omode, type, named);
6718 else
6719 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
6720
6721 if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
6722 {
6723 /* This argument uses 256bit AVX modes. */
6724 if (cum->caller)
6725 cfun->machine->callee_pass_avx256_p = true;
6726 else
6727 cfun->machine->caller_pass_avx256_p = true;
6728 }
6729
6730 return arg;
6731 }
6732
6733 /* A C expression that indicates when an argument must be passed by
6734 reference. If nonzero for an argument, a copy of that argument is
6735 made in memory and a pointer to the argument is passed instead of
6736 the argument itself. The pointer is passed in whatever way is
6737 appropriate for passing a pointer to that type. */
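/* For instance (illustrative only), with the Windows 64-bit convention
   a 16-byte aggregate such as

     struct big { long long a, b; };

   does not fit the 1/2/4/8-byte cases below and is therefore passed by
   a hidden pointer, whereas under the 64-bit SysV ABI the same struct
   is still passed by value and only variable-sized types take the
   by-reference path here.  */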
6738
6739 static bool
6740 ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
6741 enum machine_mode mode ATTRIBUTE_UNUSED,
6742 const_tree type, bool named ATTRIBUTE_UNUSED)
6743 {
6744 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6745
6746 /* See Windows x64 Software Convention. */
6747 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6748 {
6749 int msize = (int) GET_MODE_SIZE (mode);
6750 if (type)
6751 {
6752 /* Arrays are passed by reference. */
6753 if (TREE_CODE (type) == ARRAY_TYPE)
6754 return true;
6755
6756 if (AGGREGATE_TYPE_P (type))
6757 {
6758 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6759 are passed by reference. */
6760 msize = int_size_in_bytes (type);
6761 }
6762 }
6763
6764 /* __m128 is passed by reference. */
6765 switch (msize) {
6766 case 1: case 2: case 4: case 8:
6767 break;
6768 default:
6769 return true;
6770 }
6771 }
6772 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6773 return 1;
6774
6775 return 0;
6776 }
6777
6778 /* Return true when TYPE should be 128bit aligned for 32bit argument
6779 passing ABI. XXX: This function is obsolete and is only used for
6780 checking psABI compatibility with previous versions of GCC. */
6781
6782 static bool
6783 ix86_compat_aligned_value_p (const_tree type)
6784 {
6785 enum machine_mode mode = TYPE_MODE (type);
6786 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6787 || mode == TDmode
6788 || mode == TFmode
6789 || mode == TCmode)
6790 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6791 return true;
6792 if (TYPE_ALIGN (type) < 128)
6793 return false;
6794
6795 if (AGGREGATE_TYPE_P (type))
6796 {
6797 /* Walk the aggregates recursively. */
6798 switch (TREE_CODE (type))
6799 {
6800 case RECORD_TYPE:
6801 case UNION_TYPE:
6802 case QUAL_UNION_TYPE:
6803 {
6804 tree field;
6805
6806 /* Walk all the structure fields. */
6807 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6808 {
6809 if (TREE_CODE (field) == FIELD_DECL
6810 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
6811 return true;
6812 }
6813 break;
6814 }
6815
6816 case ARRAY_TYPE:
6817 /* Just for use if some languages pass arrays by value. */
6818 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
6819 return true;
6820 break;
6821
6822 default:
6823 gcc_unreachable ();
6824 }
6825 }
6826 return false;
6827 }
6828
6829 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
6830 XXX: This function is obsolete and is only used for checking psABI
6831 compatibility with previous versions of GCC. */
6832
6833 static unsigned int
6834 ix86_compat_function_arg_boundary (enum machine_mode mode,
6835 const_tree type, unsigned int align)
6836 {
6837 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6838 natural boundaries. */
6839 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6840 {
6841 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6842 make an exception for SSE modes since these require 128bit
6843 alignment.
6844
6845 The handling here differs from field_alignment. ICC aligns MMX
6846 arguments to 4 byte boundaries, while structure fields are aligned
6847 to 8 byte boundaries. */
6848 if (!type)
6849 {
6850 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6851 align = PARM_BOUNDARY;
6852 }
6853 else
6854 {
6855 if (!ix86_compat_aligned_value_p (type))
6856 align = PARM_BOUNDARY;
6857 }
6858 }
6859 if (align > BIGGEST_ALIGNMENT)
6860 align = BIGGEST_ALIGNMENT;
6861 return align;
6862 }
6863
6864 /* Return true when TYPE should be 128bit aligned for 32bit argument
6865 passing ABI. */
6866
6867 static bool
6868 ix86_contains_aligned_value_p (const_tree type)
6869 {
6870 enum machine_mode mode = TYPE_MODE (type);
6871
6872 if (mode == XFmode || mode == XCmode)
6873 return false;
6874
6875 if (TYPE_ALIGN (type) < 128)
6876 return false;
6877
6878 if (AGGREGATE_TYPE_P (type))
6879 {
6880 /* Walk the aggregates recursively. */
6881 switch (TREE_CODE (type))
6882 {
6883 case RECORD_TYPE:
6884 case UNION_TYPE:
6885 case QUAL_UNION_TYPE:
6886 {
6887 tree field;
6888
6889 /* Walk all the structure fields. */
6890 for (field = TYPE_FIELDS (type);
6891 field;
6892 field = DECL_CHAIN (field))
6893 {
6894 if (TREE_CODE (field) == FIELD_DECL
6895 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
6896 return true;
6897 }
6898 break;
6899 }
6900
6901 case ARRAY_TYPE:
6902 /* Just for use if some languages pass arrays by value. */
6903 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
6904 return true;
6905 break;
6906
6907 default:
6908 gcc_unreachable ();
6909 }
6910 }
6911 else
6912 return TYPE_ALIGN (type) >= 128;
6913
6914 return false;
6915 }
6916
6917 /* Gives the alignment boundary, in bits, of an argument with the
6918 specified mode and type. */
6919
6920 static unsigned int
6921 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
6922 {
6923 unsigned int align;
6924 if (type)
6925 {
6926 /* Since the main variant type is used for the call, convert the type
6927 to its main variant. */
6928 type = TYPE_MAIN_VARIANT (type);
6929 align = TYPE_ALIGN (type);
6930 }
6931 else
6932 align = GET_MODE_ALIGNMENT (mode);
6933 if (align < PARM_BOUNDARY)
6934 align = PARM_BOUNDARY;
6935 else
6936 {
6937 static bool warned;
6938 unsigned int saved_align = align;
6939
6940 if (!TARGET_64BIT)
6941 {
6942 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
6943 if (!type)
6944 {
6945 if (mode == XFmode || mode == XCmode)
6946 align = PARM_BOUNDARY;
6947 }
6948 else if (!ix86_contains_aligned_value_p (type))
6949 align = PARM_BOUNDARY;
6950
6951 if (align < 128)
6952 align = PARM_BOUNDARY;
6953 }
6954
6955 if (warn_psabi
6956 && !warned
6957 && align != ix86_compat_function_arg_boundary (mode, type,
6958 saved_align))
6959 {
6960 warned = true;
6961 inform (input_location,
6962 "The ABI for passing parameters with %d-byte"
6963 " alignment has changed in GCC 4.6",
6964 align / BITS_PER_UNIT);
6965 }
6966 }
6967
6968 return align;
6969 }
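
/* For illustration, a rough sketch of the boundaries computed above for
   the 32-bit ABI: an XFmode long double argument is dropped back to the
   32-bit PARM_BOUNDARY, a 16-byte __m128 argument keeps its 128-bit
   boundary, and anything below PARM_BOUNDARY is rounded up to
   PARM_BOUNDARY. */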
6970
6971 /* Return true if N is a possible register number of function value. */
6972
6973 static bool
6974 ix86_function_value_regno_p (const unsigned int regno)
6975 {
6976 switch (regno)
6977 {
6978 case 0:
6979 return true;
6980
6981 case FIRST_FLOAT_REG:
6982 /* TODO: The function should depend on current function ABI but
6983 builtins.c would need updating then. Therefore we use the
6984 default ABI. */
6985 if (TARGET_64BIT && ix86_abi == MS_ABI)
6986 return false;
6987 return TARGET_FLOAT_RETURNS_IN_80387;
6988
6989 case FIRST_SSE_REG:
6990 return TARGET_SSE;
6991
6992 case FIRST_MMX_REG:
6993 if (TARGET_MACHO || TARGET_64BIT)
6994 return false;
6995 return TARGET_MMX;
6996 }
6997
6998 return false;
6999 }
7000
7001 /* Define how to find the value returned by a function.
7002 VALTYPE is the data type of the value (as a tree).
7003 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7004 otherwise, FUNC is 0. */
7005
7006 static rtx
7007 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7008 const_tree fntype, const_tree fn)
7009 {
7010 unsigned int regno;
7011
7012 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7013 we normally prevent this case when mmx is not available. However
7014 some ABIs may require the result to be returned like DImode. */
7015 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7016 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
7017
7018 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7019 we prevent this case when sse is not available. However some ABIs
7020 may require the result to be returned like integer TImode. */
7021 else if (mode == TImode
7022 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7023 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
7024
7025 /* 32-byte vector modes in %ymm0. */
7026 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7027 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
7028
7029 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7030 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7031 regno = FIRST_FLOAT_REG;
7032 else
7033 /* Most things go in %eax. */
7034 regno = AX_REG;
7035
7036 /* Override FP return register with %xmm0 for local functions when
7037 SSE math is enabled or for functions with sseregparm attribute. */
7038 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7039 {
7040 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7041 if ((sse_level >= 1 && mode == SFmode)
7042 || (sse_level == 2 && mode == DFmode))
7043 regno = FIRST_SSE_REG;
7044 }
7045
7046 /* OImode shouldn't be used directly. */
7047 gcc_assert (mode != OImode);
7048
7049 return gen_rtx_REG (orig_mode, regno);
7050 }
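
/* For illustration, roughly how the 32-bit return registers chosen above
   map to common C types, assuming the corresponding ISA is enabled:

       int, pointers             -> %eax
       float, double             -> %st(0)  (%xmm0 with sseregparm/SSE math)
       __m64  (8-byte vector)    -> %mm0
       __m128 (16-byte vector)   -> %xmm0
       32-byte AVX vector        -> %ymm0  */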
7051
7052 static rtx
7053 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7054 const_tree valtype)
7055 {
7056 rtx ret;
7057
7058 /* Handle libcalls, which don't provide a type node. */
7059 if (valtype == NULL)
7060 {
7061 switch (mode)
7062 {
7063 case SFmode:
7064 case SCmode:
7065 case DFmode:
7066 case DCmode:
7067 case TFmode:
7068 case SDmode:
7069 case DDmode:
7070 case TDmode:
7071 return gen_rtx_REG (mode, FIRST_SSE_REG);
7072 case XFmode:
7073 case XCmode:
7074 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
7075 case TCmode:
7076 return NULL;
7077 default:
7078 return gen_rtx_REG (mode, AX_REG);
7079 }
7080 }
7081 else if (POINTER_TYPE_P (valtype))
7082 {
7083 /* Pointers are always returned in Pmode. */
7084 mode = Pmode;
7085 }
7086
7087 ret = construct_container (mode, orig_mode, valtype, 1,
7088 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7089 x86_64_int_return_registers, 0);
7090
7091 /* For zero-sized structures, construct_container returns NULL, but we
7092 need to keep the rest of the compiler happy by returning a meaningful value. */
7093 if (!ret)
7094 ret = gen_rtx_REG (orig_mode, AX_REG);
7095
7096 return ret;
7097 }
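
/* For illustration, a sketch of the libcall case above (VALTYPE == NULL):
   SFmode, DFmode and TFmode values come back in %xmm0, XFmode long
   doubles in %st(0), and everything else defaults to %rax. */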
7098
7099 static rtx
7100 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
7101 {
7102 unsigned int regno = AX_REG;
7103
7104 if (TARGET_SSE)
7105 {
7106 switch (GET_MODE_SIZE (mode))
7107 {
7108 case 16:
7109 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7110 && !COMPLEX_MODE_P (mode))
7111 regno = FIRST_SSE_REG;
7112 break;
7113 case 8:
7114 case 4:
7115 if (mode == SFmode || mode == DFmode)
7116 regno = FIRST_SSE_REG;
7117 break;
7118 default:
7119 break;
7120 }
7121 }
7122 return gen_rtx_REG (orig_mode, regno);
7123 }
7124
7125 static rtx
7126 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7127 enum machine_mode orig_mode, enum machine_mode mode)
7128 {
7129 const_tree fn, fntype;
7130
7131 fn = NULL_TREE;
7132 if (fntype_or_decl && DECL_P (fntype_or_decl))
7133 fn = fntype_or_decl;
7134 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7135
7136 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7137 return function_value_ms_64 (orig_mode, mode);
7138 else if (TARGET_64BIT)
7139 return function_value_64 (orig_mode, mode, valtype);
7140 else
7141 return function_value_32 (orig_mode, mode, fntype, fn);
7142 }
7143
7144 static rtx
7145 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7146 bool outgoing ATTRIBUTE_UNUSED)
7147 {
7148 enum machine_mode mode, orig_mode;
7149
7150 orig_mode = TYPE_MODE (valtype);
7151 mode = type_natural_mode (valtype, NULL);
7152 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7153 }
7154
7155 /* Pointer function arguments and return values are promoted to Pmode. */
7156
7157 static enum machine_mode
7158 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
7159 int *punsignedp, const_tree fntype,
7160 int for_return)
7161 {
7162 if (type != NULL_TREE && POINTER_TYPE_P (type))
7163 {
7164 *punsignedp = POINTERS_EXTEND_UNSIGNED;
7165 return Pmode;
7166 }
7167 return default_promote_function_mode (type, mode, punsignedp, fntype,
7168 for_return);
7169 }
7170
7171 rtx
7172 ix86_libcall_value (enum machine_mode mode)
7173 {
7174 return ix86_function_value_1 (NULL, NULL, mode, mode);
7175 }
7176
7177 /* Return true iff type is returned in memory. */
7178
7179 static bool ATTRIBUTE_UNUSED
7180 return_in_memory_32 (const_tree type, enum machine_mode mode)
7181 {
7182 HOST_WIDE_INT size;
7183
7184 if (mode == BLKmode)
7185 return true;
7186
7187 size = int_size_in_bytes (type);
7188
7189 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
7190 return false;
7191
7192 if (VECTOR_MODE_P (mode) || mode == TImode)
7193 {
7194 /* User-created vectors small enough to fit in EAX. */
7195 if (size < 8)
7196 return false;
7197
7198 /* MMX/3dNow values are returned in MM0,
7199 except when it doesn't exist or the ABI prescribes otherwise. */
7200 if (size == 8)
7201 return !TARGET_MMX || TARGET_VECT8_RETURNS;
7202
7203 /* SSE values are returned in XMM0, except when it doesn't exist. */
7204 if (size == 16)
7205 return !TARGET_SSE;
7206
7207 /* AVX values are returned in YMM0, except when it doesn't exist. */
7208 if (size == 32)
7209 return !TARGET_AVX;
7210 }
7211
7212 if (mode == XFmode)
7213 return false;
7214
7215 if (size > 12)
7216 return true;
7217
7218 /* OImode shouldn't be used directly. */
7219 gcc_assert (mode != OImode);
7220
7221 return false;
7222 }
7223
7224 static bool ATTRIBUTE_UNUSED
7225 return_in_memory_64 (const_tree type, enum machine_mode mode)
7226 {
7227 int needed_intregs, needed_sseregs;
7228 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
7229 }
7230
7231 static bool ATTRIBUTE_UNUSED
7232 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
7233 {
7234 HOST_WIDE_INT size = int_size_in_bytes (type);
7235
7236 /* __m128 is returned in xmm0. */
7237 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7238 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
7239 return false;
7240
7241 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
7242 return size != 1 && size != 2 && size != 4 && size != 8;
7243 }
7244
7245 static bool
7246 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7247 {
7248 #ifdef SUBTARGET_RETURN_IN_MEMORY
7249 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
7250 #else
7251 const enum machine_mode mode = type_natural_mode (type, NULL);
7252
7253 if (TARGET_64BIT)
7254 {
7255 if (ix86_function_type_abi (fntype) == MS_ABI)
7256 return return_in_memory_ms_64 (type, mode);
7257 else
7258 return return_in_memory_64 (type, mode);
7259 }
7260 else
7261 return return_in_memory_32 (type, mode);
7262 #endif
7263 }
7264
7265 /* When returning SSE vector types, we have a choice of either
7266 (1) being abi incompatible with a -march switch, or
7267 (2) generating an error.
7268 Given no good solution, I think the safest thing is one warning.
7269 The user won't be able to use -Werror, but....
7270
7271 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7272 called in response to actually generating a caller or callee that
7273 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7274 via aggregate_value_p for general type probing from tree-ssa. */
7275
7276 static rtx
7277 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
7278 {
7279 static bool warnedsse, warnedmmx;
7280
7281 if (!TARGET_64BIT && type)
7282 {
7283 /* Look at the return type of the function, not the function type. */
7284 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
7285
7286 if (!TARGET_SSE && !warnedsse)
7287 {
7288 if (mode == TImode
7289 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7290 {
7291 warnedsse = true;
7292 warning (0, "SSE vector return without SSE enabled "
7293 "changes the ABI");
7294 }
7295 }
7296
7297 if (!TARGET_MMX && !warnedmmx)
7298 {
7299 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7300 {
7301 warnedmmx = true;
7302 warning (0, "MMX vector return without MMX enabled "
7303 "changes the ABI");
7304 }
7305 }
7306 }
7307
7308 return NULL;
7309 }
7310
7311 \f
7312 /* Create the va_list data type. */
7313
7314 /* Returns the calling convention specific va_list data type.
7315 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7316
7317 static tree
7318 ix86_build_builtin_va_list_abi (enum calling_abi abi)
7319 {
7320 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
7321
7322 /* For i386 we use plain pointer to argument area. */
7323 if (!TARGET_64BIT || abi == MS_ABI)
7324 return build_pointer_type (char_type_node);
7325
7326 record = lang_hooks.types.make_type (RECORD_TYPE);
7327 type_decl = build_decl (BUILTINS_LOCATION,
7328 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7329
7330 f_gpr = build_decl (BUILTINS_LOCATION,
7331 FIELD_DECL, get_identifier ("gp_offset"),
7332 unsigned_type_node);
7333 f_fpr = build_decl (BUILTINS_LOCATION,
7334 FIELD_DECL, get_identifier ("fp_offset"),
7335 unsigned_type_node);
7336 f_ovf = build_decl (BUILTINS_LOCATION,
7337 FIELD_DECL, get_identifier ("overflow_arg_area"),
7338 ptr_type_node);
7339 f_sav = build_decl (BUILTINS_LOCATION,
7340 FIELD_DECL, get_identifier ("reg_save_area"),
7341 ptr_type_node);
7342
7343 va_list_gpr_counter_field = f_gpr;
7344 va_list_fpr_counter_field = f_fpr;
7345
7346 DECL_FIELD_CONTEXT (f_gpr) = record;
7347 DECL_FIELD_CONTEXT (f_fpr) = record;
7348 DECL_FIELD_CONTEXT (f_ovf) = record;
7349 DECL_FIELD_CONTEXT (f_sav) = record;
7350
7351 TYPE_STUB_DECL (record) = type_decl;
7352 TYPE_NAME (record) = type_decl;
7353 TYPE_FIELDS (record) = f_gpr;
7354 DECL_CHAIN (f_gpr) = f_fpr;
7355 DECL_CHAIN (f_fpr) = f_ovf;
7356 DECL_CHAIN (f_ovf) = f_sav;
7357
7358 layout_type (record);
7359
7360 /* The correct type is an array type of one element. */
7361 return build_array_type (record, build_index_type (size_zero_node));
7362 }
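
/* For illustration, the record built above corresponds roughly to the
   familiar SysV x86-64 va_list declaration (a sketch, not the canonical
   definition):

       typedef struct __va_list_tag {
         unsigned int gp_offset;     -- byte offset into reg_save_area for GPRs
         unsigned int fp_offset;     -- byte offset into reg_save_area for XMMs
         void *overflow_arg_area;    -- stack arguments past the save area
         void *reg_save_area;        -- start of the register save area
       } __va_list_tag;
       typedef __va_list_tag __builtin_va_list[1];  */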
7363
7364 /* Set up the builtin va_list data type and, for 64-bit, the additional
7365 calling convention specific va_list data types. */
7366
7367 static tree
7368 ix86_build_builtin_va_list (void)
7369 {
7370 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7371
7372 /* Initialize abi specific va_list builtin types. */
7373 if (TARGET_64BIT)
7374 {
7375 tree t;
7376 if (ix86_abi == MS_ABI)
7377 {
7378 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7379 if (TREE_CODE (t) != RECORD_TYPE)
7380 t = build_variant_type_copy (t);
7381 sysv_va_list_type_node = t;
7382 }
7383 else
7384 {
7385 t = ret;
7386 if (TREE_CODE (t) != RECORD_TYPE)
7387 t = build_variant_type_copy (t);
7388 sysv_va_list_type_node = t;
7389 }
7390 if (ix86_abi != MS_ABI)
7391 {
7392 t = ix86_build_builtin_va_list_abi (MS_ABI);
7393 if (TREE_CODE (t) != RECORD_TYPE)
7394 t = build_variant_type_copy (t);
7395 ms_va_list_type_node = t;
7396 }
7397 else
7398 {
7399 t = ret;
7400 if (TREE_CODE (t) != RECORD_TYPE)
7401 t = build_variant_type_copy (t);
7402 ms_va_list_type_node = t;
7403 }
7404 }
7405
7406 return ret;
7407 }
7408
7409 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7410
7411 static void
7412 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7413 {
7414 rtx save_area, mem;
7415 alias_set_type set;
7416 int i, max;
7417
7418 /* GPR size of varargs save area. */
7419 if (cfun->va_list_gpr_size)
7420 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7421 else
7422 ix86_varargs_gpr_size = 0;
7423
7424 /* FPR size of varargs save area. We don't need it if we don't pass
7425 anything in SSE registers. */
7426 if (TARGET_SSE && cfun->va_list_fpr_size)
7427 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7428 else
7429 ix86_varargs_fpr_size = 0;
7430
7431 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7432 return;
7433
7434 save_area = frame_pointer_rtx;
7435 set = get_varargs_alias_set ();
7436
7437 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7438 if (max > X86_64_REGPARM_MAX)
7439 max = X86_64_REGPARM_MAX;
7440
7441 for (i = cum->regno; i < max; i++)
7442 {
7443 mem = gen_rtx_MEM (Pmode,
7444 plus_constant (save_area, i * UNITS_PER_WORD));
7445 MEM_NOTRAP_P (mem) = 1;
7446 set_mem_alias_set (mem, set);
7447 emit_move_insn (mem, gen_rtx_REG (Pmode,
7448 x86_64_int_parameter_registers[i]));
7449 }
7450
7451 if (ix86_varargs_fpr_size)
7452 {
7453 enum machine_mode smode;
7454 rtx label, test;
7455
7456 /* Now emit code to save SSE registers. The AX parameter contains the
7457 number of SSE parameter registers used to call this function, though all we
7458 actually check here is the zero/non-zero status. */
7459
7460 label = gen_label_rtx ();
7461 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7462 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7463 label));
7464
7465 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7466 we used movdqa (i.e. TImode) instead? Perhaps even better would
7467 be if we could determine the real mode of the data, via a hook
7468 into pass_stdarg. Ignore all that for now. */
7469 smode = V4SFmode;
7470 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7471 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7472
7473 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7474 if (max > X86_64_SSE_REGPARM_MAX)
7475 max = X86_64_SSE_REGPARM_MAX;
7476
7477 for (i = cum->sse_regno; i < max; ++i)
7478 {
7479 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7480 mem = gen_rtx_MEM (smode, mem);
7481 MEM_NOTRAP_P (mem) = 1;
7482 set_mem_alias_set (mem, set);
7483 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7484
7485 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
7486 }
7487
7488 emit_label (label);
7489 }
7490 }
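
/* For illustration, a sketch of the varargs register save area laid out
   above when all of it is needed: 6 * 8 = 48 bytes for %rdi, %rsi, %rdx,
   %rcx, %r8 and %r9, followed by 8 * 16 = 128 bytes for %xmm0-%xmm7; the
   SSE half is only stored when %al is non-zero at function entry. */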
7491
7492 static void
7493 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7494 {
7495 alias_set_type set = get_varargs_alias_set ();
7496 int i;
7497
7498 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7499 {
7500 rtx reg, mem;
7501
7502 mem = gen_rtx_MEM (Pmode,
7503 plus_constant (virtual_incoming_args_rtx,
7504 i * UNITS_PER_WORD));
7505 MEM_NOTRAP_P (mem) = 1;
7506 set_mem_alias_set (mem, set);
7507
7508 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7509 emit_move_insn (mem, reg);
7510 }
7511 }
7512
7513 static void
7514 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
7515 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7516 int no_rtl)
7517 {
7518 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7519 CUMULATIVE_ARGS next_cum;
7520 tree fntype;
7521
7522 /* This argument doesn't appear to be used anymore, which is good,
7523 because the old code here didn't suppress rtl generation. */
7524 gcc_assert (!no_rtl);
7525
7526 if (!TARGET_64BIT)
7527 return;
7528
7529 fntype = TREE_TYPE (current_function_decl);
7530
7531 /* For varargs, we do not want to skip the dummy va_dcl argument.
7532 For stdargs, we do want to skip the last named argument. */
7533 next_cum = *cum;
7534 if (stdarg_p (fntype))
7535 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
7536 true);
7537
7538 if (cum->call_abi == MS_ABI)
7539 setup_incoming_varargs_ms_64 (&next_cum);
7540 else
7541 setup_incoming_varargs_64 (&next_cum);
7542 }
7543
7544 /* Checks if TYPE is of kind va_list char *. */
7545
7546 static bool
7547 is_va_list_char_pointer (tree type)
7548 {
7549 tree canonic;
7550
7551 /* For 32-bit it is always true. */
7552 if (!TARGET_64BIT)
7553 return true;
7554 canonic = ix86_canonical_va_list_type (type);
7555 return (canonic == ms_va_list_type_node
7556 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7557 }
7558
7559 /* Implement va_start. */
7560
7561 static void
7562 ix86_va_start (tree valist, rtx nextarg)
7563 {
7564 HOST_WIDE_INT words, n_gpr, n_fpr;
7565 tree f_gpr, f_fpr, f_ovf, f_sav;
7566 tree gpr, fpr, ovf, sav, t;
7567 tree type;
7568 rtx ovf_rtx;
7569
7570 if (flag_split_stack
7571 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7572 {
7573 unsigned int scratch_regno;
7574
7575 /* When we are splitting the stack, we can't refer to the stack
7576 arguments using internal_arg_pointer, because they may be on
7577 the old stack. The split stack prologue will arrange to
7578 leave a pointer to the old stack arguments in a scratch
7579 register, which we here copy to a pseudo-register. The split
7580 stack prologue can't set the pseudo-register directly because
7581 it (the prologue) runs before any registers have been saved. */
7582
7583 scratch_regno = split_stack_prologue_scratch_regno ();
7584 if (scratch_regno != INVALID_REGNUM)
7585 {
7586 rtx reg, seq;
7587
7588 reg = gen_reg_rtx (Pmode);
7589 cfun->machine->split_stack_varargs_pointer = reg;
7590
7591 start_sequence ();
7592 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
7593 seq = get_insns ();
7594 end_sequence ();
7595
7596 push_topmost_sequence ();
7597 emit_insn_after (seq, entry_of_function ());
7598 pop_topmost_sequence ();
7599 }
7600 }
7601
7602 /* Only 64bit target needs something special. */
7603 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7604 {
7605 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7606 std_expand_builtin_va_start (valist, nextarg);
7607 else
7608 {
7609 rtx va_r, next;
7610
7611 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
7612 next = expand_binop (ptr_mode, add_optab,
7613 cfun->machine->split_stack_varargs_pointer,
7614 crtl->args.arg_offset_rtx,
7615 NULL_RTX, 0, OPTAB_LIB_WIDEN);
7616 convert_move (va_r, next, 0);
7617 }
7618 return;
7619 }
7620
7621 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7622 f_fpr = DECL_CHAIN (f_gpr);
7623 f_ovf = DECL_CHAIN (f_fpr);
7624 f_sav = DECL_CHAIN (f_ovf);
7625
7626 valist = build_simple_mem_ref (valist);
7627 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7628 /* The following should be folded into the MEM_REF offset. */
7629 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7630 f_gpr, NULL_TREE);
7631 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7632 f_fpr, NULL_TREE);
7633 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7634 f_ovf, NULL_TREE);
7635 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7636 f_sav, NULL_TREE);
7637
7638 /* Count number of gp and fp argument registers used. */
7639 words = crtl->args.info.words;
7640 n_gpr = crtl->args.info.regno;
7641 n_fpr = crtl->args.info.sse_regno;
7642
7643 if (cfun->va_list_gpr_size)
7644 {
7645 type = TREE_TYPE (gpr);
7646 t = build2 (MODIFY_EXPR, type,
7647 gpr, build_int_cst (type, n_gpr * 8));
7648 TREE_SIDE_EFFECTS (t) = 1;
7649 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7650 }
7651
7652 if (TARGET_SSE && cfun->va_list_fpr_size)
7653 {
7654 type = TREE_TYPE (fpr);
7655 t = build2 (MODIFY_EXPR, type, fpr,
7656 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
7657 TREE_SIDE_EFFECTS (t) = 1;
7658 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7659 }
7660
7661 /* Find the overflow area. */
7662 type = TREE_TYPE (ovf);
7663 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7664 ovf_rtx = crtl->args.internal_arg_pointer;
7665 else
7666 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
7667 t = make_tree (type, ovf_rtx);
7668 if (words != 0)
7669 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
7670 t = build2 (MODIFY_EXPR, type, ovf, t);
7671 TREE_SIDE_EFFECTS (t) = 1;
7672 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7673
7674 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
7675 {
7676 /* Find the register save area.
7677 The function prologue saves it right above the stack frame. */
7678 type = TREE_TYPE (sav);
7679 t = make_tree (type, frame_pointer_rtx);
7680 if (!ix86_varargs_gpr_size)
7681 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
7682 t = build2 (MODIFY_EXPR, type, sav, t);
7683 TREE_SIDE_EFFECTS (t) = 1;
7684 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7685 }
7686 }
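
/* For illustration, a sketch of the counters initialized above: a function
   whose named arguments already consumed two GPRs and one SSE register
   starts with gp_offset = 2 * 8 = 16 and fp_offset = 48 + 1 * 16 = 64,
   since the 6 * 8 = 48 byte GPR block precedes the XMM block in the
   register save area. */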
7687
7688 /* Implement va_arg. */
7689
7690 static tree
7691 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7692 gimple_seq *post_p)
7693 {
7694 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
7695 tree f_gpr, f_fpr, f_ovf, f_sav;
7696 tree gpr, fpr, ovf, sav, t;
7697 int size, rsize;
7698 tree lab_false, lab_over = NULL_TREE;
7699 tree addr, t2;
7700 rtx container;
7701 int indirect_p = 0;
7702 tree ptrtype;
7703 enum machine_mode nat_mode;
7704 unsigned int arg_boundary;
7705
7706 /* Only 64bit target needs something special. */
7707 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7708 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7709
7710 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7711 f_fpr = DECL_CHAIN (f_gpr);
7712 f_ovf = DECL_CHAIN (f_fpr);
7713 f_sav = DECL_CHAIN (f_ovf);
7714
7715 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7716 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7717 valist = build_va_arg_indirect_ref (valist);
7718 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7719 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7720 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7721
7722 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7723 if (indirect_p)
7724 type = build_pointer_type (type);
7725 size = int_size_in_bytes (type);
7726 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7727
7728 nat_mode = type_natural_mode (type, NULL);
7729 switch (nat_mode)
7730 {
7731 case V8SFmode:
7732 case V8SImode:
7733 case V32QImode:
7734 case V16HImode:
7735 case V4DFmode:
7736 case V4DImode:
7737 /* Unnamed 256bit vector mode parameters are passed on stack. */
7738 if (!TARGET_64BIT_MS_ABI)
7739 {
7740 container = NULL;
7741 break;
7742 }
7743
7744 default:
7745 container = construct_container (nat_mode, TYPE_MODE (type),
7746 type, 0, X86_64_REGPARM_MAX,
7747 X86_64_SSE_REGPARM_MAX, intreg,
7748 0);
7749 break;
7750 }
7751
7752 /* Pull the value out of the saved registers. */
7753
7754 addr = create_tmp_var (ptr_type_node, "addr");
7755
7756 if (container)
7757 {
7758 int needed_intregs, needed_sseregs;
7759 bool need_temp;
7760 tree int_addr, sse_addr;
7761
7762 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7763 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7764
7765 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7766
7767 need_temp = (!REG_P (container)
7768 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7769 || TYPE_ALIGN (type) > 128));
7770
7771 /* In case we are passing a structure, verify that it is a consecutive block
7772 in the register save area. If not, we need to do moves. */
7773 if (!need_temp && !REG_P (container))
7774 {
7775 /* Verify that all registers are strictly consecutive */
7776 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7777 {
7778 int i;
7779
7780 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7781 {
7782 rtx slot = XVECEXP (container, 0, i);
7783 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7784 || INTVAL (XEXP (slot, 1)) != i * 16)
7785 need_temp = 1;
7786 }
7787 }
7788 else
7789 {
7790 int i;
7791
7792 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7793 {
7794 rtx slot = XVECEXP (container, 0, i);
7795 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7796 || INTVAL (XEXP (slot, 1)) != i * 8)
7797 need_temp = 1;
7798 }
7799 }
7800 }
7801 if (!need_temp)
7802 {
7803 int_addr = addr;
7804 sse_addr = addr;
7805 }
7806 else
7807 {
7808 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7809 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7810 }
7811
7812 /* First ensure that we fit completely in registers. */
7813 if (needed_intregs)
7814 {
7815 t = build_int_cst (TREE_TYPE (gpr),
7816 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7817 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7818 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7819 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7820 gimplify_and_add (t, pre_p);
7821 }
7822 if (needed_sseregs)
7823 {
7824 t = build_int_cst (TREE_TYPE (fpr),
7825 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7826 + X86_64_REGPARM_MAX * 8);
7827 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7828 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7829 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7830 gimplify_and_add (t, pre_p);
7831 }
7832
7833 /* Compute index to start of area used for integer regs. */
7834 if (needed_intregs)
7835 {
7836 /* int_addr = gpr + sav; */
7837 t = fold_build_pointer_plus (sav, gpr);
7838 gimplify_assign (int_addr, t, pre_p);
7839 }
7840 if (needed_sseregs)
7841 {
7842 /* sse_addr = fpr + sav; */
7843 t = fold_build_pointer_plus (sav, fpr);
7844 gimplify_assign (sse_addr, t, pre_p);
7845 }
7846 if (need_temp)
7847 {
7848 int i, prev_size = 0;
7849 tree temp = create_tmp_var (type, "va_arg_tmp");
7850
7851 /* addr = &temp; */
7852 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7853 gimplify_assign (addr, t, pre_p);
7854
7855 for (i = 0; i < XVECLEN (container, 0); i++)
7856 {
7857 rtx slot = XVECEXP (container, 0, i);
7858 rtx reg = XEXP (slot, 0);
7859 enum machine_mode mode = GET_MODE (reg);
7860 tree piece_type;
7861 tree addr_type;
7862 tree daddr_type;
7863 tree src_addr, src;
7864 int src_offset;
7865 tree dest_addr, dest;
7866 int cur_size = GET_MODE_SIZE (mode);
7867
7868 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
7869 prev_size = INTVAL (XEXP (slot, 1));
7870 if (prev_size + cur_size > size)
7871 {
7872 cur_size = size - prev_size;
7873 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
7874 if (mode == BLKmode)
7875 mode = QImode;
7876 }
7877 piece_type = lang_hooks.types.type_for_mode (mode, 1);
7878 if (mode == GET_MODE (reg))
7879 addr_type = build_pointer_type (piece_type);
7880 else
7881 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7882 true);
7883 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7884 true);
7885
7886 if (SSE_REGNO_P (REGNO (reg)))
7887 {
7888 src_addr = sse_addr;
7889 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7890 }
7891 else
7892 {
7893 src_addr = int_addr;
7894 src_offset = REGNO (reg) * 8;
7895 }
7896 src_addr = fold_convert (addr_type, src_addr);
7897 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
7898
7899 dest_addr = fold_convert (daddr_type, addr);
7900 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
7901 if (cur_size == GET_MODE_SIZE (mode))
7902 {
7903 src = build_va_arg_indirect_ref (src_addr);
7904 dest = build_va_arg_indirect_ref (dest_addr);
7905
7906 gimplify_assign (dest, src, pre_p);
7907 }
7908 else
7909 {
7910 tree copy
7911 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
7912 3, dest_addr, src_addr,
7913 size_int (cur_size));
7914 gimplify_and_add (copy, pre_p);
7915 }
7916 prev_size += cur_size;
7917 }
7918 }
7919
7920 if (needed_intregs)
7921 {
7922 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7923 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7924 gimplify_assign (gpr, t, pre_p);
7925 }
7926
7927 if (needed_sseregs)
7928 {
7929 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7930 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7931 gimplify_assign (fpr, t, pre_p);
7932 }
7933
7934 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7935
7936 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7937 }
7938
7939 /* ... otherwise out of the overflow area. */
7940
7941 /* When the caller aligns a parameter on the stack, an alignment beyond
7942 MAX_SUPPORTED_STACK_ALIGNMENT is clamped to
7943 MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee here with the
7944 caller. */
7945 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
7946 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7947 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7948
7949 /* Care for on-stack alignment if needed. */
7950 if (arg_boundary <= 64 || size == 0)
7951 t = ovf;
7952 else
7953 {
7954 HOST_WIDE_INT align = arg_boundary / 8;
7955 t = fold_build_pointer_plus_hwi (ovf, align - 1);
7956 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7957 build_int_cst (TREE_TYPE (t), -align));
7958 }
7959
7960 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7961 gimplify_assign (addr, t, pre_p);
7962
7963 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
7964 gimplify_assign (unshare_expr (ovf), t, pre_p);
7965
7966 if (container)
7967 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7968
7969 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
7970 addr = fold_convert (ptrtype, addr);
7971
7972 if (indirect_p)
7973 addr = build_va_arg_indirect_ref (addr);
7974 return build_va_arg_indirect_ref (addr);
7975 }
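
/* For illustration, the code generated above for va_arg (ap, int) is
   roughly equivalent to (a sketch, with the SysV x86-64 register counts):

       if (ap->gp_offset >= 6 * 8) goto overflow;
       addr = ap->reg_save_area + ap->gp_offset;
       ap->gp_offset += 8;
       goto done;
     overflow:
       addr = ap->overflow_arg_area;
       ap->overflow_arg_area = addr + 8;
     done:
       result = *(int *) addr;  */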
7976 \f
7977 /* Return true if OPNUM's MEM should be matched
7978 in movabs* patterns. */
7979
7980 bool
7981 ix86_check_movabs (rtx insn, int opnum)
7982 {
7983 rtx set, mem;
7984
7985 set = PATTERN (insn);
7986 if (GET_CODE (set) == PARALLEL)
7987 set = XVECEXP (set, 0, 0);
7988 gcc_assert (GET_CODE (set) == SET);
7989 mem = XEXP (set, opnum);
7990 while (GET_CODE (mem) == SUBREG)
7991 mem = SUBREG_REG (mem);
7992 gcc_assert (MEM_P (mem));
7993 return volatile_ok || !MEM_VOLATILE_P (mem);
7994 }
7995 \f
7996 /* Initialize the table of extra 80387 mathematical constants. */
7997
7998 static void
7999 init_ext_80387_constants (void)
8000 {
8001 static const char * cst[5] =
8002 {
8003 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8004 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8005 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8006 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8007 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8008 };
8009 int i;
8010
8011 for (i = 0; i < 5; i++)
8012 {
8013 real_from_string (&ext_80387_constants_table[i], cst[i]);
8014 /* Ensure each constant is rounded to XFmode precision. */
8015 real_convert (&ext_80387_constants_table[i],
8016 XFmode, &ext_80387_constants_table[i]);
8017 }
8018
8019 ext_80387_constants_init = 1;
8020 }
8021
8022 /* Return non-zero if the constant is something that
8023 can be loaded with a special instruction. */
8024
8025 int
8026 standard_80387_constant_p (rtx x)
8027 {
8028 enum machine_mode mode = GET_MODE (x);
8029
8030 REAL_VALUE_TYPE r;
8031
8032 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8033 return -1;
8034
8035 if (x == CONST0_RTX (mode))
8036 return 1;
8037 if (x == CONST1_RTX (mode))
8038 return 2;
8039
8040 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8041
8042 /* For XFmode constants, try to find a special 80387 instruction when
8043 optimizing for size or on those CPUs that benefit from them. */
8044 if (mode == XFmode
8045 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8046 {
8047 int i;
8048
8049 if (! ext_80387_constants_init)
8050 init_ext_80387_constants ();
8051
8052 for (i = 0; i < 5; i++)
8053 if (real_identical (&r, &ext_80387_constants_table[i]))
8054 return i + 3;
8055 }
8056
8057 /* Load of the constant -0.0 or -1.0 will be split as
8058 fldz;fchs or fld1;fchs sequence. */
8059 if (real_isnegzero (&r))
8060 return 8;
8061 if (real_identical (&r, &dconstm1))
8062 return 9;
8063
8064 return 0;
8065 }
8066
8067 /* Return the opcode of the special instruction to be used to load
8068 the constant X. */
8069
8070 const char *
8071 standard_80387_constant_opcode (rtx x)
8072 {
8073 switch (standard_80387_constant_p (x))
8074 {
8075 case 1:
8076 return "fldz";
8077 case 2:
8078 return "fld1";
8079 case 3:
8080 return "fldlg2";
8081 case 4:
8082 return "fldln2";
8083 case 5:
8084 return "fldl2e";
8085 case 6:
8086 return "fldl2t";
8087 case 7:
8088 return "fldpi";
8089 case 8:
8090 case 9:
8091 return "#";
8092 default:
8093 gcc_unreachable ();
8094 }
8095 }
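
/* For illustration, the mapping implemented by the two functions above:
   0.0 loads with fldz and 1.0 with fld1; when TARGET_EXT_80387_CONSTANTS
   is set (or when optimizing for size), log10(2), ln(2), log2(e),
   log2(10) and pi load with fldlg2, fldln2, fldl2e, fldl2t and fldpi;
   -0.0 and -1.0 are split into fldz;fchs and fld1;fchs sequences. */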
8096
8097 /* Return the CONST_DOUBLE representing the 80387 constant that is
8098 loaded by the specified special instruction. The argument IDX
8099 matches the return value from standard_80387_constant_p. */
8100
8101 rtx
8102 standard_80387_constant_rtx (int idx)
8103 {
8104 int i;
8105
8106 if (! ext_80387_constants_init)
8107 init_ext_80387_constants ();
8108
8109 switch (idx)
8110 {
8111 case 3:
8112 case 4:
8113 case 5:
8114 case 6:
8115 case 7:
8116 i = idx - 3;
8117 break;
8118
8119 default:
8120 gcc_unreachable ();
8121 }
8122
8123 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8124 XFmode);
8125 }
8126
8127 /* Return 1 if X is all 0s and 2 if X is all 1s
8128 in a supported SSE vector mode. */
8129
8130 int
8131 standard_sse_constant_p (rtx x)
8132 {
8133 enum machine_mode mode = GET_MODE (x);
8134
8135 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8136 return 1;
8137 if (vector_all_ones_operand (x, mode))
8138 switch (mode)
8139 {
8140 case V16QImode:
8141 case V8HImode:
8142 case V4SImode:
8143 case V2DImode:
8144 if (TARGET_SSE2)
8145 return 2;
8146 default:
8147 break;
8148 }
8149
8150 return 0;
8151 }
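
/* For illustration: standard_sse_constant_p returns 1 for
   CONST0_RTX (V4SFmode), which can be loaded with a single xorps/pxor,
   2 for an all-ones V4SImode vector when SSE2 is enabled (loaded with
   pcmpeqd), and 0 for anything else. */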
8152
8153 /* Return the opcode of the special instruction to be used to load
8154 the constant X. */
8155
8156 const char *
8157 standard_sse_constant_opcode (rtx insn, rtx x)
8158 {
8159 switch (standard_sse_constant_p (x))
8160 {
8161 case 1:
8162 switch (get_attr_mode (insn))
8163 {
8164 case MODE_TI:
8165 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8166 return "%vpxor\t%0, %d0";
8167 case MODE_V2DF:
8168 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8169 return "%vxorpd\t%0, %d0";
8170 case MODE_V4SF:
8171 return "%vxorps\t%0, %d0";
8172
8173 case MODE_OI:
8174 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8175 return "vpxor\t%x0, %x0, %x0";
8176 case MODE_V4DF:
8177 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8178 return "vxorpd\t%x0, %x0, %x0";
8179 case MODE_V8SF:
8180 return "vxorps\t%x0, %x0, %x0";
8181
8182 default:
8183 break;
8184 }
8185
8186 case 2:
8187 return "%vpcmpeqd\t%0, %d0";
8188 default:
8189 break;
8190 }
8191 gcc_unreachable ();
8192 }
8193
8194 /* Returns true if OP contains a symbol reference. */
8195
8196 bool
8197 symbolic_reference_mentioned_p (rtx op)
8198 {
8199 const char *fmt;
8200 int i;
8201
8202 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
8203 return true;
8204
8205 fmt = GET_RTX_FORMAT (GET_CODE (op));
8206 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
8207 {
8208 if (fmt[i] == 'E')
8209 {
8210 int j;
8211
8212 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
8213 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
8214 return true;
8215 }
8216
8217 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
8218 return true;
8219 }
8220
8221 return false;
8222 }
8223
8224 /* Return true if it is appropriate to emit `ret' instructions in the
8225 body of a function. Do this only if the epilogue is simple, needing a
8226 couple of insns. Prior to reloading, we can't tell how many registers
8227 must be saved, so return false then. Return false if there is no frame
8228 marker to de-allocate. */
8229
8230 bool
8231 ix86_can_use_return_insn_p (void)
8232 {
8233 struct ix86_frame frame;
8234
8235 if (! reload_completed || frame_pointer_needed)
8236 return 0;
8237
8238 /* Don't allow more than 32k pop, since that's all we can do
8239 with one instruction. */
8240 if (crtl->args.pops_args && crtl->args.size >= 32768)
8241 return 0;
8242
8243 ix86_compute_frame_layout (&frame);
8244 return (frame.stack_pointer_offset == UNITS_PER_WORD
8245 && (frame.nregs + frame.nsseregs) == 0);
8246 }
8247 \f
8248 /* Value should be nonzero if functions must have frame pointers.
8249 Zero means the frame pointer need not be set up (and parms may
8250 be accessed via the stack pointer) in functions that seem suitable. */
8251
8252 static bool
8253 ix86_frame_pointer_required (void)
8254 {
8255 /* If we accessed previous frames, then the generated code expects
8256 to be able to access the saved ebp value in our frame. */
8257 if (cfun->machine->accesses_prev_frame)
8258 return true;
8259
8260 /* Several x86 OSes need a frame pointer for other reasons,
8261 usually pertaining to setjmp. */
8262 if (SUBTARGET_FRAME_POINTER_REQUIRED)
8263 return true;
8264
8265 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8266 turns off the frame pointer by default. Turn it back on now if
8267 we've not got a leaf function. */
8268 if (TARGET_OMIT_LEAF_FRAME_POINTER
8269 && (!current_function_is_leaf
8270 || ix86_current_function_calls_tls_descriptor))
8271 return true;
8272
8273 if (crtl->profile && !flag_fentry)
8274 return true;
8275
8276 return false;
8277 }
8278
8279 /* Record that the current function accesses previous call frames. */
8280
8281 void
8282 ix86_setup_frame_addresses (void)
8283 {
8284 cfun->machine->accesses_prev_frame = 1;
8285 }
8286 \f
8287 #ifndef USE_HIDDEN_LINKONCE
8288 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
8289 # define USE_HIDDEN_LINKONCE 1
8290 # else
8291 # define USE_HIDDEN_LINKONCE 0
8292 # endif
8293 #endif
8294
8295 static int pic_labels_used;
8296
8297 /* Fills in the label name that should be used for a pc thunk for
8298 the given register. */
8299
8300 static void
8301 get_pc_thunk_name (char name[32], unsigned int regno)
8302 {
8303 gcc_assert (!TARGET_64BIT);
8304
8305 if (USE_HIDDEN_LINKONCE)
8306 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
8307 else
8308 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
8309 }
8310
8311
8312 /* This function generates code for -fpic that loads %ebx with
8313 the return address of the caller and then returns. */
8314
8315 static void
8316 ix86_code_end (void)
8317 {
8318 rtx xops[2];
8319 int regno;
8320
8321 #ifdef TARGET_SOLARIS
8322 solaris_code_end ();
8323 #endif
8324
8325 for (regno = AX_REG; regno <= SP_REG; regno++)
8326 {
8327 char name[32];
8328 tree decl;
8329
8330 if (!(pic_labels_used & (1 << regno)))
8331 continue;
8332
8333 get_pc_thunk_name (name, regno);
8334
8335 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8336 get_identifier (name),
8337 build_function_type_list (void_type_node, NULL_TREE));
8338 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8339 NULL_TREE, void_type_node);
8340 TREE_PUBLIC (decl) = 1;
8341 TREE_STATIC (decl) = 1;
8342
8343 #if TARGET_MACHO
8344 if (TARGET_MACHO)
8345 {
8346 switch_to_section (darwin_sections[text_coal_section]);
8347 fputs ("\t.weak_definition\t", asm_out_file);
8348 assemble_name (asm_out_file, name);
8349 fputs ("\n\t.private_extern\t", asm_out_file);
8350 assemble_name (asm_out_file, name);
8351 putc ('\n', asm_out_file);
8352 ASM_OUTPUT_LABEL (asm_out_file, name);
8353 DECL_WEAK (decl) = 1;
8354 }
8355 else
8356 #endif
8357 if (USE_HIDDEN_LINKONCE)
8358 {
8359 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8360
8361 targetm.asm_out.unique_section (decl, 0);
8362 switch_to_section (get_named_section (decl, NULL, 0));
8363
8364 targetm.asm_out.globalize_label (asm_out_file, name);
8365 fputs ("\t.hidden\t", asm_out_file);
8366 assemble_name (asm_out_file, name);
8367 putc ('\n', asm_out_file);
8368 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8369 }
8370 else
8371 {
8372 switch_to_section (text_section);
8373 ASM_OUTPUT_LABEL (asm_out_file, name);
8374 }
8375
8376 DECL_INITIAL (decl) = make_node (BLOCK);
8377 current_function_decl = decl;
8378 init_function_start (decl);
8379 first_function_block_is_cold = false;
8380 /* Make sure unwind info is emitted for the thunk if needed. */
8381 final_start_function (emit_barrier (), asm_out_file, 1);
8382
8383 /* Pad stack IP move with 4 instructions (two NOPs count
8384 as one instruction). */
8385 if (TARGET_PAD_SHORT_FUNCTION)
8386 {
8387 int i = 8;
8388
8389 while (i--)
8390 fputs ("\tnop\n", asm_out_file);
8391 }
8392
8393 xops[0] = gen_rtx_REG (Pmode, regno);
8394 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8395 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8396 fputs ("\tret\n", asm_out_file);
8397 final_end_function ();
8398 init_insn_lengths ();
8399 free_after_compilation (cfun);
8400 set_cfun (NULL);
8401 current_function_decl = NULL;
8402 }
8403
8404 if (flag_split_stack)
8405 file_end_indicate_split_stack ();
8406 }
8407
8408 /* Emit code for the SET_GOT patterns. */
8409
8410 const char *
8411 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8412 {
8413 rtx xops[3];
8414
8415 xops[0] = dest;
8416
8417 if (TARGET_VXWORKS_RTP && flag_pic)
8418 {
8419 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8420 xops[2] = gen_rtx_MEM (Pmode,
8421 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8422 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8423
8424 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8425 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8426 an unadorned address. */
8427 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8428 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8429 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8430 return "";
8431 }
8432
8433 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8434
8435 if (!flag_pic)
8436 {
8437 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8438
8439 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8440
8441 #if TARGET_MACHO
8442 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8443 is what will be referenced by the Mach-O PIC subsystem. */
8444 if (!label)
8445 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8446 #endif
8447
8448 targetm.asm_out.internal_label (asm_out_file, "L",
8449 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8450 }
8451 else
8452 {
8453 char name[32];
8454 get_pc_thunk_name (name, REGNO (dest));
8455 pic_labels_used |= 1 << REGNO (dest);
8456
8457 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8458 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8459 output_asm_insn ("call\t%X2", xops);
8460 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8461 is what will be referenced by the Mach-O PIC subsystem. */
8462 #if TARGET_MACHO
8463 if (!label)
8464 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8465 else
8466 targetm.asm_out.internal_label (asm_out_file, "L",
8467 CODE_LABEL_NUMBER (label));
8468 #endif
8469 }
8470
8471 if (!TARGET_MACHO)
8472 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
8473
8474 return "";
8475 }
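
/* For illustration, the typical 32-bit PIC sequence produced by
   output_set_got together with the pc thunk emitted by ix86_code_end is
   roughly:

       call __i686.get_pc_thunk.bx
       addl $_GLOBAL_OFFSET_TABLE_, %ebx

   where the thunk body is just a mov (%esp), %ebx followed by ret. */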
8476
8477 /* Generate a "push" pattern for input ARG. */
8478
8479 static rtx
8480 gen_push (rtx arg)
8481 {
8482 struct machine_function *m = cfun->machine;
8483
8484 if (m->fs.cfa_reg == stack_pointer_rtx)
8485 m->fs.cfa_offset += UNITS_PER_WORD;
8486 m->fs.sp_offset += UNITS_PER_WORD;
8487
8488 return gen_rtx_SET (VOIDmode,
8489 gen_rtx_MEM (Pmode,
8490 gen_rtx_PRE_DEC (Pmode,
8491 stack_pointer_rtx)),
8492 arg);
8493 }
8494
8495 /* Generate a "pop" pattern for input ARG. */
8496
8497 static rtx
8498 gen_pop (rtx arg)
8499 {
8500 return gen_rtx_SET (VOIDmode,
8501 arg,
8502 gen_rtx_MEM (Pmode,
8503 gen_rtx_POST_INC (Pmode,
8504 stack_pointer_rtx)));
8505 }
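
/* For illustration, on a 32-bit target gen_push (eax) returns the rtl

       (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI ax))

   and bumps the tracked sp_offset (and cfa_offset while the CFA is still
   the stack pointer) by UNITS_PER_WORD; gen_pop builds the matching
   post_inc load. */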
8506
8507 /* Return the number of an unused call-clobbered register if one is
8508 available for the entire function, or INVALID_REGNUM otherwise. */
8509
8510 static unsigned int
8511 ix86_select_alt_pic_regnum (void)
8512 {
8513 if (current_function_is_leaf
8514 && !crtl->profile
8515 && !ix86_current_function_calls_tls_descriptor)
8516 {
8517 int i, drap;
8518 /* Can't use the same register for both PIC and DRAP. */
8519 if (crtl->drap_reg)
8520 drap = REGNO (crtl->drap_reg);
8521 else
8522 drap = -1;
8523 for (i = 2; i >= 0; --i)
8524 if (i != drap && !df_regs_ever_live_p (i))
8525 return i;
8526 }
8527
8528 return INVALID_REGNUM;
8529 }
8530
8531 /* Return TRUE if we need to save REGNO. */
8532
8533 static bool
8534 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
8535 {
8536 if (pic_offset_table_rtx
8537 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8538 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8539 || crtl->profile
8540 || crtl->calls_eh_return
8541 || crtl->uses_const_pool))
8542 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
8543
8544 if (crtl->calls_eh_return && maybe_eh_return)
8545 {
8546 unsigned i;
8547 for (i = 0; ; i++)
8548 {
8549 unsigned test = EH_RETURN_DATA_REGNO (i);
8550 if (test == INVALID_REGNUM)
8551 break;
8552 if (test == regno)
8553 return true;
8554 }
8555 }
8556
8557 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8558 return true;
8559
8560 return (df_regs_ever_live_p (regno)
8561 && !call_used_regs[regno]
8562 && !fixed_regs[regno]
8563 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
8564 }
8565
8566 /* Return the number of saved general purpose registers. */
8567
8568 static int
8569 ix86_nsaved_regs (void)
8570 {
8571 int nregs = 0;
8572 int regno;
8573
8574 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8575 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8576 nregs ++;
8577 return nregs;
8578 }
8579
8580 /* Return the number of saved SSE registers. */
8581
8582 static int
8583 ix86_nsaved_sseregs (void)
8584 {
8585 int nregs = 0;
8586 int regno;
8587
8588 if (!TARGET_64BIT_MS_ABI)
8589 return 0;
8590 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8591 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8592 nregs ++;
8593 return nregs;
8594 }
8595
8596 /* Given FROM and TO register numbers, say whether this elimination is
8597 allowed. If stack alignment is needed, we can only replace argument
8598 pointer with hard frame pointer, or replace frame pointer with stack
8599 pointer. Otherwise, frame pointer elimination is automatically
8600 handled and all other eliminations are valid. */
8601
8602 static bool
8603 ix86_can_eliminate (const int from, const int to)
8604 {
8605 if (stack_realign_fp)
8606 return ((from == ARG_POINTER_REGNUM
8607 && to == HARD_FRAME_POINTER_REGNUM)
8608 || (from == FRAME_POINTER_REGNUM
8609 && to == STACK_POINTER_REGNUM));
8610 else
8611 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8612 }
8613
8614 /* Return the offset between two registers, one to be eliminated, and the other
8615 its replacement, at the start of a routine. */
8616
8617 HOST_WIDE_INT
8618 ix86_initial_elimination_offset (int from, int to)
8619 {
8620 struct ix86_frame frame;
8621 ix86_compute_frame_layout (&frame);
8622
8623 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8624 return frame.hard_frame_pointer_offset;
8625 else if (from == FRAME_POINTER_REGNUM
8626 && to == HARD_FRAME_POINTER_REGNUM)
8627 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
8628 else
8629 {
8630 gcc_assert (to == STACK_POINTER_REGNUM);
8631
8632 if (from == ARG_POINTER_REGNUM)
8633 return frame.stack_pointer_offset;
8634
8635 gcc_assert (from == FRAME_POINTER_REGNUM);
8636 return frame.stack_pointer_offset - frame.frame_pointer_offset;
8637 }
8638 }
8639
8640 /* In a dynamically-aligned function, we can't know the offset from
8641 stack pointer to frame pointer, so we must ensure that setjmp
8642 eliminates fp against the hard fp (%ebp) rather than trying to
8643 index from %esp up to the top of the frame across a gap that is
8644 of unknown (at compile-time) size. */
8645 static rtx
8646 ix86_builtin_setjmp_frame_value (void)
8647 {
8648 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
8649 }
8650
8651 /* When using -fsplit-stack, the allocation routines set a field in
8652 the TCB to the bottom of the stack plus this much space, measured
8653 in bytes. */
8654
8655 #define SPLIT_STACK_AVAILABLE 256
8656
8657 /* Fill structure ix86_frame about frame of currently computed function. */
8658
8659 static void
8660 ix86_compute_frame_layout (struct ix86_frame *frame)
8661 {
8662 unsigned int stack_alignment_needed;
8663 HOST_WIDE_INT offset;
8664 unsigned int preferred_alignment;
8665 HOST_WIDE_INT size = get_frame_size ();
8666 HOST_WIDE_INT to_allocate;
8667
8668 frame->nregs = ix86_nsaved_regs ();
8669 frame->nsseregs = ix86_nsaved_sseregs ();
8670
8671 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
8672 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
8673
8674 /* The 64-bit MS ABI seems to require the stack alignment to always be 16,
8675 except for function prologues and leaf functions. */
8676 if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
8677 && (!current_function_is_leaf || cfun->calls_alloca != 0
8678 || ix86_current_function_calls_tls_descriptor))
8679 {
8680 preferred_alignment = 16;
8681 stack_alignment_needed = 16;
8682 crtl->preferred_stack_boundary = 128;
8683 crtl->stack_alignment_needed = 128;
8684 }
8685
8686 gcc_assert (!size || stack_alignment_needed);
8687 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
8688 gcc_assert (preferred_alignment <= stack_alignment_needed);
8689
8690 /* For SEH we have to limit the amount of code movement into the prologue.
8691 At present we do this via a BLOCKAGE, at which point there's very little
8692 scheduling that can be done, which means that there's very little point
8693 in doing anything except PUSHs. */
8694 if (TARGET_SEH)
8695 cfun->machine->use_fast_prologue_epilogue = false;
8696
8697 /* During reload iterations the number of registers saved can change.
8698 Recompute the value as needed. Do not recompute when the number of registers
8699 did not change, as reload makes multiple calls to the function and does not
8700 expect the decision to change within a single iteration. */
8701 else if (!optimize_function_for_size_p (cfun)
8702 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
8703 {
8704 int count = frame->nregs;
8705 struct cgraph_node *node = cgraph_get_node (current_function_decl);
8706
8707 cfun->machine->use_fast_prologue_epilogue_nregs = count;
8708
8709 /* The fast prologue uses move instead of push to save registers. This
8710 is significantly longer, but also executes faster as modern hardware
8711 can execute the moves in parallel, but can't do that for push/pop.
8712
8713 Be careful about choosing which prologue to emit: when the function takes
8714 many instructions to execute we may use the slow version, as well as when
8715 the function is known to be outside a hot spot (known with feedback
8716 only). Weight the size of the function by the number of registers to
8717 save, as it is cheap to use one or two push instructions but very slow
8718 to use many of them. */
8719 if (count)
8720 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
8721 if (node->frequency < NODE_FREQUENCY_NORMAL
8722 || (flag_branch_probabilities
8723 && node->frequency < NODE_FREQUENCY_HOT))
8724 cfun->machine->use_fast_prologue_epilogue = false;
8725 else
8726 cfun->machine->use_fast_prologue_epilogue
8727 = !expensive_function_p (count);
8728 }
8729 if (TARGET_PROLOGUE_USING_MOVE
8730 && cfun->machine->use_fast_prologue_epilogue)
8731 frame->save_regs_using_mov = true;
8732 else
8733 frame->save_regs_using_mov = false;
8734
8735 /* If static stack checking is enabled and done with probes, the registers
8736 need to be saved before allocating the frame. */
8737 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
8738 frame->save_regs_using_mov = false;
8739
8740 /* Skip return address. */
8741 offset = UNITS_PER_WORD;
8742
8743 /* Skip pushed static chain. */
8744 if (ix86_static_chain_on_stack)
8745 offset += UNITS_PER_WORD;
8746
8747 /* Skip saved base pointer. */
8748 if (frame_pointer_needed)
8749 offset += UNITS_PER_WORD;
8750 frame->hfp_save_offset = offset;
8751
8752 /* The traditional frame pointer location is at the top of the frame. */
8753 frame->hard_frame_pointer_offset = offset;
8754
8755 /* Register save area */
8756 offset += frame->nregs * UNITS_PER_WORD;
8757 frame->reg_save_offset = offset;
8758
8759 /* Align and set SSE register save area. */
8760 if (frame->nsseregs)
8761 {
8762 /* The only ABI that has saved SSE registers (Win64) also has a
8763 16-byte aligned default stack, and thus we don't need to be
8764 within the re-aligned local stack frame to save them. */
8765 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
8766 offset = (offset + 16 - 1) & -16;
8767 offset += frame->nsseregs * 16;
8768 }
8769 frame->sse_reg_save_offset = offset;
8770
8771 /* The re-aligned stack starts here. Values before this point are not
8772 directly comparable with values below this point. In order to make
8773 sure that no value happens to be the same before and after, force
8774 the alignment computation below to add a non-zero value. */
8775 if (stack_realign_fp)
8776 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
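/* For illustration (not part of the original logic): with 16-byte alignment,
   an offset of 40 becomes 48 and an offset of 48 becomes 64, so the result is
   always strictly greater than the input, unlike the usual
   (offset + align - 1) & -align rounding used elsewhere in this function.  */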
8777
8778 /* Va-arg area */
8779 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
8780 offset += frame->va_arg_size;
8781
8782 /* Align start of frame for local function. */
8783 if (stack_realign_fp
8784 || offset != frame->sse_reg_save_offset
8785 || size != 0
8786 || !current_function_is_leaf
8787 || cfun->calls_alloca
8788 || ix86_current_function_calls_tls_descriptor)
8789 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
8790
8791 /* Frame pointer points here. */
8792 frame->frame_pointer_offset = offset;
8793
8794 offset += size;
8795
8796 /* Add outgoing arguments area. Can be skipped if we eliminated
8797 all the function calls as dead code.
8798 Skipping is, however, impossible when the function calls alloca, as the
8799 alloca expander assumes that the last crtl->outgoing_args_size bytes
8800 of the stack frame are unused. */
8801 if (ACCUMULATE_OUTGOING_ARGS
8802 && (!current_function_is_leaf || cfun->calls_alloca
8803 || ix86_current_function_calls_tls_descriptor))
8804 {
8805 offset += crtl->outgoing_args_size;
8806 frame->outgoing_arguments_size = crtl->outgoing_args_size;
8807 }
8808 else
8809 frame->outgoing_arguments_size = 0;
8810
8811 /* Align stack boundary. Only needed if we're calling another function
8812 or using alloca. */
8813 if (!current_function_is_leaf || cfun->calls_alloca
8814 || ix86_current_function_calls_tls_descriptor)
8815 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
8816
8817 /* We've reached end of stack frame. */
8818 frame->stack_pointer_offset = offset;
8819
8820 /* Size prologue needs to allocate. */
8821 to_allocate = offset - frame->sse_reg_save_offset;
8822
8823 if ((!to_allocate && frame->nregs <= 1)
8824 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
8825 frame->save_regs_using_mov = false;
8826
8827 if (ix86_using_red_zone ()
8828 && current_function_sp_is_unchanging
8829 && current_function_is_leaf
8830 && !ix86_current_function_calls_tls_descriptor)
8831 {
8832 frame->red_zone_size = to_allocate;
8833 if (frame->save_regs_using_mov)
8834 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8835 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
8836 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
8837 }
8838 else
8839 frame->red_zone_size = 0;
8840 frame->stack_pointer_offset -= frame->red_zone_size;
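/* For illustration (the exact values are assumptions taken from the x86-64
   psABI and i386.h, not restated here): with a 128-byte red zone and 8 bytes
   kept in reserve, red_zone_size above is capped at 120 bytes and the stack
   pointer offset shrinks by that amount.  */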
8841
8842 /* The SEH frame pointer location is near the bottom of the frame.
8843 This is enforced by the fact that the difference between the
8844 stack pointer and the frame pointer is limited to 240 bytes in
8845 the unwind data structure. */
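/* For illustration: if that difference were 400 bytes, the code below would
   move the frame pointer so the difference becomes 128, which satisfies both
   the 240-byte limit and the 16-byte alignment check.  */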
8846 if (TARGET_SEH)
8847 {
8848 HOST_WIDE_INT diff;
8849
8850 /* If we can leave the frame pointer where it is, do so. */
8851 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
8852 if (diff > 240 || (diff & 15) != 0)
8853 {
8854 /* Ideally we'd determine what portion of the local stack frame
8855 (within the constraint of the lowest 240) is most heavily used.
8856 But without that complication, simply bias the frame pointer
8857 by 128 bytes so as to maximize the amount of the local stack
8858 frame that is addressable with 8-bit offsets. */
8859 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
8860 }
8861 }
8862 }
8863
8864 /* This is semi-inlined memory_address_length, but simplified
8865 since we know that we're always dealing with reg+offset, and
8866 to avoid having to create and discard all that rtl. */
8867
8868 static inline int
8869 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
8870 {
8871 int len = 4;
8872
8873 if (offset == 0)
8874 {
8875 /* EBP and R13 cannot be encoded without an offset. */
8876 len = (regno == BP_REG || regno == R13_REG);
8877 }
8878 else if (IN_RANGE (offset, -128, 127))
8879 len = 1;
8880
8881 /* ESP and R12 must be encoded with a SIB byte. */
8882 if (regno == SP_REG || regno == R12_REG)
8883 len++;
8884
8885 return len;
8886 }
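/* For illustration, choose_baseaddr_len above gives, e.g.:
   (EAX, 0)    -> 0 (no displacement needed),
   (EBP, 0)    -> 1 (a zero disp8 is still required),
   (ESP, 8)    -> 2 (disp8 plus a SIB byte),
   (EBX, 1000) -> 4 (disp32).  */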
8887
8888 /* Return an RTX that points to CFA_OFFSET within the stack frame.
8889 The valid base registers are taken from CFUN->MACHINE->FS. */
8890
8891 static rtx
8892 choose_baseaddr (HOST_WIDE_INT cfa_offset)
8893 {
8894 const struct machine_function *m = cfun->machine;
8895 rtx base_reg = NULL;
8896 HOST_WIDE_INT base_offset = 0;
8897
8898 if (m->use_fast_prologue_epilogue)
8899 {
8900 /* Choose the base register most likely to allow the most scheduling
8901 opportunities. Generally FP is valid throughout the function,
8902 while DRAP must be reloaded within the epilogue. But choose either
8903 over the SP due to increased encoding size. */
8904
8905 if (m->fs.fp_valid)
8906 {
8907 base_reg = hard_frame_pointer_rtx;
8908 base_offset = m->fs.fp_offset - cfa_offset;
8909 }
8910 else if (m->fs.drap_valid)
8911 {
8912 base_reg = crtl->drap_reg;
8913 base_offset = 0 - cfa_offset;
8914 }
8915 else if (m->fs.sp_valid)
8916 {
8917 base_reg = stack_pointer_rtx;
8918 base_offset = m->fs.sp_offset - cfa_offset;
8919 }
8920 }
8921 else
8922 {
8923 HOST_WIDE_INT toffset;
8924 int len = 16, tlen;
8925
8926 /* Choose the base register with the smallest address encoding.
8927 With a tie, choose FP > DRAP > SP. */
8928 if (m->fs.sp_valid)
8929 {
8930 base_reg = stack_pointer_rtx;
8931 base_offset = m->fs.sp_offset - cfa_offset;
8932 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
8933 }
8934 if (m->fs.drap_valid)
8935 {
8936 toffset = 0 - cfa_offset;
8937 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
8938 if (tlen <= len)
8939 {
8940 base_reg = crtl->drap_reg;
8941 base_offset = toffset;
8942 len = tlen;
8943 }
8944 }
8945 if (m->fs.fp_valid)
8946 {
8947 toffset = m->fs.fp_offset - cfa_offset;
8948 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
8949 if (tlen <= len)
8950 {
8951 base_reg = hard_frame_pointer_rtx;
8952 base_offset = toffset;
8953 len = tlen;
8954 }
8955 }
8956 }
8957 gcc_assert (base_reg != NULL);
8958
8959 return plus_constant (base_reg, base_offset);
8960 }
8961
8962 /* Emit code to save registers in the prologue. */
8963
8964 static void
8965 ix86_emit_save_regs (void)
8966 {
8967 unsigned int regno;
8968 rtx insn;
8969
8970 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
8971 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8972 {
8973 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
8974 RTX_FRAME_RELATED_P (insn) = 1;
8975 }
8976 }
8977
8978 /* Emit a single register save at CFA - CFA_OFFSET. */
8979
8980 static void
8981 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
8982 HOST_WIDE_INT cfa_offset)
8983 {
8984 struct machine_function *m = cfun->machine;
8985 rtx reg = gen_rtx_REG (mode, regno);
8986 rtx mem, addr, base, insn;
8987
8988 addr = choose_baseaddr (cfa_offset);
8989 mem = gen_frame_mem (mode, addr);
8990
8991 /* For SSE saves, we need to indicate the 128-bit alignment. */
8992 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
8993
8994 insn = emit_move_insn (mem, reg);
8995 RTX_FRAME_RELATED_P (insn) = 1;
8996
8997 base = addr;
8998 if (GET_CODE (base) == PLUS)
8999 base = XEXP (base, 0);
9000 gcc_checking_assert (REG_P (base));
9001
9002 /* When saving registers into a re-aligned local stack frame, avoid
9003 any tricky guessing by dwarf2out. */
9004 if (m->fs.realigned)
9005 {
9006 gcc_checking_assert (stack_realign_drap);
9007
9008 if (regno == REGNO (crtl->drap_reg))
9009 {
9010 /* A bit of a hack. We force the DRAP register to be saved in
9011 the re-aligned stack frame, which provides us with a copy
9012 of the CFA that will last past the prologue. Install it. */
9013 gcc_checking_assert (cfun->machine->fs.fp_valid);
9014 addr = plus_constant (hard_frame_pointer_rtx,
9015 cfun->machine->fs.fp_offset - cfa_offset);
9016 mem = gen_rtx_MEM (mode, addr);
9017 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9018 }
9019 else
9020 {
9021 /* The frame pointer is a stable reference within the
9022 aligned frame. Use it. */
9023 gcc_checking_assert (cfun->machine->fs.fp_valid);
9024 addr = plus_constant (hard_frame_pointer_rtx,
9025 cfun->machine->fs.fp_offset - cfa_offset);
9026 mem = gen_rtx_MEM (mode, addr);
9027 add_reg_note (insn, REG_CFA_EXPRESSION,
9028 gen_rtx_SET (VOIDmode, mem, reg));
9029 }
9030 }
9031
9032 /* The memory may not be relative to the current CFA register,
9033 which means that we may need to generate a new pattern for
9034 use by the unwind info. */
9035 else if (base != m->fs.cfa_reg)
9036 {
9037 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
9038 mem = gen_rtx_MEM (mode, addr);
9039 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9040 }
9041 }
9042
9043 /* Emit code to save registers using MOV insns.
9044 First register is stored at CFA - CFA_OFFSET. */
9045 static void
9046 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9047 {
9048 unsigned int regno;
9049
9050 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9051 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9052 {
9053 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
9054 cfa_offset -= UNITS_PER_WORD;
9055 }
9056 }
9057
9058 /* Emit code to save SSE registers using MOV insns.
9059 First register is stored at CFA - CFA_OFFSET. */
9060 static void
9061 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9062 {
9063 unsigned int regno;
9064
9065 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9066 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9067 {
9068 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9069 cfa_offset -= 16;
9070 }
9071 }
9072
9073 static GTY(()) rtx queued_cfa_restores;
9074
9075 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
9076 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9077 Don't add the note if the previously saved value will be left untouched
9078 within the stack red-zone until return, as unwinders can find the same value
9079 in the register and on the stack. */
9080
9081 static void
9082 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9083 {
9084 if (cfa_offset <= cfun->machine->fs.red_zone_offset)
9085 return;
9086
9087 if (insn)
9088 {
9089 add_reg_note (insn, REG_CFA_RESTORE, reg);
9090 RTX_FRAME_RELATED_P (insn) = 1;
9091 }
9092 else
9093 queued_cfa_restores
9094 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9095 }
9096
9097 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9098
9099 static void
9100 ix86_add_queued_cfa_restore_notes (rtx insn)
9101 {
9102 rtx last;
9103 if (!queued_cfa_restores)
9104 return;
9105 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9106 ;
9107 XEXP (last, 1) = REG_NOTES (insn);
9108 REG_NOTES (insn) = queued_cfa_restores;
9109 queued_cfa_restores = NULL_RTX;
9110 RTX_FRAME_RELATED_P (insn) = 1;
9111 }
9112
9113 /* Expand prologue or epilogue stack adjustment.
9114 The pattern exists to put a dependency on all ebp-based memory accesses.
9115 STYLE should be negative if instructions should be marked as frame related,
9116 zero if %r11 register is live and cannot be freely used and positive
9117 otherwise. */
9118
9119 static void
9120 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9121 int style, bool set_cfa)
9122 {
9123 struct machine_function *m = cfun->machine;
9124 rtx insn;
9125 bool add_frame_related_expr = false;
9126
9127 if (! TARGET_64BIT)
9128 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9129 else if (x86_64_immediate_operand (offset, DImode))
9130 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9131 else
9132 {
9133 rtx tmp;
9134 /* r11 is used by indirect sibcall return as well, set before the
9135 epilogue and used after the epilogue. */
9136 if (style)
9137 tmp = gen_rtx_REG (DImode, R11_REG);
9138 else
9139 {
9140 gcc_assert (src != hard_frame_pointer_rtx
9141 && dest != hard_frame_pointer_rtx);
9142 tmp = hard_frame_pointer_rtx;
9143 }
9144 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9145 if (style < 0)
9146 add_frame_related_expr = true;
9147
9148 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9149 }
9150
9151 insn = emit_insn (insn);
9152 if (style >= 0)
9153 ix86_add_queued_cfa_restore_notes (insn);
9154
9155 if (set_cfa)
9156 {
9157 rtx r;
9158
9159 gcc_assert (m->fs.cfa_reg == src);
9160 m->fs.cfa_offset += INTVAL (offset);
9161 m->fs.cfa_reg = dest;
9162
9163 r = gen_rtx_PLUS (Pmode, src, offset);
9164 r = gen_rtx_SET (VOIDmode, dest, r);
9165 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9166 RTX_FRAME_RELATED_P (insn) = 1;
9167 }
9168 else if (style < 0)
9169 {
9170 RTX_FRAME_RELATED_P (insn) = 1;
9171 if (add_frame_related_expr)
9172 {
9173 rtx r = gen_rtx_PLUS (Pmode, src, offset);
9174 r = gen_rtx_SET (VOIDmode, dest, r);
9175 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
9176 }
9177 }
9178
9179 if (dest == stack_pointer_rtx)
9180 {
9181 HOST_WIDE_INT ooffset = m->fs.sp_offset;
9182 bool valid = m->fs.sp_valid;
9183
9184 if (src == hard_frame_pointer_rtx)
9185 {
9186 valid = m->fs.fp_valid;
9187 ooffset = m->fs.fp_offset;
9188 }
9189 else if (src == crtl->drap_reg)
9190 {
9191 valid = m->fs.drap_valid;
9192 ooffset = 0;
9193 }
9194 else
9195 {
9196 /* Else there are two possibilities: SP itself, which we set
9197 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
9198 taken care of by hand along the eh_return path. */
9199 gcc_checking_assert (src == stack_pointer_rtx
9200 || offset == const0_rtx);
9201 }
9202
9203 m->fs.sp_offset = ooffset - INTVAL (offset);
9204 m->fs.sp_valid = valid;
9205 }
9206 }
9207
9208 /* Find an available register to be used as the dynamic realign argument
9209 pointer register. Such a register will be written in the prologue and
9210 used at the beginning of the body, so it must not be
9211 1. a parameter passing register.
9212 2. the GOT pointer.
9213 We reuse the static-chain register if it is available. Otherwise, we
9214 use DI for i386 and R13 for x86-64. We chose R13 since it has a
9215 shorter encoding.
9216
9217 Return: the regno of chosen register. */
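/* For illustration of the choices made below: a 64-bit function with no
   static chain and no tail calls gets R10; a 32-bit cdecl function under the
   same conditions with regparm 0 gets ECX; a 32-bit fastcall function falls
   back to EDI.  */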
9218
9219 static unsigned int
9220 find_drap_reg (void)
9221 {
9222 tree decl = cfun->decl;
9223
9224 if (TARGET_64BIT)
9225 {
9226 /* Use R13 for a nested function or a function that needs a static chain.
9227 Since a function with a tail call may use any caller-saved
9228 registers in the epilogue, the DRAP must not use a caller-saved
9229 register in such a case. */
9230 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9231 return R13_REG;
9232
9233 return R10_REG;
9234 }
9235 else
9236 {
9237 /* Use DI for a nested function or a function that needs a static chain.
9238 Since a function with a tail call may use any caller-saved
9239 registers in the epilogue, the DRAP must not use a caller-saved
9240 register in such a case. */
9241 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9242 return DI_REG;
9243
9244 /* Reuse static chain register if it isn't used for parameter
9245 passing. */
9246 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
9247 {
9248 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
9249 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
9250 return CX_REG;
9251 }
9252 return DI_REG;
9253 }
9254 }
9255
9256 /* Return minimum incoming stack alignment. */
9257
9258 static unsigned int
9259 ix86_minimum_incoming_stack_boundary (bool sibcall)
9260 {
9261 unsigned int incoming_stack_boundary;
9262
9263 /* Prefer the one specified at command line. */
9264 if (ix86_user_incoming_stack_boundary)
9265 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
9266 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
9267 if -mstackrealign is used, this isn't a sibcall check, and the
9268 estimated stack alignment is 128 bits. */
9269 else if (!sibcall
9270 && !TARGET_64BIT
9271 && ix86_force_align_arg_pointer
9272 && crtl->stack_alignment_estimated == 128)
9273 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9274 else
9275 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
9276
9277 /* Incoming stack alignment can be changed on individual functions
9278 via force_align_arg_pointer attribute. We use the smallest
9279 incoming stack boundary. */
9280 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
9281 && lookup_attribute (ix86_force_align_arg_pointer_string,
9282 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
9283 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9284
9285 /* The incoming stack frame has to be aligned at least at
9286 parm_stack_boundary. */
9287 if (incoming_stack_boundary < crtl->parm_stack_boundary)
9288 incoming_stack_boundary = crtl->parm_stack_boundary;
9289
9290 /* The stack at the entry of main is aligned by the runtime. We use the
9291 smallest incoming stack boundary. */
9292 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
9293 && DECL_NAME (current_function_decl)
9294 && MAIN_NAME_P (DECL_NAME (current_function_decl))
9295 && DECL_FILE_SCOPE_P (current_function_decl))
9296 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
9297
9298 return incoming_stack_boundary;
9299 }
9300
9301 /* Update incoming stack boundary and estimated stack alignment. */
9302
9303 static void
9304 ix86_update_stack_boundary (void)
9305 {
9306 ix86_incoming_stack_boundary
9307 = ix86_minimum_incoming_stack_boundary (false);
9308
9309 /* x86_64 vararg needs 16byte stack alignment for register save
9310 area. */
9311 if (TARGET_64BIT
9312 && cfun->stdarg
9313 && crtl->stack_alignment_estimated < 128)
9314 crtl->stack_alignment_estimated = 128;
9315 }
9316
9317 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9318 needed or an rtx for DRAP otherwise. */
9319
9320 static rtx
9321 ix86_get_drap_rtx (void)
9322 {
9323 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9324 crtl->need_drap = true;
9325
9326 if (stack_realign_drap)
9327 {
9328 /* Assign DRAP to vDRAP and return vDRAP. */
9329 unsigned int regno = find_drap_reg ();
9330 rtx drap_vreg;
9331 rtx arg_ptr;
9332 rtx seq, insn;
9333
9334 arg_ptr = gen_rtx_REG (Pmode, regno);
9335 crtl->drap_reg = arg_ptr;
9336
9337 start_sequence ();
9338 drap_vreg = copy_to_reg (arg_ptr);
9339 seq = get_insns ();
9340 end_sequence ();
9341
9342 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
9343 if (!optimize)
9344 {
9345 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9346 RTX_FRAME_RELATED_P (insn) = 1;
9347 }
9348 return drap_vreg;
9349 }
9350 else
9351 return NULL;
9352 }
9353
9354 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9355
9356 static rtx
9357 ix86_internal_arg_pointer (void)
9358 {
9359 return virtual_incoming_args_rtx;
9360 }
9361
9362 struct scratch_reg {
9363 rtx reg;
9364 bool saved;
9365 };
9366
9367 /* Return a short-lived scratch register for use on function entry.
9368 In 32-bit mode, it is valid only after the registers are saved
9369 in the prologue. This register must be released by means of
9370 release_scratch_register_on_entry once it is dead. */
9371
9372 static void
9373 get_scratch_register_on_entry (struct scratch_reg *sr)
9374 {
9375 int regno;
9376
9377 sr->saved = false;
9378
9379 if (TARGET_64BIT)
9380 {
9381 /* We always use R11 in 64-bit mode. */
9382 regno = R11_REG;
9383 }
9384 else
9385 {
9386 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9387 bool fastcall_p
9388 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9389 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9390 int regparm = ix86_function_regparm (fntype, decl);
9391 int drap_regno
9392 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9393
9394 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9395 for the static chain register. */
9396 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9397 && drap_regno != AX_REG)
9398 regno = AX_REG;
9399 else if (regparm < 2 && drap_regno != DX_REG)
9400 regno = DX_REG;
9401 /* ecx is the static chain register. */
9402 else if (regparm < 3 && !fastcall_p && !static_chain_p
9403 && drap_regno != CX_REG)
9404 regno = CX_REG;
9405 else if (ix86_save_reg (BX_REG, true))
9406 regno = BX_REG;
9407 /* esi is the static chain register. */
9408 else if (!(regparm == 3 && static_chain_p)
9409 && ix86_save_reg (SI_REG, true))
9410 regno = SI_REG;
9411 else if (ix86_save_reg (DI_REG, true))
9412 regno = DI_REG;
9413 else
9414 {
9415 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9416 sr->saved = true;
9417 }
9418 }
9419
9420 sr->reg = gen_rtx_REG (Pmode, regno);
9421 if (sr->saved)
9422 {
9423 rtx insn = emit_insn (gen_push (sr->reg));
9424 RTX_FRAME_RELATED_P (insn) = 1;
9425 }
9426 }
9427
9428 /* Release a scratch register obtained from the preceding function. */
9429
9430 static void
9431 release_scratch_register_on_entry (struct scratch_reg *sr)
9432 {
9433 if (sr->saved)
9434 {
9435 rtx x, insn = emit_insn (gen_pop (sr->reg));
9436
9437 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9438 RTX_FRAME_RELATED_P (insn) = 1;
9439 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9440 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9441 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9442 }
9443 }
9444
9445 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
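/* For illustration: PROBE_INTERVAL is 1 << STACK_CHECK_PROBE_INTERVAL_EXP
   bytes; with the usual exponent of 12 that is 4096 bytes, i.e. one page is
   probed at a time (the exact value is target-configurable).  */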
9446
9447 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9448
9449 static void
9450 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9451 {
9452 /* We skip the probe for the first interval + a small dope of 4 words and
9453 probe that many bytes past the specified size to maintain a protection
9454 area at the bottom of the stack. */
9455 const int dope = 4 * UNITS_PER_WORD;
9456 rtx size_rtx = GEN_INT (size), last;
9457
9458 /* See if we have a constant small number of probes to generate. If so,
9459 that's the easy case. The run-time loop is made up of 11 insns in the
9460 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9461 for n # of intervals. */
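/* As a worked example of the unrolled case below (assuming a 4096-byte
   PROBE_INTERVAL): for SIZE == 10000 the stack pointer is decremented by
   2*4096 + dope, then 4096, then 1808, with a probe after each step, and the
   final adjustment adds 4096 + dope back, for a net decrement of exactly
   10000 bytes.  */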
9462 if (size <= 5 * PROBE_INTERVAL)
9463 {
9464 HOST_WIDE_INT i, adjust;
9465 bool first_probe = true;
9466
9467 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9468 values of N from 1 until it exceeds SIZE. If only one probe is
9469 needed, this will not generate any code. Then adjust and probe
9470 to PROBE_INTERVAL + SIZE. */
9471 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9472 {
9473 if (first_probe)
9474 {
9475 adjust = 2 * PROBE_INTERVAL + dope;
9476 first_probe = false;
9477 }
9478 else
9479 adjust = PROBE_INTERVAL;
9480
9481 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9482 plus_constant (stack_pointer_rtx, -adjust)));
9483 emit_stack_probe (stack_pointer_rtx);
9484 }
9485
9486 if (first_probe)
9487 adjust = size + PROBE_INTERVAL + dope;
9488 else
9489 adjust = size + PROBE_INTERVAL - i;
9490
9491 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9492 plus_constant (stack_pointer_rtx, -adjust)));
9493 emit_stack_probe (stack_pointer_rtx);
9494
9495 /* Adjust back to account for the additional first interval. */
9496 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9497 plus_constant (stack_pointer_rtx,
9498 PROBE_INTERVAL + dope)));
9499 }
9500
9501 /* Otherwise, do the same as above, but in a loop. Note that we must be
9502 extra careful with variables wrapping around because we might be at
9503 the very top (or the very bottom) of the address space and we have
9504 to be able to handle this case properly; in particular, we use an
9505 equality test for the loop condition. */
9506 else
9507 {
9508 HOST_WIDE_INT rounded_size;
9509 struct scratch_reg sr;
9510
9511 get_scratch_register_on_entry (&sr);
9512
9513
9514 /* Step 1: round SIZE to the previous multiple of the interval. */
9515
9516 rounded_size = size & -PROBE_INTERVAL;
9517
9518
9519 /* Step 2: compute initial and final value of the loop counter. */
9520
9521 /* SP = SP_0 + PROBE_INTERVAL. */
9522 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9523 plus_constant (stack_pointer_rtx,
9524 - (PROBE_INTERVAL + dope))));
9525
9526 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9527 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
9528 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
9529 gen_rtx_PLUS (Pmode, sr.reg,
9530 stack_pointer_rtx)));
9531
9532
9533 /* Step 3: the loop
9534
9535 while (SP != LAST_ADDR)
9536 {
9537 SP = SP + PROBE_INTERVAL
9538 probe at SP
9539 }
9540
9541 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9542 values of N from 1 until it is equal to ROUNDED_SIZE. */
9543
9544 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
9545
9546
9547 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9548 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9549
9550 if (size != rounded_size)
9551 {
9552 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9553 plus_constant (stack_pointer_rtx,
9554 rounded_size - size)));
9555 emit_stack_probe (stack_pointer_rtx);
9556 }
9557
9558 /* Adjust back to account for the additional first interval. */
9559 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9560 plus_constant (stack_pointer_rtx,
9561 PROBE_INTERVAL + dope)));
9562
9563 release_scratch_register_on_entry (&sr);
9564 }
9565
9566 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
9567
9568 /* Even if the stack pointer isn't the CFA register, we need to correctly
9569 describe the adjustments made to it, in particular differentiate the
9570 frame-related ones from the frame-unrelated ones. */
9571 if (size > 0)
9572 {
9573 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
9574 XVECEXP (expr, 0, 0)
9575 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9576 plus_constant (stack_pointer_rtx, -size));
9577 XVECEXP (expr, 0, 1)
9578 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9579 plus_constant (stack_pointer_rtx,
9580 PROBE_INTERVAL + dope + size));
9581 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
9582 RTX_FRAME_RELATED_P (last) = 1;
9583
9584 cfun->machine->fs.sp_offset += size;
9585 }
9586
9587 /* Make sure nothing is scheduled before we are done. */
9588 emit_insn (gen_blockage ());
9589 }
9590
9591 /* Adjust the stack pointer up to REG while probing it. */
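/* A rough sketch of the 64-bit AT&T assembly this template expands to,
   assuming the scratch register is %r11 and a 4096-byte PROBE_INTERVAL:
	.LPSRL0: cmpq %r11, %rsp
		 je .LPSRE0
		 subq $4096, %rsp
		 orq $0, (%rsp)
		 jmp .LPSRL0
	.LPSRE0:  */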
9592
9593 const char *
9594 output_adjust_stack_and_probe (rtx reg)
9595 {
9596 static int labelno = 0;
9597 char loop_lab[32], end_lab[32];
9598 rtx xops[2];
9599
9600 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9601 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9602
9603 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9604
9605 /* Jump to END_LAB if SP == LAST_ADDR. */
9606 xops[0] = stack_pointer_rtx;
9607 xops[1] = reg;
9608 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9609 fputs ("\tje\t", asm_out_file);
9610 assemble_name_raw (asm_out_file, end_lab);
9611 fputc ('\n', asm_out_file);
9612
9613 /* SP = SP + PROBE_INTERVAL. */
9614 xops[1] = GEN_INT (PROBE_INTERVAL);
9615 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9616
9617 /* Probe at SP. */
9618 xops[1] = const0_rtx;
9619 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
9620
9621 fprintf (asm_out_file, "\tjmp\t");
9622 assemble_name_raw (asm_out_file, loop_lab);
9623 fputc ('\n', asm_out_file);
9624
9625 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9626
9627 return "";
9628 }
9629
9630 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9631 inclusive. These are offsets from the current stack pointer. */
9632
9633 static void
9634 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
9635 {
9636 /* See if we have a constant small number of probes to generate. If so,
9637 that's the easy case. The run-time loop is made up of 7 insns in the
9638 generic case while the compile-time loop is made up of n insns for n #
9639 of intervals. */
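/* As a worked example of the unrolled case below (assuming a 4096-byte
   PROBE_INTERVAL): for FIRST == F and SIZE == 10000, probes are emitted at
   sp - (F + 4096), sp - (F + 8192) and sp - (F + 10000).  */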
9640 if (size <= 7 * PROBE_INTERVAL)
9641 {
9642 HOST_WIDE_INT i;
9643
9644 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9645 it exceeds SIZE. If only one probe is needed, this will not
9646 generate any code. Then probe at FIRST + SIZE. */
9647 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9648 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
9649
9650 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
9651 }
9652
9653 /* Otherwise, do the same as above, but in a loop. Note that we must be
9654 extra careful with variables wrapping around because we might be at
9655 the very top (or the very bottom) of the address space and we have
9656 to be able to handle this case properly; in particular, we use an
9657 equality test for the loop condition. */
9658 else
9659 {
9660 HOST_WIDE_INT rounded_size, last;
9661 struct scratch_reg sr;
9662
9663 get_scratch_register_on_entry (&sr);
9664
9665
9666 /* Step 1: round SIZE to the previous multiple of the interval. */
9667
9668 rounded_size = size & -PROBE_INTERVAL;
9669
9670
9671 /* Step 2: compute initial and final value of the loop counter. */
9672
9673 /* TEST_OFFSET = FIRST. */
9674 emit_move_insn (sr.reg, GEN_INT (-first));
9675
9676 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9677 last = first + rounded_size;
9678
9679
9680 /* Step 3: the loop
9681
9682 while (TEST_ADDR != LAST_ADDR)
9683 {
9684 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9685 probe at TEST_ADDR
9686 }
9687
9688 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9689 until it is equal to ROUNDED_SIZE. */
9690
9691 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
9692
9693
9694 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9695 that SIZE is equal to ROUNDED_SIZE. */
9696
9697 if (size != rounded_size)
9698 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
9699 stack_pointer_rtx,
9700 sr.reg),
9701 rounded_size - size));
9702
9703 release_scratch_register_on_entry (&sr);
9704 }
9705
9706 /* Make sure nothing is scheduled before we are done. */
9707 emit_insn (gen_blockage ());
9708 }
9709
9710 /* Probe a range of stack addresses from REG to END, inclusive. These are
9711 offsets from the current stack pointer. */
9712
9713 const char *
9714 output_probe_stack_range (rtx reg, rtx end)
9715 {
9716 static int labelno = 0;
9717 char loop_lab[32], end_lab[32];
9718 rtx xops[3];
9719
9720 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9721 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9722
9723 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9724
9725 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
9726 xops[0] = reg;
9727 xops[1] = end;
9728 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9729 fputs ("\tje\t", asm_out_file);
9730 assemble_name_raw (asm_out_file, end_lab);
9731 fputc ('\n', asm_out_file);
9732
9733 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
9734 xops[1] = GEN_INT (PROBE_INTERVAL);
9735 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9736
9737 /* Probe at TEST_ADDR. */
9738 xops[0] = stack_pointer_rtx;
9739 xops[1] = reg;
9740 xops[2] = const0_rtx;
9741 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
9742
9743 fprintf (asm_out_file, "\tjmp\t");
9744 assemble_name_raw (asm_out_file, loop_lab);
9745 fputc ('\n', asm_out_file);
9746
9747 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9748
9749 return "";
9750 }
9751
9752 /* Finalize the stack_realign_needed flag, which will guide the prologue and
9753 epilogue to be generated in the correct form. */
9754 static void
9755 ix86_finalize_stack_realign_flags (void)
9756 {
9757 /* Check if stack realignment is really needed after reload, and
9758 store the result in cfun. */
9759 unsigned int incoming_stack_boundary
9760 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
9761 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
9762 unsigned int stack_realign = (incoming_stack_boundary
9763 < (current_function_is_leaf
9764 ? crtl->max_used_stack_slot_alignment
9765 : crtl->stack_alignment_needed));
9766
9767 if (crtl->stack_realign_finalized)
9768 {
9769 /* After stack_realign_needed is finalized, we can no longer
9770 change it. */
9771 gcc_assert (crtl->stack_realign_needed == stack_realign);
9772 }
9773 else
9774 {
9775 crtl->stack_realign_needed = stack_realign;
9776 crtl->stack_realign_finalized = true;
9777 }
9778 }
9779
9780 /* Expand the prologue into a bunch of separate insns. */
9781
9782 void
9783 ix86_expand_prologue (void)
9784 {
9785 struct machine_function *m = cfun->machine;
9786 rtx insn, t;
9787 bool pic_reg_used;
9788 struct ix86_frame frame;
9789 HOST_WIDE_INT allocate;
9790 bool int_registers_saved;
9791
9792 ix86_finalize_stack_realign_flags ();
9793
9794 /* DRAP should not coexist with stack_realign_fp */
9795 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
9796
9797 memset (&m->fs, 0, sizeof (m->fs));
9798
9799 /* Initialize CFA state for before the prologue. */
9800 m->fs.cfa_reg = stack_pointer_rtx;
9801 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
9802
9803 /* Track SP offset to the CFA. We continue tracking this after we've
9804 swapped the CFA register away from SP. In the case of re-alignment
9805 this is fudged; we're interested in offsets within the local frame. */
9806 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9807 m->fs.sp_valid = true;
9808
9809 ix86_compute_frame_layout (&frame);
9810
9811 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
9812 {
9813 /* We should have already generated an error for any use of
9814 ms_hook on a nested function. */
9815 gcc_checking_assert (!ix86_static_chain_on_stack);
9816
9817 /* Check if profiling is active and we shall use the profiling-before-prologue
9818 variant. If so, sorry. */
9819 if (crtl->profile && flag_fentry != 0)
9820 sorry ("ms_hook_prologue attribute isn%'t compatible "
9821 "with -mfentry for 32-bit");
9822
9823 /* In ix86_asm_output_function_label we emitted:
9824 8b ff movl.s %edi,%edi
9825 55 push %ebp
9826 8b ec movl.s %esp,%ebp
9827
9828 This matches the hookable function prologue in Win32 API
9829 functions in Microsoft Windows XP Service Pack 2 and newer.
9830 Wine uses this to enable Windows apps to hook the Win32 API
9831 functions provided by Wine.
9832
9833 What that means is that we've already set up the frame pointer. */
9834
9835 if (frame_pointer_needed
9836 && !(crtl->drap_reg && crtl->stack_realign_needed))
9837 {
9838 rtx push, mov;
9839
9840 /* We've decided to use the frame pointer already set up.
9841 Describe this to the unwinder by pretending that both
9842 push and mov insns happen right here.
9843
9844 Putting the unwind info here at the end of the ms_hook
9845 is done so that we can make absolutely certain we get
9846 the required byte sequence at the start of the function,
9847 rather than relying on an assembler that can produce
9848 the exact encoding required.
9849
9850 However it does mean (in the unpatched case) that we have
9851 a 1 insn window where the asynchronous unwind info is
9852 incorrect. However, if we placed the unwind info at
9853 its correct location we would have incorrect unwind info
9854 in the patched case. Which is probably all moot since
9855 I don't expect Wine generates dwarf2 unwind info for the
9856 system libraries that use this feature. */
9857
9858 insn = emit_insn (gen_blockage ());
9859
9860 push = gen_push (hard_frame_pointer_rtx);
9861 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
9862 stack_pointer_rtx);
9863 RTX_FRAME_RELATED_P (push) = 1;
9864 RTX_FRAME_RELATED_P (mov) = 1;
9865
9866 RTX_FRAME_RELATED_P (insn) = 1;
9867 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9868 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
9869
9870 /* Note that gen_push incremented m->fs.cfa_offset, even
9871 though we didn't emit the push insn here. */
9872 m->fs.cfa_reg = hard_frame_pointer_rtx;
9873 m->fs.fp_offset = m->fs.cfa_offset;
9874 m->fs.fp_valid = true;
9875 }
9876 else
9877 {
9878 /* The frame pointer is not needed so pop %ebp again.
9879 This leaves us with a pristine state. */
9880 emit_insn (gen_pop (hard_frame_pointer_rtx));
9881 }
9882 }
9883
9884 /* The first insn of a function that accepts its static chain on the
9885 stack is to push the register that would be filled in by a direct
9886 call. This insn will be skipped by the trampoline. */
9887 else if (ix86_static_chain_on_stack)
9888 {
9889 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
9890 emit_insn (gen_blockage ());
9891
9892 /* We don't want to interpret this push insn as a register save,
9893 only as a stack adjustment. The real copy of the register as
9894 a save will be done later, if needed. */
9895 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
9896 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
9897 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9898 RTX_FRAME_RELATED_P (insn) = 1;
9899 }
9900
9901 /* Emit prologue code to adjust stack alignment and set up the DRAP, in case
9902 DRAP is needed and stack realignment is really needed after reload. */
9903 if (stack_realign_drap)
9904 {
9905 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9906
9907 /* Only need to push parameter pointer reg if it is caller saved. */
9908 if (!call_used_regs[REGNO (crtl->drap_reg)])
9909 {
9910 /* Push arg pointer reg */
9911 insn = emit_insn (gen_push (crtl->drap_reg));
9912 RTX_FRAME_RELATED_P (insn) = 1;
9913 }
9914
9915 /* Grab the argument pointer. */
9916 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
9917 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
9918 RTX_FRAME_RELATED_P (insn) = 1;
9919 m->fs.cfa_reg = crtl->drap_reg;
9920 m->fs.cfa_offset = 0;
9921
9922 /* Align the stack. */
9923 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9924 stack_pointer_rtx,
9925 GEN_INT (-align_bytes)));
9926 RTX_FRAME_RELATED_P (insn) = 1;
9927
9928 /* Replicate the return address on the stack so that return
9929 address can be reached via (argp - 1) slot. This is needed
9930 to implement macro RETURN_ADDR_RTX and intrinsic function
9931 expand_builtin_return_addr etc. */
9932 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
9933 t = gen_frame_mem (Pmode, t);
9934 insn = emit_insn (gen_push (t));
9935 RTX_FRAME_RELATED_P (insn) = 1;
9936
9937 /* For the purposes of frame and register save area addressing,
9938 we've started over with a new frame. */
9939 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9940 m->fs.realigned = true;
9941 }
9942
9943 if (frame_pointer_needed && !m->fs.fp_valid)
9944 {
9945 /* Note: AT&T enter does NOT have reversed args. Enter is probably
9946 slower on all targets. Also sdb doesn't like it. */
9947 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
9948 RTX_FRAME_RELATED_P (insn) = 1;
9949
9950 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
9951 {
9952 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
9953 RTX_FRAME_RELATED_P (insn) = 1;
9954
9955 if (m->fs.cfa_reg == stack_pointer_rtx)
9956 m->fs.cfa_reg = hard_frame_pointer_rtx;
9957 m->fs.fp_offset = m->fs.sp_offset;
9958 m->fs.fp_valid = true;
9959 }
9960 }
9961
9962 int_registers_saved = (frame.nregs == 0);
9963
9964 if (!int_registers_saved)
9965 {
9966 /* If saving registers via PUSH, do so now. */
9967 if (!frame.save_regs_using_mov)
9968 {
9969 ix86_emit_save_regs ();
9970 int_registers_saved = true;
9971 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
9972 }
9973
9974 /* When using the red zone we may start register saving before allocating
9975 the stack frame, saving one cycle of the prologue. However, avoid
9976 doing this if we have to probe the stack; at least on x86_64 the
9977 stack probe can turn into a call that clobbers a red zone location. */
9978 else if (ix86_using_red_zone ()
9979 && (! TARGET_STACK_PROBE
9980 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
9981 {
9982 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
9983 int_registers_saved = true;
9984 }
9985 }
9986
9987 if (stack_realign_fp)
9988 {
9989 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9990 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
9991
9992 /* The computation of the size of the re-aligned stack frame means
9993 that we must allocate the size of the register save area before
9994 performing the actual alignment. Otherwise we cannot guarantee
9995 that there's enough storage above the realignment point. */
9996 if (m->fs.sp_offset != frame.sse_reg_save_offset)
9997 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9998 GEN_INT (m->fs.sp_offset
9999 - frame.sse_reg_save_offset),
10000 -1, false);
10001
10002 /* Align the stack. */
10003 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10004 stack_pointer_rtx,
10005 GEN_INT (-align_bytes)));
10006
10007 /* For the purposes of register save area addressing, the stack
10008 pointer is no longer valid. As for the value of sp_offset,
10009 see ix86_compute_frame_layout, which we need to match in order
10010 to pass verification of stack_pointer_offset at the end. */
10011 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10012 m->fs.sp_valid = false;
10013 }
10014
10015 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10016
10017 if (flag_stack_usage_info)
10018 {
10019 /* We start to count from ARG_POINTER. */
10020 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10021
10022 /* If it was realigned, take into account the fake frame. */
10023 if (stack_realign_drap)
10024 {
10025 if (ix86_static_chain_on_stack)
10026 stack_size += UNITS_PER_WORD;
10027
10028 if (!call_used_regs[REGNO (crtl->drap_reg)])
10029 stack_size += UNITS_PER_WORD;
10030
10031 /* This over-estimates by 1 minimal-stack-alignment-unit but
10032 mitigates that by counting in the new return address slot. */
10033 current_function_dynamic_stack_size
10034 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10035 }
10036
10037 current_function_static_stack_size = stack_size;
10038 }
10039
10040 /* The stack has already been decremented by the instruction calling us
10041 so probe if the size is non-negative to preserve the protection area. */
10042 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
10043 {
10044 /* We expect the registers to be saved when probes are used. */
10045 gcc_assert (int_registers_saved);
10046
10047 if (STACK_CHECK_MOVING_SP)
10048 {
10049 ix86_adjust_stack_and_probe (allocate);
10050 allocate = 0;
10051 }
10052 else
10053 {
10054 HOST_WIDE_INT size = allocate;
10055
10056 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
10057 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
10058
10059 if (TARGET_STACK_PROBE)
10060 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
10061 else
10062 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
10063 }
10064 }
10065
10066 if (allocate == 0)
10067 ;
10068 else if (!ix86_target_stack_probe ()
10069 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
10070 {
10071 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10072 GEN_INT (-allocate), -1,
10073 m->fs.cfa_reg == stack_pointer_rtx);
10074 }
10075 else
10076 {
10077 rtx eax = gen_rtx_REG (Pmode, AX_REG);
10078 rtx r10 = NULL;
10079 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
10080
10081 bool eax_live = false;
10082 bool r10_live = false;
10083
10084 if (TARGET_64BIT)
10085 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
10086 if (!TARGET_64BIT_MS_ABI)
10087 eax_live = ix86_eax_live_at_start_p ();
10088
10089 if (eax_live)
10090 {
10091 emit_insn (gen_push (eax));
10092 allocate -= UNITS_PER_WORD;
10093 }
10094 if (r10_live)
10095 {
10096 r10 = gen_rtx_REG (Pmode, R10_REG);
10097 emit_insn (gen_push (r10));
10098 allocate -= UNITS_PER_WORD;
10099 }
10100
10101 emit_move_insn (eax, GEN_INT (allocate));
10102 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
10103
10104 /* Use the fact that AX still contains ALLOCATE. */
10105 adjust_stack_insn = (TARGET_64BIT
10106 ? gen_pro_epilogue_adjust_stack_di_sub
10107 : gen_pro_epilogue_adjust_stack_si_sub);
10108
10109 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
10110 stack_pointer_rtx, eax));
10111
10112 /* Note that SEH directives need to continue tracking the stack
10113 pointer even after the frame pointer has been set up. */
10114 if (m->fs.cfa_reg == stack_pointer_rtx || TARGET_SEH)
10115 {
10116 if (m->fs.cfa_reg == stack_pointer_rtx)
10117 m->fs.cfa_offset += allocate;
10118
10119 RTX_FRAME_RELATED_P (insn) = 1;
10120 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10121 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10122 plus_constant (stack_pointer_rtx,
10123 -allocate)));
10124 }
10125 m->fs.sp_offset += allocate;
10126
10127 if (r10_live && eax_live)
10128 {
10129 t = choose_baseaddr (m->fs.sp_offset - allocate);
10130 emit_move_insn (r10, gen_frame_mem (Pmode, t));
10131 t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
10132 emit_move_insn (eax, gen_frame_mem (Pmode, t));
10133 }
10134 else if (eax_live || r10_live)
10135 {
10136 t = choose_baseaddr (m->fs.sp_offset - allocate);
10137 emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
10138 }
10139 }
10140 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
10141
10142 /* If we haven't already set up the frame pointer, do so now. */
10143 if (frame_pointer_needed && !m->fs.fp_valid)
10144 {
10145 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
10146 GEN_INT (frame.stack_pointer_offset
10147 - frame.hard_frame_pointer_offset));
10148 insn = emit_insn (insn);
10149 RTX_FRAME_RELATED_P (insn) = 1;
10150 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
10151
10152 if (m->fs.cfa_reg == stack_pointer_rtx)
10153 m->fs.cfa_reg = hard_frame_pointer_rtx;
10154 m->fs.fp_offset = frame.hard_frame_pointer_offset;
10155 m->fs.fp_valid = true;
10156 }
10157
10158 if (!int_registers_saved)
10159 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10160 if (frame.nsseregs)
10161 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10162
10163 pic_reg_used = false;
10164 if (pic_offset_table_rtx
10165 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10166 || crtl->profile))
10167 {
10168 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
10169
10170 if (alt_pic_reg_used != INVALID_REGNUM)
10171 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
10172
10173 pic_reg_used = true;
10174 }
10175
10176 if (pic_reg_used)
10177 {
10178 if (TARGET_64BIT)
10179 {
10180 if (ix86_cmodel == CM_LARGE_PIC)
10181 {
10182 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
10183 rtx label = gen_label_rtx ();
10184 emit_label (label);
10185 LABEL_PRESERVE_P (label) = 1;
10186 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
10187 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
10188 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
10189 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
10190 pic_offset_table_rtx, tmp_reg));
10191 }
10192 else
10193 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
10194 }
10195 else
10196 {
10197 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
10198 RTX_FRAME_RELATED_P (insn) = 1;
10199 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
10200 }
10201 }
10202
10203 /* In the pic_reg_used case, make sure that the GOT load isn't deleted
10204 when mcount needs it. A blockage to avoid call movement across the mcount
10205 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10206 note. */
10207 if (crtl->profile && !flag_fentry && pic_reg_used)
10208 emit_insn (gen_prologue_use (pic_offset_table_rtx));
10209
10210 if (crtl->drap_reg && !crtl->stack_realign_needed)
10211 {
10212 /* vDRAP is set up, but after reload it turns out stack realignment
10213 isn't necessary; here we emit prologue code to set up the DRAP
10214 without the stack realignment adjustment. */
10215 t = choose_baseaddr (0);
10216 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10217 }
10218
10219 /* Prevent instructions from being scheduled into register save push
10220 sequence when access to the redzone area is done through frame pointer.
10221 The offset between the frame pointer and the stack pointer is calculated
10222 relative to the value of the stack pointer at the end of the function
10223 prologue, and moving instructions that access redzone area via frame
10224 pointer inside push sequence violates this assumption. */
10225 if (frame_pointer_needed && frame.red_zone_size)
10226 emit_insn (gen_memory_blockage ());
10227
10228 /* Emit cld instruction if stringops are used in the function. */
10229 if (TARGET_CLD && ix86_current_function_needs_cld)
10230 emit_insn (gen_cld ());
10231
10232 /* SEH requires that the prologue end within 256 bytes of the start of
10233 the function. Prevent instruction schedules that would extend that. */
10234 if (TARGET_SEH)
10235 emit_insn (gen_blockage ());
10236 }
10237
10238 /* Emit code to restore REG using a POP insn. */
10239
10240 static void
10241 ix86_emit_restore_reg_using_pop (rtx reg)
10242 {
10243 struct machine_function *m = cfun->machine;
10244 rtx insn = emit_insn (gen_pop (reg));
10245
10246 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
10247 m->fs.sp_offset -= UNITS_PER_WORD;
10248
10249 if (m->fs.cfa_reg == crtl->drap_reg
10250 && REGNO (reg) == REGNO (crtl->drap_reg))
10251 {
10252 /* Previously we'd represented the CFA as an expression
10253 like *(%ebp - 8). We've just popped that value from
10254 the stack, which means we need to reset the CFA to
10255 the drap register. This will remain until we restore
10256 the stack pointer. */
10257 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10258 RTX_FRAME_RELATED_P (insn) = 1;
10259
10260 /* This means that the DRAP register is valid for addressing too. */
10261 m->fs.drap_valid = true;
10262 return;
10263 }
10264
10265 if (m->fs.cfa_reg == stack_pointer_rtx)
10266 {
10267 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
10268 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10269 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10270 RTX_FRAME_RELATED_P (insn) = 1;
10271
10272 m->fs.cfa_offset -= UNITS_PER_WORD;
10273 }
10274
10275 /* When the frame pointer is the CFA, and we pop it, we are
10276 swapping back to the stack pointer as the CFA. This happens
10277 for stack frames that don't allocate other data, so we assume
10278 the stack pointer is now pointing at the return address, i.e.
10279 the function entry state, which makes the offset be 1 word. */
10280 if (reg == hard_frame_pointer_rtx)
10281 {
10282 m->fs.fp_valid = false;
10283 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10284 {
10285 m->fs.cfa_reg = stack_pointer_rtx;
10286 m->fs.cfa_offset -= UNITS_PER_WORD;
10287
10288 add_reg_note (insn, REG_CFA_DEF_CFA,
10289 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10290 GEN_INT (m->fs.cfa_offset)));
10291 RTX_FRAME_RELATED_P (insn) = 1;
10292 }
10293 }
10294 }
10295
10296 /* Emit code to restore saved registers using POP insns. */
10297
10298 static void
10299 ix86_emit_restore_regs_using_pop (void)
10300 {
10301 unsigned int regno;
10302
10303 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10304 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
10305 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
10306 }
10307
10308 /* Emit code and notes for the LEAVE instruction. */
10309
10310 static void
10311 ix86_emit_leave (void)
10312 {
10313 struct machine_function *m = cfun->machine;
10314 rtx insn = emit_insn (ix86_gen_leave ());
10315
10316 ix86_add_queued_cfa_restore_notes (insn);
10317
10318 gcc_assert (m->fs.fp_valid);
10319 m->fs.sp_valid = true;
10320 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
10321 m->fs.fp_valid = false;
10322
10323 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10324 {
10325 m->fs.cfa_reg = stack_pointer_rtx;
10326 m->fs.cfa_offset = m->fs.sp_offset;
10327
10328 add_reg_note (insn, REG_CFA_DEF_CFA,
10329 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
10330 RTX_FRAME_RELATED_P (insn) = 1;
10331 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
10332 m->fs.fp_offset);
10333 }
10334 }
10335
10336 /* Emit code to restore saved registers using MOV insns.
10337 First register is restored from CFA - CFA_OFFSET. */
10338 static void
10339 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
10340 bool maybe_eh_return)
10341 {
10342 struct machine_function *m = cfun->machine;
10343 unsigned int regno;
10344
10345 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10346 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10347 {
10348 rtx reg = gen_rtx_REG (Pmode, regno);
10349 rtx insn, mem;
10350
10351 mem = choose_baseaddr (cfa_offset);
10352 mem = gen_frame_mem (Pmode, mem);
10353 insn = emit_move_insn (reg, mem);
10354
10355 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
10356 {
10357 /* Previously we'd represented the CFA as an expression
10358 like *(%ebp - 8). We've just loaded that value from
10359 the stack, which means we need to reset the CFA to
10360 the drap register. This will remain until we restore
10361 the stack pointer. */
10362 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10363 RTX_FRAME_RELATED_P (insn) = 1;
10364
10365 /* This means that the DRAP register is valid for addressing. */
10366 m->fs.drap_valid = true;
10367 }
10368 else
10369 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10370
10371 cfa_offset -= UNITS_PER_WORD;
10372 }
10373 }
10374
10375 /* Emit code to restore saved registers using MOV insns.
10376 First register is restored from CFA - CFA_OFFSET. */
10377 static void
10378 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10379 bool maybe_eh_return)
10380 {
10381 unsigned int regno;
10382
10383 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10384 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10385 {
10386 rtx reg = gen_rtx_REG (V4SFmode, regno);
10387 rtx mem;
10388
10389 mem = choose_baseaddr (cfa_offset);
10390 mem = gen_rtx_MEM (V4SFmode, mem);
10391 set_mem_align (mem, 128);
10392 emit_move_insn (reg, mem);
10393
10394 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10395
10396 cfa_offset -= 16;
10397 }
10398 }
10399
10400 /* Restore function stack, frame, and registers. */
10401
10402 void
10403 ix86_expand_epilogue (int style)
10404 {
10405 struct machine_function *m = cfun->machine;
10406 struct machine_frame_state frame_state_save = m->fs;
10407 struct ix86_frame frame;
10408 bool restore_regs_via_mov;
10409 bool using_drap;
10410
10411 ix86_finalize_stack_realign_flags ();
10412 ix86_compute_frame_layout (&frame);
10413
10414 m->fs.sp_valid = (!frame_pointer_needed
10415 || (current_function_sp_is_unchanging
10416 && !stack_realign_fp));
10417 gcc_assert (!m->fs.sp_valid
10418 || m->fs.sp_offset == frame.stack_pointer_offset);
10419
10420 /* The FP must be valid if the frame pointer is present. */
10421 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10422 gcc_assert (!m->fs.fp_valid
10423 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10424
10425 /* We must have *some* valid pointer to the stack frame. */
10426 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10427
10428 /* The DRAP is never valid at this point. */
10429 gcc_assert (!m->fs.drap_valid);
10430
10431 /* See the comment about red zone and frame
10432 pointer usage in ix86_expand_prologue. */
10433 if (frame_pointer_needed && frame.red_zone_size)
10434 emit_insn (gen_memory_blockage ());
10435
10436 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10437 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10438
10439 /* Determine the CFA offset of the end of the red-zone. */
10440 m->fs.red_zone_offset = 0;
10441 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10442 {
10443 /* The red-zone begins below the return address. */
10444 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
10445
10446 /* When the register save area is in the aligned portion of
10447 the stack, determine the maximum runtime displacement that
10448 matches up with the aligned frame. */
10449 if (stack_realign_drap)
10450 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10451 + UNITS_PER_WORD);
10452 }
10453
10454 /* Special care must be taken for the normal return case of a function
10455 using eh_return: the eax and edx registers are marked as saved, but
10456 not restored along this path. Adjust the save location to match. */
10457 if (crtl->calls_eh_return && style != 2)
10458 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
10459
10460 /* EH_RETURN requires the use of moves to function properly. */
10461 if (crtl->calls_eh_return)
10462 restore_regs_via_mov = true;
10463 /* SEH requires the use of pops to identify the epilogue. */
10464 else if (TARGET_SEH)
10465 restore_regs_via_mov = false;
10466   /* If we're only restoring one register and sp is not valid then
10467      use a move instruction to restore the register, since it's
10468      less work than reloading sp and popping the register.  */
10469 else if (!m->fs.sp_valid && frame.nregs <= 1)
10470 restore_regs_via_mov = true;
10471 else if (TARGET_EPILOGUE_USING_MOVE
10472 && cfun->machine->use_fast_prologue_epilogue
10473 && (frame.nregs > 1
10474 || m->fs.sp_offset != frame.reg_save_offset))
10475 restore_regs_via_mov = true;
10476 else if (frame_pointer_needed
10477 && !frame.nregs
10478 && m->fs.sp_offset != frame.reg_save_offset)
10479 restore_regs_via_mov = true;
10480 else if (frame_pointer_needed
10481 && TARGET_USE_LEAVE
10482 && cfun->machine->use_fast_prologue_epilogue
10483 && frame.nregs == 1)
10484 restore_regs_via_mov = true;
10485 else
10486 restore_regs_via_mov = false;
10487
10488 if (restore_regs_via_mov || frame.nsseregs)
10489 {
10490 /* Ensure that the entire register save area is addressable via
10491 the stack pointer, if we will restore via sp. */
10492 if (TARGET_64BIT
10493 && m->fs.sp_offset > 0x7fffffff
10494 && !(m->fs.fp_valid || m->fs.drap_valid)
10495 && (frame.nsseregs + frame.nregs) != 0)
10496 {
10497 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10498 GEN_INT (m->fs.sp_offset
10499 - frame.sse_reg_save_offset),
10500 style,
10501 m->fs.cfa_reg == stack_pointer_rtx);
10502 }
10503 }
10504
10505 /* If there are any SSE registers to restore, then we have to do it
10506 via moves, since there's obviously no pop for SSE regs. */
10507 if (frame.nsseregs)
10508 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10509 style == 2);
10510
10511 if (restore_regs_via_mov)
10512 {
10513 rtx t;
10514
10515 if (frame.nregs)
10516 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
10517
10518 /* eh_return epilogues need %ecx added to the stack pointer. */
10519 if (style == 2)
10520 {
10521 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
10522
10523 /* Stack align doesn't work with eh_return. */
10524 gcc_assert (!stack_realign_drap);
10525 	  /* Neither do regparm nested functions.  */
10526 gcc_assert (!ix86_static_chain_on_stack);
10527
10528 if (frame_pointer_needed)
10529 {
10530 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10531 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
10532 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
10533
10534 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
10535 insn = emit_move_insn (hard_frame_pointer_rtx, t);
10536
10537 /* Note that we use SA as a temporary CFA, as the return
10538 address is at the proper place relative to it. We
10539 pretend this happens at the FP restore insn because
10540 prior to this insn the FP would be stored at the wrong
10541 offset relative to SA, and after this insn we have no
10542 other reasonable register to use for the CFA. We don't
10543 bother resetting the CFA to the SP for the duration of
10544 the return insn. */
10545 add_reg_note (insn, REG_CFA_DEF_CFA,
10546 plus_constant (sa, UNITS_PER_WORD));
10547 ix86_add_queued_cfa_restore_notes (insn);
10548 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
10549 RTX_FRAME_RELATED_P (insn) = 1;
10550
10551 m->fs.cfa_reg = sa;
10552 m->fs.cfa_offset = UNITS_PER_WORD;
10553 m->fs.fp_valid = false;
10554
10555 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10556 const0_rtx, style, false);
10557 }
10558 else
10559 {
10560 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10561 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
10562 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
10563 ix86_add_queued_cfa_restore_notes (insn);
10564
10565 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10566 if (m->fs.cfa_offset != UNITS_PER_WORD)
10567 {
10568 m->fs.cfa_offset = UNITS_PER_WORD;
10569 add_reg_note (insn, REG_CFA_DEF_CFA,
10570 plus_constant (stack_pointer_rtx,
10571 UNITS_PER_WORD));
10572 RTX_FRAME_RELATED_P (insn) = 1;
10573 }
10574 }
10575 m->fs.sp_offset = UNITS_PER_WORD;
10576 m->fs.sp_valid = true;
10577 }
10578 }
10579 else
10580 {
10581 /* SEH requires that the function end with (1) a stack adjustment
10582 if necessary, (2) a sequence of pops, and (3) a return or
10583 jump instruction. Prevent insns from the function body from
10584 being scheduled into this sequence. */
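      /* For illustration, a SEH-conforming epilogue typically looks like
	     addq  $LOCALS, %rsp    ; (1) stack adjustment, if needed
	     popq  %rsi             ; (2) pops of saved registers
	     popq  %rbp
	     ret                    ; (3) return or jump
	 with nothing else scheduled into the sequence.  */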
10585 if (TARGET_SEH)
10586 {
10587 /* Prevent a catch region from being adjacent to the standard
10588 	     epilogue sequence.  Unfortunately neither crtl->uses_eh_lsda nor
10589 	     several other flags that would be interesting to test are
10590 	     set up yet.  */
10591 if (flag_non_call_exceptions)
10592 emit_insn (gen_nops (const1_rtx));
10593 else
10594 emit_insn (gen_blockage ());
10595 }
10596
10597 /* First step is to deallocate the stack frame so that we can
10598 pop the registers. */
10599 if (!m->fs.sp_valid)
10600 {
10601 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10602 GEN_INT (m->fs.fp_offset
10603 - frame.reg_save_offset),
10604 style, false);
10605 }
10606 else if (m->fs.sp_offset != frame.reg_save_offset)
10607 {
10608 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10609 GEN_INT (m->fs.sp_offset
10610 - frame.reg_save_offset),
10611 style,
10612 m->fs.cfa_reg == stack_pointer_rtx);
10613 }
10614
10615 ix86_emit_restore_regs_using_pop ();
10616 }
10617
10618   /* If we used a frame pointer and haven't already got rid of it,
10619      then do so now.  */
10620 if (m->fs.fp_valid)
10621 {
10622 /* If the stack pointer is valid and pointing at the frame
10623 pointer store address, then we only need a pop. */
10624 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
10625 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10626 /* Leave results in shorter dependency chains on CPUs that are
10627 able to grok it fast. */
10628 else if (TARGET_USE_LEAVE
10629 || optimize_function_for_size_p (cfun)
10630 || !cfun->machine->use_fast_prologue_epilogue)
10631 ix86_emit_leave ();
10632 else
10633 {
10634 pro_epilogue_adjust_stack (stack_pointer_rtx,
10635 hard_frame_pointer_rtx,
10636 const0_rtx, style, !using_drap);
10637 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10638 }
10639 }
10640
10641 if (using_drap)
10642 {
10643 int param_ptr_offset = UNITS_PER_WORD;
10644 rtx insn;
10645
10646 gcc_assert (stack_realign_drap);
10647
10648 if (ix86_static_chain_on_stack)
10649 param_ptr_offset += UNITS_PER_WORD;
10650 if (!call_used_regs[REGNO (crtl->drap_reg)])
10651 param_ptr_offset += UNITS_PER_WORD;
10652
10653 insn = emit_insn (gen_rtx_SET
10654 (VOIDmode, stack_pointer_rtx,
10655 gen_rtx_PLUS (Pmode,
10656 crtl->drap_reg,
10657 GEN_INT (-param_ptr_offset))));
10658 m->fs.cfa_reg = stack_pointer_rtx;
10659 m->fs.cfa_offset = param_ptr_offset;
10660 m->fs.sp_offset = param_ptr_offset;
10661 m->fs.realigned = false;
10662
10663 add_reg_note (insn, REG_CFA_DEF_CFA,
10664 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10665 GEN_INT (param_ptr_offset)));
10666 RTX_FRAME_RELATED_P (insn) = 1;
10667
10668 if (!call_used_regs[REGNO (crtl->drap_reg)])
10669 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10670 }
10671
10672 /* At this point the stack pointer must be valid, and we must have
10673 restored all of the registers. We may not have deallocated the
10674 entire stack frame. We've delayed this until now because it may
10675 be possible to merge the local stack deallocation with the
10676 deallocation forced by ix86_static_chain_on_stack. */
10677 gcc_assert (m->fs.sp_valid);
10678 gcc_assert (!m->fs.fp_valid);
10679 gcc_assert (!m->fs.realigned);
10680 if (m->fs.sp_offset != UNITS_PER_WORD)
10681 {
10682 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10683 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
10684 style, true);
10685 }
10686
10687 /* Sibcall epilogues don't want a return instruction. */
10688 if (style == 0)
10689 {
10690 m->fs = frame_state_save;
10691 return;
10692 }
10693
10694 /* Emit vzeroupper if needed. */
10695 if (TARGET_VZEROUPPER
10696 && !TREE_THIS_VOLATILE (cfun->decl)
10697 && !cfun->machine->caller_return_avx256_p)
10698 emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
10699
10700 if (crtl->args.pops_args && crtl->args.size)
10701 {
10702 rtx popc = GEN_INT (crtl->args.pops_args);
10703
10704       /* i386 can only pop 64K bytes.  If asked to pop more, pop the return
10705 	 address, do an explicit add, and jump indirectly to the caller.  */
10706
10707 if (crtl->args.pops_args >= 65536)
10708 {
10709 rtx ecx = gen_rtx_REG (SImode, CX_REG);
10710 rtx insn;
10711
10712 /* There is no "pascal" calling convention in any 64bit ABI. */
10713 gcc_assert (!TARGET_64BIT);
10714
10715 insn = emit_insn (gen_pop (ecx));
10716 m->fs.cfa_offset -= UNITS_PER_WORD;
10717 m->fs.sp_offset -= UNITS_PER_WORD;
10718
10719 add_reg_note (insn, REG_CFA_ADJUST_CFA,
10720 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
10721 add_reg_note (insn, REG_CFA_REGISTER,
10722 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
10723 RTX_FRAME_RELATED_P (insn) = 1;
10724
10725 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10726 popc, -1, true);
10727 emit_jump_insn (gen_return_indirect_internal (ecx));
10728 }
10729 else
10730 emit_jump_insn (gen_return_pop_internal (popc));
10731 }
10732 else
10733 emit_jump_insn (gen_return_internal ());
10734
10735 /* Restore the state back to the state from the prologue,
10736 so that it's correct for the next epilogue. */
10737 m->fs = frame_state_save;
10738 }
10739
10740 /* Reset from the function's potential modifications. */
10741
10742 static void
10743 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
10744 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
10745 {
10746 if (pic_offset_table_rtx)
10747 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
10748 #if TARGET_MACHO
10749 /* Mach-O doesn't support labels at the end of objects, so if
10750 it looks like we might want one, insert a NOP. */
10751 {
10752 rtx insn = get_last_insn ();
10753 while (insn
10754 && NOTE_P (insn)
10755 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
10756 insn = PREV_INSN (insn);
10757 if (insn
10758 && (LABEL_P (insn)
10759 || (NOTE_P (insn)
10760 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
10761 fputs ("\tnop\n", file);
10762 }
10763 #endif
10764
10765 }
10766
10767 /* Return a scratch register to use in the split stack prologue. The
10768    split stack prologue is used for -fsplit-stack.  It consists of the first
10769    instructions in the function, emitted even before the regular prologue.
10770 The scratch register can be any caller-saved register which is not
10771 used for parameters or for the static chain. */
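/* For quick reference, a summary of the choices made below (the body is
   authoritative):
     64-bit                                    -> %r11
     32-bit fastcall, no static chain          -> %eax
     32-bit, regparm < 3, no static chain      -> %ecx
     32-bit, regparm < 2, with static chain    -> %edx
     anything else                             -> sorry () and INVALID_REGNUM.  */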
10772
10773 static unsigned int
10774 split_stack_prologue_scratch_regno (void)
10775 {
10776 if (TARGET_64BIT)
10777 return R11_REG;
10778 else
10779 {
10780 bool is_fastcall;
10781 int regparm;
10782
10783 is_fastcall = (lookup_attribute ("fastcall",
10784 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
10785 != NULL);
10786 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
10787
10788 if (is_fastcall)
10789 {
10790 if (DECL_STATIC_CHAIN (cfun->decl))
10791 {
10792 sorry ("-fsplit-stack does not support fastcall with "
10793 "nested function");
10794 return INVALID_REGNUM;
10795 }
10796 return AX_REG;
10797 }
10798 else if (regparm < 3)
10799 {
10800 if (!DECL_STATIC_CHAIN (cfun->decl))
10801 return CX_REG;
10802 else
10803 {
10804 if (regparm >= 2)
10805 {
10806 sorry ("-fsplit-stack does not support 2 register "
10807 " parameters for a nested function");
10808 return INVALID_REGNUM;
10809 }
10810 return DX_REG;
10811 }
10812 }
10813 else
10814 {
10815 /* FIXME: We could make this work by pushing a register
10816 around the addition and comparison. */
10817 sorry ("-fsplit-stack does not support 3 register parameters");
10818 return INVALID_REGNUM;
10819 }
10820 }
10821 }
10822
10823 /* A SYMBOL_REF for the function which allocates new stack space for
10824 -fsplit-stack. */
10825
10826 static GTY(()) rtx split_stack_fn;
10827
10828 /* A SYMBOL_REF for the more-stack function to call when using the
10829    large code model.  */
10830
10831 static GTY(()) rtx split_stack_fn_large;
10832
10833 /* Handle -fsplit-stack. These are the first instructions in the
10834 function, even before the regular prologue. */
10835
10836 void
10837 ix86_expand_split_stack_prologue (void)
10838 {
10839 struct ix86_frame frame;
10840 HOST_WIDE_INT allocate;
10841 unsigned HOST_WIDE_INT args_size;
10842 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
10843 rtx scratch_reg = NULL_RTX;
10844 rtx varargs_label = NULL_RTX;
10845 rtx fn;
10846
10847 gcc_assert (flag_split_stack && reload_completed);
10848
10849 ix86_finalize_stack_realign_flags ();
10850 ix86_compute_frame_layout (&frame);
10851 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
10852
10853 /* This is the label we will branch to if we have enough stack
10854 space. We expect the basic block reordering pass to reverse this
10855 branch if optimizing, so that we branch in the unlikely case. */
10856 label = gen_label_rtx ();
10857
10858 /* We need to compare the stack pointer minus the frame size with
10859 the stack boundary in the TCB. The stack boundary always gives
10860 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
10861 can compare directly. Otherwise we need to do an addition. */
10862
10863 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
10864 UNSPEC_STACK_CHECK);
10865 limit = gen_rtx_CONST (Pmode, limit);
10866 limit = gen_rtx_MEM (Pmode, limit);
10867 if (allocate < SPLIT_STACK_AVAILABLE)
10868 current = stack_pointer_rtx;
10869 else
10870 {
10871 unsigned int scratch_regno;
10872 rtx offset;
10873
10874 /* We need a scratch register to hold the stack pointer minus
10875 the required frame size. Since this is the very start of the
10876 function, the scratch register can be any caller-saved
10877 register which is not used for parameters. */
10878 offset = GEN_INT (- allocate);
10879 scratch_regno = split_stack_prologue_scratch_regno ();
10880 if (scratch_regno == INVALID_REGNUM)
10881 return;
10882 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10883 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
10884 {
10885 /* We don't use ix86_gen_add3 in this case because it will
10886 want to split to lea, but when not optimizing the insn
10887 will not be split after this point. */
10888 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
10889 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10890 offset)));
10891 }
10892 else
10893 {
10894 emit_move_insn (scratch_reg, offset);
10895 emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
10896 stack_pointer_rtx));
10897 }
10898 current = scratch_reg;
10899 }
10900
10901 ix86_expand_branch (GEU, current, limit, label);
10902 jump_insn = get_last_insn ();
10903 JUMP_LABEL (jump_insn) = label;
10904
10905 /* Mark the jump as very likely to be taken. */
10906 add_reg_note (jump_insn, REG_BR_PROB,
10907 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
10908
10909 if (split_stack_fn == NULL_RTX)
10910 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
10911 fn = split_stack_fn;
10912
10913 /* Get more stack space. We pass in the desired stack space and the
10914 size of the arguments to copy to the new stack. In 32-bit mode
10915 we push the parameters; __morestack will return on a new stack
10916 anyhow. In 64-bit mode we pass the parameters in r10 and
10917 r11. */
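  /* Illustration only (small code model, no static chain): the RTL emitted
     below might assemble to roughly
	 movq  $FRAMESIZE, %r10
	 movq  $ARGSIZE, %r11
	 call  __morestack
	 ret                    ; see the split_stack_return note further down
     where FRAMESIZE and ARGSIZE stand for the allocate and args_size values
     computed in this function; 32-bit code pushes the two values instead.  */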
10918 allocate_rtx = GEN_INT (allocate);
10919 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
10920 call_fusage = NULL_RTX;
10921 if (TARGET_64BIT)
10922 {
10923 rtx reg10, reg11;
10924
10925 reg10 = gen_rtx_REG (Pmode, R10_REG);
10926 reg11 = gen_rtx_REG (Pmode, R11_REG);
10927
10928 /* If this function uses a static chain, it will be in %r10.
10929 Preserve it across the call to __morestack. */
10930 if (DECL_STATIC_CHAIN (cfun->decl))
10931 {
10932 rtx rax;
10933
10934 rax = gen_rtx_REG (Pmode, AX_REG);
10935 emit_move_insn (rax, reg10);
10936 use_reg (&call_fusage, rax);
10937 }
10938
10939 if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
10940 {
10941 HOST_WIDE_INT argval;
10942
10943 /* When using the large model we need to load the address
10944 into a register, and we've run out of registers. So we
10945 switch to a different calling convention, and we call a
10946 	     different function: __morestack_large_model.  We pass the
10947 argument size in the upper 32 bits of r10 and pass the
10948 frame size in the lower 32 bits. */
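	  /* For example (hypothetical sizes): with args_size == 0x20 and
	     allocate == 0x1000, argval below becomes
		 (0x20 << 32) + 0x1000 == 0x0000002000001000,
	     which is the single immediate loaded into %r10.  */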
10949 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
10950 gcc_assert ((args_size & 0xffffffff) == args_size);
10951
10952 if (split_stack_fn_large == NULL_RTX)
10953 split_stack_fn_large =
10954 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
10955
10956 if (ix86_cmodel == CM_LARGE_PIC)
10957 {
10958 rtx label, x;
10959
10960 label = gen_label_rtx ();
10961 emit_label (label);
10962 LABEL_PRESERVE_P (label) = 1;
10963 emit_insn (gen_set_rip_rex64 (reg10, label));
10964 emit_insn (gen_set_got_offset_rex64 (reg11, label));
10965 emit_insn (gen_adddi3 (reg10, reg10, reg11));
10966 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
10967 UNSPEC_GOT);
10968 x = gen_rtx_CONST (Pmode, x);
10969 emit_move_insn (reg11, x);
10970 x = gen_rtx_PLUS (Pmode, reg10, reg11);
10971 x = gen_const_mem (Pmode, x);
10972 emit_move_insn (reg11, x);
10973 }
10974 else
10975 emit_move_insn (reg11, split_stack_fn_large);
10976
10977 fn = reg11;
10978
10979 argval = ((args_size << 16) << 16) + allocate;
10980 emit_move_insn (reg10, GEN_INT (argval));
10981 }
10982 else
10983 {
10984 emit_move_insn (reg10, allocate_rtx);
10985 emit_move_insn (reg11, GEN_INT (args_size));
10986 use_reg (&call_fusage, reg11);
10987 }
10988
10989 use_reg (&call_fusage, reg10);
10990 }
10991 else
10992 {
10993 emit_insn (gen_push (GEN_INT (args_size)));
10994 emit_insn (gen_push (allocate_rtx));
10995 }
10996 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
10997 GEN_INT (UNITS_PER_WORD), constm1_rtx,
10998 NULL_RTX, false);
10999 add_function_usage_to (call_insn, call_fusage);
11000
11001 /* In order to make call/return prediction work right, we now need
11002 to execute a return instruction. See
11003 libgcc/config/i386/morestack.S for the details on how this works.
11004
11005 For flow purposes gcc must not see this as a return
11006 instruction--we need control flow to continue at the subsequent
11007 label. Therefore, we use an unspec. */
11008 gcc_assert (crtl->args.pops_args < 65536);
11009 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
11010
11011 /* If we are in 64-bit mode and this function uses a static chain,
11012      we saved %r10 in %rax before calling __morestack.  */
11013 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
11014 emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
11015 gen_rtx_REG (Pmode, AX_REG));
11016
11017 /* If this function calls va_start, we need to store a pointer to
11018 the arguments on the old stack, because they may not have been
11019 all copied to the new stack. At this point the old stack can be
11020 found at the frame pointer value used by __morestack, because
11021 __morestack has set that up before calling back to us. Here we
11022 store that pointer in a scratch register, and in
11023 ix86_expand_prologue we store the scratch register in a stack
11024 slot. */
11025 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11026 {
11027 unsigned int scratch_regno;
11028 rtx frame_reg;
11029 int words;
11030
11031 scratch_regno = split_stack_prologue_scratch_regno ();
11032 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11033 frame_reg = gen_rtx_REG (Pmode, BP_REG);
11034
11035 /* 64-bit:
11036 fp -> old fp value
11037 return address within this function
11038 return address of caller of this function
11039 stack arguments
11040 So we add three words to get to the stack arguments.
11041
11042 32-bit:
11043 fp -> old fp value
11044 return address within this function
11045 first argument to __morestack
11046 second argument to __morestack
11047 return address of caller of this function
11048 stack arguments
11049 So we add five words to get to the stack arguments.
11050 */
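      /* Worked arithmetic (illustrative): with 64-bit UNITS_PER_WORD == 8 the
	 scratch register below is set to fp + 3*8 = fp + 24; with 32-bit
	 UNITS_PER_WORD == 4 it is fp + 5*4 = fp + 20 -- the start of the
	 stack arguments in each layout above.  */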
11051 words = TARGET_64BIT ? 3 : 5;
11052 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11053 gen_rtx_PLUS (Pmode, frame_reg,
11054 GEN_INT (words * UNITS_PER_WORD))));
11055
11056 varargs_label = gen_label_rtx ();
11057 emit_jump_insn (gen_jump (varargs_label));
11058 JUMP_LABEL (get_last_insn ()) = varargs_label;
11059
11060 emit_barrier ();
11061 }
11062
11063 emit_label (label);
11064 LABEL_NUSES (label) = 1;
11065
11066 /* If this function calls va_start, we now have to set the scratch
11067 register for the case where we do not call __morestack. In this
11068 case we need to set it based on the stack pointer. */
11069 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11070 {
11071 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11072 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11073 GEN_INT (UNITS_PER_WORD))));
11074
11075 emit_label (varargs_label);
11076 LABEL_NUSES (varargs_label) = 1;
11077 }
11078 }
11079
11080 /* We may have to tell the dataflow pass that the split stack prologue
11081 is initializing a scratch register. */
11082
11083 static void
11084 ix86_live_on_entry (bitmap regs)
11085 {
11086 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11087 {
11088 gcc_assert (flag_split_stack);
11089 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
11090 }
11091 }
11092 \f
11093 /* Extract the parts of an RTL expression that is a valid memory address
11094 for an instruction. Return 0 if the structure of the address is
11095    grossly off.  Return -1 if the address contains ASHIFT, so it is not
11096    strictly valid but is still used for computing the length of an lea insn.  */
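/* Example (illustrative): for the address 12(%ebx,%ecx,4), i.e.
       (plus (plus (reg %ebx) (mult (reg %ecx) (const_int 4)))
	     (const_int 12))
   the function fills in out->base = %ebx, out->index = %ecx,
   out->scale = 4, out->disp = (const_int 12) and returns 1.  */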
11097
11098 int
11099 ix86_decompose_address (rtx addr, struct ix86_address *out)
11100 {
11101 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
11102 rtx base_reg, index_reg;
11103 HOST_WIDE_INT scale = 1;
11104 rtx scale_rtx = NULL_RTX;
11105 rtx tmp;
11106 int retval = 1;
11107 enum ix86_address_seg seg = SEG_DEFAULT;
11108
11109 if (REG_P (addr))
11110 base = addr;
11111 else if (GET_CODE (addr) == SUBREG)
11112 {
11113 /* Allow only subregs of DImode hard regs. */
11114 if (register_no_elim_operand (SUBREG_REG (addr), DImode))
11115 base = addr;
11116 else
11117 return 0;
11118 }
11119 else if (GET_CODE (addr) == PLUS)
11120 {
11121 rtx addends[4], op;
11122 int n = 0, i;
11123
11124 op = addr;
11125 do
11126 {
11127 if (n >= 4)
11128 return 0;
11129 addends[n++] = XEXP (op, 1);
11130 op = XEXP (op, 0);
11131 }
11132 while (GET_CODE (op) == PLUS);
11133 if (n >= 4)
11134 return 0;
11135 addends[n] = op;
11136
11137 for (i = n; i >= 0; --i)
11138 {
11139 op = addends[i];
11140 switch (GET_CODE (op))
11141 {
11142 case MULT:
11143 if (index)
11144 return 0;
11145 index = XEXP (op, 0);
11146 scale_rtx = XEXP (op, 1);
11147 break;
11148
11149 case ASHIFT:
11150 if (index)
11151 return 0;
11152 index = XEXP (op, 0);
11153 tmp = XEXP (op, 1);
11154 if (!CONST_INT_P (tmp))
11155 return 0;
11156 scale = INTVAL (tmp);
11157 if ((unsigned HOST_WIDE_INT) scale > 3)
11158 return 0;
11159 scale = 1 << scale;
11160 break;
11161
11162 case UNSPEC:
11163 if (XINT (op, 1) == UNSPEC_TP
11164 && TARGET_TLS_DIRECT_SEG_REFS
11165 && seg == SEG_DEFAULT)
11166 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
11167 else
11168 return 0;
11169 break;
11170
11171 case SUBREG:
11172 /* Allow only subregs of DImode hard regs in PLUS chains. */
11173 if (!register_no_elim_operand (SUBREG_REG (op), DImode))
11174 return 0;
11175 /* FALLTHRU */
11176
11177 case REG:
11178 if (!base)
11179 base = op;
11180 else if (!index)
11181 index = op;
11182 else
11183 return 0;
11184 break;
11185
11186 case CONST:
11187 case CONST_INT:
11188 case SYMBOL_REF:
11189 case LABEL_REF:
11190 if (disp)
11191 return 0;
11192 disp = op;
11193 break;
11194
11195 default:
11196 return 0;
11197 }
11198 }
11199 }
11200 else if (GET_CODE (addr) == MULT)
11201 {
11202 index = XEXP (addr, 0); /* index*scale */
11203 scale_rtx = XEXP (addr, 1);
11204 }
11205 else if (GET_CODE (addr) == ASHIFT)
11206 {
11207 /* We're called for lea too, which implements ashift on occasion. */
11208 index = XEXP (addr, 0);
11209 tmp = XEXP (addr, 1);
11210 if (!CONST_INT_P (tmp))
11211 return 0;
11212 scale = INTVAL (tmp);
11213 if ((unsigned HOST_WIDE_INT) scale > 3)
11214 return 0;
11215 scale = 1 << scale;
11216 retval = -1;
11217 }
11218 else
11219 disp = addr; /* displacement */
11220
11221 if (index)
11222 {
11223 if (REG_P (index))
11224 ;
11225 /* Allow only subregs of DImode hard regs. */
11226 else if (GET_CODE (index) == SUBREG
11227 && register_no_elim_operand (SUBREG_REG (index), DImode))
11228 ;
11229 else
11230 return 0;
11231 }
11232
11233 /* Extract the integral value of scale. */
11234 if (scale_rtx)
11235 {
11236 if (!CONST_INT_P (scale_rtx))
11237 return 0;
11238 scale = INTVAL (scale_rtx);
11239 }
11240
11241 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
11242 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
11243
11244 /* Avoid useless 0 displacement. */
11245 if (disp == const0_rtx && (base || index))
11246 disp = NULL_RTX;
11247
11248   /* Allow arg pointer and stack pointer as index if there is no scaling.  */
11249 if (base_reg && index_reg && scale == 1
11250 && (index_reg == arg_pointer_rtx
11251 || index_reg == frame_pointer_rtx
11252 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
11253 {
11254 rtx tmp;
11255 tmp = base, base = index, index = tmp;
11256 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
11257 }
11258
11259 /* Special case: %ebp cannot be encoded as a base without a displacement.
11260 Similarly %r13. */
11261 if (!disp
11262 && base_reg
11263 && (base_reg == hard_frame_pointer_rtx
11264 || base_reg == frame_pointer_rtx
11265 || base_reg == arg_pointer_rtx
11266 || (REG_P (base_reg)
11267 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
11268 || REGNO (base_reg) == R13_REG))))
11269 disp = const0_rtx;
11270
11271   /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
11272 Avoid this by transforming to [%esi+0].
11273 Reload calls address legitimization without cfun defined, so we need
11274 to test cfun for being non-NULL. */
11275 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
11276 && base_reg && !index_reg && !disp
11277 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
11278 disp = const0_rtx;
11279
11280 /* Special case: encode reg+reg instead of reg*2. */
11281 if (!base && index && scale == 2)
11282 base = index, base_reg = index_reg, scale = 1;
11283
11284 /* Special case: scaling cannot be encoded without base or displacement. */
11285 if (!base && !disp && index && scale != 1)
11286 disp = const0_rtx;
11287
11288 out->base = base;
11289 out->index = index;
11290 out->disp = disp;
11291 out->scale = scale;
11292 out->seg = seg;
11293
11294 return retval;
11295 }
11296 \f
11297 /* Return cost of the memory address x.
11298 For i386, it is better to use a complex address than let gcc copy
11299 the address into a reg and make a new pseudo. But not if the address
11300    requires two regs - that would mean more pseudos with longer
11301 lifetimes. */
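/* Rough examples of the resulting costs (illustrative): a hard-register
   address such as 8(%ebx) costs 1; the same address through a single
   pseudo costs 2; an address using two distinct pseudos as base and
   index costs 3, plus the AMD-K6 penalty below where it applies.  */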
11302 static int
11303 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
11304 {
11305 struct ix86_address parts;
11306 int cost = 1;
11307 int ok = ix86_decompose_address (x, &parts);
11308
11309 gcc_assert (ok);
11310
11311 if (parts.base && GET_CODE (parts.base) == SUBREG)
11312 parts.base = SUBREG_REG (parts.base);
11313 if (parts.index && GET_CODE (parts.index) == SUBREG)
11314 parts.index = SUBREG_REG (parts.index);
11315
11316 /* Attempt to minimize number of registers in the address. */
11317 if ((parts.base
11318 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
11319 || (parts.index
11320 && (!REG_P (parts.index)
11321 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
11322 cost++;
11323
11324 if (parts.base
11325 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
11326 && parts.index
11327 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
11328 && parts.base != parts.index)
11329 cost++;
11330
11331   /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
11332      since its predecode logic can't detect the length of instructions
11333      and decoding degenerates to vector decoding.  Increase the cost of such
11334      addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
11335      to split such addresses or even refuse such addresses at all.
11336
11337      The following addressing modes are affected:
11338 [base+scale*index]
11339 [scale*index+disp]
11340 [base+index]
11341
11342      The first and last cases may be avoidable by explicitly coding the zero in
11343      the memory address, but I don't have an AMD-K6 machine handy to check this
11344 theory. */
11345
11346 if (TARGET_K6
11347 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
11348 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
11349 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
11350 cost += 10;
11351
11352 return cost;
11353 }
11354 \f
11355 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
11356    this is used to form addresses to local data when -fPIC is in
11357 use. */
11358
11359 static bool
11360 darwin_local_data_pic (rtx disp)
11361 {
11362 return (GET_CODE (disp) == UNSPEC
11363 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
11364 }
11365
11366 /* Determine if a given RTX is a valid constant. We already know this
11367 satisfies CONSTANT_P. */
11368
11369 static bool
11370 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
11371 {
11372 switch (GET_CODE (x))
11373 {
11374 case CONST:
11375 x = XEXP (x, 0);
11376
11377 if (GET_CODE (x) == PLUS)
11378 {
11379 if (!CONST_INT_P (XEXP (x, 1)))
11380 return false;
11381 x = XEXP (x, 0);
11382 }
11383
11384 if (TARGET_MACHO && darwin_local_data_pic (x))
11385 return true;
11386
11387 /* Only some unspecs are valid as "constants". */
11388 if (GET_CODE (x) == UNSPEC)
11389 switch (XINT (x, 1))
11390 {
11391 case UNSPEC_GOT:
11392 case UNSPEC_GOTOFF:
11393 case UNSPEC_PLTOFF:
11394 return TARGET_64BIT;
11395 case UNSPEC_TPOFF:
11396 case UNSPEC_NTPOFF:
11397 x = XVECEXP (x, 0, 0);
11398 return (GET_CODE (x) == SYMBOL_REF
11399 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11400 case UNSPEC_DTPOFF:
11401 x = XVECEXP (x, 0, 0);
11402 return (GET_CODE (x) == SYMBOL_REF
11403 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
11404 default:
11405 return false;
11406 }
11407
11408 /* We must have drilled down to a symbol. */
11409 if (GET_CODE (x) == LABEL_REF)
11410 return true;
11411 if (GET_CODE (x) != SYMBOL_REF)
11412 return false;
11413 /* FALLTHRU */
11414
11415 case SYMBOL_REF:
11416 /* TLS symbols are never valid. */
11417 if (SYMBOL_REF_TLS_MODEL (x))
11418 return false;
11419
11420 /* DLLIMPORT symbols are never valid. */
11421 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11422 && SYMBOL_REF_DLLIMPORT_P (x))
11423 return false;
11424
11425 #if TARGET_MACHO
11426 /* mdynamic-no-pic */
11427 if (MACHO_DYNAMIC_NO_PIC_P)
11428 return machopic_symbol_defined_p (x);
11429 #endif
11430 break;
11431
11432 case CONST_DOUBLE:
11433 if (GET_MODE (x) == TImode
11434 && x != CONST0_RTX (TImode)
11435 && !TARGET_64BIT)
11436 return false;
11437 break;
11438
11439 case CONST_VECTOR:
11440 if (!standard_sse_constant_p (x))
11441 return false;
11442
11443 default:
11444 break;
11445 }
11446
11447 /* Otherwise we handle everything else in the move patterns. */
11448 return true;
11449 }
11450
11451 /* Determine if it's legal to put X into the constant pool. This
11452 is not possible for the address of thread-local symbols, which
11453 is checked above. */
11454
11455 static bool
11456 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
11457 {
11458 /* We can always put integral constants and vectors in memory. */
11459 switch (GET_CODE (x))
11460 {
11461 case CONST_INT:
11462 case CONST_DOUBLE:
11463 case CONST_VECTOR:
11464 return false;
11465
11466 default:
11467 break;
11468 }
11469 return !ix86_legitimate_constant_p (mode, x);
11470 }
11471
11472
11473 /* Nonzero if the constant value X is a legitimate general operand
11474 when generating PIC code. It is given that flag_pic is on and
11475 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11476
11477 bool
11478 legitimate_pic_operand_p (rtx x)
11479 {
11480 rtx inner;
11481
11482 switch (GET_CODE (x))
11483 {
11484 case CONST:
11485 inner = XEXP (x, 0);
11486 if (GET_CODE (inner) == PLUS
11487 && CONST_INT_P (XEXP (inner, 1)))
11488 inner = XEXP (inner, 0);
11489
11490 /* Only some unspecs are valid as "constants". */
11491 if (GET_CODE (inner) == UNSPEC)
11492 switch (XINT (inner, 1))
11493 {
11494 case UNSPEC_GOT:
11495 case UNSPEC_GOTOFF:
11496 case UNSPEC_PLTOFF:
11497 return TARGET_64BIT;
11498 case UNSPEC_TPOFF:
11499 x = XVECEXP (inner, 0, 0);
11500 return (GET_CODE (x) == SYMBOL_REF
11501 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11502 case UNSPEC_MACHOPIC_OFFSET:
11503 return legitimate_pic_address_disp_p (x);
11504 default:
11505 return false;
11506 }
11507 /* FALLTHRU */
11508
11509 case SYMBOL_REF:
11510 case LABEL_REF:
11511 return legitimate_pic_address_disp_p (x);
11512
11513 default:
11514 return true;
11515 }
11516 }
11517
11518 /* Determine if a given CONST RTX is a valid memory displacement
11519 in PIC mode. */
11520
11521 bool
11522 legitimate_pic_address_disp_p (rtx disp)
11523 {
11524 bool saw_plus;
11525
11526 /* In 64bit mode we can allow direct addresses of symbols and labels
11527 when they are not dynamic symbols. */
11528 if (TARGET_64BIT)
11529 {
11530 rtx op0 = disp, op1;
11531
11532 switch (GET_CODE (disp))
11533 {
11534 case LABEL_REF:
11535 return true;
11536
11537 case CONST:
11538 if (GET_CODE (XEXP (disp, 0)) != PLUS)
11539 break;
11540 op0 = XEXP (XEXP (disp, 0), 0);
11541 op1 = XEXP (XEXP (disp, 0), 1);
11542 if (!CONST_INT_P (op1)
11543 || INTVAL (op1) >= 16*1024*1024
11544 || INTVAL (op1) < -16*1024*1024)
11545 break;
11546 if (GET_CODE (op0) == LABEL_REF)
11547 return true;
11548 if (GET_CODE (op0) != SYMBOL_REF)
11549 break;
11550 /* FALLTHRU */
11551
11552 case SYMBOL_REF:
11553 /* TLS references should always be enclosed in UNSPEC. */
11554 if (SYMBOL_REF_TLS_MODEL (op0))
11555 return false;
11556 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
11557 && ix86_cmodel != CM_LARGE_PIC)
11558 return true;
11559 break;
11560
11561 default:
11562 break;
11563 }
11564 }
11565 if (GET_CODE (disp) != CONST)
11566 return false;
11567 disp = XEXP (disp, 0);
11568
11569 if (TARGET_64BIT)
11570 {
11571       /* It is unsafe to allow PLUS expressions here; that would limit the
11572 	 allowed distance of GOT references.  We should not need these anyway.  */
11573 if (GET_CODE (disp) != UNSPEC
11574 || (XINT (disp, 1) != UNSPEC_GOTPCREL
11575 && XINT (disp, 1) != UNSPEC_GOTOFF
11576 && XINT (disp, 1) != UNSPEC_PCREL
11577 && XINT (disp, 1) != UNSPEC_PLTOFF))
11578 return false;
11579
11580 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
11581 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
11582 return false;
11583 return true;
11584 }
11585
11586 saw_plus = false;
11587 if (GET_CODE (disp) == PLUS)
11588 {
11589 if (!CONST_INT_P (XEXP (disp, 1)))
11590 return false;
11591 disp = XEXP (disp, 0);
11592 saw_plus = true;
11593 }
11594
11595 if (TARGET_MACHO && darwin_local_data_pic (disp))
11596 return true;
11597
11598 if (GET_CODE (disp) != UNSPEC)
11599 return false;
11600
11601 switch (XINT (disp, 1))
11602 {
11603 case UNSPEC_GOT:
11604 if (saw_plus)
11605 return false;
11606 /* We need to check for both symbols and labels because VxWorks loads
11607 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
11608 details. */
11609 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11610 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
11611 case UNSPEC_GOTOFF:
11612 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
11613 	 While the ABI also specifies a 32bit relocation, we don't produce it
11614 	 in the small PIC model at all.  */
11615 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11616 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
11617 && !TARGET_64BIT)
11618 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
11619 return false;
11620 case UNSPEC_GOTTPOFF:
11621 case UNSPEC_GOTNTPOFF:
11622 case UNSPEC_INDNTPOFF:
11623 if (saw_plus)
11624 return false;
11625 disp = XVECEXP (disp, 0, 0);
11626 return (GET_CODE (disp) == SYMBOL_REF
11627 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
11628 case UNSPEC_NTPOFF:
11629 disp = XVECEXP (disp, 0, 0);
11630 return (GET_CODE (disp) == SYMBOL_REF
11631 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
11632 case UNSPEC_DTPOFF:
11633 disp = XVECEXP (disp, 0, 0);
11634 return (GET_CODE (disp) == SYMBOL_REF
11635 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
11636 }
11637
11638 return false;
11639 }
11640
11641 /* Recognizes RTL expressions that are valid memory addresses for an
11642 instruction. The MODE argument is the machine mode for the MEM
11643 expression that wants to use this address.
11644
11645    It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
11646 convert common non-canonical forms to canonical form so that they will
11647 be recognized. */
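/* A few examples of what the checks below accept and reject
   (illustrative, non-PIC): (plus (reg %ebx) (mult (reg %ecx)
   (const_int 4))) is a legitimate address, while a scale factor of 3,
   a scale without an index register, or a base and index of different
   modes (e.g. an SImode base with a DImode index) are rejected.  */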
11648
11649 static bool
11650 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
11651 rtx addr, bool strict)
11652 {
11653 struct ix86_address parts;
11654 rtx base, index, disp;
11655 HOST_WIDE_INT scale;
11656
11657 if (ix86_decompose_address (addr, &parts) <= 0)
11658 /* Decomposition failed. */
11659 return false;
11660
11661 base = parts.base;
11662 index = parts.index;
11663 disp = parts.disp;
11664 scale = parts.scale;
11665
11666 /* Validate base register. */
11667 if (base)
11668 {
11669 rtx reg;
11670
11671 if (REG_P (base))
11672 reg = base;
11673 else if (GET_CODE (base) == SUBREG && REG_P (SUBREG_REG (base)))
11674 {
11675 reg = SUBREG_REG (base);
11676 gcc_assert (register_no_elim_operand (reg, DImode));
11677 }
11678 else
11679 /* Base is not a register. */
11680 return false;
11681
11682 if (GET_MODE (base) != SImode && GET_MODE (base) != DImode)
11683 return false;
11684
11685 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
11686 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
11687 /* Base is not valid. */
11688 return false;
11689 }
11690
11691 /* Validate index register. */
11692 if (index)
11693 {
11694 rtx reg;
11695
11696 if (REG_P (index))
11697 reg = index;
11698 else if (GET_CODE (index) == SUBREG && REG_P (SUBREG_REG (index)))
11699 {
11700 reg = SUBREG_REG (index);
11701 gcc_assert (register_no_elim_operand (reg, DImode));
11702 }
11703 else
11704 /* Index is not a register. */
11705 return false;
11706
11707 if (GET_MODE (index) != SImode && GET_MODE (index) != DImode)
11708 return false;
11709
11710 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
11711 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
11712 /* Index is not valid. */
11713 return false;
11714 }
11715
11716 /* Index and base should have the same mode. */
11717 if (base && index
11718 && GET_MODE (base) != GET_MODE (index))
11719 return false;
11720
11721 /* Validate scale factor. */
11722 if (scale != 1)
11723 {
11724 if (!index)
11725 /* Scale without index. */
11726 return false;
11727
11728 if (scale != 2 && scale != 4 && scale != 8)
11729 /* Scale is not a valid multiplier. */
11730 return false;
11731 }
11732
11733 /* Validate displacement. */
11734 if (disp)
11735 {
11736 if (GET_CODE (disp) == CONST
11737 && GET_CODE (XEXP (disp, 0)) == UNSPEC
11738 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
11739 switch (XINT (XEXP (disp, 0), 1))
11740 {
11741 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
11742 	   used.  While the ABI also specifies 32bit relocations, we don't produce
11743 	   them at all and use IP-relative addressing instead.  */
11744 case UNSPEC_GOT:
11745 case UNSPEC_GOTOFF:
11746 gcc_assert (flag_pic);
11747 if (!TARGET_64BIT)
11748 goto is_legitimate_pic;
11749
11750 /* 64bit address unspec. */
11751 return false;
11752
11753 case UNSPEC_GOTPCREL:
11754 case UNSPEC_PCREL:
11755 gcc_assert (flag_pic);
11756 goto is_legitimate_pic;
11757
11758 case UNSPEC_GOTTPOFF:
11759 case UNSPEC_GOTNTPOFF:
11760 case UNSPEC_INDNTPOFF:
11761 case UNSPEC_NTPOFF:
11762 case UNSPEC_DTPOFF:
11763 break;
11764
11765 case UNSPEC_STACK_CHECK:
11766 gcc_assert (flag_split_stack);
11767 break;
11768
11769 default:
11770 /* Invalid address unspec. */
11771 return false;
11772 }
11773
11774 else if (SYMBOLIC_CONST (disp)
11775 && (flag_pic
11776 || (TARGET_MACHO
11777 #if TARGET_MACHO
11778 && MACHOPIC_INDIRECT
11779 && !machopic_operand_p (disp)
11780 #endif
11781 )))
11782 {
11783
11784 is_legitimate_pic:
11785 if (TARGET_64BIT && (index || base))
11786 {
11787 /* foo@dtpoff(%rX) is ok. */
11788 if (GET_CODE (disp) != CONST
11789 || GET_CODE (XEXP (disp, 0)) != PLUS
11790 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
11791 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
11792 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
11793 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
11794 /* Non-constant pic memory reference. */
11795 return false;
11796 }
11797 else if ((!TARGET_MACHO || flag_pic)
11798 && ! legitimate_pic_address_disp_p (disp))
11799 /* Displacement is an invalid pic construct. */
11800 return false;
11801 #if TARGET_MACHO
11802 else if (MACHO_DYNAMIC_NO_PIC_P
11803 && !ix86_legitimate_constant_p (Pmode, disp))
11804 	/* displacement must be referenced via non_lazy_pointer */
11805 return false;
11806 #endif
11807
11808 /* This code used to verify that a symbolic pic displacement
11809 includes the pic_offset_table_rtx register.
11810
11811 	 While this is a good idea, unfortunately these constructs may
11812 	 be created by the "adds using lea" optimization for incorrect
11813 code like:
11814
11815 int a;
11816 int foo(int i)
11817 {
11818 return *(&a+i);
11819 }
11820
11821 	 This code is nonsensical, but results in addressing the
11822 	 GOT table with a pic_offset_table_rtx base.  We can't
11823 	 just refuse it easily, since it gets matched by the
11824 	 "addsi3" pattern, which later gets split to lea when the
11825 	 output register differs from the input.  While this
11826 	 could be handled by a separate addsi pattern for this case
11827 	 that never results in lea, disabling this test seems to be
11828 	 the easier and correct fix for the crash.  */
11829 }
11830 else if (GET_CODE (disp) != LABEL_REF
11831 && !CONST_INT_P (disp)
11832 && (GET_CODE (disp) != CONST
11833 || !ix86_legitimate_constant_p (Pmode, disp))
11834 && (GET_CODE (disp) != SYMBOL_REF
11835 || !ix86_legitimate_constant_p (Pmode, disp)))
11836 /* Displacement is not constant. */
11837 return false;
11838 else if (TARGET_64BIT
11839 && !x86_64_immediate_operand (disp, VOIDmode))
11840 /* Displacement is out of range. */
11841 return false;
11842 }
11843
11844 /* Everything looks valid. */
11845 return true;
11846 }
11847
11848 /* Determine if a given RTX is a valid constant address. */
11849
11850 bool
11851 constant_address_p (rtx x)
11852 {
11853 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
11854 }
11855 \f
11856 /* Return a unique alias set for the GOT. */
11857
11858 static alias_set_type
11859 ix86_GOT_alias_set (void)
11860 {
11861 static alias_set_type set = -1;
11862 if (set == -1)
11863 set = new_alias_set ();
11864 return set;
11865 }
11866
11867 /* Return a legitimate reference for ORIG (an address) using the
11868 register REG. If REG is 0, a new pseudo is generated.
11869
11870 There are two types of references that must be handled:
11871
11872 1. Global data references must load the address from the GOT, via
11873 the PIC reg. An insn is emitted to do this load, and the reg is
11874 returned.
11875
11876 2. Static data references, constant pool addresses, and code labels
11877 compute the address as an offset from the GOT, whose base is in
11878 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
11879 differentiate them from global data objects. The returned
11880 address is the PIC reg + an unspec constant.
11881
11882 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
11883 reg also appears in the address. */
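/* Sketch of the two cases above (illustrative, 32-bit ELF), where %ebx
   stands for the PIC register:
     extern int g;  ->  movl g@GOT(%ebx), %reg     (case 1: load from the GOT)
     static int s;  ->  leal s@GOTOFF(%ebx), %reg  (case 2: PIC reg + offset).  */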
11884
11885 static rtx
11886 legitimize_pic_address (rtx orig, rtx reg)
11887 {
11888 rtx addr = orig;
11889 rtx new_rtx = orig;
11890 rtx base;
11891
11892 #if TARGET_MACHO
11893 if (TARGET_MACHO && !TARGET_64BIT)
11894 {
11895 if (reg == 0)
11896 reg = gen_reg_rtx (Pmode);
11897 /* Use the generic Mach-O PIC machinery. */
11898 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
11899 }
11900 #endif
11901
11902 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
11903 new_rtx = addr;
11904 else if (TARGET_64BIT
11905 && ix86_cmodel != CM_SMALL_PIC
11906 && gotoff_operand (addr, Pmode))
11907 {
11908 rtx tmpreg;
11909 /* This symbol may be referenced via a displacement from the PIC
11910 base address (@GOTOFF). */
11911
11912 if (reload_in_progress)
11913 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11914 if (GET_CODE (addr) == CONST)
11915 addr = XEXP (addr, 0);
11916 if (GET_CODE (addr) == PLUS)
11917 {
11918 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11919 UNSPEC_GOTOFF);
11920 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11921 }
11922 else
11923 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11924 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11925 if (!reg)
11926 tmpreg = gen_reg_rtx (Pmode);
11927 else
11928 tmpreg = reg;
11929 emit_move_insn (tmpreg, new_rtx);
11930
11931 if (reg != 0)
11932 {
11933 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
11934 tmpreg, 1, OPTAB_DIRECT);
11935 new_rtx = reg;
11936 }
11937 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
11938 }
11939 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
11940 {
11941 /* This symbol may be referenced via a displacement from the PIC
11942 base address (@GOTOFF). */
11943
11944 if (reload_in_progress)
11945 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11946 if (GET_CODE (addr) == CONST)
11947 addr = XEXP (addr, 0);
11948 if (GET_CODE (addr) == PLUS)
11949 {
11950 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11951 UNSPEC_GOTOFF);
11952 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11953 }
11954 else
11955 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11956 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11957 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11958
11959 if (reg != 0)
11960 {
11961 emit_move_insn (reg, new_rtx);
11962 new_rtx = reg;
11963 }
11964 }
11965 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
11966 /* We can't use @GOTOFF for text labels on VxWorks;
11967 see gotoff_operand. */
11968 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
11969 {
11970 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11971 {
11972 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
11973 return legitimize_dllimport_symbol (addr, true);
11974 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
11975 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
11976 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
11977 {
11978 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
11979 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
11980 }
11981 }
11982
11983       /* For x64 PE-COFF there is no GOT table.  So we use the address
11984 directly. */
11985 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
11986 {
11987 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
11988 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11989
11990 if (reg == 0)
11991 reg = gen_reg_rtx (Pmode);
11992 emit_move_insn (reg, new_rtx);
11993 new_rtx = reg;
11994 }
11995 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
11996 {
11997 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
11998 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11999 new_rtx = gen_const_mem (Pmode, new_rtx);
12000 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12001
12002 if (reg == 0)
12003 reg = gen_reg_rtx (Pmode);
12004 	  /* Use gen_movsi directly, otherwise the address is loaded
12005 	     into a register for CSE.  We don't want to CSE these addresses;
12006 	     instead we CSE addresses from the GOT table, so skip this.  */
12007 emit_insn (gen_movsi (reg, new_rtx));
12008 new_rtx = reg;
12009 }
12010 else
12011 {
12012 /* This symbol must be referenced via a load from the
12013 Global Offset Table (@GOT). */
12014
12015 if (reload_in_progress)
12016 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12017 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
12018 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12019 if (TARGET_64BIT)
12020 new_rtx = force_reg (Pmode, new_rtx);
12021 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12022 new_rtx = gen_const_mem (Pmode, new_rtx);
12023 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12024
12025 if (reg == 0)
12026 reg = gen_reg_rtx (Pmode);
12027 emit_move_insn (reg, new_rtx);
12028 new_rtx = reg;
12029 }
12030 }
12031 else
12032 {
12033 if (CONST_INT_P (addr)
12034 && !x86_64_immediate_operand (addr, VOIDmode))
12035 {
12036 if (reg)
12037 {
12038 emit_move_insn (reg, addr);
12039 new_rtx = reg;
12040 }
12041 else
12042 new_rtx = force_reg (Pmode, addr);
12043 }
12044 else if (GET_CODE (addr) == CONST)
12045 {
12046 addr = XEXP (addr, 0);
12047
12048 /* We must match stuff we generate before. Assume the only
12049 unspecs that can get here are ours. Not that we could do
12050 anything with them anyway.... */
12051 if (GET_CODE (addr) == UNSPEC
12052 || (GET_CODE (addr) == PLUS
12053 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
12054 return orig;
12055 gcc_assert (GET_CODE (addr) == PLUS);
12056 }
12057 if (GET_CODE (addr) == PLUS)
12058 {
12059 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
12060
12061 /* Check first to see if this is a constant offset from a @GOTOFF
12062 symbol reference. */
12063 if (gotoff_operand (op0, Pmode)
12064 && CONST_INT_P (op1))
12065 {
12066 if (!TARGET_64BIT)
12067 {
12068 if (reload_in_progress)
12069 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12070 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
12071 UNSPEC_GOTOFF);
12072 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
12073 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12074 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12075
12076 if (reg != 0)
12077 {
12078 emit_move_insn (reg, new_rtx);
12079 new_rtx = reg;
12080 }
12081 }
12082 else
12083 {
12084 if (INTVAL (op1) < -16*1024*1024
12085 || INTVAL (op1) >= 16*1024*1024)
12086 {
12087 if (!x86_64_immediate_operand (op1, Pmode))
12088 op1 = force_reg (Pmode, op1);
12089 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
12090 }
12091 }
12092 }
12093 else
12094 {
12095 base = legitimize_pic_address (XEXP (addr, 0), reg);
12096 new_rtx = legitimize_pic_address (XEXP (addr, 1),
12097 base == reg ? NULL_RTX : reg);
12098
12099 if (CONST_INT_P (new_rtx))
12100 new_rtx = plus_constant (base, INTVAL (new_rtx));
12101 else
12102 {
12103 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
12104 {
12105 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
12106 new_rtx = XEXP (new_rtx, 1);
12107 }
12108 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
12109 }
12110 }
12111 }
12112 }
12113 return new_rtx;
12114 }
12115 \f
12116 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12117
12118 static rtx
12119 get_thread_pointer (bool to_reg)
12120 {
12121 rtx tp, reg, insn;
12122
12123 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
12124 if (!to_reg)
12125 return tp;
12126
12127 reg = gen_reg_rtx (Pmode);
12128 insn = gen_rtx_SET (VOIDmode, reg, tp);
12129 insn = emit_insn (insn);
12130
12131 return reg;
12132 }
12133
12134 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12135
12136 static GTY(()) rtx ix86_tls_symbol;
12137
12138 static rtx
12139 ix86_tls_get_addr (void)
12140 {
12141 if (!ix86_tls_symbol)
12142 {
12143 const char *sym
12144 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
12145 ? "___tls_get_addr" : "__tls_get_addr");
12146
12147 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
12148 }
12149
12150 return ix86_tls_symbol;
12151 }
12152
12153 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12154
12155 static GTY(()) rtx ix86_tls_module_base_symbol;
12156
12157 rtx
12158 ix86_tls_module_base (void)
12159 {
12160 if (!ix86_tls_module_base_symbol)
12161 {
12162 ix86_tls_module_base_symbol
12163 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
12164
12165 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
12166 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
12167 }
12168
12169 return ix86_tls_module_base_symbol;
12170 }
12171
12172 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12173 false if we expect this to be used for a memory address and true if
12174 we expect to load the address into a register. */
12175
12176 static rtx
12177 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
12178 {
12179 rtx dest, base, off;
12180 rtx pic = NULL_RTX, tp = NULL_RTX;
12181 int type;
12182
12183 switch (model)
12184 {
12185 case TLS_MODEL_GLOBAL_DYNAMIC:
12186 dest = gen_reg_rtx (Pmode);
12187
12188 if (!TARGET_64BIT)
12189 {
12190 if (flag_pic)
12191 pic = pic_offset_table_rtx;
12192 else
12193 {
12194 pic = gen_reg_rtx (Pmode);
12195 emit_insn (gen_set_got (pic));
12196 }
12197 }
12198
12199 if (TARGET_GNU2_TLS)
12200 {
12201 if (TARGET_64BIT)
12202 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
12203 else
12204 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
12205
12206 tp = get_thread_pointer (true);
12207 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
12208
12209 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
12210 }
12211 else
12212 {
12213 rtx caddr = ix86_tls_get_addr ();
12214
12215 if (TARGET_64BIT)
12216 {
12217 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
12218
12219 start_sequence ();
12220 emit_call_insn (gen_tls_global_dynamic_64 (rax, x, caddr));
12221 insns = get_insns ();
12222 end_sequence ();
12223
12224 RTL_CONST_CALL_P (insns) = 1;
12225 emit_libcall_block (insns, dest, rax, x);
12226 }
12227 else
12228 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
12229 }
12230 break;
12231
12232 case TLS_MODEL_LOCAL_DYNAMIC:
12233 base = gen_reg_rtx (Pmode);
12234
12235 if (!TARGET_64BIT)
12236 {
12237 if (flag_pic)
12238 pic = pic_offset_table_rtx;
12239 else
12240 {
12241 pic = gen_reg_rtx (Pmode);
12242 emit_insn (gen_set_got (pic));
12243 }
12244 }
12245
12246 if (TARGET_GNU2_TLS)
12247 {
12248 rtx tmp = ix86_tls_module_base ();
12249
12250 if (TARGET_64BIT)
12251 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
12252 else
12253 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
12254
12255 tp = get_thread_pointer (true);
12256 set_unique_reg_note (get_last_insn (), REG_EQUIV,
12257 gen_rtx_MINUS (Pmode, tmp, tp));
12258 }
12259 else
12260 {
12261 rtx caddr = ix86_tls_get_addr ();
12262
12263 if (TARGET_64BIT)
12264 {
12265 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, eqv;
12266
12267 start_sequence ();
12268 emit_call_insn (gen_tls_local_dynamic_base_64 (rax, caddr));
12269 insns = get_insns ();
12270 end_sequence ();
12271
12272 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
12273 share the LD_BASE result with other LD model accesses. */
12274 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12275 UNSPEC_TLS_LD_BASE);
12276
12277 RTL_CONST_CALL_P (insns) = 1;
12278 emit_libcall_block (insns, base, rax, eqv);
12279 }
12280 else
12281 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
12282 }
12283
12284 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
12285 off = gen_rtx_CONST (Pmode, off);
12286
12287 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
12288
12289 if (TARGET_GNU2_TLS)
12290 {
12291 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
12292
12293 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
12294 }
12295 break;
12296
12297 case TLS_MODEL_INITIAL_EXEC:
12298 if (TARGET_64BIT)
12299 {
12300 if (TARGET_SUN_TLS)
12301 {
12302 /* The Sun linker took the AMD64 TLS spec literally
12303 and can only handle %rax as destination of the
12304 initial executable code sequence. */
12305
12306 dest = gen_reg_rtx (Pmode);
12307 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
12308 return dest;
12309 }
12310
12311 pic = NULL;
12312 type = UNSPEC_GOTNTPOFF;
12313 }
12314 else if (flag_pic)
12315 {
12316 if (reload_in_progress)
12317 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12318 pic = pic_offset_table_rtx;
12319 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
12320 }
12321 else if (!TARGET_ANY_GNU_TLS)
12322 {
12323 pic = gen_reg_rtx (Pmode);
12324 emit_insn (gen_set_got (pic));
12325 type = UNSPEC_GOTTPOFF;
12326 }
12327 else
12328 {
12329 pic = NULL;
12330 type = UNSPEC_INDNTPOFF;
12331 }
12332
12333 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
12334 off = gen_rtx_CONST (Pmode, off);
12335 if (pic)
12336 off = gen_rtx_PLUS (Pmode, pic, off);
12337 off = gen_const_mem (Pmode, off);
12338 set_mem_alias_set (off, ix86_GOT_alias_set ());
12339
12340 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12341 {
12342 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12343 off = force_reg (Pmode, off);
12344 return gen_rtx_PLUS (Pmode, base, off);
12345 }
12346 else
12347 {
12348 base = get_thread_pointer (true);
12349 dest = gen_reg_rtx (Pmode);
12350 emit_insn (gen_subsi3 (dest, base, off));
12351 }
12352 break;
12353
12354 case TLS_MODEL_LOCAL_EXEC:
12355 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
12356 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12357 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
12358 off = gen_rtx_CONST (Pmode, off);
12359
12360 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12361 {
12362 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12363 return gen_rtx_PLUS (Pmode, base, off);
12364 }
12365 else
12366 {
12367 base = get_thread_pointer (true);
12368 dest = gen_reg_rtx (Pmode);
12369 emit_insn (gen_subsi3 (dest, base, off));
12370 }
12371 break;
12372
12373 default:
12374 gcc_unreachable ();
12375 }
12376
12377 return dest;
12378 }
12379
12380 /* Create or return the unique __imp_DECL dllimport symbol corresponding
12381 to symbol DECL. */
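/* For example, a dllimport'ed symbol "foo" gets a hidden VAR_DECL whose
   DECL_RTL is a memory reference through the import-table slot, roughly
   (mem (symbol_ref "*__imp__foo")) -- or "*__imp_foo" when there is no
   user label prefix or the name is fastcall-prefixed -- so uses of foo
   first load the real address from that slot. */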
12382
12383 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
12384 htab_t dllimport_map;
12385
12386 static tree
12387 get_dllimport_decl (tree decl)
12388 {
12389 struct tree_map *h, in;
12390 void **loc;
12391 const char *name;
12392 const char *prefix;
12393 size_t namelen, prefixlen;
12394 char *imp_name;
12395 tree to;
12396 rtx rtl;
12397
12398 if (!dllimport_map)
12399 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
12400
12401 in.hash = htab_hash_pointer (decl);
12402 in.base.from = decl;
12403 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
12404 h = (struct tree_map *) *loc;
12405 if (h)
12406 return h->to;
12407
12408 *loc = h = ggc_alloc_tree_map ();
12409 h->hash = in.hash;
12410 h->base.from = decl;
12411 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
12412 VAR_DECL, NULL, ptr_type_node);
12413 DECL_ARTIFICIAL (to) = 1;
12414 DECL_IGNORED_P (to) = 1;
12415 DECL_EXTERNAL (to) = 1;
12416 TREE_READONLY (to) = 1;
12417
12418 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
12419 name = targetm.strip_name_encoding (name);
12420 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
12421 ? "*__imp_" : "*__imp__";
12422 namelen = strlen (name);
12423 prefixlen = strlen (prefix);
12424 imp_name = (char *) alloca (namelen + prefixlen + 1);
12425 memcpy (imp_name, prefix, prefixlen);
12426 memcpy (imp_name + prefixlen, name, namelen + 1);
12427
12428 name = ggc_alloc_string (imp_name, namelen + prefixlen);
12429 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
12430 SET_SYMBOL_REF_DECL (rtl, to);
12431 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
12432
12433 rtl = gen_const_mem (Pmode, rtl);
12434 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
12435
12436 SET_DECL_RTL (to, rtl);
12437 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
12438
12439 return to;
12440 }
12441
12442 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
12443 true if we require the result to be a register. */

12444
12445 static rtx
12446 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
12447 {
12448 tree imp_decl;
12449 rtx x;
12450
12451 gcc_assert (SYMBOL_REF_DECL (symbol));
12452 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
12453
12454 x = DECL_RTL (imp_decl);
12455 if (want_reg)
12456 x = force_reg (Pmode, x);
12457 return x;
12458 }
12459
12460 /* Try machine-dependent ways of modifying an illegitimate address
12461 to be legitimate. If we find one, return the new, valid address.
12462 This macro is used in only one place: `memory_address' in explow.c.
12463
12464 OLDX is the address as it was before break_out_memory_refs was called.
12465 In some cases it is useful to look at this to decide what needs to be done.
12466
12467 It is always safe for this macro to do nothing. It exists to recognize
12468 opportunities to optimize the output.
12469
12470 For the 80386, we handle X+REG by loading X into a register R and
12471 using R+REG. R will go in a general reg and indexing will be used.
12472 However, if REG is a broken-out memory address or multiplication,
12473 nothing needs to be done because REG can certainly go in a general reg.
12474
12475 When -fpic is used, special handling is needed for symbolic references.
12476 See comments by legitimize_pic_address in i386.c for details. */
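/* Illustrative example: an address such as
     (plus (ashift (reg) (const_int 2)) (reg))
   is rewritten below into
     (plus (mult (reg) (const_int 4)) (reg))
   which matches the base + index*scale form of the SIB addressing
   byte. */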
12477
12478 static rtx
12479 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
12480 enum machine_mode mode)
12481 {
12482 int changed = 0;
12483 unsigned log;
12484
12485 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
12486 if (log)
12487 return legitimize_tls_address (x, (enum tls_model) log, false);
12488 if (GET_CODE (x) == CONST
12489 && GET_CODE (XEXP (x, 0)) == PLUS
12490 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12491 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
12492 {
12493 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
12494 (enum tls_model) log, false);
12495 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12496 }
12497
12498 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12499 {
12500 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
12501 return legitimize_dllimport_symbol (x, true);
12502 if (GET_CODE (x) == CONST
12503 && GET_CODE (XEXP (x, 0)) == PLUS
12504 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12505 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
12506 {
12507 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
12508 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12509 }
12510 }
12511
12512 if (flag_pic && SYMBOLIC_CONST (x))
12513 return legitimize_pic_address (x, 0);
12514
12515 #if TARGET_MACHO
12516 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
12517 return machopic_indirect_data_reference (x, 0);
12518 #endif
12519
12520 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12521 if (GET_CODE (x) == ASHIFT
12522 && CONST_INT_P (XEXP (x, 1))
12523 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
12524 {
12525 changed = 1;
12526 log = INTVAL (XEXP (x, 1));
12527 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
12528 GEN_INT (1 << log));
12529 }
12530
12531 if (GET_CODE (x) == PLUS)
12532 {
12533 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12534
12535 if (GET_CODE (XEXP (x, 0)) == ASHIFT
12536 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
12537 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
12538 {
12539 changed = 1;
12540 log = INTVAL (XEXP (XEXP (x, 0), 1));
12541 XEXP (x, 0) = gen_rtx_MULT (Pmode,
12542 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
12543 GEN_INT (1 << log));
12544 }
12545
12546 if (GET_CODE (XEXP (x, 1)) == ASHIFT
12547 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
12548 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
12549 {
12550 changed = 1;
12551 log = INTVAL (XEXP (XEXP (x, 1), 1));
12552 XEXP (x, 1) = gen_rtx_MULT (Pmode,
12553 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
12554 GEN_INT (1 << log));
12555 }
12556
12557 /* Put multiply first if it isn't already. */
12558 if (GET_CODE (XEXP (x, 1)) == MULT)
12559 {
12560 rtx tmp = XEXP (x, 0);
12561 XEXP (x, 0) = XEXP (x, 1);
12562 XEXP (x, 1) = tmp;
12563 changed = 1;
12564 }
12565
12566 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
12567 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12568 created by virtual register instantiation, register elimination, and
12569 similar optimizations. */
12570 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
12571 {
12572 changed = 1;
12573 x = gen_rtx_PLUS (Pmode,
12574 gen_rtx_PLUS (Pmode, XEXP (x, 0),
12575 XEXP (XEXP (x, 1), 0)),
12576 XEXP (XEXP (x, 1), 1));
12577 }
12578
12579 /* Canonicalize
12580 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
12581 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
12582 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
12583 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12584 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
12585 && CONSTANT_P (XEXP (x, 1)))
12586 {
12587 rtx constant;
12588 rtx other = NULL_RTX;
12589
12590 if (CONST_INT_P (XEXP (x, 1)))
12591 {
12592 constant = XEXP (x, 1);
12593 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
12594 }
12595 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
12596 {
12597 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
12598 other = XEXP (x, 1);
12599 }
12600 else
12601 constant = 0;
12602
12603 if (constant)
12604 {
12605 changed = 1;
12606 x = gen_rtx_PLUS (Pmode,
12607 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
12608 XEXP (XEXP (XEXP (x, 0), 1), 0)),
12609 plus_constant (other, INTVAL (constant)));
12610 }
12611 }
12612
12613 if (changed && ix86_legitimate_address_p (mode, x, false))
12614 return x;
12615
12616 if (GET_CODE (XEXP (x, 0)) == MULT)
12617 {
12618 changed = 1;
12619 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
12620 }
12621
12622 if (GET_CODE (XEXP (x, 1)) == MULT)
12623 {
12624 changed = 1;
12625 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
12626 }
12627
12628 if (changed
12629 && REG_P (XEXP (x, 1))
12630 && REG_P (XEXP (x, 0)))
12631 return x;
12632
12633 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
12634 {
12635 changed = 1;
12636 x = legitimize_pic_address (x, 0);
12637 }
12638
12639 if (changed && ix86_legitimate_address_p (mode, x, false))
12640 return x;
12641
12642 if (REG_P (XEXP (x, 0)))
12643 {
12644 rtx temp = gen_reg_rtx (Pmode);
12645 rtx val = force_operand (XEXP (x, 1), temp);
12646 if (val != temp)
12647 {
12648 if (GET_MODE (val) != Pmode)
12649 val = convert_to_mode (Pmode, val, 1);
12650 emit_move_insn (temp, val);
12651 }
12652
12653 XEXP (x, 1) = temp;
12654 return x;
12655 }
12656
12657 else if (REG_P (XEXP (x, 1)))
12658 {
12659 rtx temp = gen_reg_rtx (Pmode);
12660 rtx val = force_operand (XEXP (x, 0), temp);
12661 if (val != temp)
12662 {
12663 if (GET_MODE (val) != Pmode)
12664 val = convert_to_mode (Pmode, val, 1);
12665 emit_move_insn (temp, val);
12666 }
12667
12668 XEXP (x, 0) = temp;
12669 return x;
12670 }
12671 }
12672
12673 return x;
12674 }
12675 \f
12676 /* Print an integer constant expression in assembler syntax. Addition
12677 and subtraction are the only arithmetic that may appear in these
12678 expressions. FILE is the stdio stream to write to, X is the rtx, and
12679 CODE is the operand print code from the output string. */
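/* For example, (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)) is
   printed as "foo@GOTOFF" and UNSPEC_GOT as "foo@GOT", while plain
   CONST_INTs and PLUS/MINUS expressions are printed literally. */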
12680
12681 static void
12682 output_pic_addr_const (FILE *file, rtx x, int code)
12683 {
12684 char buf[256];
12685
12686 switch (GET_CODE (x))
12687 {
12688 case PC:
12689 gcc_assert (flag_pic);
12690 putc ('.', file);
12691 break;
12692
12693 case SYMBOL_REF:
12694 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
12695 output_addr_const (file, x);
12696 else
12697 {
12698 const char *name = XSTR (x, 0);
12699
12700 /* Mark the decl as referenced so that cgraph will
12701 output the function. */
12702 if (SYMBOL_REF_DECL (x))
12703 mark_decl_referenced (SYMBOL_REF_DECL (x));
12704
12705 #if TARGET_MACHO
12706 if (MACHOPIC_INDIRECT
12707 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
12708 name = machopic_indirection_name (x, /*stub_p=*/true);
12709 #endif
12710 assemble_name (file, name);
12711 }
12712 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12713 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
12714 fputs ("@PLT", file);
12715 break;
12716
12717 case LABEL_REF:
12718 x = XEXP (x, 0);
12719 /* FALLTHRU */
12720 case CODE_LABEL:
12721 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
12722 assemble_name (asm_out_file, buf);
12723 break;
12724
12725 case CONST_INT:
12726 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12727 break;
12728
12729 case CONST:
12730 /* This used to output parentheses around the expression,
12731 but that does not work on the 386 (either ATT or BSD assembler). */
12732 output_pic_addr_const (file, XEXP (x, 0), code);
12733 break;
12734
12735 case CONST_DOUBLE:
12736 if (GET_MODE (x) == VOIDmode)
12737 {
12738 /* We can use %d if the number is <32 bits and positive. */
12739 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
12740 fprintf (file, "0x%lx%08lx",
12741 (unsigned long) CONST_DOUBLE_HIGH (x),
12742 (unsigned long) CONST_DOUBLE_LOW (x));
12743 else
12744 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
12745 }
12746 else
12747 /* We can't handle floating point constants;
12748 TARGET_PRINT_OPERAND must handle them. */
12749 output_operand_lossage ("floating constant misused");
12750 break;
12751
12752 case PLUS:
12753 /* Some assemblers need integer constants to appear first. */
12754 if (CONST_INT_P (XEXP (x, 0)))
12755 {
12756 output_pic_addr_const (file, XEXP (x, 0), code);
12757 putc ('+', file);
12758 output_pic_addr_const (file, XEXP (x, 1), code);
12759 }
12760 else
12761 {
12762 gcc_assert (CONST_INT_P (XEXP (x, 1)));
12763 output_pic_addr_const (file, XEXP (x, 1), code);
12764 putc ('+', file);
12765 output_pic_addr_const (file, XEXP (x, 0), code);
12766 }
12767 break;
12768
12769 case MINUS:
12770 if (!TARGET_MACHO)
12771 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
12772 output_pic_addr_const (file, XEXP (x, 0), code);
12773 putc ('-', file);
12774 output_pic_addr_const (file, XEXP (x, 1), code);
12775 if (!TARGET_MACHO)
12776 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
12777 break;
12778
12779 case UNSPEC:
12780 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
12781 {
12782 bool f = i386_asm_output_addr_const_extra (file, x);
12783 gcc_assert (f);
12784 break;
12785 }
12786
12787 gcc_assert (XVECLEN (x, 0) == 1);
12788 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
12789 switch (XINT (x, 1))
12790 {
12791 case UNSPEC_GOT:
12792 fputs ("@GOT", file);
12793 break;
12794 case UNSPEC_GOTOFF:
12795 fputs ("@GOTOFF", file);
12796 break;
12797 case UNSPEC_PLTOFF:
12798 fputs ("@PLTOFF", file);
12799 break;
12800 case UNSPEC_PCREL:
12801 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12802 "(%rip)" : "[rip]", file);
12803 break;
12804 case UNSPEC_GOTPCREL:
12805 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12806 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
12807 break;
12808 case UNSPEC_GOTTPOFF:
12809 /* FIXME: This might be @TPOFF in Sun ld too. */
12810 fputs ("@gottpoff", file);
12811 break;
12812 case UNSPEC_TPOFF:
12813 fputs ("@tpoff", file);
12814 break;
12815 case UNSPEC_NTPOFF:
12816 if (TARGET_64BIT)
12817 fputs ("@tpoff", file);
12818 else
12819 fputs ("@ntpoff", file);
12820 break;
12821 case UNSPEC_DTPOFF:
12822 fputs ("@dtpoff", file);
12823 break;
12824 case UNSPEC_GOTNTPOFF:
12825 if (TARGET_64BIT)
12826 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12827 "@gottpoff(%rip)": "@gottpoff[rip]", file);
12828 else
12829 fputs ("@gotntpoff", file);
12830 break;
12831 case UNSPEC_INDNTPOFF:
12832 fputs ("@indntpoff", file);
12833 break;
12834 #if TARGET_MACHO
12835 case UNSPEC_MACHOPIC_OFFSET:
12836 putc ('-', file);
12837 machopic_output_function_base_name (file);
12838 break;
12839 #endif
12840 default:
12841 output_operand_lossage ("invalid UNSPEC as operand");
12842 break;
12843 }
12844 break;
12845
12846 default:
12847 output_operand_lossage ("invalid expression as operand");
12848 }
12849 }
12850
12851 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12852 We need to emit DTP-relative relocations. */
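/* For instance, for a symbol "foo" and assuming ASM_LONG expands to the
   usual ".long" directive, this emits:
     size 4:  .long foo@dtpoff
     size 8:  .long foo@dtpoff, 0  */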
12853
12854 static void ATTRIBUTE_UNUSED
12855 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
12856 {
12857 fputs (ASM_LONG, file);
12858 output_addr_const (file, x);
12859 fputs ("@dtpoff", file);
12860 switch (size)
12861 {
12862 case 4:
12863 break;
12864 case 8:
12865 fputs (", 0", file);
12866 break;
12867 default:
12868 gcc_unreachable ();
12869 }
12870 }
12871
12872 /* Return true if X is a representation of the PIC register. This copes
12873 with calls from ix86_find_base_term, where the register might have
12874 been replaced by a cselib value. */
12875
12876 static bool
12877 ix86_pic_register_p (rtx x)
12878 {
12879 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
12880 return (pic_offset_table_rtx
12881 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
12882 else
12883 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
12884 }
12885
12886 /* Helper function for ix86_delegitimize_address.
12887 Attempt to delegitimize TLS local-exec accesses. */
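/* E.g. an access of the form %gs:foo@ntpoff -- decomposed as a
   segment-relative address whose displacement is
   (const (unspec [(symbol_ref "foo")] UNSPEC_NTPOFF)) -- is turned
   back into "foo", with any base/index of the address re-applied. */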
12888
12889 static rtx
12890 ix86_delegitimize_tls_address (rtx orig_x)
12891 {
12892 rtx x = orig_x, unspec;
12893 struct ix86_address addr;
12894
12895 if (!TARGET_TLS_DIRECT_SEG_REFS)
12896 return orig_x;
12897 if (MEM_P (x))
12898 x = XEXP (x, 0);
12899 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
12900 return orig_x;
12901 if (ix86_decompose_address (x, &addr) == 0
12902 || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
12903 || addr.disp == NULL_RTX
12904 || GET_CODE (addr.disp) != CONST)
12905 return orig_x;
12906 unspec = XEXP (addr.disp, 0);
12907 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
12908 unspec = XEXP (unspec, 0);
12909 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
12910 return orig_x;
12911 x = XVECEXP (unspec, 0, 0);
12912 gcc_assert (GET_CODE (x) == SYMBOL_REF);
12913 if (unspec != XEXP (addr.disp, 0))
12914 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
12915 if (addr.index)
12916 {
12917 rtx idx = addr.index;
12918 if (addr.scale != 1)
12919 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
12920 x = gen_rtx_PLUS (Pmode, idx, x);
12921 }
12922 if (addr.base)
12923 x = gen_rtx_PLUS (Pmode, addr.base, x);
12924 if (MEM_P (orig_x))
12925 x = replace_equiv_address_nv (orig_x, x);
12926 return x;
12927 }
12928
12929 /* In the name of slightly smaller debug output, and to cater to
12930 general assembler lossage, recognize PIC+GOTOFF and turn it back
12931 into a direct symbol reference.
12932
12933 On Darwin, this is necessary to avoid a crash, because Darwin
12934 has a different PIC label for each routine but the DWARF debugging
12935 information is not associated with any particular routine, so it's
12936 necessary to remove references to the PIC label from RTL stored by
12937 the DWARF output code. */
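/* For example, (plus (reg:SI ebx) (const (unspec [(symbol_ref "foo")]
   UNSPEC_GOTOFF))) is turned back into (symbol_ref "foo"), with any
   constant or register addend re-attached around the result. */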
12938
12939 static rtx
12940 ix86_delegitimize_address (rtx x)
12941 {
12942 rtx orig_x = delegitimize_mem_from_attrs (x);
12943 /* addend is NULL or some rtx if x is something+GOTOFF where
12944 something doesn't include the PIC register. */
12945 rtx addend = NULL_RTX;
12946 /* reg_addend is NULL or a multiple of some register. */
12947 rtx reg_addend = NULL_RTX;
12948 /* const_addend is NULL or a const_int. */
12949 rtx const_addend = NULL_RTX;
12950 /* This is the result, or NULL. */
12951 rtx result = NULL_RTX;
12952
12953 x = orig_x;
12954
12955 if (MEM_P (x))
12956 x = XEXP (x, 0);
12957
12958 if (TARGET_64BIT)
12959 {
12960 if (GET_CODE (x) != CONST
12961 || GET_CODE (XEXP (x, 0)) != UNSPEC
12962 || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
12963 && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
12964 || !MEM_P (orig_x))
12965 return ix86_delegitimize_tls_address (orig_x);
12966 x = XVECEXP (XEXP (x, 0), 0, 0);
12967 if (GET_MODE (orig_x) != Pmode)
12968 {
12969 x = simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
12970 if (x == NULL_RTX)
12971 return orig_x;
12972 }
12973 return x;
12974 }
12975
12976 if (GET_CODE (x) != PLUS
12977 || GET_CODE (XEXP (x, 1)) != CONST)
12978 return ix86_delegitimize_tls_address (orig_x);
12979
12980 if (ix86_pic_register_p (XEXP (x, 0)))
12981 /* %ebx + GOT/GOTOFF */
12982 ;
12983 else if (GET_CODE (XEXP (x, 0)) == PLUS)
12984 {
12985 /* %ebx + %reg * scale + GOT/GOTOFF */
12986 reg_addend = XEXP (x, 0);
12987 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
12988 reg_addend = XEXP (reg_addend, 1);
12989 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
12990 reg_addend = XEXP (reg_addend, 0);
12991 else
12992 {
12993 reg_addend = NULL_RTX;
12994 addend = XEXP (x, 0);
12995 }
12996 }
12997 else
12998 addend = XEXP (x, 0);
12999
13000 x = XEXP (XEXP (x, 1), 0);
13001 if (GET_CODE (x) == PLUS
13002 && CONST_INT_P (XEXP (x, 1)))
13003 {
13004 const_addend = XEXP (x, 1);
13005 x = XEXP (x, 0);
13006 }
13007
13008 if (GET_CODE (x) == UNSPEC
13009 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
13010 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
13011 result = XVECEXP (x, 0, 0);
13012
13013 if (TARGET_MACHO && darwin_local_data_pic (x)
13014 && !MEM_P (orig_x))
13015 result = XVECEXP (x, 0, 0);
13016
13017 if (! result)
13018 return ix86_delegitimize_tls_address (orig_x);
13019
13020 if (const_addend)
13021 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
13022 if (reg_addend)
13023 result = gen_rtx_PLUS (Pmode, reg_addend, result);
13024 if (addend)
13025 {
13026 /* If the rest of original X doesn't involve the PIC register, add
13027 addend and subtract pic_offset_table_rtx. This can happen e.g.
13028 for code like:
13029 leal (%ebx, %ecx, 4), %ecx
13030 ...
13031 movl foo@GOTOFF(%ecx), %edx
13032 in which case we return (%ecx - %ebx) + foo. */
13033 if (pic_offset_table_rtx)
13034 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
13035 pic_offset_table_rtx),
13036 result);
13037 else
13038 return orig_x;
13039 }
13040 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
13041 {
13042 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
13043 if (result == NULL_RTX)
13044 return orig_x;
13045 }
13046 return result;
13047 }
13048
13049 /* If X is a machine specific address (i.e. a symbol or label being
13050 referenced as a displacement from the GOT implemented using an
13051 UNSPEC), then return the base term. Otherwise return X. */
13052
13053 rtx
13054 ix86_find_base_term (rtx x)
13055 {
13056 rtx term;
13057
13058 if (TARGET_64BIT)
13059 {
13060 if (GET_CODE (x) != CONST)
13061 return x;
13062 term = XEXP (x, 0);
13063 if (GET_CODE (term) == PLUS
13064 && (CONST_INT_P (XEXP (term, 1))
13065 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
13066 term = XEXP (term, 0);
13067 if (GET_CODE (term) != UNSPEC
13068 || (XINT (term, 1) != UNSPEC_GOTPCREL
13069 && XINT (term, 1) != UNSPEC_PCREL))
13070 return x;
13071
13072 return XVECEXP (term, 0, 0);
13073 }
13074
13075 return ix86_delegitimize_address (x);
13076 }
13077 \f
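/* Print to FILE the assembler condition-code suffix ("e", "ne", "g",
   "b", ...) for comparison CODE in MODE, as used by jcc, setcc and
   cmov.  REVERSE selects the opposite condition; FP selects the
   alternative spellings that avoid the fcmov assembler lossage noted
   in the ??? comments below. */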
13078 static void
13079 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
13080 int fp, FILE *file)
13081 {
13082 const char *suffix;
13083
13084 if (mode == CCFPmode || mode == CCFPUmode)
13085 {
13086 code = ix86_fp_compare_code_to_integer (code);
13087 mode = CCmode;
13088 }
13089 if (reverse)
13090 code = reverse_condition (code);
13091
13092 switch (code)
13093 {
13094 case EQ:
13095 switch (mode)
13096 {
13097 case CCAmode:
13098 suffix = "a";
13099 break;
13100
13101 case CCCmode:
13102 suffix = "c";
13103 break;
13104
13105 case CCOmode:
13106 suffix = "o";
13107 break;
13108
13109 case CCSmode:
13110 suffix = "s";
13111 break;
13112
13113 default:
13114 suffix = "e";
13115 }
13116 break;
13117 case NE:
13118 switch (mode)
13119 {
13120 case CCAmode:
13121 suffix = "na";
13122 break;
13123
13124 case CCCmode:
13125 suffix = "nc";
13126 break;
13127
13128 case CCOmode:
13129 suffix = "no";
13130 break;
13131
13132 case CCSmode:
13133 suffix = "ns";
13134 break;
13135
13136 default:
13137 suffix = "ne";
13138 }
13139 break;
13140 case GT:
13141 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
13142 suffix = "g";
13143 break;
13144 case GTU:
13145 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13146 Those same assemblers have the same but opposite lossage on cmov. */
13147 if (mode == CCmode)
13148 suffix = fp ? "nbe" : "a";
13149 else if (mode == CCCmode)
13150 suffix = "b";
13151 else
13152 gcc_unreachable ();
13153 break;
13154 case LT:
13155 switch (mode)
13156 {
13157 case CCNOmode:
13158 case CCGOCmode:
13159 suffix = "s";
13160 break;
13161
13162 case CCmode:
13163 case CCGCmode:
13164 suffix = "l";
13165 break;
13166
13167 default:
13168 gcc_unreachable ();
13169 }
13170 break;
13171 case LTU:
13172 gcc_assert (mode == CCmode || mode == CCCmode);
13173 suffix = "b";
13174 break;
13175 case GE:
13176 switch (mode)
13177 {
13178 case CCNOmode:
13179 case CCGOCmode:
13180 suffix = "ns";
13181 break;
13182
13183 case CCmode:
13184 case CCGCmode:
13185 suffix = "ge";
13186 break;
13187
13188 default:
13189 gcc_unreachable ();
13190 }
13191 break;
13192 case GEU:
13193 /* ??? As above. */
13194 gcc_assert (mode == CCmode || mode == CCCmode);
13195 suffix = fp ? "nb" : "ae";
13196 break;
13197 case LE:
13198 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
13199 suffix = "le";
13200 break;
13201 case LEU:
13202 /* ??? As above. */
13203 if (mode == CCmode)
13204 suffix = "be";
13205 else if (mode == CCCmode)
13206 suffix = fp ? "nb" : "ae";
13207 else
13208 gcc_unreachable ();
13209 break;
13210 case UNORDERED:
13211 suffix = fp ? "u" : "p";
13212 break;
13213 case ORDERED:
13214 suffix = fp ? "nu" : "np";
13215 break;
13216 default:
13217 gcc_unreachable ();
13218 }
13219 fputs (suffix, file);
13220 }
13221
13222 /* Print the name of register X to FILE based on its machine mode and number.
13223 If CODE is 'w', pretend the mode is HImode.
13224 If CODE is 'b', pretend the mode is QImode.
13225 If CODE is 'k', pretend the mode is SImode.
13226 If CODE is 'q', pretend the mode is DImode.
13227 If CODE is 'x', pretend the mode is V4SFmode.
13228 If CODE is 't', pretend the mode is V8SFmode.
13229 If CODE is 'h', pretend the reg is the 'high' byte register.
13230 If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack op.
13231 If CODE is 'd', duplicate the operand for AVX instruction.
13232 */
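/* For example, with X being hard register 0 (ax): code 'b' prints "al",
   'w' prints "ax", 'k' prints "eax", 'q' prints "rax" and 'h' prints
   "ah"; with no code the name is chosen from the operand's mode size.
   In ATT syntax a '%' is prepended first. */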
13233
13234 void
13235 print_reg (rtx x, int code, FILE *file)
13236 {
13237 const char *reg;
13238 bool duplicated = code == 'd' && TARGET_AVX;
13239
13240 gcc_assert (x == pc_rtx
13241 || (REGNO (x) != ARG_POINTER_REGNUM
13242 && REGNO (x) != FRAME_POINTER_REGNUM
13243 && REGNO (x) != FLAGS_REG
13244 && REGNO (x) != FPSR_REG
13245 && REGNO (x) != FPCR_REG));
13246
13247 if (ASSEMBLER_DIALECT == ASM_ATT)
13248 putc ('%', file);
13249
13250 if (x == pc_rtx)
13251 {
13252 gcc_assert (TARGET_64BIT);
13253 fputs ("rip", file);
13254 return;
13255 }
13256
13257 if (code == 'w' || MMX_REG_P (x))
13258 code = 2;
13259 else if (code == 'b')
13260 code = 1;
13261 else if (code == 'k')
13262 code = 4;
13263 else if (code == 'q')
13264 code = 8;
13265 else if (code == 'y')
13266 code = 3;
13267 else if (code == 'h')
13268 code = 0;
13269 else if (code == 'x')
13270 code = 16;
13271 else if (code == 't')
13272 code = 32;
13273 else
13274 code = GET_MODE_SIZE (GET_MODE (x));
13275
13276 /* Irritatingly, AMD extended registers use a different naming convention
13277 from the normal registers. */
13278 if (REX_INT_REG_P (x))
13279 {
13280 gcc_assert (TARGET_64BIT);
13281 switch (code)
13282 {
13283 case 0:
13284 error ("extended registers have no high halves");
13285 break;
13286 case 1:
13287 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
13288 break;
13289 case 2:
13290 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
13291 break;
13292 case 4:
13293 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
13294 break;
13295 case 8:
13296 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
13297 break;
13298 default:
13299 error ("unsupported operand size for extended register");
13300 break;
13301 }
13302 return;
13303 }
13304
13305 reg = NULL;
13306 switch (code)
13307 {
13308 case 3:
13309 if (STACK_TOP_P (x))
13310 {
13311 reg = "st(0)";
13312 break;
13313 }
13314 /* FALLTHRU */
13315 case 8:
13316 case 4:
13317 case 12:
13318 if (! ANY_FP_REG_P (x))
13319 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
13320 /* FALLTHRU */
13321 case 16:
13322 case 2:
13323 normal:
13324 reg = hi_reg_name[REGNO (x)];
13325 break;
13326 case 1:
13327 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
13328 goto normal;
13329 reg = qi_reg_name[REGNO (x)];
13330 break;
13331 case 0:
13332 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
13333 goto normal;
13334 reg = qi_high_reg_name[REGNO (x)];
13335 break;
13336 case 32:
13337 if (SSE_REG_P (x))
13338 {
13339 gcc_assert (!duplicated);
13340 putc ('y', file);
13341 fputs (hi_reg_name[REGNO (x)] + 1, file);
13342 return;
13343 }
13344 break;
13345 default:
13346 gcc_unreachable ();
13347 }
13348
13349 fputs (reg, file);
13350 if (duplicated)
13351 {
13352 if (ASSEMBLER_DIALECT == ASM_ATT)
13353 fprintf (file, ", %%%s", reg);
13354 else
13355 fprintf (file, ", %s", reg);
13356 }
13357 }
13358
13359 /* Locate some local-dynamic symbol still in use by this function
13360 so that we can print its name in some tls_local_dynamic_base
13361 pattern. */
13362
13363 static int
13364 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
13365 {
13366 rtx x = *px;
13367
13368 if (GET_CODE (x) == SYMBOL_REF
13369 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
13370 {
13371 cfun->machine->some_ld_name = XSTR (x, 0);
13372 return 1;
13373 }
13374
13375 return 0;
13376 }
13377
13378 static const char *
13379 get_some_local_dynamic_name (void)
13380 {
13381 rtx insn;
13382
13383 if (cfun->machine->some_ld_name)
13384 return cfun->machine->some_ld_name;
13385
13386 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
13387 if (NONDEBUG_INSN_P (insn)
13388 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
13389 return cfun->machine->some_ld_name;
13390
13391 return NULL;
13392 }
13393
13394 /* Meaning of CODE:
13395 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13396 C -- print opcode suffix for set/cmov insn.
13397 c -- like C, but print reversed condition
13398 F,f -- likewise, but for floating-point.
13399 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13400 otherwise nothing
13401 R -- print the prefix for register names.
13402 z -- print the opcode suffix for the size of the current operand.
13403 Z -- likewise, with special suffixes for x87 instructions.
13404 * -- print a star (in certain assembler syntax)
13405 A -- print an absolute memory reference.
13406 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13407 s -- print a shift double count, followed by the assembler's argument
13408 delimiter.
13409 b -- print the QImode name of the register for the indicated operand.
13410 %b0 would print %al if operands[0] is reg 0.
13411 w -- likewise, print the HImode name of the register.
13412 k -- likewise, print the SImode name of the register.
13413 q -- likewise, print the DImode name of the register.
13414 x -- likewise, print the V4SFmode name of the register.
13415 t -- likewise, print the V8SFmode name of the register.
13416 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13417 y -- print "st(0)" instead of "st" as a register.
13418 d -- print duplicated register operand for AVX instruction.
13419 D -- print condition for SSE cmp instruction.
13420 P -- if PIC, print an @PLT suffix.
13421 p -- print raw symbol name.
13422 X -- don't print any sort of PIC '@' suffix for a symbol.
13423 & -- print some in-use local-dynamic symbol name.
13424 H -- print a memory address offset by 8; used for sse high-parts
13425 Y -- print condition for XOP pcom* instruction.
13426 + -- print a branch hint as 'cs' or 'ds' prefix
13427 ; -- print a semicolon (after prefixes due to bug in older gas).
13428 @ -- print a segment register of thread base pointer load
13429 */
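/* For instance, in a template such as "add%z0\t{%1, %0|%0, %1}", '%z0'
   expands to 'l' for an SImode operand in ATT syntax (and to nothing
   for Intel syntax), giving "addl".  This is only an illustration of
   how the codes above compose inside output templates. */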
13430
13431 void
13432 ix86_print_operand (FILE *file, rtx x, int code)
13433 {
13434 if (code)
13435 {
13436 switch (code)
13437 {
13438 case '*':
13439 if (ASSEMBLER_DIALECT == ASM_ATT)
13440 putc ('*', file);
13441 return;
13442
13443 case '&':
13444 {
13445 const char *name = get_some_local_dynamic_name ();
13446 if (name == NULL)
13447 output_operand_lossage ("'%%&' used without any "
13448 "local dynamic TLS references");
13449 else
13450 assemble_name (file, name);
13451 return;
13452 }
13453
13454 case 'A':
13455 switch (ASSEMBLER_DIALECT)
13456 {
13457 case ASM_ATT:
13458 putc ('*', file);
13459 break;
13460
13461 case ASM_INTEL:
13462 /* Intel syntax. For absolute addresses, registers should not
13463 be surrounded by brackets. */
13464 if (!REG_P (x))
13465 {
13466 putc ('[', file);
13467 ix86_print_operand (file, x, 0);
13468 putc (']', file);
13469 return;
13470 }
13471 break;
13472
13473 default:
13474 gcc_unreachable ();
13475 }
13476
13477 ix86_print_operand (file, x, 0);
13478 return;
13479
13480
13481 case 'L':
13482 if (ASSEMBLER_DIALECT == ASM_ATT)
13483 putc ('l', file);
13484 return;
13485
13486 case 'W':
13487 if (ASSEMBLER_DIALECT == ASM_ATT)
13488 putc ('w', file);
13489 return;
13490
13491 case 'B':
13492 if (ASSEMBLER_DIALECT == ASM_ATT)
13493 putc ('b', file);
13494 return;
13495
13496 case 'Q':
13497 if (ASSEMBLER_DIALECT == ASM_ATT)
13498 putc ('l', file);
13499 return;
13500
13501 case 'S':
13502 if (ASSEMBLER_DIALECT == ASM_ATT)
13503 putc ('s', file);
13504 return;
13505
13506 case 'T':
13507 if (ASSEMBLER_DIALECT == ASM_ATT)
13508 putc ('t', file);
13509 return;
13510
13511 case 'z':
13512 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13513 {
13514 /* Opcodes don't get size suffixes if using Intel opcodes. */
13515 if (ASSEMBLER_DIALECT == ASM_INTEL)
13516 return;
13517
13518 switch (GET_MODE_SIZE (GET_MODE (x)))
13519 {
13520 case 1:
13521 putc ('b', file);
13522 return;
13523
13524 case 2:
13525 putc ('w', file);
13526 return;
13527
13528 case 4:
13529 putc ('l', file);
13530 return;
13531
13532 case 8:
13533 putc ('q', file);
13534 return;
13535
13536 default:
13537 output_operand_lossage
13538 ("invalid operand size for operand code '%c'", code);
13539 return;
13540 }
13541 }
13542
13543 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13544 warning
13545 (0, "non-integer operand used with operand code '%c'", code);
13546 /* FALLTHRU */
13547
13548 case 'Z':
13549 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
13550 if (ASSEMBLER_DIALECT == ASM_INTEL)
13551 return;
13552
13553 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13554 {
13555 switch (GET_MODE_SIZE (GET_MODE (x)))
13556 {
13557 case 2:
13558 #ifdef HAVE_AS_IX86_FILDS
13559 putc ('s', file);
13560 #endif
13561 return;
13562
13563 case 4:
13564 putc ('l', file);
13565 return;
13566
13567 case 8:
13568 #ifdef HAVE_AS_IX86_FILDQ
13569 putc ('q', file);
13570 #else
13571 fputs ("ll", file);
13572 #endif
13573 return;
13574
13575 default:
13576 break;
13577 }
13578 }
13579 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13580 {
13581 /* 387 opcodes don't get size suffixes
13582 if the operands are registers. */
13583 if (STACK_REG_P (x))
13584 return;
13585
13586 switch (GET_MODE_SIZE (GET_MODE (x)))
13587 {
13588 case 4:
13589 putc ('s', file);
13590 return;
13591
13592 case 8:
13593 putc ('l', file);
13594 return;
13595
13596 case 12:
13597 case 16:
13598 putc ('t', file);
13599 return;
13600
13601 default:
13602 break;
13603 }
13604 }
13605 else
13606 {
13607 output_operand_lossage
13608 ("invalid operand type used with operand code '%c'", code);
13609 return;
13610 }
13611
13612 output_operand_lossage
13613 ("invalid operand size for operand code '%c'", code);
13614 return;
13615
13616 case 'd':
13617 case 'b':
13618 case 'w':
13619 case 'k':
13620 case 'q':
13621 case 'h':
13622 case 't':
13623 case 'y':
13624 case 'x':
13625 case 'X':
13626 case 'P':
13627 case 'p':
13628 break;
13629
13630 case 's':
13631 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
13632 {
13633 ix86_print_operand (file, x, 0);
13634 fputs (", ", file);
13635 }
13636 return;
13637
13638 case 'D':
13639 /* Little bit of braindamage here. The SSE compare instructions
13640 use completely different names for the comparisons than the
13641 fp conditional moves do. */
13642 if (TARGET_AVX)
13643 {
13644 switch (GET_CODE (x))
13645 {
13646 case EQ:
13647 fputs ("eq", file);
13648 break;
13649 case UNEQ:
13650 fputs ("eq_us", file);
13651 break;
13652 case LT:
13653 fputs ("lt", file);
13654 break;
13655 case UNLT:
13656 fputs ("nge", file);
13657 break;
13658 case LE:
13659 fputs ("le", file);
13660 break;
13661 case UNLE:
13662 fputs ("ngt", file);
13663 break;
13664 case UNORDERED:
13665 fputs ("unord", file);
13666 break;
13667 case NE:
13668 fputs ("neq", file);
13669 break;
13670 case LTGT:
13671 fputs ("neq_oq", file);
13672 break;
13673 case GE:
13674 fputs ("ge", file);
13675 break;
13676 case UNGE:
13677 fputs ("nlt", file);
13678 break;
13679 case GT:
13680 fputs ("gt", file);
13681 break;
13682 case UNGT:
13683 fputs ("nle", file);
13684 break;
13685 case ORDERED:
13686 fputs ("ord", file);
13687 break;
13688 default:
13689 output_operand_lossage ("operand is not a condition code, "
13690 "invalid operand code 'D'");
13691 return;
13692 }
13693 }
13694 else
13695 {
13696 switch (GET_CODE (x))
13697 {
13698 case EQ:
13699 case UNEQ:
13700 fputs ("eq", file);
13701 break;
13702 case LT:
13703 case UNLT:
13704 fputs ("lt", file);
13705 break;
13706 case LE:
13707 case UNLE:
13708 fputs ("le", file);
13709 break;
13710 case UNORDERED:
13711 fputs ("unord", file);
13712 break;
13713 case NE:
13714 case LTGT:
13715 fputs ("neq", file);
13716 break;
13717 case UNGE:
13718 case GE:
13719 fputs ("nlt", file);
13720 break;
13721 case UNGT:
13722 case GT:
13723 fputs ("nle", file);
13724 break;
13725 case ORDERED:
13726 fputs ("ord", file);
13727 break;
13728 default:
13729 output_operand_lossage ("operand is not a condition code, "
13730 "invalid operand code 'D'");
13731 return;
13732 }
13733 }
13734 return;
13735 case 'O':
13736 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13737 if (ASSEMBLER_DIALECT == ASM_ATT)
13738 {
13739 switch (GET_MODE (x))
13740 {
13741 case HImode: putc ('w', file); break;
13742 case SImode:
13743 case SFmode: putc ('l', file); break;
13744 case DImode:
13745 case DFmode: putc ('q', file); break;
13746 default: gcc_unreachable ();
13747 }
13748 putc ('.', file);
13749 }
13750 #endif
13751 return;
13752 case 'C':
13753 if (!COMPARISON_P (x))
13754 {
13755 output_operand_lossage ("operand is neither a constant nor a "
13756 "condition code, invalid operand code "
13757 "'C'");
13758 return;
13759 }
13760 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
13761 return;
13762 case 'F':
13763 if (!COMPARISON_P (x))
13764 {
13765 output_operand_lossage ("operand is neither a constant nor a "
13766 "condition code, invalid operand code "
13767 "'F'");
13768 return;
13769 }
13770 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13771 if (ASSEMBLER_DIALECT == ASM_ATT)
13772 putc ('.', file);
13773 #endif
13774 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
13775 return;
13776
13777 /* Like above, but reverse condition */
13778 case 'c':
13779 /* Check to see if argument to %c is really a constant
13780 and not a condition code which needs to be reversed. */
13781 if (!COMPARISON_P (x))
13782 {
13783 output_operand_lossage ("operand is neither a constant nor a "
13784 "condition code, invalid operand "
13785 "code 'c'");
13786 return;
13787 }
13788 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
13789 return;
13790 case 'f':
13791 if (!COMPARISON_P (x))
13792 {
13793 output_operand_lossage ("operand is neither a constant nor a "
13794 "condition code, invalid operand "
13795 "code 'f'");
13796 return;
13797 }
13798 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13799 if (ASSEMBLER_DIALECT == ASM_ATT)
13800 putc ('.', file);
13801 #endif
13802 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
13803 return;
13804
13805 case 'H':
13806 /* It doesn't actually matter what mode we use here, as we're
13807 only going to use this for printing. */
13808 x = adjust_address_nv (x, DImode, 8);
13809 break;
13810
13811 case '+':
13812 {
13813 rtx x;
13814
13815 if (!optimize
13816 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
13817 return;
13818
13819 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
13820 if (x)
13821 {
13822 int pred_val = INTVAL (XEXP (x, 0));
13823
13824 if (pred_val < REG_BR_PROB_BASE * 45 / 100
13825 || pred_val > REG_BR_PROB_BASE * 55 / 100)
13826 {
13827 int taken = pred_val > REG_BR_PROB_BASE / 2;
13828 int cputaken = final_forward_branch_p (current_output_insn) == 0;
13829
13830 /* Emit hints only where the default branch prediction
13831 heuristics would fail. */
13832 if (taken != cputaken)
13833 {
13834 /* We use 3e (DS) prefix for taken branches and
13835 2e (CS) prefix for not taken branches. */
13836 if (taken)
13837 fputs ("ds ; ", file);
13838 else
13839 fputs ("cs ; ", file);
13840 }
13841 }
13842 }
13843 return;
13844 }
13845
13846 case 'Y':
13847 switch (GET_CODE (x))
13848 {
13849 case NE:
13850 fputs ("neq", file);
13851 break;
13852 case EQ:
13853 fputs ("eq", file);
13854 break;
13855 case GE:
13856 case GEU:
13857 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
13858 break;
13859 case GT:
13860 case GTU:
13861 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
13862 break;
13863 case LE:
13864 case LEU:
13865 fputs ("le", file);
13866 break;
13867 case LT:
13868 case LTU:
13869 fputs ("lt", file);
13870 break;
13871 case UNORDERED:
13872 fputs ("unord", file);
13873 break;
13874 case ORDERED:
13875 fputs ("ord", file);
13876 break;
13877 case UNEQ:
13878 fputs ("ueq", file);
13879 break;
13880 case UNGE:
13881 fputs ("nlt", file);
13882 break;
13883 case UNGT:
13884 fputs ("nle", file);
13885 break;
13886 case UNLE:
13887 fputs ("ule", file);
13888 break;
13889 case UNLT:
13890 fputs ("ult", file);
13891 break;
13892 case LTGT:
13893 fputs ("une", file);
13894 break;
13895 default:
13896 output_operand_lossage ("operand is not a condition code, "
13897 "invalid operand code 'Y'");
13898 return;
13899 }
13900 return;
13901
13902 case ';':
13903 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
13904 putc (';', file);
13905 #endif
13906 return;
13907
13908 case '@':
13909 if (ASSEMBLER_DIALECT == ASM_ATT)
13910 putc ('%', file);
13911
13912 /* The kernel uses a different segment register for performance
13913 reasons; a system call would not have to trash the userspace
13914 segment register, which would be expensive. */
13915 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
13916 fputs ("fs", file);
13917 else
13918 fputs ("gs", file);
13919 return;
13920
13921 default:
13922 output_operand_lossage ("invalid operand code '%c'", code);
13923 }
13924 }
13925
13926 if (REG_P (x))
13927 print_reg (x, code, file);
13928
13929 else if (MEM_P (x))
13930 {
13931 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
13932 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
13933 && GET_MODE (x) != BLKmode)
13934 {
13935 const char * size;
13936 switch (GET_MODE_SIZE (GET_MODE (x)))
13937 {
13938 case 1: size = "BYTE"; break;
13939 case 2: size = "WORD"; break;
13940 case 4: size = "DWORD"; break;
13941 case 8: size = "QWORD"; break;
13942 case 12: size = "TBYTE"; break;
13943 case 16:
13944 if (GET_MODE (x) == XFmode)
13945 size = "TBYTE";
13946 else
13947 size = "XMMWORD";
13948 break;
13949 case 32: size = "YMMWORD"; break;
13950 default:
13951 gcc_unreachable ();
13952 }
13953
13954 /* Check for explicit size override (codes 'b', 'w' and 'k') */
13955 if (code == 'b')
13956 size = "BYTE";
13957 else if (code == 'w')
13958 size = "WORD";
13959 else if (code == 'k')
13960 size = "DWORD";
13961
13962 fputs (size, file);
13963 fputs (" PTR ", file);
13964 }
13965
13966 x = XEXP (x, 0);
13967 /* Avoid (%rip) for call operands. */
13968 if (CONSTANT_ADDRESS_P (x) && code == 'P'
13969 && !CONST_INT_P (x))
13970 output_addr_const (file, x);
13971 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
13972 output_operand_lossage ("invalid constraints for operand");
13973 else
13974 output_address (x);
13975 }
13976
13977 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
13978 {
13979 REAL_VALUE_TYPE r;
13980 long l;
13981
13982 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
13983 REAL_VALUE_TO_TARGET_SINGLE (r, l);
13984
13985 if (ASSEMBLER_DIALECT == ASM_ATT)
13986 putc ('$', file);
13987 /* Sign extend 32bit SFmode immediate to 8 bytes. */
13988 if (code == 'q')
13989 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
13990 else
13991 fprintf (file, "0x%08x", (unsigned int) l);
13992 }
13993
13994 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
13995 {
13996 REAL_VALUE_TYPE r;
13997 long l[2];
13998
13999 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14000 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
14001
14002 if (ASSEMBLER_DIALECT == ASM_ATT)
14003 putc ('$', file);
14004 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
14005 }
14006
14007 /* These float cases don't actually occur as immediate operands. */
14008 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
14009 {
14010 char dstr[30];
14011
14012 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14013 fputs (dstr, file);
14014 }
14015
14016 else
14017 {
14018 /* We have patterns that allow zero sets of memory, for instance.
14019 In 64-bit mode, we should probably support all 8-byte vectors,
14020 since we can in fact encode that into an immediate. */
14021 if (GET_CODE (x) == CONST_VECTOR)
14022 {
14023 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
14024 x = const0_rtx;
14025 }
14026
14027 if (code != 'P' && code != 'p')
14028 {
14029 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
14030 {
14031 if (ASSEMBLER_DIALECT == ASM_ATT)
14032 putc ('$', file);
14033 }
14034 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
14035 || GET_CODE (x) == LABEL_REF)
14036 {
14037 if (ASSEMBLER_DIALECT == ASM_ATT)
14038 putc ('$', file);
14039 else
14040 fputs ("OFFSET FLAT:", file);
14041 }
14042 }
14043 if (CONST_INT_P (x))
14044 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14045 else if (flag_pic || MACHOPIC_INDIRECT)
14046 output_pic_addr_const (file, x, code);
14047 else
14048 output_addr_const (file, x);
14049 }
14050 }
14051
14052 static bool
14053 ix86_print_operand_punct_valid_p (unsigned char code)
14054 {
14055 return (code == '@' || code == '*' || code == '+'
14056 || code == '&' || code == ';');
14057 }
14058 \f
14059 /* Print a memory operand whose address is ADDR. */
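/* For example, base %ebx, index %ecx, scale 4 and displacement 16 is
   printed as "16(%ebx,%ecx,4)" in ATT syntax and as "[ebx+16+ecx*4]"
   in Intel syntax (illustrative only; segment overrides and
   RIP-relative forms are handled below). */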
14060
14061 static void
14062 ix86_print_operand_address (FILE *file, rtx addr)
14063 {
14064 struct ix86_address parts;
14065 rtx base, index, disp;
14066 int scale;
14067 int ok = ix86_decompose_address (addr, &parts);
14068
14069 gcc_assert (ok);
14070
14071 base = parts.base;
14072 index = parts.index;
14073 disp = parts.disp;
14074 scale = parts.scale;
14075
14076 switch (parts.seg)
14077 {
14078 case SEG_DEFAULT:
14079 break;
14080 case SEG_FS:
14081 case SEG_GS:
14082 if (ASSEMBLER_DIALECT == ASM_ATT)
14083 putc ('%', file);
14084 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
14085 break;
14086 default:
14087 gcc_unreachable ();
14088 }
14089
14090 /* Use one byte shorter RIP relative addressing for 64bit mode. */
14091 if (TARGET_64BIT && !base && !index)
14092 {
14093 rtx symbol = disp;
14094
14095 if (GET_CODE (disp) == CONST
14096 && GET_CODE (XEXP (disp, 0)) == PLUS
14097 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14098 symbol = XEXP (XEXP (disp, 0), 0);
14099
14100 if (GET_CODE (symbol) == LABEL_REF
14101 || (GET_CODE (symbol) == SYMBOL_REF
14102 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
14103 base = pc_rtx;
14104 }
14105 if (!base && !index)
14106 {
14107 /* Displacement only requires special attention. */
14108
14109 if (CONST_INT_P (disp))
14110 {
14111 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
14112 fputs ("ds:", file);
14113 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
14114 }
14115 else if (flag_pic)
14116 output_pic_addr_const (file, disp, 0);
14117 else
14118 output_addr_const (file, disp);
14119 }
14120 else
14121 {
14122 if (ASSEMBLER_DIALECT == ASM_ATT)
14123 {
14124 if (disp)
14125 {
14126 if (flag_pic)
14127 output_pic_addr_const (file, disp, 0);
14128 else if (GET_CODE (disp) == LABEL_REF)
14129 output_asm_label (disp);
14130 else
14131 output_addr_const (file, disp);
14132 }
14133
14134 putc ('(', file);
14135 if (base)
14136 print_reg (base, 0, file);
14137 if (index)
14138 {
14139 putc (',', file);
14140 print_reg (index, 0, file);
14141 if (scale != 1)
14142 fprintf (file, ",%d", scale);
14143 }
14144 putc (')', file);
14145 }
14146 else
14147 {
14148 rtx offset = NULL_RTX;
14149
14150 if (disp)
14151 {
14152 /* Pull out the offset of a symbol; print any symbol itself. */
14153 if (GET_CODE (disp) == CONST
14154 && GET_CODE (XEXP (disp, 0)) == PLUS
14155 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14156 {
14157 offset = XEXP (XEXP (disp, 0), 1);
14158 disp = gen_rtx_CONST (VOIDmode,
14159 XEXP (XEXP (disp, 0), 0));
14160 }
14161
14162 if (flag_pic)
14163 output_pic_addr_const (file, disp, 0);
14164 else if (GET_CODE (disp) == LABEL_REF)
14165 output_asm_label (disp);
14166 else if (CONST_INT_P (disp))
14167 offset = disp;
14168 else
14169 output_addr_const (file, disp);
14170 }
14171
14172 putc ('[', file);
14173 if (base)
14174 {
14175 print_reg (base, 0, file);
14176 if (offset)
14177 {
14178 if (INTVAL (offset) >= 0)
14179 putc ('+', file);
14180 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14181 }
14182 }
14183 else if (offset)
14184 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14185 else
14186 putc ('0', file);
14187
14188 if (index)
14189 {
14190 putc ('+', file);
14191 print_reg (index, 0, file);
14192 if (scale != 1)
14193 fprintf (file, "*%d", scale);
14194 }
14195 putc (']', file);
14196 }
14197 }
14198 }
14199
14200 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14201
14202 static bool
14203 i386_asm_output_addr_const_extra (FILE *file, rtx x)
14204 {
14205 rtx op;
14206
14207 if (GET_CODE (x) != UNSPEC)
14208 return false;
14209
14210 op = XVECEXP (x, 0, 0);
14211 switch (XINT (x, 1))
14212 {
14213 case UNSPEC_GOTTPOFF:
14214 output_addr_const (file, op);
14215 /* FIXME: This might be @TPOFF in Sun ld. */
14216 fputs ("@gottpoff", file);
14217 break;
14218 case UNSPEC_TPOFF:
14219 output_addr_const (file, op);
14220 fputs ("@tpoff", file);
14221 break;
14222 case UNSPEC_NTPOFF:
14223 output_addr_const (file, op);
14224 if (TARGET_64BIT)
14225 fputs ("@tpoff", file);
14226 else
14227 fputs ("@ntpoff", file);
14228 break;
14229 case UNSPEC_DTPOFF:
14230 output_addr_const (file, op);
14231 fputs ("@dtpoff", file);
14232 break;
14233 case UNSPEC_GOTNTPOFF:
14234 output_addr_const (file, op);
14235 if (TARGET_64BIT)
14236 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14237 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
14238 else
14239 fputs ("@gotntpoff", file);
14240 break;
14241 case UNSPEC_INDNTPOFF:
14242 output_addr_const (file, op);
14243 fputs ("@indntpoff", file);
14244 break;
14245 #if TARGET_MACHO
14246 case UNSPEC_MACHOPIC_OFFSET:
14247 output_addr_const (file, op);
14248 putc ('-', file);
14249 machopic_output_function_base_name (file);
14250 break;
14251 #endif
14252
14253 case UNSPEC_STACK_CHECK:
14254 {
14255 int offset;
14256
14257 gcc_assert (flag_split_stack);
14258
14259 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14260 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
14261 #else
14262 gcc_unreachable ();
14263 #endif
14264
14265 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
14266 }
14267 break;
14268
14269 default:
14270 return false;
14271 }
14272
14273 return true;
14274 }
14275 \f
14276 /* Split one or more double-mode RTL references into pairs of half-mode
14277 references. The RTL can be REG, offsettable MEM, integer constant, or
14278 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
14279 split and "num" is its length. lo_half and hi_half are output arrays
14280 that parallel "operands". */
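/* For example, a DImode operand is split into two SImode halves at byte
   offsets 0 and 4: a register becomes two subregs (or a hard register
   pair), and an offsettable MEM at address X becomes MEMs at X and X+4,
   low part first since the target is little-endian. */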
14281
14282 void
14283 split_double_mode (enum machine_mode mode, rtx operands[],
14284 int num, rtx lo_half[], rtx hi_half[])
14285 {
14286 enum machine_mode half_mode;
14287 unsigned int byte;
14288
14289 switch (mode)
14290 {
14291 case TImode:
14292 half_mode = DImode;
14293 break;
14294 case DImode:
14295 half_mode = SImode;
14296 break;
14297 default:
14298 gcc_unreachable ();
14299 }
14300
14301 byte = GET_MODE_SIZE (half_mode);
14302
14303 while (num--)
14304 {
14305 rtx op = operands[num];
14306
14307 /* simplify_subreg refuses to split volatile memory addresses,
14308 but we still have to handle them. */
14309 if (MEM_P (op))
14310 {
14311 lo_half[num] = adjust_address (op, half_mode, 0);
14312 hi_half[num] = adjust_address (op, half_mode, byte);
14313 }
14314 else
14315 {
14316 lo_half[num] = simplify_gen_subreg (half_mode, op,
14317 GET_MODE (op) == VOIDmode
14318 ? mode : GET_MODE (op), 0);
14319 hi_half[num] = simplify_gen_subreg (half_mode, op,
14320 GET_MODE (op) == VOIDmode
14321 ? mode : GET_MODE (op), byte);
14322 }
14323 }
14324 }
14325 \f
14326 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14327 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14328 is the expression of the binary operation. The output may either be
14329 emitted here, or returned to the caller, like all output_* functions.
14330
14331 There is no guarantee that the operands are the same mode, as they
14332 might be within FLOAT or FLOAT_EXTEND expressions. */
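/* For example, a DFmode PLUS on SSE registers yields
   "vaddsd\t{%2, %1, %0|%0, %1, %2}" when AVX is enabled and
   "addsd\t{%2, %0|%0, %2}" otherwise; the x87 paths below build the
   corresponding fadd/faddp forms. */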
14333
14334 #ifndef SYSV386_COMPAT
14335 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14336 wants to fix the assemblers because that causes incompatibility
14337 with gcc. No-one wants to fix gcc because that causes
14338 incompatibility with assemblers... You can use the option of
14339 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14340 #define SYSV386_COMPAT 1
14341 #endif
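/* A note on the templates returned below: the "{att|intel}" braces pick
   the operand order for the current ASSEMBLER_DIALECT, and modifiers
   such as %Z2 or %y2 are expanded by print_operand.  For instance, the
   plain SSE SFmode template prints roughly as "addss %xmm1, %xmm0" in
   AT&T syntax, while the AVX form becomes the three-operand
   "vaddss %xmm2, %xmm1, %xmm0".  */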
14342
14343 const char *
14344 output_387_binary_op (rtx insn, rtx *operands)
14345 {
14346 static char buf[40];
14347 const char *p;
14348 const char *ssep;
14349 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
14350
14351 #ifdef ENABLE_CHECKING
14352 /* Even if we do not want to check the inputs, this documents the input
14353 constraints, which helps in understanding the following code. */
14354 if (STACK_REG_P (operands[0])
14355 && ((REG_P (operands[1])
14356 && REGNO (operands[0]) == REGNO (operands[1])
14357 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
14358 || (REG_P (operands[2])
14359 && REGNO (operands[0]) == REGNO (operands[2])
14360 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
14361 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
14362 ; /* ok */
14363 else
14364 gcc_assert (is_sse);
14365 #endif
14366
14367 switch (GET_CODE (operands[3]))
14368 {
14369 case PLUS:
14370 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14371 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14372 p = "fiadd";
14373 else
14374 p = "fadd";
14375 ssep = "vadd";
14376 break;
14377
14378 case MINUS:
14379 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14380 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14381 p = "fisub";
14382 else
14383 p = "fsub";
14384 ssep = "vsub";
14385 break;
14386
14387 case MULT:
14388 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14389 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14390 p = "fimul";
14391 else
14392 p = "fmul";
14393 ssep = "vmul";
14394 break;
14395
14396 case DIV:
14397 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14398 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14399 p = "fidiv";
14400 else
14401 p = "fdiv";
14402 ssep = "vdiv";
14403 break;
14404
14405 default:
14406 gcc_unreachable ();
14407 }
14408
14409 if (is_sse)
14410 {
14411 if (TARGET_AVX)
14412 {
14413 strcpy (buf, ssep);
14414 if (GET_MODE (operands[0]) == SFmode)
14415 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
14416 else
14417 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
14418 }
14419 else
14420 {
14421 strcpy (buf, ssep + 1);
14422 if (GET_MODE (operands[0]) == SFmode)
14423 strcat (buf, "ss\t{%2, %0|%0, %2}");
14424 else
14425 strcat (buf, "sd\t{%2, %0|%0, %2}");
14426 }
14427 return buf;
14428 }
14429 strcpy (buf, p);
14430
14431 switch (GET_CODE (operands[3]))
14432 {
14433 case MULT:
14434 case PLUS:
14435 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
14436 {
14437 rtx temp = operands[2];
14438 operands[2] = operands[1];
14439 operands[1] = temp;
14440 }
14441
14442 /* We know operands[0] == operands[1]. */
14443
14444 if (MEM_P (operands[2]))
14445 {
14446 p = "%Z2\t%2";
14447 break;
14448 }
14449
14450 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14451 {
14452 if (STACK_TOP_P (operands[0]))
14453 /* How is it that we are storing to a dead operand[2]?
14454 Well, presumably operands[1] is dead too. We can't
14455 store the result to st(0) as st(0) gets popped on this
14456 instruction. Instead store to operands[2] (which I
14457 think has to be st(1)). st(1) will be popped later.
14458 gcc <= 2.8.1 didn't have this check and generated
14459 assembly code that the Unixware assembler rejected. */
14460 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14461 else
14462 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14463 break;
14464 }
14465
14466 if (STACK_TOP_P (operands[0]))
14467 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14468 else
14469 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14470 break;
14471
14472 case MINUS:
14473 case DIV:
14474 if (MEM_P (operands[1]))
14475 {
14476 p = "r%Z1\t%1";
14477 break;
14478 }
14479
14480 if (MEM_P (operands[2]))
14481 {
14482 p = "%Z2\t%2";
14483 break;
14484 }
14485
14486 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14487 {
14488 #if SYSV386_COMPAT
14489 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
14490 derived assemblers, confusingly reverse the direction of
14491 the operation for fsub{r} and fdiv{r} when the
14492 destination register is not st(0). The Intel assembler
14493 doesn't have this brain damage. Read !SYSV386_COMPAT to
14494 figure out what the hardware really does. */
14495 if (STACK_TOP_P (operands[0]))
14496 p = "{p\t%0, %2|rp\t%2, %0}";
14497 else
14498 p = "{rp\t%2, %0|p\t%0, %2}";
14499 #else
14500 if (STACK_TOP_P (operands[0]))
14501 /* As above for fmul/fadd, we can't store to st(0). */
14502 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14503 else
14504 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14505 #endif
14506 break;
14507 }
14508
14509 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
14510 {
14511 #if SYSV386_COMPAT
14512 if (STACK_TOP_P (operands[0]))
14513 p = "{rp\t%0, %1|p\t%1, %0}";
14514 else
14515 p = "{p\t%1, %0|rp\t%0, %1}";
14516 #else
14517 if (STACK_TOP_P (operands[0]))
14518 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
14519 else
14520 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
14521 #endif
14522 break;
14523 }
14524
14525 if (STACK_TOP_P (operands[0]))
14526 {
14527 if (STACK_TOP_P (operands[1]))
14528 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14529 else
14530 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
14531 break;
14532 }
14533 else if (STACK_TOP_P (operands[1]))
14534 {
14535 #if SYSV386_COMPAT
14536 p = "{\t%1, %0|r\t%0, %1}";
14537 #else
14538 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
14539 #endif
14540 }
14541 else
14542 {
14543 #if SYSV386_COMPAT
14544 p = "{r\t%2, %0|\t%0, %2}";
14545 #else
14546 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14547 #endif
14548 }
14549 break;
14550
14551 default:
14552 gcc_unreachable ();
14553 }
14554
14555 strcat (buf, p);
14556 return buf;
14557 }
14558
14559 /* Return needed mode for entity in optimize_mode_switching pass. */
14560
14561 int
14562 ix86_mode_needed (int entity, rtx insn)
14563 {
14564 enum attr_i387_cw mode;
14565
14566 /* The mode UNINITIALIZED is used to store the control word after a
14567 function call or ASM pattern. The mode ANY specifies that the function
14568 has no requirements on the control word and makes no changes in the
14569 bits we are interested in. */
14570
14571 if (CALL_P (insn)
14572 || (NONJUMP_INSN_P (insn)
14573 && (asm_noperands (PATTERN (insn)) >= 0
14574 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
14575 return I387_CW_UNINITIALIZED;
14576
14577 if (recog_memoized (insn) < 0)
14578 return I387_CW_ANY;
14579
14580 mode = get_attr_i387_cw (insn);
14581
14582 switch (entity)
14583 {
14584 case I387_TRUNC:
14585 if (mode == I387_CW_TRUNC)
14586 return mode;
14587 break;
14588
14589 case I387_FLOOR:
14590 if (mode == I387_CW_FLOOR)
14591 return mode;
14592 break;
14593
14594 case I387_CEIL:
14595 if (mode == I387_CW_CEIL)
14596 return mode;
14597 break;
14598
14599 case I387_MASK_PM:
14600 if (mode == I387_CW_MASK_PM)
14601 return mode;
14602 break;
14603
14604 default:
14605 gcc_unreachable ();
14606 }
14607
14608 return I387_CW_ANY;
14609 }
14610
14611 /* Output code to initialize control word copies used by trunc?f?i and
14612 rounding patterns. CURRENT_MODE is set to the current control word,
14613 while NEW_MODE is set to the new control word. */
14614
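/* Recall the x87 control word layout used below: bits 10-11 are the
   rounding-control field (0x0c00 = toward zero, 0x0400 = down,
   0x0800 = up), and bit 5 (0x0020) masks the precision exception,
   which is what nearbyint needs.  */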
14615 void
14616 emit_i387_cw_initialization (int mode)
14617 {
14618 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
14619 rtx new_mode;
14620
14621 enum ix86_stack_slot slot;
14622
14623 rtx reg = gen_reg_rtx (HImode);
14624
14625 emit_insn (gen_x86_fnstcw_1 (stored_mode));
14626 emit_move_insn (reg, copy_rtx (stored_mode));
14627
14628 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
14629 || optimize_function_for_size_p (cfun))
14630 {
14631 switch (mode)
14632 {
14633 case I387_CW_TRUNC:
14634 /* round toward zero (truncate) */
14635 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
14636 slot = SLOT_CW_TRUNC;
14637 break;
14638
14639 case I387_CW_FLOOR:
14640 /* round down toward -oo */
14641 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14642 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
14643 slot = SLOT_CW_FLOOR;
14644 break;
14645
14646 case I387_CW_CEIL:
14647 /* round up toward +oo */
14648 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14649 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
14650 slot = SLOT_CW_CEIL;
14651 break;
14652
14653 case I387_CW_MASK_PM:
14654 /* mask precision exception for nearbyint() */
14655 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14656 slot = SLOT_CW_MASK_PM;
14657 break;
14658
14659 default:
14660 gcc_unreachable ();
14661 }
14662 }
14663 else
14664 {
14665 switch (mode)
14666 {
14667 case I387_CW_TRUNC:
14668 /* round toward zero (truncate) */
14669 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
14670 slot = SLOT_CW_TRUNC;
14671 break;
14672
14673 case I387_CW_FLOOR:
14674 /* round down toward -oo */
14675 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
14676 slot = SLOT_CW_FLOOR;
14677 break;
14678
14679 case I387_CW_CEIL:
14680 /* round up toward +oo */
14681 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
14682 slot = SLOT_CW_CEIL;
14683 break;
14684
14685 case I387_CW_MASK_PM:
14686 /* mask precision exception for nearbyint() */
14687 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14688 slot = SLOT_CW_MASK_PM;
14689 break;
14690
14691 default:
14692 gcc_unreachable ();
14693 }
14694 }
14695
14696 gcc_assert (slot < MAX_386_STACK_LOCALS);
14697
14698 new_mode = assign_386_stack_local (HImode, slot);
14699 emit_move_insn (new_mode, reg);
14700 }
14701
14702 /* Output code for INSN to convert a float to a signed int. OPERANDS
14703 are the insn operands. The output may be [HSD]Imode and the input
14704 operand may be [SDX]Fmode. */
14705
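/* The x87 default rounding mode is round-to-nearest, while conversion
   to integer must truncate, so unless the SSE3 fisttp instruction is
   available the control word is temporarily switched (fldcw %3) and
   restored afterwards (fldcw %2) around the fist/fistp.  */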
14706 const char *
14707 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
14708 {
14709 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14710 int dimode_p = GET_MODE (operands[0]) == DImode;
14711 int round_mode = get_attr_i387_cw (insn);
14712
14713 /* Jump through a hoop or two for DImode, since the hardware has no
14714 non-popping instruction. We used to do this a different way, but
14715 that was somewhat fragile and broke with post-reload splitters. */
14716 if ((dimode_p || fisttp) && !stack_top_dies)
14717 output_asm_insn ("fld\t%y1", operands);
14718
14719 gcc_assert (STACK_TOP_P (operands[1]));
14720 gcc_assert (MEM_P (operands[0]));
14721 gcc_assert (GET_MODE (operands[1]) != TFmode);
14722
14723 if (fisttp)
14724 output_asm_insn ("fisttp%Z0\t%0", operands);
14725 else
14726 {
14727 if (round_mode != I387_CW_ANY)
14728 output_asm_insn ("fldcw\t%3", operands);
14729 if (stack_top_dies || dimode_p)
14730 output_asm_insn ("fistp%Z0\t%0", operands);
14731 else
14732 output_asm_insn ("fist%Z0\t%0", operands);
14733 if (round_mode != I387_CW_ANY)
14734 output_asm_insn ("fldcw\t%2", operands);
14735 }
14736
14737 return "";
14738 }
14739
14740 /* Output code for x87 ffreep insn. The OPNO argument, which may only
14741 have the values zero or one, indicates the ffreep insn's operand
14742 from the OPERANDS array. */
14743
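/* If the assembler does not know the ffreep mnemonic (no
   HAVE_AS_IX86_FFREEP), the two-byte encoding of "ffreep %st(i)" is
   emitted directly as data via ASM_SHORT; when TARGET_USE_FFREEP is
   off altogether, a plain fstp to the same stack slot is used
   instead.  */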
14744 static const char *
14745 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
14746 {
14747 if (TARGET_USE_FFREEP)
14748 #ifdef HAVE_AS_IX86_FFREEP
14749 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
14750 #else
14751 {
14752 static char retval[32];
14753 int regno = REGNO (operands[opno]);
14754
14755 gcc_assert (FP_REGNO_P (regno));
14756
14757 regno -= FIRST_STACK_REG;
14758
14759 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
14760 return retval;
14761 }
14762 #endif
14763
14764 return opno ? "fstp\t%y1" : "fstp\t%y0";
14765 }
14766
14767
14768 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
14769 should be used. UNORDERED_P is true when fucom should be used. */
14770
14771 const char *
14772 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
14773 {
14774 int stack_top_dies;
14775 rtx cmp_op0, cmp_op1;
14776 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
14777
14778 if (eflags_p)
14779 {
14780 cmp_op0 = operands[0];
14781 cmp_op1 = operands[1];
14782 }
14783 else
14784 {
14785 cmp_op0 = operands[1];
14786 cmp_op1 = operands[2];
14787 }
14788
14789 if (is_sse)
14790 {
14791 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
14792 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
14793 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
14794 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
14795
14796 if (GET_MODE (operands[0]) == SFmode)
14797 if (unordered_p)
14798 return &ucomiss[TARGET_AVX ? 0 : 1];
14799 else
14800 return &comiss[TARGET_AVX ? 0 : 1];
14801 else
14802 if (unordered_p)
14803 return &ucomisd[TARGET_AVX ? 0 : 1];
14804 else
14805 return &comisd[TARGET_AVX ? 0 : 1];
14806 }
14807
14808 gcc_assert (STACK_TOP_P (cmp_op0));
14809
14810 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14811
14812 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
14813 {
14814 if (stack_top_dies)
14815 {
14816 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
14817 return output_387_ffreep (operands, 1);
14818 }
14819 else
14820 return "ftst\n\tfnstsw\t%0";
14821 }
14822
14823 if (STACK_REG_P (cmp_op1)
14824 && stack_top_dies
14825 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
14826 && REGNO (cmp_op1) != FIRST_STACK_REG)
14827 {
14828 /* If the top of the 387 stack dies, and the other operand is also
14829 a stack register that dies, then this must be a `fcompp' float
14830 compare. */
14831
14832 if (eflags_p)
14833 {
14834 /* There is no double popping fcomi variant. Fortunately,
14835 eflags is immune from the fstp's cc clobbering. */
14836 if (unordered_p)
14837 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
14838 else
14839 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
14840 return output_387_ffreep (operands, 0);
14841 }
14842 else
14843 {
14844 if (unordered_p)
14845 return "fucompp\n\tfnstsw\t%0";
14846 else
14847 return "fcompp\n\tfnstsw\t%0";
14848 }
14849 }
14850 else
14851 {
14852 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
14853
14854 static const char * const alt[16] =
14855 {
14856 "fcom%Z2\t%y2\n\tfnstsw\t%0",
14857 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
14858 "fucom%Z2\t%y2\n\tfnstsw\t%0",
14859 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
14860
14861 "ficom%Z2\t%y2\n\tfnstsw\t%0",
14862 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
14863 NULL,
14864 NULL,
14865
14866 "fcomi\t{%y1, %0|%0, %y1}",
14867 "fcomip\t{%y1, %0|%0, %y1}",
14868 "fucomi\t{%y1, %0|%0, %y1}",
14869 "fucomip\t{%y1, %0|%0, %y1}",
14870
14871 NULL,
14872 NULL,
14873 NULL,
14874 NULL
14875 };
14876
14877 int mask;
14878 const char *ret;
14879
14880 mask = eflags_p << 3;
14881 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
14882 mask |= unordered_p << 1;
14883 mask |= stack_top_dies;
14884
14885 gcc_assert (mask < 16);
14886 ret = alt[mask];
14887 gcc_assert (ret);
14888
14889 return ret;
14890 }
14891 }
14892
14893 void
14894 ix86_output_addr_vec_elt (FILE *file, int value)
14895 {
14896 const char *directive = ASM_LONG;
14897
14898 #ifdef ASM_QUAD
14899 if (TARGET_LP64)
14900 directive = ASM_QUAD;
14901 #else
14902 gcc_assert (!TARGET_64BIT);
14903 #endif
14904
14905 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
14906 }
14907
14908 void
14909 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
14910 {
14911 const char *directive = ASM_LONG;
14912
14913 #ifdef ASM_QUAD
14914 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
14915 directive = ASM_QUAD;
14916 #else
14917 gcc_assert (!TARGET_64BIT);
14918 #endif
14919 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
14920 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
14921 fprintf (file, "%s%s%d-%s%d\n",
14922 directive, LPREFIX, value, LPREFIX, rel);
14923 else if (HAVE_AS_GOTOFF_IN_DATA)
14924 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
14925 #if TARGET_MACHO
14926 else if (TARGET_MACHO)
14927 {
14928 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
14929 machopic_output_function_base_name (file);
14930 putc ('\n', file);
14931 }
14932 #endif
14933 else
14934 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
14935 GOT_SYMBOL_NAME, LPREFIX, value);
14936 }
14937 \f
14938 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
14939 for the target. */
14940
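/* Rationale: "xor reg, reg" is the shorter encoding and breaks register
   dependencies, but it clobbers the flags, so the expansion attaches a
   FLAGS_REG clobber in that case; "mov $0, reg" is longer but leaves
   the flags untouched.  */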
14941 void
14942 ix86_expand_clear (rtx dest)
14943 {
14944 rtx tmp;
14945
14946 /* We play register width games, which are only valid after reload. */
14947 gcc_assert (reload_completed);
14948
14949 /* Avoid HImode and its attendant prefix byte. */
14950 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
14951 dest = gen_rtx_REG (SImode, REGNO (dest));
14952 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
14953
14954 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
14955 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
14956 {
14957 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14958 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
14959 }
14960
14961 emit_insn (tmp);
14962 }
14963
14964 /* X is an unchanging MEM. If it is a constant pool reference, return
14965 the constant pool rtx, else NULL. */
14966
14967 rtx
14968 maybe_get_pool_constant (rtx x)
14969 {
14970 x = ix86_delegitimize_address (XEXP (x, 0));
14971
14972 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
14973 return get_pool_constant (x);
14974
14975 return NULL_RTX;
14976 }
14977
14978 void
14979 ix86_expand_move (enum machine_mode mode, rtx operands[])
14980 {
14981 rtx op0, op1;
14982 enum tls_model model;
14983
14984 op0 = operands[0];
14985 op1 = operands[1];
14986
14987 if (GET_CODE (op1) == SYMBOL_REF)
14988 {
14989 model = SYMBOL_REF_TLS_MODEL (op1);
14990 if (model)
14991 {
14992 op1 = legitimize_tls_address (op1, model, true);
14993 op1 = force_operand (op1, op0);
14994 if (op1 == op0)
14995 return;
14996 if (GET_MODE (op1) != mode)
14997 op1 = convert_to_mode (mode, op1, 1);
14998 }
14999 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15000 && SYMBOL_REF_DLLIMPORT_P (op1))
15001 op1 = legitimize_dllimport_symbol (op1, false);
15002 }
15003 else if (GET_CODE (op1) == CONST
15004 && GET_CODE (XEXP (op1, 0)) == PLUS
15005 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
15006 {
15007 rtx addend = XEXP (XEXP (op1, 0), 1);
15008 rtx symbol = XEXP (XEXP (op1, 0), 0);
15009 rtx tmp = NULL;
15010
15011 model = SYMBOL_REF_TLS_MODEL (symbol);
15012 if (model)
15013 tmp = legitimize_tls_address (symbol, model, true);
15014 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15015 && SYMBOL_REF_DLLIMPORT_P (symbol))
15016 tmp = legitimize_dllimport_symbol (symbol, true);
15017
15018 if (tmp)
15019 {
15020 tmp = force_operand (tmp, NULL);
15021 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
15022 op0, 1, OPTAB_DIRECT);
15023 if (tmp == op0)
15024 return;
15025 }
15026 }
15027
15028 if ((flag_pic || MACHOPIC_INDIRECT)
15029 && mode == Pmode && symbolic_operand (op1, Pmode))
15030 {
15031 if (TARGET_MACHO && !TARGET_64BIT)
15032 {
15033 #if TARGET_MACHO
15034 /* dynamic-no-pic */
15035 if (MACHOPIC_INDIRECT)
15036 {
15037 rtx temp = ((reload_in_progress
15038 || ((op0 && REG_P (op0))
15039 && mode == Pmode))
15040 ? op0 : gen_reg_rtx (Pmode));
15041 op1 = machopic_indirect_data_reference (op1, temp);
15042 if (MACHOPIC_PURE)
15043 op1 = machopic_legitimize_pic_address (op1, mode,
15044 temp == op1 ? 0 : temp);
15045 }
15046 if (op0 != op1 && GET_CODE (op0) != MEM)
15047 {
15048 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
15049 emit_insn (insn);
15050 return;
15051 }
15052 if (GET_CODE (op0) == MEM)
15053 op1 = force_reg (Pmode, op1);
15054 else
15055 {
15056 rtx temp = op0;
15057 if (GET_CODE (temp) != REG)
15058 temp = gen_reg_rtx (Pmode);
15059 temp = legitimize_pic_address (op1, temp);
15060 if (temp == op0)
15061 return;
15062 op1 = temp;
15063 }
15064 /* dynamic-no-pic */
15065 #endif
15066 }
15067 else
15068 {
15069 if (MEM_P (op0))
15070 op1 = force_reg (Pmode, op1);
15071 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
15072 {
15073 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
15074 op1 = legitimize_pic_address (op1, reg);
15075 if (op0 == op1)
15076 return;
15077 }
15078 }
15079 }
15080 else
15081 {
15082 if (MEM_P (op0)
15083 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
15084 || !push_operand (op0, mode))
15085 && MEM_P (op1))
15086 op1 = force_reg (mode, op1);
15087
15088 if (push_operand (op0, mode)
15089 && ! general_no_elim_operand (op1, mode))
15090 op1 = copy_to_mode_reg (mode, op1);
15091
15092 /* Force large constants in 64bit compilation into register
15093 to get them CSEed. */
15094 if (can_create_pseudo_p ()
15095 && (mode == DImode) && TARGET_64BIT
15096 && immediate_operand (op1, mode)
15097 && !x86_64_zext_immediate_operand (op1, VOIDmode)
15098 && !register_operand (op0, mode)
15099 && optimize)
15100 op1 = copy_to_mode_reg (mode, op1);
15101
15102 if (can_create_pseudo_p ()
15103 && FLOAT_MODE_P (mode)
15104 && GET_CODE (op1) == CONST_DOUBLE)
15105 {
15106 /* If we are loading a floating point constant to a register,
15107 force the value to memory now, since we'll get better code
15108 out the back end. */
15109
15110 op1 = validize_mem (force_const_mem (mode, op1));
15111 if (!register_operand (op0, mode))
15112 {
15113 rtx temp = gen_reg_rtx (mode);
15114 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
15115 emit_move_insn (op0, temp);
15116 return;
15117 }
15118 }
15119 }
15120
15121 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15122 }
15123
15124 void
15125 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
15126 {
15127 rtx op0 = operands[0], op1 = operands[1];
15128 unsigned int align = GET_MODE_ALIGNMENT (mode);
15129
15130 /* Force constants other than zero into memory. We do not know how
15131 the instructions used to build constants modify the upper 64 bits
15132 of the register; once we have that information we may be able
15133 to handle some of them more efficiently. */
15134 if (can_create_pseudo_p ()
15135 && register_operand (op0, mode)
15136 && (CONSTANT_P (op1)
15137 || (GET_CODE (op1) == SUBREG
15138 && CONSTANT_P (SUBREG_REG (op1))))
15139 && !standard_sse_constant_p (op1))
15140 op1 = validize_mem (force_const_mem (mode, op1));
15141
15142 /* We need to check memory alignment for SSE modes since attributes
15143 can make operands unaligned. */
15144 if (can_create_pseudo_p ()
15145 && SSE_REG_MODE_P (mode)
15146 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
15147 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
15148 {
15149 rtx tmp[2];
15150
15151 /* ix86_expand_vector_move_misalign() does not like constants ... */
15152 if (CONSTANT_P (op1)
15153 || (GET_CODE (op1) == SUBREG
15154 && CONSTANT_P (SUBREG_REG (op1))))
15155 op1 = validize_mem (force_const_mem (mode, op1));
15156
15157 /* ... nor both arguments in memory. */
15158 if (!register_operand (op0, mode)
15159 && !register_operand (op1, mode))
15160 op1 = force_reg (mode, op1);
15161
15162 tmp[0] = op0; tmp[1] = op1;
15163 ix86_expand_vector_move_misalign (mode, tmp);
15164 return;
15165 }
15166
15167 /* If neither operand is a register, force operand1 into a register. */
15168 if (can_create_pseudo_p ()
15169 && !register_operand (op0, mode)
15170 && !register_operand (op1, mode))
15171 {
15172 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
15173 return;
15174 }
15175
15176 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15177 }
15178
15179 /* Split 32-byte AVX unaligned load and store if needed. */
15180
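/* The split turns one 32-byte unaligned access into two 16-byte halves:
   a load becomes two 16-byte loads combined with a VEC_CONCAT (matched
   by a vinsertf128-style pattern), and a store becomes two
   vextractf128 stores at offsets 0 and 16.  */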
15181 static void
15182 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
15183 {
15184 rtx m;
15185 rtx (*extract) (rtx, rtx, rtx);
15186 rtx (*move_unaligned) (rtx, rtx);
15187 enum machine_mode mode;
15188
15189 switch (GET_MODE (op0))
15190 {
15191 default:
15192 gcc_unreachable ();
15193 case V32QImode:
15194 extract = gen_avx_vextractf128v32qi;
15195 move_unaligned = gen_avx_movdqu256;
15196 mode = V16QImode;
15197 break;
15198 case V8SFmode:
15199 extract = gen_avx_vextractf128v8sf;
15200 move_unaligned = gen_avx_movups256;
15201 mode = V4SFmode;
15202 break;
15203 case V4DFmode:
15204 extract = gen_avx_vextractf128v4df;
15205 move_unaligned = gen_avx_movupd256;
15206 mode = V2DFmode;
15207 break;
15208 }
15209
15210 if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
15211 {
15212 rtx r = gen_reg_rtx (mode);
15213 m = adjust_address (op1, mode, 0);
15214 emit_move_insn (r, m);
15215 m = adjust_address (op1, mode, 16);
15216 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
15217 emit_move_insn (op0, r);
15218 }
15219 else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE)
15220 {
15221 m = adjust_address (op0, mode, 0);
15222 emit_insn (extract (m, op1, const0_rtx));
15223 m = adjust_address (op0, mode, 16);
15224 emit_insn (extract (m, op1, const1_rtx));
15225 }
15226 else
15227 emit_insn (move_unaligned (op0, op1));
15228 }
15229
15230 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
15231 straight to ix86_expand_vector_move. */
15232 /* Code generation for scalar reg-reg moves of single and double precision data:
15233 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
15234 movaps reg, reg
15235 else
15236 movss reg, reg
15237 if (x86_sse_partial_reg_dependency == true)
15238 movapd reg, reg
15239 else
15240 movsd reg, reg
15241
15242 Code generation for scalar loads of double precision data:
15243 if (x86_sse_split_regs == true)
15244 movlpd mem, reg (gas syntax)
15245 else
15246 movsd mem, reg
15247
15248 Code generation for unaligned packed loads of single precision data
15249 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
15250 if (x86_sse_unaligned_move_optimal)
15251 movups mem, reg
15252
15253 if (x86_sse_partial_reg_dependency == true)
15254 {
15255 xorps reg, reg
15256 movlps mem, reg
15257 movhps mem+8, reg
15258 }
15259 else
15260 {
15261 movlps mem, reg
15262 movhps mem+8, reg
15263 }
15264
15265 Code generation for unaligned packed loads of double precision data
15266 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
15267 if (x86_sse_unaligned_move_optimal)
15268 movupd mem, reg
15269
15270 if (x86_sse_split_regs == true)
15271 {
15272 movlpd mem, reg
15273 movhpd mem+8, reg
15274 }
15275 else
15276 {
15277 movsd mem, reg
15278 movhpd mem+8, reg
15279 }
15280 */
15281
15282 void
15283 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
15284 {
15285 rtx op0, op1, m;
15286
15287 op0 = operands[0];
15288 op1 = operands[1];
15289
15290 if (TARGET_AVX)
15291 {
15292 switch (GET_MODE_CLASS (mode))
15293 {
15294 case MODE_VECTOR_INT:
15295 case MODE_INT:
15296 switch (GET_MODE_SIZE (mode))
15297 {
15298 case 16:
15299 /* If we're optimizing for size, movups is the smallest. */
15300 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15301 {
15302 op0 = gen_lowpart (V4SFmode, op0);
15303 op1 = gen_lowpart (V4SFmode, op1);
15304 emit_insn (gen_sse_movups (op0, op1));
15305 return;
15306 }
15307 op0 = gen_lowpart (V16QImode, op0);
15308 op1 = gen_lowpart (V16QImode, op1);
15309 emit_insn (gen_sse2_movdqu (op0, op1));
15310 break;
15311 case 32:
15312 op0 = gen_lowpart (V32QImode, op0);
15313 op1 = gen_lowpart (V32QImode, op1);
15314 ix86_avx256_split_vector_move_misalign (op0, op1);
15315 break;
15316 default:
15317 gcc_unreachable ();
15318 }
15319 break;
15320 case MODE_VECTOR_FLOAT:
15321 op0 = gen_lowpart (mode, op0);
15322 op1 = gen_lowpart (mode, op1);
15323
15324 switch (mode)
15325 {
15326 case V4SFmode:
15327 emit_insn (gen_sse_movups (op0, op1));
15328 break;
15329 case V8SFmode:
15330 ix86_avx256_split_vector_move_misalign (op0, op1);
15331 break;
15332 case V2DFmode:
15333 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15334 {
15335 op0 = gen_lowpart (V4SFmode, op0);
15336 op1 = gen_lowpart (V4SFmode, op1);
15337 emit_insn (gen_sse_movups (op0, op1));
15338 return;
15339 }
15340 emit_insn (gen_sse2_movupd (op0, op1));
15341 break;
15342 case V4DFmode:
15343 ix86_avx256_split_vector_move_misalign (op0, op1);
15344 break;
15345 default:
15346 gcc_unreachable ();
15347 }
15348 break;
15349
15350 default:
15351 gcc_unreachable ();
15352 }
15353
15354 return;
15355 }
15356
15357 if (MEM_P (op1))
15358 {
15359 /* If we're optimizing for size, movups is the smallest. */
15360 if (optimize_insn_for_size_p ()
15361 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15362 {
15363 op0 = gen_lowpart (V4SFmode, op0);
15364 op1 = gen_lowpart (V4SFmode, op1);
15365 emit_insn (gen_sse_movups (op0, op1));
15366 return;
15367 }
15368
15369 /* ??? If we have typed data, then it would appear that using
15370 movdqu is the only way to get unaligned data loaded with
15371 integer type. */
15372 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15373 {
15374 op0 = gen_lowpart (V16QImode, op0);
15375 op1 = gen_lowpart (V16QImode, op1);
15376 emit_insn (gen_sse2_movdqu (op0, op1));
15377 return;
15378 }
15379
15380 if (TARGET_SSE2 && mode == V2DFmode)
15381 {
15382 rtx zero;
15383
15384 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15385 {
15386 op0 = gen_lowpart (V2DFmode, op0);
15387 op1 = gen_lowpart (V2DFmode, op1);
15388 emit_insn (gen_sse2_movupd (op0, op1));
15389 return;
15390 }
15391
15392 /* When SSE registers are split into halves, we can avoid
15393 writing to the top half twice. */
15394 if (TARGET_SSE_SPLIT_REGS)
15395 {
15396 emit_clobber (op0);
15397 zero = op0;
15398 }
15399 else
15400 {
15401 /* ??? Not sure about the best option for the Intel chips.
15402 The following would seem to satisfy; the register is
15403 entirely cleared, breaking the dependency chain. We
15404 then store to the upper half, with a dependency depth
15405 of one. A rumor has it that Intel recommends two movsd
15406 followed by an unpacklpd, but this is unconfirmed. And
15407 given that the dependency depth of the unpacklpd would
15408 still be one, I'm not sure why this would be better. */
15409 zero = CONST0_RTX (V2DFmode);
15410 }
15411
15412 m = adjust_address (op1, DFmode, 0);
15413 emit_insn (gen_sse2_loadlpd (op0, zero, m));
15414 m = adjust_address (op1, DFmode, 8);
15415 emit_insn (gen_sse2_loadhpd (op0, op0, m));
15416 }
15417 else
15418 {
15419 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15420 {
15421 op0 = gen_lowpart (V4SFmode, op0);
15422 op1 = gen_lowpart (V4SFmode, op1);
15423 emit_insn (gen_sse_movups (op0, op1));
15424 return;
15425 }
15426
15427 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
15428 emit_move_insn (op0, CONST0_RTX (mode));
15429 else
15430 emit_clobber (op0);
15431
15432 if (mode != V4SFmode)
15433 op0 = gen_lowpart (V4SFmode, op0);
15434 m = adjust_address (op1, V2SFmode, 0);
15435 emit_insn (gen_sse_loadlps (op0, op0, m));
15436 m = adjust_address (op1, V2SFmode, 8);
15437 emit_insn (gen_sse_loadhps (op0, op0, m));
15438 }
15439 }
15440 else if (MEM_P (op0))
15441 {
15442 /* If we're optimizing for size, movups is the smallest. */
15443 if (optimize_insn_for_size_p ()
15444 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15445 {
15446 op0 = gen_lowpart (V4SFmode, op0);
15447 op1 = gen_lowpart (V4SFmode, op1);
15448 emit_insn (gen_sse_movups (op0, op1));
15449 return;
15450 }
15451
15452 /* ??? Similar to above, only less clear because of quote
15453 typeless stores unquote. */
15454 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
15455 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15456 {
15457 op0 = gen_lowpart (V16QImode, op0);
15458 op1 = gen_lowpart (V16QImode, op1);
15459 emit_insn (gen_sse2_movdqu (op0, op1));
15460 return;
15461 }
15462
15463 if (TARGET_SSE2 && mode == V2DFmode)
15464 {
15465 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15466 {
15467 op0 = gen_lowpart (V2DFmode, op0);
15468 op1 = gen_lowpart (V2DFmode, op1);
15469 emit_insn (gen_sse2_movupd (op0, op1));
15470 }
15471 else
15472 {
15473 m = adjust_address (op0, DFmode, 0);
15474 emit_insn (gen_sse2_storelpd (m, op1));
15475 m = adjust_address (op0, DFmode, 8);
15476 emit_insn (gen_sse2_storehpd (m, op1));
15477 }
15478 }
15479 else
15480 {
15481 if (mode != V4SFmode)
15482 op1 = gen_lowpart (V4SFmode, op1);
15483
15484 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15485 {
15486 op0 = gen_lowpart (V4SFmode, op0);
15487 emit_insn (gen_sse_movups (op0, op1));
15488 }
15489 else
15490 {
15491 m = adjust_address (op0, V2SFmode, 0);
15492 emit_insn (gen_sse_storelps (m, op1));
15493 m = adjust_address (op0, V2SFmode, 8);
15494 emit_insn (gen_sse_storehps (m, op1));
15495 }
15496 }
15497 }
15498 else
15499 gcc_unreachable ();
15500 }
15501
15502 /* Expand a push in MODE. This is some mode for which we do not support
15503 proper push instructions, at least from the registers that we expect
15504 the value to live in. */
15505
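/* The expansion is simply "sp -= GET_MODE_SIZE (mode); *sp = x"; for
   example, a TImode push becomes a 16-byte stack pointer adjustment
   followed by a move to the new stack top.  */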
15506 void
15507 ix86_expand_push (enum machine_mode mode, rtx x)
15508 {
15509 rtx tmp;
15510
15511 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
15512 GEN_INT (-GET_MODE_SIZE (mode)),
15513 stack_pointer_rtx, 1, OPTAB_DIRECT);
15514 if (tmp != stack_pointer_rtx)
15515 emit_move_insn (stack_pointer_rtx, tmp);
15516
15517 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
15518
15519 /* When we push an operand onto the stack, it has to be aligned at least
15520 at the function argument boundary. However, since we don't have
15521 the argument type, we can't determine the actual argument
15522 boundary. */
15523 emit_move_insn (tmp, x);
15524 }
15525
15526 /* Helper function of ix86_fixup_binary_operands to canonicalize
15527 operand order. Returns true if the operands should be swapped. */
15528
15529 static bool
15530 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
15531 rtx operands[])
15532 {
15533 rtx dst = operands[0];
15534 rtx src1 = operands[1];
15535 rtx src2 = operands[2];
15536
15537 /* If the operation is not commutative, we can't do anything. */
15538 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
15539 return false;
15540
15541 /* Highest priority is that src1 should match dst. */
15542 if (rtx_equal_p (dst, src1))
15543 return false;
15544 if (rtx_equal_p (dst, src2))
15545 return true;
15546
15547 /* Next highest priority is that immediate constants come second. */
15548 if (immediate_operand (src2, mode))
15549 return false;
15550 if (immediate_operand (src1, mode))
15551 return true;
15552
15553 /* Lowest priority is that memory references should come second. */
15554 if (MEM_P (src2))
15555 return false;
15556 if (MEM_P (src1))
15557 return true;
15558
15559 return false;
15560 }
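/* For example, with a commutative PLUS whose first source is
   (const_int 3) and whose second source is a register distinct from
   the destination, the function returns true so that the constant ends
   up as the second source, as the insn constraints expect.  */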
15561
15562
15563 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
15564 destination to use for the operation. If different from the true
15565 destination in operands[0], a copy operation will be required. */
15566
15567 rtx
15568 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
15569 rtx operands[])
15570 {
15571 rtx dst = operands[0];
15572 rtx src1 = operands[1];
15573 rtx src2 = operands[2];
15574
15575 /* Canonicalize operand order. */
15576 if (ix86_swap_binary_operands_p (code, mode, operands))
15577 {
15578 rtx temp;
15579
15580 /* It is invalid to swap operands of different modes. */
15581 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
15582
15583 temp = src1;
15584 src1 = src2;
15585 src2 = temp;
15586 }
15587
15588 /* Both source operands cannot be in memory. */
15589 if (MEM_P (src1) && MEM_P (src2))
15590 {
15591 /* Optimization: Only read from memory once. */
15592 if (rtx_equal_p (src1, src2))
15593 {
15594 src2 = force_reg (mode, src2);
15595 src1 = src2;
15596 }
15597 else
15598 src2 = force_reg (mode, src2);
15599 }
15600
15601 /* If the destination is memory, and we do not have matching source
15602 operands, do things in registers. */
15603 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15604 dst = gen_reg_rtx (mode);
15605
15606 /* Source 1 cannot be a constant. */
15607 if (CONSTANT_P (src1))
15608 src1 = force_reg (mode, src1);
15609
15610 /* Source 1 cannot be a non-matching memory. */
15611 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15612 src1 = force_reg (mode, src1);
15613
15614 operands[1] = src1;
15615 operands[2] = src2;
15616 return dst;
15617 }
15618
15619 /* Similarly, but assume that the destination has already been
15620 set up properly. */
15621
15622 void
15623 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
15624 enum machine_mode mode, rtx operands[])
15625 {
15626 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
15627 gcc_assert (dst == operands[0]);
15628 }
15629
15630 /* Attempt to expand a binary operator. Make the expansion closer to the
15631 actual machine than just general_operand, which would allow 3 separate
15632 memory references (one output, two input) in a single insn. */
15633
15634 void
15635 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
15636 rtx operands[])
15637 {
15638 rtx src1, src2, dst, op, clob;
15639
15640 dst = ix86_fixup_binary_operands (code, mode, operands);
15641 src1 = operands[1];
15642 src2 = operands[2];
15643
15644 /* Emit the instruction. */
15645
15646 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
15647 if (reload_in_progress)
15648 {
15649 /* Reload doesn't know about the flags register, and doesn't know that
15650 it doesn't want to clobber it. We can only do this with PLUS. */
15651 gcc_assert (code == PLUS);
15652 emit_insn (op);
15653 }
15654 else if (reload_completed
15655 && code == PLUS
15656 && !rtx_equal_p (dst, src1))
15657 {
15658 /* This is going to be an LEA; avoid splitting it later. */
15659 emit_insn (op);
15660 }
15661 else
15662 {
15663 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15664 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15665 }
15666
15667 /* Fix up the destination if needed. */
15668 if (dst != operands[0])
15669 emit_move_insn (operands[0], dst);
15670 }
15671
15672 /* Return TRUE or FALSE depending on whether the binary operator meets the
15673 appropriate constraints. */
15674
15675 bool
15676 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
15677 rtx operands[3])
15678 {
15679 rtx dst = operands[0];
15680 rtx src1 = operands[1];
15681 rtx src2 = operands[2];
15682
15683 /* Both source operands cannot be in memory. */
15684 if (MEM_P (src1) && MEM_P (src2))
15685 return false;
15686
15687 /* Canonicalize operand order for commutative operators. */
15688 if (ix86_swap_binary_operands_p (code, mode, operands))
15689 {
15690 rtx temp = src1;
15691 src1 = src2;
15692 src2 = temp;
15693 }
15694
15695 /* If the destination is memory, we must have a matching source operand. */
15696 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15697 return false;
15698
15699 /* Source 1 cannot be a constant. */
15700 if (CONSTANT_P (src1))
15701 return false;
15702
15703 /* Source 1 cannot be a non-matching memory. */
15704 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15705 {
15706 /* Support "andhi/andsi/anddi" as a zero-extending move. */
15707 return (code == AND
15708 && (mode == HImode
15709 || mode == SImode
15710 || (TARGET_64BIT && mode == DImode))
15711 && CONST_INT_P (src2)
15712 && (INTVAL (src2) == 0xff
15713 || INTVAL (src2) == 0xffff));
15714 }
15715
15716 return true;
15717 }
15718
15719 /* Attempt to expand a unary operator. Make the expansion closer to the
15720 actual machine than just general_operand, which would allow 2 separate
15721 memory references (one output, one input) in a single insn. */
15722
15723 void
15724 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
15725 rtx operands[])
15726 {
15727 int matching_memory;
15728 rtx src, dst, op, clob;
15729
15730 dst = operands[0];
15731 src = operands[1];
15732
15733 /* If the destination is memory, and we do not have matching source
15734 operands, do things in registers. */
15735 matching_memory = 0;
15736 if (MEM_P (dst))
15737 {
15738 if (rtx_equal_p (dst, src))
15739 matching_memory = 1;
15740 else
15741 dst = gen_reg_rtx (mode);
15742 }
15743
15744 /* When the source operand is memory, the destination must match. */
15745 if (MEM_P (src) && !matching_memory)
15746 src = force_reg (mode, src);
15747
15748 /* Emit the instruction. */
15749
15750 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
15751 if (reload_in_progress || code == NOT)
15752 {
15753 /* Reload doesn't know about the flags register, and doesn't know that
15754 it doesn't want to clobber it. */
15755 gcc_assert (code == NOT);
15756 emit_insn (op);
15757 }
15758 else
15759 {
15760 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15761 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15762 }
15763
15764 /* Fix up the destination if needed. */
15765 if (dst != operands[0])
15766 emit_move_insn (operands[0], dst);
15767 }
15768
15769 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
15770 divisor are within the range [0-255]. */
15771
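/* Sketch of the generated code: (dividend | divisor) is tested against
   -0x100; if no bits above the low 8 are set, a single 8-bit unsigned
   divide is used, leaving the quotient in AL and the remainder in AH,
   otherwise the ordinary full-width divmod sequence runs.  */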
15772 void
15773 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
15774 bool signed_p)
15775 {
15776 rtx end_label, qimode_label;
15777 rtx insn, div, mod;
15778 rtx scratch, tmp0, tmp1, tmp2;
15779 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
15780 rtx (*gen_zero_extend) (rtx, rtx);
15781 rtx (*gen_test_ccno_1) (rtx, rtx);
15782
15783 switch (mode)
15784 {
15785 case SImode:
15786 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
15787 gen_test_ccno_1 = gen_testsi_ccno_1;
15788 gen_zero_extend = gen_zero_extendqisi2;
15789 break;
15790 case DImode:
15791 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
15792 gen_test_ccno_1 = gen_testdi_ccno_1;
15793 gen_zero_extend = gen_zero_extendqidi2;
15794 break;
15795 default:
15796 gcc_unreachable ();
15797 }
15798
15799 end_label = gen_label_rtx ();
15800 qimode_label = gen_label_rtx ();
15801
15802 scratch = gen_reg_rtx (mode);
15803
15804 /* Use 8bit unsigned divmod if dividend and divisor are within
15805 the range [0-255]. */
15806 emit_move_insn (scratch, operands[2]);
15807 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
15808 scratch, 1, OPTAB_DIRECT);
15809 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
15810 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
15811 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
15812 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
15813 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
15814 pc_rtx);
15815 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
15816 predict_jump (REG_BR_PROB_BASE * 50 / 100);
15817 JUMP_LABEL (insn) = qimode_label;
15818
15819 /* Generate the original signed/unsigned divmod. */
15820 div = gen_divmod4_1 (operands[0], operands[1],
15821 operands[2], operands[3]);
15822 emit_insn (div);
15823
15824 /* Branch to the end. */
15825 emit_jump_insn (gen_jump (end_label));
15826 emit_barrier ();
15827
15828 /* Generate 8bit unsigned divide. */
15829 emit_label (qimode_label);
15830 /* Don't use operands[0] for result of 8bit divide since not all
15831 registers support QImode ZERO_EXTRACT. */
15832 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
15833 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
15834 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
15835 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
15836
15837 if (signed_p)
15838 {
15839 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
15840 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
15841 }
15842 else
15843 {
15844 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
15845 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
15846 }
15847
15848 /* Extract remainder from AH. */
15849 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
15850 if (REG_P (operands[1]))
15851 insn = emit_move_insn (operands[1], tmp1);
15852 else
15853 {
15854 /* Need a new scratch register since the old one has result
15855 of 8bit divide. */
15856 scratch = gen_reg_rtx (mode);
15857 emit_move_insn (scratch, tmp1);
15858 insn = emit_move_insn (operands[1], scratch);
15859 }
15860 set_unique_reg_note (insn, REG_EQUAL, mod);
15861
15862 /* Zero extend quotient from AL. */
15863 tmp1 = gen_lowpart (QImode, tmp0);
15864 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
15865 set_unique_reg_note (insn, REG_EQUAL, div);
15866
15867 emit_label (end_label);
15868 }
15869
15870 #define LEA_SEARCH_THRESHOLD 12
15871
15872 /* Search backward for a non-agu definition of register number REGNO1
15873 or register number REGNO2 in INSN's basic block until we
15874 1. pass LEA_SEARCH_THRESHOLD instructions, or
15875 2. reach the BB boundary, or
15876 3. reach an agu definition.
15877 Returns the distance between the non-agu definition point and INSN.
15878 If there is no definition point, returns -1. */
15879
15880 static int
15881 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
15882 rtx insn)
15883 {
15884 basic_block bb = BLOCK_FOR_INSN (insn);
15885 int distance = 0;
15886 df_ref *def_rec;
15887 enum attr_type insn_type;
15888
15889 if (insn != BB_HEAD (bb))
15890 {
15891 rtx prev = PREV_INSN (insn);
15892 while (prev && distance < LEA_SEARCH_THRESHOLD)
15893 {
15894 if (NONDEBUG_INSN_P (prev))
15895 {
15896 distance++;
15897 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
15898 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15899 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15900 && (regno1 == DF_REF_REGNO (*def_rec)
15901 || regno2 == DF_REF_REGNO (*def_rec)))
15902 {
15903 insn_type = get_attr_type (prev);
15904 if (insn_type != TYPE_LEA)
15905 goto done;
15906 }
15907 }
15908 if (prev == BB_HEAD (bb))
15909 break;
15910 prev = PREV_INSN (prev);
15911 }
15912 }
15913
15914 if (distance < LEA_SEARCH_THRESHOLD)
15915 {
15916 edge e;
15917 edge_iterator ei;
15918 bool simple_loop = false;
15919
15920 FOR_EACH_EDGE (e, ei, bb->preds)
15921 if (e->src == bb)
15922 {
15923 simple_loop = true;
15924 break;
15925 }
15926
15927 if (simple_loop)
15928 {
15929 rtx prev = BB_END (bb);
15930 while (prev
15931 && prev != insn
15932 && distance < LEA_SEARCH_THRESHOLD)
15933 {
15934 if (NONDEBUG_INSN_P (prev))
15935 {
15936 distance++;
15937 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
15938 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15939 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15940 && (regno1 == DF_REF_REGNO (*def_rec)
15941 || regno2 == DF_REF_REGNO (*def_rec)))
15942 {
15943 insn_type = get_attr_type (prev);
15944 if (insn_type != TYPE_LEA)
15945 goto done;
15946 }
15947 }
15948 prev = PREV_INSN (prev);
15949 }
15950 }
15951 }
15952
15953 distance = -1;
15954
15955 done:
15956 /* get_attr_type may modify recog data. We want to make sure
15957 that recog data is valid for instruction INSN, on which
15958 distance_non_agu_define is called. INSN is unchanged here. */
15959 extract_insn_cached (insn);
15960 return distance;
15961 }
15962
15963 /* Return the distance between INSN and the next insn that uses
15964 register number REGNO0 in a memory address. Return -1 if no such
15965 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
15966
15967 static int
15968 distance_agu_use (unsigned int regno0, rtx insn)
15969 {
15970 basic_block bb = BLOCK_FOR_INSN (insn);
15971 int distance = 0;
15972 df_ref *def_rec;
15973 df_ref *use_rec;
15974
15975 if (insn != BB_END (bb))
15976 {
15977 rtx next = NEXT_INSN (insn);
15978 while (next && distance < LEA_SEARCH_THRESHOLD)
15979 {
15980 if (NONDEBUG_INSN_P (next))
15981 {
15982 distance++;
15983
15984 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
15985 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
15986 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
15987 && regno0 == DF_REF_REGNO (*use_rec))
15988 {
15989 /* Return DISTANCE if OP0 is used in memory
15990 address in NEXT. */
15991 return distance;
15992 }
15993
15994 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
15995 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15996 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15997 && regno0 == DF_REF_REGNO (*def_rec))
15998 {
15999 /* Return -1 if OP0 is set in NEXT. */
16000 return -1;
16001 }
16002 }
16003 if (next == BB_END (bb))
16004 break;
16005 next = NEXT_INSN (next);
16006 }
16007 }
16008
16009 if (distance < LEA_SEARCH_THRESHOLD)
16010 {
16011 edge e;
16012 edge_iterator ei;
16013 bool simple_loop = false;
16014
16015 FOR_EACH_EDGE (e, ei, bb->succs)
16016 if (e->dest == bb)
16017 {
16018 simple_loop = true;
16019 break;
16020 }
16021
16022 if (simple_loop)
16023 {
16024 rtx next = BB_HEAD (bb);
16025 while (next
16026 && next != insn
16027 && distance < LEA_SEARCH_THRESHOLD)
16028 {
16029 if (NONDEBUG_INSN_P (next))
16030 {
16031 distance++;
16032
16033 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
16034 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
16035 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
16036 && regno0 == DF_REF_REGNO (*use_rec))
16037 {
16038 /* Return DISTANCE if OP0 is used in memory
16039 address in NEXT. */
16040 return distance;
16041 }
16042
16043 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
16044 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16045 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16046 && regno0 == DF_REF_REGNO (*def_rec))
16047 {
16048 /* Return -1 if OP0 is set in NEXT. */
16049 return -1;
16050 }
16051
16052 }
16053 next = NEXT_INSN (next);
16054 }
16055 }
16056 }
16057
16058 return -1;
16059 }
16060
16061 /* Define this macro to tune LEA priority vs. ADD; it takes effect when
16062 there is a dilemma of choosing LEA or ADD.
16063 Negative value: ADD is preferred over LEA.
16064 Zero: Neutral.
16065 Positive value: LEA is preferred over ADD. */
16066 #define IX86_LEA_PRIORITY 2
16067
16068 /* Return true if it is ok to optimize an ADD operation to an LEA
16069 operation to avoid flag register consumption. For most processors,
16070 ADD is faster than LEA. For processors like Atom, if the
16071 destination register of the LEA holds an actual address which will be
16072 used soon, LEA is better; otherwise ADD is better. */
16073
16074 bool
16075 ix86_lea_for_add_ok (rtx insn, rtx operands[])
16076 {
16077 unsigned int regno0 = true_regnum (operands[0]);
16078 unsigned int regno1 = true_regnum (operands[1]);
16079 unsigned int regno2 = true_regnum (operands[2]);
16080
16081 /* If a = b + c, (a != b && a != c), we must use the lea form. */
16082 if (regno0 != regno1 && regno0 != regno2)
16083 return true;
16084
16085 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16086 return false;
16087 else
16088 {
16089 int dist_define, dist_use;
16090
16091 /* Return false if REGNO0 isn't used in memory address. */
16092 dist_use = distance_agu_use (regno0, insn);
16093 if (dist_use <= 0)
16094 return false;
16095
16096 dist_define = distance_non_agu_define (regno1, regno2, insn);
16097 if (dist_define <= 0)
16098 return true;
16099
16100 /* If this insn has both a backward non-agu dependence and a forward
16101 agu dependence, the one with the shorter distance takes effect. */
16102 if ((dist_define + IX86_LEA_PRIORITY) < dist_use)
16103 return false;
16104
16105 return true;
16106 }
16107 }
16108
16109 /* Return true if destination reg of SET_BODY is shift count of
16110 USE_BODY. */
16111
16112 static bool
16113 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
16114 {
16115 rtx set_dest;
16116 rtx shift_rtx;
16117 int i;
16118
16119 /* Retrieve destination of SET_BODY. */
16120 switch (GET_CODE (set_body))
16121 {
16122 case SET:
16123 set_dest = SET_DEST (set_body);
16124 if (!set_dest || !REG_P (set_dest))
16125 return false;
16126 break;
16127 case PARALLEL:
16128 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
16129 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
16130 use_body))
16131 return true;
16132 default:
16133 return false;
16134 break;
16135 }
16136
16137 /* Retrieve shift count of USE_BODY. */
16138 switch (GET_CODE (use_body))
16139 {
16140 case SET:
16141 shift_rtx = XEXP (use_body, 1);
16142 break;
16143 case PARALLEL:
16144 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
16145 if (ix86_dep_by_shift_count_body (set_body,
16146 XVECEXP (use_body, 0, i)))
16147 return true;
16148 default:
16149 return false;
16150 break;
16151 }
16152
16153 if (shift_rtx
16154 && (GET_CODE (shift_rtx) == ASHIFT
16155 || GET_CODE (shift_rtx) == LSHIFTRT
16156 || GET_CODE (shift_rtx) == ASHIFTRT
16157 || GET_CODE (shift_rtx) == ROTATE
16158 || GET_CODE (shift_rtx) == ROTATERT))
16159 {
16160 rtx shift_count = XEXP (shift_rtx, 1);
16161
16162 /* Return true if shift count is dest of SET_BODY. */
16163 if (REG_P (shift_count)
16164 && true_regnum (set_dest) == true_regnum (shift_count))
16165 return true;
16166 }
16167
16168 return false;
16169 }
16170
16171 /* Return true if destination reg of SET_INSN is shift count of
16172 USE_INSN. */
16173
16174 bool
16175 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
16176 {
16177 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
16178 PATTERN (use_insn));
16179 }
16180
16181 /* Return TRUE or FALSE depending on whether the unary operator meets the
16182 appropriate constraints. */
16183
16184 bool
16185 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
16186 enum machine_mode mode ATTRIBUTE_UNUSED,
16187 rtx operands[2] ATTRIBUTE_UNUSED)
16188 {
16189 /* If one of the operands is memory, source and destination must match. */
16190 if ((MEM_P (operands[0])
16191 || MEM_P (operands[1]))
16192 && ! rtx_equal_p (operands[0], operands[1]))
16193 return false;
16194 return true;
16195 }
16196
16197 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
16198 are ok, keeping in mind the possible movddup alternative. */
16199
16200 bool
16201 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
16202 {
16203 if (MEM_P (operands[0]))
16204 return rtx_equal_p (operands[0], operands[1 + high]);
16205 if (MEM_P (operands[1]) && MEM_P (operands[2]))
16206 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
16207 return true;
16208 }
16209
16210 /* Post-reload splitter for converting an SF or DFmode value in an
16211 SSE register into an unsigned SImode. */
16212
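/* The sequence below computes, per element: if the value is >= 2**31,
   subtract 2**31 before the signed cvttps2dq/cvttpd2dq conversion and
   then set the sign bit again by XORing with an 0x80000000 mask;
   smaller values convert directly.  LARGE holds the comparison mask,
   ZERO_OR_TWO31 the masked bias, and TWO31 the constant 2**31.  */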
16213 void
16214 ix86_split_convert_uns_si_sse (rtx operands[])
16215 {
16216 enum machine_mode vecmode;
16217 rtx value, large, zero_or_two31, input, two31, x;
16218
16219 large = operands[1];
16220 zero_or_two31 = operands[2];
16221 input = operands[3];
16222 two31 = operands[4];
16223 vecmode = GET_MODE (large);
16224 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
16225
16226 /* Load up the value into the low element. We must ensure that the other
16227 elements are valid floats -- zero is the easiest such value. */
16228 if (MEM_P (input))
16229 {
16230 if (vecmode == V4SFmode)
16231 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
16232 else
16233 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
16234 }
16235 else
16236 {
16237 input = gen_rtx_REG (vecmode, REGNO (input));
16238 emit_move_insn (value, CONST0_RTX (vecmode));
16239 if (vecmode == V4SFmode)
16240 emit_insn (gen_sse_movss (value, value, input));
16241 else
16242 emit_insn (gen_sse2_movsd (value, value, input));
16243 }
16244
16245 emit_move_insn (large, two31);
16246 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
16247
16248 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
16249 emit_insn (gen_rtx_SET (VOIDmode, large, x));
16250
16251 x = gen_rtx_AND (vecmode, zero_or_two31, large);
16252 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
16253
16254 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
16255 emit_insn (gen_rtx_SET (VOIDmode, value, x));
16256
16257 large = gen_rtx_REG (V4SImode, REGNO (large));
16258 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
16259
16260 x = gen_rtx_REG (V4SImode, REGNO (value));
16261 if (vecmode == V4SFmode)
16262 emit_insn (gen_sse2_cvttps2dq (x, value));
16263 else
16264 emit_insn (gen_sse2_cvttpd2dq (x, value));
16265 value = x;
16266
16267 emit_insn (gen_xorv4si3 (value, value, large));
16268 }
16269
16270 /* Convert an unsigned DImode value into a DFmode, using only SSE.
16271 Expects the 64-bit DImode to be supplied in a pair of integral
16272 registers. Requires SSE2; will use SSE3 if available. For x86_32,
16273 -mfpmath=sse, !optimize_size only. */
16274
16275 void
16276 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
16277 {
16278 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
16279 rtx int_xmm, fp_xmm;
16280 rtx biases, exponents;
16281 rtx x;
16282
16283 int_xmm = gen_reg_rtx (V4SImode);
16284 if (TARGET_INTER_UNIT_MOVES)
16285 emit_insn (gen_movdi_to_sse (int_xmm, input));
16286 else if (TARGET_SSE_SPLIT_REGS)
16287 {
16288 emit_clobber (int_xmm);
16289 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
16290 }
16291 else
16292 {
16293 x = gen_reg_rtx (V2DImode);
16294 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
16295 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
16296 }
16297
16298 x = gen_rtx_CONST_VECTOR (V4SImode,
16299 gen_rtvec (4, GEN_INT (0x43300000UL),
16300 GEN_INT (0x45300000UL),
16301 const0_rtx, const0_rtx));
16302 exponents = validize_mem (force_const_mem (V4SImode, x));
16303
16304 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
16305 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
16306
16307 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
16308 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
16309 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
16310 (0x1.0p84 + double(fp_value_hi_xmm)).
16311 Note these exponents differ by 32. */
16312
16313 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
16314
16315 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
16316 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
16317 real_ldexp (&bias_lo_rvt, &dconst1, 52);
16318 real_ldexp (&bias_hi_rvt, &dconst1, 84);
16319 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
16320 x = const_double_from_real_value (bias_hi_rvt, DFmode);
16321 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
16322 biases = validize_mem (force_const_mem (V2DFmode, biases));
16323 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
16324
16325 /* Add the upper and lower DFmode values together. */
16326 if (TARGET_SSE3)
16327 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
16328 else
16329 {
16330 x = copy_to_mode_reg (V2DFmode, fp_xmm);
16331 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
16332 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
16333 }
16334
16335 ix86_expand_vector_extract (false, target, fp_xmm, 0);
16336 }
16337
16338 /* Not used, but eases macroization of patterns. */
16339 void
16340 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
16341 rtx input ATTRIBUTE_UNUSED)
16342 {
16343 gcc_unreachable ();
16344 }
16345
16346 /* Convert an unsigned SImode value into a DFmode. Only currently used
16347 for SSE, but applicable anywhere. */
16348
16349 void
16350 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
16351 {
16352 REAL_VALUE_TYPE TWO31r;
16353 rtx x, fp;
16354
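/* Bias the unsigned input by -2^31 so it fits in the signed SImode range,
   convert it signed SImode -> DFmode, then add 2^31.0 back.  */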
16355 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
16356 NULL, 1, OPTAB_DIRECT);
16357
16358 fp = gen_reg_rtx (DFmode);
16359 emit_insn (gen_floatsidf2 (fp, x));
16360
16361 real_ldexp (&TWO31r, &dconst1, 31);
16362 x = const_double_from_real_value (TWO31r, DFmode);
16363
16364 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
16365 if (x != target)
16366 emit_move_insn (target, x);
16367 }
16368
16369 /* Convert a signed DImode value into a DFmode. Only used for SSE in
16370 32-bit mode; otherwise we have a direct convert instruction. */
16371
16372 void
16373 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
16374 {
16375 REAL_VALUE_TYPE TWO32r;
16376 rtx fp_lo, fp_hi, x;
16377
16378 fp_lo = gen_reg_rtx (DFmode);
16379 fp_hi = gen_reg_rtx (DFmode);
16380
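/* result = (double) (signed) high_part * 2^32 + (double) (unsigned) low_part.  */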
16381 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
16382
16383 real_ldexp (&TWO32r, &dconst1, 32);
16384 x = const_double_from_real_value (TWO32r, DFmode);
16385 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
16386
16387 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
16388
16389 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
16390 0, OPTAB_DIRECT);
16391 if (x != target)
16392 emit_move_insn (target, x);
16393 }
16394
16395 /* Convert an unsigned SImode value into a SFmode, using only SSE.
16396 For x86_32, -mfpmath=sse, !optimize_size only. */
16397 void
16398 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
16399 {
16400 REAL_VALUE_TYPE ONE16r;
16401 rtx fp_hi, fp_lo, int_hi, int_lo, x;
16402
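/* Split the input into its low and high 16-bit halves; both convert to
   SFmode exactly, so the result is hi * 2^16 + lo.  */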
16403 real_ldexp (&ONE16r, &dconst1, 16);
16404 x = const_double_from_real_value (ONE16r, SFmode);
16405 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
16406 NULL, 0, OPTAB_DIRECT);
16407 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
16408 NULL, 0, OPTAB_DIRECT);
16409 fp_hi = gen_reg_rtx (SFmode);
16410 fp_lo = gen_reg_rtx (SFmode);
16411 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
16412 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
16413 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
16414 0, OPTAB_DIRECT);
16415 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
16416 0, OPTAB_DIRECT);
16417 if (!rtx_equal_p (target, fp_hi))
16418 emit_move_insn (target, fp_hi);
16419 }
16420
16421 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
16422 then replicate the value for all elements of the vector
16423 register. */
16424
16425 rtx
16426 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
16427 {
16428 rtvec v;
16429 switch (mode)
16430 {
16431 case V4SImode:
16432 gcc_assert (vect);
16433 v = gen_rtvec (4, value, value, value, value);
16434 return gen_rtx_CONST_VECTOR (V4SImode, v);
16435
16436 case V2DImode:
16437 gcc_assert (vect);
16438 v = gen_rtvec (2, value, value);
16439 return gen_rtx_CONST_VECTOR (V2DImode, v);
16440
16441 case V8SFmode:
16442 if (vect)
16443 v = gen_rtvec (8, value, value, value, value,
16444 value, value, value, value);
16445 else
16446 v = gen_rtvec (8, value, CONST0_RTX (SFmode),
16447 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
16448 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
16449 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
16450 return gen_rtx_CONST_VECTOR (V8SFmode, v);
16451
16452 case V4SFmode:
16453 if (vect)
16454 v = gen_rtvec (4, value, value, value, value);
16455 else
16456 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
16457 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
16458 return gen_rtx_CONST_VECTOR (V4SFmode, v);
16459
16460 case V4DFmode:
16461 if (vect)
16462 v = gen_rtvec (4, value, value, value, value);
16463 else
16464 v = gen_rtvec (4, value, CONST0_RTX (DFmode),
16465 CONST0_RTX (DFmode), CONST0_RTX (DFmode));
16466 return gen_rtx_CONST_VECTOR (V4DFmode, v);
16467
16468 case V2DFmode:
16469 if (vect)
16470 v = gen_rtvec (2, value, value);
16471 else
16472 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
16473 return gen_rtx_CONST_VECTOR (V2DFmode, v);
16474
16475 default:
16476 gcc_unreachable ();
16477 }
16478 }
16479
16480 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
16481 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
16482 for an SSE register. If VECT is true, then replicate the mask for
16483 all elements of the vector register. If INVERT is true, then create
16484 a mask excluding the sign bit. */
16485
16486 rtx
16487 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
16488 {
16489 enum machine_mode vec_mode, imode;
16490 HOST_WIDE_INT hi, lo;
16491 int shift = 63;
16492 rtx v;
16493 rtx mask;
16494
16495 /* Find the sign bit, sign extended to 2*HWI. */
16496 switch (mode)
16497 {
16498 case V4SImode:
16499 case V8SFmode:
16500 case V4SFmode:
16501 vec_mode = mode;
16502 mode = GET_MODE_INNER (mode);
16503 imode = SImode;
16504 lo = 0x80000000, hi = lo < 0;
16505 break;
16506
16507 case V2DImode:
16508 case V4DFmode:
16509 case V2DFmode:
16510 vec_mode = mode;
16511 mode = GET_MODE_INNER (mode);
16512 imode = DImode;
16513 if (HOST_BITS_PER_WIDE_INT >= 64)
16514 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
16515 else
16516 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
16517 break;
16518
16519 case TImode:
16520 case TFmode:
16521 vec_mode = VOIDmode;
16522 if (HOST_BITS_PER_WIDE_INT >= 64)
16523 {
16524 imode = TImode;
16525 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
16526 }
16527 else
16528 {
16529 rtvec vec;
16530
16531 imode = DImode;
16532 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
16533
16534 if (invert)
16535 {
16536 lo = ~lo, hi = ~hi;
16537 v = constm1_rtx;
16538 }
16539 else
16540 v = const0_rtx;
16541
16542 mask = immed_double_const (lo, hi, imode);
16543
16544 vec = gen_rtvec (2, v, mask);
16545 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
16546 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
16547
16548 return v;
16549 }
16550 break;
16551
16552 default:
16553 gcc_unreachable ();
16554 }
16555
16556 if (invert)
16557 lo = ~lo, hi = ~hi;
16558
16559 /* Force this value into the low part of a fp vector constant. */
16560 mask = immed_double_const (lo, hi, imode);
16561 mask = gen_lowpart (mode, mask);
16562
16563 if (vec_mode == VOIDmode)
16564 return force_reg (mode, mask);
16565
16566 v = ix86_build_const_vector (vec_mode, vect, mask);
16567 return force_reg (vec_mode, v);
16568 }
16569
16570 /* Generate code for floating point ABS or NEG. */
16571
16572 void
16573 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
16574 rtx operands[])
16575 {
16576 rtx mask, set, dst, src;
16577 bool use_sse = false;
16578 bool vector_mode = VECTOR_MODE_P (mode);
16579 enum machine_mode vmode = mode;
16580
16581 if (vector_mode)
16582 use_sse = true;
16583 else if (mode == TFmode)
16584 use_sse = true;
16585 else if (TARGET_SSE_MATH)
16586 {
16587 use_sse = SSE_FLOAT_MODE_P (mode);
16588 if (mode == SFmode)
16589 vmode = V4SFmode;
16590 else if (mode == DFmode)
16591 vmode = V2DFmode;
16592 }
16593
16594 /* NEG and ABS performed with SSE use bitwise mask operations.
16595 Create the appropriate mask now. */
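/* NEG is performed as an XOR with the sign-bit mask, ABS as an AND with the
   inverted mask (everything except the sign bit), hence INVERT is CODE == ABS.  */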
16596 if (use_sse)
16597 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
16598 else
16599 mask = NULL_RTX;
16600
16601 dst = operands[0];
16602 src = operands[1];
16603
16604 set = gen_rtx_fmt_e (code, mode, src);
16605 set = gen_rtx_SET (VOIDmode, dst, set);
16606
16607 if (mask)
16608 {
16609 rtx use, clob;
16610 rtvec par;
16611
16612 use = gen_rtx_USE (VOIDmode, mask);
16613 if (vector_mode)
16614 par = gen_rtvec (2, set, use);
16615 else
16616 {
16617 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16618 par = gen_rtvec (3, set, use, clob);
16619 }
16620 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
16621 }
16622 else
16623 emit_insn (set);
16624 }
16625
16626 /* Expand a copysign operation. Special case operand 0 being a constant. */
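/* copysign (x, y) keeps the magnitude of X and the sign of Y; with SSE it is
   computed as (x & ~signmask) | (y & signmask).  */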
16627
16628 void
16629 ix86_expand_copysign (rtx operands[])
16630 {
16631 enum machine_mode mode, vmode;
16632 rtx dest, op0, op1, mask, nmask;
16633
16634 dest = operands[0];
16635 op0 = operands[1];
16636 op1 = operands[2];
16637
16638 mode = GET_MODE (dest);
16639
16640 if (mode == SFmode)
16641 vmode = V4SFmode;
16642 else if (mode == DFmode)
16643 vmode = V2DFmode;
16644 else
16645 vmode = mode;
16646
16647 if (GET_CODE (op0) == CONST_DOUBLE)
16648 {
16649 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
16650
16651 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
16652 op0 = simplify_unary_operation (ABS, mode, op0, mode);
16653
16654 if (mode == SFmode || mode == DFmode)
16655 {
16656 if (op0 == CONST0_RTX (mode))
16657 op0 = CONST0_RTX (vmode);
16658 else
16659 {
16660 rtx v = ix86_build_const_vector (vmode, false, op0);
16661
16662 op0 = force_reg (vmode, v);
16663 }
16664 }
16665 else if (op0 != CONST0_RTX (mode))
16666 op0 = force_reg (mode, op0);
16667
16668 mask = ix86_build_signbit_mask (vmode, 0, 0);
16669
16670 if (mode == SFmode)
16671 copysign_insn = gen_copysignsf3_const;
16672 else if (mode == DFmode)
16673 copysign_insn = gen_copysigndf3_const;
16674 else
16675 copysign_insn = gen_copysigntf3_const;
16676
16677 emit_insn (copysign_insn (dest, op0, op1, mask));
16678 }
16679 else
16680 {
16681 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
16682
16683 nmask = ix86_build_signbit_mask (vmode, 0, 1);
16684 mask = ix86_build_signbit_mask (vmode, 0, 0);
16685
16686 if (mode == SFmode)
16687 copysign_insn = gen_copysignsf3_var;
16688 else if (mode == DFmode)
16689 copysign_insn = gen_copysigndf3_var;
16690 else
16691 copysign_insn = gen_copysigntf3_var;
16692
16693 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
16694 }
16695 }
16696
16697 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
16698 be a constant, and so has already been expanded into a vector constant. */
16699
16700 void
16701 ix86_split_copysign_const (rtx operands[])
16702 {
16703 enum machine_mode mode, vmode;
16704 rtx dest, op0, mask, x;
16705
16706 dest = operands[0];
16707 op0 = operands[1];
16708 mask = operands[3];
16709
16710 mode = GET_MODE (dest);
16711 vmode = GET_MODE (mask);
16712
16713 dest = simplify_gen_subreg (vmode, dest, mode, 0);
16714 x = gen_rtx_AND (vmode, dest, mask);
16715 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16716
16717 if (op0 != CONST0_RTX (vmode))
16718 {
16719 x = gen_rtx_IOR (vmode, dest, op0);
16720 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16721 }
16722 }
16723
16724 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
16725 so we have to do two masks. */
16726
16727 void
16728 ix86_split_copysign_var (rtx operands[])
16729 {
16730 enum machine_mode mode, vmode;
16731 rtx dest, scratch, op0, op1, mask, nmask, x;
16732
16733 dest = operands[0];
16734 scratch = operands[1];
16735 op0 = operands[2];
16736 op1 = operands[3];
16737 nmask = operands[4];
16738 mask = operands[5];
16739
16740 mode = GET_MODE (dest);
16741 vmode = GET_MODE (mask);
16742
16743 if (rtx_equal_p (op0, op1))
16744 {
16745 /* Shouldn't happen often (it's useless, obviously), but when it does
16746 we'd generate incorrect code if we continue below. */
16747 emit_move_insn (dest, op0);
16748 return;
16749 }
16750
16751 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
16752 {
16753 gcc_assert (REGNO (op1) == REGNO (scratch));
16754
16755 x = gen_rtx_AND (vmode, scratch, mask);
16756 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
16757
16758 dest = mask;
16759 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
16760 x = gen_rtx_NOT (vmode, dest);
16761 x = gen_rtx_AND (vmode, x, op0);
16762 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16763 }
16764 else
16765 {
16766 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
16767 {
16768 x = gen_rtx_AND (vmode, scratch, mask);
16769 }
16770 else /* alternative 2,4 */
16771 {
16772 gcc_assert (REGNO (mask) == REGNO (scratch));
16773 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
16774 x = gen_rtx_AND (vmode, scratch, op1);
16775 }
16776 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
16777
16778 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
16779 {
16780 dest = simplify_gen_subreg (vmode, op0, mode, 0);
16781 x = gen_rtx_AND (vmode, dest, nmask);
16782 }
16783 else /* alternative 3,4 */
16784 {
16785 gcc_assert (REGNO (nmask) == REGNO (dest));
16786 dest = nmask;
16787 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
16788 x = gen_rtx_AND (vmode, dest, op0);
16789 }
16790 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16791 }
16792
16793 x = gen_rtx_IOR (vmode, dest, scratch);
16794 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16795 }
16796
16797 /* Return TRUE or FALSE depending on whether the first SET in INSN
16798 has source and destination with matching CC modes, and that the
16799 CC mode is at least as constrained as REQ_MODE. */
16800
16801 bool
16802 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
16803 {
16804 rtx set;
16805 enum machine_mode set_mode;
16806
16807 set = PATTERN (insn);
16808 if (GET_CODE (set) == PARALLEL)
16809 set = XVECEXP (set, 0, 0);
16810 gcc_assert (GET_CODE (set) == SET);
16811 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
16812
16813 set_mode = GET_MODE (SET_DEST (set));
16814 switch (set_mode)
16815 {
16816 case CCNOmode:
16817 if (req_mode != CCNOmode
16818 && (req_mode != CCmode
16819 || XEXP (SET_SRC (set), 1) != const0_rtx))
16820 return false;
16821 break;
16822 case CCmode:
16823 if (req_mode == CCGCmode)
16824 return false;
16825 /* FALLTHRU */
16826 case CCGCmode:
16827 if (req_mode == CCGOCmode || req_mode == CCNOmode)
16828 return false;
16829 /* FALLTHRU */
16830 case CCGOCmode:
16831 if (req_mode == CCZmode)
16832 return false;
16833 /* FALLTHRU */
16834 case CCZmode:
16835 break;
16836
16837 case CCAmode:
16838 case CCCmode:
16839 case CCOmode:
16840 case CCSmode:
16841 if (set_mode != req_mode)
16842 return false;
16843 break;
16844
16845 default:
16846 gcc_unreachable ();
16847 }
16848
16849 return GET_MODE (SET_SRC (set)) == set_mode;
16850 }
16851
16852 /* Generate insn patterns to do an integer compare of OPERANDS. */
16853
16854 static rtx
16855 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
16856 {
16857 enum machine_mode cmpmode;
16858 rtx tmp, flags;
16859
16860 cmpmode = SELECT_CC_MODE (code, op0, op1);
16861 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
16862
16863 /* This is very simple, but making the interface the same as in the
16864 FP case makes the rest of the code easier. */
16865 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
16866 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
16867
16868 /* Return the test that should be put into the flags user, i.e.
16869 the bcc, scc, or cmov instruction. */
16870 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
16871 }
16872
16873 /* Figure out whether to use ordered or unordered fp comparisons.
16874 Return the appropriate mode to use. */
16875
16876 enum machine_mode
16877 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
16878 {
16879 /* ??? In order to make all comparisons reversible, we do all comparisons
16880 non-trapping when compiling for IEEE. Once gcc is able to distinguish
16881 between all forms of trapping and nontrapping comparisons, we can make inequality
16882 comparisons trapping again, since it results in better code when using
16883 FCOM based compares. */
16884 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
16885 }
16886
16887 enum machine_mode
16888 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
16889 {
16890 enum machine_mode mode = GET_MODE (op0);
16891
16892 if (SCALAR_FLOAT_MODE_P (mode))
16893 {
16894 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
16895 return ix86_fp_compare_mode (code);
16896 }
16897
16898 switch (code)
16899 {
16900 /* Only zero flag is needed. */
16901 case EQ: /* ZF=0 */
16902 case NE: /* ZF!=0 */
16903 return CCZmode;
16904 /* Codes needing carry flag. */
16905 case GEU: /* CF=0 */
16906 case LTU: /* CF=1 */
16907 /* Detect overflow checks. They need just the carry flag. */
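/* e.g. (a + b) <u a holds exactly when the addition wrapped around.  */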
16908 if (GET_CODE (op0) == PLUS
16909 && rtx_equal_p (op1, XEXP (op0, 0)))
16910 return CCCmode;
16911 else
16912 return CCmode;
16913 case GTU: /* CF=0 & ZF=0 */
16914 case LEU: /* CF=1 | ZF=1 */
16915 /* Detect overflow checks. They need just the carry flag. */
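/* e.g. (a - b) >u a holds exactly when the subtraction borrowed.  */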
16916 if (GET_CODE (op0) == MINUS
16917 && rtx_equal_p (op1, XEXP (op0, 0)))
16918 return CCCmode;
16919 else
16920 return CCmode;
16921 /* Codes possibly doable only with sign flag when
16922 comparing against zero. */
16923 case GE: /* SF=OF or SF=0 */
16924 case LT: /* SF<>OF or SF=1 */
16925 if (op1 == const0_rtx)
16926 return CCGOCmode;
16927 else
16928 /* For other cases Carry flag is not required. */
16929 return CCGCmode;
16930 /* Codes doable only with sign flag when comparing
16931 against zero, but we miss jump instruction for it
16932 so we need to use relational tests against overflow
16933 that thus needs to be zero. */
16934 case GT: /* ZF=0 & SF=OF */
16935 case LE: /* ZF=1 | SF<>OF */
16936 if (op1 == const0_rtx)
16937 return CCNOmode;
16938 else
16939 return CCGCmode;
16940 /* The strcmp pattern does (use flags), and combine may ask us for the
16941 proper mode. */
16942 case USE:
16943 return CCmode;
16944 default:
16945 gcc_unreachable ();
16946 }
16947 }
16948
16949 /* Return the fixed registers used for condition codes. */
16950
16951 static bool
16952 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
16953 {
16954 *p1 = FLAGS_REG;
16955 *p2 = FPSR_REG;
16956 return true;
16957 }
16958
16959 /* If two condition code modes are compatible, return a condition code
16960 mode which is compatible with both. Otherwise, return
16961 VOIDmode. */
16962
16963 static enum machine_mode
16964 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
16965 {
16966 if (m1 == m2)
16967 return m1;
16968
16969 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
16970 return VOIDmode;
16971
16972 if ((m1 == CCGCmode && m2 == CCGOCmode)
16973 || (m1 == CCGOCmode && m2 == CCGCmode))
16974 return CCGCmode;
16975
16976 switch (m1)
16977 {
16978 default:
16979 gcc_unreachable ();
16980
16981 case CCmode:
16982 case CCGCmode:
16983 case CCGOCmode:
16984 case CCNOmode:
16985 case CCAmode:
16986 case CCCmode:
16987 case CCOmode:
16988 case CCSmode:
16989 case CCZmode:
16990 switch (m2)
16991 {
16992 default:
16993 return VOIDmode;
16994
16995 case CCmode:
16996 case CCGCmode:
16997 case CCGOCmode:
16998 case CCNOmode:
16999 case CCAmode:
17000 case CCCmode:
17001 case CCOmode:
17002 case CCSmode:
17003 case CCZmode:
17004 return CCmode;
17005 }
17006
17007 case CCFPmode:
17008 case CCFPUmode:
17009 /* These are only compatible with themselves, which we already
17010 checked above. */
17011 return VOIDmode;
17012 }
17013 }
17014
17015
17016 /* Return a comparison we can do that is equivalent to
17017 swap_condition (code), apart possibly from orderedness.
17018 Never change orderedness if TARGET_IEEE_FP, returning
17019 UNKNOWN in that case if necessary. */
17020
17021 static enum rtx_code
17022 ix86_fp_swap_condition (enum rtx_code code)
17023 {
17024 switch (code)
17025 {
17026 case GT: /* GTU - CF=0 & ZF=0 */
17027 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
17028 case GE: /* GEU - CF=0 */
17029 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
17030 case UNLT: /* LTU - CF=1 */
17031 return TARGET_IEEE_FP ? UNKNOWN : GT;
17032 case UNLE: /* LEU - CF=1 | ZF=1 */
17033 return TARGET_IEEE_FP ? UNKNOWN : GE;
17034 default:
17035 return swap_condition (code);
17036 }
17037 }
17038
17039 /* Return the cost of comparison CODE using the best strategy for performance.
17040 All the following functions use the number of instructions as a cost metric.
17041 In the future this should be tweaked to compute bytes for optimize_size and
17042 to take into account the performance of various instructions on various CPUs. */
17043
17044 static int
17045 ix86_fp_comparison_cost (enum rtx_code code)
17046 {
17047 int arith_cost;
17048
17049 /* The cost of code using bit-twiddling on %ah. */
17050 switch (code)
17051 {
17052 case UNLE:
17053 case UNLT:
17054 case LTGT:
17055 case GT:
17056 case GE:
17057 case UNORDERED:
17058 case ORDERED:
17059 case UNEQ:
17060 arith_cost = 4;
17061 break;
17062 case LT:
17063 case NE:
17064 case EQ:
17065 case UNGE:
17066 arith_cost = TARGET_IEEE_FP ? 5 : 4;
17067 break;
17068 case LE:
17069 case UNGT:
17070 arith_cost = TARGET_IEEE_FP ? 6 : 4;
17071 break;
17072 default:
17073 gcc_unreachable ();
17074 }
17075
17076 switch (ix86_fp_comparison_strategy (code))
17077 {
17078 case IX86_FPCMP_COMI:
17079 return arith_cost > 4 ? 3 : 2;
17080 case IX86_FPCMP_SAHF:
17081 return arith_cost > 4 ? 4 : 3;
17082 default:
17083 return arith_cost;
17084 }
17085 }
17086
17087 /* Return the strategy to use for a floating-point comparison. We assume that fcomi
17088 is always preferable where available, since that is also true when looking at size
17089 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
17090
17091 enum ix86_fpcmp_strategy
17092 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
17093 {
17094 /* Do fcomi/sahf based test when profitable. */
17095
17096 if (TARGET_CMOVE)
17097 return IX86_FPCMP_COMI;
17098
17099 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
17100 return IX86_FPCMP_SAHF;
17101
17102 return IX86_FPCMP_ARITH;
17103 }
17104
17105 /* Swap, force into registers, or otherwise massage the two operands
17106 to a fp comparison. The operands are updated in place; the new
17107 comparison code is returned. */
17108
17109 static enum rtx_code
17110 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
17111 {
17112 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
17113 rtx op0 = *pop0, op1 = *pop1;
17114 enum machine_mode op_mode = GET_MODE (op0);
17115 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
17116
17117 /* All of the unordered compare instructions only work on registers.
17118 The same is true of the fcomi compare instructions. The XFmode
17119 compare instructions require registers except when comparing
17120 against zero or when converting operand 1 from fixed point to
17121 floating point. */
17122
17123 if (!is_sse
17124 && (fpcmp_mode == CCFPUmode
17125 || (op_mode == XFmode
17126 && ! (standard_80387_constant_p (op0) == 1
17127 || standard_80387_constant_p (op1) == 1)
17128 && GET_CODE (op1) != FLOAT)
17129 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
17130 {
17131 op0 = force_reg (op_mode, op0);
17132 op1 = force_reg (op_mode, op1);
17133 }
17134 else
17135 {
17136 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
17137 things around if they appear profitable, otherwise force op0
17138 into a register. */
17139
17140 if (standard_80387_constant_p (op0) == 0
17141 || (MEM_P (op0)
17142 && ! (standard_80387_constant_p (op1) == 0
17143 || MEM_P (op1))))
17144 {
17145 enum rtx_code new_code = ix86_fp_swap_condition (code);
17146 if (new_code != UNKNOWN)
17147 {
17148 rtx tmp;
17149 tmp = op0, op0 = op1, op1 = tmp;
17150 code = new_code;
17151 }
17152 }
17153
17154 if (!REG_P (op0))
17155 op0 = force_reg (op_mode, op0);
17156
17157 if (CONSTANT_P (op1))
17158 {
17159 int tmp = standard_80387_constant_p (op1);
17160 if (tmp == 0)
17161 op1 = validize_mem (force_const_mem (op_mode, op1));
17162 else if (tmp == 1)
17163 {
17164 if (TARGET_CMOVE)
17165 op1 = force_reg (op_mode, op1);
17166 }
17167 else
17168 op1 = force_reg (op_mode, op1);
17169 }
17170 }
17171
17172 /* Try to rearrange the comparison to make it cheaper. */
17173 if (ix86_fp_comparison_cost (code)
17174 > ix86_fp_comparison_cost (swap_condition (code))
17175 && (REG_P (op1) || can_create_pseudo_p ()))
17176 {
17177 rtx tmp;
17178 tmp = op0, op0 = op1, op1 = tmp;
17179 code = swap_condition (code);
17180 if (!REG_P (op0))
17181 op0 = force_reg (op_mode, op0);
17182 }
17183
17184 *pop0 = op0;
17185 *pop1 = op1;
17186 return code;
17187 }
17188
17189 /* Convert comparison codes we use to represent FP comparison to integer
17190 code that will result in proper branch. Return UNKNOWN if no such code
17191 is available. */
17192
17193 enum rtx_code
17194 ix86_fp_compare_code_to_integer (enum rtx_code code)
17195 {
17196 switch (code)
17197 {
17198 case GT:
17199 return GTU;
17200 case GE:
17201 return GEU;
17202 case ORDERED:
17203 case UNORDERED:
17204 return code;
17205 break;
17206 case UNEQ:
17207 return EQ;
17208 break;
17209 case UNLT:
17210 return LTU;
17211 break;
17212 case UNLE:
17213 return LEU;
17214 break;
17215 case LTGT:
17216 return NE;
17217 break;
17218 default:
17219 return UNKNOWN;
17220 }
17221 }
17222
17223 /* Generate insn patterns to do a floating point compare of OPERANDS. */
17224
17225 static rtx
17226 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
17227 {
17228 enum machine_mode fpcmp_mode, intcmp_mode;
17229 rtx tmp, tmp2;
17230
17231 fpcmp_mode = ix86_fp_compare_mode (code);
17232 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
17233
17234 /* Do fcomi/sahf based test when profitable. */
17235 switch (ix86_fp_comparison_strategy (code))
17236 {
17237 case IX86_FPCMP_COMI:
17238 intcmp_mode = fpcmp_mode;
17239 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17240 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17241 tmp);
17242 emit_insn (tmp);
17243 break;
17244
17245 case IX86_FPCMP_SAHF:
17246 intcmp_mode = fpcmp_mode;
17247 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17248 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17249 tmp);
17250
17251 if (!scratch)
17252 scratch = gen_reg_rtx (HImode);
17253 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
17254 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
17255 break;
17256
17257 case IX86_FPCMP_ARITH:
17258 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
17259 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17260 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
17261 if (!scratch)
17262 scratch = gen_reg_rtx (HImode);
17263 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
17264
17265 /* In the unordered case, we have to check C2 for NaN's, which
17266 doesn't happen to work out to anything nice combination-wise.
17267 So do some bit twiddling on the value we've got in AH to come
17268 up with an appropriate set of condition codes. */
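/* fnstsw leaves C0, C2 and C3 in bits 8, 10 and 14 of the status word; the
   *_ext patterns below test the high byte, where they appear as 0x01, 0x04
   and 0x40, so a mask of 0x45 tests C0|C2|C3.  */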
17269
17270 intcmp_mode = CCNOmode;
17271 switch (code)
17272 {
17273 case GT:
17274 case UNGT:
17275 if (code == GT || !TARGET_IEEE_FP)
17276 {
17277 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17278 code = EQ;
17279 }
17280 else
17281 {
17282 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17283 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17284 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
17285 intcmp_mode = CCmode;
17286 code = GEU;
17287 }
17288 break;
17289 case LT:
17290 case UNLT:
17291 if (code == LT && TARGET_IEEE_FP)
17292 {
17293 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17294 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
17295 intcmp_mode = CCmode;
17296 code = EQ;
17297 }
17298 else
17299 {
17300 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
17301 code = NE;
17302 }
17303 break;
17304 case GE:
17305 case UNGE:
17306 if (code == GE || !TARGET_IEEE_FP)
17307 {
17308 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
17309 code = EQ;
17310 }
17311 else
17312 {
17313 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17314 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
17315 code = NE;
17316 }
17317 break;
17318 case LE:
17319 case UNLE:
17320 if (code == LE && TARGET_IEEE_FP)
17321 {
17322 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17323 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17324 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17325 intcmp_mode = CCmode;
17326 code = LTU;
17327 }
17328 else
17329 {
17330 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17331 code = NE;
17332 }
17333 break;
17334 case EQ:
17335 case UNEQ:
17336 if (code == EQ && TARGET_IEEE_FP)
17337 {
17338 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17339 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17340 intcmp_mode = CCmode;
17341 code = EQ;
17342 }
17343 else
17344 {
17345 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17346 code = NE;
17347 }
17348 break;
17349 case NE:
17350 case LTGT:
17351 if (code == NE && TARGET_IEEE_FP)
17352 {
17353 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17354 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
17355 GEN_INT (0x40)));
17356 code = NE;
17357 }
17358 else
17359 {
17360 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17361 code = EQ;
17362 }
17363 break;
17364
17365 case UNORDERED:
17366 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17367 code = NE;
17368 break;
17369 case ORDERED:
17370 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17371 code = EQ;
17372 break;
17373
17374 default:
17375 gcc_unreachable ();
17376 }
17377 break;
17378
17379 default:
17380 gcc_unreachable();
17381 }
17382
17383 /* Return the test that should be put into the flags user, i.e.
17384 the bcc, scc, or cmov instruction. */
17385 return gen_rtx_fmt_ee (code, VOIDmode,
17386 gen_rtx_REG (intcmp_mode, FLAGS_REG),
17387 const0_rtx);
17388 }
17389
17390 static rtx
17391 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
17392 {
17393 rtx ret;
17394
17395 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
17396 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
17397
17398 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
17399 {
17400 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
17401 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17402 }
17403 else
17404 ret = ix86_expand_int_compare (code, op0, op1);
17405
17406 return ret;
17407 }
17408
17409 void
17410 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
17411 {
17412 enum machine_mode mode = GET_MODE (op0);
17413 rtx tmp;
17414
17415 switch (mode)
17416 {
17417 case SFmode:
17418 case DFmode:
17419 case XFmode:
17420 case QImode:
17421 case HImode:
17422 case SImode:
17423 simple:
17424 tmp = ix86_expand_compare (code, op0, op1);
17425 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
17426 gen_rtx_LABEL_REF (VOIDmode, label),
17427 pc_rtx);
17428 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
17429 return;
17430
17431 case DImode:
17432 if (TARGET_64BIT)
17433 goto simple;
17434 case TImode:
17435 /* Expand DImode branch into multiple compare+branch. */
17436 {
17437 rtx lo[2], hi[2], label2;
17438 enum rtx_code code1, code2, code3;
17439 enum machine_mode submode;
17440
17441 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
17442 {
17443 tmp = op0, op0 = op1, op1 = tmp;
17444 code = swap_condition (code);
17445 }
17446
17447 split_double_mode (mode, &op0, 1, lo+0, hi+0);
17448 split_double_mode (mode, &op1, 1, lo+1, hi+1);
17449
17450 submode = mode == DImode ? SImode : DImode;
17451
17452 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
17453 avoid two branches. This costs one extra insn, so disable when
17454 optimizing for size. */
17455
17456 if ((code == EQ || code == NE)
17457 && (!optimize_insn_for_size_p ()
17458 || hi[1] == const0_rtx || lo[1] == const0_rtx))
17459 {
17460 rtx xor0, xor1;
17461
17462 xor1 = hi[0];
17463 if (hi[1] != const0_rtx)
17464 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
17465 NULL_RTX, 0, OPTAB_WIDEN);
17466
17467 xor0 = lo[0];
17468 if (lo[1] != const0_rtx)
17469 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
17470 NULL_RTX, 0, OPTAB_WIDEN);
17471
17472 tmp = expand_binop (submode, ior_optab, xor1, xor0,
17473 NULL_RTX, 0, OPTAB_WIDEN);
17474
17475 ix86_expand_branch (code, tmp, const0_rtx, label);
17476 return;
17477 }
17478
17479 /* Otherwise, if we are doing a less-than or greater-or-equal comparison,
17480 op1 is a constant and its low word is zero, then we can just
17481 examine the high word. Similarly for a low word of -1 and a
17482 less-or-equal or greater-than comparison. */
17483
17484 if (CONST_INT_P (hi[1]))
17485 switch (code)
17486 {
17487 case LT: case LTU: case GE: case GEU:
17488 if (lo[1] == const0_rtx)
17489 {
17490 ix86_expand_branch (code, hi[0], hi[1], label);
17491 return;
17492 }
17493 break;
17494 case LE: case LEU: case GT: case GTU:
17495 if (lo[1] == constm1_rtx)
17496 {
17497 ix86_expand_branch (code, hi[0], hi[1], label);
17498 return;
17499 }
17500 break;
17501 default:
17502 break;
17503 }
17504
17505 /* Otherwise, we need two or three jumps. */
17506
17507 label2 = gen_label_rtx ();
17508
17509 code1 = code;
17510 code2 = swap_condition (code);
17511 code3 = unsigned_condition (code);
17512
17513 switch (code)
17514 {
17515 case LT: case GT: case LTU: case GTU:
17516 break;
17517
17518 case LE: code1 = LT; code2 = GT; break;
17519 case GE: code1 = GT; code2 = LT; break;
17520 case LEU: code1 = LTU; code2 = GTU; break;
17521 case GEU: code1 = GTU; code2 = LTU; break;
17522
17523 case EQ: code1 = UNKNOWN; code2 = NE; break;
17524 case NE: code2 = UNKNOWN; break;
17525
17526 default:
17527 gcc_unreachable ();
17528 }
17529
17530 /*
17531 * a < b =>
17532 * if (hi(a) < hi(b)) goto true;
17533 * if (hi(a) > hi(b)) goto false;
17534 * if (lo(a) < lo(b)) goto true;
17535 * false:
17536 */
17537
17538 if (code1 != UNKNOWN)
17539 ix86_expand_branch (code1, hi[0], hi[1], label);
17540 if (code2 != UNKNOWN)
17541 ix86_expand_branch (code2, hi[0], hi[1], label2);
17542
17543 ix86_expand_branch (code3, lo[0], lo[1], label);
17544
17545 if (code2 != UNKNOWN)
17546 emit_label (label2);
17547 return;
17548 }
17549
17550 default:
17551 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
17552 goto simple;
17553 }
17554 }
17555
17556 /* Split branch based on floating point condition. */
17557 void
17558 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
17559 rtx target1, rtx target2, rtx tmp, rtx pushed)
17560 {
17561 rtx condition;
17562 rtx i;
17563
17564 if (target2 != pc_rtx)
17565 {
17566 rtx tmp = target2;
17567 code = reverse_condition_maybe_unordered (code);
17568 target2 = target1;
17569 target1 = tmp;
17570 }
17571
17572 condition = ix86_expand_fp_compare (code, op1, op2,
17573 tmp);
17574
17575 /* Remove pushed operand from stack. */
17576 if (pushed)
17577 ix86_free_from_memory (GET_MODE (pushed));
17578
17579 i = emit_jump_insn (gen_rtx_SET
17580 (VOIDmode, pc_rtx,
17581 gen_rtx_IF_THEN_ELSE (VOIDmode,
17582 condition, target1, target2)));
17583 if (split_branch_probability >= 0)
17584 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
17585 }
17586
17587 void
17588 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
17589 {
17590 rtx ret;
17591
17592 gcc_assert (GET_MODE (dest) == QImode);
17593
17594 ret = ix86_expand_compare (code, op0, op1);
17595 PUT_MODE (ret, QImode);
17596 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
17597 }
17598
17599 /* Expand comparison setting or clearing carry flag. Return true when
17600 successful and set pop for the operation. */
17601 static bool
17602 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
17603 {
17604 enum machine_mode mode =
17605 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
17606
17607 /* Do not handle double-mode compares that go through the special path. */
17608 if (mode == (TARGET_64BIT ? TImode : DImode))
17609 return false;
17610
17611 if (SCALAR_FLOAT_MODE_P (mode))
17612 {
17613 rtx compare_op, compare_seq;
17614
17615 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
17616
17617 /* Shortcut: the following common codes never translate
17618 into carry flag compares. */
17619 if (code == EQ || code == NE || code == UNEQ || code == LTGT
17620 || code == ORDERED || code == UNORDERED)
17621 return false;
17622
17623 /* These comparisons require the zero flag; swap the operands so they no longer do. */
17624 if ((code == GT || code == UNLE || code == LE || code == UNGT)
17625 && !TARGET_IEEE_FP)
17626 {
17627 rtx tmp = op0;
17628 op0 = op1;
17629 op1 = tmp;
17630 code = swap_condition (code);
17631 }
17632
17633 /* Try to expand the comparison and verify that we end up with
17634 a carry flag based comparison. This fails only when we decide
17635 to expand the comparison using arithmetic, which is not a
17636 common scenario. */
17637 start_sequence ();
17638 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17639 compare_seq = get_insns ();
17640 end_sequence ();
17641
17642 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
17643 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
17644 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
17645 else
17646 code = GET_CODE (compare_op);
17647
17648 if (code != LTU && code != GEU)
17649 return false;
17650
17651 emit_insn (compare_seq);
17652 *pop = compare_op;
17653 return true;
17654 }
17655
17656 if (!INTEGRAL_MODE_P (mode))
17657 return false;
17658
17659 switch (code)
17660 {
17661 case LTU:
17662 case GEU:
17663 break;
17664
17665 /* Convert a==0 into (unsigned)a<1. */
17666 case EQ:
17667 case NE:
17668 if (op1 != const0_rtx)
17669 return false;
17670 op1 = const1_rtx;
17671 code = (code == EQ ? LTU : GEU);
17672 break;
17673
17674 /* Convert a>b into b<a or a>=b+1. */
17675 case GTU:
17676 case LEU:
17677 if (CONST_INT_P (op1))
17678 {
17679 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
17680 /* Bail out on overflow. We could still swap the operands, but that
17681 would force loading the constant into a register. */
17682 if (op1 == const0_rtx
17683 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
17684 return false;
17685 code = (code == GTU ? GEU : LTU);
17686 }
17687 else
17688 {
17689 rtx tmp = op1;
17690 op1 = op0;
17691 op0 = tmp;
17692 code = (code == GTU ? LTU : GEU);
17693 }
17694 break;
17695
17696 /* Convert a>=0 into (unsigned)a<0x80000000. */
17697 case LT:
17698 case GE:
17699 if (mode == DImode || op1 != const0_rtx)
17700 return false;
17701 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
17702 code = (code == LT ? GEU : LTU);
17703 break;
17704 case LE:
17705 case GT:
17706 if (mode == DImode || op1 != constm1_rtx)
17707 return false;
17708 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
17709 code = (code == LE ? GEU : LTU);
17710 break;
17711
17712 default:
17713 return false;
17714 }
17715 /* Swapping operands may cause a constant to appear as the first operand. */
17716 if (!nonimmediate_operand (op0, VOIDmode))
17717 {
17718 if (!can_create_pseudo_p ())
17719 return false;
17720 op0 = force_reg (mode, op0);
17721 }
17722 *pop = ix86_expand_compare (code, op0, op1);
17723 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
17724 return true;
17725 }
17726
17727 bool
17728 ix86_expand_int_movcc (rtx operands[])
17729 {
17730 enum rtx_code code = GET_CODE (operands[1]), compare_code;
17731 rtx compare_seq, compare_op;
17732 enum machine_mode mode = GET_MODE (operands[0]);
17733 bool sign_bit_compare_p = false;
17734 rtx op0 = XEXP (operands[1], 0);
17735 rtx op1 = XEXP (operands[1], 1);
17736
17737 start_sequence ();
17738 compare_op = ix86_expand_compare (code, op0, op1);
17739 compare_seq = get_insns ();
17740 end_sequence ();
17741
17742 compare_code = GET_CODE (compare_op);
17743
17744 if ((op1 == const0_rtx && (code == GE || code == LT))
17745 || (op1 == constm1_rtx && (code == GT || code == LE)))
17746 sign_bit_compare_p = true;
17747
17748 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
17749 HImode insns, we'd be swallowed in word prefix ops. */
17750
17751 if ((mode != HImode || TARGET_FAST_PREFIX)
17752 && (mode != (TARGET_64BIT ? TImode : DImode))
17753 && CONST_INT_P (operands[2])
17754 && CONST_INT_P (operands[3]))
17755 {
17756 rtx out = operands[0];
17757 HOST_WIDE_INT ct = INTVAL (operands[2]);
17758 HOST_WIDE_INT cf = INTVAL (operands[3]);
17759 HOST_WIDE_INT diff;
17760
17761 diff = ct - cf;
17762 /* Sign bit compares are better done using shifts than by using
17763 sbb. */
17764 if (sign_bit_compare_p
17765 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
17766 {
17767 /* Detect overlap between destination and compare sources. */
17768 rtx tmp = out;
17769
17770 if (!sign_bit_compare_p)
17771 {
17772 rtx flags;
17773 bool fpcmp = false;
17774
17775 compare_code = GET_CODE (compare_op);
17776
17777 flags = XEXP (compare_op, 0);
17778
17779 if (GET_MODE (flags) == CCFPmode
17780 || GET_MODE (flags) == CCFPUmode)
17781 {
17782 fpcmp = true;
17783 compare_code
17784 = ix86_fp_compare_code_to_integer (compare_code);
17785 }
17786
17787 /* To simplify the rest of the code, restrict to the GEU case. */
17788 if (compare_code == LTU)
17789 {
17790 HOST_WIDE_INT tmp = ct;
17791 ct = cf;
17792 cf = tmp;
17793 compare_code = reverse_condition (compare_code);
17794 code = reverse_condition (code);
17795 }
17796 else
17797 {
17798 if (fpcmp)
17799 PUT_CODE (compare_op,
17800 reverse_condition_maybe_unordered
17801 (GET_CODE (compare_op)));
17802 else
17803 PUT_CODE (compare_op,
17804 reverse_condition (GET_CODE (compare_op)));
17805 }
17806 diff = ct - cf;
17807
17808 if (reg_overlap_mentioned_p (out, op0)
17809 || reg_overlap_mentioned_p (out, op1))
17810 tmp = gen_reg_rtx (mode);
17811
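/* x86_mov{si,di}cc_0_m1 expands to sbb reg,reg, producing all ones when
   the carry flag is set and zero otherwise.  */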
17812 if (mode == DImode)
17813 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
17814 else
17815 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
17816 flags, compare_op));
17817 }
17818 else
17819 {
17820 if (code == GT || code == GE)
17821 code = reverse_condition (code);
17822 else
17823 {
17824 HOST_WIDE_INT tmp = ct;
17825 ct = cf;
17826 cf = tmp;
17827 diff = ct - cf;
17828 }
17829 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
17830 }
17831
17832 if (diff == 1)
17833 {
17834 /*
17835 * cmpl op0,op1
17836 * sbbl dest,dest
17837 * [addl dest, ct]
17838 *
17839 * Size 5 - 8.
17840 */
17841 if (ct)
17842 tmp = expand_simple_binop (mode, PLUS,
17843 tmp, GEN_INT (ct),
17844 copy_rtx (tmp), 1, OPTAB_DIRECT);
17845 }
17846 else if (cf == -1)
17847 {
17848 /*
17849 * cmpl op0,op1
17850 * sbbl dest,dest
17851 * orl $ct, dest
17852 *
17853 * Size 8.
17854 */
17855 tmp = expand_simple_binop (mode, IOR,
17856 tmp, GEN_INT (ct),
17857 copy_rtx (tmp), 1, OPTAB_DIRECT);
17858 }
17859 else if (diff == -1 && ct)
17860 {
17861 /*
17862 * cmpl op0,op1
17863 * sbbl dest,dest
17864 * notl dest
17865 * [addl dest, cf]
17866 *
17867 * Size 8 - 11.
17868 */
17869 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
17870 if (cf)
17871 tmp = expand_simple_binop (mode, PLUS,
17872 copy_rtx (tmp), GEN_INT (cf),
17873 copy_rtx (tmp), 1, OPTAB_DIRECT);
17874 }
17875 else
17876 {
17877 /*
17878 * cmpl op0,op1
17879 * sbbl dest,dest
17880 * [notl dest]
17881 * andl cf - ct, dest
17882 * [addl dest, ct]
17883 *
17884 * Size 8 - 11.
17885 */
17886
17887 if (cf == 0)
17888 {
17889 cf = ct;
17890 ct = 0;
17891 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
17892 }
17893
17894 tmp = expand_simple_binop (mode, AND,
17895 copy_rtx (tmp),
17896 gen_int_mode (cf - ct, mode),
17897 copy_rtx (tmp), 1, OPTAB_DIRECT);
17898 if (ct)
17899 tmp = expand_simple_binop (mode, PLUS,
17900 copy_rtx (tmp), GEN_INT (ct),
17901 copy_rtx (tmp), 1, OPTAB_DIRECT);
17902 }
17903
17904 if (!rtx_equal_p (tmp, out))
17905 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
17906
17907 return true;
17908 }
17909
17910 if (diff < 0)
17911 {
17912 enum machine_mode cmp_mode = GET_MODE (op0);
17913
17914 HOST_WIDE_INT tmp;
17915 tmp = ct, ct = cf, cf = tmp;
17916 diff = -diff;
17917
17918 if (SCALAR_FLOAT_MODE_P (cmp_mode))
17919 {
17920 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
17921
17922 /* We may be reversing an unordered compare to a normal compare, which
17923 is not valid in general (we may convert a non-trapping condition
17924 to a trapping one); however, on i386 we currently emit all
17925 comparisons unordered. */
17926 compare_code = reverse_condition_maybe_unordered (compare_code);
17927 code = reverse_condition_maybe_unordered (code);
17928 }
17929 else
17930 {
17931 compare_code = reverse_condition (compare_code);
17932 code = reverse_condition (code);
17933 }
17934 }
17935
17936 compare_code = UNKNOWN;
17937 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
17938 && CONST_INT_P (op1))
17939 {
17940 if (op1 == const0_rtx
17941 && (code == LT || code == GE))
17942 compare_code = code;
17943 else if (op1 == constm1_rtx)
17944 {
17945 if (code == LE)
17946 compare_code = LT;
17947 else if (code == GT)
17948 compare_code = GE;
17949 }
17950 }
17951
17952 /* Optimize dest = (op0 < 0) ? -1 : cf. */
17953 if (compare_code != UNKNOWN
17954 && GET_MODE (op0) == GET_MODE (out)
17955 && (cf == -1 || ct == -1))
17956 {
17957 /* If the lea code below could be used, only optimize
17958 if it results in a 2-insn sequence. */
17959
17960 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
17961 || diff == 3 || diff == 5 || diff == 9)
17962 || (compare_code == LT && ct == -1)
17963 || (compare_code == GE && cf == -1))
17964 {
17965 /*
17966 * notl op1 (if necessary)
17967 * sarl $31, op1
17968 * orl cf, op1
17969 */
17970 if (ct != -1)
17971 {
17972 cf = ct;
17973 ct = -1;
17974 code = reverse_condition (code);
17975 }
17976
17977 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
17978
17979 out = expand_simple_binop (mode, IOR,
17980 out, GEN_INT (cf),
17981 out, 1, OPTAB_DIRECT);
17982 if (out != operands[0])
17983 emit_move_insn (operands[0], out);
17984
17985 return true;
17986 }
17987 }
17988
17989
17990 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
17991 || diff == 3 || diff == 5 || diff == 9)
17992 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
17993 && (mode != DImode
17994 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
17995 {
17996 /*
17997 * xorl dest,dest
17998 * cmpl op1,op2
17999 * setcc dest
18000 * lea cf(dest*(ct-cf)),dest
18001 *
18002 * Size 14.
18003 *
18004 * This also catches the degenerate setcc-only case.
18005 */
18006
18007 rtx tmp;
18008 int nops;
18009
18010 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18011
18012 nops = 0;
18013 /* On x86_64 the lea instruction operates on Pmode, so we need
18014 to get the arithmetic done in the proper mode to match. */
18015 if (diff == 1)
18016 tmp = copy_rtx (out);
18017 else
18018 {
18019 rtx out1;
18020 out1 = copy_rtx (out);
18021 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
18022 nops++;
18023 if (diff & 1)
18024 {
18025 tmp = gen_rtx_PLUS (mode, tmp, out1);
18026 nops++;
18027 }
18028 }
18029 if (cf != 0)
18030 {
18031 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
18032 nops++;
18033 }
18034 if (!rtx_equal_p (tmp, out))
18035 {
18036 if (nops == 1)
18037 out = force_operand (tmp, copy_rtx (out));
18038 else
18039 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
18040 }
18041 if (!rtx_equal_p (out, operands[0]))
18042 emit_move_insn (operands[0], copy_rtx (out));
18043
18044 return true;
18045 }
18046
18047 /*
18048 * General case: Jumpful:
18049 * xorl dest,dest cmpl op1, op2
18050 * cmpl op1, op2 movl ct, dest
18051 * setcc dest jcc 1f
18052 * decl dest movl cf, dest
18053 * andl (cf-ct),dest 1:
18054 * addl ct,dest
18055 *
18056 * Size 20. Size 14.
18057 *
18058 * This is reasonably steep, but branch mispredict costs are
18059 * high on modern cpus, so consider failing only if optimizing
18060 * for space.
18061 */
18062
18063 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18064 && BRANCH_COST (optimize_insn_for_speed_p (),
18065 false) >= 2)
18066 {
18067 if (cf == 0)
18068 {
18069 enum machine_mode cmp_mode = GET_MODE (op0);
18070
18071 cf = ct;
18072 ct = 0;
18073
18074 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18075 {
18076 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18077
18078 /* We may be reversing an unordered compare to a normal compare,
18079 which is not valid in general (we may convert a non-trapping
18080 condition to a trapping one); however, on i386 we currently
18081 emit all comparisons unordered. */
18082 code = reverse_condition_maybe_unordered (code);
18083 }
18084 else
18085 {
18086 code = reverse_condition (code);
18087 if (compare_code != UNKNOWN)
18088 compare_code = reverse_condition (compare_code);
18089 }
18090 }
18091
18092 if (compare_code != UNKNOWN)
18093 {
18094 /* notl op1 (if needed)
18095 sarl $31, op1
18096 andl (cf-ct), op1
18097 addl ct, op1
18098
18099 For x < 0 (resp. x <= -1) there will be no notl,
18100 so if possible swap the constants to get rid of the
18101 complement.
18102 True/false will be -1/0 while code below (store flag
18103 followed by decrement) is 0/-1, so the constants need
18104 to be exchanged once more. */
18105
18106 if (compare_code == GE || !cf)
18107 {
18108 code = reverse_condition (code);
18109 compare_code = LT;
18110 }
18111 else
18112 {
18113 HOST_WIDE_INT tmp = cf;
18114 cf = ct;
18115 ct = tmp;
18116 }
18117
18118 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18119 }
18120 else
18121 {
18122 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18123
18124 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
18125 constm1_rtx,
18126 copy_rtx (out), 1, OPTAB_DIRECT);
18127 }
18128
18129 out = expand_simple_binop (mode, AND, copy_rtx (out),
18130 gen_int_mode (cf - ct, mode),
18131 copy_rtx (out), 1, OPTAB_DIRECT);
18132 if (ct)
18133 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
18134 copy_rtx (out), 1, OPTAB_DIRECT);
18135 if (!rtx_equal_p (out, operands[0]))
18136 emit_move_insn (operands[0], copy_rtx (out));
18137
18138 return true;
18139 }
18140 }
18141
18142 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18143 {
18144 /* Try a few things more with specific constants and a variable. */
18145
18146 optab op;
18147 rtx var, orig_out, out, tmp;
18148
18149 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
18150 return false;
18151
18152 /* If one of the two operands is an interesting constant, load a 0/-1
18153 constant via the conditional move above and mask in the variable with a logical operation. */
18154
18155 if (CONST_INT_P (operands[2]))
18156 {
18157 var = operands[3];
18158 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
18159 operands[3] = constm1_rtx, op = and_optab;
18160 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
18161 operands[3] = const0_rtx, op = ior_optab;
18162 else
18163 return false;
18164 }
18165 else if (CONST_INT_P (operands[3]))
18166 {
18167 var = operands[2];
18168 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
18169 operands[2] = constm1_rtx, op = and_optab;
18170 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
18171 operands[2] = const0_rtx, op = ior_optab;
18172 else
18173 return false;
18174 }
18175 else
18176 return false;
18177
18178 orig_out = operands[0];
18179 tmp = gen_reg_rtx (mode);
18180 operands[0] = tmp;
18181
18182 /* Recurse to get the constant loaded. */
18183 if (ix86_expand_int_movcc (operands) == 0)
18184 return false;
18185
18186 /* Mask in the interesting variable. */
18187 out = expand_binop (mode, op, var, tmp, orig_out, 0,
18188 OPTAB_WIDEN);
18189 if (!rtx_equal_p (out, orig_out))
18190 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
18191
18192 return true;
18193 }
18194
18195 /*
18196 * For comparison with above,
18197 *
18198 * movl cf,dest
18199 * movl ct,tmp
18200 * cmpl op1,op2
18201 * cmovcc tmp,dest
18202 *
18203 * Size 15.
18204 */
18205
18206 if (! nonimmediate_operand (operands[2], mode))
18207 operands[2] = force_reg (mode, operands[2]);
18208 if (! nonimmediate_operand (operands[3], mode))
18209 operands[3] = force_reg (mode, operands[3]);
18210
18211 if (! register_operand (operands[2], VOIDmode)
18212 && (mode == QImode
18213 || ! register_operand (operands[3], VOIDmode)))
18214 operands[2] = force_reg (mode, operands[2]);
18215
18216 if (mode == QImode
18217 && ! register_operand (operands[3], VOIDmode))
18218 operands[3] = force_reg (mode, operands[3]);
18219
18220 emit_insn (compare_seq);
18221 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18222 gen_rtx_IF_THEN_ELSE (mode,
18223 compare_op, operands[2],
18224 operands[3])));
18225 return true;
18226 }
18227
18228 /* Swap, force into registers, or otherwise massage the two operands
18229 to an sse comparison with a mask result. Thus we differ a bit from
18230 ix86_prepare_fp_compare_args which expects to produce a flags result.
18231
18232 The DEST operand exists to help determine whether to commute commutative
18233 operators. The POP0/POP1 operands are updated in place. The new
18234 comparison code is returned, or UNKNOWN if not implementable. */
18235
18236 static enum rtx_code
18237 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
18238 rtx *pop0, rtx *pop1)
18239 {
18240 rtx tmp;
18241
18242 switch (code)
18243 {
18244 case LTGT:
18245 case UNEQ:
18246 /* We have no LTGT as an operator. We could implement it with
18247 NE & ORDERED, but this requires an extra temporary. It's
18248 not clear that it's worth it. */
18249 return UNKNOWN;
18250
18251 case LT:
18252 case LE:
18253 case UNGT:
18254 case UNGE:
18255 /* These are supported directly. */
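/* LT, LE, UNGT and UNGE map onto the native cmpps/cmppd predicates
   lt, le, nle and nlt.  */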
18256 break;
18257
18258 case EQ:
18259 case NE:
18260 case UNORDERED:
18261 case ORDERED:
18262 /* For commutative operators, try to canonicalize the destination
18263 operand to be first in the comparison - this helps reload to
18264 avoid extra moves. */
18265 if (!dest || !rtx_equal_p (dest, *pop1))
18266 break;
18267 /* FALLTHRU */
18268
18269 case GE:
18270 case GT:
18271 case UNLE:
18272 case UNLT:
18273 /* These are not supported directly. Swap the comparison operands
18274 to transform into something that is supported. */
18275 tmp = *pop0;
18276 *pop0 = *pop1;
18277 *pop1 = tmp;
18278 code = swap_condition (code);
18279 break;
18280
18281 default:
18282 gcc_unreachable ();
18283 }
18284
18285 return code;
18286 }
18287
18288 /* Detect conditional moves that exactly match min/max operational
18289 semantics. Note that this is IEEE safe, as long as we don't
18290 interchange the operands.
18291
18292 Returns FALSE if this conditional move doesn't match a MIN/MAX,
18293 and TRUE if the operation is successful and instructions are emitted. */
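/* For example, "x < y ? x : y" (CODE == LT, IF_TRUE == x, IF_FALSE == y)
   matches a minimum and "x < y ? y : x" a maximum.  Unless -ffinite-math-only
   and -funsafe-math-optimizations allow the plain SMIN/SMAX codes, the result
   is emitted as UNSPEC_IEEE_MIN/MAX, which keeps the original operand order.  */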
18294
18295 static bool
18296 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
18297 rtx cmp_op1, rtx if_true, rtx if_false)
18298 {
18299 enum machine_mode mode;
18300 bool is_min;
18301 rtx tmp;
18302
18303 if (code == LT)
18304 ;
18305 else if (code == UNGE)
18306 {
18307 tmp = if_true;
18308 if_true = if_false;
18309 if_false = tmp;
18310 }
18311 else
18312 return false;
18313
18314 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
18315 is_min = true;
18316 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
18317 is_min = false;
18318 else
18319 return false;
18320
18321 mode = GET_MODE (dest);
18322
18323 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
18324 but MODE may be a vector mode and thus not appropriate. */
18325 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
18326 {
18327 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
18328 rtvec v;
18329
18330 if_true = force_reg (mode, if_true);
18331 v = gen_rtvec (2, if_true, if_false);
18332 tmp = gen_rtx_UNSPEC (mode, v, u);
18333 }
18334 else
18335 {
18336 code = is_min ? SMIN : SMAX;
18337 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
18338 }
18339
18340 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
18341 return true;
18342 }
18343
18344 /* Expand an sse vector comparison. Return the register with the result. */
18345
18346 static rtx
18347 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
18348 rtx op_true, rtx op_false)
18349 {
18350 enum machine_mode mode = GET_MODE (dest);
18351 rtx x;
18352
18353 cmp_op0 = force_reg (mode, cmp_op0);
18354 if (!nonimmediate_operand (cmp_op1, mode))
18355 cmp_op1 = force_reg (mode, cmp_op1);
18356
18357 if (optimize
18358 || reg_overlap_mentioned_p (dest, op_true)
18359 || reg_overlap_mentioned_p (dest, op_false))
18360 dest = gen_reg_rtx (mode);
18361
18362 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
18363 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18364
18365 return dest;
18366 }
18367
18368 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
18369 operations. This is used for both scalar and vector conditional moves. */
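/* In the general fallback below the blend is computed bitwise as
       dest = (op_true & cmp) | (op_false & ~cmp),
   which assumes CMP is an all-ones/all-zeros mask per element; a constant-zero
   arm needs only a single AND, and TARGET_XOP can use its native vector
   conditional move instead.  */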
18370
18371 static void
18372 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
18373 {
18374 enum machine_mode mode = GET_MODE (dest);
18375 rtx t2, t3, x;
18376
18377 if (op_false == CONST0_RTX (mode))
18378 {
18379 op_true = force_reg (mode, op_true);
18380 x = gen_rtx_AND (mode, cmp, op_true);
18381 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18382 }
18383 else if (op_true == CONST0_RTX (mode))
18384 {
18385 op_false = force_reg (mode, op_false);
18386 x = gen_rtx_NOT (mode, cmp);
18387 x = gen_rtx_AND (mode, x, op_false);
18388 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18389 }
18390 else if (TARGET_XOP)
18391 {
18392 rtx pcmov = gen_rtx_SET (mode, dest,
18393 gen_rtx_IF_THEN_ELSE (mode, cmp,
18394 op_true,
18395 op_false));
18396 emit_insn (pcmov);
18397 }
18398 else
18399 {
18400 op_true = force_reg (mode, op_true);
18401 op_false = force_reg (mode, op_false);
18402
18403 t2 = gen_reg_rtx (mode);
18404 if (optimize)
18405 t3 = gen_reg_rtx (mode);
18406 else
18407 t3 = dest;
18408
18409 x = gen_rtx_AND (mode, op_true, cmp);
18410 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
18411
18412 x = gen_rtx_NOT (mode, cmp);
18413 x = gen_rtx_AND (mode, x, op_false);
18414 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
18415
18416 x = gen_rtx_IOR (mode, t3, t2);
18417 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18418 }
18419 }
18420
18421 /* Expand a floating-point conditional move. Return true if successful. */
18422
18423 bool
18424 ix86_expand_fp_movcc (rtx operands[])
18425 {
18426 enum machine_mode mode = GET_MODE (operands[0]);
18427 enum rtx_code code = GET_CODE (operands[1]);
18428 rtx tmp, compare_op;
18429 rtx op0 = XEXP (operands[1], 0);
18430 rtx op1 = XEXP (operands[1], 1);
18431
18432 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
18433 {
18434 enum machine_mode cmode;
18435
18436 /* Since we have no cmove for SSE registers, don't force bad register
18437 allocation just to gain access to it.  Deny movcc when the
18438 comparison mode doesn't match the move mode.  */
18439 cmode = GET_MODE (op0);
18440 if (cmode == VOIDmode)
18441 cmode = GET_MODE (op1);
18442 if (cmode != mode)
18443 return false;
18444
18445 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
18446 if (code == UNKNOWN)
18447 return false;
18448
18449 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
18450 operands[2], operands[3]))
18451 return true;
18452
18453 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
18454 operands[2], operands[3]);
18455 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
18456 return true;
18457 }
18458
18459 /* The floating point conditional move instructions don't directly
18460 support conditions resulting from a signed integer comparison. */
18461
18462 compare_op = ix86_expand_compare (code, op0, op1);
18463 if (!fcmov_comparison_operator (compare_op, VOIDmode))
18464 {
18465 tmp = gen_reg_rtx (QImode);
18466 ix86_expand_setcc (tmp, code, op0, op1);
18467
18468 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
18469 }
18470
18471 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18472 gen_rtx_IF_THEN_ELSE (mode, compare_op,
18473 operands[2], operands[3])));
18474
18475 return true;
18476 }
18477
18478 /* Expand a floating-point vector conditional move; a vcond operation
18479 rather than a movcc operation. */
18480
18481 bool
18482 ix86_expand_fp_vcond (rtx operands[])
18483 {
18484 enum rtx_code code = GET_CODE (operands[3]);
18485 rtx cmp;
18486
18487 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
18488 &operands[4], &operands[5]);
18489 if (code == UNKNOWN)
18490 return false;
18491
18492 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
18493 operands[5], operands[1], operands[2]))
18494 return true;
18495
18496 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
18497 operands[1], operands[2]);
18498 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
18499 return true;
18500 }
18501
18502 /* Expand a signed/unsigned integral vector conditional move. */
18503
18504 bool
18505 ix86_expand_int_vcond (rtx operands[])
18506 {
18507 enum machine_mode mode = GET_MODE (operands[0]);
18508 enum rtx_code code = GET_CODE (operands[3]);
18509 bool negate = false;
18510 rtx x, cop0, cop1;
18511
18512 cop0 = operands[4];
18513 cop1 = operands[5];
18514
18515 /* XOP supports all of the comparisons on all vector int types. */
18516 if (!TARGET_XOP)
18517 {
18518 /* Canonicalize the comparison to EQ, GT, GTU. */
18519 switch (code)
18520 {
18521 case EQ:
18522 case GT:
18523 case GTU:
18524 break;
18525
18526 case NE:
18527 case LE:
18528 case LEU:
18529 code = reverse_condition (code);
18530 negate = true;
18531 break;
18532
18533 case GE:
18534 case GEU:
18535 code = reverse_condition (code);
18536 negate = true;
18537 /* FALLTHRU */
18538
18539 case LT:
18540 case LTU:
18541 code = swap_condition (code);
18542 x = cop0, cop0 = cop1, cop1 = x;
18543 break;
18544
18545 default:
18546 gcc_unreachable ();
18547 }
18548
18549 /* Only SSE4.1/SSE4.2 supports V2DImode. */
18550 if (mode == V2DImode)
18551 {
18552 switch (code)
18553 {
18554 case EQ:
18555 /* SSE4.1 supports EQ. */
18556 if (!TARGET_SSE4_1)
18557 return false;
18558 break;
18559
18560 case GT:
18561 case GTU:
18562 /* SSE4.2 supports GT/GTU. */
18563 if (!TARGET_SSE4_2)
18564 return false;
18565 break;
18566
18567 default:
18568 gcc_unreachable ();
18569 }
18570 }
18571
18572 /* Unsigned parallel compare is not supported by the hardware.
18573 Play some tricks to turn this into a signed comparison,
18574 or a comparison against zero.  */
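/* For V4SImode/V2DImode the sign bit of both operands is flipped by
   subtracting the signbit mask, using x >u y == (x ^ signbit) >s (y ^ signbit),
   and a signed GT is emitted.  For V16QImode/V8HImode, x >u y is rewritten
   as (x -us y) != 0 with unsigned saturating subtraction, i.e. an EQ
   against zero with NEGATE toggled.  */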
18575 if (code == GTU)
18576 {
18577 cop0 = force_reg (mode, cop0);
18578
18579 switch (mode)
18580 {
18581 case V4SImode:
18582 case V2DImode:
18583 {
18584 rtx t1, t2, mask;
18585 rtx (*gen_sub3) (rtx, rtx, rtx);
18586
18587 /* Subtract (-(INT MAX) - 1) from both operands to make
18588 them signed. */
18589 mask = ix86_build_signbit_mask (mode, true, false);
18590 gen_sub3 = (mode == V4SImode
18591 ? gen_subv4si3 : gen_subv2di3);
18592 t1 = gen_reg_rtx (mode);
18593 emit_insn (gen_sub3 (t1, cop0, mask));
18594
18595 t2 = gen_reg_rtx (mode);
18596 emit_insn (gen_sub3 (t2, cop1, mask));
18597
18598 cop0 = t1;
18599 cop1 = t2;
18600 code = GT;
18601 }
18602 break;
18603
18604 case V16QImode:
18605 case V8HImode:
18606 /* Perform a parallel unsigned saturating subtraction. */
18607 x = gen_reg_rtx (mode);
18608 emit_insn (gen_rtx_SET (VOIDmode, x,
18609 gen_rtx_US_MINUS (mode, cop0, cop1)));
18610
18611 cop0 = x;
18612 cop1 = CONST0_RTX (mode);
18613 code = EQ;
18614 negate = !negate;
18615 break;
18616
18617 default:
18618 gcc_unreachable ();
18619 }
18620 }
18621 }
18622
18623 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
18624 operands[1+negate], operands[2-negate]);
18625
18626 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
18627 operands[2-negate]);
18628 return true;
18629 }
18630
18631 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
18632 true if we should do zero extension, else sign extension. HIGH_P is
18633 true if we want the N/2 high elements, else the low elements. */
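/* For example, with SSE4.1 a V16QImode operand is widened to V8HImode with
   pmovzxbw/pmovsxbw (the high half is first shifted down by 8 bytes when
   HIGH_P); without SSE4.1 the widening is done by interleaving with zero
   (unsigned) or with the sign mask obtained by comparing the operand
   against zero (signed).  */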
18634
18635 void
18636 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
18637 {
18638 enum machine_mode imode = GET_MODE (operands[1]);
18639 rtx tmp, dest;
18640
18641 if (TARGET_SSE4_1)
18642 {
18643 rtx (*unpack)(rtx, rtx);
18644
18645 switch (imode)
18646 {
18647 case V16QImode:
18648 if (unsigned_p)
18649 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
18650 else
18651 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
18652 break;
18653 case V8HImode:
18654 if (unsigned_p)
18655 unpack = gen_sse4_1_zero_extendv4hiv4si2;
18656 else
18657 unpack = gen_sse4_1_sign_extendv4hiv4si2;
18658 break;
18659 case V4SImode:
18660 if (unsigned_p)
18661 unpack = gen_sse4_1_zero_extendv2siv2di2;
18662 else
18663 unpack = gen_sse4_1_sign_extendv2siv2di2;
18664 break;
18665 default:
18666 gcc_unreachable ();
18667 }
18668
18669 if (high_p)
18670 {
18671 /* Shift higher 8 bytes to lower 8 bytes. */
18672 tmp = gen_reg_rtx (imode);
18673 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
18674 gen_lowpart (V1TImode, operands[1]),
18675 GEN_INT (64)));
18676 }
18677 else
18678 tmp = operands[1];
18679
18680 emit_insn (unpack (operands[0], tmp));
18681 }
18682 else
18683 {
18684 rtx (*unpack)(rtx, rtx, rtx);
18685
18686 switch (imode)
18687 {
18688 case V16QImode:
18689 if (high_p)
18690 unpack = gen_vec_interleave_highv16qi;
18691 else
18692 unpack = gen_vec_interleave_lowv16qi;
18693 break;
18694 case V8HImode:
18695 if (high_p)
18696 unpack = gen_vec_interleave_highv8hi;
18697 else
18698 unpack = gen_vec_interleave_lowv8hi;
18699 break;
18700 case V4SImode:
18701 if (high_p)
18702 unpack = gen_vec_interleave_highv4si;
18703 else
18704 unpack = gen_vec_interleave_lowv4si;
18705 break;
18706 default:
18707 gcc_unreachable ();
18708 }
18709
18710 dest = gen_lowpart (imode, operands[0]);
18711
18712 if (unsigned_p)
18713 tmp = force_reg (imode, CONST0_RTX (imode));
18714 else
18715 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
18716 operands[1], pc_rtx, pc_rtx);
18717
18718 emit_insn (unpack (dest, operands[1], tmp));
18719 }
18720 }
18721
18722 /* Expand conditional increment or decrement using adc/sbb instructions.
18723 The default case using setcc followed by the conditional move can be
18724 done by generic code.  */
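/* For example, "x = (a < b) ? x + 1 : x", when the comparison can be mapped
   onto the carry flag by ix86_expand_carry_flag_compare, is emitted as a
   compare followed by a single adc (or sbb) with a constant 0 or -1 addend,
   instead of setcc plus a conditional move.  */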
18725 bool
18726 ix86_expand_int_addcc (rtx operands[])
18727 {
18728 enum rtx_code code = GET_CODE (operands[1]);
18729 rtx flags;
18730 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
18731 rtx compare_op;
18732 rtx val = const0_rtx;
18733 bool fpcmp = false;
18734 enum machine_mode mode;
18735 rtx op0 = XEXP (operands[1], 0);
18736 rtx op1 = XEXP (operands[1], 1);
18737
18738 if (operands[3] != const1_rtx
18739 && operands[3] != constm1_rtx)
18740 return false;
18741 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
18742 return false;
18743 code = GET_CODE (compare_op);
18744
18745 flags = XEXP (compare_op, 0);
18746
18747 if (GET_MODE (flags) == CCFPmode
18748 || GET_MODE (flags) == CCFPUmode)
18749 {
18750 fpcmp = true;
18751 code = ix86_fp_compare_code_to_integer (code);
18752 }
18753
18754 if (code != LTU)
18755 {
18756 val = constm1_rtx;
18757 if (fpcmp)
18758 PUT_CODE (compare_op,
18759 reverse_condition_maybe_unordered
18760 (GET_CODE (compare_op)));
18761 else
18762 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
18763 }
18764
18765 mode = GET_MODE (operands[0]);
18766
18767 /* Construct either adc or sbb insn. */
18768 if ((code == LTU) == (operands[3] == constm1_rtx))
18769 {
18770 switch (mode)
18771 {
18772 case QImode:
18773 insn = gen_subqi3_carry;
18774 break;
18775 case HImode:
18776 insn = gen_subhi3_carry;
18777 break;
18778 case SImode:
18779 insn = gen_subsi3_carry;
18780 break;
18781 case DImode:
18782 insn = gen_subdi3_carry;
18783 break;
18784 default:
18785 gcc_unreachable ();
18786 }
18787 }
18788 else
18789 {
18790 switch (mode)
18791 {
18792 case QImode:
18793 insn = gen_addqi3_carry;
18794 break;
18795 case HImode:
18796 insn = gen_addhi3_carry;
18797 break;
18798 case SImode:
18799 insn = gen_addsi3_carry;
18800 break;
18801 case DImode:
18802 insn = gen_adddi3_carry;
18803 break;
18804 default:
18805 gcc_unreachable ();
18806 }
18807 }
18808 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
18809
18810 return true;
18811 }
18812
18813
18814 /* Split OPERAND into word-sized parts stored in PARTS.  Similar to
18815 split_double_mode, but works for floating point parameters and
18816 nonoffsettable memories.  For pushes, it returns just stack offsets;
18817 the values will be saved in the right order.  At most four parts are generated.  */
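/* For example, on a 32-bit target an XFmode operand is split into three
   SImode parts and a TFmode operand into four, while on a 64-bit target
   XFmode and TFmode are each split into two parts.  */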
18818
18819 static int
18820 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
18821 {
18822 int size;
18823
18824 if (!TARGET_64BIT)
18825 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
18826 else
18827 size = (GET_MODE_SIZE (mode) + 4) / 8;
18828
18829 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
18830 gcc_assert (size >= 2 && size <= 4);
18831
18832 /* Optimize constant pool references to immediates.  This is used by fp
18833 moves, which force all constants to memory to allow combining.  */
18834 if (MEM_P (operand) && MEM_READONLY_P (operand))
18835 {
18836 rtx tmp = maybe_get_pool_constant (operand);
18837 if (tmp)
18838 operand = tmp;
18839 }
18840
18841 if (MEM_P (operand) && !offsettable_memref_p (operand))
18842 {
18843 /* The only non-offsettable memories we handle are pushes.  */
18844 int ok = push_operand (operand, VOIDmode);
18845
18846 gcc_assert (ok);
18847
18848 operand = copy_rtx (operand);
18849 PUT_MODE (operand, Pmode);
18850 parts[0] = parts[1] = parts[2] = parts[3] = operand;
18851 return size;
18852 }
18853
18854 if (GET_CODE (operand) == CONST_VECTOR)
18855 {
18856 enum machine_mode imode = int_mode_for_mode (mode);
18857 /* Caution: if we looked through a constant pool memory above,
18858 the operand may actually have a different mode now. That's
18859 ok, since we want to pun this all the way back to an integer. */
18860 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
18861 gcc_assert (operand != NULL);
18862 mode = imode;
18863 }
18864
18865 if (!TARGET_64BIT)
18866 {
18867 if (mode == DImode)
18868 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
18869 else
18870 {
18871 int i;
18872
18873 if (REG_P (operand))
18874 {
18875 gcc_assert (reload_completed);
18876 for (i = 0; i < size; i++)
18877 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
18878 }
18879 else if (offsettable_memref_p (operand))
18880 {
18881 operand = adjust_address (operand, SImode, 0);
18882 parts[0] = operand;
18883 for (i = 1; i < size; i++)
18884 parts[i] = adjust_address (operand, SImode, 4 * i);
18885 }
18886 else if (GET_CODE (operand) == CONST_DOUBLE)
18887 {
18888 REAL_VALUE_TYPE r;
18889 long l[4];
18890
18891 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
18892 switch (mode)
18893 {
18894 case TFmode:
18895 real_to_target (l, &r, mode);
18896 parts[3] = gen_int_mode (l[3], SImode);
18897 parts[2] = gen_int_mode (l[2], SImode);
18898 break;
18899 case XFmode:
18900 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
18901 parts[2] = gen_int_mode (l[2], SImode);
18902 break;
18903 case DFmode:
18904 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
18905 break;
18906 default:
18907 gcc_unreachable ();
18908 }
18909 parts[1] = gen_int_mode (l[1], SImode);
18910 parts[0] = gen_int_mode (l[0], SImode);
18911 }
18912 else
18913 gcc_unreachable ();
18914 }
18915 }
18916 else
18917 {
18918 if (mode == TImode)
18919 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
18920 if (mode == XFmode || mode == TFmode)
18921 {
18922 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
18923 if (REG_P (operand))
18924 {
18925 gcc_assert (reload_completed);
18926 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
18927 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
18928 }
18929 else if (offsettable_memref_p (operand))
18930 {
18931 operand = adjust_address (operand, DImode, 0);
18932 parts[0] = operand;
18933 parts[1] = adjust_address (operand, upper_mode, 8);
18934 }
18935 else if (GET_CODE (operand) == CONST_DOUBLE)
18936 {
18937 REAL_VALUE_TYPE r;
18938 long l[4];
18939
18940 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
18941 real_to_target (l, &r, mode);
18942
18943 /* Do not use shift by 32 to avoid warning on 32bit systems. */
18944 if (HOST_BITS_PER_WIDE_INT >= 64)
18945 parts[0]
18946 = gen_int_mode
18947 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
18948 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
18949 DImode);
18950 else
18951 parts[0] = immed_double_const (l[0], l[1], DImode);
18952
18953 if (upper_mode == SImode)
18954 parts[1] = gen_int_mode (l[2], SImode);
18955 else if (HOST_BITS_PER_WIDE_INT >= 64)
18956 parts[1]
18957 = gen_int_mode
18958 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
18959 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
18960 DImode);
18961 else
18962 parts[1] = immed_double_const (l[2], l[3], DImode);
18963 }
18964 else
18965 gcc_unreachable ();
18966 }
18967 }
18968
18969 return size;
18970 }
18971
18972 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
18973 All required insns are emitted directly.  Operands 2-5 are used to
18974 hold the destination parts in the correct order; operands 6-9 hold
18975 the corresponding source parts.  */
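/* For example, a 32-bit DImode memory-to-register move is split into two
   SImode moves, emitted in an order chosen so that a destination register
   that also appears in the source address is not clobbered before it is
   used.  */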
18976
18977 void
18978 ix86_split_long_move (rtx operands[])
18979 {
18980 rtx part[2][4];
18981 int nparts, i, j;
18982 int push = 0;
18983 int collisions = 0;
18984 enum machine_mode mode = GET_MODE (operands[0]);
18985 bool collisionparts[4];
18986
18987 /* The DFmode expanders may ask us to move a double.
18988 For a 64-bit target this is a single move.  By hiding that fact
18989 here we simplify the i386.md splitters.  */
18990 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
18991 {
18992 /* Optimize constant pool references to immediates.  This is used by
18993 fp moves, which force all constants to memory to allow combining.  */
18994
18995 if (MEM_P (operands[1])
18996 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
18997 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
18998 operands[1] = get_pool_constant (XEXP (operands[1], 0));
18999 if (push_operand (operands[0], VOIDmode))
19000 {
19001 operands[0] = copy_rtx (operands[0]);
19002 PUT_MODE (operands[0], Pmode);
19003 }
19004 else
19005 operands[0] = gen_lowpart (DImode, operands[0]);
19006 operands[1] = gen_lowpart (DImode, operands[1]);
19007 emit_move_insn (operands[0], operands[1]);
19008 return;
19009 }
19010
19011 /* The only non-offsettable memory we handle is push. */
19012 if (push_operand (operands[0], VOIDmode))
19013 push = 1;
19014 else
19015 gcc_assert (!MEM_P (operands[0])
19016 || offsettable_memref_p (operands[0]));
19017
19018 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
19019 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
19020
19021 /* When emitting a push, take care of source operands on the stack.  */
19022 if (push && MEM_P (operands[1])
19023 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
19024 {
19025 rtx src_base = XEXP (part[1][nparts - 1], 0);
19026
19027 /* Compensate for the stack decrement by 4. */
19028 if (!TARGET_64BIT && nparts == 3
19029 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
19030 src_base = plus_constant (src_base, 4);
19031
19032 /* src_base refers to the stack pointer and is
19033 automatically decreased by emitted push. */
19034 for (i = 0; i < nparts; i++)
19035 part[1][i] = change_address (part[1][i],
19036 GET_MODE (part[1][i]), src_base);
19037 }
19038
19039 /* We need to do the copy in the right order in case an address register
19040 of the source overlaps the destination.  */
19041 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
19042 {
19043 rtx tmp;
19044
19045 for (i = 0; i < nparts; i++)
19046 {
19047 collisionparts[i]
19048 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
19049 if (collisionparts[i])
19050 collisions++;
19051 }
19052
19053 /* Collision in the middle part can be handled by reordering. */
19054 if (collisions == 1 && nparts == 3 && collisionparts [1])
19055 {
19056 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19057 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19058 }
19059 else if (collisions == 1
19060 && nparts == 4
19061 && (collisionparts [1] || collisionparts [2]))
19062 {
19063 if (collisionparts [1])
19064 {
19065 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19066 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19067 }
19068 else
19069 {
19070 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
19071 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
19072 }
19073 }
19074
19075 /* If there are more collisions, we can't handle it by reordering.
19076 Do an lea to the last part and use only one colliding move. */
19077 else if (collisions > 1)
19078 {
19079 rtx base;
19080
19081 collisions = 1;
19082
19083 base = part[0][nparts - 1];
19084
19085 /* Handle the case when the last part isn't valid for lea.
19086 Happens in 64-bit mode storing the 12-byte XFmode. */
19087 if (GET_MODE (base) != Pmode)
19088 base = gen_rtx_REG (Pmode, REGNO (base));
19089
19090 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
19091 part[1][0] = replace_equiv_address (part[1][0], base);
19092 for (i = 1; i < nparts; i++)
19093 {
19094 tmp = plus_constant (base, UNITS_PER_WORD * i);
19095 part[1][i] = replace_equiv_address (part[1][i], tmp);
19096 }
19097 }
19098 }
19099
19100 if (push)
19101 {
19102 if (!TARGET_64BIT)
19103 {
19104 if (nparts == 3)
19105 {
19106 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
19107 emit_insn (gen_addsi3 (stack_pointer_rtx,
19108 stack_pointer_rtx, GEN_INT (-4)));
19109 emit_move_insn (part[0][2], part[1][2]);
19110 }
19111 else if (nparts == 4)
19112 {
19113 emit_move_insn (part[0][3], part[1][3]);
19114 emit_move_insn (part[0][2], part[1][2]);
19115 }
19116 }
19117 else
19118 {
19119 /* In 64-bit mode we don't have a 32-bit push available.  If this is
19120 a register, that is OK: we will just use the larger counterpart.  We
19121 also retype the memory; this comes from an attempt to avoid a REX
19122 prefix when moving the second half of a TFmode value.  */
19123 if (GET_MODE (part[1][1]) == SImode)
19124 {
19125 switch (GET_CODE (part[1][1]))
19126 {
19127 case MEM:
19128 part[1][1] = adjust_address (part[1][1], DImode, 0);
19129 break;
19130
19131 case REG:
19132 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
19133 break;
19134
19135 default:
19136 gcc_unreachable ();
19137 }
19138
19139 if (GET_MODE (part[1][0]) == SImode)
19140 part[1][0] = part[1][1];
19141 }
19142 }
19143 emit_move_insn (part[0][1], part[1][1]);
19144 emit_move_insn (part[0][0], part[1][0]);
19145 return;
19146 }
19147
19148 /* Choose the correct order so that the source is not overwritten before it is copied.  */
19149 if ((REG_P (part[0][0])
19150 && REG_P (part[1][1])
19151 && (REGNO (part[0][0]) == REGNO (part[1][1])
19152 || (nparts == 3
19153 && REGNO (part[0][0]) == REGNO (part[1][2]))
19154 || (nparts == 4
19155 && REGNO (part[0][0]) == REGNO (part[1][3]))))
19156 || (collisions > 0
19157 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
19158 {
19159 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
19160 {
19161 operands[2 + i] = part[0][j];
19162 operands[6 + i] = part[1][j];
19163 }
19164 }
19165 else
19166 {
19167 for (i = 0; i < nparts; i++)
19168 {
19169 operands[2 + i] = part[0][i];
19170 operands[6 + i] = part[1][i];
19171 }
19172 }
19173
19174 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
19175 if (optimize_insn_for_size_p ())
19176 {
19177 for (j = 0; j < nparts - 1; j++)
19178 if (CONST_INT_P (operands[6 + j])
19179 && operands[6 + j] != const0_rtx
19180 && REG_P (operands[2 + j]))
19181 for (i = j; i < nparts - 1; i++)
19182 if (CONST_INT_P (operands[7 + i])
19183 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
19184 operands[7 + i] = operands[2 + j];
19185 }
19186
19187 for (i = 0; i < nparts; i++)
19188 emit_move_insn (operands[2 + i], operands[6 + i]);
19189
19190 return;
19191 }
19192
19193 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
19194 left shift by a constant, either using a single shift or
19195 a sequence of add instructions. */
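/* For example, a left shift by 2 may be emitted as two "add reg, reg"
   instructions when twice the add cost does not exceed the constant-shift
   cost and we are not optimizing for size; otherwise a single shift by the
   constant is used.  */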
19196
19197 static void
19198 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
19199 {
19200 rtx (*insn)(rtx, rtx, rtx);
19201
19202 if (count == 1
19203 || (count * ix86_cost->add <= ix86_cost->shift_const
19204 && !optimize_insn_for_size_p ()))
19205 {
19206 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
19207 while (count-- > 0)
19208 emit_insn (insn (operand, operand, operand));
19209 }
19210 else
19211 {
19212 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19213 emit_insn (insn (operand, operand, GEN_INT (count)));
19214 }
19215 }
19216
19217 void
19218 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
19219 {
19220 rtx (*gen_ashl3)(rtx, rtx, rtx);
19221 rtx (*gen_shld)(rtx, rtx, rtx);
19222 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19223
19224 rtx low[2], high[2];
19225 int count;
19226
19227 if (CONST_INT_P (operands[2]))
19228 {
19229 split_double_mode (mode, operands, 2, low, high);
19230 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19231
19232 if (count >= half_width)
19233 {
19234 emit_move_insn (high[0], low[1]);
19235 emit_move_insn (low[0], const0_rtx);
19236
19237 if (count > half_width)
19238 ix86_expand_ashl_const (high[0], count - half_width, mode);
19239 }
19240 else
19241 {
19242 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19243
19244 if (!rtx_equal_p (operands[0], operands[1]))
19245 emit_move_insn (operands[0], operands[1]);
19246
19247 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
19248 ix86_expand_ashl_const (low[0], count, mode);
19249 }
19250 return;
19251 }
19252
19253 split_double_mode (mode, operands, 1, low, high);
19254
19255 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19256
19257 if (operands[1] == const1_rtx)
19258 {
19259 /* Assuming we've chosen QImode-capable registers, 1 << N
19260 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
19261 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
19262 {
19263 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
19264
19265 ix86_expand_clear (low[0]);
19266 ix86_expand_clear (high[0]);
19267 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
19268
19269 d = gen_lowpart (QImode, low[0]);
19270 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19271 s = gen_rtx_EQ (QImode, flags, const0_rtx);
19272 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19273
19274 d = gen_lowpart (QImode, high[0]);
19275 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19276 s = gen_rtx_NE (QImode, flags, const0_rtx);
19277 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19278 }
19279
19280 /* Otherwise, we can get the same results by manually performing
19281 a bit extract operation on bit 5/6, and then performing the two
19282 shifts. The two methods of getting 0/1 into low/high are exactly
19283 the same size. Avoiding the shift in the bit extract case helps
19284 pentium4 a bit; no one else seems to care much either way. */
19285 else
19286 {
19287 enum machine_mode half_mode;
19288 rtx (*gen_lshr3)(rtx, rtx, rtx);
19289 rtx (*gen_and3)(rtx, rtx, rtx);
19290 rtx (*gen_xor3)(rtx, rtx, rtx);
19291 HOST_WIDE_INT bits;
19292 rtx x;
19293
19294 if (mode == DImode)
19295 {
19296 half_mode = SImode;
19297 gen_lshr3 = gen_lshrsi3;
19298 gen_and3 = gen_andsi3;
19299 gen_xor3 = gen_xorsi3;
19300 bits = 5;
19301 }
19302 else
19303 {
19304 half_mode = DImode;
19305 gen_lshr3 = gen_lshrdi3;
19306 gen_and3 = gen_anddi3;
19307 gen_xor3 = gen_xordi3;
19308 bits = 6;
19309 }
19310
19311 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
19312 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
19313 else
19314 x = gen_lowpart (half_mode, operands[2]);
19315 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
19316
19317 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
19318 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
19319 emit_move_insn (low[0], high[0]);
19320 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
19321 }
19322
19323 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19324 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
19325 return;
19326 }
19327
19328 if (operands[1] == constm1_rtx)
19329 {
19330 /* For -1 << N, we can avoid the shld instruction, because we
19331 know that we're shifting 0...31/63 ones into a -1. */
19332 emit_move_insn (low[0], constm1_rtx);
19333 if (optimize_insn_for_size_p ())
19334 emit_move_insn (high[0], low[0]);
19335 else
19336 emit_move_insn (high[0], constm1_rtx);
19337 }
19338 else
19339 {
19340 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19341
19342 if (!rtx_equal_p (operands[0], operands[1]))
19343 emit_move_insn (operands[0], operands[1]);
19344
19345 split_double_mode (mode, operands, 1, low, high);
19346 emit_insn (gen_shld (high[0], low[0], operands[2]));
19347 }
19348
19349 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19350
19351 if (TARGET_CMOVE && scratch)
19352 {
19353 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19354 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19355
19356 ix86_expand_clear (scratch);
19357 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
19358 }
19359 else
19360 {
19361 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19362 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19363
19364 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
19365 }
19366 }
19367
19368 void
19369 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
19370 {
19371 rtx (*gen_ashr3)(rtx, rtx, rtx)
19372 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
19373 rtx (*gen_shrd)(rtx, rtx, rtx);
19374 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19375
19376 rtx low[2], high[2];
19377 int count;
19378
19379 if (CONST_INT_P (operands[2]))
19380 {
19381 split_double_mode (mode, operands, 2, low, high);
19382 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19383
19384 if (count == GET_MODE_BITSIZE (mode) - 1)
19385 {
19386 emit_move_insn (high[0], high[1]);
19387 emit_insn (gen_ashr3 (high[0], high[0],
19388 GEN_INT (half_width - 1)));
19389 emit_move_insn (low[0], high[0]);
19390
19391 }
19392 else if (count >= half_width)
19393 {
19394 emit_move_insn (low[0], high[1]);
19395 emit_move_insn (high[0], low[0]);
19396 emit_insn (gen_ashr3 (high[0], high[0],
19397 GEN_INT (half_width - 1)));
19398
19399 if (count > half_width)
19400 emit_insn (gen_ashr3 (low[0], low[0],
19401 GEN_INT (count - half_width)));
19402 }
19403 else
19404 {
19405 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19406
19407 if (!rtx_equal_p (operands[0], operands[1]))
19408 emit_move_insn (operands[0], operands[1]);
19409
19410 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19411 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
19412 }
19413 }
19414 else
19415 {
19416 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19417
19418 if (!rtx_equal_p (operands[0], operands[1]))
19419 emit_move_insn (operands[0], operands[1]);
19420
19421 split_double_mode (mode, operands, 1, low, high);
19422
19423 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19424 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
19425
19426 if (TARGET_CMOVE && scratch)
19427 {
19428 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19429 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19430
19431 emit_move_insn (scratch, high[0]);
19432 emit_insn (gen_ashr3 (scratch, scratch,
19433 GEN_INT (half_width - 1)));
19434 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19435 scratch));
19436 }
19437 else
19438 {
19439 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
19440 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
19441
19442 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
19443 }
19444 }
19445 }
19446
19447 void
19448 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
19449 {
19450 rtx (*gen_lshr3)(rtx, rtx, rtx)
19451 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
19452 rtx (*gen_shrd)(rtx, rtx, rtx);
19453 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19454
19455 rtx low[2], high[2];
19456 int count;
19457
19458 if (CONST_INT_P (operands[2]))
19459 {
19460 split_double_mode (mode, operands, 2, low, high);
19461 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19462
19463 if (count >= half_width)
19464 {
19465 emit_move_insn (low[0], high[1]);
19466 ix86_expand_clear (high[0]);
19467
19468 if (count > half_width)
19469 emit_insn (gen_lshr3 (low[0], low[0],
19470 GEN_INT (count - half_width)));
19471 }
19472 else
19473 {
19474 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19475
19476 if (!rtx_equal_p (operands[0], operands[1]))
19477 emit_move_insn (operands[0], operands[1]);
19478
19479 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19480 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
19481 }
19482 }
19483 else
19484 {
19485 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19486
19487 if (!rtx_equal_p (operands[0], operands[1]))
19488 emit_move_insn (operands[0], operands[1]);
19489
19490 split_double_mode (mode, operands, 1, low, high);
19491
19492 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19493 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
19494
19495 if (TARGET_CMOVE && scratch)
19496 {
19497 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19498 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19499
19500 ix86_expand_clear (scratch);
19501 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19502 scratch));
19503 }
19504 else
19505 {
19506 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19507 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19508
19509 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
19510 }
19511 }
19512 }
19513
19514 /* Predict the just-emitted jump instruction to be taken with probability PROB.  */
19515 static void
19516 predict_jump (int prob)
19517 {
19518 rtx insn = get_last_insn ();
19519 gcc_assert (JUMP_P (insn));
19520 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
19521 }
19522
19523 /* Helper function for the string operations below.  Test whether
19524 (VARIABLE & VALUE) is zero; if so, jump to the label that is returned.  */
19525 static rtx
19526 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
19527 {
19528 rtx label = gen_label_rtx ();
19529 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
19530 if (GET_MODE (variable) == DImode)
19531 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
19532 else
19533 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
19534 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
19535 1, label);
19536 if (epilogue)
19537 predict_jump (REG_BR_PROB_BASE * 50 / 100);
19538 else
19539 predict_jump (REG_BR_PROB_BASE * 90 / 100);
19540 return label;
19541 }
19542
19543 /* Decrease COUNTREG by VALUE.  */
19544 static void
19545 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
19546 {
19547 rtx (*gen_add)(rtx, rtx, rtx)
19548 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
19549
19550 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
19551 }
19552
19553 /* Zero extend possibly SImode EXP to Pmode register. */
19554 rtx
19555 ix86_zero_extend_to_Pmode (rtx exp)
19556 {
19557 rtx r;
19558 if (GET_MODE (exp) == VOIDmode)
19559 return force_reg (Pmode, exp);
19560 if (GET_MODE (exp) == Pmode)
19561 return copy_to_mode_reg (Pmode, exp);
19562 r = gen_reg_rtx (Pmode);
19563 emit_insn (gen_zero_extendsidi2 (r, exp));
19564 return r;
19565 }
19566
19567 /* Divide COUNTREG by SCALE. */
19568 static rtx
19569 scale_counter (rtx countreg, int scale)
19570 {
19571 rtx sc;
19572
19573 if (scale == 1)
19574 return countreg;
19575 if (CONST_INT_P (countreg))
19576 return GEN_INT (INTVAL (countreg) / scale);
19577 gcc_assert (REG_P (countreg));
19578
19579 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
19580 GEN_INT (exact_log2 (scale)),
19581 NULL, 1, OPTAB_DIRECT);
19582 return sc;
19583 }
19584
19585 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
19586 DImode for constant loop counts. */
19587
19588 static enum machine_mode
19589 counter_mode (rtx count_exp)
19590 {
19591 if (GET_MODE (count_exp) != VOIDmode)
19592 return GET_MODE (count_exp);
19593 if (!CONST_INT_P (count_exp))
19594 return Pmode;
19595 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
19596 return DImode;
19597 return SImode;
19598 }
19599
19600 /* When SRCPTR is non-NULL, output a simple loop that moves the memory
19601 pointed to by SRCPTR to DESTPTR in chunks of MODE unrolled UNROLL times;
19602 the overall size is COUNT, specified in bytes.  When SRCPTR is NULL, output
19603 the equivalent loop that sets memory to VALUE (assumed to be in MODE).
19604
19605 The size is rounded down to a whole number of chunks moved at once.
19606 SRCMEM and DESTMEM provide the MEM rtx to feed proper aliasing info.  */
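/* Roughly, the emitted loop has the shape

      iter = 0;
      do
	{
	  copy (or set) UNROLL chunks of MODE at DESTPTR + iter
	    (and SRCPTR + iter);
	  iter += UNROLL * GET_MODE_SIZE (MODE);
	}
      while (iter < (COUNT rounded down to a multiple of the chunk size));

   after which DESTPTR (and SRCPTR) are advanced by ITER.  */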
19607
19608
19609 static void
19610 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
19611 rtx destptr, rtx srcptr, rtx value,
19612 rtx count, enum machine_mode mode, int unroll,
19613 int expected_size)
19614 {
19615 rtx out_label, top_label, iter, tmp;
19616 enum machine_mode iter_mode = counter_mode (count);
19617 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
19618 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
19619 rtx size;
19620 rtx x_addr;
19621 rtx y_addr;
19622 int i;
19623
19624 top_label = gen_label_rtx ();
19625 out_label = gen_label_rtx ();
19626 iter = gen_reg_rtx (iter_mode);
19627
19628 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
19629 NULL, 1, OPTAB_DIRECT);
19630 /* Those two should combine. */
19631 if (piece_size == const1_rtx)
19632 {
19633 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
19634 true, out_label);
19635 predict_jump (REG_BR_PROB_BASE * 10 / 100);
19636 }
19637 emit_move_insn (iter, const0_rtx);
19638
19639 emit_label (top_label);
19640
19641 tmp = convert_modes (Pmode, iter_mode, iter, true);
19642 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
19643 destmem = change_address (destmem, mode, x_addr);
19644
19645 if (srcmem)
19646 {
19647 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
19648 srcmem = change_address (srcmem, mode, y_addr);
19649
19650 /* When unrolling for chips that reorder memory reads and writes,
19651 we can save registers by using a single temporary.
19652 Also, using 4 temporaries is overkill in 32-bit mode.  */
19653 if (!TARGET_64BIT && 0)
19654 {
19655 for (i = 0; i < unroll; i++)
19656 {
19657 if (i)
19658 {
19659 destmem =
19660 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19661 srcmem =
19662 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
19663 }
19664 emit_move_insn (destmem, srcmem);
19665 }
19666 }
19667 else
19668 {
19669 rtx tmpreg[4];
19670 gcc_assert (unroll <= 4);
19671 for (i = 0; i < unroll; i++)
19672 {
19673 tmpreg[i] = gen_reg_rtx (mode);
19674 if (i)
19675 {
19676 srcmem =
19677 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
19678 }
19679 emit_move_insn (tmpreg[i], srcmem);
19680 }
19681 for (i = 0; i < unroll; i++)
19682 {
19683 if (i)
19684 {
19685 destmem =
19686 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19687 }
19688 emit_move_insn (destmem, tmpreg[i]);
19689 }
19690 }
19691 }
19692 else
19693 for (i = 0; i < unroll; i++)
19694 {
19695 if (i)
19696 destmem =
19697 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19698 emit_move_insn (destmem, value);
19699 }
19700
19701 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
19702 true, OPTAB_LIB_WIDEN);
19703 if (tmp != iter)
19704 emit_move_insn (iter, tmp);
19705
19706 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
19707 true, top_label);
19708 if (expected_size != -1)
19709 {
19710 expected_size /= GET_MODE_SIZE (mode) * unroll;
19711 if (expected_size == 0)
19712 predict_jump (0);
19713 else if (expected_size > REG_BR_PROB_BASE)
19714 predict_jump (REG_BR_PROB_BASE - 1);
19715 else
19716 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
19717 }
19718 else
19719 predict_jump (REG_BR_PROB_BASE * 80 / 100);
19720 iter = ix86_zero_extend_to_Pmode (iter);
19721 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
19722 true, OPTAB_LIB_WIDEN);
19723 if (tmp != destptr)
19724 emit_move_insn (destptr, tmp);
19725 if (srcptr)
19726 {
19727 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
19728 true, OPTAB_LIB_WIDEN);
19729 if (tmp != srcptr)
19730 emit_move_insn (srcptr, tmp);
19731 }
19732 emit_label (out_label);
19733 }
19734
19735 /* Output a "rep; mov" instruction.
19736 Arguments have the same meaning as for the previous function.  */
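/* For example, copying 32 bytes with MODE == SImode scales the count down to
   8 and emits a single "rep movsd"; DESTEXP and SRCEXP encode the pointer
   values after the copy for the rep_mov pattern.  */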
19737 static void
19738 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
19739 rtx destptr, rtx srcptr,
19740 rtx count,
19741 enum machine_mode mode)
19742 {
19743 rtx destexp;
19744 rtx srcexp;
19745 rtx countreg;
19746 HOST_WIDE_INT rounded_count;
19747
19748 /* If the size is known and a multiple of 4, use SImode chunks so that "rep movs" needs fewer iterations.  */
19749 if (mode == QImode && CONST_INT_P (count)
19750 && !(INTVAL (count) & 3))
19751 mode = SImode;
19752
19753 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
19754 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
19755 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
19756 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
19757 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
19758 if (mode != QImode)
19759 {
19760 destexp = gen_rtx_ASHIFT (Pmode, countreg,
19761 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19762 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
19763 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
19764 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19765 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
19766 }
19767 else
19768 {
19769 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
19770 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
19771 }
19772 if (CONST_INT_P (count))
19773 {
19774 rounded_count = (INTVAL (count)
19775 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
19776 destmem = shallow_copy_rtx (destmem);
19777 srcmem = shallow_copy_rtx (srcmem);
19778 set_mem_size (destmem, rounded_count);
19779 set_mem_size (srcmem, rounded_count);
19780 }
19781 else
19782 {
19783 if (MEM_SIZE_KNOWN_P (destmem))
19784 clear_mem_size (destmem);
19785 if (MEM_SIZE_KNOWN_P (srcmem))
19786 clear_mem_size (srcmem);
19787 }
19788 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
19789 destexp, srcexp));
19790 }
19791
19792 /* Output a "rep; stos" instruction.
19793 Arguments have the same meaning as for the previous function.  */
19794 static void
19795 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
19796 rtx count, enum machine_mode mode,
19797 rtx orig_value)
19798 {
19799 rtx destexp;
19800 rtx countreg;
19801 HOST_WIDE_INT rounded_count;
19802
19803 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
19804 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
19805 value = force_reg (mode, gen_lowpart (mode, value));
19806 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
19807 if (mode != QImode)
19808 {
19809 destexp = gen_rtx_ASHIFT (Pmode, countreg,
19810 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19811 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
19812 }
19813 else
19814 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
19815 if (orig_value == const0_rtx && CONST_INT_P (count))
19816 {
19817 rounded_count = (INTVAL (count)
19818 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
19819 destmem = shallow_copy_rtx (destmem);
19820 set_mem_size (destmem, rounded_count);
19821 }
19822 else if (MEM_SIZE_KNOWN_P (destmem))
19823 clear_mem_size (destmem);
19824 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
19825 }
19826
19827 static void
19828 emit_strmov (rtx destmem, rtx srcmem,
19829 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
19830 {
19831 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
19832 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
19833 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19834 }
19835
19836 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
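/* When COUNT is a compile-time constant, the residual bytes are copied by
   testing the individual bits of COUNT: a 16, 8, 4, 2 and 1 byte move is
   emitted for each bit that is set (subject to MAX_SIZE).  Otherwise runtime
   alignment tests or a byte loop handle the remainder.  */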
19837 static void
19838 expand_movmem_epilogue (rtx destmem, rtx srcmem,
19839 rtx destptr, rtx srcptr, rtx count, int max_size)
19840 {
19841 rtx src, dest;
19842 if (CONST_INT_P (count))
19843 {
19844 HOST_WIDE_INT countval = INTVAL (count);
19845 int offset = 0;
19846
19847 if ((countval & 0x10) && max_size > 16)
19848 {
19849 if (TARGET_64BIT)
19850 {
19851 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
19852 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
19853 }
19854 else
19855 gcc_unreachable ();
19856 offset += 16;
19857 }
19858 if ((countval & 0x08) && max_size > 8)
19859 {
19860 if (TARGET_64BIT)
19861 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
19862 else
19863 {
19864 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
19865 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
19866 }
19867 offset += 8;
19868 }
19869 if ((countval & 0x04) && max_size > 4)
19870 {
19871 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
19872 offset += 4;
19873 }
19874 if ((countval & 0x02) && max_size > 2)
19875 {
19876 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
19877 offset += 2;
19878 }
19879 if ((countval & 0x01) && max_size > 1)
19880 {
19881 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
19882 offset += 1;
19883 }
19884 return;
19885 }
19886 if (max_size > 8)
19887 {
19888 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
19889 count, 1, OPTAB_DIRECT);
19890 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
19891 count, QImode, 1, 4);
19892 return;
19893 }
19894
19895 /* When single stringop instructions are cheap (TARGET_SINGLE_STRINGOP), we
19896 can simply advance the dest and src pointers.  Otherwise we save code size
19897 by maintaining an offset (zero is readily available from the preceding rep
19898 operation) and using x86 addressing modes.  */
19899 if (TARGET_SINGLE_STRINGOP)
19900 {
19901 if (max_size > 4)
19902 {
19903 rtx label = ix86_expand_aligntest (count, 4, true);
19904 src = change_address (srcmem, SImode, srcptr);
19905 dest = change_address (destmem, SImode, destptr);
19906 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19907 emit_label (label);
19908 LABEL_NUSES (label) = 1;
19909 }
19910 if (max_size > 2)
19911 {
19912 rtx label = ix86_expand_aligntest (count, 2, true);
19913 src = change_address (srcmem, HImode, srcptr);
19914 dest = change_address (destmem, HImode, destptr);
19915 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19916 emit_label (label);
19917 LABEL_NUSES (label) = 1;
19918 }
19919 if (max_size > 1)
19920 {
19921 rtx label = ix86_expand_aligntest (count, 1, true);
19922 src = change_address (srcmem, QImode, srcptr);
19923 dest = change_address (destmem, QImode, destptr);
19924 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19925 emit_label (label);
19926 LABEL_NUSES (label) = 1;
19927 }
19928 }
19929 else
19930 {
19931 rtx offset = force_reg (Pmode, const0_rtx);
19932 rtx tmp;
19933
19934 if (max_size > 4)
19935 {
19936 rtx label = ix86_expand_aligntest (count, 4, true);
19937 src = change_address (srcmem, SImode, srcptr);
19938 dest = change_address (destmem, SImode, destptr);
19939 emit_move_insn (dest, src);
19940 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
19941 true, OPTAB_LIB_WIDEN);
19942 if (tmp != offset)
19943 emit_move_insn (offset, tmp);
19944 emit_label (label);
19945 LABEL_NUSES (label) = 1;
19946 }
19947 if (max_size > 2)
19948 {
19949 rtx label = ix86_expand_aligntest (count, 2, true);
19950 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
19951 src = change_address (srcmem, HImode, tmp);
19952 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
19953 dest = change_address (destmem, HImode, tmp);
19954 emit_move_insn (dest, src);
19955 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
19956 true, OPTAB_LIB_WIDEN);
19957 if (tmp != offset)
19958 emit_move_insn (offset, tmp);
19959 emit_label (label);
19960 LABEL_NUSES (label) = 1;
19961 }
19962 if (max_size > 1)
19963 {
19964 rtx label = ix86_expand_aligntest (count, 1, true);
19965 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
19966 src = change_address (srcmem, QImode, tmp);
19967 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
19968 dest = change_address (destmem, QImode, tmp);
19969 emit_move_insn (dest, src);
19970 emit_label (label);
19971 LABEL_NUSES (label) = 1;
19972 }
19973 }
19974 }
19975
19976 /* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
19977 static void
19978 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
19979 rtx count, int max_size)
19980 {
19981 count =
19982 expand_simple_binop (counter_mode (count), AND, count,
19983 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
19984 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
19985 gen_lowpart (QImode, value), count, QImode,
19986 1, max_size / 2);
19987 }
19988
19989 /* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
19990 static void
19991 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
19992 {
19993 rtx dest;
19994
19995 if (CONST_INT_P (count))
19996 {
19997 HOST_WIDE_INT countval = INTVAL (count);
19998 int offset = 0;
19999
20000 if ((countval & 0x10) && max_size > 16)
20001 {
20002 if (TARGET_64BIT)
20003 {
20004 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
20005 emit_insn (gen_strset (destptr, dest, value));
20006 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
20007 emit_insn (gen_strset (destptr, dest, value));
20008 }
20009 else
20010 gcc_unreachable ();
20011 offset += 16;
20012 }
20013 if ((countval & 0x08) && max_size > 8)
20014 {
20015 if (TARGET_64BIT)
20016 {
20017 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
20018 emit_insn (gen_strset (destptr, dest, value));
20019 }
20020 else
20021 {
20022 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
20023 emit_insn (gen_strset (destptr, dest, value));
20024 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
20025 emit_insn (gen_strset (destptr, dest, value));
20026 }
20027 offset += 8;
20028 }
20029 if ((countval & 0x04) && max_size > 4)
20030 {
20031 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
20032 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
20033 offset += 4;
20034 }
20035 if ((countval & 0x02) && max_size > 2)
20036 {
20037 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
20038 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20039 offset += 2;
20040 }
20041 if ((countval & 0x01) && max_size > 1)
20042 {
20043 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
20044 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20045 offset += 1;
20046 }
20047 return;
20048 }
20049 if (max_size > 32)
20050 {
20051 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
20052 return;
20053 }
20054 if (max_size > 16)
20055 {
20056 rtx label = ix86_expand_aligntest (count, 16, true);
20057 if (TARGET_64BIT)
20058 {
20059 dest = change_address (destmem, DImode, destptr);
20060 emit_insn (gen_strset (destptr, dest, value));
20061 emit_insn (gen_strset (destptr, dest, value));
20062 }
20063 else
20064 {
20065 dest = change_address (destmem, SImode, destptr);
20066 emit_insn (gen_strset (destptr, dest, value));
20067 emit_insn (gen_strset (destptr, dest, value));
20068 emit_insn (gen_strset (destptr, dest, value));
20069 emit_insn (gen_strset (destptr, dest, value));
20070 }
20071 emit_label (label);
20072 LABEL_NUSES (label) = 1;
20073 }
20074 if (max_size > 8)
20075 {
20076 rtx label = ix86_expand_aligntest (count, 8, true);
20077 if (TARGET_64BIT)
20078 {
20079 dest = change_address (destmem, DImode, destptr);
20080 emit_insn (gen_strset (destptr, dest, value));
20081 }
20082 else
20083 {
20084 dest = change_address (destmem, SImode, destptr);
20085 emit_insn (gen_strset (destptr, dest, value));
20086 emit_insn (gen_strset (destptr, dest, value));
20087 }
20088 emit_label (label);
20089 LABEL_NUSES (label) = 1;
20090 }
20091 if (max_size > 4)
20092 {
20093 rtx label = ix86_expand_aligntest (count, 4, true);
20094 dest = change_address (destmem, SImode, destptr);
20095 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
20096 emit_label (label);
20097 LABEL_NUSES (label) = 1;
20098 }
20099 if (max_size > 2)
20100 {
20101 rtx label = ix86_expand_aligntest (count, 2, true);
20102 dest = change_address (destmem, HImode, destptr);
20103 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20104 emit_label (label);
20105 LABEL_NUSES (label) = 1;
20106 }
20107 if (max_size > 1)
20108 {
20109 rtx label = ix86_expand_aligntest (count, 1, true);
20110 dest = change_address (destmem, QImode, destptr);
20111 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20112 emit_label (label);
20113 LABEL_NUSES (label) = 1;
20114 }
20115 }
20116
20117 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
20118 to DESIRED_ALIGNMENT.  */
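/* For example, with ALIGN == 1 and DESIRED_ALIGNMENT == 8 the code emits up
   to three guarded copies of one, two and four bytes, each executed only when
   the corresponding low bit of DESTPTR is set, and COUNT is decreased
   accordingly.  */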
20119 static void
20120 expand_movmem_prologue (rtx destmem, rtx srcmem,
20121 rtx destptr, rtx srcptr, rtx count,
20122 int align, int desired_alignment)
20123 {
20124 if (align <= 1 && desired_alignment > 1)
20125 {
20126 rtx label = ix86_expand_aligntest (destptr, 1, false);
20127 srcmem = change_address (srcmem, QImode, srcptr);
20128 destmem = change_address (destmem, QImode, destptr);
20129 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20130 ix86_adjust_counter (count, 1);
20131 emit_label (label);
20132 LABEL_NUSES (label) = 1;
20133 }
20134 if (align <= 2 && desired_alignment > 2)
20135 {
20136 rtx label = ix86_expand_aligntest (destptr, 2, false);
20137 srcmem = change_address (srcmem, HImode, srcptr);
20138 destmem = change_address (destmem, HImode, destptr);
20139 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20140 ix86_adjust_counter (count, 2);
20141 emit_label (label);
20142 LABEL_NUSES (label) = 1;
20143 }
20144 if (align <= 4 && desired_alignment > 4)
20145 {
20146 rtx label = ix86_expand_aligntest (destptr, 4, false);
20147 srcmem = change_address (srcmem, SImode, srcptr);
20148 destmem = change_address (destmem, SImode, destptr);
20149 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20150 ix86_adjust_counter (count, 4);
20151 emit_label (label);
20152 LABEL_NUSES (label) = 1;
20153 }
20154 gcc_assert (desired_alignment <= 8);
20155 }
20156
20157 /* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
20158 ALIGN_BYTES is how many bytes need to be copied. */
20159 static rtx
20160 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
20161 int desired_align, int align_bytes)
20162 {
20163 rtx src = *srcp;
20164 rtx orig_dst = dst;
20165 rtx orig_src = src;
20166 int off = 0;
20167 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
20168 if (src_align_bytes >= 0)
20169 src_align_bytes = desired_align - src_align_bytes;
20170 if (align_bytes & 1)
20171 {
20172 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20173 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
20174 off = 1;
20175 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20176 }
20177 if (align_bytes & 2)
20178 {
20179 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20180 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
20181 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20182 set_mem_align (dst, 2 * BITS_PER_UNIT);
20183 if (src_align_bytes >= 0
20184 && (src_align_bytes & 1) == (align_bytes & 1)
20185 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
20186 set_mem_align (src, 2 * BITS_PER_UNIT);
20187 off = 2;
20188 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20189 }
20190 if (align_bytes & 4)
20191 {
20192 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20193 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
20194 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20195 set_mem_align (dst, 4 * BITS_PER_UNIT);
20196 if (src_align_bytes >= 0)
20197 {
20198 unsigned int src_align = 0;
20199 if ((src_align_bytes & 3) == (align_bytes & 3))
20200 src_align = 4;
20201 else if ((src_align_bytes & 1) == (align_bytes & 1))
20202 src_align = 2;
20203 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20204 set_mem_align (src, src_align * BITS_PER_UNIT);
20205 }
20206 off = 4;
20207 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20208 }
20209 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20210 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
20211 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20212 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20213 if (src_align_bytes >= 0)
20214 {
20215 unsigned int src_align = 0;
20216 if ((src_align_bytes & 7) == (align_bytes & 7))
20217 src_align = 8;
20218 else if ((src_align_bytes & 3) == (align_bytes & 3))
20219 src_align = 4;
20220 else if ((src_align_bytes & 1) == (align_bytes & 1))
20221 src_align = 2;
20222 if (src_align > (unsigned int) desired_align)
20223 src_align = desired_align;
20224 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20225 set_mem_align (src, src_align * BITS_PER_UNIT);
20226 }
20227 if (MEM_SIZE_KNOWN_P (orig_dst))
20228 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
20229 if (MEM_SIZE_KNOWN_P (orig_src))
20230 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
20231 *srcp = src;
20232 return dst;
20233 }
20234
20235 /* Set enough bytes of DEST to align DEST, known to be aligned by ALIGN,
20236 to DESIRED_ALIGNMENT. */
20237 static void
20238 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
20239 int align, int desired_alignment)
20240 {
20241 if (align <= 1 && desired_alignment > 1)
20242 {
20243 rtx label = ix86_expand_aligntest (destptr, 1, false);
20244 destmem = change_address (destmem, QImode, destptr);
20245 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
20246 ix86_adjust_counter (count, 1);
20247 emit_label (label);
20248 LABEL_NUSES (label) = 1;
20249 }
20250 if (align <= 2 && desired_alignment > 2)
20251 {
20252 rtx label = ix86_expand_aligntest (destptr, 2, false);
20253 destmem = change_address (destmem, HImode, destptr);
20254 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
20255 ix86_adjust_counter (count, 2);
20256 emit_label (label);
20257 LABEL_NUSES (label) = 1;
20258 }
20259 if (align <= 4 && desired_alignment > 4)
20260 {
20261 rtx label = ix86_expand_aligntest (destptr, 4, false);
20262 destmem = change_address (destmem, SImode, destptr);
20263 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
20264 ix86_adjust_counter (count, 4);
20265 emit_label (label);
20266 LABEL_NUSES (label) = 1;
20267 }
20268 gcc_assert (desired_alignment <= 8);
20269 }
20270
20271 /* Set enough bytes of DST to align DST, known to be aligned by ALIGN, to
20272 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
20273 static rtx
20274 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
20275 int desired_align, int align_bytes)
20276 {
20277 int off = 0;
20278 rtx orig_dst = dst;
20279 if (align_bytes & 1)
20280 {
20281 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20282 off = 1;
20283 emit_insn (gen_strset (destreg, dst,
20284 gen_lowpart (QImode, value)));
20285 }
20286 if (align_bytes & 2)
20287 {
20288 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20289 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20290 set_mem_align (dst, 2 * BITS_PER_UNIT);
20291 off = 2;
20292 emit_insn (gen_strset (destreg, dst,
20293 gen_lowpart (HImode, value)));
20294 }
20295 if (align_bytes & 4)
20296 {
20297 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20298 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20299 set_mem_align (dst, 4 * BITS_PER_UNIT);
20300 off = 4;
20301 emit_insn (gen_strset (destreg, dst,
20302 gen_lowpart (SImode, value)));
20303 }
20304 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20305 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20306 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20307 if (MEM_SIZE_KNOWN_P (orig_dst))
20308 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
20309 return dst;
20310 }
20311
20312 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
20313 static enum stringop_alg
20314 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
20315 int *dynamic_check)
20316 {
20317 const struct stringop_algs * algs;
20318 bool optimize_for_speed;
20319 /* Algorithms using the rep prefix want at least edi and ecx;
20320 additionally, memset wants eax and memcpy wants esi. Don't
20321 consider such algorithms if the user has appropriated those
20322 registers for their own purposes. */
20323 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
20324 || (memset
20325 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
20326
20327 #define ALG_USABLE_P(alg) (rep_prefix_usable \
20328 || (alg != rep_prefix_1_byte \
20329 && alg != rep_prefix_4_byte \
20330 && alg != rep_prefix_8_byte))
20331 const struct processor_costs *cost;
20332
20333 /* Even if the string operation call is cold, we still might spend a lot
20334 of time processing large blocks. */
20335 if (optimize_function_for_size_p (cfun)
20336 || (optimize_insn_for_size_p ()
20337 && expected_size != -1 && expected_size < 256))
20338 optimize_for_speed = false;
20339 else
20340 optimize_for_speed = true;
20341
20342 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
20343
20344 *dynamic_check = -1;
20345 if (memset)
20346 algs = &cost->memset[TARGET_64BIT != 0];
20347 else
20348 algs = &cost->memcpy[TARGET_64BIT != 0];
20349 if (ix86_stringop_alg != no_stringop && ALG_USABLE_P (ix86_stringop_alg))
20350 return ix86_stringop_alg;
20351 /* rep; movq or rep; movl is the smallest variant. */
20352 else if (!optimize_for_speed)
20353 {
20354 if (!count || (count & 3))
20355 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
20356 else
20357 return rep_prefix_usable ? rep_prefix_4_byte : loop;
20358 }
20359 /* Very tiny blocks are best handled via the loop; REP is expensive to set up. */
20361 else if (expected_size != -1 && expected_size < 4)
20362 return loop_1_byte;
20363 else if (expected_size != -1)
20364 {
20365 unsigned int i;
20366 enum stringop_alg alg = libcall;
20367 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20368 {
20369 /* We get here if the algorithms that were not libcall-based
20370 were rep-prefix based and we are unable to use rep prefixes
20371 based on global register usage. Break out of the loop and
20372 use the heuristic below. */
20373 if (algs->size[i].max == 0)
20374 break;
20375 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
20376 {
20377 enum stringop_alg candidate = algs->size[i].alg;
20378
20379 if (candidate != libcall && ALG_USABLE_P (candidate))
20380 alg = candidate;
20381 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
20382 last non-libcall inline algorithm. */
20383 if (TARGET_INLINE_ALL_STRINGOPS)
20384 {
20385 /* When the current size is best copied by a libcall,
20386 but we are still forced to inline, run the heuristic below
20387 that will pick code for medium sized blocks. */
20388 if (alg != libcall)
20389 return alg;
20390 break;
20391 }
20392 else if (ALG_USABLE_P (candidate))
20393 return candidate;
20394 }
20395 }
20396 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
20397 }
20398 /* When asked to inline the call anyway, try to pick a meaningful choice.
20399 We look for the maximal block size that is faster to copy by hand and
20400 take blocks of at most that size, guessing that the average size will
20401 be roughly half of the block.
20402
20403 If this turns out to be bad, we might simply specify the preferred
20404 choice in ix86_costs. */
20405 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20406 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
20407 {
20408 int max = -1;
20409 enum stringop_alg alg;
20410 int i;
20411 bool any_alg_usable_p = true;
20412
20413 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20414 {
20415 enum stringop_alg candidate = algs->size[i].alg;
20416 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
20417
20418 if (candidate != libcall && candidate
20419 && ALG_USABLE_P (candidate))
20420 max = algs->size[i].max;
20421 }
20422 /* If there aren't any usable algorithms, then recursing on
20423 smaller sizes isn't going to find anything. Just return the
20424 simple byte-at-a-time copy loop. */
20425 if (!any_alg_usable_p)
20426 {
20427 /* Pick something reasonable. */
20428 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20429 *dynamic_check = 128;
20430 return loop_1_byte;
20431 }
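/* If no finite upper bound on the inline-profitable size was recorded,
   cap the size guess at 4096 bytes before recursing.  */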
20432 if (max == -1)
20433 max = 4096;
20434 alg = decide_alg (count, max / 2, memset, dynamic_check);
20435 gcc_assert (*dynamic_check == -1);
20436 gcc_assert (alg != libcall);
20437 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20438 *dynamic_check = max;
20439 return alg;
20440 }
20441 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
20442 #undef ALG_USABLE_P
20443 }
20444
20445 /* Decide on alignment. We know that the operand is already aligned to ALIGN
20446 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
20447 static int
20448 decide_alignment (int align,
20449 enum stringop_alg alg,
20450 int expected_size)
20451 {
20452 int desired_align = 0;
20453 switch (alg)
20454 {
20455 case no_stringop:
20456 gcc_unreachable ();
20457 case loop:
20458 case unrolled_loop:
20459 desired_align = GET_MODE_SIZE (Pmode);
20460 break;
20461 case rep_prefix_8_byte:
20462 desired_align = 8;
20463 break;
20464 case rep_prefix_4_byte:
20465 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
20466 copying a whole cacheline at once. */
20467 if (TARGET_PENTIUMPRO)
20468 desired_align = 8;
20469 else
20470 desired_align = 4;
20471 break;
20472 case rep_prefix_1_byte:
20473 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
20474 copying a whole cacheline at once. */
20475 if (TARGET_PENTIUMPRO)
20476 desired_align = 8;
20477 else
20478 desired_align = 1;
20479 break;
20480 case loop_1_byte:
20481 desired_align = 1;
20482 break;
20483 case libcall:
20484 return 0;
20485 }
20486
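/* When optimizing for size, do not emit an alignment prologue at all.  */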
20487 if (optimize_size)
20488 desired_align = 1;
20489 if (desired_align < align)
20490 desired_align = align;
20491 if (expected_size != -1 && expected_size < 4)
20492 desired_align = align;
20493 return desired_align;
20494 }
20495
20496 /* Return the smallest power of 2 greater than VAL. */
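/* E.g. smallest_pow2_greater_than (4) == 8 and smallest_pow2_greater_than (7) == 8;
   the result is always strictly greater than VAL.  */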
20497 static int
20498 smallest_pow2_greater_than (int val)
20499 {
20500 int ret = 1;
20501 while (ret <= val)
20502 ret <<= 1;
20503 return ret;
20504 }
20505
20506 /* Expand string move (memcpy) operation. Use i386 string operations
20507 when profitable. expand_setmem contains similar code. The code
20508 depends upon architecture, block size and alignment, but always has
20509 the same overall structure:
20510
20511 1) Prologue guard: Conditional that jumps up to epilogues for small
20512 blocks that can be handled by the epilogue alone. This is faster,
20513 but also needed for correctness, since the prologue assumes the block
20514 is larger than the desired alignment.
20515
20516 Optional dynamic check for size and libcall for large
20517 blocks is emitted here too, with -minline-stringops-dynamically.
20518
20519 2) Prologue: copy first few bytes in order to get destination
20520 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
20521 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
20522 copied. We emit either a jump tree on power of two sized
20523 blocks, or a byte loop.
20524
20525 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
20526 with specified algorithm.
20527
20528 4) Epilogue: code copying tail of the block that is too small to be
20529 handled by main body (or up to size guarded by prologue guard). */
20530
20531 bool
20532 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
20533 rtx expected_align_exp, rtx expected_size_exp)
20534 {
20535 rtx destreg;
20536 rtx srcreg;
20537 rtx label = NULL;
20538 rtx tmp;
20539 rtx jump_around_label = NULL;
20540 HOST_WIDE_INT align = 1;
20541 unsigned HOST_WIDE_INT count = 0;
20542 HOST_WIDE_INT expected_size = -1;
20543 int size_needed = 0, epilogue_size_needed;
20544 int desired_align = 0, align_bytes = 0;
20545 enum stringop_alg alg;
20546 int dynamic_check;
20547 bool need_zero_guard = false;
20548
20549 if (CONST_INT_P (align_exp))
20550 align = INTVAL (align_exp);
20551 /* i386 can do misaligned access at a reasonably increased cost. */
20552 if (CONST_INT_P (expected_align_exp)
20553 && INTVAL (expected_align_exp) > align)
20554 align = INTVAL (expected_align_exp);
20555 /* ALIGN is the minimum of destination and source alignment, but we care here
20556 just about destination alignment. */
20557 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
20558 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
20559
20560 if (CONST_INT_P (count_exp))
20561 count = expected_size = INTVAL (count_exp);
20562 if (CONST_INT_P (expected_size_exp) && count == 0)
20563 expected_size = INTVAL (expected_size_exp);
20564
20565 /* Make sure we don't need to care about overflow later on. */
20566 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
20567 return false;
20568
20569 /* Step 0: Decide on preferred algorithm, desired alignment and
20570 size of chunks to be copied by main loop. */
20571
20572 alg = decide_alg (count, expected_size, false, &dynamic_check);
20573 desired_align = decide_alignment (align, alg, expected_size);
20574
20575 if (!TARGET_ALIGN_STRINGOPS)
20576 align = desired_align;
20577
20578 if (alg == libcall)
20579 return false;
20580 gcc_assert (alg != no_stringop);
20581 if (!count)
20582 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
20583 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
20584 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
20585 switch (alg)
20586 {
20587 case libcall:
20588 case no_stringop:
20589 gcc_unreachable ();
20590 case loop:
20591 need_zero_guard = true;
20592 size_needed = GET_MODE_SIZE (Pmode);
20593 break;
20594 case unrolled_loop:
20595 need_zero_guard = true;
20596 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
20597 break;
20598 case rep_prefix_8_byte:
20599 size_needed = 8;
20600 break;
20601 case rep_prefix_4_byte:
20602 size_needed = 4;
20603 break;
20604 case rep_prefix_1_byte:
20605 size_needed = 1;
20606 break;
20607 case loop_1_byte:
20608 need_zero_guard = true;
20609 size_needed = 1;
20610 break;
20611 }
20612
20613 epilogue_size_needed = size_needed;
20614
20615 /* Step 1: Prologue guard. */
20616
20617 /* Alignment code needs count to be in register. */
20618 if (CONST_INT_P (count_exp) && desired_align > align)
20619 {
20620 if (INTVAL (count_exp) > desired_align
20621 && INTVAL (count_exp) > size_needed)
20622 {
20623 align_bytes
20624 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
20625 if (align_bytes <= 0)
20626 align_bytes = 0;
20627 else
20628 align_bytes = desired_align - align_bytes;
20629 }
20630 if (align_bytes == 0)
20631 count_exp = force_reg (counter_mode (count_exp), count_exp);
20632 }
20633 gcc_assert (desired_align >= 1 && align >= 1);
20634
20635 /* Ensure that alignment prologue won't copy past end of block. */
20636 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
20637 {
20638 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
20639 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
20640 Make sure it is a power of 2. */
20641 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
20642
20643 if (count)
20644 {
20645 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
20646 {
20647 /* If main algorithm works on QImode, no epilogue is needed.
20648 For small sizes just don't align anything. */
20649 if (size_needed == 1)
20650 desired_align = align;
20651 else
20652 goto epilogue;
20653 }
20654 }
20655 else
20656 {
20657 label = gen_label_rtx ();
20658 emit_cmp_and_jump_insns (count_exp,
20659 GEN_INT (epilogue_size_needed),
20660 LTU, 0, counter_mode (count_exp), 1, label);
20661 if (expected_size == -1 || expected_size < epilogue_size_needed)
20662 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20663 else
20664 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20665 }
20666 }
20667
20668 /* Emit code to decide on runtime whether library call or inline should be
20669 used. */
20670 if (dynamic_check != -1)
20671 {
20672 if (CONST_INT_P (count_exp))
20673 {
20674 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
20675 {
20676 emit_block_move_via_libcall (dst, src, count_exp, false);
20677 count_exp = const0_rtx;
20678 goto epilogue;
20679 }
20680 }
20681 else
20682 {
20683 rtx hot_label = gen_label_rtx ();
20684 jump_around_label = gen_label_rtx ();
20685 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
20686 LEU, 0, GET_MODE (count_exp), 1, hot_label);
20687 predict_jump (REG_BR_PROB_BASE * 90 / 100);
20688 emit_block_move_via_libcall (dst, src, count_exp, false);
20689 emit_jump (jump_around_label);
20690 emit_label (hot_label);
20691 }
20692 }
20693
20694 /* Step 2: Alignment prologue. */
20695
20696 if (desired_align > align)
20697 {
20698 if (align_bytes == 0)
20699 {
20700 /* Except for the first move in the epilogue, we no longer know
20701 the constant offset in aliasing info. It doesn't seem worth
20702 the pain to maintain it for the first move, so throw away
20703 the info early. */
20704 src = change_address (src, BLKmode, srcreg);
20705 dst = change_address (dst, BLKmode, destreg);
20706 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
20707 desired_align);
20708 }
20709 else
20710 {
20711 /* If we know how many bytes need to be stored before dst is
20712 sufficiently aligned, maintain aliasing info accurately. */
20713 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
20714 desired_align, align_bytes);
20715 count_exp = plus_constant (count_exp, -align_bytes);
20716 count -= align_bytes;
20717 }
20718 if (need_zero_guard
20719 && (count < (unsigned HOST_WIDE_INT) size_needed
20720 || (align_bytes == 0
20721 && count < ((unsigned HOST_WIDE_INT) size_needed
20722 + desired_align - align))))
20723 {
20724 /* It is possible that we copied enough so the main loop will not
20725 execute. */
20726 gcc_assert (size_needed > 1);
20727 if (label == NULL_RTX)
20728 label = gen_label_rtx ();
20729 emit_cmp_and_jump_insns (count_exp,
20730 GEN_INT (size_needed),
20731 LTU, 0, counter_mode (count_exp), 1, label);
20732 if (expected_size == -1
20733 || expected_size < (desired_align - align) / 2 + size_needed)
20734 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20735 else
20736 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20737 }
20738 }
20739 if (label && size_needed == 1)
20740 {
20741 emit_label (label);
20742 LABEL_NUSES (label) = 1;
20743 label = NULL;
20744 epilogue_size_needed = 1;
20745 }
20746 else if (label == NULL_RTX)
20747 epilogue_size_needed = size_needed;
20748
20749 /* Step 3: Main loop. */
20750
20751 switch (alg)
20752 {
20753 case libcall:
20754 case no_stringop:
20755 gcc_unreachable ();
20756 case loop_1_byte:
20757 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20758 count_exp, QImode, 1, expected_size);
20759 break;
20760 case loop:
20761 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20762 count_exp, Pmode, 1, expected_size);
20763 break;
20764 case unrolled_loop:
20765 /* Unroll only by a factor of 2 in 32bit mode, since we don't have enough
20766 registers for 4 temporaries anyway. */
20767 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20768 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
20769 expected_size);
20770 break;
20771 case rep_prefix_8_byte:
20772 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20773 DImode);
20774 break;
20775 case rep_prefix_4_byte:
20776 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20777 SImode);
20778 break;
20779 case rep_prefix_1_byte:
20780 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20781 QImode);
20782 break;
20783 }
20784 /* Properly adjust the offsets of src and dest memory for aliasing. */
20785 if (CONST_INT_P (count_exp))
20786 {
20787 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
20788 (count / size_needed) * size_needed);
20789 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
20790 (count / size_needed) * size_needed);
20791 }
20792 else
20793 {
20794 src = change_address (src, BLKmode, srcreg);
20795 dst = change_address (dst, BLKmode, destreg);
20796 }
20797
20798 /* Step 4: Epilogue to copy the remaining bytes. */
20799 epilogue:
20800 if (label)
20801 {
20802 /* When the main loop is done, COUNT_EXP might hold the original count,
20803 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
20804 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
20805 bytes. Compensate if needed. */
20806
20807 if (size_needed < epilogue_size_needed)
20808 {
20809 tmp =
20810 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
20811 GEN_INT (size_needed - 1), count_exp, 1,
20812 OPTAB_DIRECT);
20813 if (tmp != count_exp)
20814 emit_move_insn (count_exp, tmp);
20815 }
20816 emit_label (label);
20817 LABEL_NUSES (label) = 1;
20818 }
20819
20820 if (count_exp != const0_rtx && epilogue_size_needed > 1)
20821 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
20822 epilogue_size_needed);
20823 if (jump_around_label)
20824 emit_label (jump_around_label);
20825 return true;
20826 }
20827
20828 /* Helper function for memset. For QImode value 0xXY produce
20829 0xXYXYXYXY of the width specified by MODE. This is essentially
20830 a * 0x01010101, but we can do slightly better than
20831 synth_mult by unwinding the sequence by hand on CPUs with
20832 slow multiply. */
20833 static rtx
20834 promote_duplicated_reg (enum machine_mode mode, rtx val)
20835 {
20836 enum machine_mode valmode = GET_MODE (val);
20837 rtx tmp;
20838 int nops = mode == DImode ? 3 : 2;
20839
20840 gcc_assert (mode == SImode || mode == DImode);
20841 if (val == const0_rtx)
20842 return copy_to_mode_reg (mode, const0_rtx);
20843 if (CONST_INT_P (val))
20844 {
20845 HOST_WIDE_INT v = INTVAL (val) & 255;
20846
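/* Replicate the low byte into every byte: 0xXY -> 0xXYXY -> 0xXYXYXYXY
   (and, for DImode, 0xXYXYXYXYXYXYXYXY below).  */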
20847 v |= v << 8;
20848 v |= v << 16;
20849 if (mode == DImode)
20850 v |= (v << 16) << 16;
20851 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
20852 }
20853
20854 if (valmode == VOIDmode)
20855 valmode = QImode;
20856 if (valmode != QImode)
20857 val = gen_lowpart (QImode, val);
20858 if (mode == QImode)
20859 return val;
20860 if (!TARGET_PARTIAL_REG_STALL)
20861 nops--;
20862 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
20863 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
20864 <= (ix86_cost->shift_const + ix86_cost->add) * nops
20865 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
20866 {
20867 rtx reg = convert_modes (mode, QImode, val, true);
20868 tmp = promote_duplicated_reg (mode, const1_rtx);
20869 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
20870 OPTAB_DIRECT);
20871 }
20872 else
20873 {
20874 rtx reg = convert_modes (mode, QImode, val, true);
20875
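/* Replicate the low byte of REG: first into the low 16 bits (via insv when
   partial register stalls are not a problem, otherwise via shift and IOR),
   then into 32 bits, and finally into 64 bits for DImode.  */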
20876 if (!TARGET_PARTIAL_REG_STALL)
20877 if (mode == SImode)
20878 emit_insn (gen_movsi_insv_1 (reg, reg));
20879 else
20880 emit_insn (gen_movdi_insv_1 (reg, reg));
20881 else
20882 {
20883 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
20884 NULL, 1, OPTAB_DIRECT);
20885 reg =
20886 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20887 }
20888 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
20889 NULL, 1, OPTAB_DIRECT);
20890 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20891 if (mode == SImode)
20892 return reg;
20893 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
20894 NULL, 1, OPTAB_DIRECT);
20895 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20896 return reg;
20897 }
20898 }
20899
20900 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
20901 will be needed by the main loop copying SIZE_NEEDED chunks and by the
20902 prologue getting alignment from ALIGN to DESIRED_ALIGN. */
20903 static rtx
20904 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
20905 {
20906 rtx promoted_val;
20907
20908 if (TARGET_64BIT
20909 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
20910 promoted_val = promote_duplicated_reg (DImode, val);
20911 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
20912 promoted_val = promote_duplicated_reg (SImode, val);
20913 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
20914 promoted_val = promote_duplicated_reg (HImode, val);
20915 else
20916 promoted_val = val;
20917
20918 return promoted_val;
20919 }
20920
20921 /* Expand string set operation (memset). Use i386 string operations when
20922 profitable. See the expand_movmem comment for an explanation of the
20923 individual steps performed. */
20924 bool
20925 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
20926 rtx expected_align_exp, rtx expected_size_exp)
20927 {
20928 rtx destreg;
20929 rtx label = NULL;
20930 rtx tmp;
20931 rtx jump_around_label = NULL;
20932 HOST_WIDE_INT align = 1;
20933 unsigned HOST_WIDE_INT count = 0;
20934 HOST_WIDE_INT expected_size = -1;
20935 int size_needed = 0, epilogue_size_needed;
20936 int desired_align = 0, align_bytes = 0;
20937 enum stringop_alg alg;
20938 rtx promoted_val = NULL;
20939 bool force_loopy_epilogue = false;
20940 int dynamic_check;
20941 bool need_zero_guard = false;
20942
20943 if (CONST_INT_P (align_exp))
20944 align = INTVAL (align_exp);
20945 /* i386 can do misaligned access at a reasonably increased cost. */
20946 if (CONST_INT_P (expected_align_exp)
20947 && INTVAL (expected_align_exp) > align)
20948 align = INTVAL (expected_align_exp);
20949 if (CONST_INT_P (count_exp))
20950 count = expected_size = INTVAL (count_exp);
20951 if (CONST_INT_P (expected_size_exp) && count == 0)
20952 expected_size = INTVAL (expected_size_exp);
20953
20954 /* Make sure we don't need to care about overflow later on. */
20955 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
20956 return false;
20957
20958 /* Step 0: Decide on preferred algorithm, desired alignment and
20959 size of chunks to be copied by main loop. */
20960
20961 alg = decide_alg (count, expected_size, true, &dynamic_check);
20962 desired_align = decide_alignment (align, alg, expected_size);
20963
20964 if (!TARGET_ALIGN_STRINGOPS)
20965 align = desired_align;
20966
20967 if (alg == libcall)
20968 return false;
20969 gcc_assert (alg != no_stringop);
20970 if (!count)
20971 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
20972 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
20973 switch (alg)
20974 {
20975 case libcall:
20976 case no_stringop:
20977 gcc_unreachable ();
20978 case loop:
20979 need_zero_guard = true;
20980 size_needed = GET_MODE_SIZE (Pmode);
20981 break;
20982 case unrolled_loop:
20983 need_zero_guard = true;
20984 size_needed = GET_MODE_SIZE (Pmode) * 4;
20985 break;
20986 case rep_prefix_8_byte:
20987 size_needed = 8;
20988 break;
20989 case rep_prefix_4_byte:
20990 size_needed = 4;
20991 break;
20992 case rep_prefix_1_byte:
20993 size_needed = 1;
20994 break;
20995 case loop_1_byte:
20996 need_zero_guard = true;
20997 size_needed = 1;
20998 break;
20999 }
21000 epilogue_size_needed = size_needed;
21001
21002 /* Step 1: Prologue guard. */
21003
21004 /* Alignment code needs count to be in register. */
21005 if (CONST_INT_P (count_exp) && desired_align > align)
21006 {
21007 if (INTVAL (count_exp) > desired_align
21008 && INTVAL (count_exp) > size_needed)
21009 {
21010 align_bytes
21011 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
21012 if (align_bytes <= 0)
21013 align_bytes = 0;
21014 else
21015 align_bytes = desired_align - align_bytes;
21016 }
21017 if (align_bytes == 0)
21018 {
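/* Load the constant count into a register wide enough to hold its value.  */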
21019 enum machine_mode mode = SImode;
21020 if (TARGET_64BIT && (count & ~0xffffffff))
21021 mode = DImode;
21022 count_exp = force_reg (mode, count_exp);
21023 }
21024 }
21025 /* Do the cheap promotion to allow better CSE across the
21026 main loop and epilogue (i.e. one load of the big constant in
21027 front of all the code). */
21028 if (CONST_INT_P (val_exp))
21029 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
21030 desired_align, align);
21031 /* Ensure that alignment prologue won't copy past end of block. */
21032 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
21033 {
21034 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
21035 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
21036 Make sure it is a power of 2. */
21037 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
21038
21039 /* To improve performance of small blocks, we jump around the code
21040 that promotes VAL. This means that if the promoted VAL is not constant,
21041 we might not use it in the epilogue and have to use the byte
21042 loop variant. */
21043 if (epilogue_size_needed > 2 && !promoted_val)
21044 force_loopy_epilogue = true;
21045 if (count)
21046 {
21047 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
21048 {
21049 /* If main algorithm works on QImode, no epilogue is needed.
21050 For small sizes just don't align anything. */
21051 if (size_needed == 1)
21052 desired_align = align;
21053 else
21054 goto epilogue;
21055 }
21056 }
21057 else
21058 {
21059 label = gen_label_rtx ();
21060 emit_cmp_and_jump_insns (count_exp,
21061 GEN_INT (epilogue_size_needed),
21062 LTU, 0, counter_mode (count_exp), 1, label);
21063 if (expected_size == -1 || expected_size <= epilogue_size_needed)
21064 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21065 else
21066 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21067 }
21068 }
21069 if (dynamic_check != -1)
21070 {
21071 rtx hot_label = gen_label_rtx ();
21072 jump_around_label = gen_label_rtx ();
21073 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
21074 LEU, 0, counter_mode (count_exp), 1, hot_label);
21075 predict_jump (REG_BR_PROB_BASE * 90 / 100);
21076 set_storage_via_libcall (dst, count_exp, val_exp, false);
21077 emit_jump (jump_around_label);
21078 emit_label (hot_label);
21079 }
21080
21081 /* Step 2: Alignment prologue. */
21082
21083 /* Do the expensive promotion once we have branched off the small blocks. */
21084 if (!promoted_val)
21085 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
21086 desired_align, align);
21087 gcc_assert (desired_align >= 1 && align >= 1);
21088
21089 if (desired_align > align)
21090 {
21091 if (align_bytes == 0)
21092 {
21093 /* Except for the first move in the epilogue, we no longer know
21094 the constant offset in aliasing info. It doesn't seem worth
21095 the pain to maintain it for the first move, so throw away
21096 the info early. */
21097 dst = change_address (dst, BLKmode, destreg);
21098 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
21099 desired_align);
21100 }
21101 else
21102 {
21103 /* If we know how many bytes need to be stored before dst is
21104 sufficiently aligned, maintain aliasing info accurately. */
21105 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
21106 desired_align, align_bytes);
21107 count_exp = plus_constant (count_exp, -align_bytes);
21108 count -= align_bytes;
21109 }
21110 if (need_zero_guard
21111 && (count < (unsigned HOST_WIDE_INT) size_needed
21112 || (align_bytes == 0
21113 && count < ((unsigned HOST_WIDE_INT) size_needed
21114 + desired_align - align))))
21115 {
21116 /* It is possible that we copied enough so the main loop will not
21117 execute. */
21118 gcc_assert (size_needed > 1);
21119 if (label == NULL_RTX)
21120 label = gen_label_rtx ();
21121 emit_cmp_and_jump_insns (count_exp,
21122 GEN_INT (size_needed),
21123 LTU, 0, counter_mode (count_exp), 1, label);
21124 if (expected_size == -1
21125 || expected_size < (desired_align - align) / 2 + size_needed)
21126 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21127 else
21128 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21129 }
21130 }
21131 if (label && size_needed == 1)
21132 {
21133 emit_label (label);
21134 LABEL_NUSES (label) = 1;
21135 label = NULL;
21136 promoted_val = val_exp;
21137 epilogue_size_needed = 1;
21138 }
21139 else if (label == NULL_RTX)
21140 epilogue_size_needed = size_needed;
21141
21142 /* Step 3: Main loop. */
21143
21144 switch (alg)
21145 {
21146 case libcall:
21147 case no_stringop:
21148 gcc_unreachable ();
21149 case loop_1_byte:
21150 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21151 count_exp, QImode, 1, expected_size);
21152 break;
21153 case loop:
21154 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21155 count_exp, Pmode, 1, expected_size);
21156 break;
21157 case unrolled_loop:
21158 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21159 count_exp, Pmode, 4, expected_size);
21160 break;
21161 case rep_prefix_8_byte:
21162 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21163 DImode, val_exp);
21164 break;
21165 case rep_prefix_4_byte:
21166 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21167 SImode, val_exp);
21168 break;
21169 case rep_prefix_1_byte:
21170 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21171 QImode, val_exp);
21172 break;
21173 }
21174 /* Properly adjust the offset of the destination memory for aliasing. */
21175 if (CONST_INT_P (count_exp))
21176 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
21177 (count / size_needed) * size_needed);
21178 else
21179 dst = change_address (dst, BLKmode, destreg);
21180
21181 /* Step 4: Epilogue to copy the remaining bytes. */
21182
21183 if (label)
21184 {
21185 /* When the main loop is done, COUNT_EXP might hold the original count,
21186 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
21187 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
21188 bytes. Compensate if needed. */
21189
21190 if (size_needed < epilogue_size_needed)
21191 {
21192 tmp =
21193 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
21194 GEN_INT (size_needed - 1), count_exp, 1,
21195 OPTAB_DIRECT);
21196 if (tmp != count_exp)
21197 emit_move_insn (count_exp, tmp);
21198 }
21199 emit_label (label);
21200 LABEL_NUSES (label) = 1;
21201 }
21202 epilogue:
21203 if (count_exp != const0_rtx && epilogue_size_needed > 1)
21204 {
21205 if (force_loopy_epilogue)
21206 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
21207 epilogue_size_needed);
21208 else
21209 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
21210 epilogue_size_needed);
21211 }
21212 if (jump_around_label)
21213 emit_label (jump_around_label);
21214 return true;
21215 }
21216
21217 /* Expand the appropriate insns for doing strlen if not just doing
21218 repnz; scasb
21219
21220 out = result, initialized with the start address
21221 align_rtx = alignment of the address.
21222 scratch = scratch register, initialized with the start address when
21223 not aligned, otherwise undefined
21224
21225 This is just the body. It needs the initializations mentioned above and
21226 some address computation at the end. These things are done in i386.md. */
21227
21228 static void
21229 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
21230 {
21231 int align;
21232 rtx tmp;
21233 rtx align_2_label = NULL_RTX;
21234 rtx align_3_label = NULL_RTX;
21235 rtx align_4_label = gen_label_rtx ();
21236 rtx end_0_label = gen_label_rtx ();
21237 rtx mem;
21238 rtx tmpreg = gen_reg_rtx (SImode);
21239 rtx scratch = gen_reg_rtx (SImode);
21240 rtx cmp;
21241
21242 align = 0;
21243 if (CONST_INT_P (align_rtx))
21244 align = INTVAL (align_rtx);
21245
21246 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
21247
21248 /* Is there a known alignment and is it less than 4? */
21249 if (align < 4)
21250 {
21251 rtx scratch1 = gen_reg_rtx (Pmode);
21252 emit_move_insn (scratch1, out);
21253 /* Is there a known alignment and is it not 2? */
21254 if (align != 2)
21255 {
21256 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
21257 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
21258
21259 /* Leave just the 3 lower bits. */
21260 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
21261 NULL_RTX, 0, OPTAB_WIDEN);
21262
21263 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21264 Pmode, 1, align_4_label);
21265 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
21266 Pmode, 1, align_2_label);
21267 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
21268 Pmode, 1, align_3_label);
21269 }
21270 else
21271 {
21272 /* Since the alignment is 2, we have to check 2 or 0 bytes;
21273 check whether it is aligned to a 4-byte boundary. */
21274
21275 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
21276 NULL_RTX, 0, OPTAB_WIDEN);
21277
21278 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21279 Pmode, 1, align_4_label);
21280 }
21281
21282 mem = change_address (src, QImode, out);
21283
21284 /* Now compare the bytes. */
21285
21286 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
21287 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
21288 QImode, 1, end_0_label);
21289
21290 /* Increment the address. */
21291 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21292
21293 /* Not needed with an alignment of 2. */
21294 if (align != 2)
21295 {
21296 emit_label (align_2_label);
21297
21298 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21299 end_0_label);
21300
21301 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21302
21303 emit_label (align_3_label);
21304 }
21305
21306 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21307 end_0_label);
21308
21309 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21310 }
21311
21312 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
21313 align this loop: it only makes the code bigger and does not speed it
21314 up. */
21315 emit_label (align_4_label);
21316
21317 mem = change_address (src, SImode, out);
21318 emit_move_insn (scratch, mem);
21319 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
21320
21321 /* This formula yields a nonzero result iff one of the bytes is zero.
21322 This saves three branches inside the loop and many cycles. */
21323
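/* Specifically, TMPREG = (SCRATCH - 0x01010101) & ~SCRATCH & 0x80808080:
   the 0x80 bit corresponding to the lowest zero byte of SCRATCH is set and
   no bit below it is, so the code below can locate the first zero byte.  */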
21324 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
21325 emit_insn (gen_one_cmplsi2 (scratch, scratch));
21326 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
21327 emit_insn (gen_andsi3 (tmpreg, tmpreg,
21328 gen_int_mode (0x80808080, SImode)));
21329 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
21330 align_4_label);
21331
21332 if (TARGET_CMOVE)
21333 {
21334 rtx reg = gen_reg_rtx (SImode);
21335 rtx reg2 = gen_reg_rtx (Pmode);
21336 emit_move_insn (reg, tmpreg);
21337 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
21338
21339 /* If zero is not in the first two bytes, move two bytes forward. */
21340 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21341 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21342 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21343 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
21344 gen_rtx_IF_THEN_ELSE (SImode, tmp,
21345 reg,
21346 tmpreg)));
21347 /* Emit lea manually to avoid clobbering of flags. */
21348 emit_insn (gen_rtx_SET (SImode, reg2,
21349 gen_rtx_PLUS (Pmode, out, const2_rtx)));
21350
21351 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21352 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21353 emit_insn (gen_rtx_SET (VOIDmode, out,
21354 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
21355 reg2,
21356 out)));
21357 }
21358 else
21359 {
21360 rtx end_2_label = gen_label_rtx ();
21361 /* Is zero in the first two bytes? */
21362
21363 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21364 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21365 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
21366 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21367 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
21368 pc_rtx);
21369 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
21370 JUMP_LABEL (tmp) = end_2_label;
21371
21372 /* Not in the first two. Move two bytes forward. */
21373 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
21374 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
21375
21376 emit_label (end_2_label);
21377
21378 }
21379
21380 /* Avoid branch in fixing the byte. */
21381 tmpreg = gen_lowpart (QImode, tmpreg);
21382 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
21383 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
21384 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
21385 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
21386
21387 emit_label (end_0_label);
21388 }
21389
21390 /* Expand strlen. */
21391
21392 bool
21393 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
21394 {
21395 rtx addr, scratch1, scratch2, scratch3, scratch4;
21396
21397 /* The generic case of the strlen expander is long. Avoid expanding
21398 it unless TARGET_INLINE_ALL_STRINGOPS. */
21399
21400 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21401 && !TARGET_INLINE_ALL_STRINGOPS
21402 && !optimize_insn_for_size_p ()
21403 && (!CONST_INT_P (align) || INTVAL (align) < 4))
21404 return false;
21405
21406 addr = force_reg (Pmode, XEXP (src, 0));
21407 scratch1 = gen_reg_rtx (Pmode);
21408
21409 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21410 && !optimize_insn_for_size_p ())
21411 {
21412 /* Well, it seems that some optimizer does not combine a call like
21413 foo(strlen(bar), strlen(bar));
21414 when the move and the subtraction are done here. It does calculate
21415 the length just once when these instructions are done inside
21416 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
21417 often used and I use one fewer register for the lifetime of
21418 output_strlen_unroll(), this is better. */
21419
21420 emit_move_insn (out, addr);
21421
21422 ix86_expand_strlensi_unroll_1 (out, src, align);
21423
21424 /* strlensi_unroll_1 returns the address of the zero at the end of
21425 the string, like memchr(), so compute the length by subtracting
21426 the start address. */
21427 emit_insn (ix86_gen_sub3 (out, out, addr));
21428 }
21429 else
21430 {
21431 rtx unspec;
21432
21433 /* Can't use this if the user has appropriated eax, ecx, or edi. */
21434 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
21435 return false;
21436
21437 scratch2 = gen_reg_rtx (Pmode);
21438 scratch3 = gen_reg_rtx (Pmode);
21439 scratch4 = force_reg (Pmode, constm1_rtx);
21440
21441 emit_move_insn (scratch3, addr);
21442 eoschar = force_reg (QImode, eoschar);
21443
21444 src = replace_equiv_address_nv (src, scratch3);
21445
21446 /* If .md starts supporting :P, this can be done in .md. */
21447 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
21448 scratch4), UNSPEC_SCAS);
21449 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
21450 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
21451 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
21452 }
21453 return true;
21454 }
21455
21456 /* For a given symbol (function), construct code to compute the address of its
21457 PLT entry in the large x86-64 PIC model. */
21458 rtx
21459 construct_plt_address (rtx symbol)
21460 {
21461 rtx tmp = gen_reg_rtx (Pmode);
21462 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
21463
21464 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
21465 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
21466
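/* The @PLTOFF offset is relative to the GOT base kept in the PIC register;
   adding the two yields the address of SYMBOL's PLT entry.  */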
21467 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
21468 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
21469 return tmp;
21470 }
21471
21472 rtx
21473 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
21474 rtx callarg2,
21475 rtx pop, bool sibcall)
21476 {
21477 rtx use = NULL, call;
21478
21479 if (pop == const0_rtx)
21480 pop = NULL;
21481 gcc_assert (!TARGET_64BIT || !pop);
21482
21483 if (TARGET_MACHO && !TARGET_64BIT)
21484 {
21485 #if TARGET_MACHO
21486 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
21487 fnaddr = machopic_indirect_call_target (fnaddr);
21488 #endif
21489 }
21490 else
21491 {
21492 /* Static functions and indirect calls don't need the pic register. */
21493 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
21494 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
21495 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
21496 use_reg (&use, pic_offset_table_rtx);
21497 }
21498
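/* For 64-bit SysV varargs calls, AL must hold the number of SSE registers
   used for argument passing.  */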
21499 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
21500 {
21501 rtx al = gen_rtx_REG (QImode, AX_REG);
21502 emit_move_insn (al, callarg2);
21503 use_reg (&use, al);
21504 }
21505
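/* In the large PIC model, reach non-local symbols through the PLT address
   computed above; otherwise force an address that is not a valid call
   operand into a register.  */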
21506 if (ix86_cmodel == CM_LARGE_PIC
21507 && MEM_P (fnaddr)
21508 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
21509 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
21510 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
21511 else if (sibcall
21512 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
21513 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
21514 {
21515 fnaddr = XEXP (fnaddr, 0);
21516 if (GET_MODE (fnaddr) != Pmode)
21517 fnaddr = convert_to_mode (Pmode, fnaddr, 1);
21518 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (Pmode, fnaddr));
21519 }
21520
21521 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
21522 if (retval)
21523 call = gen_rtx_SET (VOIDmode, retval, call);
21524 if (pop)
21525 {
21526 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
21527 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
21528 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
21529 }
21530 if (TARGET_64BIT_MS_ABI
21531 && (!callarg2 || INTVAL (callarg2) != -2))
21532 {
21533 /* We need to represent that SI and DI registers are clobbered
21534 by SYSV calls. */
21535 static int clobbered_registers[] = {
21536 XMM6_REG, XMM7_REG, XMM8_REG,
21537 XMM9_REG, XMM10_REG, XMM11_REG,
21538 XMM12_REG, XMM13_REG, XMM14_REG,
21539 XMM15_REG, SI_REG, DI_REG
21540 };
21541 unsigned int i;
21542 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
21543 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
21544 UNSPEC_MS_TO_SYSV_CALL);
21545
21546 vec[0] = call;
21547 vec[1] = unspec;
21548 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
21549 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
21550 ? TImode : DImode,
21551 gen_rtx_REG
21552 (SSE_REGNO_P (clobbered_registers[i])
21553 ? TImode : DImode,
21554 clobbered_registers[i]));
21555
21556 call = gen_rtx_PARALLEL (VOIDmode,
21557 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
21558 + 2, vec));
21559 }
21560
21561 /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
21562 if (TARGET_VZEROUPPER)
21563 {
21564 rtx unspec;
21565 int avx256;
21566
21567 if (cfun->machine->callee_pass_avx256_p)
21568 {
21569 if (cfun->machine->callee_return_avx256_p)
21570 avx256 = callee_return_pass_avx256;
21571 else
21572 avx256 = callee_pass_avx256;
21573 }
21574 else if (cfun->machine->callee_return_avx256_p)
21575 avx256 = callee_return_avx256;
21576 else
21577 avx256 = call_no_avx256;
21578
21579 if (reload_completed)
21580 emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
21581 else
21582 {
21583 unspec = gen_rtx_UNSPEC (VOIDmode,
21584 gen_rtvec (1, GEN_INT (avx256)),
21585 UNSPEC_CALL_NEEDS_VZEROUPPER);
21586 call = gen_rtx_PARALLEL (VOIDmode,
21587 gen_rtvec (2, call, unspec));
21588 }
21589 }
21590
21591 call = emit_call_insn (call);
21592 if (use)
21593 CALL_INSN_FUNCTION_USAGE (call) = use;
21594
21595 return call;
21596 }
21597
21598 void
21599 ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
21600 {
21601 rtx call = XVECEXP (PATTERN (insn), 0, 0);
21602 emit_insn (gen_avx_vzeroupper (vzeroupper));
21603 emit_call_insn (call);
21604 }
21605
21606 /* Output the assembly for a call instruction. */
21607
21608 const char *
21609 ix86_output_call_insn (rtx insn, rtx call_op)
21610 {
21611 bool direct_p = constant_call_address_operand (call_op, Pmode);
21612 bool seh_nop_p = false;
21613 const char *xasm;
21614
21615 if (SIBLING_CALL_P (insn))
21616 {
21617 if (direct_p)
21618 xasm = "jmp\t%P0";
21619 /* SEH epilogue detection requires the indirect branch case
21620 to include REX.W. */
21621 else if (TARGET_SEH)
21622 xasm = "rex.W jmp %A0";
21623 else
21624 xasm = "jmp\t%A0";
21625
21626 output_asm_insn (xasm, &call_op);
21627 return "";
21628 }
21629
21630 /* SEH unwinding can require an extra nop to be emitted in several
21631 circumstances. Determine if we have one of those. */
21632 if (TARGET_SEH)
21633 {
21634 rtx i;
21635
21636 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
21637 {
21638 /* If we get to another real insn, we don't need the nop. */
21639 if (INSN_P (i))
21640 break;
21641
21642 /* If we get to the epilogue note, prevent a catch region from
21643 being adjacent to the standard epilogue sequence. With non-call
21644 exceptions, we'll have done this during epilogue emission. */
21645 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
21646 && !flag_non_call_exceptions
21647 && !can_throw_internal (insn))
21648 {
21649 seh_nop_p = true;
21650 break;
21651 }
21652 }
21653
21654 /* If we didn't find a real insn following the call, prevent the
21655 unwinder from looking into the next function. */
21656 if (i == NULL)
21657 seh_nop_p = true;
21658 }
21659
21660 if (direct_p)
21661 xasm = "call\t%P0";
21662 else
21663 xasm = "call\t%A0";
21664
21665 output_asm_insn (xasm, &call_op);
21666
21667 if (seh_nop_p)
21668 return "nop";
21669
21670 return "";
21671 }
21672 \f
21673 /* Clear stack slot assignments remembered from previous functions.
21674 This is called from INIT_EXPANDERS once before RTL is emitted for each
21675 function. */
21676
21677 static struct machine_function *
21678 ix86_init_machine_status (void)
21679 {
21680 struct machine_function *f;
21681
21682 f = ggc_alloc_cleared_machine_function ();
21683 f->use_fast_prologue_epilogue_nregs = -1;
21684 f->tls_descriptor_call_expanded_p = 0;
21685 f->call_abi = ix86_abi;
21686
21687 return f;
21688 }
21689
21690 /* Return a MEM corresponding to a stack slot with mode MODE.
21691 Allocate a new slot if necessary.
21692
21693 The RTL for a function can have several slots available: N is
21694 which slot to use. */
21695
21696 rtx
21697 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
21698 {
21699 struct stack_local_entry *s;
21700
21701 gcc_assert (n < MAX_386_STACK_LOCALS);
21702
21703 /* Virtual slot is valid only before vregs are instantiated. */
21704 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
21705
21706 for (s = ix86_stack_locals; s; s = s->next)
21707 if (s->mode == mode && s->n == n)
21708 return copy_rtx (s->rtl);
21709
21710 s = ggc_alloc_stack_local_entry ();
21711 s->n = n;
21712 s->mode = mode;
21713 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
21714
21715 s->next = ix86_stack_locals;
21716 ix86_stack_locals = s;
21717 return s->rtl;
21718 }
21719 \f
21720 /* Calculate the length of the memory address in the instruction
21721 encoding. Does not include the one-byte modrm, opcode, or prefix. */
21722
21723 int
21724 memory_address_length (rtx addr)
21725 {
21726 struct ix86_address parts;
21727 rtx base, index, disp;
21728 int len;
21729 int ok;
21730
21731 if (GET_CODE (addr) == PRE_DEC
21732 || GET_CODE (addr) == POST_INC
21733 || GET_CODE (addr) == PRE_MODIFY
21734 || GET_CODE (addr) == POST_MODIFY)
21735 return 0;
21736
21737 ok = ix86_decompose_address (addr, &parts);
21738 gcc_assert (ok);
21739
21740 if (parts.base && GET_CODE (parts.base) == SUBREG)
21741 parts.base = SUBREG_REG (parts.base);
21742 if (parts.index && GET_CODE (parts.index) == SUBREG)
21743 parts.index = SUBREG_REG (parts.index);
21744
21745 base = parts.base;
21746 index = parts.index;
21747 disp = parts.disp;
21748 len = 0;
21749
21750 /* Rule of thumb:
21751 - esp as the base always wants an index,
21752 - ebp as the base always wants a displacement,
21753 - r12 as the base always wants an index,
21754 - r13 as the base always wants a displacement. */
21755
21756 /* Register Indirect. */
21757 if (base && !index && !disp)
21758 {
21759 /* esp (for its index) and ebp (for its displacement) need
21760 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
21761 code. */
21762 if (REG_P (addr)
21763 && (addr == arg_pointer_rtx
21764 || addr == frame_pointer_rtx
21765 || REGNO (addr) == SP_REG
21766 || REGNO (addr) == BP_REG
21767 || REGNO (addr) == R12_REG
21768 || REGNO (addr) == R13_REG))
21769 len = 1;
21770 }
21771
21772 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
21773 is not disp32, but disp32(%rip), so for disp32
21774 a SIB byte is needed, unless print_operand_address
21775 optimizes it into disp32(%rip) or (%rip) is implied
21776 by UNSPEC. */
21777 else if (disp && !base && !index)
21778 {
21779 len = 4;
21780 if (TARGET_64BIT)
21781 {
21782 rtx symbol = disp;
21783
21784 if (GET_CODE (disp) == CONST)
21785 symbol = XEXP (disp, 0);
21786 if (GET_CODE (symbol) == PLUS
21787 && CONST_INT_P (XEXP (symbol, 1)))
21788 symbol = XEXP (symbol, 0);
21789
21790 if (GET_CODE (symbol) != LABEL_REF
21791 && (GET_CODE (symbol) != SYMBOL_REF
21792 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
21793 && (GET_CODE (symbol) != UNSPEC
21794 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
21795 && XINT (symbol, 1) != UNSPEC_PCREL
21796 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
21797 len += 1;
21798 }
21799 }
21800
21801 else
21802 {
21803 /* Find the length of the displacement constant. */
21804 if (disp)
21805 {
21806 if (base && satisfies_constraint_K (disp))
21807 len = 1;
21808 else
21809 len = 4;
21810 }
21811 /* ebp always wants a displacement. Similarly r13. */
21812 else if (base && REG_P (base)
21813 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
21814 len = 1;
21815
21816 /* An index requires the two-byte modrm form.... */
21817 if (index
21818 /* ...like esp (or r12), which always wants an index. */
21819 || base == arg_pointer_rtx
21820 || base == frame_pointer_rtx
21821 || (base && REG_P (base)
21822 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
21823 len += 1;
21824 }
21825
21826 switch (parts.seg)
21827 {
21828 case SEG_FS:
21829 case SEG_GS:
21830 len += 1;
21831 break;
21832 default:
21833 break;
21834 }
21835
21836 return len;
21837 }
21838
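/* For illustration (not counting the one-byte modrm, opcode or non-segment
   prefixes), the values computed above for some common address forms are:

       (%eax)          -> 0   plain register indirect
       (%esp)          -> 1   esp needs a SIB byte
       (%ebp)          -> 1   ebp needs a zero disp8
       8(%ebx)         -> 1   disp8 satisfies constraint K
       1024(%ebx)      -> 4   disp32
       (%eax,%ecx,4)   -> 1   an index forces a SIB byte
       symbol          -> 4   disp32; +1 in 64-bit code when a SIB byte is
                              needed because disp32(%rip) cannot be used
       %gs:(%eax)      -> 1   segment override prefix  */
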
21839 /* Compute default value for "length_immediate" attribute. When SHORTFORM
21840 is set, expect that the insn has an 8-bit immediate alternative. */
21841 int
21842 ix86_attr_length_immediate_default (rtx insn, bool shortform)
21843 {
21844 int len = 0;
21845 int i;
21846 extract_insn_cached (insn);
21847 for (i = recog_data.n_operands - 1; i >= 0; --i)
21848 if (CONSTANT_P (recog_data.operand[i]))
21849 {
21850 enum attr_mode mode = get_attr_mode (insn);
21851
21852 gcc_assert (!len);
21853 if (shortform && CONST_INT_P (recog_data.operand[i]))
21854 {
21855 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
21856 switch (mode)
21857 {
21858 case MODE_QI:
21859 len = 1;
21860 continue;
21861 case MODE_HI:
21862 ival = trunc_int_for_mode (ival, HImode);
21863 break;
21864 case MODE_SI:
21865 ival = trunc_int_for_mode (ival, SImode);
21866 break;
21867 default:
21868 break;
21869 }
21870 if (IN_RANGE (ival, -128, 127))
21871 {
21872 len = 1;
21873 continue;
21874 }
21875 }
21876 switch (mode)
21877 {
21878 case MODE_QI:
21879 len = 1;
21880 break;
21881 case MODE_HI:
21882 len = 2;
21883 break;
21884 case MODE_SI:
21885 len = 4;
21886 break;
21887 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
21888 case MODE_DI:
21889 len = 4;
21890 break;
21891 default:
21892 fatal_insn ("unknown insn mode", insn);
21893 }
21894 }
21895 return len;
21896 }
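
/* For illustration, with SHORTFORM set an instruction such as
   "addl $100, %eax" uses the sign-extended 8-bit immediate form and is
   assigned length 1, while "addl $1000, %eax" needs a full 4-byte
   immediate.  DImode immediates are always encoded as 32-bit
   sign-extended values, hence length 4.  */
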
21897 /* Compute default value for "length_address" attribute. */
21898 int
21899 ix86_attr_length_address_default (rtx insn)
21900 {
21901 int i;
21902
21903 if (get_attr_type (insn) == TYPE_LEA)
21904 {
21905 rtx set = PATTERN (insn), addr;
21906
21907 if (GET_CODE (set) == PARALLEL)
21908 set = XVECEXP (set, 0, 0);
21909
21910 gcc_assert (GET_CODE (set) == SET);
21911
21912 addr = SET_SRC (set);
21913 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
21914 {
21915 if (GET_CODE (addr) == ZERO_EXTEND)
21916 addr = XEXP (addr, 0);
21917 if (GET_CODE (addr) == SUBREG)
21918 addr = SUBREG_REG (addr);
21919 }
21920
21921 return memory_address_length (addr);
21922 }
21923
21924 extract_insn_cached (insn);
21925 for (i = recog_data.n_operands - 1; i >= 0; --i)
21926 if (MEM_P (recog_data.operand[i]))
21927 {
21928 constrain_operands_cached (reload_completed);
21929 if (which_alternative != -1)
21930 {
21931 const char *constraints = recog_data.constraints[i];
21932 int alt = which_alternative;
21933
21934 while (*constraints == '=' || *constraints == '+')
21935 constraints++;
21936 while (alt-- > 0)
21937 while (*constraints++ != ',')
21938 ;
21939 /* Skip ignored operands. */
21940 if (*constraints == 'X')
21941 continue;
21942 }
21943 return memory_address_length (XEXP (recog_data.operand[i], 0));
21944 }
21945 return 0;
21946 }
21947
21948 /* Compute default value for "length_vex" attribute. It includes
21949 2 or 3 byte VEX prefix and 1 opcode byte. */
21950
21951 int
21952 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
21953 {
21954 int i;
21955
21956 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
21957 byte VEX prefix. */
21958 if (!has_0f_opcode || has_vex_w)
21959 return 3 + 1;
21960
21961 /* We can always use 2 byte VEX prefix in 32bit. */
21962 if (!TARGET_64BIT)
21963 return 2 + 1;
21964
21965 extract_insn_cached (insn);
21966
21967 for (i = recog_data.n_operands - 1; i >= 0; --i)
21968 if (REG_P (recog_data.operand[i]))
21969 {
21970 /* REX.W bit uses 3 byte VEX prefix. */
21971 if (GET_MODE (recog_data.operand[i]) == DImode
21972 && GENERAL_REG_P (recog_data.operand[i]))
21973 return 3 + 1;
21974 }
21975 else
21976 {
21977 /* REX.X or REX.B bits use 3 byte VEX prefix. */
21978 if (MEM_P (recog_data.operand[i])
21979 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
21980 return 3 + 1;
21981 }
21982
21983 return 2 + 1;
21984 }
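
/* For illustration: an insn that needs an opcode map other than 0f, or
   that sets VEX.W, always gets the 3-byte VEX prefix (3 + 1 = 4 here).
   Otherwise 32-bit code can always use the 2-byte prefix (2 + 1 = 3),
   while 64-bit code still falls back to the 3-byte form when a DImode
   general register operand (REX.W) or a memory operand mentioning one
   of the extended registers r8-r15/xmm8-xmm15 (REX.X/REX.B) is
   involved.  */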
21985 \f
21986 /* Return the maximum number of instructions a cpu can issue. */
21987
21988 static int
21989 ix86_issue_rate (void)
21990 {
21991 switch (ix86_tune)
21992 {
21993 case PROCESSOR_PENTIUM:
21994 case PROCESSOR_ATOM:
21995 case PROCESSOR_K6:
21996 return 2;
21997
21998 case PROCESSOR_PENTIUMPRO:
21999 case PROCESSOR_PENTIUM4:
22000 case PROCESSOR_CORE2_32:
22001 case PROCESSOR_CORE2_64:
22002 case PROCESSOR_COREI7_32:
22003 case PROCESSOR_COREI7_64:
22004 case PROCESSOR_ATHLON:
22005 case PROCESSOR_K8:
22006 case PROCESSOR_AMDFAM10:
22007 case PROCESSOR_NOCONA:
22008 case PROCESSOR_GENERIC32:
22009 case PROCESSOR_GENERIC64:
22010 case PROCESSOR_BDVER1:
22011 case PROCESSOR_BDVER2:
22012 case PROCESSOR_BTVER1:
22013 return 3;
22014
22015 default:
22016 return 1;
22017 }
22018 }
22019
22020 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
22021 by DEP_INSN and nothing else set by DEP_INSN. */
22022
22023 static bool
22024 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
22025 {
22026 rtx set, set2;
22027
22028 /* Simplify the test for uninteresting insns. */
22029 if (insn_type != TYPE_SETCC
22030 && insn_type != TYPE_ICMOV
22031 && insn_type != TYPE_FCMOV
22032 && insn_type != TYPE_IBR)
22033 return false;
22034
22035 if ((set = single_set (dep_insn)) != 0)
22036 {
22037 set = SET_DEST (set);
22038 set2 = NULL_RTX;
22039 }
22040 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
22041 && XVECLEN (PATTERN (dep_insn), 0) == 2
22042 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
22043 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
22044 {
22045 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
22046 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
22047 }
22048 else
22049 return false;
22050
22051 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
22052 return false;
22053
22054 /* This test is true if the dependent insn reads the flags but
22055 not any other potentially set register. */
22056 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
22057 return false;
22058
22059 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
22060 return false;
22061
22062 return true;
22063 }
22064
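/* Example of the pairing test above: a "cmpl %eax, %ebx" followed by
   "jne" reads only the flags that the compare sets, so
   ix86_flags_dependent returns true and the Pentium tuning in
   ix86_adjust_cost (below) drops the dependence cost to 0.  */
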
22065 /* Return true iff USE_INSN has a memory address with operands set by
22066 SET_INSN. */
22067
22068 bool
22069 ix86_agi_dependent (rtx set_insn, rtx use_insn)
22070 {
22071 int i;
22072 extract_insn_cached (use_insn);
22073 for (i = recog_data.n_operands - 1; i >= 0; --i)
22074 if (MEM_P (recog_data.operand[i]))
22075 {
22076 rtx addr = XEXP (recog_data.operand[i], 0);
22077 return modified_in_p (addr, set_insn) != 0;
22078 }
22079 return false;
22080 }
22081
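/* Example of an address generation interlock: with

       movl %ebx, %ecx
       movl (%ecx), %eax

   the second insn has a memory operand whose address register is
   written by the first, so ix86_agi_dependent returns true and, for
   instance, the Pentium tuning below adds one cycle to the cost.  */
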
22082 static int
22083 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
22084 {
22085 enum attr_type insn_type, dep_insn_type;
22086 enum attr_memory memory;
22087 rtx set, set2;
22088 int dep_insn_code_number;
22089
22090 /* Anti and output dependencies have zero cost on all CPUs. */
22091 if (REG_NOTE_KIND (link) != 0)
22092 return 0;
22093
22094 dep_insn_code_number = recog_memoized (dep_insn);
22095
22096 /* If we can't recognize the insns, we can't really do anything. */
22097 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
22098 return cost;
22099
22100 insn_type = get_attr_type (insn);
22101 dep_insn_type = get_attr_type (dep_insn);
22102
22103 switch (ix86_tune)
22104 {
22105 case PROCESSOR_PENTIUM:
22106 /* Address Generation Interlock adds a cycle of latency. */
22107 if (insn_type == TYPE_LEA)
22108 {
22109 rtx addr = PATTERN (insn);
22110
22111 if (GET_CODE (addr) == PARALLEL)
22112 addr = XVECEXP (addr, 0, 0);
22113
22114 gcc_assert (GET_CODE (addr) == SET);
22115
22116 addr = SET_SRC (addr);
22117 if (modified_in_p (addr, dep_insn))
22118 cost += 1;
22119 }
22120 else if (ix86_agi_dependent (dep_insn, insn))
22121 cost += 1;
22122
22123 /* ??? Compares pair with jump/setcc. */
22124 if (ix86_flags_dependent (insn, dep_insn, insn_type))
22125 cost = 0;
22126
22127 /* Floating point stores require the value to be ready one cycle earlier. */
22128 if (insn_type == TYPE_FMOV
22129 && get_attr_memory (insn) == MEMORY_STORE
22130 && !ix86_agi_dependent (dep_insn, insn))
22131 cost += 1;
22132 break;
22133
22134 case PROCESSOR_PENTIUMPRO:
22135 memory = get_attr_memory (insn);
22136
22137 /* INT->FP conversion is expensive. */
22138 if (get_attr_fp_int_src (dep_insn))
22139 cost += 5;
22140
22141 /* There is one cycle extra latency between an FP op and a store. */
22142 if (insn_type == TYPE_FMOV
22143 && (set = single_set (dep_insn)) != NULL_RTX
22144 && (set2 = single_set (insn)) != NULL_RTX
22145 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
22146 && MEM_P (SET_DEST (set2)))
22147 cost += 1;
22148
22149 /* Show the ability of the reorder buffer to hide load latency by
22150 executing the load in parallel with the previous instruction when the
22151 previous instruction is not needed to compute the address. */
22152 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22153 && !ix86_agi_dependent (dep_insn, insn))
22154 {
22155 /* Claim moves to take one cycle, as the core can issue one load
22156 at a time and the next load can start a cycle later. */
22157 if (dep_insn_type == TYPE_IMOV
22158 || dep_insn_type == TYPE_FMOV)
22159 cost = 1;
22160 else if (cost > 1)
22161 cost--;
22162 }
22163 break;
22164
22165 case PROCESSOR_K6:
22166 memory = get_attr_memory (insn);
22167
22168 /* The esp dependency is resolved before the instruction is really
22169 finished. */
22170 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
22171 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
22172 return 1;
22173
22174 /* INT->FP conversion is expensive. */
22175 if (get_attr_fp_int_src (dep_insn))
22176 cost += 5;
22177
22178 /* Show the ability of the reorder buffer to hide load latency by
22179 executing the load in parallel with the previous instruction when the
22180 previous instruction is not needed to compute the address. */
22181 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22182 && !ix86_agi_dependent (dep_insn, insn))
22183 {
22184 /* Claim moves to take one cycle, as the core can issue one load
22185 at a time and the next load can start a cycle later. */
22186 if (dep_insn_type == TYPE_IMOV
22187 || dep_insn_type == TYPE_FMOV)
22188 cost = 1;
22189 else if (cost > 2)
22190 cost -= 2;
22191 else
22192 cost = 1;
22193 }
22194 break;
22195
22196 case PROCESSOR_ATHLON:
22197 case PROCESSOR_K8:
22198 case PROCESSOR_AMDFAM10:
22199 case PROCESSOR_BDVER1:
22200 case PROCESSOR_BDVER2:
22201 case PROCESSOR_BTVER1:
22202 case PROCESSOR_ATOM:
22203 case PROCESSOR_GENERIC32:
22204 case PROCESSOR_GENERIC64:
22205 memory = get_attr_memory (insn);
22206
22207 /* Show the ability of the reorder buffer to hide load latency by
22208 executing the load in parallel with the previous instruction when the
22209 previous instruction is not needed to compute the address. */
22210 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22211 && !ix86_agi_dependent (dep_insn, insn))
22212 {
22213 enum attr_unit unit = get_attr_unit (insn);
22214 int loadcost = 3;
22215
22216 /* Because of the difference between the length of integer and
22217 floating unit pipeline preparation stages, the memory operands
22218 for floating point are cheaper.
22219
22220 ??? For Athlon the difference is most probably 2. */
22221 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
22222 loadcost = 3;
22223 else
22224 loadcost = TARGET_ATHLON ? 2 : 0;
22225
22226 if (cost >= loadcost)
22227 cost -= loadcost;
22228 else
22229 cost = 0;
22230 }
22231
22232 default:
22233 break;
22234 }
22235
22236 return cost;
22237 }
22238
22239 /* How many alternative schedules to try. This should be as wide as the
22240 scheduling freedom in the DFA, but no wider. Making this value too
22241 large results in extra work for the scheduler. */
22242
22243 static int
22244 ia32_multipass_dfa_lookahead (void)
22245 {
22246 switch (ix86_tune)
22247 {
22248 case PROCESSOR_PENTIUM:
22249 return 2;
22250
22251 case PROCESSOR_PENTIUMPRO:
22252 case PROCESSOR_K6:
22253 return 1;
22254
22255 case PROCESSOR_CORE2_32:
22256 case PROCESSOR_CORE2_64:
22257 case PROCESSOR_COREI7_32:
22258 case PROCESSOR_COREI7_64:
22259 /* Generally, we want haifa-sched:max_issue() to look ahead as far
22260 as the number of instructions that can be executed in one cycle, i.e.,
22261 issue_rate. I wonder why tuning for many CPUs does not do this. */
22262 return ix86_issue_rate ();
22263
22264 default:
22265 return 0;
22266 }
22267 }
22268
22269 \f
22270
22271 /* Model the decoder of Core 2/i7.
22272 The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
22273 track the instruction fetch block boundaries and make sure that long
22274 (9+ bytes) instructions are assigned to D0. */
22275
22276 /* Maximum length of an insn that can be handled by
22277 a secondary decoder unit. '8' for Core 2/i7. */
22278 static int core2i7_secondary_decoder_max_insn_size;
22279
22280 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
22281 '16' for Core 2/i7. */
22282 static int core2i7_ifetch_block_size;
22283
22284 /* Maximum number of instructions decoder can handle per cycle.
22285 '6' for Core 2/i7. */
22286 static int core2i7_ifetch_block_max_insns;
22287
22288 typedef struct ix86_first_cycle_multipass_data_ *
22289 ix86_first_cycle_multipass_data_t;
22290 typedef const struct ix86_first_cycle_multipass_data_ *
22291 const_ix86_first_cycle_multipass_data_t;
22292
22293 /* A variable to store target state across calls to max_issue within
22294 one cycle. */
22295 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
22296 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
22297
22298 /* Initialize DATA. */
22299 static void
22300 core2i7_first_cycle_multipass_init (void *_data)
22301 {
22302 ix86_first_cycle_multipass_data_t data
22303 = (ix86_first_cycle_multipass_data_t) _data;
22304
22305 data->ifetch_block_len = 0;
22306 data->ifetch_block_n_insns = 0;
22307 data->ready_try_change = NULL;
22308 data->ready_try_change_size = 0;
22309 }
22310
22311 /* Advancing the cycle; reset ifetch block counts. */
22312 static void
22313 core2i7_dfa_post_advance_cycle (void)
22314 {
22315 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
22316
22317 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22318
22319 data->ifetch_block_len = 0;
22320 data->ifetch_block_n_insns = 0;
22321 }
22322
22323 static int min_insn_size (rtx);
22324
22325 /* Filter out insns from ready_try that the core will not be able to issue
22326 on current cycle due to decoder. */
22327 static void
22328 core2i7_first_cycle_multipass_filter_ready_try
22329 (const_ix86_first_cycle_multipass_data_t data,
22330 char *ready_try, int n_ready, bool first_cycle_insn_p)
22331 {
22332 while (n_ready--)
22333 {
22334 rtx insn;
22335 int insn_size;
22336
22337 if (ready_try[n_ready])
22338 continue;
22339
22340 insn = get_ready_element (n_ready);
22341 insn_size = min_insn_size (insn);
22342
22343 if (/* If this insn is too long for a secondary decoder ... */
22344 (!first_cycle_insn_p
22345 && insn_size > core2i7_secondary_decoder_max_insn_size)
22346 /* ... or it would not fit into the ifetch block ... */
22347 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
22348 /* ... or the decoder is full already ... */
22349 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
22350 /* ... mask the insn out. */
22351 {
22352 ready_try[n_ready] = 1;
22353
22354 if (data->ready_try_change)
22355 SET_BIT (data->ready_try_change, n_ready);
22356 }
22357 }
22358 }
22359
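/* For illustration: with the Core 2/i7 parameters set up in
   ix86_sched_init_global (16-byte ifetch block, at most 6 insns per
   block, 8-byte limit for the secondary decoders), an insn of 9 or
   more bytes is only accepted as the first insn of a cycle, and once
   12 bytes of the block are used a 5-byte candidate is masked out of
   ready_try because it would not fit into the remaining 4 bytes.  */
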
22360 /* Prepare for a new round of multipass lookahead scheduling. */
22361 static void
22362 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
22363 bool first_cycle_insn_p)
22364 {
22365 ix86_first_cycle_multipass_data_t data
22366 = (ix86_first_cycle_multipass_data_t) _data;
22367 const_ix86_first_cycle_multipass_data_t prev_data
22368 = ix86_first_cycle_multipass_data;
22369
22370 /* Restore the state from the end of the previous round. */
22371 data->ifetch_block_len = prev_data->ifetch_block_len;
22372 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
22373
22374 /* Filter instructions that cannot be issued on current cycle due to
22375 decoder restrictions. */
22376 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22377 first_cycle_insn_p);
22378 }
22379
22380 /* INSN is being issued in the current solution. Account for its impact on
22381 the decoder model. */
22382 static void
22383 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
22384 rtx insn, const void *_prev_data)
22385 {
22386 ix86_first_cycle_multipass_data_t data
22387 = (ix86_first_cycle_multipass_data_t) _data;
22388 const_ix86_first_cycle_multipass_data_t prev_data
22389 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
22390
22391 int insn_size = min_insn_size (insn);
22392
22393 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
22394 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
22395 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
22396 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22397
22398 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
22399 if (!data->ready_try_change)
22400 {
22401 data->ready_try_change = sbitmap_alloc (n_ready);
22402 data->ready_try_change_size = n_ready;
22403 }
22404 else if (data->ready_try_change_size < n_ready)
22405 {
22406 data->ready_try_change = sbitmap_resize (data->ready_try_change,
22407 n_ready, 0);
22408 data->ready_try_change_size = n_ready;
22409 }
22410 sbitmap_zero (data->ready_try_change);
22411
22412 /* Filter out insns from ready_try that the core will not be able to issue
22413 on current cycle due to decoder. */
22414 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22415 false);
22416 }
22417
22418 /* Revert the effect on ready_try. */
22419 static void
22420 core2i7_first_cycle_multipass_backtrack (const void *_data,
22421 char *ready_try,
22422 int n_ready ATTRIBUTE_UNUSED)
22423 {
22424 const_ix86_first_cycle_multipass_data_t data
22425 = (const_ix86_first_cycle_multipass_data_t) _data;
22426 unsigned int i = 0;
22427 sbitmap_iterator sbi;
22428
22429 gcc_assert (sbitmap_last_set_bit (data->ready_try_change) < n_ready);
22430 EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
22431 {
22432 ready_try[i] = 0;
22433 }
22434 }
22435
22436 /* Save the result of multipass lookahead scheduling for the next round. */
22437 static void
22438 core2i7_first_cycle_multipass_end (const void *_data)
22439 {
22440 const_ix86_first_cycle_multipass_data_t data
22441 = (const_ix86_first_cycle_multipass_data_t) _data;
22442 ix86_first_cycle_multipass_data_t next_data
22443 = ix86_first_cycle_multipass_data;
22444
22445 if (data != NULL)
22446 {
22447 next_data->ifetch_block_len = data->ifetch_block_len;
22448 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
22449 }
22450 }
22451
22452 /* Deallocate target data. */
22453 static void
22454 core2i7_first_cycle_multipass_fini (void *_data)
22455 {
22456 ix86_first_cycle_multipass_data_t data
22457 = (ix86_first_cycle_multipass_data_t) _data;
22458
22459 if (data->ready_try_change)
22460 {
22461 sbitmap_free (data->ready_try_change);
22462 data->ready_try_change = NULL;
22463 data->ready_try_change_size = 0;
22464 }
22465 }
22466
22467 /* Prepare for scheduling pass. */
22468 static void
22469 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
22470 int verbose ATTRIBUTE_UNUSED,
22471 int max_uid ATTRIBUTE_UNUSED)
22472 {
22473 /* Install scheduling hooks for current CPU. Some of these hooks are used
22474 in time-critical parts of the scheduler, so we only set them up when
22475 they are actually used. */
22476 switch (ix86_tune)
22477 {
22478 case PROCESSOR_CORE2_32:
22479 case PROCESSOR_CORE2_64:
22480 case PROCESSOR_COREI7_32:
22481 case PROCESSOR_COREI7_64:
22482 targetm.sched.dfa_post_advance_cycle
22483 = core2i7_dfa_post_advance_cycle;
22484 targetm.sched.first_cycle_multipass_init
22485 = core2i7_first_cycle_multipass_init;
22486 targetm.sched.first_cycle_multipass_begin
22487 = core2i7_first_cycle_multipass_begin;
22488 targetm.sched.first_cycle_multipass_issue
22489 = core2i7_first_cycle_multipass_issue;
22490 targetm.sched.first_cycle_multipass_backtrack
22491 = core2i7_first_cycle_multipass_backtrack;
22492 targetm.sched.first_cycle_multipass_end
22493 = core2i7_first_cycle_multipass_end;
22494 targetm.sched.first_cycle_multipass_fini
22495 = core2i7_first_cycle_multipass_fini;
22496
22497 /* Set decoder parameters. */
22498 core2i7_secondary_decoder_max_insn_size = 8;
22499 core2i7_ifetch_block_size = 16;
22500 core2i7_ifetch_block_max_insns = 6;
22501 break;
22502
22503 default:
22504 targetm.sched.dfa_post_advance_cycle = NULL;
22505 targetm.sched.first_cycle_multipass_init = NULL;
22506 targetm.sched.first_cycle_multipass_begin = NULL;
22507 targetm.sched.first_cycle_multipass_issue = NULL;
22508 targetm.sched.first_cycle_multipass_backtrack = NULL;
22509 targetm.sched.first_cycle_multipass_end = NULL;
22510 targetm.sched.first_cycle_multipass_fini = NULL;
22511 break;
22512 }
22513 }
22514
22515 \f
22516 /* Compute the alignment given to a constant that is being placed in memory.
22517 EXP is the constant and ALIGN is the alignment that the object would
22518 ordinarily have.
22519 The value of this function is used instead of that alignment to align
22520 the object. */
22521
22522 int
22523 ix86_constant_alignment (tree exp, int align)
22524 {
22525 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
22526 || TREE_CODE (exp) == INTEGER_CST)
22527 {
22528 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
22529 return 64;
22530 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
22531 return 128;
22532 }
22533 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
22534 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
22535 return BITS_PER_WORD;
22536
22537 return align;
22538 }
22539
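/* For example, a DFmode constant such as 1.0 placed in memory is given
   at least 64-bit alignment here, and long string constants (31 bytes
   or more) get word alignment unless optimizing for size.  */
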
22540 /* Compute the alignment for a static variable.
22541 TYPE is the data type, and ALIGN is the alignment that
22542 the object would ordinarily have. The value of this function is used
22543 instead of that alignment to align the object. */
22544
22545 int
22546 ix86_data_alignment (tree type, int align)
22547 {
22548 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
22549
22550 if (AGGREGATE_TYPE_P (type)
22551 && TYPE_SIZE (type)
22552 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22553 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
22554 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
22555 && align < max_align)
22556 align = max_align;
22557
22558 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
22559 to a 16-byte boundary. */
22560 if (TARGET_64BIT)
22561 {
22562 if (AGGREGATE_TYPE_P (type)
22563 && TYPE_SIZE (type)
22564 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22565 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
22566 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
22567 return 128;
22568 }
22569
22570 if (TREE_CODE (type) == ARRAY_TYPE)
22571 {
22572 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
22573 return 64;
22574 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
22575 return 128;
22576 }
22577 else if (TREE_CODE (type) == COMPLEX_TYPE)
22578 {
22579
22580 if (TYPE_MODE (type) == DCmode && align < 64)
22581 return 64;
22582 if ((TYPE_MODE (type) == XCmode
22583 || TYPE_MODE (type) == TCmode) && align < 128)
22584 return 128;
22585 }
22586 else if ((TREE_CODE (type) == RECORD_TYPE
22587 || TREE_CODE (type) == UNION_TYPE
22588 || TREE_CODE (type) == QUAL_UNION_TYPE)
22589 && TYPE_FIELDS (type))
22590 {
22591 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
22592 return 64;
22593 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
22594 return 128;
22595 }
22596 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
22597 || TREE_CODE (type) == INTEGER_TYPE)
22598 {
22599 if (TYPE_MODE (type) == DFmode && align < 64)
22600 return 64;
22601 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
22602 return 128;
22603 }
22604
22605 return align;
22606 }
22607
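/* For example, "static double d;" is given at least 64-bit alignment,
   and in 64-bit code "static char buf[16];" is given 128-bit (16-byte)
   alignment as required by the x86-64 ABI for aggregates of 16 bytes
   or more.  */
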
22608 /* Compute the alignment for a local variable or a stack slot. EXP is
22609 the data type or decl itself, MODE is the widest mode available and
22610 ALIGN is the alignment that the object would ordinarily have. The
22611 value of this macro is used instead of that alignment to align the
22612 object. */
22613
22614 unsigned int
22615 ix86_local_alignment (tree exp, enum machine_mode mode,
22616 unsigned int align)
22617 {
22618 tree type, decl;
22619
22620 if (exp && DECL_P (exp))
22621 {
22622 type = TREE_TYPE (exp);
22623 decl = exp;
22624 }
22625 else
22626 {
22627 type = exp;
22628 decl = NULL;
22629 }
22630
22631 /* Don't do dynamic stack realignment for long long objects with
22632 -mpreferred-stack-boundary=2. */
22633 if (!TARGET_64BIT
22634 && align == 64
22635 && ix86_preferred_stack_boundary < 64
22636 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
22637 && (!type || !TYPE_USER_ALIGN (type))
22638 && (!decl || !DECL_USER_ALIGN (decl)))
22639 align = 32;
22640
22641 /* If TYPE is NULL, we are allocating a stack slot for caller-save
22642 register in MODE. We will return the largest alignment of XF
22643 and DF. */
22644 if (!type)
22645 {
22646 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
22647 align = GET_MODE_ALIGNMENT (DFmode);
22648 return align;
22649 }
22650
22651 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
22652 to a 16-byte boundary. The exact wording is:
22653 
22654 An array uses the same alignment as its elements, except that a local or
22655 global array variable of length at least 16 bytes or
22656 a C99 variable-length array variable always has alignment of at least 16 bytes.
22657 
22658 This was added to allow use of aligned SSE instructions on arrays. The
22659 rule is meant for static storage (where the compiler cannot do the analysis
22660 by itself). We follow it for automatic variables only when convenient.
22661 We fully control everything in the function being compiled, and functions
22662 from other units cannot rely on the alignment.
22663 
22664 Exclude the va_list type. It is the common case of a local array where
22665 we cannot benefit from the alignment. */
22666 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
22667 && TARGET_SSE)
22668 {
22669 if (AGGREGATE_TYPE_P (type)
22670 && (va_list_type_node == NULL_TREE
22671 || (TYPE_MAIN_VARIANT (type)
22672 != TYPE_MAIN_VARIANT (va_list_type_node)))
22673 && TYPE_SIZE (type)
22674 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22675 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
22676 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
22677 return 128;
22678 }
22679 if (TREE_CODE (type) == ARRAY_TYPE)
22680 {
22681 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
22682 return 64;
22683 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
22684 return 128;
22685 }
22686 else if (TREE_CODE (type) == COMPLEX_TYPE)
22687 {
22688 if (TYPE_MODE (type) == DCmode && align < 64)
22689 return 64;
22690 if ((TYPE_MODE (type) == XCmode
22691 || TYPE_MODE (type) == TCmode) && align < 128)
22692 return 128;
22693 }
22694 else if ((TREE_CODE (type) == RECORD_TYPE
22695 || TREE_CODE (type) == UNION_TYPE
22696 || TREE_CODE (type) == QUAL_UNION_TYPE)
22697 && TYPE_FIELDS (type))
22698 {
22699 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
22700 return 64;
22701 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
22702 return 128;
22703 }
22704 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
22705 || TREE_CODE (type) == INTEGER_TYPE)
22706 {
22707
22708 if (TYPE_MODE (type) == DFmode && align < 64)
22709 return 64;
22710 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
22711 return 128;
22712 }
22713 return align;
22714 }
22715
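/* For example, on x86-64 with SSE enabled and when optimizing for
   speed, a local "char buf[16]" gets 128-bit (16-byte) stack alignment
   by the ABI rule quoted above, while a "long long" local in 32-bit
   code compiled with -mpreferred-stack-boundary=2 is limited to 32-bit
   alignment to avoid dynamic stack realignment.  */
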
22716 /* Compute the minimum required alignment for dynamic stack realignment
22717 purposes for a local variable, parameter or a stack slot. EXP is
22718 the data type or decl itself, MODE is its mode and ALIGN is the
22719 alignment that the object would ordinarily have. */
22720
22721 unsigned int
22722 ix86_minimum_alignment (tree exp, enum machine_mode mode,
22723 unsigned int align)
22724 {
22725 tree type, decl;
22726
22727 if (exp && DECL_P (exp))
22728 {
22729 type = TREE_TYPE (exp);
22730 decl = exp;
22731 }
22732 else
22733 {
22734 type = exp;
22735 decl = NULL;
22736 }
22737
22738 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
22739 return align;
22740
22741 /* Don't do dynamic stack realignment for long long objects with
22742 -mpreferred-stack-boundary=2. */
22743 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
22744 && (!type || !TYPE_USER_ALIGN (type))
22745 && (!decl || !DECL_USER_ALIGN (decl)))
22746 return 32;
22747
22748 return align;
22749 }
22750 \f
22751 /* Find a location for the static chain incoming to a nested function.
22752 This is a register, unless all free registers are used by arguments. */
22753
22754 static rtx
22755 ix86_static_chain (const_tree fndecl, bool incoming_p)
22756 {
22757 unsigned regno;
22758
22759 if (!DECL_STATIC_CHAIN (fndecl))
22760 return NULL;
22761
22762 if (TARGET_64BIT)
22763 {
22764 /* We always use R10 in 64-bit mode. */
22765 regno = R10_REG;
22766 }
22767 else
22768 {
22769 tree fntype;
22770 unsigned int ccvt;
22771
22772 /* By default in 32-bit mode we use ECX to pass the static chain. */
22773 regno = CX_REG;
22774
22775 fntype = TREE_TYPE (fndecl);
22776 ccvt = ix86_get_callcvt (fntype);
22777 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
22778 {
22779 /* Fastcall functions use ecx/edx for arguments, which leaves
22780 us with EAX for the static chain.
22781 Thiscall functions use ecx for arguments, which also
22782 leaves us with EAX for the static chain. */
22783 regno = AX_REG;
22784 }
22785 else if (ix86_function_regparm (fntype, fndecl) == 3)
22786 {
22787 /* For regparm 3, we have no free call-clobbered registers in
22788 which to store the static chain. In order to implement this,
22789 we have the trampoline push the static chain to the stack.
22790 However, we can't push a value below the return address when
22791 we call the nested function directly, so we have to use an
22792 alternate entry point. For this we use ESI, and have the
22793 alternate entry point push ESI, so that things appear the
22794 same once we're executing the nested function. */
22795 if (incoming_p)
22796 {
22797 if (fndecl == current_function_decl)
22798 ix86_static_chain_on_stack = true;
22799 return gen_frame_mem (SImode,
22800 plus_constant (arg_pointer_rtx, -8));
22801 }
22802 regno = SI_REG;
22803 }
22804 }
22805
22806 return gen_rtx_REG (Pmode, regno);
22807 }
22808
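/* For example, a nested function with the default calling convention
   receives its static chain in %ecx (32-bit) or %r10 (64-bit); a
   fastcall or thiscall nested function receives it in %eax, since
   %ecx is taken by the argument registers.  */
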
22809 /* Emit RTL insns to initialize the variable parts of a trampoline.
22810 FNDECL is the decl of the target address; M_TRAMP is a MEM for
22811 the trampoline, and CHAIN_VALUE is an RTX for the static chain
22812 to be passed to the target function. */
22813
22814 static void
22815 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
22816 {
22817 rtx mem, fnaddr;
22818 int opcode;
22819 int offset = 0;
22820
22821 fnaddr = XEXP (DECL_RTL (fndecl), 0);
22822
22823 if (TARGET_64BIT)
22824 {
22825 int size;
22826
22827 /* Load the function address into r11. Try to load the address using
22828 the shorter movl instead of movabs. We may want to support
22829 movq for kernel mode, but the kernel does not use trampolines at
22830 the moment. */
22831 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
22832 {
22833 fnaddr = copy_to_mode_reg (DImode, fnaddr);
22834
22835 mem = adjust_address (m_tramp, HImode, offset);
22836 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
22837
22838 mem = adjust_address (m_tramp, SImode, offset + 2);
22839 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
22840 offset += 6;
22841 }
22842 else
22843 {
22844 mem = adjust_address (m_tramp, HImode, offset);
22845 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
22846
22847 mem = adjust_address (m_tramp, DImode, offset + 2);
22848 emit_move_insn (mem, fnaddr);
22849 offset += 10;
22850 }
22851
22852 /* Load the static chain into r10 using movabs. Use the
22853 shorter movl instead of movabs for x32. */
22854 if (TARGET_X32)
22855 {
22856 opcode = 0xba41;
22857 size = 6;
22858 }
22859 else
22860 {
22861 opcode = 0xba49;
22862 size = 10;
22863 }
22864
22865 mem = adjust_address (m_tramp, HImode, offset);
22866 emit_move_insn (mem, gen_int_mode (opcode, HImode));
22867
22868 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
22869 emit_move_insn (mem, chain_value);
22870 offset += size;
22871
22872 /* Jump to r11; the last (unused) byte is a nop, only there to
22873 pad the write out to a single 32-bit store. */
22874 mem = adjust_address (m_tramp, SImode, offset);
22875 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
22876 offset += 4;
22877 }
22878 else
22879 {
22880 rtx disp, chain;
22881
22882 /* Depending on the static chain location, either load a register
22883 with a constant, or push the constant to the stack. All of the
22884 instructions are the same size. */
22885 chain = ix86_static_chain (fndecl, true);
22886 if (REG_P (chain))
22887 {
22888 switch (REGNO (chain))
22889 {
22890 case AX_REG:
22891 opcode = 0xb8; break;
22892 case CX_REG:
22893 opcode = 0xb9; break;
22894 default:
22895 gcc_unreachable ();
22896 }
22897 }
22898 else
22899 opcode = 0x68;
22900
22901 mem = adjust_address (m_tramp, QImode, offset);
22902 emit_move_insn (mem, gen_int_mode (opcode, QImode));
22903
22904 mem = adjust_address (m_tramp, SImode, offset + 1);
22905 emit_move_insn (mem, chain_value);
22906 offset += 5;
22907
22908 mem = adjust_address (m_tramp, QImode, offset);
22909 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
22910
22911 mem = adjust_address (m_tramp, SImode, offset + 1);
22912
22913 /* Compute offset from the end of the jmp to the target function.
22914 In the case in which the trampoline stores the static chain on
22915 the stack, we need to skip the first insn which pushes the
22916 (call-saved) register static chain; this push is 1 byte. */
22917 offset += 5;
22918 disp = expand_binop (SImode, sub_optab, fnaddr,
22919 plus_constant (XEXP (m_tramp, 0),
22920 offset - (MEM_P (chain) ? 1 : 0)),
22921 NULL_RTX, 1, OPTAB_DIRECT);
22922 emit_move_insn (mem, disp);
22923 }
22924
22925 gcc_assert (offset <= TRAMPOLINE_SIZE);
22926
22927 #ifdef HAVE_ENABLE_EXECUTE_STACK
22928 #ifdef CHECK_EXECUTE_STACK_ENABLED
22929 if (CHECK_EXECUTE_STACK_ENABLED)
22930 #endif
22931 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
22932 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
22933 #endif
22934 }
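
/* As a sketch, the 64-bit trampoline emitted above (non-x32, function
   address not representable as a zero-extended 32-bit immediate) lays
   out the following bytes:

       49 bb <8-byte fnaddr>      movabs $fnaddr, %r11
       49 ba <8-byte chain>       movabs $chain, %r10
       49 ff e3                   jmp *%r11
       90                         nop (pads the final 4-byte store)

   while the 32-bit variant is "movl $chain, %ecx" (or %eax, or a
   pushl, depending on ix86_static_chain) followed by a "jmp rel32" to
   the target function.  */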
22935 \f
22936 /* The following file contains several enumerations and data structures
22937 built from the definitions in i386-builtin-types.def. */
22938
22939 #include "i386-builtin-types.inc"
22940
22941 /* Table for the ix86 builtin non-function types. */
22942 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
22943
22944 /* Retrieve an element from the above table, building some of
22945 the types lazily. */
22946
22947 static tree
22948 ix86_get_builtin_type (enum ix86_builtin_type tcode)
22949 {
22950 unsigned int index;
22951 tree type, itype;
22952
22953 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
22954
22955 type = ix86_builtin_type_tab[(int) tcode];
22956 if (type != NULL)
22957 return type;
22958
22959 gcc_assert (tcode > IX86_BT_LAST_PRIM);
22960 if (tcode <= IX86_BT_LAST_VECT)
22961 {
22962 enum machine_mode mode;
22963
22964 index = tcode - IX86_BT_LAST_PRIM - 1;
22965 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
22966 mode = ix86_builtin_type_vect_mode[index];
22967
22968 type = build_vector_type_for_mode (itype, mode);
22969 }
22970 else
22971 {
22972 int quals;
22973
22974 index = tcode - IX86_BT_LAST_VECT - 1;
22975 if (tcode <= IX86_BT_LAST_PTR)
22976 quals = TYPE_UNQUALIFIED;
22977 else
22978 quals = TYPE_QUAL_CONST;
22979
22980 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
22981 if (quals != TYPE_UNQUALIFIED)
22982 itype = build_qualified_type (itype, quals);
22983
22984 type = build_pointer_type (itype);
22985 }
22986
22987 ix86_builtin_type_tab[(int) tcode] = type;
22988 return type;
22989 }
22990
22991 /* Table for the ix86 builtin function types. */
22992 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
22993
22994 /* Retrieve an element from the above table, building some of
22995 the types lazily. */
22996
22997 static tree
22998 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
22999 {
23000 tree type;
23001
23002 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
23003
23004 type = ix86_builtin_func_type_tab[(int) tcode];
23005 if (type != NULL)
23006 return type;
23007
23008 if (tcode <= IX86_BT_LAST_FUNC)
23009 {
23010 unsigned start = ix86_builtin_func_start[(int) tcode];
23011 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
23012 tree rtype, atype, args = void_list_node;
23013 unsigned i;
23014
23015 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
23016 for (i = after - 1; i > start; --i)
23017 {
23018 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
23019 args = tree_cons (NULL, atype, args);
23020 }
23021
23022 type = build_function_type (rtype, args);
23023 }
23024 else
23025 {
23026 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
23027 enum ix86_builtin_func_type icode;
23028
23029 icode = ix86_builtin_func_alias_base[index];
23030 type = ix86_get_builtin_func_type (icode);
23031 }
23032
23033 ix86_builtin_func_type_tab[(int) tcode] = type;
23034 return type;
23035 }
23036
23037
23038 /* Codes for all the SSE/MMX builtins. */
23039 enum ix86_builtins
23040 {
23041 IX86_BUILTIN_ADDPS,
23042 IX86_BUILTIN_ADDSS,
23043 IX86_BUILTIN_DIVPS,
23044 IX86_BUILTIN_DIVSS,
23045 IX86_BUILTIN_MULPS,
23046 IX86_BUILTIN_MULSS,
23047 IX86_BUILTIN_SUBPS,
23048 IX86_BUILTIN_SUBSS,
23049
23050 IX86_BUILTIN_CMPEQPS,
23051 IX86_BUILTIN_CMPLTPS,
23052 IX86_BUILTIN_CMPLEPS,
23053 IX86_BUILTIN_CMPGTPS,
23054 IX86_BUILTIN_CMPGEPS,
23055 IX86_BUILTIN_CMPNEQPS,
23056 IX86_BUILTIN_CMPNLTPS,
23057 IX86_BUILTIN_CMPNLEPS,
23058 IX86_BUILTIN_CMPNGTPS,
23059 IX86_BUILTIN_CMPNGEPS,
23060 IX86_BUILTIN_CMPORDPS,
23061 IX86_BUILTIN_CMPUNORDPS,
23062 IX86_BUILTIN_CMPEQSS,
23063 IX86_BUILTIN_CMPLTSS,
23064 IX86_BUILTIN_CMPLESS,
23065 IX86_BUILTIN_CMPNEQSS,
23066 IX86_BUILTIN_CMPNLTSS,
23067 IX86_BUILTIN_CMPNLESS,
23068 IX86_BUILTIN_CMPNGTSS,
23069 IX86_BUILTIN_CMPNGESS,
23070 IX86_BUILTIN_CMPORDSS,
23071 IX86_BUILTIN_CMPUNORDSS,
23072
23073 IX86_BUILTIN_COMIEQSS,
23074 IX86_BUILTIN_COMILTSS,
23075 IX86_BUILTIN_COMILESS,
23076 IX86_BUILTIN_COMIGTSS,
23077 IX86_BUILTIN_COMIGESS,
23078 IX86_BUILTIN_COMINEQSS,
23079 IX86_BUILTIN_UCOMIEQSS,
23080 IX86_BUILTIN_UCOMILTSS,
23081 IX86_BUILTIN_UCOMILESS,
23082 IX86_BUILTIN_UCOMIGTSS,
23083 IX86_BUILTIN_UCOMIGESS,
23084 IX86_BUILTIN_UCOMINEQSS,
23085
23086 IX86_BUILTIN_CVTPI2PS,
23087 IX86_BUILTIN_CVTPS2PI,
23088 IX86_BUILTIN_CVTSI2SS,
23089 IX86_BUILTIN_CVTSI642SS,
23090 IX86_BUILTIN_CVTSS2SI,
23091 IX86_BUILTIN_CVTSS2SI64,
23092 IX86_BUILTIN_CVTTPS2PI,
23093 IX86_BUILTIN_CVTTSS2SI,
23094 IX86_BUILTIN_CVTTSS2SI64,
23095
23096 IX86_BUILTIN_MAXPS,
23097 IX86_BUILTIN_MAXSS,
23098 IX86_BUILTIN_MINPS,
23099 IX86_BUILTIN_MINSS,
23100
23101 IX86_BUILTIN_LOADUPS,
23102 IX86_BUILTIN_STOREUPS,
23103 IX86_BUILTIN_MOVSS,
23104
23105 IX86_BUILTIN_MOVHLPS,
23106 IX86_BUILTIN_MOVLHPS,
23107 IX86_BUILTIN_LOADHPS,
23108 IX86_BUILTIN_LOADLPS,
23109 IX86_BUILTIN_STOREHPS,
23110 IX86_BUILTIN_STORELPS,
23111
23112 IX86_BUILTIN_MASKMOVQ,
23113 IX86_BUILTIN_MOVMSKPS,
23114 IX86_BUILTIN_PMOVMSKB,
23115
23116 IX86_BUILTIN_MOVNTPS,
23117 IX86_BUILTIN_MOVNTQ,
23118
23119 IX86_BUILTIN_LOADDQU,
23120 IX86_BUILTIN_STOREDQU,
23121
23122 IX86_BUILTIN_PACKSSWB,
23123 IX86_BUILTIN_PACKSSDW,
23124 IX86_BUILTIN_PACKUSWB,
23125
23126 IX86_BUILTIN_PADDB,
23127 IX86_BUILTIN_PADDW,
23128 IX86_BUILTIN_PADDD,
23129 IX86_BUILTIN_PADDQ,
23130 IX86_BUILTIN_PADDSB,
23131 IX86_BUILTIN_PADDSW,
23132 IX86_BUILTIN_PADDUSB,
23133 IX86_BUILTIN_PADDUSW,
23134 IX86_BUILTIN_PSUBB,
23135 IX86_BUILTIN_PSUBW,
23136 IX86_BUILTIN_PSUBD,
23137 IX86_BUILTIN_PSUBQ,
23138 IX86_BUILTIN_PSUBSB,
23139 IX86_BUILTIN_PSUBSW,
23140 IX86_BUILTIN_PSUBUSB,
23141 IX86_BUILTIN_PSUBUSW,
23142
23143 IX86_BUILTIN_PAND,
23144 IX86_BUILTIN_PANDN,
23145 IX86_BUILTIN_POR,
23146 IX86_BUILTIN_PXOR,
23147
23148 IX86_BUILTIN_PAVGB,
23149 IX86_BUILTIN_PAVGW,
23150
23151 IX86_BUILTIN_PCMPEQB,
23152 IX86_BUILTIN_PCMPEQW,
23153 IX86_BUILTIN_PCMPEQD,
23154 IX86_BUILTIN_PCMPGTB,
23155 IX86_BUILTIN_PCMPGTW,
23156 IX86_BUILTIN_PCMPGTD,
23157
23158 IX86_BUILTIN_PMADDWD,
23159
23160 IX86_BUILTIN_PMAXSW,
23161 IX86_BUILTIN_PMAXUB,
23162 IX86_BUILTIN_PMINSW,
23163 IX86_BUILTIN_PMINUB,
23164
23165 IX86_BUILTIN_PMULHUW,
23166 IX86_BUILTIN_PMULHW,
23167 IX86_BUILTIN_PMULLW,
23168
23169 IX86_BUILTIN_PSADBW,
23170 IX86_BUILTIN_PSHUFW,
23171
23172 IX86_BUILTIN_PSLLW,
23173 IX86_BUILTIN_PSLLD,
23174 IX86_BUILTIN_PSLLQ,
23175 IX86_BUILTIN_PSRAW,
23176 IX86_BUILTIN_PSRAD,
23177 IX86_BUILTIN_PSRLW,
23178 IX86_BUILTIN_PSRLD,
23179 IX86_BUILTIN_PSRLQ,
23180 IX86_BUILTIN_PSLLWI,
23181 IX86_BUILTIN_PSLLDI,
23182 IX86_BUILTIN_PSLLQI,
23183 IX86_BUILTIN_PSRAWI,
23184 IX86_BUILTIN_PSRADI,
23185 IX86_BUILTIN_PSRLWI,
23186 IX86_BUILTIN_PSRLDI,
23187 IX86_BUILTIN_PSRLQI,
23188
23189 IX86_BUILTIN_PUNPCKHBW,
23190 IX86_BUILTIN_PUNPCKHWD,
23191 IX86_BUILTIN_PUNPCKHDQ,
23192 IX86_BUILTIN_PUNPCKLBW,
23193 IX86_BUILTIN_PUNPCKLWD,
23194 IX86_BUILTIN_PUNPCKLDQ,
23195
23196 IX86_BUILTIN_SHUFPS,
23197
23198 IX86_BUILTIN_RCPPS,
23199 IX86_BUILTIN_RCPSS,
23200 IX86_BUILTIN_RSQRTPS,
23201 IX86_BUILTIN_RSQRTPS_NR,
23202 IX86_BUILTIN_RSQRTSS,
23203 IX86_BUILTIN_RSQRTF,
23204 IX86_BUILTIN_SQRTPS,
23205 IX86_BUILTIN_SQRTPS_NR,
23206 IX86_BUILTIN_SQRTSS,
23207
23208 IX86_BUILTIN_UNPCKHPS,
23209 IX86_BUILTIN_UNPCKLPS,
23210
23211 IX86_BUILTIN_ANDPS,
23212 IX86_BUILTIN_ANDNPS,
23213 IX86_BUILTIN_ORPS,
23214 IX86_BUILTIN_XORPS,
23215
23216 IX86_BUILTIN_EMMS,
23217 IX86_BUILTIN_LDMXCSR,
23218 IX86_BUILTIN_STMXCSR,
23219 IX86_BUILTIN_SFENCE,
23220
23221 /* 3DNow! Original */
23222 IX86_BUILTIN_FEMMS,
23223 IX86_BUILTIN_PAVGUSB,
23224 IX86_BUILTIN_PF2ID,
23225 IX86_BUILTIN_PFACC,
23226 IX86_BUILTIN_PFADD,
23227 IX86_BUILTIN_PFCMPEQ,
23228 IX86_BUILTIN_PFCMPGE,
23229 IX86_BUILTIN_PFCMPGT,
23230 IX86_BUILTIN_PFMAX,
23231 IX86_BUILTIN_PFMIN,
23232 IX86_BUILTIN_PFMUL,
23233 IX86_BUILTIN_PFRCP,
23234 IX86_BUILTIN_PFRCPIT1,
23235 IX86_BUILTIN_PFRCPIT2,
23236 IX86_BUILTIN_PFRSQIT1,
23237 IX86_BUILTIN_PFRSQRT,
23238 IX86_BUILTIN_PFSUB,
23239 IX86_BUILTIN_PFSUBR,
23240 IX86_BUILTIN_PI2FD,
23241 IX86_BUILTIN_PMULHRW,
23242
23243 /* 3DNow! Athlon Extensions */
23244 IX86_BUILTIN_PF2IW,
23245 IX86_BUILTIN_PFNACC,
23246 IX86_BUILTIN_PFPNACC,
23247 IX86_BUILTIN_PI2FW,
23248 IX86_BUILTIN_PSWAPDSI,
23249 IX86_BUILTIN_PSWAPDSF,
23250
23251 /* SSE2 */
23252 IX86_BUILTIN_ADDPD,
23253 IX86_BUILTIN_ADDSD,
23254 IX86_BUILTIN_DIVPD,
23255 IX86_BUILTIN_DIVSD,
23256 IX86_BUILTIN_MULPD,
23257 IX86_BUILTIN_MULSD,
23258 IX86_BUILTIN_SUBPD,
23259 IX86_BUILTIN_SUBSD,
23260
23261 IX86_BUILTIN_CMPEQPD,
23262 IX86_BUILTIN_CMPLTPD,
23263 IX86_BUILTIN_CMPLEPD,
23264 IX86_BUILTIN_CMPGTPD,
23265 IX86_BUILTIN_CMPGEPD,
23266 IX86_BUILTIN_CMPNEQPD,
23267 IX86_BUILTIN_CMPNLTPD,
23268 IX86_BUILTIN_CMPNLEPD,
23269 IX86_BUILTIN_CMPNGTPD,
23270 IX86_BUILTIN_CMPNGEPD,
23271 IX86_BUILTIN_CMPORDPD,
23272 IX86_BUILTIN_CMPUNORDPD,
23273 IX86_BUILTIN_CMPEQSD,
23274 IX86_BUILTIN_CMPLTSD,
23275 IX86_BUILTIN_CMPLESD,
23276 IX86_BUILTIN_CMPNEQSD,
23277 IX86_BUILTIN_CMPNLTSD,
23278 IX86_BUILTIN_CMPNLESD,
23279 IX86_BUILTIN_CMPORDSD,
23280 IX86_BUILTIN_CMPUNORDSD,
23281
23282 IX86_BUILTIN_COMIEQSD,
23283 IX86_BUILTIN_COMILTSD,
23284 IX86_BUILTIN_COMILESD,
23285 IX86_BUILTIN_COMIGTSD,
23286 IX86_BUILTIN_COMIGESD,
23287 IX86_BUILTIN_COMINEQSD,
23288 IX86_BUILTIN_UCOMIEQSD,
23289 IX86_BUILTIN_UCOMILTSD,
23290 IX86_BUILTIN_UCOMILESD,
23291 IX86_BUILTIN_UCOMIGTSD,
23292 IX86_BUILTIN_UCOMIGESD,
23293 IX86_BUILTIN_UCOMINEQSD,
23294
23295 IX86_BUILTIN_MAXPD,
23296 IX86_BUILTIN_MAXSD,
23297 IX86_BUILTIN_MINPD,
23298 IX86_BUILTIN_MINSD,
23299
23300 IX86_BUILTIN_ANDPD,
23301 IX86_BUILTIN_ANDNPD,
23302 IX86_BUILTIN_ORPD,
23303 IX86_BUILTIN_XORPD,
23304
23305 IX86_BUILTIN_SQRTPD,
23306 IX86_BUILTIN_SQRTSD,
23307
23308 IX86_BUILTIN_UNPCKHPD,
23309 IX86_BUILTIN_UNPCKLPD,
23310
23311 IX86_BUILTIN_SHUFPD,
23312
23313 IX86_BUILTIN_LOADUPD,
23314 IX86_BUILTIN_STOREUPD,
23315 IX86_BUILTIN_MOVSD,
23316
23317 IX86_BUILTIN_LOADHPD,
23318 IX86_BUILTIN_LOADLPD,
23319
23320 IX86_BUILTIN_CVTDQ2PD,
23321 IX86_BUILTIN_CVTDQ2PS,
23322
23323 IX86_BUILTIN_CVTPD2DQ,
23324 IX86_BUILTIN_CVTPD2PI,
23325 IX86_BUILTIN_CVTPD2PS,
23326 IX86_BUILTIN_CVTTPD2DQ,
23327 IX86_BUILTIN_CVTTPD2PI,
23328
23329 IX86_BUILTIN_CVTPI2PD,
23330 IX86_BUILTIN_CVTSI2SD,
23331 IX86_BUILTIN_CVTSI642SD,
23332
23333 IX86_BUILTIN_CVTSD2SI,
23334 IX86_BUILTIN_CVTSD2SI64,
23335 IX86_BUILTIN_CVTSD2SS,
23336 IX86_BUILTIN_CVTSS2SD,
23337 IX86_BUILTIN_CVTTSD2SI,
23338 IX86_BUILTIN_CVTTSD2SI64,
23339
23340 IX86_BUILTIN_CVTPS2DQ,
23341 IX86_BUILTIN_CVTPS2PD,
23342 IX86_BUILTIN_CVTTPS2DQ,
23343
23344 IX86_BUILTIN_MOVNTI,
23345 IX86_BUILTIN_MOVNTPD,
23346 IX86_BUILTIN_MOVNTDQ,
23347
23348 IX86_BUILTIN_MOVQ128,
23349
23350 /* SSE2 MMX */
23351 IX86_BUILTIN_MASKMOVDQU,
23352 IX86_BUILTIN_MOVMSKPD,
23353 IX86_BUILTIN_PMOVMSKB128,
23354
23355 IX86_BUILTIN_PACKSSWB128,
23356 IX86_BUILTIN_PACKSSDW128,
23357 IX86_BUILTIN_PACKUSWB128,
23358
23359 IX86_BUILTIN_PADDB128,
23360 IX86_BUILTIN_PADDW128,
23361 IX86_BUILTIN_PADDD128,
23362 IX86_BUILTIN_PADDQ128,
23363 IX86_BUILTIN_PADDSB128,
23364 IX86_BUILTIN_PADDSW128,
23365 IX86_BUILTIN_PADDUSB128,
23366 IX86_BUILTIN_PADDUSW128,
23367 IX86_BUILTIN_PSUBB128,
23368 IX86_BUILTIN_PSUBW128,
23369 IX86_BUILTIN_PSUBD128,
23370 IX86_BUILTIN_PSUBQ128,
23371 IX86_BUILTIN_PSUBSB128,
23372 IX86_BUILTIN_PSUBSW128,
23373 IX86_BUILTIN_PSUBUSB128,
23374 IX86_BUILTIN_PSUBUSW128,
23375
23376 IX86_BUILTIN_PAND128,
23377 IX86_BUILTIN_PANDN128,
23378 IX86_BUILTIN_POR128,
23379 IX86_BUILTIN_PXOR128,
23380
23381 IX86_BUILTIN_PAVGB128,
23382 IX86_BUILTIN_PAVGW128,
23383
23384 IX86_BUILTIN_PCMPEQB128,
23385 IX86_BUILTIN_PCMPEQW128,
23386 IX86_BUILTIN_PCMPEQD128,
23387 IX86_BUILTIN_PCMPGTB128,
23388 IX86_BUILTIN_PCMPGTW128,
23389 IX86_BUILTIN_PCMPGTD128,
23390
23391 IX86_BUILTIN_PMADDWD128,
23392
23393 IX86_BUILTIN_PMAXSW128,
23394 IX86_BUILTIN_PMAXUB128,
23395 IX86_BUILTIN_PMINSW128,
23396 IX86_BUILTIN_PMINUB128,
23397
23398 IX86_BUILTIN_PMULUDQ,
23399 IX86_BUILTIN_PMULUDQ128,
23400 IX86_BUILTIN_PMULHUW128,
23401 IX86_BUILTIN_PMULHW128,
23402 IX86_BUILTIN_PMULLW128,
23403
23404 IX86_BUILTIN_PSADBW128,
23405 IX86_BUILTIN_PSHUFHW,
23406 IX86_BUILTIN_PSHUFLW,
23407 IX86_BUILTIN_PSHUFD,
23408
23409 IX86_BUILTIN_PSLLDQI128,
23410 IX86_BUILTIN_PSLLWI128,
23411 IX86_BUILTIN_PSLLDI128,
23412 IX86_BUILTIN_PSLLQI128,
23413 IX86_BUILTIN_PSRAWI128,
23414 IX86_BUILTIN_PSRADI128,
23415 IX86_BUILTIN_PSRLDQI128,
23416 IX86_BUILTIN_PSRLWI128,
23417 IX86_BUILTIN_PSRLDI128,
23418 IX86_BUILTIN_PSRLQI128,
23419
23420 IX86_BUILTIN_PSLLDQ128,
23421 IX86_BUILTIN_PSLLW128,
23422 IX86_BUILTIN_PSLLD128,
23423 IX86_BUILTIN_PSLLQ128,
23424 IX86_BUILTIN_PSRAW128,
23425 IX86_BUILTIN_PSRAD128,
23426 IX86_BUILTIN_PSRLW128,
23427 IX86_BUILTIN_PSRLD128,
23428 IX86_BUILTIN_PSRLQ128,
23429
23430 IX86_BUILTIN_PUNPCKHBW128,
23431 IX86_BUILTIN_PUNPCKHWD128,
23432 IX86_BUILTIN_PUNPCKHDQ128,
23433 IX86_BUILTIN_PUNPCKHQDQ128,
23434 IX86_BUILTIN_PUNPCKLBW128,
23435 IX86_BUILTIN_PUNPCKLWD128,
23436 IX86_BUILTIN_PUNPCKLDQ128,
23437 IX86_BUILTIN_PUNPCKLQDQ128,
23438
23439 IX86_BUILTIN_CLFLUSH,
23440 IX86_BUILTIN_MFENCE,
23441 IX86_BUILTIN_LFENCE,
23442 IX86_BUILTIN_PAUSE,
23443
23444 IX86_BUILTIN_BSRSI,
23445 IX86_BUILTIN_BSRDI,
23446 IX86_BUILTIN_RDPMC,
23447 IX86_BUILTIN_RDTSC,
23448 IX86_BUILTIN_RDTSCP,
23449 IX86_BUILTIN_ROLQI,
23450 IX86_BUILTIN_ROLHI,
23451 IX86_BUILTIN_RORQI,
23452 IX86_BUILTIN_RORHI,
23453
23454 /* SSE3. */
23455 IX86_BUILTIN_ADDSUBPS,
23456 IX86_BUILTIN_HADDPS,
23457 IX86_BUILTIN_HSUBPS,
23458 IX86_BUILTIN_MOVSHDUP,
23459 IX86_BUILTIN_MOVSLDUP,
23460 IX86_BUILTIN_ADDSUBPD,
23461 IX86_BUILTIN_HADDPD,
23462 IX86_BUILTIN_HSUBPD,
23463 IX86_BUILTIN_LDDQU,
23464
23465 IX86_BUILTIN_MONITOR,
23466 IX86_BUILTIN_MWAIT,
23467
23468 /* SSSE3. */
23469 IX86_BUILTIN_PHADDW,
23470 IX86_BUILTIN_PHADDD,
23471 IX86_BUILTIN_PHADDSW,
23472 IX86_BUILTIN_PHSUBW,
23473 IX86_BUILTIN_PHSUBD,
23474 IX86_BUILTIN_PHSUBSW,
23475 IX86_BUILTIN_PMADDUBSW,
23476 IX86_BUILTIN_PMULHRSW,
23477 IX86_BUILTIN_PSHUFB,
23478 IX86_BUILTIN_PSIGNB,
23479 IX86_BUILTIN_PSIGNW,
23480 IX86_BUILTIN_PSIGND,
23481 IX86_BUILTIN_PALIGNR,
23482 IX86_BUILTIN_PABSB,
23483 IX86_BUILTIN_PABSW,
23484 IX86_BUILTIN_PABSD,
23485
23486 IX86_BUILTIN_PHADDW128,
23487 IX86_BUILTIN_PHADDD128,
23488 IX86_BUILTIN_PHADDSW128,
23489 IX86_BUILTIN_PHSUBW128,
23490 IX86_BUILTIN_PHSUBD128,
23491 IX86_BUILTIN_PHSUBSW128,
23492 IX86_BUILTIN_PMADDUBSW128,
23493 IX86_BUILTIN_PMULHRSW128,
23494 IX86_BUILTIN_PSHUFB128,
23495 IX86_BUILTIN_PSIGNB128,
23496 IX86_BUILTIN_PSIGNW128,
23497 IX86_BUILTIN_PSIGND128,
23498 IX86_BUILTIN_PALIGNR128,
23499 IX86_BUILTIN_PABSB128,
23500 IX86_BUILTIN_PABSW128,
23501 IX86_BUILTIN_PABSD128,
23502
23503 /* AMDFAM10 - SSE4A New Instructions. */
23504 IX86_BUILTIN_MOVNTSD,
23505 IX86_BUILTIN_MOVNTSS,
23506 IX86_BUILTIN_EXTRQI,
23507 IX86_BUILTIN_EXTRQ,
23508 IX86_BUILTIN_INSERTQI,
23509 IX86_BUILTIN_INSERTQ,
23510
23511 /* SSE4.1. */
23512 IX86_BUILTIN_BLENDPD,
23513 IX86_BUILTIN_BLENDPS,
23514 IX86_BUILTIN_BLENDVPD,
23515 IX86_BUILTIN_BLENDVPS,
23516 IX86_BUILTIN_PBLENDVB128,
23517 IX86_BUILTIN_PBLENDW128,
23518
23519 IX86_BUILTIN_DPPD,
23520 IX86_BUILTIN_DPPS,
23521
23522 IX86_BUILTIN_INSERTPS128,
23523
23524 IX86_BUILTIN_MOVNTDQA,
23525 IX86_BUILTIN_MPSADBW128,
23526 IX86_BUILTIN_PACKUSDW128,
23527 IX86_BUILTIN_PCMPEQQ,
23528 IX86_BUILTIN_PHMINPOSUW128,
23529
23530 IX86_BUILTIN_PMAXSB128,
23531 IX86_BUILTIN_PMAXSD128,
23532 IX86_BUILTIN_PMAXUD128,
23533 IX86_BUILTIN_PMAXUW128,
23534
23535 IX86_BUILTIN_PMINSB128,
23536 IX86_BUILTIN_PMINSD128,
23537 IX86_BUILTIN_PMINUD128,
23538 IX86_BUILTIN_PMINUW128,
23539
23540 IX86_BUILTIN_PMOVSXBW128,
23541 IX86_BUILTIN_PMOVSXBD128,
23542 IX86_BUILTIN_PMOVSXBQ128,
23543 IX86_BUILTIN_PMOVSXWD128,
23544 IX86_BUILTIN_PMOVSXWQ128,
23545 IX86_BUILTIN_PMOVSXDQ128,
23546
23547 IX86_BUILTIN_PMOVZXBW128,
23548 IX86_BUILTIN_PMOVZXBD128,
23549 IX86_BUILTIN_PMOVZXBQ128,
23550 IX86_BUILTIN_PMOVZXWD128,
23551 IX86_BUILTIN_PMOVZXWQ128,
23552 IX86_BUILTIN_PMOVZXDQ128,
23553
23554 IX86_BUILTIN_PMULDQ128,
23555 IX86_BUILTIN_PMULLD128,
23556
23557 IX86_BUILTIN_ROUNDPD,
23558 IX86_BUILTIN_ROUNDPS,
23559 IX86_BUILTIN_ROUNDSD,
23560 IX86_BUILTIN_ROUNDSS,
23561
23562 IX86_BUILTIN_FLOORPD,
23563 IX86_BUILTIN_CEILPD,
23564 IX86_BUILTIN_TRUNCPD,
23565 IX86_BUILTIN_RINTPD,
23566 IX86_BUILTIN_FLOORPS,
23567 IX86_BUILTIN_CEILPS,
23568 IX86_BUILTIN_TRUNCPS,
23569 IX86_BUILTIN_RINTPS,
23570
23571 IX86_BUILTIN_PTESTZ,
23572 IX86_BUILTIN_PTESTC,
23573 IX86_BUILTIN_PTESTNZC,
23574
23575 IX86_BUILTIN_VEC_INIT_V2SI,
23576 IX86_BUILTIN_VEC_INIT_V4HI,
23577 IX86_BUILTIN_VEC_INIT_V8QI,
23578 IX86_BUILTIN_VEC_EXT_V2DF,
23579 IX86_BUILTIN_VEC_EXT_V2DI,
23580 IX86_BUILTIN_VEC_EXT_V4SF,
23581 IX86_BUILTIN_VEC_EXT_V4SI,
23582 IX86_BUILTIN_VEC_EXT_V8HI,
23583 IX86_BUILTIN_VEC_EXT_V2SI,
23584 IX86_BUILTIN_VEC_EXT_V4HI,
23585 IX86_BUILTIN_VEC_EXT_V16QI,
23586 IX86_BUILTIN_VEC_SET_V2DI,
23587 IX86_BUILTIN_VEC_SET_V4SF,
23588 IX86_BUILTIN_VEC_SET_V4SI,
23589 IX86_BUILTIN_VEC_SET_V8HI,
23590 IX86_BUILTIN_VEC_SET_V4HI,
23591 IX86_BUILTIN_VEC_SET_V16QI,
23592
23593 IX86_BUILTIN_VEC_PACK_SFIX,
23594
23595 /* SSE4.2. */
23596 IX86_BUILTIN_CRC32QI,
23597 IX86_BUILTIN_CRC32HI,
23598 IX86_BUILTIN_CRC32SI,
23599 IX86_BUILTIN_CRC32DI,
23600
23601 IX86_BUILTIN_PCMPESTRI128,
23602 IX86_BUILTIN_PCMPESTRM128,
23603 IX86_BUILTIN_PCMPESTRA128,
23604 IX86_BUILTIN_PCMPESTRC128,
23605 IX86_BUILTIN_PCMPESTRO128,
23606 IX86_BUILTIN_PCMPESTRS128,
23607 IX86_BUILTIN_PCMPESTRZ128,
23608 IX86_BUILTIN_PCMPISTRI128,
23609 IX86_BUILTIN_PCMPISTRM128,
23610 IX86_BUILTIN_PCMPISTRA128,
23611 IX86_BUILTIN_PCMPISTRC128,
23612 IX86_BUILTIN_PCMPISTRO128,
23613 IX86_BUILTIN_PCMPISTRS128,
23614 IX86_BUILTIN_PCMPISTRZ128,
23615
23616 IX86_BUILTIN_PCMPGTQ,
23617
23618 /* AES instructions */
23619 IX86_BUILTIN_AESENC128,
23620 IX86_BUILTIN_AESENCLAST128,
23621 IX86_BUILTIN_AESDEC128,
23622 IX86_BUILTIN_AESDECLAST128,
23623 IX86_BUILTIN_AESIMC128,
23624 IX86_BUILTIN_AESKEYGENASSIST128,
23625
23626 /* PCLMUL instruction */
23627 IX86_BUILTIN_PCLMULQDQ128,
23628
23629 /* AVX */
23630 IX86_BUILTIN_ADDPD256,
23631 IX86_BUILTIN_ADDPS256,
23632 IX86_BUILTIN_ADDSUBPD256,
23633 IX86_BUILTIN_ADDSUBPS256,
23634 IX86_BUILTIN_ANDPD256,
23635 IX86_BUILTIN_ANDPS256,
23636 IX86_BUILTIN_ANDNPD256,
23637 IX86_BUILTIN_ANDNPS256,
23638 IX86_BUILTIN_BLENDPD256,
23639 IX86_BUILTIN_BLENDPS256,
23640 IX86_BUILTIN_BLENDVPD256,
23641 IX86_BUILTIN_BLENDVPS256,
23642 IX86_BUILTIN_DIVPD256,
23643 IX86_BUILTIN_DIVPS256,
23644 IX86_BUILTIN_DPPS256,
23645 IX86_BUILTIN_HADDPD256,
23646 IX86_BUILTIN_HADDPS256,
23647 IX86_BUILTIN_HSUBPD256,
23648 IX86_BUILTIN_HSUBPS256,
23649 IX86_BUILTIN_MAXPD256,
23650 IX86_BUILTIN_MAXPS256,
23651 IX86_BUILTIN_MINPD256,
23652 IX86_BUILTIN_MINPS256,
23653 IX86_BUILTIN_MULPD256,
23654 IX86_BUILTIN_MULPS256,
23655 IX86_BUILTIN_ORPD256,
23656 IX86_BUILTIN_ORPS256,
23657 IX86_BUILTIN_SHUFPD256,
23658 IX86_BUILTIN_SHUFPS256,
23659 IX86_BUILTIN_SUBPD256,
23660 IX86_BUILTIN_SUBPS256,
23661 IX86_BUILTIN_XORPD256,
23662 IX86_BUILTIN_XORPS256,
23663 IX86_BUILTIN_CMPSD,
23664 IX86_BUILTIN_CMPSS,
23665 IX86_BUILTIN_CMPPD,
23666 IX86_BUILTIN_CMPPS,
23667 IX86_BUILTIN_CMPPD256,
23668 IX86_BUILTIN_CMPPS256,
23669 IX86_BUILTIN_CVTDQ2PD256,
23670 IX86_BUILTIN_CVTDQ2PS256,
23671 IX86_BUILTIN_CVTPD2PS256,
23672 IX86_BUILTIN_CVTPS2DQ256,
23673 IX86_BUILTIN_CVTPS2PD256,
23674 IX86_BUILTIN_CVTTPD2DQ256,
23675 IX86_BUILTIN_CVTPD2DQ256,
23676 IX86_BUILTIN_CVTTPS2DQ256,
23677 IX86_BUILTIN_EXTRACTF128PD256,
23678 IX86_BUILTIN_EXTRACTF128PS256,
23679 IX86_BUILTIN_EXTRACTF128SI256,
23680 IX86_BUILTIN_VZEROALL,
23681 IX86_BUILTIN_VZEROUPPER,
23682 IX86_BUILTIN_VPERMILVARPD,
23683 IX86_BUILTIN_VPERMILVARPS,
23684 IX86_BUILTIN_VPERMILVARPD256,
23685 IX86_BUILTIN_VPERMILVARPS256,
23686 IX86_BUILTIN_VPERMILPD,
23687 IX86_BUILTIN_VPERMILPS,
23688 IX86_BUILTIN_VPERMILPD256,
23689 IX86_BUILTIN_VPERMILPS256,
23690 IX86_BUILTIN_VPERMIL2PD,
23691 IX86_BUILTIN_VPERMIL2PS,
23692 IX86_BUILTIN_VPERMIL2PD256,
23693 IX86_BUILTIN_VPERMIL2PS256,
23694 IX86_BUILTIN_VPERM2F128PD256,
23695 IX86_BUILTIN_VPERM2F128PS256,
23696 IX86_BUILTIN_VPERM2F128SI256,
23697 IX86_BUILTIN_VBROADCASTSS,
23698 IX86_BUILTIN_VBROADCASTSD256,
23699 IX86_BUILTIN_VBROADCASTSS256,
23700 IX86_BUILTIN_VBROADCASTPD256,
23701 IX86_BUILTIN_VBROADCASTPS256,
23702 IX86_BUILTIN_VINSERTF128PD256,
23703 IX86_BUILTIN_VINSERTF128PS256,
23704 IX86_BUILTIN_VINSERTF128SI256,
23705 IX86_BUILTIN_LOADUPD256,
23706 IX86_BUILTIN_LOADUPS256,
23707 IX86_BUILTIN_STOREUPD256,
23708 IX86_BUILTIN_STOREUPS256,
23709 IX86_BUILTIN_LDDQU256,
23710 IX86_BUILTIN_MOVNTDQ256,
23711 IX86_BUILTIN_MOVNTPD256,
23712 IX86_BUILTIN_MOVNTPS256,
23713 IX86_BUILTIN_LOADDQU256,
23714 IX86_BUILTIN_STOREDQU256,
23715 IX86_BUILTIN_MASKLOADPD,
23716 IX86_BUILTIN_MASKLOADPS,
23717 IX86_BUILTIN_MASKSTOREPD,
23718 IX86_BUILTIN_MASKSTOREPS,
23719 IX86_BUILTIN_MASKLOADPD256,
23720 IX86_BUILTIN_MASKLOADPS256,
23721 IX86_BUILTIN_MASKSTOREPD256,
23722 IX86_BUILTIN_MASKSTOREPS256,
23723 IX86_BUILTIN_MOVSHDUP256,
23724 IX86_BUILTIN_MOVSLDUP256,
23725 IX86_BUILTIN_MOVDDUP256,
23726
23727 IX86_BUILTIN_SQRTPD256,
23728 IX86_BUILTIN_SQRTPS256,
23729 IX86_BUILTIN_SQRTPS_NR256,
23730 IX86_BUILTIN_RSQRTPS256,
23731 IX86_BUILTIN_RSQRTPS_NR256,
23732
23733 IX86_BUILTIN_RCPPS256,
23734
23735 IX86_BUILTIN_ROUNDPD256,
23736 IX86_BUILTIN_ROUNDPS256,
23737
23738 IX86_BUILTIN_FLOORPD256,
23739 IX86_BUILTIN_CEILPD256,
23740 IX86_BUILTIN_TRUNCPD256,
23741 IX86_BUILTIN_RINTPD256,
23742 IX86_BUILTIN_FLOORPS256,
23743 IX86_BUILTIN_CEILPS256,
23744 IX86_BUILTIN_TRUNCPS256,
23745 IX86_BUILTIN_RINTPS256,
23746
23747 IX86_BUILTIN_UNPCKHPD256,
23748 IX86_BUILTIN_UNPCKLPD256,
23749 IX86_BUILTIN_UNPCKHPS256,
23750 IX86_BUILTIN_UNPCKLPS256,
23751
23752 IX86_BUILTIN_SI256_SI,
23753 IX86_BUILTIN_PS256_PS,
23754 IX86_BUILTIN_PD256_PD,
23755 IX86_BUILTIN_SI_SI256,
23756 IX86_BUILTIN_PS_PS256,
23757 IX86_BUILTIN_PD_PD256,
23758
23759 IX86_BUILTIN_VTESTZPD,
23760 IX86_BUILTIN_VTESTCPD,
23761 IX86_BUILTIN_VTESTNZCPD,
23762 IX86_BUILTIN_VTESTZPS,
23763 IX86_BUILTIN_VTESTCPS,
23764 IX86_BUILTIN_VTESTNZCPS,
23765 IX86_BUILTIN_VTESTZPD256,
23766 IX86_BUILTIN_VTESTCPD256,
23767 IX86_BUILTIN_VTESTNZCPD256,
23768 IX86_BUILTIN_VTESTZPS256,
23769 IX86_BUILTIN_VTESTCPS256,
23770 IX86_BUILTIN_VTESTNZCPS256,
23771 IX86_BUILTIN_PTESTZ256,
23772 IX86_BUILTIN_PTESTC256,
23773 IX86_BUILTIN_PTESTNZC256,
23774
23775 IX86_BUILTIN_MOVMSKPD256,
23776 IX86_BUILTIN_MOVMSKPS256,
23777
23778 /* TFmode support builtins. */
23779 IX86_BUILTIN_INFQ,
23780 IX86_BUILTIN_HUGE_VALQ,
23781 IX86_BUILTIN_FABSQ,
23782 IX86_BUILTIN_COPYSIGNQ,
23783
23784 /* Vectorizer support builtins. */
23785 IX86_BUILTIN_CPYSGNPS,
23786 IX86_BUILTIN_CPYSGNPD,
23787 IX86_BUILTIN_CPYSGNPS256,
23788 IX86_BUILTIN_CPYSGNPD256,
23789
23790 IX86_BUILTIN_CVTUDQ2PS,
23791
23792 IX86_BUILTIN_VEC_PERM_V2DF,
23793 IX86_BUILTIN_VEC_PERM_V4SF,
23794 IX86_BUILTIN_VEC_PERM_V2DI,
23795 IX86_BUILTIN_VEC_PERM_V4SI,
23796 IX86_BUILTIN_VEC_PERM_V8HI,
23797 IX86_BUILTIN_VEC_PERM_V16QI,
23798 IX86_BUILTIN_VEC_PERM_V2DI_U,
23799 IX86_BUILTIN_VEC_PERM_V4SI_U,
23800 IX86_BUILTIN_VEC_PERM_V8HI_U,
23801 IX86_BUILTIN_VEC_PERM_V16QI_U,
23802 IX86_BUILTIN_VEC_PERM_V4DF,
23803 IX86_BUILTIN_VEC_PERM_V8SF,
23804
23805 /* FMA4 and XOP instructions. */
23806 IX86_BUILTIN_VFMADDSS,
23807 IX86_BUILTIN_VFMADDSD,
23808 IX86_BUILTIN_VFMADDPS,
23809 IX86_BUILTIN_VFMADDPD,
23810 IX86_BUILTIN_VFMADDPS256,
23811 IX86_BUILTIN_VFMADDPD256,
23812 IX86_BUILTIN_VFMADDSUBPS,
23813 IX86_BUILTIN_VFMADDSUBPD,
23814 IX86_BUILTIN_VFMADDSUBPS256,
23815 IX86_BUILTIN_VFMADDSUBPD256,
23816
23817 IX86_BUILTIN_VPCMOV,
23818 IX86_BUILTIN_VPCMOV_V2DI,
23819 IX86_BUILTIN_VPCMOV_V4SI,
23820 IX86_BUILTIN_VPCMOV_V8HI,
23821 IX86_BUILTIN_VPCMOV_V16QI,
23822 IX86_BUILTIN_VPCMOV_V4SF,
23823 IX86_BUILTIN_VPCMOV_V2DF,
23824 IX86_BUILTIN_VPCMOV256,
23825 IX86_BUILTIN_VPCMOV_V4DI256,
23826 IX86_BUILTIN_VPCMOV_V8SI256,
23827 IX86_BUILTIN_VPCMOV_V16HI256,
23828 IX86_BUILTIN_VPCMOV_V32QI256,
23829 IX86_BUILTIN_VPCMOV_V8SF256,
23830 IX86_BUILTIN_VPCMOV_V4DF256,
23831
23832 IX86_BUILTIN_VPPERM,
23833
23834 IX86_BUILTIN_VPMACSSWW,
23835 IX86_BUILTIN_VPMACSWW,
23836 IX86_BUILTIN_VPMACSSWD,
23837 IX86_BUILTIN_VPMACSWD,
23838 IX86_BUILTIN_VPMACSSDD,
23839 IX86_BUILTIN_VPMACSDD,
23840 IX86_BUILTIN_VPMACSSDQL,
23841 IX86_BUILTIN_VPMACSSDQH,
23842 IX86_BUILTIN_VPMACSDQL,
23843 IX86_BUILTIN_VPMACSDQH,
23844 IX86_BUILTIN_VPMADCSSWD,
23845 IX86_BUILTIN_VPMADCSWD,
23846
23847 IX86_BUILTIN_VPHADDBW,
23848 IX86_BUILTIN_VPHADDBD,
23849 IX86_BUILTIN_VPHADDBQ,
23850 IX86_BUILTIN_VPHADDWD,
23851 IX86_BUILTIN_VPHADDWQ,
23852 IX86_BUILTIN_VPHADDDQ,
23853 IX86_BUILTIN_VPHADDUBW,
23854 IX86_BUILTIN_VPHADDUBD,
23855 IX86_BUILTIN_VPHADDUBQ,
23856 IX86_BUILTIN_VPHADDUWD,
23857 IX86_BUILTIN_VPHADDUWQ,
23858 IX86_BUILTIN_VPHADDUDQ,
23859 IX86_BUILTIN_VPHSUBBW,
23860 IX86_BUILTIN_VPHSUBWD,
23861 IX86_BUILTIN_VPHSUBDQ,
23862
23863 IX86_BUILTIN_VPROTB,
23864 IX86_BUILTIN_VPROTW,
23865 IX86_BUILTIN_VPROTD,
23866 IX86_BUILTIN_VPROTQ,
23867 IX86_BUILTIN_VPROTB_IMM,
23868 IX86_BUILTIN_VPROTW_IMM,
23869 IX86_BUILTIN_VPROTD_IMM,
23870 IX86_BUILTIN_VPROTQ_IMM,
23871
23872 IX86_BUILTIN_VPSHLB,
23873 IX86_BUILTIN_VPSHLW,
23874 IX86_BUILTIN_VPSHLD,
23875 IX86_BUILTIN_VPSHLQ,
23876 IX86_BUILTIN_VPSHAB,
23877 IX86_BUILTIN_VPSHAW,
23878 IX86_BUILTIN_VPSHAD,
23879 IX86_BUILTIN_VPSHAQ,
23880
23881 IX86_BUILTIN_VFRCZSS,
23882 IX86_BUILTIN_VFRCZSD,
23883 IX86_BUILTIN_VFRCZPS,
23884 IX86_BUILTIN_VFRCZPD,
23885 IX86_BUILTIN_VFRCZPS256,
23886 IX86_BUILTIN_VFRCZPD256,
23887
23888 IX86_BUILTIN_VPCOMEQUB,
23889 IX86_BUILTIN_VPCOMNEUB,
23890 IX86_BUILTIN_VPCOMLTUB,
23891 IX86_BUILTIN_VPCOMLEUB,
23892 IX86_BUILTIN_VPCOMGTUB,
23893 IX86_BUILTIN_VPCOMGEUB,
23894 IX86_BUILTIN_VPCOMFALSEUB,
23895 IX86_BUILTIN_VPCOMTRUEUB,
23896
23897 IX86_BUILTIN_VPCOMEQUW,
23898 IX86_BUILTIN_VPCOMNEUW,
23899 IX86_BUILTIN_VPCOMLTUW,
23900 IX86_BUILTIN_VPCOMLEUW,
23901 IX86_BUILTIN_VPCOMGTUW,
23902 IX86_BUILTIN_VPCOMGEUW,
23903 IX86_BUILTIN_VPCOMFALSEUW,
23904 IX86_BUILTIN_VPCOMTRUEUW,
23905
23906 IX86_BUILTIN_VPCOMEQUD,
23907 IX86_BUILTIN_VPCOMNEUD,
23908 IX86_BUILTIN_VPCOMLTUD,
23909 IX86_BUILTIN_VPCOMLEUD,
23910 IX86_BUILTIN_VPCOMGTUD,
23911 IX86_BUILTIN_VPCOMGEUD,
23912 IX86_BUILTIN_VPCOMFALSEUD,
23913 IX86_BUILTIN_VPCOMTRUEUD,
23914
23915 IX86_BUILTIN_VPCOMEQUQ,
23916 IX86_BUILTIN_VPCOMNEUQ,
23917 IX86_BUILTIN_VPCOMLTUQ,
23918 IX86_BUILTIN_VPCOMLEUQ,
23919 IX86_BUILTIN_VPCOMGTUQ,
23920 IX86_BUILTIN_VPCOMGEUQ,
23921 IX86_BUILTIN_VPCOMFALSEUQ,
23922 IX86_BUILTIN_VPCOMTRUEUQ,
23923
23924 IX86_BUILTIN_VPCOMEQB,
23925 IX86_BUILTIN_VPCOMNEB,
23926 IX86_BUILTIN_VPCOMLTB,
23927 IX86_BUILTIN_VPCOMLEB,
23928 IX86_BUILTIN_VPCOMGTB,
23929 IX86_BUILTIN_VPCOMGEB,
23930 IX86_BUILTIN_VPCOMFALSEB,
23931 IX86_BUILTIN_VPCOMTRUEB,
23932
23933 IX86_BUILTIN_VPCOMEQW,
23934 IX86_BUILTIN_VPCOMNEW,
23935 IX86_BUILTIN_VPCOMLTW,
23936 IX86_BUILTIN_VPCOMLEW,
23937 IX86_BUILTIN_VPCOMGTW,
23938 IX86_BUILTIN_VPCOMGEW,
23939 IX86_BUILTIN_VPCOMFALSEW,
23940 IX86_BUILTIN_VPCOMTRUEW,
23941
23942 IX86_BUILTIN_VPCOMEQD,
23943 IX86_BUILTIN_VPCOMNED,
23944 IX86_BUILTIN_VPCOMLTD,
23945 IX86_BUILTIN_VPCOMLED,
23946 IX86_BUILTIN_VPCOMGTD,
23947 IX86_BUILTIN_VPCOMGED,
23948 IX86_BUILTIN_VPCOMFALSED,
23949 IX86_BUILTIN_VPCOMTRUED,
23950
23951 IX86_BUILTIN_VPCOMEQQ,
23952 IX86_BUILTIN_VPCOMNEQ,
23953 IX86_BUILTIN_VPCOMLTQ,
23954 IX86_BUILTIN_VPCOMLEQ,
23955 IX86_BUILTIN_VPCOMGTQ,
23956 IX86_BUILTIN_VPCOMGEQ,
23957 IX86_BUILTIN_VPCOMFALSEQ,
23958 IX86_BUILTIN_VPCOMTRUEQ,
23959
23960 /* LWP instructions. */
23961 IX86_BUILTIN_LLWPCB,
23962 IX86_BUILTIN_SLWPCB,
23963 IX86_BUILTIN_LWPVAL32,
23964 IX86_BUILTIN_LWPVAL64,
23965 IX86_BUILTIN_LWPINS32,
23966 IX86_BUILTIN_LWPINS64,
23967
23968 IX86_BUILTIN_CLZS,
23969
23970 /* BMI instructions. */
23971 IX86_BUILTIN_BEXTR32,
23972 IX86_BUILTIN_BEXTR64,
23973 IX86_BUILTIN_CTZS,
23974
23975 /* TBM instructions. */
23976 IX86_BUILTIN_BEXTRI32,
23977 IX86_BUILTIN_BEXTRI64,
23978
23979
23980 /* FSGSBASE instructions. */
23981 IX86_BUILTIN_RDFSBASE32,
23982 IX86_BUILTIN_RDFSBASE64,
23983 IX86_BUILTIN_RDGSBASE32,
23984 IX86_BUILTIN_RDGSBASE64,
23985 IX86_BUILTIN_WRFSBASE32,
23986 IX86_BUILTIN_WRFSBASE64,
23987 IX86_BUILTIN_WRGSBASE32,
23988 IX86_BUILTIN_WRGSBASE64,
23989
23990 /* RDRND instructions. */
23991 IX86_BUILTIN_RDRAND16_STEP,
23992 IX86_BUILTIN_RDRAND32_STEP,
23993 IX86_BUILTIN_RDRAND64_STEP,
23994
23995 /* F16C instructions. */
23996 IX86_BUILTIN_CVTPH2PS,
23997 IX86_BUILTIN_CVTPH2PS256,
23998 IX86_BUILTIN_CVTPS2PH,
23999 IX86_BUILTIN_CVTPS2PH256,
24000
24001 /* CFString built-in for Darwin. */
24002 IX86_BUILTIN_CFSTRING,
24003
24004 IX86_BUILTIN_MAX
24005 };
24006
24007 /* Table for the ix86 builtin decls. */
24008 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
24009
24010 /* Table of all of the builtin functions that are possible with different ISAs
24011 but are waiting to be built until a function is declared to use that
24012 ISA. */
24013 struct builtin_isa {
24014 const char *name; /* function name */
24015 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
24016 int isa; /* isa_flags this builtin is defined for */
24017 bool const_p; /* true if the declaration is constant */
24018 bool set_and_not_built_p; /* true if builtin was deferred and isn't built yet */
24019 };
24020
24021 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
24022
24023
24024 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
24025 of which isa_flags to use in the ix86_builtins_isa array. Stores the
24026 function decl in the ix86_builtins array. Returns the function decl or
24027 NULL_TREE if the builtin was not added.
24028
24029 If the front end has a special hook for builtin functions, delay adding
24030 builtin functions that aren't in the current ISA until the ISA is changed
24031 with function specific optimization. Doing so can save about 300K for the
24032 default compiler. When the builtin is expanded, check at that time whether
24033 it is valid.
24034
24035 If the front end doesn't have a special hook, record all builtins, even
24036 those that aren't in the current ISA, in case the user uses
24037 function specific options for a different ISA, so that we don't get scope
24038 errors if a builtin is added in the middle of a function scope. */
24039
24040 static inline tree
24041 def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
24042 enum ix86_builtins code)
24043 {
24044 tree decl = NULL_TREE;
24045
24046 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
24047 {
24048 ix86_builtins_isa[(int) code].isa = mask;
24049
24050 mask &= ~OPTION_MASK_ISA_64BIT;
24051 if (mask == 0
24052 || (mask & ix86_isa_flags) != 0
24053 || (lang_hooks.builtin_function
24054 == lang_hooks.builtin_function_ext_scope))
24056 {
24057 tree type = ix86_get_builtin_func_type (tcode);
24058 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
24059 NULL, NULL_TREE);
24060 ix86_builtins[(int) code] = decl;
24061 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
24062 }
24063 else
24064 {
24065 ix86_builtins[(int) code] = NULL_TREE;
24066 ix86_builtins_isa[(int) code].tcode = tcode;
24067 ix86_builtins_isa[(int) code].name = name;
24068 ix86_builtins_isa[(int) code].const_p = false;
24069 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
24070 }
24071 }
24072
24073 return decl;
24074 }
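
/* A minimal usage sketch (illustrative only; IX86_BUILTIN_EXAMPLE is a
   hypothetical enumerator, not one defined above).  A registration such as

     def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_example",
                  V2DF_FTYPE_V2DF_V2DF, IX86_BUILTIN_EXAMPLE);

   either builds the decl right away (when SSE2 is already enabled, the mask
   is empty, or the front end uses the extended-scope hook) or only records
   the name, type code and ISA mask in ix86_builtins_isa, leaving the decl
   to be built later by ix86_add_new_builtins.  */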
24075
24076 /* Like def_builtin, but also marks the function decl "const". */
24077
24078 static inline tree
24079 def_builtin_const (int mask, const char *name,
24080 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
24081 {
24082 tree decl = def_builtin (mask, name, tcode, code);
24083 if (decl)
24084 TREE_READONLY (decl) = 1;
24085 else
24086 ix86_builtins_isa[(int) code].const_p = true;
24087
24088 return decl;
24089 }
24090
24091 /* Add any new builtin functions for a given ISA that may not have been
24092 declared. This saves a bit of space compared to adding all of the
24093 declarations to the tree, even if we didn't use them. */
24094
24095 static void
24096 ix86_add_new_builtins (int isa)
24097 {
24098 int i;
24099
24100 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
24101 {
24102 if ((ix86_builtins_isa[i].isa & isa) != 0
24103 && ix86_builtins_isa[i].set_and_not_built_p)
24104 {
24105 tree decl, type;
24106
24107 /* Don't define the builtin again. */
24108 ix86_builtins_isa[i].set_and_not_built_p = false;
24109
24110 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
24111 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
24112 type, i, BUILT_IN_MD, NULL,
24113 NULL_TREE);
24114
24115 ix86_builtins[i] = decl;
24116 if (ix86_builtins_isa[i].const_p)
24117 TREE_READONLY (decl) = 1;
24118 }
24119 }
24120 }
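
/* Illustrative note (an assumption about callers outside this excerpt, based
   on the comments above): when function specific target options enlarge the
   ISA set, e.g.

     __attribute__((target ("avx"))) void f (void)
     {
       __builtin_ia32_vzeroupper ();
     }

   the new isa_flags are expected to be passed to ix86_add_new_builtins so
   that builtins deferred by def_builtin, such as __builtin_ia32_vzeroupper,
   are built and become visible from that point on.  */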
24121
24122 /* Bits for builtin_description.flag. */
24123
24124 /* Set when we don't support the comparison natively, and should
24125 swap the comparison operands in order to support it. */
24126 #define BUILTIN_DESC_SWAP_OPERANDS 1
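
/* As an illustration of operand swapping: there is no native "greater than"
   packed compare, so __builtin_ia32_cmpgtps is described in bdesc_args below
   with the LT comparison and a *_SWAP function type, i.e. a > b is emitted
   as b < a with the operands exchanged.  */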
24127
24128 struct builtin_description
24129 {
24130 const unsigned int mask;
24131 const enum insn_code icode;
24132 const char *const name;
24133 const enum ix86_builtins code;
24134 const enum rtx_code comparison;
24135 const int flag;
24136 };
24137
24138 static const struct builtin_description bdesc_comi[] =
24139 {
24140 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
24141 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
24142 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
24143 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
24144 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
24145 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
24146 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
24147 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
24148 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
24149 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
24150 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
24151 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
24152 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
24153 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
24154 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
24155 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
24156 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
24157 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
24158 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
24159 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
24160 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
24161 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
24162 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
24163 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
24164 };
24165
24166 static const struct builtin_description bdesc_pcmpestr[] =
24167 {
24168 /* SSE4.2 */
24169 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
24170 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
24171 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
24172 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
24173 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
24174 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
24175 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
24176 };
24177
24178 static const struct builtin_description bdesc_pcmpistr[] =
24179 {
24180 /* SSE4.2 */
24181 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
24182 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
24183 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
24184 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
24185 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
24186 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
24187 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
24188 };
24189
24190 /* Special builtins with variable number of arguments. */
24191 static const struct builtin_description bdesc_special_args[] =
24192 {
24193 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
24194 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
24195 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
24196
24197 /* MMX */
24198 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
24199
24200 /* 3DNow! */
24201 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
24202
24203 /* SSE */
24204 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
24205 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
24206 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
24207
24208 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
24209 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
24210 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
24211 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
24212
24213 /* SSE or 3DNow!A */
24214 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
24215 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
24216
24217 /* SSE2 */
24218 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
24219 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
24220 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
24221 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
24222 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
24223 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
24224 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
24225 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
24226 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
24227
24228 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
24229 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
24230
24231 /* SSE3 */
24232 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
24233
24234 /* SSE4.1 */
24235 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
24236
24237 /* SSE4A */
24238 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
24239 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
24240
24241 /* AVX */
24242 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
24243 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
24244
24245 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
24246 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
24247 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
24248 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
24249 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
24250
24251 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
24252 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
24253 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
24254 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
24255 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
24256 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
24257 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
24258
24259 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
24260 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
24261 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
24262
24263 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
24264 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
24265 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
24266 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
24267 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
24268 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
24269 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
24270 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
24271
24272 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
24273 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
24274 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
24275 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
24276 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
24277 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
24278
24279 /* FSGSBASE */
24280 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
24281 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
24282 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
24283 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
24284 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
24285 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
24286 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
24287 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
24288 };
24289
24290 /* Builtins with variable number of arguments. */
24291 static const struct builtin_description bdesc_args[] =
24292 {
24293 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
24294 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
24295 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
24296 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
24297 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
24298 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
24299 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
24300
24301 /* MMX */
24302 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24303 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24304 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24305 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24306 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24307 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24308
24309 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24310 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24311 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24312 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24313 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24314 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24315 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24316 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24317
24318 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24319 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24320
24321 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24322 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24323 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24324 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24325
24326 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24327 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24328 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24329 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24330 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24331 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24332
24333 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24334 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24335 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24336 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24337 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24338 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24339
24340 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
24341 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
24342 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
24343
24344 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
24345
24346 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
24347 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
24348 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
24349 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
24350 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
24351 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
24352
24353 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
24354 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
24355 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
24356 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
24357 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
24358 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
24359
24360 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
24361 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
24362 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
24363 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
24364
24365 /* 3DNow! */
24366 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
24367 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
24368 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
24369 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
24370
24371 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24372 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24373 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24374 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
24375 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
24376 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
24377 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24378 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24379 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24380 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24381 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24382 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24383 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24384 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24385 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24386
24387 /* 3DNow!A */
24388 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
24389 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
24390 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
24391 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
24392 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24393 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24394
24395 /* SSE */
24396 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
24397 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24398 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24399 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24400 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24401 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24402 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
24403 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
24404 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
24405 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
24406 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
24407 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
24408
24409 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
24410
24411 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24412 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24413 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24414 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24415 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24416 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24417 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24418 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24419
24420 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
24421 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
24422 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
24423 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24424 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24425 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24426 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
24427 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
24428 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
24429 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24430 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24431 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24432 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
24433 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
24434 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
24435 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24436 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
24437 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
24438 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
24439 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24440 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24441 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24442
24443 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24444 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24445 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24446 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24447
24448 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24449 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24450 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24451 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24452
24453 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24454
24455 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24456 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24457 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24458 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24459 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24460
24461 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
24462 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
24463 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
24464
24465 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
24466
24467 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
24468 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
24469 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
24470
24471 /* SSE MMX or 3DNow!A */
24472 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24473 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24474 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24475
24476 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24477 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24478 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24479 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24480
24481 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
24482 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
24483
24484 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
24485
24486 /* SSE2 */
24487 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
24488
24489 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
24490 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
24491 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
24492 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
24493 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
24494 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
24495 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
24496 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
24497 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
24498 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
24499 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
24500 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },
24501
24502 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
24503 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
24504 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
24505 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
24506 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
24507 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
24508
24509 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
24510 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
24511 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
24512 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
24513 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
24514
24515 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
24516
24517 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
24518 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
24519 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
24520 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
24521
24522 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
24523 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
24524 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
24525
24526 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24527 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24528 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24529 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24530 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24531 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24532 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24533 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24534
24535 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
24536 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
24537 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
24538 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
24539 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
24540 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
24541 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
24542 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
24543 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
24544 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
24545 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
24546 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
24547 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
24548 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
24549 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
24550 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
24551 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
24552 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
24553 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
24554 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
24555
24556 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24557 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24558 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24559 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24560
24561 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24562 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24563 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24564 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24565
24566 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24567
24568 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24569 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24570 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24571
24572 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
24573
24574 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24575 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24576 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24577 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24578 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24579 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24580 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24581 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24582
24583 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24584 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24585 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24586 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24587 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24588 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24589 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24590 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24591
24592 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24593 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24594
24595 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24596 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24597 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24598 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24599
24600 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24601 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24602
24603 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24604 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24605 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24606 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24607 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24608 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24609
24610 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24611 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24612 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24613 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24614
24615 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24616 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24617 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24618 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24619 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24620 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24621 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24622 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24623
24624 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
24625 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
24626 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
24627
24628 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24629 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
24630
24631 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
24632 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
24633
24634 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
24635
24636 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
24637 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
24638 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
24639 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
24640
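/* Shift builtins.  The _SI_COUNT suffix marks a shift count passed as an
   integer and _V*_COUNT a count passed in a vector register; _INT_CONVERT
   marks the whole-register byte shifts, whose operands are presumably
   reinterpreted to match the V1TImode insn patterns.  */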
24641 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
24642 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
24643 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
24644 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
24645 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
24646 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
24647 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
24648
24649 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
24650 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
24651 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
24652 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
24653 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
24654 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
24655 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
24656
24657 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
24658 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
24659 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
24660 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
24661
24662 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
24663 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
24664 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
24665
24666 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
24667
24668 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
24669 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
24670
24671 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
24672
24673 /* SSE2 MMX */
24674 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
24675 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
24676
24677 /* SSE3 */
24678 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24679 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24680
24681 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24682 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24683 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24684 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24685 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24686 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24687
24688 /* SSSE3 */
24689 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
24690 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
24691 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
24692 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
24693 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
24694 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
24695
24696 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24697 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24698 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24699 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24700 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24701 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24702 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24703 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24704 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24705 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24706 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24707 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24708 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
24709 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
24710 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24711 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24712 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24713 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24714 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24715 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24716 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24717 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24718 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24719 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24720
24721 /* SSSE3. */
24722 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
24723 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
24724
24725 /* SSE4.1 */
24726 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
24727 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
24728 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
24729 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
24730 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
24731 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
24732 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
24733 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
24734 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
24735 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
24736
24737 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
24738 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
24739 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
24740 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
24741 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
24742 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
24743 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
24744 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
24745 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
24746 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
24747 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
24748 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
24749 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
24750
24751 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
24752 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24753 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24754 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24755 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24756 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24757 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24758 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24759 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24760 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24761 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
24762 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24763
24764 /* SSE4.1 */
24765 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
24766 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
24767 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
24768 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
24769
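/* For the floor/ceil/trunc/rint variants the comparison-code slot is
   reused to carry a ROUND_* constant (cast to enum rtx_code), which the
   expander presumably passes through as the rounding-control immediate of
   the underlying roundpd/roundps pattern.  */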
24770 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
24771 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
24772 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
24773 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
24774
24775 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
24776 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
24777 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
24778 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
24779
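/* The three ptest builtins share one insn pattern; the comparison code
   selects which flag the result tests: EQ for ZF (ptestz), LTU for CF
   (ptestc) and GTU for the not-zero-and-not-carry case (ptestnzc).  */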
24780 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
24781 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
24782 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
24783
24784 /* SSE4.2 */
24785 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24786 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
24787 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
24788 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
24789 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
24790
24791 /* SSE4A */
24792 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
24793 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
24794 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
24795 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24796
24797 /* AES */
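/* Entries with a null name here and under PCLMUL are skipped by the
   generic table walk; the corresponding __builtin_ia32_* functions are
   presumably registered separately so they can be gated on the AES and
   PCLMUL ISA flags rather than on SSE2 alone.  */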
24798 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
24799 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
24800
24801 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24802 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24803 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24804 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24805
24806 /* PCLMUL */
24807 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
24808
24809 /* AVX */
24810 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24811 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24812 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24813 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24814 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24815 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24816 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24817 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24818 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24819 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24820 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24821 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24822 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24823 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24824 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24825 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24826 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24827 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24828 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24829 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24830 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24831 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24832 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24833 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24834 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24835 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24836
24837 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
24838 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
24839 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
24840 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
24841
24842 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
24843 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
24844 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
24845 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
24846 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
24847 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
24848 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
24849 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
24850 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
24851 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
24852 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
24853 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
24854 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
24855 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
24856 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
24857 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
24858 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
24859 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
24860 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
24861 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
24862 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
24863 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
24864 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
24865 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
24866 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
24867 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
24868 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
24869 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
24870 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
24871 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
24872 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
24873 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
24874 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
24875 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
24876
24877 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
24878 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
24879 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
24880
24881 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
24882 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
24883 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
24884 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
24885 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
24886
24887 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
24888
24889 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
24890 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
24891
24892 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
24893 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
24894 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
24895 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
24896
24897 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
24898 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
24899 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
24900 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
24901
24902 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24903 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24904 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24905 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24906
24907 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
24908 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
24909 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
24910 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
24911 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
24912 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
24913
24914 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
24915 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
24916 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
24917 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
24918 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
24919 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
24920 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
24921 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
24922 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
24923 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
24924 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
24925 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
24926 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
24927 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
24928 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
24929
24930 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
24931 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
24932
24933 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24934 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24935
24936 { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
24937
24938 /* BMI */
24939 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
24940 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
24941 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
24942
24943 /* TBM */
24944 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
24945 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
24946
24947 /* F16C */
24948 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
24949 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
24950 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
24951 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
24952 };
24953
24954 /* FMA4 and XOP. */
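/* Shorthand names for the function-type enumerators used by the
   bdesc_multi_arg table below; each expands to one of the generic
   *_FTYPE_* codes.  */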
24955 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
24956 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
24957 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
24958 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
24959 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
24960 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
24961 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
24962 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
24963 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
24964 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
24965 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
24966 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
24967 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
24968 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
24969 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
24970 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
24971 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
24972 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
24973 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
24974 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
24975 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
24976 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
24977 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
24978 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
24979 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
24980 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
24981 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
24982 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
24983 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
24984 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
24985 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
24986 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
24987 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
24988 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
24989 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
24990 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
24991 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
24992 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
24993 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
24994 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
24995 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
24996 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
24997 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
24998 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
24999 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
25000 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
25001 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
25002 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
25003 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
25004 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
25005 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
25006 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
25007
25008 static const struct builtin_description bdesc_multi_arg[] =
25009 {
25010 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
25011 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
25012 UNKNOWN, (int)MULTI_ARG_3_SF },
25013 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
25014 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
25015 UNKNOWN, (int)MULTI_ARG_3_DF },
25016
25017 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
25018 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
25019 UNKNOWN, (int)MULTI_ARG_3_SF },
25020 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
25021 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
25022 UNKNOWN, (int)MULTI_ARG_3_DF },
25023 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
25024 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
25025 UNKNOWN, (int)MULTI_ARG_3_SF2 },
25026 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
25027 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
25028 UNKNOWN, (int)MULTI_ARG_3_DF2 },
25029
25030 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
25031 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
25032 UNKNOWN, (int)MULTI_ARG_3_SF },
25033 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
25034 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
25035 UNKNOWN, (int)MULTI_ARG_3_DF },
25036 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
25037 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
25038 UNKNOWN, (int)MULTI_ARG_3_SF2 },
25039 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
25040 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
25041 UNKNOWN, (int)MULTI_ARG_3_DF2 },
25042
25043 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
25044 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
25045 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
25046 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
25047 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
25048 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
25049 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
25050
25051 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
25052 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
25053 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
25054 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
25055 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
25056 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
25057 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
25058
25059 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
25060
25061 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
25062 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
25063 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
25064 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
25065 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
25066 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
25067 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
25068 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
25069 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
25070 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
25071 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
25072 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
25073
25074 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
25075 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
25076 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
25077 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
25078 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
25079 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
25080 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
25081 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
25082 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
25083 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
25084 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
25085 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
25086 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
25087 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
25088 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
25089 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
25090
25091 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
25092 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
25093 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
25094 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
25095 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
25096 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
25097
25098 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
25099 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
25100 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
25101 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
25102 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
25103 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
25104 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
25105 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
25106 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
25107 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
25108 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
25109 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
25110 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
25111 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
25112 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
25113
25114 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
25115 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
25116 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
25117 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
25118 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
25119 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
25120 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
25121
25122 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
25123 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
25124 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
25125 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
25126 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
25127 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
25128 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
25129
25130 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
25131 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
25132 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
25133 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
25134 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
25135 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
25136 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
25137
25138 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
25139 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
25140 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
25141 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
25142 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
25143 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
25144 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
25145
25146 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
25147 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
25148 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
25149 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
25150 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
25151 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
25152 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
25153
25154 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
25155 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
25156 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
25157 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
25158 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
25159 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
25160 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
25161
25162 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
25163 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
25164 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
25165 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
25166 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
25167 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
25168 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
25169
25170 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
25171 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
25172 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
25173 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
25174 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
25175 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
25176 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
25177
25178 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
25179 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
25180 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
25181 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
25182 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
25183 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
25184 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
25185 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
25186
25187 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
25188 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
25189 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
25190 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
25191 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
25192 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
25193 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
25194 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
25195
25196 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
25197 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
25198 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
25199 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
25200
25201 };
25202
25203 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
25204 not in the current target ISA, so that the user can compile particular
25205 modules with target-specific options that differ from the command-line
25206 options.  */
25207 static void
25208 ix86_init_mmx_sse_builtins (void)
25209 {
25210 const struct builtin_description * d;
25211 enum ix86_builtin_func_type ftype;
25212 size_t i;
25213
25214 /* Add all special builtins with a variable number of operands. */
25215 for (i = 0, d = bdesc_special_args;
25216 i < ARRAY_SIZE (bdesc_special_args);
25217 i++, d++)
25218 {
25219 if (d->name == 0)
25220 continue;
25221
25222 ftype = (enum ix86_builtin_func_type) d->flag;
25223 def_builtin (d->mask, d->name, ftype, d->code);
25224 }
25225
25226 /* Add all builtins with a variable number of operands. */
25227 for (i = 0, d = bdesc_args;
25228 i < ARRAY_SIZE (bdesc_args);
25229 i++, d++)
25230 {
25231 if (d->name == 0)
25232 continue;
25233
25234 ftype = (enum ix86_builtin_func_type) d->flag;
25235 def_builtin_const (d->mask, d->name, ftype, d->code);
25236 }
25237
25238 /* pcmpestr[im] insns. */
25239 for (i = 0, d = bdesc_pcmpestr;
25240 i < ARRAY_SIZE (bdesc_pcmpestr);
25241 i++, d++)
25242 {
25243 if (d->code == IX86_BUILTIN_PCMPESTRM128)
25244 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
25245 else
25246 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
25247 def_builtin_const (d->mask, d->name, ftype, d->code);
25248 }
25249
25250 /* pcmpistr[im] insns. */
25251 for (i = 0, d = bdesc_pcmpistr;
25252 i < ARRAY_SIZE (bdesc_pcmpistr);
25253 i++, d++)
25254 {
25255 if (d->code == IX86_BUILTIN_PCMPISTRM128)
25256 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
25257 else
25258 ftype = INT_FTYPE_V16QI_V16QI_INT;
25259 def_builtin_const (d->mask, d->name, ftype, d->code);
25260 }
25261
25262 /* comi/ucomi insns. */
25263 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25264 {
25265 if (d->mask == OPTION_MASK_ISA_SSE2)
25266 ftype = INT_FTYPE_V2DF_V2DF;
25267 else
25268 ftype = INT_FTYPE_V4SF_V4SF;
25269 def_builtin_const (d->mask, d->name, ftype, d->code);
25270 }
25271
25272 /* SSE */
25273 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
25274 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
25275 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
25276 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
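  /* For illustration only: given the VOID_FTYPE_UNSIGNED and
     UNSIGNED_FTYPE_VOID signatures above, user code could drive MXCSR
     directly, roughly as

       unsigned int csr = __builtin_ia32_stmxcsr ();
       __builtin_ia32_ldmxcsr (csr | 0x8040);

     The 0x8040 (FTZ|DAZ) value is only an example; the usual entry points
     are the _mm_getcsr/_mm_setcsr intrinsics in xmmintrin.h.  */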
25277
25278 /* SSE or 3DNow!A */
25279 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25280 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
25281 IX86_BUILTIN_MASKMOVQ);
25282
25283 /* SSE2 */
25284 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
25285 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
25286
25287 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
25288 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
25289 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
25290 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
25291
25292 /* SSE3. */
25293 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
25294 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
25295 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
25296 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
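  /* For illustration only: with the signatures just defined, a
     monitor/mwait pair would look roughly like

       static char trigger;
       __builtin_ia32_monitor (&trigger, 0, 0);
       __builtin_ia32_mwait (0, 0);

     The zero extension/hint arguments are placeholders; callers normally
     use the _mm_monitor/_mm_mwait intrinsics from pmmintrin.h.  */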
25297
25298 /* AES */
25299 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
25300 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
25301 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
25302 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
25303 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
25304 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
25305 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
25306 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
25307 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
25308 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
25309 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
25310 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
25311
25312 /* PCLMUL */
25313 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
25314 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
25315
25316 /* RDRND */
25317 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
25318 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
25319 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
25320 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
25321 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
25322 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
25323 IX86_BUILTIN_RDRAND64_STEP);
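  /* For illustration only: the *_step builtins store the random value
     through the pointer and return a success flag, matching the
     INT_FTYPE_P* signatures above, e.g.

       unsigned int r;
       while (!__builtin_ia32_rdrand32_step (&r))
         ;

     Looping until the hardware RNG succeeds is only a suggested usage
     pattern, not something this file requires.  */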
25324
25325 /* MMX access to the vec_init patterns. */
25326 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
25327 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
25328
25329 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
25330 V4HI_FTYPE_HI_HI_HI_HI,
25331 IX86_BUILTIN_VEC_INIT_V4HI);
25332
25333 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
25334 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
25335 IX86_BUILTIN_VEC_INIT_V8QI);
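  /* For illustration only: the vec_init builtins assemble an MMX vector
     from scalars, e.g. for the V2SI_FTYPE_INT_INT signature above

       __v2si v = __builtin_ia32_vec_init_v2si (1, 2);

     assuming the usual __v2si vector typedef from mmintrin.h.  */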
25336
25337 /* Access to the vec_extract patterns. */
25338 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
25339 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
25340 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
25341 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
25342 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
25343 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
25344 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
25345 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
25346 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
25347 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
25348
25349 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25350 "__builtin_ia32_vec_ext_v4hi",
25351 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
25352
25353 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
25354 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
25355
25356 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
25357 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
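  /* For illustration only: the vec_ext builtins take a vector and a
     constant lane number, e.g. with the DOUBLE_FTYPE_V2DF_INT signature
     above

       double lo = __builtin_ia32_vec_ext_v2df (v, 0);

     where v is assumed to be a __v2df value; intrinsics such as
     _mm_cvtsd_f64 are typically implemented on top of these.  */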
25358
25359 /* Access to the vec_set patterns. */
25360 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
25361 "__builtin_ia32_vec_set_v2di",
25362 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
25363
25364 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
25365 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
25366
25367 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
25368 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
25369
25370 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
25371 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
25372
25373 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25374 "__builtin_ia32_vec_set_v4hi",
25375 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
25376
25377 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
25378 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
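  /* For illustration only: the vec_set builtins return a copy of the
     vector with one lane replaced, e.g. for the V4SF_FTYPE_V4SF_FLOAT_INT
     signature above

       __v4sf w = __builtin_ia32_vec_set_v4sf (v, 1.0f, 2);

     The lane index must be a compile-time constant; 2 is just an example.  */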
25379
25380 /* Add the FMA4 and XOP multi-arg builtins. */
25381 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25382 {
25383 if (d->name == 0)
25384 continue;
25385
25386 ftype = (enum ix86_builtin_func_type) d->flag;
25387 def_builtin_const (d->mask, d->name, ftype, d->code);
25388 }
25389 }
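/* For illustration only: because the builtins above are registered
   regardless of the command-line ISA, a single translation unit can mix
   per-function targets, e.g. (assuming the SSE4.1 intrinsics header has
   been included)

     __attribute__ ((target ("sse4.1")))
     __m128i blend (__m128i a, __m128i b)
     {
       return _mm_blend_epi16 (a, b, 0xf0);
     }

   with the ISA mask stored alongside each builtin gating its actual use.  */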
25390
25391 /* Internal helper for ix86_init_builtins. */
25392
25393 static void
25394 ix86_init_builtins_va_builtins_abi (void)
25395 {
25396 tree ms_va_ref, sysv_va_ref;
25397 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
25398 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
25399 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
25400 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
25401
25402 if (!TARGET_64BIT)
25403 return;
25404 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
25405 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
25406 ms_va_ref = build_reference_type (ms_va_list_type_node);
25407 sysv_va_ref =
25408 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
25409
25410 fnvoid_va_end_ms =
25411 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
25412 fnvoid_va_start_ms =
25413 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
25414 fnvoid_va_end_sysv =
25415 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
25416 fnvoid_va_start_sysv =
25417 build_varargs_function_type_list (void_type_node, sysv_va_ref,
25418 NULL_TREE);
25419 fnvoid_va_copy_ms =
25420 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
25421 NULL_TREE);
25422 fnvoid_va_copy_sysv =
25423 build_function_type_list (void_type_node, sysv_va_ref,
25424 sysv_va_ref, NULL_TREE);
25425
25426 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
25427 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
25428 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
25429 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
25430 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
25431 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
25432 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
25433 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25434 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
25435 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25436 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
25437 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25438 }
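/* For illustration only: the builtins registered above back
   explicitly-attributed varargs code such as

     int __attribute__ ((ms_abi))
     first_arg (int n, ...)
     {
       __builtin_ms_va_list ap;
       __builtin_ms_va_start (ap, n);
       int v = __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return v;
     }

   The body is only a placeholder showing where va_start/va_end belong.  */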
25439
25440 static void
25441 ix86_init_builtin_types (void)
25442 {
25443 tree float128_type_node, float80_type_node;
25444
25445 /* The __float80 type. */
25446 float80_type_node = long_double_type_node;
25447 if (TYPE_MODE (float80_type_node) != XFmode)
25448 {
25449 /* long double is not XFmode; build a distinct 80-bit type. */
25450 float80_type_node = make_node (REAL_TYPE);
25451
25452 TYPE_PRECISION (float80_type_node) = 80;
25453 layout_type (float80_type_node);
25454 }
25455 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
25456
25457 /* The __float128 type. */
25458 float128_type_node = make_node (REAL_TYPE);
25459 TYPE_PRECISION (float128_type_node) = 128;
25460 layout_type (float128_type_node);
25461 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
25462
25463 /* This macro is built by i386-builtin-types.awk. */
25464 DEFINE_BUILTIN_PRIMITIVE_TYPES;
25465 }
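/* For illustration only: after this runs, the __float80 and __float128
   names are visible to user code, e.g.

     __float128 q = 2.0;
     __float80  e = 3.0;

   Whether extended literal suffixes (e.g. 1.0Q) are accepted is a
   front-end matter and not decided here.  */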
25466
25467 static void
25468 ix86_init_builtins (void)
25469 {
25470 tree t;
25471
25472 ix86_init_builtin_types ();
25473
25474 /* TFmode support builtins. */
25475 def_builtin_const (0, "__builtin_infq",
25476 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
25477 def_builtin_const (0, "__builtin_huge_valq",
25478 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
25479
25480 /* We will expand them to a normal call if SSE2 isn't available,
25481 since they are used by libgcc. */
25482 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
25483 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
25484 BUILT_IN_MD, "__fabstf2", NULL_TREE);
25485 TREE_READONLY (t) = 1;
25486 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
25487
25488 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
25489 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
25490 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
25491 TREE_READONLY (t) = 1;
25492 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
25493
25494 ix86_init_mmx_sse_builtins ();
25495
25496 if (TARGET_64BIT)
25497 ix86_init_builtins_va_builtins_abi ();
25498
25499 #ifdef SUBTARGET_INIT_BUILTINS
25500 SUBTARGET_INIT_BUILTINS;
25501 #endif
25502 }
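/* For illustration only: the TFmode builtins set up above compose in user
   code as, for example, with __float128 operands x and y,

     __float128 m = __builtin_copysignq (__builtin_fabsq (x), y);

   and, per the comment above, they expand to calls to the libgcc
   __fabstf2 / __copysigntf3 routines when SSE2 is not available.  */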
25503
25504 /* Return the ix86 builtin for CODE. */
25505
25506 static tree
25507 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
25508 {
25509 if (code >= IX86_BUILTIN_MAX)
25510 return error_mark_node;
25511
25512 return ix86_builtins[code];
25513 }
25514
25515 /* Errors in the source file can cause expand_expr to return const0_rtx
25516 where we expect a vector. To avoid crashing, use one of the vector
25517 clear instructions. */
25518 static rtx
25519 safe_vector_operand (rtx x, enum machine_mode mode)
25520 {
25521 if (x == const0_rtx)
25522 x = CONST0_RTX (mode);
25523 return x;
25524 }
25525
25526 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
25527
25528 static rtx
25529 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
25530 {
25531 rtx pat;
25532 tree arg0 = CALL_EXPR_ARG (exp, 0);
25533 tree arg1 = CALL_EXPR_ARG (exp, 1);
25534 rtx op0 = expand_normal (arg0);
25535 rtx op1 = expand_normal (arg1);
25536 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25537 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
25538 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
25539
25540 if (VECTOR_MODE_P (mode0))
25541 op0 = safe_vector_operand (op0, mode0);
25542 if (VECTOR_MODE_P (mode1))
25543 op1 = safe_vector_operand (op1, mode1);
25544
25545 if (optimize || !target
25546 || GET_MODE (target) != tmode
25547 || !insn_data[icode].operand[0].predicate (target, tmode))
25548 target = gen_reg_rtx (tmode);
25549
25550 if (GET_MODE (op1) == SImode && mode1 == TImode)
25551 {
25552 rtx x = gen_reg_rtx (V4SImode);
25553 emit_insn (gen_sse2_loadd (x, op1));
25554 op1 = gen_lowpart (TImode, x);
25555 }
25556
25557 if (!insn_data[icode].operand[1].predicate (op0, mode0))
25558 op0 = copy_to_mode_reg (mode0, op0);
25559 if (!insn_data[icode].operand[2].predicate (op1, mode1))
25560 op1 = copy_to_mode_reg (mode1, op1);
25561
25562 pat = GEN_FCN (icode) (target, op0, op1);
25563 if (! pat)
25564 return 0;
25565
25566 emit_insn (pat);
25567
25568 return target;
25569 }
25570
25571 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
25572
25573 static rtx
25574 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
25575 enum ix86_builtin_func_type m_type,
25576 enum rtx_code sub_code)
25577 {
25578 rtx pat;
25579 int i;
25580 int nargs;
25581 bool comparison_p = false;
25582 bool tf_p = false;
25583 bool last_arg_constant = false;
25584 int num_memory = 0;
25585 struct {
25586 rtx op;
25587 enum machine_mode mode;
25588 } args[4];
25589
25590 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25591
25592 switch (m_type)
25593 {
25594 case MULTI_ARG_4_DF2_DI_I:
25595 case MULTI_ARG_4_DF2_DI_I1:
25596 case MULTI_ARG_4_SF2_SI_I:
25597 case MULTI_ARG_4_SF2_SI_I1:
25598 nargs = 4;
25599 last_arg_constant = true;
25600 break;
25601
25602 case MULTI_ARG_3_SF:
25603 case MULTI_ARG_3_DF:
25604 case MULTI_ARG_3_SF2:
25605 case MULTI_ARG_3_DF2:
25606 case MULTI_ARG_3_DI:
25607 case MULTI_ARG_3_SI:
25608 case MULTI_ARG_3_SI_DI:
25609 case MULTI_ARG_3_HI:
25610 case MULTI_ARG_3_HI_SI:
25611 case MULTI_ARG_3_QI:
25612 case MULTI_ARG_3_DI2:
25613 case MULTI_ARG_3_SI2:
25614 case MULTI_ARG_3_HI2:
25615 case MULTI_ARG_3_QI2:
25616 nargs = 3;
25617 break;
25618
25619 case MULTI_ARG_2_SF:
25620 case MULTI_ARG_2_DF:
25621 case MULTI_ARG_2_DI:
25622 case MULTI_ARG_2_SI:
25623 case MULTI_ARG_2_HI:
25624 case MULTI_ARG_2_QI:
25625 nargs = 2;
25626 break;
25627
25628 case MULTI_ARG_2_DI_IMM:
25629 case MULTI_ARG_2_SI_IMM:
25630 case MULTI_ARG_2_HI_IMM:
25631 case MULTI_ARG_2_QI_IMM:
25632 nargs = 2;
25633 last_arg_constant = true;
25634 break;
25635
25636 case MULTI_ARG_1_SF:
25637 case MULTI_ARG_1_DF:
25638 case MULTI_ARG_1_SF2:
25639 case MULTI_ARG_1_DF2:
25640 case MULTI_ARG_1_DI:
25641 case MULTI_ARG_1_SI:
25642 case MULTI_ARG_1_HI:
25643 case MULTI_ARG_1_QI:
25644 case MULTI_ARG_1_SI_DI:
25645 case MULTI_ARG_1_HI_DI:
25646 case MULTI_ARG_1_HI_SI:
25647 case MULTI_ARG_1_QI_DI:
25648 case MULTI_ARG_1_QI_SI:
25649 case MULTI_ARG_1_QI_HI:
25650 nargs = 1;
25651 break;
25652
25653 case MULTI_ARG_2_DI_CMP:
25654 case MULTI_ARG_2_SI_CMP:
25655 case MULTI_ARG_2_HI_CMP:
25656 case MULTI_ARG_2_QI_CMP:
25657 nargs = 2;
25658 comparison_p = true;
25659 break;
25660
25661 case MULTI_ARG_2_SF_TF:
25662 case MULTI_ARG_2_DF_TF:
25663 case MULTI_ARG_2_DI_TF:
25664 case MULTI_ARG_2_SI_TF:
25665 case MULTI_ARG_2_HI_TF:
25666 case MULTI_ARG_2_QI_TF:
25667 nargs = 2;
25668 tf_p = true;
25669 break;
25670
25671 default:
25672 gcc_unreachable ();
25673 }
25674
25675 if (optimize || !target
25676 || GET_MODE (target) != tmode
25677 || !insn_data[icode].operand[0].predicate (target, tmode))
25678 target = gen_reg_rtx (tmode);
25679
25680 gcc_assert (nargs <= 4);
25681
25682 for (i = 0; i < nargs; i++)
25683 {
25684 tree arg = CALL_EXPR_ARG (exp, i);
25685 rtx op = expand_normal (arg);
25686 int adjust = (comparison_p) ? 1 : 0;
25687 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
25688
25689 if (last_arg_constant && i == nargs - 1)
25690 {
25691 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
25692 {
25693 enum insn_code new_icode = icode;
25694 switch (icode)
25695 {
25696 case CODE_FOR_xop_vpermil2v2df3:
25697 case CODE_FOR_xop_vpermil2v4sf3:
25698 case CODE_FOR_xop_vpermil2v4df3:
25699 case CODE_FOR_xop_vpermil2v8sf3:
25700 error ("the last argument must be a 2-bit immediate");
25701 return gen_reg_rtx (tmode);
25702 case CODE_FOR_xop_rotlv2di3:
25703 new_icode = CODE_FOR_rotlv2di3;
25704 goto xop_rotl;
25705 case CODE_FOR_xop_rotlv4si3:
25706 new_icode = CODE_FOR_rotlv4si3;
25707 goto xop_rotl;
25708 case CODE_FOR_xop_rotlv8hi3:
25709 new_icode = CODE_FOR_rotlv8hi3;
25710 goto xop_rotl;
25711 case CODE_FOR_xop_rotlv16qi3:
25712 new_icode = CODE_FOR_rotlv16qi3;
25713 xop_rotl:
25714 if (CONST_INT_P (op))
25715 {
25716 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
25717 op = GEN_INT (INTVAL (op) & mask);
25718 gcc_checking_assert
25719 (insn_data[icode].operand[i + 1].predicate (op, mode));
25720 }
25721 else
25722 {
25723 gcc_checking_assert
25724 (nargs == 2
25725 && insn_data[new_icode].operand[0].mode == tmode
25726 && insn_data[new_icode].operand[1].mode == tmode
25727 && insn_data[new_icode].operand[2].mode == mode
25728 && insn_data[new_icode].operand[0].predicate
25729 == insn_data[icode].operand[0].predicate
25730 && insn_data[new_icode].operand[1].predicate
25731 == insn_data[icode].operand[1].predicate);
25732 icode = new_icode;
25733 goto non_constant;
25734 }
25735 break;
25736 default:
25737 gcc_unreachable ();
25738 }
25739 }
25740 }
25741 else
25742 {
25743 non_constant:
25744 if (VECTOR_MODE_P (mode))
25745 op = safe_vector_operand (op, mode);
25746
25747 /* If we aren't optimizing, only allow one memory operand to be
25748 generated. */
25749 if (memory_operand (op, mode))
25750 num_memory++;
25751
25752 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
25753
25754 if (optimize
25755 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
25756 || num_memory > 1)
25757 op = force_reg (mode, op);
25758 }
25759
25760 args[i].op = op;
25761 args[i].mode = mode;
25762 }
25763
25764 switch (nargs)
25765 {
25766 case 1:
25767 pat = GEN_FCN (icode) (target, args[0].op);
25768 break;
25769
25770 case 2:
25771 if (tf_p)
25772 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
25773 GEN_INT ((int)sub_code));
25774 else if (! comparison_p)
25775 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
25776 else
25777 {
25778 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
25779 args[0].op,
25780 args[1].op);
25781
25782 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
25783 }
25784 break;
25785
25786 case 3:
25787 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
25788 break;
25789
25790 case 4:
25791 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
25792 break;
25793
25794 default:
25795 gcc_unreachable ();
25796 }
25797
25798 if (! pat)
25799 return 0;
25800
25801 emit_insn (pat);
25802 return target;
25803 }
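/* For illustration only: for the XOP rotate builtins handled above, a
   constant count is reduced modulo the element width, e.g. for V4SImode
   the mask is GET_MODE_BITSIZE (SImode) - 1 == 31, so a count of 33 is
   emitted as 33 & 31 == 1.  A non-constant count instead falls through to
   the generic rotl pattern selected via new_icode.  */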
25804
25805 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
25806 insns with vec_merge. */
25807
25808 static rtx
25809 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
25810 rtx target)
25811 {
25812 rtx pat;
25813 tree arg0 = CALL_EXPR_ARG (exp, 0);
25814 rtx op1, op0 = expand_normal (arg0);
25815 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25816 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
25817
25818 if (optimize || !target
25819 || GET_MODE (target) != tmode
25820 || !insn_data[icode].operand[0].predicate (target, tmode))
25821 target = gen_reg_rtx (tmode);
25822
25823 if (VECTOR_MODE_P (mode0))
25824 op0 = safe_vector_operand (op0, mode0);
25825
25826 if ((optimize && !register_operand (op0, mode0))
25827 || !insn_data[icode].operand[1].predicate (op0, mode0))
25828 op0 = copy_to_mode_reg (mode0, op0);
25829
25830 op1 = op0;
25831 if (!insn_data[icode].operand[2].predicate (op1, mode0))
25832 op1 = copy_to_mode_reg (mode0, op1);
25833
25834 pat = GEN_FCN (icode) (target, op0, op1);
25835 if (! pat)
25836 return 0;
25837 emit_insn (pat);
25838 return target;
25839 }
25840
25841 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
25842
25843 static rtx
25844 ix86_expand_sse_compare (const struct builtin_description *d,
25845 tree exp, rtx target, bool swap)
25846 {
25847 rtx pat;
25848 tree arg0 = CALL_EXPR_ARG (exp, 0);
25849 tree arg1 = CALL_EXPR_ARG (exp, 1);
25850 rtx op0 = expand_normal (arg0);
25851 rtx op1 = expand_normal (arg1);
25852 rtx op2;
25853 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
25854 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
25855 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
25856 enum rtx_code comparison = d->comparison;
25857
25858 if (VECTOR_MODE_P (mode0))
25859 op0 = safe_vector_operand (op0, mode0);
25860 if (VECTOR_MODE_P (mode1))
25861 op1 = safe_vector_operand (op1, mode1);
25862
25863 /* Swap operands if we have a comparison that isn't available in
25864 hardware. */
25865 if (swap)
25866 {
25867 rtx tmp = gen_reg_rtx (mode1);
25868 emit_move_insn (tmp, op1);
25869 op1 = op0;
25870 op0 = tmp;
25871 }
25872
25873 if (optimize || !target
25874 || GET_MODE (target) != tmode
25875 || !insn_data[d->icode].operand[0].predicate (target, tmode))
25876 target = gen_reg_rtx (tmode);
25877
25878 if ((optimize && !register_operand (op0, mode0))
25879 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
25880 op0 = copy_to_mode_reg (mode0, op0);
25881 if ((optimize && !register_operand (op1, mode1))
25882 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
25883 op1 = copy_to_mode_reg (mode1, op1);
25884
25885 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
25886 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
25887 if (! pat)
25888 return 0;
25889 emit_insn (pat);
25890 return target;
25891 }
25892
25893 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
25894
25895 static rtx
25896 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
25897 rtx target)
25898 {
25899 rtx pat;
25900 tree arg0 = CALL_EXPR_ARG (exp, 0);
25901 tree arg1 = CALL_EXPR_ARG (exp, 1);
25902 rtx op0 = expand_normal (arg0);
25903 rtx op1 = expand_normal (arg1);
25904 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
25905 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
25906 enum rtx_code comparison = d->comparison;
25907
25908 if (VECTOR_MODE_P (mode0))
25909 op0 = safe_vector_operand (op0, mode0);
25910 if (VECTOR_MODE_P (mode1))
25911 op1 = safe_vector_operand (op1, mode1);
25912
25913 /* Swap operands if we have a comparison that isn't available in
25914 hardware. */
25915 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
25916 {
25917 rtx tmp = op1;
25918 op1 = op0;
25919 op0 = tmp;
25920 }
25921
25922 target = gen_reg_rtx (SImode);
25923 emit_move_insn (target, const0_rtx);
25924 target = gen_rtx_SUBREG (QImode, target, 0);
25925
25926 if ((optimize && !register_operand (op0, mode0))
25927 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
25928 op0 = copy_to_mode_reg (mode0, op0);
25929 if ((optimize && !register_operand (op1, mode1))
25930 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
25931 op1 = copy_to_mode_reg (mode1, op1);
25932
25933 pat = GEN_FCN (d->icode) (op0, op1);
25934 if (! pat)
25935 return 0;
25936 emit_insn (pat);
25937 emit_insn (gen_rtx_SET (VOIDmode,
25938 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
25939 gen_rtx_fmt_ee (comparison, QImode,
25940 SET_DEST (pat),
25941 const0_rtx)));
25942
25943 return SUBREG_REG (target);
25944 }
25945
25946 /* Subroutine of ix86_expand_args_builtin to take care of round insns. */
25947
25948 static rtx
25949 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
25950 rtx target)
25951 {
25952 rtx pat;
25953 tree arg0 = CALL_EXPR_ARG (exp, 0);
25954 rtx op1, op0 = expand_normal (arg0);
25955 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
25956 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
25957
25958 if (optimize || target == 0
25959 || GET_MODE (target) != tmode
25960 || !insn_data[d->icode].operand[0].predicate (target, tmode))
25961 target = gen_reg_rtx (tmode);
25962
25963 if (VECTOR_MODE_P (mode0))
25964 op0 = safe_vector_operand (op0, mode0);
25965
25966 if ((optimize && !register_operand (op0, mode0))
25967 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
25968 op0 = copy_to_mode_reg (mode0, op0);
25969
25970 op1 = GEN_INT (d->comparison);
25971
25972 pat = GEN_FCN (d->icode) (target, op0, op1);
25973 if (! pat)
25974 return 0;
25975 emit_insn (pat);
25976 return target;
25977 }
25978
25979 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
25980
25981 static rtx
25982 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
25983 rtx target)
25984 {
25985 rtx pat;
25986 tree arg0 = CALL_EXPR_ARG (exp, 0);
25987 tree arg1 = CALL_EXPR_ARG (exp, 1);
25988 rtx op0 = expand_normal (arg0);
25989 rtx op1 = expand_normal (arg1);
25990 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
25991 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
25992 enum rtx_code comparison = d->comparison;
25993
25994 if (VECTOR_MODE_P (mode0))
25995 op0 = safe_vector_operand (op0, mode0);
25996 if (VECTOR_MODE_P (mode1))
25997 op1 = safe_vector_operand (op1, mode1);
25998
25999 target = gen_reg_rtx (SImode);
26000 emit_move_insn (target, const0_rtx);
26001 target = gen_rtx_SUBREG (QImode, target, 0);
26002
26003 if ((optimize && !register_operand (op0, mode0))
26004 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
26005 op0 = copy_to_mode_reg (mode0, op0);
26006 if ((optimize && !register_operand (op1, mode1))
26007 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
26008 op1 = copy_to_mode_reg (mode1, op1);
26009
26010 pat = GEN_FCN (d->icode) (op0, op1);
26011 if (! pat)
26012 return 0;
26013 emit_insn (pat);
26014 emit_insn (gen_rtx_SET (VOIDmode,
26015 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26016 gen_rtx_fmt_ee (comparison, QImode,
26017 SET_DEST (pat),
26018 const0_rtx)));
26019
26020 return SUBREG_REG (target);
26021 }
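/* For illustration only: this expander backs builtins such as
   __builtin_ia32_ptestz128, so code like

     int all_zero = __builtin_ia32_ptestz128 ((__v2di) a, (__v2di) b);

   becomes a ptest followed by a flags test materialized into the QImode
   subreg built above.  The casts assume a and b are 128-bit integer
   vectors.  */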
26022
26023 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
26024
26025 static rtx
26026 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
26027 tree exp, rtx target)
26028 {
26029 rtx pat;
26030 tree arg0 = CALL_EXPR_ARG (exp, 0);
26031 tree arg1 = CALL_EXPR_ARG (exp, 1);
26032 tree arg2 = CALL_EXPR_ARG (exp, 2);
26033 tree arg3 = CALL_EXPR_ARG (exp, 3);
26034 tree arg4 = CALL_EXPR_ARG (exp, 4);
26035 rtx scratch0, scratch1;
26036 rtx op0 = expand_normal (arg0);
26037 rtx op1 = expand_normal (arg1);
26038 rtx op2 = expand_normal (arg2);
26039 rtx op3 = expand_normal (arg3);
26040 rtx op4 = expand_normal (arg4);
26041 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
26042
26043 tmode0 = insn_data[d->icode].operand[0].mode;
26044 tmode1 = insn_data[d->icode].operand[1].mode;
26045 modev2 = insn_data[d->icode].operand[2].mode;
26046 modei3 = insn_data[d->icode].operand[3].mode;
26047 modev4 = insn_data[d->icode].operand[4].mode;
26048 modei5 = insn_data[d->icode].operand[5].mode;
26049 modeimm = insn_data[d->icode].operand[6].mode;
26050
26051 if (VECTOR_MODE_P (modev2))
26052 op0 = safe_vector_operand (op0, modev2);
26053 if (VECTOR_MODE_P (modev4))
26054 op2 = safe_vector_operand (op2, modev4);
26055
26056 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
26057 op0 = copy_to_mode_reg (modev2, op0);
26058 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
26059 op1 = copy_to_mode_reg (modei3, op1);
26060 if ((optimize && !register_operand (op2, modev4))
26061 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
26062 op2 = copy_to_mode_reg (modev4, op2);
26063 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
26064 op3 = copy_to_mode_reg (modei5, op3);
26065
26066 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
26067 {
26068 error ("the fifth argument must be an 8-bit immediate");
26069 return const0_rtx;
26070 }
26071
26072 if (d->code == IX86_BUILTIN_PCMPESTRI128)
26073 {
26074 if (optimize || !target
26075 || GET_MODE (target) != tmode0
26076 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
26077 target = gen_reg_rtx (tmode0);
26078
26079 scratch1 = gen_reg_rtx (tmode1);
26080
26081 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
26082 }
26083 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
26084 {
26085 if (optimize || !target
26086 || GET_MODE (target) != tmode1
26087 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
26088 target = gen_reg_rtx (tmode1);
26089
26090 scratch0 = gen_reg_rtx (tmode0);
26091
26092 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
26093 }
26094 else
26095 {
26096 gcc_assert (d->flag);
26097
26098 scratch0 = gen_reg_rtx (tmode0);
26099 scratch1 = gen_reg_rtx (tmode1);
26100
26101 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
26102 }
26103
26104 if (! pat)
26105 return 0;
26106
26107 emit_insn (pat);
26108
26109 if (d->flag)
26110 {
26111 target = gen_reg_rtx (SImode);
26112 emit_move_insn (target, const0_rtx);
26113 target = gen_rtx_SUBREG (QImode, target, 0);
26114
26115 emit_insn
26116 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26117 gen_rtx_fmt_ee (EQ, QImode,
26118 gen_rtx_REG ((enum machine_mode) d->flag,
26119 FLAGS_REG),
26120 const0_rtx)));
26121 return SUBREG_REG (target);
26122 }
26123 else
26124 return target;
26125 }
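/* For illustration only: this expander serves builtins such as
   __builtin_ia32_pcmpestri128, whose operands are the two vectors, their
   explicit lengths and an 8-bit immediate control, e.g.

     int idx = __builtin_ia32_pcmpestri128 (a, la, b, lb, 0x0c);

   with a and b assumed to be __v16qi values; 0x0c is only an example
   control byte, and _mm_cmpestri is the usual entry point.  */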
26126
26127
26128 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
26129
26130 static rtx
26131 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
26132 tree exp, rtx target)
26133 {
26134 rtx pat;
26135 tree arg0 = CALL_EXPR_ARG (exp, 0);
26136 tree arg1 = CALL_EXPR_ARG (exp, 1);
26137 tree arg2 = CALL_EXPR_ARG (exp, 2);
26138 rtx scratch0, scratch1;
26139 rtx op0 = expand_normal (arg0);
26140 rtx op1 = expand_normal (arg1);
26141 rtx op2 = expand_normal (arg2);
26142 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
26143
26144 tmode0 = insn_data[d->icode].operand[0].mode;
26145 tmode1 = insn_data[d->icode].operand[1].mode;
26146 modev2 = insn_data[d->icode].operand[2].mode;
26147 modev3 = insn_data[d->icode].operand[3].mode;
26148 modeimm = insn_data[d->icode].operand[4].mode;
26149
26150 if (VECTOR_MODE_P (modev2))
26151 op0 = safe_vector_operand (op0, modev2);
26152 if (VECTOR_MODE_P (modev3))
26153 op1 = safe_vector_operand (op1, modev3);
26154
26155 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
26156 op0 = copy_to_mode_reg (modev2, op0);
26157 if ((optimize && !register_operand (op1, modev3))
26158 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
26159 op1 = copy_to_mode_reg (modev3, op1);
26160
26161 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
26162 {
26163 error ("the third argument must be an 8-bit immediate");
26164 return const0_rtx;
26165 }
26166
26167 if (d->code == IX86_BUILTIN_PCMPISTRI128)
26168 {
26169 if (optimize || !target
26170 || GET_MODE (target) != tmode0
26171 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
26172 target = gen_reg_rtx (tmode0);
26173
26174 scratch1 = gen_reg_rtx (tmode1);
26175
26176 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
26177 }
26178 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
26179 {
26180 if (optimize || !target
26181 || GET_MODE (target) != tmode1
26182 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
26183 target = gen_reg_rtx (tmode1);
26184
26185 scratch0 = gen_reg_rtx (tmode0);
26186
26187 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
26188 }
26189 else
26190 {
26191 gcc_assert (d->flag);
26192
26193 scratch0 = gen_reg_rtx (tmode0);
26194 scratch1 = gen_reg_rtx (tmode1);
26195
26196 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
26197 }
26198
26199 if (! pat)
26200 return 0;
26201
26202 emit_insn (pat);
26203
26204 if (d->flag)
26205 {
26206 target = gen_reg_rtx (SImode);
26207 emit_move_insn (target, const0_rtx);
26208 target = gen_rtx_SUBREG (QImode, target, 0);
26209
26210 emit_insn
26211 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26212 gen_rtx_fmt_ee (EQ, QImode,
26213 gen_rtx_REG ((enum machine_mode) d->flag,
26214 FLAGS_REG),
26215 const0_rtx)));
26216 return SUBREG_REG (target);
26217 }
26218 else
26219 return target;
26220 }
26221
26222 /* Subroutine of ix86_expand_builtin to take care of insns with a
26223 variable number of operands. */
26224
26225 static rtx
26226 ix86_expand_args_builtin (const struct builtin_description *d,
26227 tree exp, rtx target)
26228 {
26229 rtx pat, real_target;
26230 unsigned int i, nargs;
26231 unsigned int nargs_constant = 0;
26232 int num_memory = 0;
26233 struct
26234 {
26235 rtx op;
26236 enum machine_mode mode;
26237 } args[4];
26238 bool last_arg_count = false;
26239 enum insn_code icode = d->icode;
26240 const struct insn_data_d *insn_p = &insn_data[icode];
26241 enum machine_mode tmode = insn_p->operand[0].mode;
26242 enum machine_mode rmode = VOIDmode;
26243 bool swap = false;
26244 enum rtx_code comparison = d->comparison;
26245
26246 switch ((enum ix86_builtin_func_type) d->flag)
26247 {
26248 case V2DF_FTYPE_V2DF_ROUND:
26249 case V4DF_FTYPE_V4DF_ROUND:
26250 case V4SF_FTYPE_V4SF_ROUND:
26251 case V8SF_FTYPE_V8SF_ROUND:
26252 return ix86_expand_sse_round (d, exp, target);
26253 case INT_FTYPE_V8SF_V8SF_PTEST:
26254 case INT_FTYPE_V4DI_V4DI_PTEST:
26255 case INT_FTYPE_V4DF_V4DF_PTEST:
26256 case INT_FTYPE_V4SF_V4SF_PTEST:
26257 case INT_FTYPE_V2DI_V2DI_PTEST:
26258 case INT_FTYPE_V2DF_V2DF_PTEST:
26259 return ix86_expand_sse_ptest (d, exp, target);
26260 case FLOAT128_FTYPE_FLOAT128:
26261 case FLOAT_FTYPE_FLOAT:
26262 case INT_FTYPE_INT:
26263 case UINT64_FTYPE_INT:
26264 case UINT16_FTYPE_UINT16:
26265 case INT64_FTYPE_INT64:
26266 case INT64_FTYPE_V4SF:
26267 case INT64_FTYPE_V2DF:
26268 case INT_FTYPE_V16QI:
26269 case INT_FTYPE_V8QI:
26270 case INT_FTYPE_V8SF:
26271 case INT_FTYPE_V4DF:
26272 case INT_FTYPE_V4SF:
26273 case INT_FTYPE_V2DF:
26274 case V16QI_FTYPE_V16QI:
26275 case V8SI_FTYPE_V8SF:
26276 case V8SI_FTYPE_V4SI:
26277 case V8HI_FTYPE_V8HI:
26278 case V8HI_FTYPE_V16QI:
26279 case V8QI_FTYPE_V8QI:
26280 case V8SF_FTYPE_V8SF:
26281 case V8SF_FTYPE_V8SI:
26282 case V8SF_FTYPE_V4SF:
26283 case V8SF_FTYPE_V8HI:
26284 case V4SI_FTYPE_V4SI:
26285 case V4SI_FTYPE_V16QI:
26286 case V4SI_FTYPE_V4SF:
26287 case V4SI_FTYPE_V8SI:
26288 case V4SI_FTYPE_V8HI:
26289 case V4SI_FTYPE_V4DF:
26290 case V4SI_FTYPE_V2DF:
26291 case V4HI_FTYPE_V4HI:
26292 case V4DF_FTYPE_V4DF:
26293 case V4DF_FTYPE_V4SI:
26294 case V4DF_FTYPE_V4SF:
26295 case V4DF_FTYPE_V2DF:
26296 case V4SF_FTYPE_V4SF:
26297 case V4SF_FTYPE_V4SI:
26298 case V4SF_FTYPE_V8SF:
26299 case V4SF_FTYPE_V4DF:
26300 case V4SF_FTYPE_V8HI:
26301 case V4SF_FTYPE_V2DF:
26302 case V2DI_FTYPE_V2DI:
26303 case V2DI_FTYPE_V16QI:
26304 case V2DI_FTYPE_V8HI:
26305 case V2DI_FTYPE_V4SI:
26306 case V2DF_FTYPE_V2DF:
26307 case V2DF_FTYPE_V4SI:
26308 case V2DF_FTYPE_V4DF:
26309 case V2DF_FTYPE_V4SF:
26310 case V2DF_FTYPE_V2SI:
26311 case V2SI_FTYPE_V2SI:
26312 case V2SI_FTYPE_V4SF:
26313 case V2SI_FTYPE_V2SF:
26314 case V2SI_FTYPE_V2DF:
26315 case V2SF_FTYPE_V2SF:
26316 case V2SF_FTYPE_V2SI:
26317 nargs = 1;
26318 break;
26319 case V4SF_FTYPE_V4SF_VEC_MERGE:
26320 case V2DF_FTYPE_V2DF_VEC_MERGE:
26321 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
26322 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
26323 case V16QI_FTYPE_V16QI_V16QI:
26324 case V16QI_FTYPE_V8HI_V8HI:
26325 case V8QI_FTYPE_V8QI_V8QI:
26326 case V8QI_FTYPE_V4HI_V4HI:
26327 case V8HI_FTYPE_V8HI_V8HI:
26328 case V8HI_FTYPE_V16QI_V16QI:
26329 case V8HI_FTYPE_V4SI_V4SI:
26330 case V8SF_FTYPE_V8SF_V8SF:
26331 case V8SF_FTYPE_V8SF_V8SI:
26332 case V4SI_FTYPE_V4SI_V4SI:
26333 case V4SI_FTYPE_V8HI_V8HI:
26334 case V4SI_FTYPE_V4SF_V4SF:
26335 case V4SI_FTYPE_V2DF_V2DF:
26336 case V4HI_FTYPE_V4HI_V4HI:
26337 case V4HI_FTYPE_V8QI_V8QI:
26338 case V4HI_FTYPE_V2SI_V2SI:
26339 case V4DF_FTYPE_V4DF_V4DF:
26340 case V4DF_FTYPE_V4DF_V4DI:
26341 case V4SF_FTYPE_V4SF_V4SF:
26342 case V4SF_FTYPE_V4SF_V4SI:
26343 case V4SF_FTYPE_V4SF_V2SI:
26344 case V4SF_FTYPE_V4SF_V2DF:
26345 case V4SF_FTYPE_V4SF_DI:
26346 case V4SF_FTYPE_V4SF_SI:
26347 case V2DI_FTYPE_V2DI_V2DI:
26348 case V2DI_FTYPE_V16QI_V16QI:
26349 case V2DI_FTYPE_V4SI_V4SI:
26350 case V2DI_FTYPE_V2DI_V16QI:
26351 case V2DI_FTYPE_V2DF_V2DF:
26352 case V2SI_FTYPE_V2SI_V2SI:
26353 case V2SI_FTYPE_V4HI_V4HI:
26354 case V2SI_FTYPE_V2SF_V2SF:
26355 case V2DF_FTYPE_V2DF_V2DF:
26356 case V2DF_FTYPE_V2DF_V4SF:
26357 case V2DF_FTYPE_V2DF_V2DI:
26358 case V2DF_FTYPE_V2DF_DI:
26359 case V2DF_FTYPE_V2DF_SI:
26360 case V2SF_FTYPE_V2SF_V2SF:
26361 case V1DI_FTYPE_V1DI_V1DI:
26362 case V1DI_FTYPE_V8QI_V8QI:
26363 case V1DI_FTYPE_V2SI_V2SI:
26364 if (comparison == UNKNOWN)
26365 return ix86_expand_binop_builtin (icode, exp, target);
26366 nargs = 2;
26367 break;
26368 case V4SF_FTYPE_V4SF_V4SF_SWAP:
26369 case V2DF_FTYPE_V2DF_V2DF_SWAP:
26370 gcc_assert (comparison != UNKNOWN);
26371 nargs = 2;
26372 swap = true;
26373 break;
26374 case V8HI_FTYPE_V8HI_V8HI_COUNT:
26375 case V8HI_FTYPE_V8HI_SI_COUNT:
26376 case V4SI_FTYPE_V4SI_V4SI_COUNT:
26377 case V4SI_FTYPE_V4SI_SI_COUNT:
26378 case V4HI_FTYPE_V4HI_V4HI_COUNT:
26379 case V4HI_FTYPE_V4HI_SI_COUNT:
26380 case V2DI_FTYPE_V2DI_V2DI_COUNT:
26381 case V2DI_FTYPE_V2DI_SI_COUNT:
26382 case V2SI_FTYPE_V2SI_V2SI_COUNT:
26383 case V2SI_FTYPE_V2SI_SI_COUNT:
26384 case V1DI_FTYPE_V1DI_V1DI_COUNT:
26385 case V1DI_FTYPE_V1DI_SI_COUNT:
26386 nargs = 2;
26387 last_arg_count = true;
26388 break;
26389 case UINT64_FTYPE_UINT64_UINT64:
26390 case UINT_FTYPE_UINT_UINT:
26391 case UINT_FTYPE_UINT_USHORT:
26392 case UINT_FTYPE_UINT_UCHAR:
26393 case UINT16_FTYPE_UINT16_INT:
26394 case UINT8_FTYPE_UINT8_INT:
26395 nargs = 2;
26396 break;
26397 case V2DI_FTYPE_V2DI_INT_CONVERT:
26398 nargs = 2;
26399 rmode = V1TImode;
26400 nargs_constant = 1;
26401 break;
26402 case V8HI_FTYPE_V8HI_INT:
26403 case V8HI_FTYPE_V8SF_INT:
26404 case V8HI_FTYPE_V4SF_INT:
26405 case V8SF_FTYPE_V8SF_INT:
26406 case V4SI_FTYPE_V4SI_INT:
26407 case V4SI_FTYPE_V8SI_INT:
26408 case V4HI_FTYPE_V4HI_INT:
26409 case V4DF_FTYPE_V4DF_INT:
26410 case V4SF_FTYPE_V4SF_INT:
26411 case V4SF_FTYPE_V8SF_INT:
26412 case V2DI_FTYPE_V2DI_INT:
26413 case V2DF_FTYPE_V2DF_INT:
26414 case V2DF_FTYPE_V4DF_INT:
26415 nargs = 2;
26416 nargs_constant = 1;
26417 break;
26418 case V16QI_FTYPE_V16QI_V16QI_V16QI:
26419 case V8SF_FTYPE_V8SF_V8SF_V8SF:
26420 case V4DF_FTYPE_V4DF_V4DF_V4DF:
26421 case V4SF_FTYPE_V4SF_V4SF_V4SF:
26422 case V2DF_FTYPE_V2DF_V2DF_V2DF:
26423 nargs = 3;
26424 break;
26425 case V16QI_FTYPE_V16QI_V16QI_INT:
26426 case V8HI_FTYPE_V8HI_V8HI_INT:
26427 case V8SI_FTYPE_V8SI_V8SI_INT:
26428 case V8SI_FTYPE_V8SI_V4SI_INT:
26429 case V8SF_FTYPE_V8SF_V8SF_INT:
26430 case V8SF_FTYPE_V8SF_V4SF_INT:
26431 case V4SI_FTYPE_V4SI_V4SI_INT:
26432 case V4DF_FTYPE_V4DF_V4DF_INT:
26433 case V4DF_FTYPE_V4DF_V2DF_INT:
26434 case V4SF_FTYPE_V4SF_V4SF_INT:
26435 case V2DI_FTYPE_V2DI_V2DI_INT:
26436 case V2DF_FTYPE_V2DF_V2DF_INT:
26437 nargs = 3;
26438 nargs_constant = 1;
26439 break;
26440 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
26441 nargs = 3;
26442 rmode = V2DImode;
26443 nargs_constant = 1;
26444 break;
26445 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
26446 nargs = 3;
26447 rmode = DImode;
26448 nargs_constant = 1;
26449 break;
26450 case V2DI_FTYPE_V2DI_UINT_UINT:
26451 nargs = 3;
26452 nargs_constant = 2;
26453 break;
26454 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
26455 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
26456 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
26457 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
26458 nargs = 4;
26459 nargs_constant = 1;
26460 break;
26461 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
26462 nargs = 4;
26463 nargs_constant = 2;
26464 break;
26465 default:
26466 gcc_unreachable ();
26467 }
26468
26469 gcc_assert (nargs <= ARRAY_SIZE (args));
26470
26471 if (comparison != UNKNOWN)
26472 {
26473 gcc_assert (nargs == 2);
26474 return ix86_expand_sse_compare (d, exp, target, swap);
26475 }
26476
26477 if (rmode == VOIDmode || rmode == tmode)
26478 {
26479 if (optimize
26480 || target == 0
26481 || GET_MODE (target) != tmode
26482 || !insn_p->operand[0].predicate (target, tmode))
26483 target = gen_reg_rtx (tmode);
26484 real_target = target;
26485 }
26486 else
26487 {
26488 target = gen_reg_rtx (rmode);
26489 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
26490 }
26491
26492 for (i = 0; i < nargs; i++)
26493 {
26494 tree arg = CALL_EXPR_ARG (exp, i);
26495 rtx op = expand_normal (arg);
26496 enum machine_mode mode = insn_p->operand[i + 1].mode;
26497 bool match = insn_p->operand[i + 1].predicate (op, mode);
26498
26499 if (last_arg_count && (i + 1) == nargs)
26500 {
26501 /* SIMD shift insns take either an 8-bit immediate or a
26502 register as the count, but the builtin functions take an int as
26503 the count. If the count doesn't match, we put it in a register. */
26504 if (!match)
26505 {
26506 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
26507 if (!insn_p->operand[i + 1].predicate (op, mode))
26508 op = copy_to_reg (op);
26509 }
26510 }
26511 else if ((nargs - i) <= nargs_constant)
26512 {
26513 if (!match)
26514 switch (icode)
26515 {
26516 case CODE_FOR_sse4_1_roundpd:
26517 case CODE_FOR_sse4_1_roundps:
26518 case CODE_FOR_sse4_1_roundsd:
26519 case CODE_FOR_sse4_1_roundss:
26520 case CODE_FOR_sse4_1_blendps:
26521 case CODE_FOR_avx_blendpd256:
26522 case CODE_FOR_avx_vpermilv4df:
26523 case CODE_FOR_avx_roundpd256:
26524 case CODE_FOR_avx_roundps256:
26525 error ("the last argument must be a 4-bit immediate");
26526 return const0_rtx;
26527
26528 case CODE_FOR_sse4_1_blendpd:
26529 case CODE_FOR_avx_vpermilv2df:
26530 case CODE_FOR_xop_vpermil2v2df3:
26531 case CODE_FOR_xop_vpermil2v4sf3:
26532 case CODE_FOR_xop_vpermil2v4df3:
26533 case CODE_FOR_xop_vpermil2v8sf3:
26534 error ("the last argument must be a 2-bit immediate");
26535 return const0_rtx;
26536
26537 case CODE_FOR_avx_vextractf128v4df:
26538 case CODE_FOR_avx_vextractf128v8sf:
26539 case CODE_FOR_avx_vextractf128v8si:
26540 case CODE_FOR_avx_vinsertf128v4df:
26541 case CODE_FOR_avx_vinsertf128v8sf:
26542 case CODE_FOR_avx_vinsertf128v8si:
26543 error ("the last argument must be a 1-bit immediate");
26544 return const0_rtx;
26545
26546 case CODE_FOR_avx_vmcmpv2df3:
26547 case CODE_FOR_avx_vmcmpv4sf3:
26548 case CODE_FOR_avx_cmpv2df3:
26549 case CODE_FOR_avx_cmpv4sf3:
26550 case CODE_FOR_avx_cmpv4df3:
26551 case CODE_FOR_avx_cmpv8sf3:
26552 error ("the last argument must be a 5-bit immediate");
26553 return const0_rtx;
26554
26555 default:
26556 switch (nargs_constant)
26557 {
26558 case 2:
26559 if ((nargs - i) == nargs_constant)
26560 {
26561 error ("the next to last argument must be an 8-bit immediate");
26562 break;
26563 }
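/* FALLTHRU */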
26564 case 1:
26565 error ("the last argument must be an 8-bit immediate");
26566 break;
26567 default:
26568 gcc_unreachable ();
26569 }
26570 return const0_rtx;
26571 }
26572 }
26573 else
26574 {
26575 if (VECTOR_MODE_P (mode))
26576 op = safe_vector_operand (op, mode);
26577
26578 /* If we aren't optimizing, only allow one memory operand to
26579 be generated. */
26580 if (memory_operand (op, mode))
26581 num_memory++;
26582
26583 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
26584 {
26585 if (optimize || !match || num_memory > 1)
26586 op = copy_to_mode_reg (mode, op);
26587 }
26588 else
26589 {
26590 op = copy_to_reg (op);
26591 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
26592 }
26593 }
26594
26595 args[i].op = op;
26596 args[i].mode = mode;
26597 }
26598
26599 switch (nargs)
26600 {
26601 case 1:
26602 pat = GEN_FCN (icode) (real_target, args[0].op);
26603 break;
26604 case 2:
26605 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
26606 break;
26607 case 3:
26608 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
26609 args[2].op);
26610 break;
26611 case 4:
26612 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
26613 args[2].op, args[3].op);
26614 break;
26615 default:
26616 gcc_unreachable ();
26617 }
26618
26619 if (! pat)
26620 return 0;
26621
26622 emit_insn (pat);
26623 return target;
26624 }
26625
26626 /* Subroutine of ix86_expand_builtin to take care of special insns
26627 with variable number of operands. */
26628
26629 static rtx
26630 ix86_expand_special_args_builtin (const struct builtin_description *d,
26631 tree exp, rtx target)
26632 {
26633 tree arg;
26634 rtx pat, op;
26635 unsigned int i, nargs, arg_adjust, memory;
26636 struct
26637 {
26638 rtx op;
26639 enum machine_mode mode;
26640 } args[3];
26641 enum insn_code icode = d->icode;
26642 bool last_arg_constant = false;
26643 const struct insn_data_d *insn_p = &insn_data[icode];
26644 enum machine_mode tmode = insn_p->operand[0].mode;
26645 enum { load, store } klass;
26646
26647 switch ((enum ix86_builtin_func_type) d->flag)
26648 {
26649 case VOID_FTYPE_VOID:
26650 if (icode == CODE_FOR_avx_vzeroupper)
26651 target = GEN_INT (vzeroupper_intrinsic);
26652 emit_insn (GEN_FCN (icode) (target));
26653 return 0;
26654 case VOID_FTYPE_UINT64:
26655 case VOID_FTYPE_UNSIGNED:
26656 nargs = 0;
26657 klass = store;
26658 memory = 0;
26659 break;
26661 case UINT64_FTYPE_VOID:
26662 case UNSIGNED_FTYPE_VOID:
26663 nargs = 0;
26664 klass = load;
26665 memory = 0;
26666 break;
26667 case UINT64_FTYPE_PUNSIGNED:
26668 case V2DI_FTYPE_PV2DI:
26669 case V32QI_FTYPE_PCCHAR:
26670 case V16QI_FTYPE_PCCHAR:
26671 case V8SF_FTYPE_PCV4SF:
26672 case V8SF_FTYPE_PCFLOAT:
26673 case V4SF_FTYPE_PCFLOAT:
26674 case V4DF_FTYPE_PCV2DF:
26675 case V4DF_FTYPE_PCDOUBLE:
26676 case V2DF_FTYPE_PCDOUBLE:
26677 case VOID_FTYPE_PVOID:
26678 nargs = 1;
26679 klass = load;
26680 memory = 0;
26681 break;
26682 case VOID_FTYPE_PV2SF_V4SF:
26683 case VOID_FTYPE_PV4DI_V4DI:
26684 case VOID_FTYPE_PV2DI_V2DI:
26685 case VOID_FTYPE_PCHAR_V32QI:
26686 case VOID_FTYPE_PCHAR_V16QI:
26687 case VOID_FTYPE_PFLOAT_V8SF:
26688 case VOID_FTYPE_PFLOAT_V4SF:
26689 case VOID_FTYPE_PDOUBLE_V4DF:
26690 case VOID_FTYPE_PDOUBLE_V2DF:
26691 case VOID_FTYPE_PULONGLONG_ULONGLONG:
26692 case VOID_FTYPE_PINT_INT:
26693 nargs = 1;
26694 klass = store;
26695 /* Reserve memory operand for target. */
26696 memory = ARRAY_SIZE (args);
26697 break;
26698 case V4SF_FTYPE_V4SF_PCV2SF:
26699 case V2DF_FTYPE_V2DF_PCDOUBLE:
26700 nargs = 2;
26701 klass = load;
26702 memory = 1;
26703 break;
26704 case V8SF_FTYPE_PCV8SF_V8SI:
26705 case V4DF_FTYPE_PCV4DF_V4DI:
26706 case V4SF_FTYPE_PCV4SF_V4SI:
26707 case V2DF_FTYPE_PCV2DF_V2DI:
26708 nargs = 2;
26709 klass = load;
26710 memory = 0;
26711 break;
26712 case VOID_FTYPE_PV8SF_V8SI_V8SF:
26713 case VOID_FTYPE_PV4DF_V4DI_V4DF:
26714 case VOID_FTYPE_PV4SF_V4SI_V4SF:
26715 case VOID_FTYPE_PV2DF_V2DI_V2DF:
26716 nargs = 2;
26717 klass = store;
26718 /* Reserve memory operand for target. */
26719 memory = ARRAY_SIZE (args);
26720 break;
26721 case VOID_FTYPE_UINT_UINT_UINT:
26722 case VOID_FTYPE_UINT64_UINT_UINT:
26723 case UCHAR_FTYPE_UINT_UINT_UINT:
26724 case UCHAR_FTYPE_UINT64_UINT_UINT:
26725 nargs = 3;
26726 klass = load;
26727 memory = ARRAY_SIZE (args);
26728 last_arg_constant = true;
26729 break;
26730 default:
26731 gcc_unreachable ();
26732 }
26733
26734 gcc_assert (nargs <= ARRAY_SIZE (args));
26735
26736 if (klass == store)
26737 {
26738 arg = CALL_EXPR_ARG (exp, 0);
26739 op = expand_normal (arg);
26740 gcc_assert (target == 0);
26741 if (memory)
26742 {
26743 if (GET_MODE (op) != Pmode)
26744 op = convert_to_mode (Pmode, op, 1);
26745 target = gen_rtx_MEM (tmode, force_reg (Pmode, op));
26746 }
26747 else
26748 target = force_reg (tmode, op);
26749 arg_adjust = 1;
26750 }
26751 else
26752 {
26753 arg_adjust = 0;
26754 if (optimize
26755 || target == 0
26756 || GET_MODE (target) != tmode
26757 || !insn_p->operand[0].predicate (target, tmode))
26758 target = gen_reg_rtx (tmode);
26759 }
26760
26761 for (i = 0; i < nargs; i++)
26762 {
26763 enum machine_mode mode = insn_p->operand[i + 1].mode;
26764 bool match;
26765
26766 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
26767 op = expand_normal (arg);
26768 match = insn_p->operand[i + 1].predicate (op, mode);
26769
26770 if (last_arg_constant && (i + 1) == nargs)
26771 {
26772 if (!match)
26773 {
26774 if (icode == CODE_FOR_lwp_lwpvalsi3
26775 || icode == CODE_FOR_lwp_lwpinssi3
26776 || icode == CODE_FOR_lwp_lwpvaldi3
26777 || icode == CODE_FOR_lwp_lwpinsdi3)
26778 error ("the last argument must be a 32-bit immediate");
26779 else
26780 error ("the last argument must be an 8-bit immediate");
26781 return const0_rtx;
26782 }
26783 }
26784 else
26785 {
26786 if (i == memory)
26787 {
26788 /* This must be the memory operand. */
26789 if (GET_MODE (op) != Pmode)
26790 op = convert_to_mode (Pmode, op, 1);
26791 op = gen_rtx_MEM (mode, force_reg (Pmode, op));
26792 gcc_assert (GET_MODE (op) == mode
26793 || GET_MODE (op) == VOIDmode);
26794 }
26795 else
26796 {
26797 /* This must be a register. */
26798 if (VECTOR_MODE_P (mode))
26799 op = safe_vector_operand (op, mode);
26800
26801 gcc_assert (GET_MODE (op) == mode
26802 || GET_MODE (op) == VOIDmode);
26803 op = copy_to_mode_reg (mode, op);
26804 }
26805 }
26806
26807 args[i].op = op;
26808 args[i].mode = mode;
26809 }
26810
26811 switch (nargs)
26812 {
26813 case 0:
26814 pat = GEN_FCN (icode) (target);
26815 break;
26816 case 1:
26817 pat = GEN_FCN (icode) (target, args[0].op);
26818 break;
26819 case 2:
26820 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
26821 break;
26822 case 3:
26823 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
26824 break;
26825 default:
26826 gcc_unreachable ();
26827 }
26828
26829 if (! pat)
26830 return 0;
26831 emit_insn (pat);
26832 return klass == store ? 0 : target;
26833 }
26834
26835 /* Return the integer constant in ARG. Constrain it to be in the range
26836 of the subparts of VEC_TYPE; issue an error if not. */
26837
26838 static int
26839 get_element_number (tree vec_type, tree arg)
26840 {
26841 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
26842
26843 if (!host_integerp (arg, 1)
26844 || (elt = tree_low_cst (arg, 1), elt > max))
26845 {
26846 error ("selector must be an integer constant in the range 0..%wi", max);
26847 return 0;
26848 }
26849
26850 return elt;
26851 }
26852
26853 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
26854 ix86_expand_vector_init. We DO have language-level syntax for this, in
26855 the form of (type){ init-list }. Except that since we can't place emms
26856 instructions from inside the compiler, we can't allow the use of MMX
26857 registers unless the user explicitly asks for it. So we do *not* define
26858 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
26859 we have builtins invoked by mmintrin.h that give us license to emit
26860 these sorts of instructions. */
26861
26862 static rtx
26863 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
26864 {
26865 enum machine_mode tmode = TYPE_MODE (type);
26866 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
26867 int i, n_elt = GET_MODE_NUNITS (tmode);
26868 rtvec v = rtvec_alloc (n_elt);
26869
26870 gcc_assert (VECTOR_MODE_P (tmode));
26871 gcc_assert (call_expr_nargs (exp) == n_elt);
26872
26873 for (i = 0; i < n_elt; ++i)
26874 {
26875 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
26876 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
26877 }
26878
26879 if (!target || !register_operand (target, tmode))
26880 target = gen_reg_rtx (tmode);
26881
26882 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
26883 return target;
26884 }
26885
26886 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
26887 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
26888 had a language-level syntax for referencing vector elements. */
26889
26890 static rtx
26891 ix86_expand_vec_ext_builtin (tree exp, rtx target)
26892 {
26893 enum machine_mode tmode, mode0;
26894 tree arg0, arg1;
26895 int elt;
26896 rtx op0;
26897
26898 arg0 = CALL_EXPR_ARG (exp, 0);
26899 arg1 = CALL_EXPR_ARG (exp, 1);
26900
26901 op0 = expand_normal (arg0);
26902 elt = get_element_number (TREE_TYPE (arg0), arg1);
26903
26904 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
26905 mode0 = TYPE_MODE (TREE_TYPE (arg0));
26906 gcc_assert (VECTOR_MODE_P (mode0));
26907
26908 op0 = force_reg (mode0, op0);
26909
26910 if (optimize || !target || !register_operand (target, tmode))
26911 target = gen_reg_rtx (tmode);
26912
26913 ix86_expand_vector_extract (true, target, op0, elt);
26914
26915 return target;
26916 }
26917
26918 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
26919 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
26920 a language-level syntax for referencing vector elements. */
26921
26922 static rtx
26923 ix86_expand_vec_set_builtin (tree exp)
26924 {
26925 enum machine_mode tmode, mode1;
26926 tree arg0, arg1, arg2;
26927 int elt;
26928 rtx op0, op1, target;
26929
26930 arg0 = CALL_EXPR_ARG (exp, 0);
26931 arg1 = CALL_EXPR_ARG (exp, 1);
26932 arg2 = CALL_EXPR_ARG (exp, 2);
26933
26934 tmode = TYPE_MODE (TREE_TYPE (arg0));
26935 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
26936 gcc_assert (VECTOR_MODE_P (tmode));
26937
26938 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
26939 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
26940 elt = get_element_number (TREE_TYPE (arg0), arg2);
26941
26942 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
26943 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
26944
26945 op0 = force_reg (tmode, op0);
26946 op1 = force_reg (mode1, op1);
26947
26948 /* OP0 is the source of these builtin functions and shouldn't be
26949 modified. Create a copy, use it, and return it as the target. */
26950 target = gen_reg_rtx (tmode);
26951 emit_move_insn (target, op0);
26952 ix86_expand_vector_set (true, target, op1, elt);
26953
26954 return target;
26955 }
26956
26957 /* Expand an expression EXP that calls a built-in function,
26958 with result going to TARGET if that's convenient
26959 (and in mode MODE if that's convenient).
26960 SUBTARGET may be used as the target for computing one of EXP's operands.
26961 IGNORE is nonzero if the value is to be ignored. */
26962
26963 static rtx
26964 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
26965 enum machine_mode mode ATTRIBUTE_UNUSED,
26966 int ignore ATTRIBUTE_UNUSED)
26967 {
26968 const struct builtin_description *d;
26969 size_t i;
26970 enum insn_code icode;
26971 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
26972 tree arg0, arg1, arg2;
26973 rtx op0, op1, op2, pat;
26974 enum machine_mode mode0, mode1, mode2;
26975 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
26976
26977 /* Determine whether the builtin function is available under the current ISA.
26978 Originally the builtin was not created if it wasn't applicable to the
26979 current ISA based on the command-line switches. With function-specific
26980 options, we need to check in the context of the function making the call
26981 whether it is supported. */
26982 if (ix86_builtins_isa[fcode].isa
26983 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
26984 {
26985 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
26986 NULL, (enum fpmath_unit) 0, false);
26987
26988 if (!opts)
26989 error ("%qE needs unknown isa option", fndecl);
26990 else
26991 {
26992 gcc_assert (opts != NULL);
26993 error ("%qE needs isa option %s", fndecl, opts);
26994 free (opts);
26995 }
26996 return const0_rtx;
26997 }
26998
26999 switch (fcode)
27000 {
27001 case IX86_BUILTIN_MASKMOVQ:
27002 case IX86_BUILTIN_MASKMOVDQU:
27003 icode = (fcode == IX86_BUILTIN_MASKMOVQ
27004 ? CODE_FOR_mmx_maskmovq
27005 : CODE_FOR_sse2_maskmovdqu);
27006 /* Note the arg order is different from the operand order. */
27007 arg1 = CALL_EXPR_ARG (exp, 0);
27008 arg2 = CALL_EXPR_ARG (exp, 1);
27009 arg0 = CALL_EXPR_ARG (exp, 2);
27010 op0 = expand_normal (arg0);
27011 op1 = expand_normal (arg1);
27012 op2 = expand_normal (arg2);
27013 mode0 = insn_data[icode].operand[0].mode;
27014 mode1 = insn_data[icode].operand[1].mode;
27015 mode2 = insn_data[icode].operand[2].mode;
27016
27017 if (GET_MODE (op0) != Pmode)
27018 op0 = convert_to_mode (Pmode, op0, 1);
27019 op0 = gen_rtx_MEM (mode1, force_reg (Pmode, op0));
27020
27021 if (!insn_data[icode].operand[0].predicate (op0, mode0))
27022 op0 = copy_to_mode_reg (mode0, op0);
27023 if (!insn_data[icode].operand[1].predicate (op1, mode1))
27024 op1 = copy_to_mode_reg (mode1, op1);
27025 if (!insn_data[icode].operand[2].predicate (op2, mode2))
27026 op2 = copy_to_mode_reg (mode2, op2);
27027 pat = GEN_FCN (icode) (op0, op1, op2);
27028 if (! pat)
27029 return 0;
27030 emit_insn (pat);
27031 return 0;
27032
27033 case IX86_BUILTIN_LDMXCSR:
27034 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
27035 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
27036 emit_move_insn (target, op0);
27037 emit_insn (gen_sse_ldmxcsr (target));
27038 return 0;
27039
27040 case IX86_BUILTIN_STMXCSR:
27041 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
27042 emit_insn (gen_sse_stmxcsr (target));
27043 return copy_to_mode_reg (SImode, target);
27044
27045 case IX86_BUILTIN_CLFLUSH:
27046 arg0 = CALL_EXPR_ARG (exp, 0);
27047 op0 = expand_normal (arg0);
27048 icode = CODE_FOR_sse2_clflush;
27049 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
27050 {
27051 if (GET_MODE (op0) != Pmode)
27052 op0 = convert_to_mode (Pmode, op0, 1);
27053 op0 = force_reg (Pmode, op0);
27054 }
27055
27056 emit_insn (gen_sse2_clflush (op0));
27057 return 0;
27058
27059 case IX86_BUILTIN_MONITOR:
27060 arg0 = CALL_EXPR_ARG (exp, 0);
27061 arg1 = CALL_EXPR_ARG (exp, 1);
27062 arg2 = CALL_EXPR_ARG (exp, 2);
27063 op0 = expand_normal (arg0);
27064 op1 = expand_normal (arg1);
27065 op2 = expand_normal (arg2);
27066 if (!REG_P (op0))
27067 {
27068 if (GET_MODE (op0) != Pmode)
27069 op0 = convert_to_mode (Pmode, op0, 1);
27070 op0 = force_reg (Pmode, op0);
27071 }
27072 if (!REG_P (op1))
27073 op1 = copy_to_mode_reg (SImode, op1);
27074 if (!REG_P (op2))
27075 op2 = copy_to_mode_reg (SImode, op2);
27076 emit_insn (ix86_gen_monitor (op0, op1, op2));
27077 return 0;
27078
27079 case IX86_BUILTIN_MWAIT:
27080 arg0 = CALL_EXPR_ARG (exp, 0);
27081 arg1 = CALL_EXPR_ARG (exp, 1);
27082 op0 = expand_normal (arg0);
27083 op1 = expand_normal (arg1);
27084 if (!REG_P (op0))
27085 op0 = copy_to_mode_reg (SImode, op0);
27086 if (!REG_P (op1))
27087 op1 = copy_to_mode_reg (SImode, op1);
27088 emit_insn (gen_sse3_mwait (op0, op1));
27089 return 0;
27090
27091 case IX86_BUILTIN_VEC_INIT_V2SI:
27092 case IX86_BUILTIN_VEC_INIT_V4HI:
27093 case IX86_BUILTIN_VEC_INIT_V8QI:
27094 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
27095
27096 case IX86_BUILTIN_VEC_EXT_V2DF:
27097 case IX86_BUILTIN_VEC_EXT_V2DI:
27098 case IX86_BUILTIN_VEC_EXT_V4SF:
27099 case IX86_BUILTIN_VEC_EXT_V4SI:
27100 case IX86_BUILTIN_VEC_EXT_V8HI:
27101 case IX86_BUILTIN_VEC_EXT_V2SI:
27102 case IX86_BUILTIN_VEC_EXT_V4HI:
27103 case IX86_BUILTIN_VEC_EXT_V16QI:
27104 return ix86_expand_vec_ext_builtin (exp, target);
27105
27106 case IX86_BUILTIN_VEC_SET_V2DI:
27107 case IX86_BUILTIN_VEC_SET_V4SF:
27108 case IX86_BUILTIN_VEC_SET_V4SI:
27109 case IX86_BUILTIN_VEC_SET_V8HI:
27110 case IX86_BUILTIN_VEC_SET_V4HI:
27111 case IX86_BUILTIN_VEC_SET_V16QI:
27112 return ix86_expand_vec_set_builtin (exp);
27113
27114 case IX86_BUILTIN_VEC_PERM_V2DF:
27115 case IX86_BUILTIN_VEC_PERM_V4SF:
27116 case IX86_BUILTIN_VEC_PERM_V2DI:
27117 case IX86_BUILTIN_VEC_PERM_V4SI:
27118 case IX86_BUILTIN_VEC_PERM_V8HI:
27119 case IX86_BUILTIN_VEC_PERM_V16QI:
27120 case IX86_BUILTIN_VEC_PERM_V2DI_U:
27121 case IX86_BUILTIN_VEC_PERM_V4SI_U:
27122 case IX86_BUILTIN_VEC_PERM_V8HI_U:
27123 case IX86_BUILTIN_VEC_PERM_V16QI_U:
27124 case IX86_BUILTIN_VEC_PERM_V4DF:
27125 case IX86_BUILTIN_VEC_PERM_V8SF:
27126 return ix86_expand_vec_perm_builtin (exp);
27127
27128 case IX86_BUILTIN_INFQ:
27129 case IX86_BUILTIN_HUGE_VALQ:
27130 {
27131 REAL_VALUE_TYPE inf;
27132 rtx tmp;
27133
27134 real_inf (&inf);
27135 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
27136
27137 tmp = validize_mem (force_const_mem (mode, tmp));
27138
27139 if (target == 0)
27140 target = gen_reg_rtx (mode);
27141
27142 emit_move_insn (target, tmp);
27143 return target;
27144 }
27145
27146 case IX86_BUILTIN_LLWPCB:
27147 arg0 = CALL_EXPR_ARG (exp, 0);
27148 op0 = expand_normal (arg0);
27149 icode = CODE_FOR_lwp_llwpcb;
27150 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
27151 {
27152 if (GET_MODE (op0) != Pmode)
27153 op0 = convert_to_mode (Pmode, op0, 1);
27154 op0 = force_reg (Pmode, op0);
27155 }
27156 emit_insn (gen_lwp_llwpcb (op0));
27157 return 0;
27158
27159 case IX86_BUILTIN_SLWPCB:
27160 icode = CODE_FOR_lwp_slwpcb;
27161 if (!target
27162 || !insn_data[icode].operand[0].predicate (target, Pmode))
27163 target = gen_reg_rtx (Pmode);
27164 emit_insn (gen_lwp_slwpcb (target));
27165 return target;
27166
27167 case IX86_BUILTIN_BEXTRI32:
27168 case IX86_BUILTIN_BEXTRI64:
27169 arg0 = CALL_EXPR_ARG (exp, 0);
27170 arg1 = CALL_EXPR_ARG (exp, 1);
27171 op0 = expand_normal (arg0);
27172 op1 = expand_normal (arg1);
27173 icode = (fcode == IX86_BUILTIN_BEXTRI32
27174 ? CODE_FOR_tbm_bextri_si
27175 : CODE_FOR_tbm_bextri_di);
27176 if (!CONST_INT_P (op1))
27177 {
27178 error ("last argument must be an immediate");
27179 return const0_rtx;
27180 }
27181 else
27182 {
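/* The immediate packs the bit-field start index in bits [7:0] and the
field length in bits [15:8]; e.g. (illustrative) 0x0804 selects an
8-bit field starting at bit 4. */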
27183 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
27184 unsigned char lsb_index = INTVAL (op1) & 0xFF;
27185 op1 = GEN_INT (length);
27186 op2 = GEN_INT (lsb_index);
27187 pat = GEN_FCN (icode) (target, op0, op1, op2);
27188 if (pat)
27189 emit_insn (pat);
27190 return target;
27191 }
27192
27193 case IX86_BUILTIN_RDRAND16_STEP:
27194 icode = CODE_FOR_rdrandhi_1;
27195 mode0 = HImode;
27196 goto rdrand_step;
27197
27198 case IX86_BUILTIN_RDRAND32_STEP:
27199 icode = CODE_FOR_rdrandsi_1;
27200 mode0 = SImode;
27201 goto rdrand_step;
27202
27203 case IX86_BUILTIN_RDRAND64_STEP:
27204 icode = CODE_FOR_rdranddi_1;
27205 mode0 = DImode;
27206
27207 rdrand_step:
27208 op0 = gen_reg_rtx (mode0);
27209 emit_insn (GEN_FCN (icode) (op0));
27210
27211 arg0 = CALL_EXPR_ARG (exp, 0);
27212 op1 = expand_normal (arg0);
27213 if (!address_operand (op1, VOIDmode))
27214 {
27215 op1 = convert_memory_address (Pmode, op1);
27216 op1 = copy_addr_to_reg (op1);
27217 }
27218 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
27219
27220 op1 = gen_reg_rtx (SImode);
27221 emit_move_insn (op1, CONST1_RTX (SImode));
27222
27223 /* Emit SImode conditional move. */
27224 if (mode0 == HImode)
27225 {
27226 op2 = gen_reg_rtx (SImode);
27227 emit_insn (gen_zero_extendhisi2 (op2, op0));
27228 }
27229 else if (mode0 == SImode)
27230 op2 = op0;
27231 else
27232 op2 = gen_rtx_SUBREG (SImode, op0, 0);
27233
27234 if (target == 0)
27235 target = gen_reg_rtx (SImode);
27236
27237 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
27238 const0_rtx);
27239 emit_insn (gen_rtx_SET (VOIDmode, target,
27240 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
27241 return target;
27242
27243 default:
27244 break;
27245 }
27246
27247 for (i = 0, d = bdesc_special_args;
27248 i < ARRAY_SIZE (bdesc_special_args);
27249 i++, d++)
27250 if (d->code == fcode)
27251 return ix86_expand_special_args_builtin (d, exp, target);
27252
27253 for (i = 0, d = bdesc_args;
27254 i < ARRAY_SIZE (bdesc_args);
27255 i++, d++)
27256 if (d->code == fcode)
27257 switch (fcode)
27258 {
27259 case IX86_BUILTIN_FABSQ:
27260 case IX86_BUILTIN_COPYSIGNQ:
27261 if (!TARGET_SSE2)
27262 /* Emit a normal call if SSE2 isn't available. */
27263 return expand_call (exp, target, ignore);
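/* FALLTHRU */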
27264 default:
27265 return ix86_expand_args_builtin (d, exp, target);
27266 }
27267
27268 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
27269 if (d->code == fcode)
27270 return ix86_expand_sse_comi (d, exp, target);
27271
27272 for (i = 0, d = bdesc_pcmpestr;
27273 i < ARRAY_SIZE (bdesc_pcmpestr);
27274 i++, d++)
27275 if (d->code == fcode)
27276 return ix86_expand_sse_pcmpestr (d, exp, target);
27277
27278 for (i = 0, d = bdesc_pcmpistr;
27279 i < ARRAY_SIZE (bdesc_pcmpistr);
27280 i++, d++)
27281 if (d->code == fcode)
27282 return ix86_expand_sse_pcmpistr (d, exp, target);
27283
27284 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
27285 if (d->code == fcode)
27286 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
27287 (enum ix86_builtin_func_type)
27288 d->flag, d->comparison);
27289
27290 gcc_unreachable ();
27291 }
27292
27293 /* Returns a function decl for a vectorized version of the builtin function
27294 with builtin function code FN and the result vector type TYPE, or NULL_TREE
27295 if it is not available. */
27296
27297 static tree
27298 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
27299 tree type_in)
27300 {
27301 enum machine_mode in_mode, out_mode;
27302 int in_n, out_n;
27303 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
27304
27305 if (TREE_CODE (type_out) != VECTOR_TYPE
27306 || TREE_CODE (type_in) != VECTOR_TYPE
27307 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
27308 return NULL_TREE;
27309
27310 out_mode = TYPE_MODE (TREE_TYPE (type_out));
27311 out_n = TYPE_VECTOR_SUBPARTS (type_out);
27312 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27313 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27314
27315 switch (fn)
27316 {
27317 case BUILT_IN_SQRT:
27318 if (out_mode == DFmode && in_mode == DFmode)
27319 {
27320 if (out_n == 2 && in_n == 2)
27321 return ix86_builtins[IX86_BUILTIN_SQRTPD];
27322 else if (out_n == 4 && in_n == 4)
27323 return ix86_builtins[IX86_BUILTIN_SQRTPD256];
27324 }
27325 break;
27326
27327 case BUILT_IN_SQRTF:
27328 if (out_mode == SFmode && in_mode == SFmode)
27329 {
27330 if (out_n == 4 && in_n == 4)
27331 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
27332 else if (out_n == 8 && in_n == 8)
27333 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
27334 }
27335 break;
27336
27337 case BUILT_IN_LRINT:
27338 if (out_mode == SImode && out_n == 4
27339 && in_mode == DFmode && in_n == 2)
27340 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
27341 break;
27342
27343 case BUILT_IN_LRINTF:
27344 if (out_mode == SImode && in_mode == SFmode)
27345 {
27346 if (out_n == 4 && in_n == 4)
27347 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
27348 else if (out_n == 8 && in_n == 8)
27349 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
27350 }
27351 break;
27352
27353 case BUILT_IN_COPYSIGN:
27354 if (out_mode == DFmode && in_mode == DFmode)
27355 {
27356 if (out_n == 2 && in_n == 2)
27357 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
27358 else if (out_n == 4 && in_n == 4)
27359 return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
27360 }
27361 break;
27362
27363 case BUILT_IN_COPYSIGNF:
27364 if (out_mode == SFmode && in_mode == SFmode)
27365 {
27366 if (out_n == 4 && in_n == 4)
27367 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
27368 else if (out_n == 8 && in_n == 8)
27369 return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
27370 }
27371 break;
27372
27373 case BUILT_IN_FLOOR:
27374 /* The round insn does not trap on denormals. */
27375 if (flag_trapping_math || !TARGET_ROUND)
27376 break;
27377
27378 if (out_mode == DFmode && in_mode == DFmode)
27379 {
27380 if (out_n == 2 && in_n == 2)
27381 return ix86_builtins[IX86_BUILTIN_FLOORPD];
27382 else if (out_n == 4 && in_n == 4)
27383 return ix86_builtins[IX86_BUILTIN_FLOORPD256];
27384 }
27385 break;
27386
27387 case BUILT_IN_FLOORF:
27388 /* The round insn does not trap on denormals. */
27389 if (flag_trapping_math || !TARGET_ROUND)
27390 break;
27391
27392 if (out_mode == SFmode && in_mode == SFmode)
27393 {
27394 if (out_n == 4 && in_n == 4)
27395 return ix86_builtins[IX86_BUILTIN_FLOORPS];
27396 else if (out_n == 8 && in_n == 8)
27397 return ix86_builtins[IX86_BUILTIN_FLOORPS256];
27398 }
27399 break;
27400
27401 case BUILT_IN_CEIL:
27402 /* The round insn does not trap on denormals. */
27403 if (flag_trapping_math || !TARGET_ROUND)
27404 break;
27405
27406 if (out_mode == DFmode && in_mode == DFmode)
27407 {
27408 if (out_n == 2 && in_n == 2)
27409 return ix86_builtins[IX86_BUILTIN_CEILPD];
27410 else if (out_n == 4 && in_n == 4)
27411 return ix86_builtins[IX86_BUILTIN_CEILPD256];
27412 }
27413 break;
27414
27415 case BUILT_IN_CEILF:
27416 /* The round insn does not trap on denormals. */
27417 if (flag_trapping_math || !TARGET_ROUND)
27418 break;
27419
27420 if (out_mode == SFmode && in_mode == SFmode)
27421 {
27422 if (out_n == 4 && in_n == 4)
27423 return ix86_builtins[IX86_BUILTIN_CEILPS];
27424 else if (out_n == 8 && in_n == 8)
27425 return ix86_builtins[IX86_BUILTIN_CEILPS256];
27426 }
27427 break;
27428
27429 case BUILT_IN_TRUNC:
27430 /* The round insn does not trap on denormals. */
27431 if (flag_trapping_math || !TARGET_ROUND)
27432 break;
27433
27434 if (out_mode == DFmode && in_mode == DFmode)
27435 {
27436 if (out_n == 2 && in_n == 2)
27437 return ix86_builtins[IX86_BUILTIN_TRUNCPD];
27438 else if (out_n == 4 && in_n == 4)
27439 return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
27440 }
27441 break;
27442
27443 case BUILT_IN_TRUNCF:
27444 /* The round insn does not trap on denormals. */
27445 if (flag_trapping_math || !TARGET_ROUND)
27446 break;
27447
27448 if (out_mode == SFmode && in_mode == SFmode)
27449 {
27450 if (out_n == 4 && in_n == 4)
27451 return ix86_builtins[IX86_BUILTIN_TRUNCPS];
27452 else if (out_n == 8 && in_n == 8)
27453 return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
27454 }
27455 break;
27456
27457 case BUILT_IN_RINT:
27458 /* The round insn does not trap on denormals. */
27459 if (flag_trapping_math || !TARGET_ROUND)
27460 break;
27461
27462 if (out_mode == DFmode && in_mode == DFmode)
27463 {
27464 if (out_n == 2 && in_n == 2)
27465 return ix86_builtins[IX86_BUILTIN_RINTPD];
27466 else if (out_n == 4 && in_n == 4)
27467 return ix86_builtins[IX86_BUILTIN_RINTPD256];
27468 }
27469 break;
27470
27471 case BUILT_IN_RINTF:
27472 /* The round insn does not trap on denormals. */
27473 if (flag_trapping_math || !TARGET_ROUND)
27474 break;
27475
27476 if (out_mode == SFmode && in_mode == SFmode)
27477 {
27478 if (out_n == 4 && in_n == 4)
27479 return ix86_builtins[IX86_BUILTIN_RINTPS];
27480 else if (out_n == 8 && in_n == 8)
27481 return ix86_builtins[IX86_BUILTIN_RINTPS256];
27482 }
27483 break;
27484
27485 case BUILT_IN_FMA:
27486 if (out_mode == DFmode && in_mode == DFmode)
27487 {
27488 if (out_n == 2 && in_n == 2)
27489 return ix86_builtins[IX86_BUILTIN_VFMADDPD];
27490 if (out_n == 4 && in_n == 4)
27491 return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
27492 }
27493 break;
27494
27495 case BUILT_IN_FMAF:
27496 if (out_mode == SFmode && in_mode == SFmode)
27497 {
27498 if (out_n == 4 && in_n == 4)
27499 return ix86_builtins[IX86_BUILTIN_VFMADDPS];
27500 if (out_n == 8 && in_n == 8)
27501 return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
27502 }
27503 break;
27504
27505 default:
27506 break;
27507 }
27508
27509 /* Dispatch to a handler for a vectorization library. */
27510 if (ix86_veclib_handler)
27511 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
27512 type_in);
27513
27514 return NULL_TREE;
27515 }
27516
27517 /* Handler for an SVML-style interface to
27518 a library with vectorized intrinsics. */
27519
27520 static tree
27521 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
27522 {
27523 char name[20];
27524 tree fntype, new_fndecl, args;
27525 unsigned arity;
27526 const char *bname;
27527 enum machine_mode el_mode, in_mode;
27528 int n, in_n;
27529
27530 /* The SVML is suitable for unsafe math only. */
27531 if (!flag_unsafe_math_optimizations)
27532 return NULL_TREE;
27533
27534 el_mode = TYPE_MODE (TREE_TYPE (type_out));
27535 n = TYPE_VECTOR_SUBPARTS (type_out);
27536 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27537 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27538 if (el_mode != in_mode
27539 || n != in_n)
27540 return NULL_TREE;
27541
27542 switch (fn)
27543 {
27544 case BUILT_IN_EXP:
27545 case BUILT_IN_LOG:
27546 case BUILT_IN_LOG10:
27547 case BUILT_IN_POW:
27548 case BUILT_IN_TANH:
27549 case BUILT_IN_TAN:
27550 case BUILT_IN_ATAN:
27551 case BUILT_IN_ATAN2:
27552 case BUILT_IN_ATANH:
27553 case BUILT_IN_CBRT:
27554 case BUILT_IN_SINH:
27555 case BUILT_IN_SIN:
27556 case BUILT_IN_ASINH:
27557 case BUILT_IN_ASIN:
27558 case BUILT_IN_COSH:
27559 case BUILT_IN_COS:
27560 case BUILT_IN_ACOSH:
27561 case BUILT_IN_ACOS:
27562 if (el_mode != DFmode || n != 2)
27563 return NULL_TREE;
27564 break;
27565
27566 case BUILT_IN_EXPF:
27567 case BUILT_IN_LOGF:
27568 case BUILT_IN_LOG10F:
27569 case BUILT_IN_POWF:
27570 case BUILT_IN_TANHF:
27571 case BUILT_IN_TANF:
27572 case BUILT_IN_ATANF:
27573 case BUILT_IN_ATAN2F:
27574 case BUILT_IN_ATANHF:
27575 case BUILT_IN_CBRTF:
27576 case BUILT_IN_SINHF:
27577 case BUILT_IN_SINF:
27578 case BUILT_IN_ASINHF:
27579 case BUILT_IN_ASINF:
27580 case BUILT_IN_COSHF:
27581 case BUILT_IN_COSF:
27582 case BUILT_IN_ACOSHF:
27583 case BUILT_IN_ACOSF:
27584 if (el_mode != SFmode || n != 4)
27585 return NULL_TREE;
27586 break;
27587
27588 default:
27589 return NULL_TREE;
27590 }
27591
27592 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
27593
27594 if (fn == BUILT_IN_LOGF)
27595 strcpy (name, "vmlsLn4");
27596 else if (fn == BUILT_IN_LOG)
27597 strcpy (name, "vmldLn2");
27598 else if (n == 4)
27599 {
27600 sprintf (name, "vmls%s", bname+10);
27601 name[strlen (name)-1] = '4';
27602 }
27603 else
27604 sprintf (name, "vmld%s2", bname+10);
27605
27606 /* Convert to uppercase. */
27607 name[4] &= ~0x20;
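/* For example (illustrative): BUILT_IN_SINF becomes "vmlsSin4" and
BUILT_IN_SIN becomes "vmldSin2". */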
27608
27609 arity = 0;
27610 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
27611 args = TREE_CHAIN (args))
27612 arity++;
27613
27614 if (arity == 1)
27615 fntype = build_function_type_list (type_out, type_in, NULL);
27616 else
27617 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
27618
27619 /* Build a function declaration for the vectorized function. */
27620 new_fndecl = build_decl (BUILTINS_LOCATION,
27621 FUNCTION_DECL, get_identifier (name), fntype);
27622 TREE_PUBLIC (new_fndecl) = 1;
27623 DECL_EXTERNAL (new_fndecl) = 1;
27624 DECL_IS_NOVOPS (new_fndecl) = 1;
27625 TREE_READONLY (new_fndecl) = 1;
27626
27627 return new_fndecl;
27628 }
27629
27630 /* Handler for an ACML-style interface to
27631 a library with vectorized intrinsics. */
27632
27633 static tree
27634 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
27635 {
27636 char name[20] = "__vr.._";
27637 tree fntype, new_fndecl, args;
27638 unsigned arity;
27639 const char *bname;
27640 enum machine_mode el_mode, in_mode;
27641 int n, in_n;
27642
27643 /* The ACML is 64-bit only and suitable for unsafe math only, as
27644 it does not correctly support parts of IEEE with the required
27645 precision, such as denormals. */
27646 if (!TARGET_64BIT
27647 || !flag_unsafe_math_optimizations)
27648 return NULL_TREE;
27649
27650 el_mode = TYPE_MODE (TREE_TYPE (type_out));
27651 n = TYPE_VECTOR_SUBPARTS (type_out);
27652 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27653 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27654 if (el_mode != in_mode
27655 || n != in_n)
27656 return NULL_TREE;
27657
27658 switch (fn)
27659 {
27660 case BUILT_IN_SIN:
27661 case BUILT_IN_COS:
27662 case BUILT_IN_EXP:
27663 case BUILT_IN_LOG:
27664 case BUILT_IN_LOG2:
27665 case BUILT_IN_LOG10:
27666 name[4] = 'd';
27667 name[5] = '2';
27668 if (el_mode != DFmode
27669 || n != 2)
27670 return NULL_TREE;
27671 break;
27672
27673 case BUILT_IN_SINF:
27674 case BUILT_IN_COSF:
27675 case BUILT_IN_EXPF:
27676 case BUILT_IN_POWF:
27677 case BUILT_IN_LOGF:
27678 case BUILT_IN_LOG2F:
27679 case BUILT_IN_LOG10F:
27680 name[4] = 's';
27681 name[5] = '4';
27682 if (el_mode != SFmode
27683 || n != 4)
27684 return NULL_TREE;
27685 break;
27686
27687 default:
27688 return NULL_TREE;
27689 }
27690
27691 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
27692 sprintf (name + 7, "%s", bname+10);
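/* For example (illustrative): BUILT_IN_SIN becomes "__vrd2_sin" and
BUILT_IN_SINF becomes "__vrs4_sinf". */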
27693
27694 arity = 0;
27695 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
27696 args = TREE_CHAIN (args))
27697 arity++;
27698
27699 if (arity == 1)
27700 fntype = build_function_type_list (type_out, type_in, NULL);
27701 else
27702 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
27703
27704 /* Build a function declaration for the vectorized function. */
27705 new_fndecl = build_decl (BUILTINS_LOCATION,
27706 FUNCTION_DECL, get_identifier (name), fntype);
27707 TREE_PUBLIC (new_fndecl) = 1;
27708 DECL_EXTERNAL (new_fndecl) = 1;
27709 DECL_IS_NOVOPS (new_fndecl) = 1;
27710 TREE_READONLY (new_fndecl) = 1;
27711
27712 return new_fndecl;
27713 }
27714
27715
27716 /* Returns a decl of a function that implements conversion of an integer vector
27717 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
27718 are the types involved when converting according to CODE.
27719 Return NULL_TREE if it is not available. */
27720
27721 static tree
27722 ix86_vectorize_builtin_conversion (unsigned int code,
27723 tree dest_type, tree src_type)
27724 {
27725 if (! TARGET_SSE2)
27726 return NULL_TREE;
27727
27728 switch (code)
27729 {
27730 case FLOAT_EXPR:
27731 switch (TYPE_MODE (src_type))
27732 {
27733 case V4SImode:
27734 switch (TYPE_MODE (dest_type))
27735 {
27736 case V4SFmode:
27737 return (TYPE_UNSIGNED (src_type)
27738 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
27739 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
27740 case V4DFmode:
27741 return (TYPE_UNSIGNED (src_type)
27742 ? NULL_TREE
27743 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
27744 default:
27745 return NULL_TREE;
27746 }
27747 break;
27748 case V8SImode:
27749 switch (TYPE_MODE (dest_type))
27750 {
27751 case V8SFmode:
27752 return (TYPE_UNSIGNED (src_type)
27753 ? NULL_TREE
27754 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS256]);
27755 default:
27756 return NULL_TREE;
27757 }
27758 break;
27759 default:
27760 return NULL_TREE;
27761 }
27762
27763 case FIX_TRUNC_EXPR:
27764 switch (TYPE_MODE (dest_type))
27765 {
27766 case V4SImode:
27767 switch (TYPE_MODE (src_type))
27768 {
27769 case V4SFmode:
27770 return (TYPE_UNSIGNED (dest_type)
27771 ? NULL_TREE
27772 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
27773 case V4DFmode:
27774 return (TYPE_UNSIGNED (dest_type)
27775 ? NULL_TREE
27776 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
27777 default:
27778 return NULL_TREE;
27779 }
27780 break;
27781
27782 case V8SImode:
27783 switch (TYPE_MODE (src_type))
27784 {
27785 case V8SFmode:
27786 return (TYPE_UNSIGNED (dest_type)
27787 ? NULL_TREE
27788 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
27789 default:
27790 return NULL_TREE;
27791 }
27792 break;
27793
27794 default:
27795 return NULL_TREE;
27796 }
27797
27798 default:
27799 return NULL_TREE;
27800 }
27801
27802 return NULL_TREE;
27803 }
27804
27805 /* Returns a decl of a target-specific builtin that implements
27806 the reciprocal of the function, or NULL_TREE if not available. */
27807
27808 static tree
27809 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
27810 bool sqrt ATTRIBUTE_UNUSED)
27811 {
27812 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
27813 && flag_finite_math_only && !flag_trapping_math
27814 && flag_unsafe_math_optimizations))
27815 return NULL_TREE;
27816
27817 if (md_fn)
27818 /* Machine dependent builtins. */
27819 switch (fn)
27820 {
27821 /* Vectorized version of sqrt to rsqrt conversion. */
27822 case IX86_BUILTIN_SQRTPS_NR:
27823 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
27824
27825 case IX86_BUILTIN_SQRTPS_NR256:
27826 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
27827
27828 default:
27829 return NULL_TREE;
27830 }
27831 else
27832 /* Normal builtins. */
27833 switch (fn)
27834 {
27835 /* Sqrt to rsqrt conversion. */
27836 case BUILT_IN_SQRTF:
27837 return ix86_builtins[IX86_BUILTIN_RSQRTF];
27838
27839 default:
27840 return NULL_TREE;
27841 }
27842 }
27843 \f
27844 /* Helper for avx_vpermilps256_operand et al. This is also used by
27845 the expansion functions to turn the parallel back into a mask.
27846 The return value is 0 for no match and the imm8+1 for a match. */
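/* For example (illustrative): in V4SFmode the parallel (1 0 3 2) gives
ipar = {1,0,3,2} and mask = 1 | (0 << 2) | (3 << 4) | (2 << 6) = 0xb1,
so the return value is 0xb2. */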
27847
27848 int
27849 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
27850 {
27851 unsigned i, nelt = GET_MODE_NUNITS (mode);
27852 unsigned mask = 0;
27853 unsigned char ipar[8];
27854
27855 if (XVECLEN (par, 0) != (int) nelt)
27856 return 0;
27857
27858 /* Validate that all of the elements are constants, and not totally
27859 out of range. Copy the data into an integral array to make the
27860 subsequent checks easier. */
27861 for (i = 0; i < nelt; ++i)
27862 {
27863 rtx er = XVECEXP (par, 0, i);
27864 unsigned HOST_WIDE_INT ei;
27865
27866 if (!CONST_INT_P (er))
27867 return 0;
27868 ei = INTVAL (er);
27869 if (ei >= nelt)
27870 return 0;
27871 ipar[i] = ei;
27872 }
27873
27874 switch (mode)
27875 {
27876 case V4DFmode:
27877 /* In the 256-bit DFmode case, we can only move elements within
27878 a 128-bit lane. */
27879 for (i = 0; i < 2; ++i)
27880 {
27881 if (ipar[i] >= 2)
27882 return 0;
27883 mask |= ipar[i] << i;
27884 }
27885 for (i = 2; i < 4; ++i)
27886 {
27887 if (ipar[i] < 2)
27888 return 0;
27889 mask |= (ipar[i] - 2) << i;
27890 }
27891 break;
27892
27893 case V8SFmode:
27894 /* In the 256-bit SFmode case, we have full freedom of movement
27895 within the low 128-bit lane, but the high 128-bit lane must
27896 mirror the exact same pattern. */
27897 for (i = 0; i < 4; ++i)
27898 if (ipar[i] + 4 != ipar[i + 4])
27899 return 0;
27900 nelt = 4;
27901 /* FALLTHRU */
27902
27903 case V2DFmode:
27904 case V4SFmode:
27905 /* In the 128-bit case, we have full freedom in the placement of
27906 the elements from the source operand. */
27907 for (i = 0; i < nelt; ++i)
27908 mask |= ipar[i] << (i * (nelt / 2));
27909 break;
27910
27911 default:
27912 gcc_unreachable ();
27913 }
27914
27915 /* Make sure success has a non-zero value by adding one. */
27916 return mask + 1;
27917 }
27918
27919 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
27920 the expansion functions to turn the parallel back into a mask.
27921 The return value is 0 for no match and the imm8+1 for a match. */
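/* For example (illustrative): in V4DFmode the parallel (2 3 4 5) selects
the high lane of the first operand and the low lane of the second, so
mask = 1 | (2 << 4) = 0x21 and the return value is 0x22. */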
27922
27923 int
27924 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
27925 {
27926 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
27927 unsigned mask = 0;
27928 unsigned char ipar[8];
27929
27930 if (XVECLEN (par, 0) != (int) nelt)
27931 return 0;
27932
27933 /* Validate that all of the elements are constants, and not totally
27934 out of range. Copy the data into an integral array to make the
27935 subsequent checks easier. */
27936 for (i = 0; i < nelt; ++i)
27937 {
27938 rtx er = XVECEXP (par, 0, i);
27939 unsigned HOST_WIDE_INT ei;
27940
27941 if (!CONST_INT_P (er))
27942 return 0;
27943 ei = INTVAL (er);
27944 if (ei >= 2 * nelt)
27945 return 0;
27946 ipar[i] = ei;
27947 }
27948
27949 /* Validate that each half of the permute selects consecutive elements. */
27950 for (i = 0; i < nelt2 - 1; ++i)
27951 if (ipar[i] + 1 != ipar[i + 1])
27952 return 0;
27953 for (i = nelt2; i < nelt - 1; ++i)
27954 if (ipar[i] + 1 != ipar[i + 1])
27955 return 0;
27956
27957 /* Reconstruct the mask. */
27958 for (i = 0; i < 2; ++i)
27959 {
27960 unsigned e = ipar[i * nelt2];
27961 if (e % nelt2)
27962 return 0;
27963 e /= nelt2;
27964 mask |= e << (i * 4);
27965 }
27966
27967 /* Make sure success has a non-zero value by adding one. */
27968 return mask + 1;
27969 }
27970 \f
27971
27972 /* Store OPERAND to the memory after reload is completed. This means
27973 that we can't easily use assign_stack_local. */
27974 rtx
27975 ix86_force_to_memory (enum machine_mode mode, rtx operand)
27976 {
27977 rtx result;
27978
27979 gcc_assert (reload_completed);
27980 if (ix86_using_red_zone ())
27981 {
27982 result = gen_rtx_MEM (mode,
27983 gen_rtx_PLUS (Pmode,
27984 stack_pointer_rtx,
27985 GEN_INT (-RED_ZONE_SIZE)));
27986 emit_move_insn (result, operand);
27987 }
27988 else if (TARGET_64BIT)
27989 {
27990 switch (mode)
27991 {
27992 case HImode:
27993 case SImode:
27994 operand = gen_lowpart (DImode, operand);
27995 /* FALLTHRU */
27996 case DImode:
27997 emit_insn (
27998 gen_rtx_SET (VOIDmode,
27999 gen_rtx_MEM (DImode,
28000 gen_rtx_PRE_DEC (DImode,
28001 stack_pointer_rtx)),
28002 operand));
28003 break;
28004 default:
28005 gcc_unreachable ();
28006 }
28007 result = gen_rtx_MEM (mode, stack_pointer_rtx);
28008 }
28009 else
28010 {
28011 switch (mode)
28012 {
28013 case DImode:
28014 {
28015 rtx operands[2];
28016 split_double_mode (mode, &operand, 1, operands, operands + 1);
28017 emit_insn (
28018 gen_rtx_SET (VOIDmode,
28019 gen_rtx_MEM (SImode,
28020 gen_rtx_PRE_DEC (Pmode,
28021 stack_pointer_rtx)),
28022 operands[1]));
28023 emit_insn (
28024 gen_rtx_SET (VOIDmode,
28025 gen_rtx_MEM (SImode,
28026 gen_rtx_PRE_DEC (Pmode,
28027 stack_pointer_rtx)),
28028 operands[0]));
28029 }
28030 break;
28031 case HImode:
28032 /* Store HImodes as SImodes. */
28033 operand = gen_lowpart (SImode, operand);
28034 /* FALLTHRU */
28035 case SImode:
28036 emit_insn (
28037 gen_rtx_SET (VOIDmode,
28038 gen_rtx_MEM (GET_MODE (operand),
28039 gen_rtx_PRE_DEC (SImode,
28040 stack_pointer_rtx)),
28041 operand));
28042 break;
28043 default:
28044 gcc_unreachable ();
28045 }
28046 result = gen_rtx_MEM (mode, stack_pointer_rtx);
28047 }
28048 return result;
28049 }
28050
28051 /* Free operand from the memory. */
28052 void
28053 ix86_free_from_memory (enum machine_mode mode)
28054 {
28055 if (!ix86_using_red_zone ())
28056 {
28057 int size;
28058
28059 if (mode == DImode || TARGET_64BIT)
28060 size = 8;
28061 else
28062 size = 4;
28063 /* Use LEA to deallocate stack space. In peephole2 it will be converted
28064 to a pop or add instruction if registers are available. */
28065 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
28066 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
28067 GEN_INT (size))));
28068 }
28069 }
28070
28071 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
28072
28073 Put float CONST_DOUBLE in the constant pool instead of fp regs.
28074 QImode must go into class Q_REGS.
28075 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and
28076 movdf to do mem-to-mem moves through integer regs. */
28077
28078 static reg_class_t
28079 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
28080 {
28081 enum machine_mode mode = GET_MODE (x);
28082
28083 /* We're only allowed to return a subclass of CLASS. Many of the
28084 following checks fail for NO_REGS, so eliminate that early. */
28085 if (regclass == NO_REGS)
28086 return NO_REGS;
28087
28088 /* All classes can load zeros. */
28089 if (x == CONST0_RTX (mode))
28090 return regclass;
28091
28092 /* Force constants into memory if we are loading a (nonzero) constant into
28093 an MMX or SSE register. This is because there are no MMX/SSE instructions
28094 to load from a constant. */
28095 if (CONSTANT_P (x)
28096 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
28097 return NO_REGS;
28098
28099 /* Prefer SSE regs only, if we can use them for math. */
28100 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
28101 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
28102
28103 /* Floating-point constants need more complex checks. */
28104 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
28105 {
28106 /* General regs can load everything. */
28107 if (reg_class_subset_p (regclass, GENERAL_REGS))
28108 return regclass;
28109
28110 /* Floats can load 0 and 1 plus some others. Note that we eliminated
28111 zero above. We only want to wind up preferring 80387 registers if
28112 we plan on doing computation with them. */
28113 if (TARGET_80387
28114 && standard_80387_constant_p (x) > 0)
28115 {
28116 /* Limit class to non-sse. */
28117 if (regclass == FLOAT_SSE_REGS)
28118 return FLOAT_REGS;
28119 if (regclass == FP_TOP_SSE_REGS)
28120 return FP_TOP_REG;
28121 if (regclass == FP_SECOND_SSE_REGS)
28122 return FP_SECOND_REG;
28123 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
28124 return regclass;
28125 }
28126
28127 return NO_REGS;
28128 }
28129
28130 /* Generally when we see PLUS here, it's the function invariant
28131 (plus soft-fp const_int), which can only be computed into general
28132 regs. */
28133 if (GET_CODE (x) == PLUS)
28134 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
28135
28136 /* QImode constants are easy to load, but non-constant QImode data
28137 must go into Q_REGS. */
28138 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
28139 {
28140 if (reg_class_subset_p (regclass, Q_REGS))
28141 return regclass;
28142 if (reg_class_subset_p (Q_REGS, regclass))
28143 return Q_REGS;
28144 return NO_REGS;
28145 }
28146
28147 return regclass;
28148 }
28149
28150 /* Discourage putting floating-point values in SSE registers unless
28151 SSE math is being used, and likewise for the 387 registers. */
28152 static reg_class_t
28153 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
28154 {
28155 enum machine_mode mode = GET_MODE (x);
28156
28157 /* Restrict the output reload class to the register bank that we are doing
28158 math on. If we would like not to return a subset of CLASS, reject this
28159 alternative: if reload cannot do this, it will still use its choice. */
28160 mode = GET_MODE (x);
28161 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
28162 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
28163
28164 if (X87_FLOAT_MODE_P (mode))
28165 {
28166 if (regclass == FP_TOP_SSE_REGS)
28167 return FP_TOP_REG;
28168 else if (regclass == FP_SECOND_SSE_REGS)
28169 return FP_SECOND_REG;
28170 else
28171 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
28172 }
28173
28174 return regclass;
28175 }
28176
28177 static reg_class_t
28178 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
28179 enum machine_mode mode,
28180 secondary_reload_info *sri ATTRIBUTE_UNUSED)
28181 {
28182 /* QImode spills from non-QI registers require
28183 an intermediate register on 32-bit targets. */
28184 if (!TARGET_64BIT
28185 && !in_p && mode == QImode
28186 && (rclass == GENERAL_REGS
28187 || rclass == LEGACY_REGS
28188 || rclass == INDEX_REGS))
28189 {
28190 int regno;
28191
28192 if (REG_P (x))
28193 regno = REGNO (x);
28194 else
28195 regno = -1;
28196
28197 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
28198 regno = true_regnum (x);
28199
28200 /* Return Q_REGS if the operand is in memory. */
28201 if (regno == -1)
28202 return Q_REGS;
28203 }
28204
28205 /* This condition handles corner case where an expression involving
28206 pointers gets vectorized. We're trying to use the address of a
28207 stack slot as a vector initializer.
28208
28209 (set (reg:V2DI 74 [ vect_cst_.2 ])
28210 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
28211
28212 Eventually frame gets turned into sp+offset like this:
28213
28214 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28215 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
28216 (const_int 392 [0x188]))))
28217
28218 That later gets turned into:
28219
28220 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28221 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
28222 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
28223
28224 We'll have the following reload recorded:
28225
28226 Reload 0: reload_in (DI) =
28227 (plus:DI (reg/f:DI 7 sp)
28228 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
28229 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28230 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
28231 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
28232 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28233 reload_reg_rtx: (reg:V2DI 22 xmm1)
28234
28235 Which isn't going to work since SSE instructions can't handle scalar
28236 additions. Returning GENERAL_REGS forces the addition into integer
28237 register and reload can handle subsequent reloads without problems. */
28238
28239 if (in_p && GET_CODE (x) == PLUS
28240 && SSE_CLASS_P (rclass)
28241 && SCALAR_INT_MODE_P (mode))
28242 return GENERAL_REGS;
28243
28244 return NO_REGS;
28245 }
28246
28247 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
28248
28249 static bool
28250 ix86_class_likely_spilled_p (reg_class_t rclass)
28251 {
28252 switch (rclass)
28253 {
28254 case AREG:
28255 case DREG:
28256 case CREG:
28257 case BREG:
28258 case AD_REGS:
28259 case SIREG:
28260 case DIREG:
28261 case SSE_FIRST_REG:
28262 case FP_TOP_REG:
28263 case FP_SECOND_REG:
28264 return true;
28265
28266 default:
28267 break;
28268 }
28269
28270 return false;
28271 }
28272
28273 /* If we are copying between general and FP registers, we need a memory
28274 location. The same is true for SSE and MMX registers.
28275
28276 To optimize register_move_cost performance, allow inline variant.
28277
28278 The macro can't work reliably when one of the CLASSES is a class containing
28279 registers from multiple units (SSE, MMX, integer). We avoid this by never
28280 combining those units in single alternative in the machine description.
28281 Ensure that this constraint holds to avoid unexpected surprises.
28282
28283 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
28284 enforce these sanity checks. */
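/* For example (illustrative): on a 32-bit target a DImode move between an
SSE register and a general register is wider than a word, so it is
reported as needing a memory intermediate. */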
28285
28286 static inline bool
28287 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
28288 enum machine_mode mode, int strict)
28289 {
28290 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
28291 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
28292 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
28293 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
28294 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
28295 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
28296 {
28297 gcc_assert (!strict);
28298 return true;
28299 }
28300
28301 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
28302 return true;
28303
28304 /* ??? This is a lie. We do have moves between mmx/general and
28305 mmx/sse2. But by saying we need secondary memory we discourage the
28306 register allocator from using the mmx registers unless needed. */
28307 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
28308 return true;
28309
28310 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
28311 {
28312 /* SSE1 doesn't have any direct moves from other classes. */
28313 if (!TARGET_SSE2)
28314 return true;
28315
28316 /* If the target says that inter-unit moves are more expensive
28317 than moving through memory, then don't generate them. */
28318 if (!TARGET_INTER_UNIT_MOVES)
28319 return true;
28320
28321 /* Between SSE and general, we have moves no larger than word size. */
28322 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
28323 return true;
28324 }
28325
28326 return false;
28327 }
28328
28329 bool
28330 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
28331 enum machine_mode mode, int strict)
28332 {
28333 return inline_secondary_memory_needed (class1, class2, mode, strict);
28334 }
28335
28336 /* Implement the TARGET_CLASS_MAX_NREGS hook.
28337
28338 On the 80386, this is the size of MODE in words,
28339 except in the FP regs, where a single reg is always enough. */
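/* For instance (illustrative values derived from the code below):
   ix86_class_max_nregs (GENERAL_REGS, DImode) is 2 on ia32, while
   ix86_class_max_nregs (FLOAT_REGS, XFmode) is 1, since a single x87
   register holds a full extended-precision value.  */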
28340
28341 static unsigned char
28342 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
28343 {
28344 if (MAYBE_INTEGER_CLASS_P (rclass))
28345 {
28346 if (mode == XFmode)
28347 return (TARGET_64BIT ? 2 : 3);
28348 else if (mode == XCmode)
28349 return (TARGET_64BIT ? 4 : 6);
28350 else
28351 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
28352 }
28353 else
28354 {
28355 if (COMPLEX_MODE_P (mode))
28356 return 2;
28357 else
28358 return 1;
28359 }
28360 }
28361
28362 /* Return true if the registers in CLASS cannot represent the change from
28363 modes FROM to TO. */
28364
28365 bool
28366 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
28367 enum reg_class regclass)
28368 {
28369 if (from == to)
28370 return false;
28371
28372 /* x87 registers can't do subreg at all, as all values are reformatted
28373 to extended precision. */
28374 if (MAYBE_FLOAT_CLASS_P (regclass))
28375 return true;
28376
28377 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
28378 {
28379 /* Vector registers do not support QI or HImode loads. If we don't
28380 disallow a change to these modes, reload will assume it's ok to
28381 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
28382 the vec_dupv4hi pattern. */
28383 if (GET_MODE_SIZE (from) < 4)
28384 return true;
28385
28386 /* Vector registers do not support subreg with nonzero offsets, which
28387 are otherwise valid for integer registers. Since we can't see
28388 whether we have a nonzero offset from here, prohibit all
28389 nonparadoxical subregs changing size. */
28390 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
28391 return true;
28392 }
28393
28394 return false;
28395 }
28396
28397 /* Return the cost of moving data of mode M between a
28398 register and memory. A value of 2 is the default; this cost is
28399 relative to those in `REGISTER_MOVE_COST'.
28400
28401 This function is used extensively by register_move_cost that is used to
28402 build tables at startup. Make it inline in this case.
28403 When IN is 2, return maximum of in and out move cost.
28404
28405 If moving between registers and memory is more expensive than
28406 between two registers, you should define this macro to express the
28407 relative cost.
28408
28409    Also model the increased cost of moving QImode registers in non
28410    Q_REGS classes.
28411 */
28412 static inline int
28413 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
28414 int in)
28415 {
28416 int cost;
28417 if (FLOAT_CLASS_P (regclass))
28418 {
28419 int index;
28420 switch (mode)
28421 {
28422 case SFmode:
28423 index = 0;
28424 break;
28425 case DFmode:
28426 index = 1;
28427 break;
28428 case XFmode:
28429 index = 2;
28430 break;
28431 default:
28432 return 100;
28433 }
28434 if (in == 2)
28435 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
28436 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
28437 }
28438 if (SSE_CLASS_P (regclass))
28439 {
28440 int index;
28441 switch (GET_MODE_SIZE (mode))
28442 {
28443 case 4:
28444 index = 0;
28445 break;
28446 case 8:
28447 index = 1;
28448 break;
28449 case 16:
28450 index = 2;
28451 break;
28452 default:
28453 return 100;
28454 }
28455 if (in == 2)
28456 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
28457 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
28458 }
28459 if (MMX_CLASS_P (regclass))
28460 {
28461 int index;
28462 switch (GET_MODE_SIZE (mode))
28463 {
28464 case 4:
28465 index = 0;
28466 break;
28467 case 8:
28468 index = 1;
28469 break;
28470 default:
28471 return 100;
28472 }
28473       if (in == 2)
28474 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
28475 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
28476 }
28477 switch (GET_MODE_SIZE (mode))
28478 {
28479 case 1:
28480 if (Q_CLASS_P (regclass) || TARGET_64BIT)
28481 {
28482 if (!in)
28483 return ix86_cost->int_store[0];
28484 if (TARGET_PARTIAL_REG_DEPENDENCY
28485 && optimize_function_for_speed_p (cfun))
28486 cost = ix86_cost->movzbl_load;
28487 else
28488 cost = ix86_cost->int_load[0];
28489 if (in == 2)
28490 return MAX (cost, ix86_cost->int_store[0]);
28491 return cost;
28492 }
28493 else
28494 {
28495 if (in == 2)
28496 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
28497 if (in)
28498 return ix86_cost->movzbl_load;
28499 else
28500 return ix86_cost->int_store[0] + 4;
28501 }
28502 break;
28503 case 2:
28504 if (in == 2)
28505 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
28506 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
28507 default:
28508 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
28509 if (mode == TFmode)
28510 mode = XFmode;
28511 if (in == 2)
28512 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
28513 else if (in)
28514 cost = ix86_cost->int_load[2];
28515 else
28516 cost = ix86_cost->int_store[2];
28517 return (cost * (((int) GET_MODE_SIZE (mode)
28518 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
28519 }
28520 }
28521
28522 static int
28523 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
28524 bool in)
28525 {
28526 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
28527 }
28528
28529
28530 /* Return the cost of moving data from a register in class CLASS1 to
28531 one in class CLASS2.
28532
28533 It is not required that the cost always equal 2 when FROM is the same as TO;
28534 on some machines it is expensive to move between registers if they are not
28535 general registers. */
28536
28537 static int
28538 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
28539 reg_class_t class2_i)
28540 {
28541 enum reg_class class1 = (enum reg_class) class1_i;
28542 enum reg_class class2 = (enum reg_class) class2_i;
28543
28544 /* In case we require secondary memory, compute cost of the store followed
28545 by load. In order to avoid bad register allocation choices, we need
28546 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
28547
28548 if (inline_secondary_memory_needed (class1, class2, mode, 0))
28549 {
28550 int cost = 1;
28551
28552 cost += inline_memory_move_cost (mode, class1, 2);
28553 cost += inline_memory_move_cost (mode, class2, 2);
28554
28555       /* In case of copying from a general purpose register we may emit
28556	 multiple stores followed by a single load, causing a memory size
28557	 mismatch stall.  Count this as an arbitrarily high cost of 20.  */
28558 if (targetm.class_max_nregs (class1, mode)
28559 > targetm.class_max_nregs (class2, mode))
28560 cost += 20;
28561
28562 /* In the case of FP/MMX moves, the registers actually overlap, and we
28563 have to switch modes in order to treat them differently. */
28564 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
28565 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
28566 cost += 20;
28567
28568 return cost;
28569 }
28570
28571 /* Moves between SSE/MMX and integer unit are expensive. */
28572 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
28573 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
28574
28575     /* ??? By keeping the returned value relatively high, we limit the number
28576        of moves between integer and MMX/SSE registers for all targets.
28577        Additionally, the high value prevents a problem with x86_modes_tieable_p (),
28578        where integer modes in MMX/SSE registers are not tieable
28579        because of missing QImode and HImode moves to, from or between
28580        MMX/SSE registers.  */
28581 return MAX (8, ix86_cost->mmxsse_to_integer);
28582
28583 if (MAYBE_FLOAT_CLASS_P (class1))
28584 return ix86_cost->fp_move;
28585 if (MAYBE_SSE_CLASS_P (class1))
28586 return ix86_cost->sse_move;
28587 if (MAYBE_MMX_CLASS_P (class1))
28588 return ix86_cost->mmx_move;
28589 return 2;
28590 }
28591
28592 /* Return TRUE if hard register REGNO can hold a value of machine-mode
28593 MODE. */
28594
28595 bool
28596 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
28597 {
28598 /* Flags and only flags can only hold CCmode values. */
28599 if (CC_REGNO_P (regno))
28600 return GET_MODE_CLASS (mode) == MODE_CC;
28601 if (GET_MODE_CLASS (mode) == MODE_CC
28602 || GET_MODE_CLASS (mode) == MODE_RANDOM
28603 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
28604 return false;
28605 if (FP_REGNO_P (regno))
28606 return VALID_FP_MODE_P (mode);
28607 if (SSE_REGNO_P (regno))
28608 {
28609 /* We implement the move patterns for all vector modes into and
28610 out of SSE registers, even when no operation instructions
28611 are available. OImode move is available only when AVX is
28612 enabled. */
28613 return ((TARGET_AVX && mode == OImode)
28614 || VALID_AVX256_REG_MODE (mode)
28615 || VALID_SSE_REG_MODE (mode)
28616 || VALID_SSE2_REG_MODE (mode)
28617 || VALID_MMX_REG_MODE (mode)
28618 || VALID_MMX_REG_MODE_3DNOW (mode));
28619 }
28620 if (MMX_REGNO_P (regno))
28621 {
28622 /* We implement the move patterns for 3DNOW modes even in MMX mode,
28623 so if the register is available at all, then we can move data of
28624 the given mode into or out of it. */
28625 return (VALID_MMX_REG_MODE (mode)
28626 || VALID_MMX_REG_MODE_3DNOW (mode));
28627 }
28628
28629 if (mode == QImode)
28630 {
28631 /* Take care for QImode values - they can be in non-QI regs,
28632 but then they do cause partial register stalls. */
28633 if (regno <= BX_REG || TARGET_64BIT)
28634 return true;
28635 if (!TARGET_PARTIAL_REG_STALL)
28636 return true;
28637 return !can_create_pseudo_p ();
28638 }
28639 /* We handle both integer and floats in the general purpose registers. */
28640 else if (VALID_INT_MODE_P (mode))
28641 return true;
28642 else if (VALID_FP_MODE_P (mode))
28643 return true;
28644 else if (VALID_DFP_MODE_P (mode))
28645 return true;
28646 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
28647 on to use that value in smaller contexts, this can easily force a
28648 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
28649 supporting DImode, allow it. */
28650 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
28651 return true;
28652
28653 return false;
28654 }
28655
28656 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
28657 tieable integer mode. */
28658
28659 static bool
28660 ix86_tieable_integer_mode_p (enum machine_mode mode)
28661 {
28662 switch (mode)
28663 {
28664 case HImode:
28665 case SImode:
28666 return true;
28667
28668 case QImode:
28669 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
28670
28671 case DImode:
28672 return TARGET_64BIT;
28673
28674 default:
28675 return false;
28676 }
28677 }
28678
28679 /* Return true if MODE1 is accessible in a register that can hold MODE2
28680 without copying. That is, all register classes that can hold MODE2
28681 can also hold MODE1. */
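/* E.g. (illustrative): on x86_64, SImode and DImode are tieable, since any
   register that can hold DImode can also hold SImode; XFmode ties with
   SFmode and DFmode but not with TFmode, as noted below.  */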
28682
28683 bool
28684 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
28685 {
28686 if (mode1 == mode2)
28687 return true;
28688
28689 if (ix86_tieable_integer_mode_p (mode1)
28690 && ix86_tieable_integer_mode_p (mode2))
28691 return true;
28692
28693 /* MODE2 being XFmode implies fp stack or general regs, which means we
28694 can tie any smaller floating point modes to it. Note that we do not
28695 tie this with TFmode. */
28696 if (mode2 == XFmode)
28697 return mode1 == SFmode || mode1 == DFmode;
28698
28699 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
28700 that we can tie it with SFmode. */
28701 if (mode2 == DFmode)
28702 return mode1 == SFmode;
28703
28704 /* If MODE2 is only appropriate for an SSE register, then tie with
28705 any other mode acceptable to SSE registers. */
28706 if (GET_MODE_SIZE (mode2) == 16
28707 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
28708 return (GET_MODE_SIZE (mode1) == 16
28709 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
28710
28711 /* If MODE2 is appropriate for an MMX register, then tie
28712 with any other mode acceptable to MMX registers. */
28713 if (GET_MODE_SIZE (mode2) == 8
28714 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
28715 return (GET_MODE_SIZE (mode1) == 8
28716 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
28717
28718 return false;
28719 }
28720
28721 /* Compute a (partial) cost for rtx X. Return true if the complete
28722 cost has been computed, and false if subexpressions should be
28723 scanned. In either case, *TOTAL contains the cost result. */
28724
28725 static bool
28726 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
28727 {
28728 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
28729 enum machine_mode mode = GET_MODE (x);
28730 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
28731
28732 switch (code)
28733 {
28734 case CONST_INT:
28735 case CONST:
28736 case LABEL_REF:
28737 case SYMBOL_REF:
28738 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
28739 *total = 3;
28740 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
28741 *total = 2;
28742 else if (flag_pic && SYMBOLIC_CONST (x)
28743 && (!TARGET_64BIT
28744 		   || (GET_CODE (x) != LABEL_REF
28745 && (GET_CODE (x) != SYMBOL_REF
28746 || !SYMBOL_REF_LOCAL_P (x)))))
28747 *total = 1;
28748 else
28749 *total = 0;
28750 return true;
28751
28752 case CONST_DOUBLE:
28753 if (mode == VOIDmode)
28754 *total = 0;
28755 else
28756 switch (standard_80387_constant_p (x))
28757 {
28758 case 1: /* 0.0 */
28759 *total = 1;
28760 break;
28761 default: /* Other constants */
28762 *total = 2;
28763 break;
28764 case 0:
28765 case -1:
28766 /* Start with (MEM (SYMBOL_REF)), since that's where
28767 it'll probably end up. Add a penalty for size. */
28768 *total = (COSTS_N_INSNS (1)
28769 + (flag_pic != 0 && !TARGET_64BIT)
28770 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
28771 break;
28772 }
28773 return true;
28774
28775 case ZERO_EXTEND:
28776       /* The zero extension is often completely free on x86_64, so make
28777 it as cheap as possible. */
28778 if (TARGET_64BIT && mode == DImode
28779 && GET_MODE (XEXP (x, 0)) == SImode)
28780 *total = 1;
28781 else if (TARGET_ZERO_EXTEND_WITH_AND)
28782 *total = cost->add;
28783 else
28784 *total = cost->movzx;
28785 return false;
28786
28787 case SIGN_EXTEND:
28788 *total = cost->movsx;
28789 return false;
28790
28791 case ASHIFT:
28792 if (CONST_INT_P (XEXP (x, 1))
28793 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
28794 {
28795 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
28796 if (value == 1)
28797 {
28798 *total = cost->add;
28799 return false;
28800 }
28801 if ((value == 2 || value == 3)
28802 && cost->lea <= cost->shift_const)
28803 {
28804 *total = cost->lea;
28805 return false;
28806 }
28807 }
28808 /* FALLTHRU */
28809
28810 case ROTATE:
28811 case ASHIFTRT:
28812 case LSHIFTRT:
28813 case ROTATERT:
28814 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
28815 {
28816 if (CONST_INT_P (XEXP (x, 1)))
28817 {
28818 if (INTVAL (XEXP (x, 1)) > 32)
28819 *total = cost->shift_const + COSTS_N_INSNS (2);
28820 else
28821 *total = cost->shift_const * 2;
28822 }
28823 else
28824 {
28825 if (GET_CODE (XEXP (x, 1)) == AND)
28826 *total = cost->shift_var * 2;
28827 else
28828 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
28829 }
28830 }
28831 else
28832 {
28833 if (CONST_INT_P (XEXP (x, 1)))
28834 *total = cost->shift_const;
28835 else
28836 *total = cost->shift_var;
28837 }
28838 return false;
28839
28840 case FMA:
28841 {
28842 rtx sub;
28843
28844 gcc_assert (FLOAT_MODE_P (mode));
28845 gcc_assert (TARGET_FMA || TARGET_FMA4);
28846
28847 /* ??? SSE scalar/vector cost should be used here. */
28848 /* ??? Bald assumption that fma has the same cost as fmul. */
28849 *total = cost->fmul;
28850 *total += rtx_cost (XEXP (x, 1), FMA, speed);
28851
28852 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
28853 sub = XEXP (x, 0);
28854 if (GET_CODE (sub) == NEG)
28855 sub = XEXP (sub, 0);
28856 *total += rtx_cost (sub, FMA, speed);
28857
28858 sub = XEXP (x, 2);
28859 if (GET_CODE (sub) == NEG)
28860 sub = XEXP (sub, 0);
28861 *total += rtx_cost (sub, FMA, speed);
28862 return true;
28863 }
28864
28865 case MULT:
28866 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28867 {
28868 /* ??? SSE scalar cost should be used here. */
28869 *total = cost->fmul;
28870 return false;
28871 }
28872 else if (X87_FLOAT_MODE_P (mode))
28873 {
28874 *total = cost->fmul;
28875 return false;
28876 }
28877 else if (FLOAT_MODE_P (mode))
28878 {
28879 /* ??? SSE vector cost should be used here. */
28880 *total = cost->fmul;
28881 return false;
28882 }
28883 else
28884 {
28885 rtx op0 = XEXP (x, 0);
28886 rtx op1 = XEXP (x, 1);
28887 int nbits;
28888 if (CONST_INT_P (XEXP (x, 1)))
28889 {
28890 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
28891 for (nbits = 0; value != 0; value &= value - 1)
28892 nbits++;
28893 }
28894 else
28895 /* This is arbitrary. */
28896 nbits = 7;
28897
28898 /* Compute costs correctly for widening multiplication. */
28899 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
28900 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
28901 == GET_MODE_SIZE (mode))
28902 {
28903 int is_mulwiden = 0;
28904 enum machine_mode inner_mode = GET_MODE (op0);
28905
28906 if (GET_CODE (op0) == GET_CODE (op1))
28907 is_mulwiden = 1, op1 = XEXP (op1, 0);
28908 else if (CONST_INT_P (op1))
28909 {
28910 if (GET_CODE (op0) == SIGN_EXTEND)
28911 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
28912 == INTVAL (op1);
28913 else
28914 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
28915 }
28916
28917 if (is_mulwiden)
28918 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
28919 }
28920
28921 *total = (cost->mult_init[MODE_INDEX (mode)]
28922 + nbits * cost->mult_bit
28923 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
28924
28925 return true;
28926 }
28927
28928 case DIV:
28929 case UDIV:
28930 case MOD:
28931 case UMOD:
28932 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28933 /* ??? SSE cost should be used here. */
28934 *total = cost->fdiv;
28935 else if (X87_FLOAT_MODE_P (mode))
28936 *total = cost->fdiv;
28937 else if (FLOAT_MODE_P (mode))
28938 /* ??? SSE vector cost should be used here. */
28939 *total = cost->fdiv;
28940 else
28941 *total = cost->divide[MODE_INDEX (mode)];
28942 return false;
28943
28944 case PLUS:
28945 if (GET_MODE_CLASS (mode) == MODE_INT
28946 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
28947 {
28948 if (GET_CODE (XEXP (x, 0)) == PLUS
28949 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
28950 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
28951 && CONSTANT_P (XEXP (x, 1)))
28952 {
28953 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
28954 if (val == 2 || val == 4 || val == 8)
28955 {
28956 *total = cost->lea;
28957 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
28958 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
28959 outer_code, speed);
28960 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
28961 return true;
28962 }
28963 }
28964 else if (GET_CODE (XEXP (x, 0)) == MULT
28965 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
28966 {
28967 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
28968 if (val == 2 || val == 4 || val == 8)
28969 {
28970 *total = cost->lea;
28971 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
28972 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
28973 return true;
28974 }
28975 }
28976 else if (GET_CODE (XEXP (x, 0)) == PLUS)
28977 {
28978 *total = cost->lea;
28979 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
28980 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
28981 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
28982 return true;
28983 }
28984 }
28985 /* FALLTHRU */
28986
28987 case MINUS:
28988 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28989 {
28990 /* ??? SSE cost should be used here. */
28991 *total = cost->fadd;
28992 return false;
28993 }
28994 else if (X87_FLOAT_MODE_P (mode))
28995 {
28996 *total = cost->fadd;
28997 return false;
28998 }
28999 else if (FLOAT_MODE_P (mode))
29000 {
29001 /* ??? SSE vector cost should be used here. */
29002 *total = cost->fadd;
29003 return false;
29004 }
29005 /* FALLTHRU */
29006
29007 case AND:
29008 case IOR:
29009 case XOR:
29010 if (!TARGET_64BIT && mode == DImode)
29011 {
29012 *total = (cost->add * 2
29013 + (rtx_cost (XEXP (x, 0), outer_code, speed)
29014 << (GET_MODE (XEXP (x, 0)) != DImode))
29015 + (rtx_cost (XEXP (x, 1), outer_code, speed)
29016 << (GET_MODE (XEXP (x, 1)) != DImode)));
29017 return true;
29018 }
29019 /* FALLTHRU */
29020
29021 case NEG:
29022 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29023 {
29024 /* ??? SSE cost should be used here. */
29025 *total = cost->fchs;
29026 return false;
29027 }
29028 else if (X87_FLOAT_MODE_P (mode))
29029 {
29030 *total = cost->fchs;
29031 return false;
29032 }
29033 else if (FLOAT_MODE_P (mode))
29034 {
29035 /* ??? SSE vector cost should be used here. */
29036 *total = cost->fchs;
29037 return false;
29038 }
29039 /* FALLTHRU */
29040
29041 case NOT:
29042 if (!TARGET_64BIT && mode == DImode)
29043 *total = cost->add * 2;
29044 else
29045 *total = cost->add;
29046 return false;
29047
29048 case COMPARE:
29049 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
29050 && XEXP (XEXP (x, 0), 1) == const1_rtx
29051 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
29052 && XEXP (x, 1) == const0_rtx)
29053 {
29054 /* This kind of construct is implemented using test[bwl].
29055 Treat it as if we had an AND. */
29056 *total = (cost->add
29057 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
29058 + rtx_cost (const1_rtx, outer_code, speed));
29059 return true;
29060 }
29061 return false;
29062
29063 case FLOAT_EXTEND:
29064 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
29065 *total = 0;
29066 return false;
29067
29068 case ABS:
29069 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29070 /* ??? SSE cost should be used here. */
29071 *total = cost->fabs;
29072 else if (X87_FLOAT_MODE_P (mode))
29073 *total = cost->fabs;
29074 else if (FLOAT_MODE_P (mode))
29075 /* ??? SSE vector cost should be used here. */
29076 *total = cost->fabs;
29077 return false;
29078
29079 case SQRT:
29080 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29081 /* ??? SSE cost should be used here. */
29082 *total = cost->fsqrt;
29083 else if (X87_FLOAT_MODE_P (mode))
29084 *total = cost->fsqrt;
29085 else if (FLOAT_MODE_P (mode))
29086 /* ??? SSE vector cost should be used here. */
29087 *total = cost->fsqrt;
29088 return false;
29089
29090 case UNSPEC:
29091 if (XINT (x, 1) == UNSPEC_TP)
29092 *total = 0;
29093 return false;
29094
29095 case VEC_SELECT:
29096 case VEC_CONCAT:
29097 case VEC_MERGE:
29098 case VEC_DUPLICATE:
29099       /* ??? Assume all of these vector manipulation patterns are
29100	 recognizable, in which case they all pretty much have the
29101	 same cost.  */
29102 *total = COSTS_N_INSNS (1);
29103 return true;
29104
29105 default:
29106 return false;
29107 }
29108 }
29109
29110 #if TARGET_MACHO
29111
29112 static int current_machopic_label_num;
29113
29114 /* Given a symbol name and its associated stub, write out the
29115 definition of the stub. */
29116
29117 void
29118 machopic_output_stub (FILE *file, const char *symb, const char *stub)
29119 {
29120 unsigned int length;
29121 char *binder_name, *symbol_name, lazy_ptr_name[32];
29122 int label = ++current_machopic_label_num;
29123
29124 /* For 64-bit we shouldn't get here. */
29125 gcc_assert (!TARGET_64BIT);
29126
29127 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
29128 symb = targetm.strip_name_encoding (symb);
29129
29130 length = strlen (stub);
29131 binder_name = XALLOCAVEC (char, length + 32);
29132 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
29133
29134 length = strlen (symb);
29135 symbol_name = XALLOCAVEC (char, length + 32);
29136 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
29137
29138 sprintf (lazy_ptr_name, "L%d$lz", label);
29139
29140 if (MACHOPIC_ATT_STUB)
29141 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
29142 else if (MACHOPIC_PURE)
29143 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
29144 else
29145 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
29146
29147 fprintf (file, "%s:\n", stub);
29148 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29149
29150 if (MACHOPIC_ATT_STUB)
29151 {
29152 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
29153 }
29154 else if (MACHOPIC_PURE)
29155 {
29156 /* PIC stub. */
29157 /* 25-byte PIC stub using "CALL get_pc_thunk". */
29158 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
29159 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
29160 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
29161 label, lazy_ptr_name, label);
29162 fprintf (file, "\tjmp\t*%%ecx\n");
29163 }
29164 else
29165 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
29166
29167 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
29168 it needs no stub-binding-helper. */
29169 if (MACHOPIC_ATT_STUB)
29170 return;
29171
29172 fprintf (file, "%s:\n", binder_name);
29173
29174 if (MACHOPIC_PURE)
29175 {
29176 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
29177 fprintf (file, "\tpushl\t%%ecx\n");
29178 }
29179 else
29180 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
29181
29182 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
29183
29184 /* N.B. Keep the correspondence of these
29185 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
29186 old-pic/new-pic/non-pic stubs; altering this will break
29187 compatibility with existing dylibs. */
29188 if (MACHOPIC_PURE)
29189 {
29190 /* 25-byte PIC stub using "CALL get_pc_thunk". */
29191 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
29192 }
29193 else
29194 /* 16-byte -mdynamic-no-pic stub. */
29195 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
29196
29197 fprintf (file, "%s:\n", lazy_ptr_name);
29198 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29199 fprintf (file, ASM_LONG "%s\n", binder_name);
29200 }
29201 #endif /* TARGET_MACHO */
29202
29203 /* Order the registers for register allocator. */
29204
29205 void
29206 x86_order_regs_for_local_alloc (void)
29207 {
29208 int pos = 0;
29209 int i;
29210
29211 /* First allocate the local general purpose registers. */
29212 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
29213 if (GENERAL_REGNO_P (i) && call_used_regs[i])
29214 reg_alloc_order [pos++] = i;
29215
29216 /* Global general purpose registers. */
29217 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
29218 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
29219 reg_alloc_order [pos++] = i;
29220
29221 /* x87 registers come first in case we are doing FP math
29222 using them. */
29223 if (!TARGET_SSE_MATH)
29224 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
29225 reg_alloc_order [pos++] = i;
29226
29227 /* SSE registers. */
29228 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
29229 reg_alloc_order [pos++] = i;
29230 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
29231 reg_alloc_order [pos++] = i;
29232
29233 /* x87 registers. */
29234 if (TARGET_SSE_MATH)
29235 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
29236 reg_alloc_order [pos++] = i;
29237
29238 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
29239 reg_alloc_order [pos++] = i;
29240
29241   /* Initialize the rest of the array, as we do not allocate some registers
29242      at all.  */
29243 while (pos < FIRST_PSEUDO_REGISTER)
29244 reg_alloc_order [pos++] = 0;
29245 }
29246
29247 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
29248    in struct attribute_spec.handler. */
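/* A hypothetical use, for illustration only:

     struct big { int a, b, c; };
     __attribute__ ((callee_pop_aggregate_return (1)))
     struct big make_big (void);

   The argument must be the integer constant 0 or 1; other values are
   rejected below, and the attribute is accepted only for 32-bit targets.  */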
29249 static tree
29250 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
29251 tree args,
29252 int flags ATTRIBUTE_UNUSED,
29253 bool *no_add_attrs)
29254 {
29255 if (TREE_CODE (*node) != FUNCTION_TYPE
29256 && TREE_CODE (*node) != METHOD_TYPE
29257 && TREE_CODE (*node) != FIELD_DECL
29258 && TREE_CODE (*node) != TYPE_DECL)
29259 {
29260 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29261 name);
29262 *no_add_attrs = true;
29263 return NULL_TREE;
29264 }
29265 if (TARGET_64BIT)
29266 {
29267 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
29268 name);
29269 *no_add_attrs = true;
29270 return NULL_TREE;
29271 }
29272 if (is_attribute_p ("callee_pop_aggregate_return", name))
29273 {
29274 tree cst;
29275
29276 cst = TREE_VALUE (args);
29277 if (TREE_CODE (cst) != INTEGER_CST)
29278 {
29279 warning (OPT_Wattributes,
29280 "%qE attribute requires an integer constant argument",
29281 name);
29282 *no_add_attrs = true;
29283 }
29284 else if (compare_tree_int (cst, 0) != 0
29285 && compare_tree_int (cst, 1) != 0)
29286 {
29287 warning (OPT_Wattributes,
29288 "argument to %qE attribute is neither zero, nor one",
29289 name);
29290 *no_add_attrs = true;
29291 }
29292
29293 return NULL_TREE;
29294 }
29295
29296 return NULL_TREE;
29297 }
29298
29299 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
29300 struct attribute_spec.handler. */
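/* For illustration only (hypothetical declarations; 64-bit targets):

     int f (int) __attribute__ ((ms_abi));
     int g (int) __attribute__ ((sysv_abi));

   The two attributes are mutually exclusive on the same type, and both are
   rejected for 32-bit targets below.  */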
29301 static tree
29302 ix86_handle_abi_attribute (tree *node, tree name,
29303 tree args ATTRIBUTE_UNUSED,
29304 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29305 {
29306 if (TREE_CODE (*node) != FUNCTION_TYPE
29307 && TREE_CODE (*node) != METHOD_TYPE
29308 && TREE_CODE (*node) != FIELD_DECL
29309 && TREE_CODE (*node) != TYPE_DECL)
29310 {
29311 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29312 name);
29313 *no_add_attrs = true;
29314 return NULL_TREE;
29315 }
29316 if (!TARGET_64BIT)
29317 {
29318 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
29319 name);
29320 *no_add_attrs = true;
29321 return NULL_TREE;
29322 }
29323
29324 /* Can combine regparm with all attributes but fastcall. */
29325 if (is_attribute_p ("ms_abi", name))
29326 {
29327 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
29328 {
29329 error ("ms_abi and sysv_abi attributes are not compatible");
29330 }
29331
29332 return NULL_TREE;
29333 }
29334 else if (is_attribute_p ("sysv_abi", name))
29335 {
29336 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
29337 {
29338 error ("ms_abi and sysv_abi attributes are not compatible");
29339 }
29340
29341 return NULL_TREE;
29342 }
29343
29344 return NULL_TREE;
29345 }
29346
29347 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
29348 struct attribute_spec.handler. */
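/* For illustration only (hypothetical declarations):

     struct __attribute__ ((ms_struct)) s { char c; int i : 5; };
     struct __attribute__ ((gcc_struct)) t { char c; int i : 5; };

   The attributes apply to struct and union types, and the two variants are
   mutually exclusive, as checked below.  */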
29349 static tree
29350 ix86_handle_struct_attribute (tree *node, tree name,
29351 tree args ATTRIBUTE_UNUSED,
29352 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29353 {
29354 tree *type = NULL;
29355 if (DECL_P (*node))
29356 {
29357 if (TREE_CODE (*node) == TYPE_DECL)
29358 type = &TREE_TYPE (*node);
29359 }
29360 else
29361 type = node;
29362
29363 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
29364 || TREE_CODE (*type) == UNION_TYPE)))
29365 {
29366 warning (OPT_Wattributes, "%qE attribute ignored",
29367 name);
29368 *no_add_attrs = true;
29369 }
29370
29371 else if ((is_attribute_p ("ms_struct", name)
29372 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
29373 || ((is_attribute_p ("gcc_struct", name)
29374 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
29375 {
29376 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
29377 name);
29378 *no_add_attrs = true;
29379 }
29380
29381 return NULL_TREE;
29382 }
29383
29384 static tree
29385 ix86_handle_fndecl_attribute (tree *node, tree name,
29386 tree args ATTRIBUTE_UNUSED,
29387 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29388 {
29389 if (TREE_CODE (*node) != FUNCTION_DECL)
29390 {
29391 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29392 name);
29393 *no_add_attrs = true;
29394 }
29395 return NULL_TREE;
29396 }
29397
29398 static bool
29399 ix86_ms_bitfield_layout_p (const_tree record_type)
29400 {
29401 return ((TARGET_MS_BITFIELD_LAYOUT
29402 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
29403 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
29404 }
29405
29406 /* Returns an expression indicating where the this parameter is
29407 located on entry to the FUNCTION. */
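/* For example (a sketch of what the code below returns): with the 64-bit
   SysV ABI the this pointer arrives in %rdi, or in %rsi when the return
   value is passed by hidden reference; with 32-bit fastcall it arrives in
   %ecx, or %edx for an aggregate return.  */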
29408
29409 static rtx
29410 x86_this_parameter (tree function)
29411 {
29412 tree type = TREE_TYPE (function);
29413 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
29414 int nregs;
29415
29416 if (TARGET_64BIT)
29417 {
29418 const int *parm_regs;
29419
29420 if (ix86_function_type_abi (type) == MS_ABI)
29421 parm_regs = x86_64_ms_abi_int_parameter_registers;
29422 else
29423 parm_regs = x86_64_int_parameter_registers;
29424 return gen_rtx_REG (DImode, parm_regs[aggr]);
29425 }
29426
29427 nregs = ix86_function_regparm (type, function);
29428
29429 if (nregs > 0 && !stdarg_p (type))
29430 {
29431 int regno;
29432 unsigned int ccvt = ix86_get_callcvt (type);
29433
29434 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
29435 regno = aggr ? DX_REG : CX_REG;
29436 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
29437 {
29438 regno = CX_REG;
29439 if (aggr)
29440 return gen_rtx_MEM (SImode,
29441 plus_constant (stack_pointer_rtx, 4));
29442 }
29443 else
29444 {
29445 regno = AX_REG;
29446 if (aggr)
29447 {
29448 regno = DX_REG;
29449 if (nregs == 1)
29450 return gen_rtx_MEM (SImode,
29451 plus_constant (stack_pointer_rtx, 4));
29452 }
29453 }
29454 return gen_rtx_REG (SImode, regno);
29455 }
29456
29457 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
29458 }
29459
29460 /* Determine whether x86_output_mi_thunk can succeed. */
29461
29462 static bool
29463 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
29464 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
29465 HOST_WIDE_INT vcall_offset, const_tree function)
29466 {
29467 /* 64-bit can handle anything. */
29468 if (TARGET_64BIT)
29469 return true;
29470
29471 /* For 32-bit, everything's fine if we have one free register. */
29472 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
29473 return true;
29474
29475 /* Need a free register for vcall_offset. */
29476 if (vcall_offset)
29477 return false;
29478
29479 /* Need a free register for GOT references. */
29480 if (flag_pic && !targetm.binds_local_p (function))
29481 return false;
29482
29483 /* Otherwise ok. */
29484 return true;
29485 }
29486
29487 /* Output the assembler code for a thunk function. THUNK_DECL is the
29488 declaration for the thunk function itself, FUNCTION is the decl for
29489 the target function. DELTA is an immediate constant offset to be
29490 added to THIS. If VCALL_OFFSET is nonzero, the word at
29491 *(*this + vcall_offset) should be added to THIS. */
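/* In rough C-like pseudo code (a sketch only, not literal output), the thunk
   emitted below does:

     this += delta;
     if (vcall_offset)
       this += *(ptrdiff_t *) (*(char **) this + vcall_offset);
     goto function;
*/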
29492
29493 static void
29494 x86_output_mi_thunk (FILE *file,
29495 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
29496 HOST_WIDE_INT vcall_offset, tree function)
29497 {
29498 rtx this_param = x86_this_parameter (function);
29499 rtx this_reg, tmp, fnaddr;
29500
29501 emit_note (NOTE_INSN_PROLOGUE_END);
29502
29503 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
29504 pull it in now and let DELTA benefit. */
29505 if (REG_P (this_param))
29506 this_reg = this_param;
29507 else if (vcall_offset)
29508 {
29509 /* Put the this parameter into %eax. */
29510 this_reg = gen_rtx_REG (Pmode, AX_REG);
29511 emit_move_insn (this_reg, this_param);
29512 }
29513 else
29514 this_reg = NULL_RTX;
29515
29516 /* Adjust the this parameter by a fixed constant. */
29517 if (delta)
29518 {
29519 rtx delta_rtx = GEN_INT (delta);
29520 rtx delta_dst = this_reg ? this_reg : this_param;
29521
29522 if (TARGET_64BIT)
29523 {
29524 if (!x86_64_general_operand (delta_rtx, Pmode))
29525 {
29526 tmp = gen_rtx_REG (Pmode, R10_REG);
29527 emit_move_insn (tmp, delta_rtx);
29528 delta_rtx = tmp;
29529 }
29530 }
29531
29532 emit_insn (ix86_gen_add3 (delta_dst, delta_dst, delta_rtx));
29533 }
29534
29535 /* Adjust the this parameter by a value stored in the vtable. */
29536 if (vcall_offset)
29537 {
29538 rtx vcall_addr, vcall_mem, this_mem;
29539 unsigned int tmp_regno;
29540
29541 if (TARGET_64BIT)
29542 tmp_regno = R10_REG;
29543 else
29544 {
29545 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
29546 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
29547 tmp_regno = AX_REG;
29548 else
29549 tmp_regno = CX_REG;
29550 }
29551 tmp = gen_rtx_REG (Pmode, tmp_regno);
29552
29553 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
29554 if (Pmode != ptr_mode)
29555 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
29556 emit_move_insn (tmp, this_mem);
29557
29558 /* Adjust the this parameter. */
29559 vcall_addr = plus_constant (tmp, vcall_offset);
29560 if (TARGET_64BIT
29561 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
29562 {
29563 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
29564 emit_move_insn (tmp2, GEN_INT (vcall_offset));
29565 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
29566 }
29567
29568 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
29569 if (Pmode != ptr_mode)
29570 emit_insn (gen_addsi_1_zext (this_reg,
29571 gen_rtx_REG (ptr_mode,
29572 REGNO (this_reg)),
29573 vcall_mem));
29574 else
29575 emit_insn (ix86_gen_add3 (this_reg, this_reg, vcall_mem));
29576 }
29577
29578 /* If necessary, drop THIS back to its stack slot. */
29579 if (this_reg && this_reg != this_param)
29580 emit_move_insn (this_param, this_reg);
29581
29582 fnaddr = XEXP (DECL_RTL (function), 0);
29583 if (TARGET_64BIT)
29584 {
29585 if (!flag_pic || targetm.binds_local_p (function)
29586 || cfun->machine->call_abi == MS_ABI)
29587 ;
29588 else
29589 {
29590 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
29591 tmp = gen_rtx_CONST (Pmode, tmp);
29592 fnaddr = gen_rtx_MEM (Pmode, tmp);
29593 }
29594 }
29595 else
29596 {
29597 if (!flag_pic || targetm.binds_local_p (function))
29598 ;
29599 #if TARGET_MACHO
29600 else if (TARGET_MACHO)
29601 {
29602 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
29603 fnaddr = XEXP (fnaddr, 0);
29604 }
29605 #endif /* TARGET_MACHO */
29606 else
29607 {
29608 tmp = gen_rtx_REG (Pmode, CX_REG);
29609 output_set_got (tmp, NULL_RTX);
29610
29611 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
29612 fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
29613 fnaddr = gen_rtx_MEM (Pmode, fnaddr);
29614 }
29615 }
29616
29617 /* Our sibling call patterns do not allow memories, because we have no
29618 predicate that can distinguish between frame and non-frame memory.
29619 For our purposes here, we can get away with (ab)using a jump pattern,
29620 because we're going to do no optimization. */
29621 if (MEM_P (fnaddr))
29622 emit_jump_insn (gen_indirect_jump (fnaddr));
29623 else
29624 {
29625 tmp = gen_rtx_MEM (QImode, fnaddr);
29626 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
29627 tmp = emit_call_insn (tmp);
29628 SIBLING_CALL_P (tmp) = 1;
29629 }
29630 emit_barrier ();
29631
29632 /* Emit just enough of rest_of_compilation to get the insns emitted.
29633 Note that use_thunk calls assemble_start_function et al. */
29634 tmp = get_insns ();
29635 insn_locators_alloc ();
29636 shorten_branches (tmp);
29637 final_start_function (tmp, file, 1);
29638 final (tmp, file, 1);
29639 final_end_function ();
29640 }
29641
29642 static void
29643 x86_file_start (void)
29644 {
29645 default_file_start ();
29646 #if TARGET_MACHO
29647 darwin_file_start ();
29648 #endif
29649 if (X86_FILE_START_VERSION_DIRECTIVE)
29650 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
29651 if (X86_FILE_START_FLTUSED)
29652 fputs ("\t.global\t__fltused\n", asm_out_file);
29653 if (ix86_asm_dialect == ASM_INTEL)
29654 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
29655 }
29656
29657 int
29658 x86_field_alignment (tree field, int computed)
29659 {
29660 enum machine_mode mode;
29661 tree type = TREE_TYPE (field);
29662
29663 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
29664 return computed;
29665 mode = TYPE_MODE (strip_array_types (type));
29666 if (mode == DFmode || mode == DCmode
29667 || GET_MODE_CLASS (mode) == MODE_INT
29668 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
29669 return MIN (32, computed);
29670 return computed;
29671 }
29672
29673 /* Output assembler code to FILE to increment profiler label # LABELNO
29674 for profiling a function entry. */
29675 void
29676 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
29677 {
29678 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
29679 : MCOUNT_NAME);
29680
29681 if (TARGET_64BIT)
29682 {
29683 #ifndef NO_PROFILE_COUNTERS
29684 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
29685 #endif
29686
29687 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
29688 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
29689 else
29690 fprintf (file, "\tcall\t%s\n", mcount_name);
29691 }
29692 else if (flag_pic)
29693 {
29694 #ifndef NO_PROFILE_COUNTERS
29695 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
29696 LPREFIX, labelno);
29697 #endif
29698 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
29699 }
29700 else
29701 {
29702 #ifndef NO_PROFILE_COUNTERS
29703 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
29704 LPREFIX, labelno);
29705 #endif
29706 fprintf (file, "\tcall\t%s\n", mcount_name);
29707 }
29708 }
29709
29710 /* We don't have exact information about the insn sizes, but we may assume
29711 quite safely that we are informed about all 1 byte insns and memory
29712 address sizes. This is enough to eliminate unnecessary padding in
29713 99% of cases. */
29714
29715 static int
29716 min_insn_size (rtx insn)
29717 {
29718 int l = 0, len;
29719
29720 if (!INSN_P (insn) || !active_insn_p (insn))
29721 return 0;
29722
29723   /* Discard alignments we've emitted and jump instructions.  */
29724 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
29725 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
29726 return 0;
29727 if (JUMP_TABLE_DATA_P (insn))
29728 return 0;
29729
29730 /* Important case - calls are always 5 bytes.
29731      It is common to have many calls in a row.  */
29732 if (CALL_P (insn)
29733 && symbolic_reference_mentioned_p (PATTERN (insn))
29734 && !SIBLING_CALL_P (insn))
29735 return 5;
29736 len = get_attr_length (insn);
29737 if (len <= 1)
29738 return 1;
29739
29740 /* For normal instructions we rely on get_attr_length being exact,
29741 with a few exceptions. */
29742 if (!JUMP_P (insn))
29743 {
29744 enum attr_type type = get_attr_type (insn);
29745
29746 switch (type)
29747 {
29748 case TYPE_MULTI:
29749 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
29750 || asm_noperands (PATTERN (insn)) >= 0)
29751 return 0;
29752 break;
29753 case TYPE_OTHER:
29754 case TYPE_FCMP:
29755 break;
29756 default:
29757 /* Otherwise trust get_attr_length. */
29758 return len;
29759 }
29760
29761 l = get_attr_length_address (insn);
29762 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
29763 l = 4;
29764 }
29765 if (l)
29766 return 1+l;
29767 else
29768 return 2;
29769 }
29770
29771 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
29772
29773 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16 byte
29774    window.  */
29775
29776 static void
29777 ix86_avoid_jump_mispredicts (void)
29778 {
29779 rtx insn, start = get_insns ();
29780 int nbytes = 0, njumps = 0;
29781 int isjump = 0;
29782
29783 /* Look for all minimal intervals of instructions containing 4 jumps.
29784 The intervals are bounded by START and INSN. NBYTES is the total
29785 size of instructions in the interval including INSN and not including
29786      START.  When NBYTES is smaller than 16 bytes, it is possible
29787      that the end of START and INSN ends up in the same 16-byte page.
29788
29789      The smallest offset in the page at which INSN can start is the case where
29790      START ends at offset 0.  The offset of INSN is then NBYTES - sizeof (INSN).
29791      We add a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).
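     For example (illustrative numbers only): if the instructions between
     START and INSN add up to NBYTES = 10 and INSN itself is 2 bytes, the
     pad emitted below uses maxskip 15 - 10 + 2 = 7.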
29792 */
29793 for (insn = start; insn; insn = NEXT_INSN (insn))
29794 {
29795 int min_size;
29796
29797 if (LABEL_P (insn))
29798 {
29799 int align = label_to_alignment (insn);
29800 int max_skip = label_to_max_skip (insn);
29801
29802 if (max_skip > 15)
29803 max_skip = 15;
29804 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
29805 already in the current 16 byte page, because otherwise
29806 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
29807 bytes to reach 16 byte boundary. */
29808 if (align <= 0
29809 || (align <= 3 && max_skip != (1 << align) - 1))
29810 max_skip = 0;
29811 if (dump_file)
29812 fprintf (dump_file, "Label %i with max_skip %i\n",
29813 INSN_UID (insn), max_skip);
29814 if (max_skip)
29815 {
29816 while (nbytes + max_skip >= 16)
29817 {
29818 start = NEXT_INSN (start);
29819 if ((JUMP_P (start)
29820 && GET_CODE (PATTERN (start)) != ADDR_VEC
29821 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
29822 || CALL_P (start))
29823 njumps--, isjump = 1;
29824 else
29825 isjump = 0;
29826 nbytes -= min_insn_size (start);
29827 }
29828 }
29829 continue;
29830 }
29831
29832 min_size = min_insn_size (insn);
29833 nbytes += min_size;
29834 if (dump_file)
29835 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
29836 INSN_UID (insn), min_size);
29837 if ((JUMP_P (insn)
29838 && GET_CODE (PATTERN (insn)) != ADDR_VEC
29839 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
29840 || CALL_P (insn))
29841 njumps++;
29842 else
29843 continue;
29844
29845 while (njumps > 3)
29846 {
29847 start = NEXT_INSN (start);
29848 if ((JUMP_P (start)
29849 && GET_CODE (PATTERN (start)) != ADDR_VEC
29850 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
29851 || CALL_P (start))
29852 njumps--, isjump = 1;
29853 else
29854 isjump = 0;
29855 nbytes -= min_insn_size (start);
29856 }
29857 gcc_assert (njumps >= 0);
29858 if (dump_file)
29859 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
29860 INSN_UID (start), INSN_UID (insn), nbytes);
29861
29862 if (njumps == 3 && isjump && nbytes < 16)
29863 {
29864 int padsize = 15 - nbytes + min_insn_size (insn);
29865
29866 if (dump_file)
29867 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
29868 INSN_UID (insn), padsize);
29869 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
29870 }
29871 }
29872 }
29873 #endif
29874
29875 /* AMD Athlon works faster
29876    when RET is not the destination of a conditional jump or directly preceded
29877    by another jump instruction.  We avoid the penalty by inserting a NOP just
29878    before the RET instruction in such cases.  */
29879 static void
29880 ix86_pad_returns (void)
29881 {
29882 edge e;
29883 edge_iterator ei;
29884
29885 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
29886 {
29887 basic_block bb = e->src;
29888 rtx ret = BB_END (bb);
29889 rtx prev;
29890 bool replace = false;
29891
29892 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
29893 || optimize_bb_for_size_p (bb))
29894 continue;
29895 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
29896 if (active_insn_p (prev) || LABEL_P (prev))
29897 break;
29898 if (prev && LABEL_P (prev))
29899 {
29900 edge e;
29901 edge_iterator ei;
29902
29903 FOR_EACH_EDGE (e, ei, bb->preds)
29904 if (EDGE_FREQUENCY (e) && e->src->index >= 0
29905 && !(e->flags & EDGE_FALLTHRU))
29906 replace = true;
29907 }
29908 if (!replace)
29909 {
29910 prev = prev_active_insn (ret);
29911 if (prev
29912 && ((JUMP_P (prev) && any_condjump_p (prev))
29913 || CALL_P (prev)))
29914 replace = true;
29915 	  /* Empty functions get a branch mispredict even when
29916 	     the jump destination is not visible to us.  */
29917 if (!prev && !optimize_function_for_size_p (cfun))
29918 replace = true;
29919 }
29920 if (replace)
29921 {
29922 emit_jump_insn_before (gen_return_internal_long (), ret);
29923 delete_insn (ret);
29924 }
29925 }
29926 }
29927
29928 /* Count the minimum number of instructions in BB. Return 4 if the
29929 number of instructions >= 4. */
29930
29931 static int
29932 ix86_count_insn_bb (basic_block bb)
29933 {
29934 rtx insn;
29935 int insn_count = 0;
29936
29937 /* Count number of instructions in this block. Return 4 if the number
29938 of instructions >= 4. */
29939 FOR_BB_INSNS (bb, insn)
29940 {
29941       /* This only happens in exit blocks.  */
29942 if (JUMP_P (insn)
29943 && GET_CODE (PATTERN (insn)) == RETURN)
29944 break;
29945
29946 if (NONDEBUG_INSN_P (insn)
29947 && GET_CODE (PATTERN (insn)) != USE
29948 && GET_CODE (PATTERN (insn)) != CLOBBER)
29949 {
29950 insn_count++;
29951 if (insn_count >= 4)
29952 return insn_count;
29953 }
29954 }
29955
29956 return insn_count;
29957 }
29958
29959
29960 /* Count the minimum number of instructions in code path in BB.
29961 Return 4 if the number of instructions >= 4. */
29962
29963 static int
29964 ix86_count_insn (basic_block bb)
29965 {
29966 edge e;
29967 edge_iterator ei;
29968 int min_prev_count;
29969
29970 /* Only bother counting instructions along paths with no
29971 more than 2 basic blocks between entry and exit. Given
29972 that BB has an edge to exit, determine if a predecessor
29973 of BB has an edge from entry. If so, compute the number
29974 of instructions in the predecessor block. If there
29975 happen to be multiple such blocks, compute the minimum. */
29976 min_prev_count = 4;
29977 FOR_EACH_EDGE (e, ei, bb->preds)
29978 {
29979 edge prev_e;
29980 edge_iterator prev_ei;
29981
29982 if (e->src == ENTRY_BLOCK_PTR)
29983 {
29984 min_prev_count = 0;
29985 break;
29986 }
29987 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
29988 {
29989 if (prev_e->src == ENTRY_BLOCK_PTR)
29990 {
29991 int count = ix86_count_insn_bb (e->src);
29992 if (count < min_prev_count)
29993 min_prev_count = count;
29994 break;
29995 }
29996 }
29997 }
29998
29999 if (min_prev_count < 4)
30000 min_prev_count += ix86_count_insn_bb (bb);
30001
30002 return min_prev_count;
30003 }
30004
30005 /* Pad short functions to 4 instructions.   */
30006
30007 static void
30008 ix86_pad_short_function (void)
30009 {
30010 edge e;
30011 edge_iterator ei;
30012
30013 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
30014 {
30015 rtx ret = BB_END (e->src);
30016 if (JUMP_P (ret) && GET_CODE (PATTERN (ret)) == RETURN)
30017 {
30018 int insn_count = ix86_count_insn (e->src);
30019
30020 /* Pad short function. */
30021 if (insn_count < 4)
30022 {
30023 rtx insn = ret;
30024
30025 /* Find epilogue. */
30026 while (insn
30027 && (!NOTE_P (insn)
30028 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
30029 insn = PREV_INSN (insn);
30030
30031 if (!insn)
30032 insn = ret;
30033
30034 /* Two NOPs count as one instruction. */
30035 insn_count = 2 * (4 - insn_count);
30036 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
30037 }
30038 }
30039 }
30040 }
30041
30042 /* Implement machine specific optimizations. We implement padding of returns
30043 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
30044 static void
30045 ix86_reorg (void)
30046 {
30047 /* We are freeing block_for_insn in the toplev to keep compatibility
30048 with old MDEP_REORGS that are not CFG based. Recompute it now. */
30049 compute_bb_for_insn ();
30050
30051 /* Run the vzeroupper optimization if needed. */
30052 if (TARGET_VZEROUPPER)
30053 move_or_delete_vzeroupper ();
30054
30055 if (optimize && optimize_function_for_speed_p (cfun))
30056 {
30057 if (TARGET_PAD_SHORT_FUNCTION)
30058 ix86_pad_short_function ();
30059 else if (TARGET_PAD_RETURNS)
30060 ix86_pad_returns ();
30061 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
30062 if (TARGET_FOUR_JUMP_LIMIT)
30063 ix86_avoid_jump_mispredicts ();
30064 #endif
30065 }
30066 }
30067
30068 /* Return nonzero when a QImode register that must be represented via a REX
30069    prefix is used.  */
30070 bool
30071 x86_extended_QIreg_mentioned_p (rtx insn)
30072 {
30073 int i;
30074 extract_insn_cached (insn);
30075 for (i = 0; i < recog_data.n_operands; i++)
30076 if (REG_P (recog_data.operand[i])
30077 && REGNO (recog_data.operand[i]) > BX_REG)
30078 return true;
30079 return false;
30080 }
30081
30082 /* Return nonzero when P points to a register encoded via a REX prefix.
30083    Called via for_each_rtx.  */
30084 static int
30085 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
30086 {
30087 unsigned int regno;
30088 if (!REG_P (*p))
30089 return 0;
30090 regno = REGNO (*p);
30091 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
30092 }
30093
30094 /* Return true when INSN mentions register that must be encoded using REX
30095 prefix. */
30096 bool
30097 x86_extended_reg_mentioned_p (rtx insn)
30098 {
30099 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
30100 extended_reg_mentioned_1, NULL);
30101 }
30102
30103 /* If profitable, negate (without causing overflow) integer constant
30104 of mode MODE at location LOC. Return true in this case. */
30105 bool
30106 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
30107 {
30108 HOST_WIDE_INT val;
30109
30110 if (!CONST_INT_P (*loc))
30111 return false;
30112
30113 switch (mode)
30114 {
30115 case DImode:
30116 /* DImode x86_64 constants must fit in 32 bits. */
30117 gcc_assert (x86_64_immediate_operand (*loc, mode));
30118
30119 mode = SImode;
30120 break;
30121
30122 case SImode:
30123 case HImode:
30124 case QImode:
30125 break;
30126
30127 default:
30128 gcc_unreachable ();
30129 }
30130
30131 /* Avoid overflows. */
30132 if (mode_signbit_p (mode, *loc))
30133 return false;
30134
30135 val = INTVAL (*loc);
30136
30137 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
30138 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
30139 if ((val < 0 && val != -128)
30140 || val == 128)
30141 {
30142 *loc = GEN_INT (-val);
30143 return true;
30144 }
30145
30146 return false;
30147 }
30148
30149 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
30150 optabs would emit if we didn't have TFmode patterns. */
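/* The sequence below corresponds roughly to this C sketch (illustrative only,
   shown for a DImode input converted to double): convert directly when the
   value is nonnegative as a signed number; otherwise halve it, fold the
   discarded low bit back in with OR, convert, and double the result.

     double floatuns (unsigned long long in)
     {
       if ((long long) in >= 0)
         return (double) (long long) in;
       unsigned long long half = (in >> 1) | (in & 1);
       double f = (double) (long long) half;
       return f + f;
     }
*/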
30151
30152 void
30153 x86_emit_floatuns (rtx operands[2])
30154 {
30155 rtx neglab, donelab, i0, i1, f0, in, out;
30156 enum machine_mode mode, inmode;
30157
30158 inmode = GET_MODE (operands[1]);
30159 gcc_assert (inmode == SImode || inmode == DImode);
30160
30161 out = operands[0];
30162 in = force_reg (inmode, operands[1]);
30163 mode = GET_MODE (out);
30164 neglab = gen_label_rtx ();
30165 donelab = gen_label_rtx ();
30166 f0 = gen_reg_rtx (mode);
30167
30168 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
30169
30170 expand_float (out, in, 0);
30171
30172 emit_jump_insn (gen_jump (donelab));
30173 emit_barrier ();
30174
30175 emit_label (neglab);
30176
30177 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
30178 1, OPTAB_DIRECT);
30179 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
30180 1, OPTAB_DIRECT);
30181 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
30182
30183 expand_float (f0, i0, 0);
30184
30185 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
30186
30187 emit_label (donelab);
30188 }
30189 \f
30190 /* AVX does not support 32-byte integer vector operations,
30191 thus the longest vector we are faced with is V16QImode. */
30192 #define MAX_VECT_LEN 16
30193
30194 struct expand_vec_perm_d
30195 {
30196 rtx target, op0, op1;
30197 unsigned char perm[MAX_VECT_LEN];
30198 enum machine_mode vmode;
30199 unsigned char nelt;
30200 bool testing_p;
30201 };
30202
30203 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
30204 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
30205
30206 /* Get a vector mode of the same size as the original but with elements
30207 twice as wide. This is only guaranteed to apply to integral vectors. */
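/* E.g. V16QImode widens to V8HImode and V8HImode to V4SImode: the same total
   size with half as many elements, each twice as wide.  */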
30208
30209 static inline enum machine_mode
30210 get_mode_wider_vector (enum machine_mode o)
30211 {
30212 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
30213 enum machine_mode n = GET_MODE_WIDER_MODE (o);
30214 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
30215 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
30216 return n;
30217 }
30218
30219 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30220 with all elements equal to VAR. Return true if successful. */
30221
30222 static bool
30223 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
30224 rtx target, rtx val)
30225 {
30226 bool ok;
30227
30228 switch (mode)
30229 {
30230 case V2SImode:
30231 case V2SFmode:
30232 if (!mmx_ok)
30233 return false;
30234 /* FALLTHRU */
30235
30236 case V4DFmode:
30237 case V4DImode:
30238 case V8SFmode:
30239 case V8SImode:
30240 case V2DFmode:
30241 case V2DImode:
30242 case V4SFmode:
30243 case V4SImode:
30244 {
30245 rtx insn, dup;
30246
30247 /* First attempt to recognize VAL as-is. */
30248 dup = gen_rtx_VEC_DUPLICATE (mode, val);
30249 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
30250 if (recog_memoized (insn) < 0)
30251 {
30252 rtx seq;
30253 /* If that fails, force VAL into a register. */
30254
30255 start_sequence ();
30256 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
30257 seq = get_insns ();
30258 end_sequence ();
30259 if (seq)
30260 emit_insn_before (seq, insn);
30261
30262 ok = recog_memoized (insn) >= 0;
30263 gcc_assert (ok);
30264 }
30265 }
30266 return true;
30267
30268 case V4HImode:
30269 if (!mmx_ok)
30270 return false;
30271 if (TARGET_SSE || TARGET_3DNOW_A)
30272 {
30273 rtx x;
30274
30275 val = gen_lowpart (SImode, val);
30276 x = gen_rtx_TRUNCATE (HImode, val);
30277 x = gen_rtx_VEC_DUPLICATE (mode, x);
30278 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30279 return true;
30280 }
30281 goto widen;
30282
30283 case V8QImode:
30284 if (!mmx_ok)
30285 return false;
30286 goto widen;
30287
30288 case V8HImode:
30289 if (TARGET_SSE2)
30290 {
30291 struct expand_vec_perm_d dperm;
30292 rtx tmp1, tmp2;
30293
30294 permute:
30295 memset (&dperm, 0, sizeof (dperm));
30296 dperm.target = target;
30297 dperm.vmode = mode;
30298 dperm.nelt = GET_MODE_NUNITS (mode);
30299 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
30300
30301 /* Extend to SImode using a paradoxical SUBREG. */
30302 tmp1 = gen_reg_rtx (SImode);
30303 emit_move_insn (tmp1, gen_lowpart (SImode, val));
30304
30305 /* Insert the SImode value as low element of a V4SImode vector. */
30306 tmp2 = gen_lowpart (V4SImode, dperm.op0);
30307 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
30308
30309 ok = (expand_vec_perm_1 (&dperm)
30310 || expand_vec_perm_broadcast_1 (&dperm));
30311 gcc_assert (ok);
30312 return ok;
30313 }
30314 goto widen;
30315
30316 case V16QImode:
30317 if (TARGET_SSE2)
30318 goto permute;
30319 goto widen;
30320
30321 widen:
30322 /* Replicate the value once into the next wider mode and recurse. */
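/* E.g. a QImode byte B becomes the HImode value (B << 8) | B, which is
then broadcast as a vector of HImode elements. */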
30323 {
30324 enum machine_mode smode, wsmode, wvmode;
30325 rtx x;
30326
30327 smode = GET_MODE_INNER (mode);
30328 wvmode = get_mode_wider_vector (mode);
30329 wsmode = GET_MODE_INNER (wvmode);
30330
30331 val = convert_modes (wsmode, smode, val, true);
30332 x = expand_simple_binop (wsmode, ASHIFT, val,
30333 GEN_INT (GET_MODE_BITSIZE (smode)),
30334 NULL_RTX, 1, OPTAB_LIB_WIDEN);
30335 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
30336
30337 x = gen_lowpart (wvmode, target);
30338 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
30339 gcc_assert (ok);
30340 return ok;
30341 }
30342
30343 case V16HImode:
30344 case V32QImode:
30345 {
30346 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
30347 rtx x = gen_reg_rtx (hvmode);
30348
30349 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
30350 gcc_assert (ok);
30351
30352 x = gen_rtx_VEC_CONCAT (mode, x, x);
30353 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30354 }
30355 return true;
30356
30357 default:
30358 return false;
30359 }
30360 }
30361
30362 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30363 whose ONE_VAR element is VAR, and other elements are zero. Return true
30364 if successful. */
30365
30366 static bool
30367 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
30368 rtx target, rtx var, int one_var)
30369 {
30370 enum machine_mode vsimode;
30371 rtx new_target;
30372 rtx x, tmp;
30373 bool use_vector_set = false;
30374
30375 switch (mode)
30376 {
30377 case V2DImode:
30378 /* For SSE4.1, we normally use vector set. But if the second
30379 element is zero and inter-unit moves are OK, we use movq
30380 instead. */
30381 use_vector_set = (TARGET_64BIT
30382 && TARGET_SSE4_1
30383 && !(TARGET_INTER_UNIT_MOVES
30384 && one_var == 0));
30385 break;
30386 case V16QImode:
30387 case V4SImode:
30388 case V4SFmode:
30389 use_vector_set = TARGET_SSE4_1;
30390 break;
30391 case V8HImode:
30392 use_vector_set = TARGET_SSE2;
30393 break;
30394 case V4HImode:
30395 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
30396 break;
30397 case V32QImode:
30398 case V16HImode:
30399 case V8SImode:
30400 case V8SFmode:
30401 case V4DFmode:
30402 use_vector_set = TARGET_AVX;
30403 break;
30404 case V4DImode:
30405 /* Use ix86_expand_vector_set in 64bit mode only. */
30406 use_vector_set = TARGET_AVX && TARGET_64BIT;
30407 break;
30408 default:
30409 break;
30410 }
30411
30412 if (use_vector_set)
30413 {
30414 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
30415 var = force_reg (GET_MODE_INNER (mode), var);
30416 ix86_expand_vector_set (mmx_ok, target, var, one_var);
30417 return true;
30418 }
30419
30420 switch (mode)
30421 {
30422 case V2SFmode:
30423 case V2SImode:
30424 if (!mmx_ok)
30425 return false;
30426 /* FALLTHRU */
30427
30428 case V2DFmode:
30429 case V2DImode:
30430 if (one_var != 0)
30431 return false;
30432 var = force_reg (GET_MODE_INNER (mode), var);
30433 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
30434 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30435 return true;
30436
30437 case V4SFmode:
30438 case V4SImode:
30439 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
30440 new_target = gen_reg_rtx (mode);
30441 else
30442 new_target = target;
30443 var = force_reg (GET_MODE_INNER (mode), var);
30444 x = gen_rtx_VEC_DUPLICATE (mode, var);
30445 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
30446 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
30447 if (one_var != 0)
30448 {
30449 /* We need to shuffle the value to the correct position, so
30450 create a new pseudo to store the intermediate result. */
30451
30452 /* With SSE2, we can use the integer shuffle insns. */
30453 if (mode != V4SFmode && TARGET_SSE2)
30454 {
30455 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
30456 const1_rtx,
30457 GEN_INT (one_var == 1 ? 0 : 1),
30458 GEN_INT (one_var == 2 ? 0 : 1),
30459 GEN_INT (one_var == 3 ? 0 : 1)));
30460 if (target != new_target)
30461 emit_move_insn (target, new_target);
30462 return true;
30463 }
30464
30465 /* Otherwise convert the intermediate result to V4SFmode and
30466 use the SSE1 shuffle instructions. */
30467 if (mode != V4SFmode)
30468 {
30469 tmp = gen_reg_rtx (V4SFmode);
30470 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
30471 }
30472 else
30473 tmp = new_target;
30474
30475 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
30476 const1_rtx,
30477 GEN_INT (one_var == 1 ? 0 : 1),
30478 GEN_INT (one_var == 2 ? 0+4 : 1+4),
30479 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
30480
30481 if (mode != V4SFmode)
30482 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
30483 else if (tmp != target)
30484 emit_move_insn (target, tmp);
30485 }
30486 else if (target != new_target)
30487 emit_move_insn (target, new_target);
30488 return true;
30489
30490 case V8HImode:
30491 case V16QImode:
30492 vsimode = V4SImode;
30493 goto widen;
30494 case V4HImode:
30495 case V8QImode:
30496 if (!mmx_ok)
30497 return false;
30498 vsimode = V2SImode;
30499 goto widen;
30500 widen:
30501 if (one_var != 0)
30502 return false;
30503
30504 /* Zero extend the variable element to SImode and recurse. */
30505 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
30506
30507 x = gen_reg_rtx (vsimode);
30508 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
30509 var, one_var))
30510 gcc_unreachable ();
30511
30512 emit_move_insn (target, gen_lowpart (mode, x));
30513 return true;
30514
30515 default:
30516 return false;
30517 }
30518 }
30519
30520 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30521 consisting of the values in VALS. It is known that all elements
30522 except ONE_VAR are constants. Return true if successful. */
30523
30524 static bool
30525 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
30526 rtx target, rtx vals, int one_var)
30527 {
30528 rtx var = XVECEXP (vals, 0, one_var);
30529 enum machine_mode wmode;
30530 rtx const_vec, x;
30531
30532 const_vec = copy_rtx (vals);
30533 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
30534 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
30535
30536 switch (mode)
30537 {
30538 case V2DFmode:
30539 case V2DImode:
30540 case V2SFmode:
30541 case V2SImode:
30542 /* For the two element vectors, it's just as easy to use
30543 the general case. */
30544 return false;
30545
30546 case V4DImode:
30547 /* Use ix86_expand_vector_set in 64bit mode only. */
30548 if (!TARGET_64BIT)
30549 return false;
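/* FALLTHRU */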
30550 case V4DFmode:
30551 case V8SFmode:
30552 case V8SImode:
30553 case V16HImode:
30554 case V32QImode:
30555 case V4SFmode:
30556 case V4SImode:
30557 case V8HImode:
30558 case V4HImode:
30559 break;
30560
30561 case V16QImode:
30562 if (TARGET_SSE4_1)
30563 break;
30564 wmode = V8HImode;
30565 goto widen;
30566 case V8QImode:
30567 wmode = V4HImode;
30568 goto widen;
30569 widen:
30570 /* There's no way to set one QImode entry easily. Combine
30571 the variable value with its adjacent constant value, and
30572 promote to an HImode set. */
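/* E.g. setting byte 3 of a V8QImode vector becomes setting HImode
element 1 of the corresponding V4HImode vector, with byte 2's constant
value merged into the low half of the new HImode element. */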
30573 x = XVECEXP (vals, 0, one_var ^ 1);
30574 if (one_var & 1)
30575 {
30576 var = convert_modes (HImode, QImode, var, true);
30577 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
30578 NULL_RTX, 1, OPTAB_LIB_WIDEN);
30579 x = GEN_INT (INTVAL (x) & 0xff);
30580 }
30581 else
30582 {
30583 var = convert_modes (HImode, QImode, var, true);
30584 x = gen_int_mode (INTVAL (x) << 8, HImode);
30585 }
30586 if (x != const0_rtx)
30587 var = expand_simple_binop (HImode, IOR, var, x, var,
30588 1, OPTAB_LIB_WIDEN);
30589
30590 x = gen_reg_rtx (wmode);
30591 emit_move_insn (x, gen_lowpart (wmode, const_vec));
30592 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
30593
30594 emit_move_insn (target, gen_lowpart (mode, x));
30595 return true;
30596
30597 default:
30598 return false;
30599 }
30600
30601 emit_move_insn (target, const_vec);
30602 ix86_expand_vector_set (mmx_ok, target, var, one_var);
30603 return true;
30604 }
30605
30606 /* A subroutine of ix86_expand_vector_init_general. Use vector
30607 concatenate to handle the most general case: all values variable,
30608 and none identical. */
30609
30610 static void
30611 ix86_expand_vector_init_concat (enum machine_mode mode,
30612 rtx target, rtx *ops, int n)
30613 {
30614 enum machine_mode cmode, hmode = VOIDmode;
30615 rtx first[8], second[4];
30616 rtvec v;
30617 int i, j;
30618
30619 switch (n)
30620 {
30621 case 2:
30622 switch (mode)
30623 {
30624 case V8SImode:
30625 cmode = V4SImode;
30626 break;
30627 case V8SFmode:
30628 cmode = V4SFmode;
30629 break;
30630 case V4DImode:
30631 cmode = V2DImode;
30632 break;
30633 case V4DFmode:
30634 cmode = V2DFmode;
30635 break;
30636 case V4SImode:
30637 cmode = V2SImode;
30638 break;
30639 case V4SFmode:
30640 cmode = V2SFmode;
30641 break;
30642 case V2DImode:
30643 cmode = DImode;
30644 break;
30645 case V2SImode:
30646 cmode = SImode;
30647 break;
30648 case V2DFmode:
30649 cmode = DFmode;
30650 break;
30651 case V2SFmode:
30652 cmode = SFmode;
30653 break;
30654 default:
30655 gcc_unreachable ();
30656 }
30657
30658 if (!register_operand (ops[1], cmode))
30659 ops[1] = force_reg (cmode, ops[1]);
30660 if (!register_operand (ops[0], cmode))
30661 ops[0] = force_reg (cmode, ops[0]);
30662 emit_insn (gen_rtx_SET (VOIDmode, target,
30663 gen_rtx_VEC_CONCAT (mode, ops[0],
30664 ops[1])));
30665 break;
30666
30667 case 4:
30668 switch (mode)
30669 {
30670 case V4DImode:
30671 cmode = V2DImode;
30672 break;
30673 case V4DFmode:
30674 cmode = V2DFmode;
30675 break;
30676 case V4SImode:
30677 cmode = V2SImode;
30678 break;
30679 case V4SFmode:
30680 cmode = V2SFmode;
30681 break;
30682 default:
30683 gcc_unreachable ();
30684 }
30685 goto half;
30686
30687 case 8:
30688 switch (mode)
30689 {
30690 case V8SImode:
30691 cmode = V2SImode;
30692 hmode = V4SImode;
30693 break;
30694 case V8SFmode:
30695 cmode = V2SFmode;
30696 hmode = V4SFmode;
30697 break;
30698 default:
30699 gcc_unreachable ();
30700 }
30701 goto half;
30702
30703 half:
30704 /* FIXME: We process inputs backward to help RA. PR 36222. */
30705 i = n - 1;
30706 j = (n >> 1) - 1;
30707 for (; i > 0; i -= 2, j--)
30708 {
30709 first[j] = gen_reg_rtx (cmode);
30710 v = gen_rtvec (2, ops[i - 1], ops[i]);
30711 ix86_expand_vector_init (false, first[j],
30712 gen_rtx_PARALLEL (cmode, v));
30713 }
30714
30715 n >>= 1;
30716 if (n > 2)
30717 {
30718 gcc_assert (hmode != VOIDmode);
30719 for (i = j = 0; i < n; i += 2, j++)
30720 {
30721 second[j] = gen_reg_rtx (hmode);
30722 ix86_expand_vector_init_concat (hmode, second [j],
30723 &first [i], 2);
30724 }
30725 n >>= 1;
30726 ix86_expand_vector_init_concat (mode, target, second, n);
30727 }
30728 else
30729 ix86_expand_vector_init_concat (mode, target, first, n);
30730 break;
30731
30732 default:
30733 gcc_unreachable ();
30734 }
30735 }
30736
30737 /* A subroutine of ix86_expand_vector_init_general. Use vector
30738 interleave to handle the most general case: all values variable,
30739 and none identical. */
30740
30741 static void
30742 ix86_expand_vector_init_interleave (enum machine_mode mode,
30743 rtx target, rtx *ops, int n)
30744 {
30745 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
30746 int i, j;
30747 rtx op0, op1;
30748 rtx (*gen_load_even) (rtx, rtx, rtx);
30749 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
30750 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
30751
30752 switch (mode)
30753 {
30754 case V8HImode:
30755 gen_load_even = gen_vec_setv8hi;
30756 gen_interleave_first_low = gen_vec_interleave_lowv4si;
30757 gen_interleave_second_low = gen_vec_interleave_lowv2di;
30758 inner_mode = HImode;
30759 first_imode = V4SImode;
30760 second_imode = V2DImode;
30761 third_imode = VOIDmode;
30762 break;
30763 case V16QImode:
30764 gen_load_even = gen_vec_setv16qi;
30765 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
30766 gen_interleave_second_low = gen_vec_interleave_lowv4si;
30767 inner_mode = QImode;
30768 first_imode = V8HImode;
30769 second_imode = V4SImode;
30770 third_imode = V2DImode;
30771 break;
30772 default:
30773 gcc_unreachable ();
30774 }
30775
30776 for (i = 0; i < n; i++)
30777 {
30778 /* Extend the odd element to SImode using a paradoxical SUBREG. */
30779 op0 = gen_reg_rtx (SImode);
30780 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
30781
30782 /* Insert the SImode value as low element of V4SImode vector. */
30783 op1 = gen_reg_rtx (V4SImode);
30784 op0 = gen_rtx_VEC_MERGE (V4SImode,
30785 gen_rtx_VEC_DUPLICATE (V4SImode,
30786 op0),
30787 CONST0_RTX (V4SImode),
30788 const1_rtx);
30789 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
30790
30791 /* Cast the V4SImode vector back to a vector in the original mode. */
30792 op0 = gen_reg_rtx (mode);
30793 emit_move_insn (op0, gen_lowpart (mode, op1));
30794
30795 /* Load even elements into the second position. */
30796 emit_insn (gen_load_even (op0,
30797 force_reg (inner_mode,
30798 ops [i + i + 1]),
30799 const1_rtx));
30800
30801 /* Cast vector to FIRST_IMODE vector. */
30802 ops[i] = gen_reg_rtx (first_imode);
30803 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
30804 }
30805
30806 /* Interleave low FIRST_IMODE vectors. */
30807 for (i = j = 0; i < n; i += 2, j++)
30808 {
30809 op0 = gen_reg_rtx (first_imode);
30810 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
30811
30812 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
30813 ops[j] = gen_reg_rtx (second_imode);
30814 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
30815 }
30816
30817 /* Interleave low SECOND_IMODE vectors. */
30818 switch (second_imode)
30819 {
30820 case V4SImode:
30821 for (i = j = 0; i < n / 2; i += 2, j++)
30822 {
30823 op0 = gen_reg_rtx (second_imode);
30824 emit_insn (gen_interleave_second_low (op0, ops[i],
30825 ops[i + 1]));
30826
30827 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
30828 vector. */
30829 ops[j] = gen_reg_rtx (third_imode);
30830 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
30831 }
30832 second_imode = V2DImode;
30833 gen_interleave_second_low = gen_vec_interleave_lowv2di;
30834 /* FALLTHRU */
30835
30836 case V2DImode:
30837 op0 = gen_reg_rtx (second_imode);
30838 emit_insn (gen_interleave_second_low (op0, ops[0],
30839 ops[1]));
30840
30841 /* Cast the SECOND_IMODE vector back to a vector in the original
30842 mode. */
30843 emit_insn (gen_rtx_SET (VOIDmode, target,
30844 gen_lowpart (mode, op0)));
30845 break;
30846
30847 default:
30848 gcc_unreachable ();
30849 }
30850 }
30851
30852 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
30853 all values variable, and none identical. */
30854
30855 static void
30856 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
30857 rtx target, rtx vals)
30858 {
30859 rtx ops[32], op0, op1;
30860 enum machine_mode half_mode = VOIDmode;
30861 int n, i;
30862
30863 switch (mode)
30864 {
30865 case V2SFmode:
30866 case V2SImode:
30867 if (!mmx_ok && !TARGET_SSE)
30868 break;
30869 /* FALLTHRU */
30870
30871 case V8SFmode:
30872 case V8SImode:
30873 case V4DFmode:
30874 case V4DImode:
30875 case V4SFmode:
30876 case V4SImode:
30877 case V2DFmode:
30878 case V2DImode:
30879 n = GET_MODE_NUNITS (mode);
30880 for (i = 0; i < n; i++)
30881 ops[i] = XVECEXP (vals, 0, i);
30882 ix86_expand_vector_init_concat (mode, target, ops, n);
30883 return;
30884
30885 case V32QImode:
30886 half_mode = V16QImode;
30887 goto half;
30888
30889 case V16HImode:
30890 half_mode = V8HImode;
30891 goto half;
30892
30893 half:
30894 n = GET_MODE_NUNITS (mode);
30895 for (i = 0; i < n; i++)
30896 ops[i] = XVECEXP (vals, 0, i);
30897 op0 = gen_reg_rtx (half_mode);
30898 op1 = gen_reg_rtx (half_mode);
30899 ix86_expand_vector_init_interleave (half_mode, op0, ops,
30900 n >> 2);
30901 ix86_expand_vector_init_interleave (half_mode, op1,
30902 &ops [n >> 1], n >> 2);
30903 emit_insn (gen_rtx_SET (VOIDmode, target,
30904 gen_rtx_VEC_CONCAT (mode, op0, op1)));
30905 return;
30906
30907 case V16QImode:
30908 if (!TARGET_SSE4_1)
30909 break;
30910 /* FALLTHRU */
30911
30912 case V8HImode:
30913 if (!TARGET_SSE2)
30914 break;
30915
30916 /* Don't use ix86_expand_vector_init_interleave if we can't
30917 move from GPR to SSE register directly. */
30918 if (!TARGET_INTER_UNIT_MOVES)
30919 break;
30920
30921 n = GET_MODE_NUNITS (mode);
30922 for (i = 0; i < n; i++)
30923 ops[i] = XVECEXP (vals, 0, i);
30924 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
30925 return;
30926
30927 case V4HImode:
30928 case V8QImode:
30929 break;
30930
30931 default:
30932 gcc_unreachable ();
30933 }
30934
30935 {
30936 int i, j, n_elts, n_words, n_elt_per_word;
30937 enum machine_mode inner_mode;
30938 rtx words[4], shift;
30939
30940 inner_mode = GET_MODE_INNER (mode);
30941 n_elts = GET_MODE_NUNITS (mode);
30942 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
30943 n_elt_per_word = n_elts / n_words;
30944 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
30945
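/* Pack the vector elements into word-sized integers. Elements are
inserted from the highest index of each word downwards, so element 0
ends up in the least significant bits, as required on little-endian
x86. */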
30946 for (i = 0; i < n_words; ++i)
30947 {
30948 rtx word = NULL_RTX;
30949
30950 for (j = 0; j < n_elt_per_word; ++j)
30951 {
30952 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
30953 elt = convert_modes (word_mode, inner_mode, elt, true);
30954
30955 if (j == 0)
30956 word = elt;
30957 else
30958 {
30959 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
30960 word, 1, OPTAB_LIB_WIDEN);
30961 word = expand_simple_binop (word_mode, IOR, word, elt,
30962 word, 1, OPTAB_LIB_WIDEN);
30963 }
30964 }
30965
30966 words[i] = word;
30967 }
30968
30969 if (n_words == 1)
30970 emit_move_insn (target, gen_lowpart (mode, words[0]));
30971 else if (n_words == 2)
30972 {
30973 rtx tmp = gen_reg_rtx (mode);
30974 emit_clobber (tmp);
30975 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
30976 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
30977 emit_move_insn (target, tmp);
30978 }
30979 else if (n_words == 4)
30980 {
30981 rtx tmp = gen_reg_rtx (V4SImode);
30982 gcc_assert (word_mode == SImode);
30983 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
30984 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
30985 emit_move_insn (target, gen_lowpart (mode, tmp));
30986 }
30987 else
30988 gcc_unreachable ();
30989 }
30990 }
30991
30992 /* Initialize vector TARGET via VALS. Suppress the use of MMX
30993 instructions unless MMX_OK is true. */
30994
30995 void
30996 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
30997 {
30998 enum machine_mode mode = GET_MODE (target);
30999 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31000 int n_elts = GET_MODE_NUNITS (mode);
31001 int n_var = 0, one_var = -1;
31002 bool all_same = true, all_const_zero = true;
31003 int i;
31004 rtx x;
31005
31006 for (i = 0; i < n_elts; ++i)
31007 {
31008 x = XVECEXP (vals, 0, i);
31009 if (!(CONST_INT_P (x)
31010 || GET_CODE (x) == CONST_DOUBLE
31011 || GET_CODE (x) == CONST_FIXED))
31012 n_var++, one_var = i;
31013 else if (x != CONST0_RTX (inner_mode))
31014 all_const_zero = false;
31015 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
31016 all_same = false;
31017 }
31018
31019 /* Constants are best loaded from the constant pool. */
31020 if (n_var == 0)
31021 {
31022 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
31023 return;
31024 }
31025
31026 /* If all values are identical, broadcast the value. */
31027 if (all_same
31028 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
31029 XVECEXP (vals, 0, 0)))
31030 return;
31031
31032 /* Values where only one field is non-constant are best loaded from
31033 the pool and overwritten via move later. */
31034 if (n_var == 1)
31035 {
31036 if (all_const_zero
31037 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
31038 XVECEXP (vals, 0, one_var),
31039 one_var))
31040 return;
31041
31042 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
31043 return;
31044 }
31045
31046 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
31047 }
31048
31049 void
31050 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
31051 {
31052 enum machine_mode mode = GET_MODE (target);
31053 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31054 enum machine_mode half_mode;
31055 bool use_vec_merge = false;
31056 rtx tmp;
31057 static rtx (*gen_extract[6][2]) (rtx, rtx)
31058 = {
31059 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
31060 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
31061 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
31062 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
31063 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
31064 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
31065 };
31066 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
31067 = {
31068 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
31069 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
31070 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
31071 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
31072 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
31073 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
31074 };
31075 int i, j, n;
31076
31077 switch (mode)
31078 {
31079 case V2SFmode:
31080 case V2SImode:
31081 if (mmx_ok)
31082 {
31083 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
31084 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
31085 if (elt == 0)
31086 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
31087 else
31088 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
31089 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31090 return;
31091 }
31092 break;
31093
31094 case V2DImode:
31095 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
31096 if (use_vec_merge)
31097 break;
31098
31099 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
31100 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
31101 if (elt == 0)
31102 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
31103 else
31104 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
31105 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31106 return;
31107
31108 case V2DFmode:
31109 {
31110 rtx op0, op1;
31111
31112 /* For the two element vectors, we implement a VEC_CONCAT with
31113 the extraction of the other element. */
31114
31115 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
31116 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
31117
31118 if (elt == 0)
31119 op0 = val, op1 = tmp;
31120 else
31121 op0 = tmp, op1 = val;
31122
31123 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
31124 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31125 }
31126 return;
31127
31128 case V4SFmode:
31129 use_vec_merge = TARGET_SSE4_1;
31130 if (use_vec_merge)
31131 break;
31132
31133 switch (elt)
31134 {
31135 case 0:
31136 use_vec_merge = true;
31137 break;
31138
31139 case 1:
31140 /* tmp = target = A B C D */
31141 tmp = copy_to_reg (target);
31142 /* target = A A B B */
31143 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
31144 /* target = X A B B */
31145 ix86_expand_vector_set (false, target, val, 0);
31146 /* target = A X C D */
31147 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31148 const1_rtx, const0_rtx,
31149 GEN_INT (2+4), GEN_INT (3+4)));
31150 return;
31151
31152 case 2:
31153 /* tmp = target = A B C D */
31154 tmp = copy_to_reg (target);
31155 /* tmp = X B C D */
31156 ix86_expand_vector_set (false, tmp, val, 0);
31157 /* target = A B X D */
31158 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31159 const0_rtx, const1_rtx,
31160 GEN_INT (0+4), GEN_INT (3+4)));
31161 return;
31162
31163 case 3:
31164 /* tmp = target = A B C D */
31165 tmp = copy_to_reg (target);
31166 /* tmp = X B C D */
31167 ix86_expand_vector_set (false, tmp, val, 0);
31168 /* target = A B C X */
31169 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31170 const0_rtx, const1_rtx,
31171 GEN_INT (2+4), GEN_INT (0+4)));
31172 return;
31173
31174 default:
31175 gcc_unreachable ();
31176 }
31177 break;
31178
31179 case V4SImode:
31180 use_vec_merge = TARGET_SSE4_1;
31181 if (use_vec_merge)
31182 break;
31183
31184 /* Element 0 handled by vec_merge below. */
31185 if (elt == 0)
31186 {
31187 use_vec_merge = true;
31188 break;
31189 }
31190
31191 if (TARGET_SSE2)
31192 {
31193 /* With SSE2, use integer shuffles to swap element 0 and ELT,
31194 store into element 0, then shuffle them back. */
31195
31196 rtx order[4];
31197
31198 order[0] = GEN_INT (elt);
31199 order[1] = const1_rtx;
31200 order[2] = const2_rtx;
31201 order[3] = GEN_INT (3);
31202 order[elt] = const0_rtx;
31203
31204 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
31205 order[1], order[2], order[3]));
31206
31207 ix86_expand_vector_set (false, target, val, 0);
31208
31209 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
31210 order[1], order[2], order[3]));
31211 }
31212 else
31213 {
31214 /* For SSE1, we have to reuse the V4SF code. */
31215 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
31216 gen_lowpart (SFmode, val), elt);
31217 }
31218 return;
31219
31220 case V8HImode:
31221 use_vec_merge = TARGET_SSE2;
31222 break;
31223 case V4HImode:
31224 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
31225 break;
31226
31227 case V16QImode:
31228 use_vec_merge = TARGET_SSE4_1;
31229 break;
31230
31231 case V8QImode:
31232 break;
31233
31234 case V32QImode:
31235 half_mode = V16QImode;
31236 j = 0;
31237 n = 16;
31238 goto half;
31239
31240 case V16HImode:
31241 half_mode = V8HImode;
31242 j = 1;
31243 n = 8;
31244 goto half;
31245
31246 case V8SImode:
31247 half_mode = V4SImode;
31248 j = 2;
31249 n = 4;
31250 goto half;
31251
31252 case V4DImode:
31253 half_mode = V2DImode;
31254 j = 3;
31255 n = 2;
31256 goto half;
31257
31258 case V8SFmode:
31259 half_mode = V4SFmode;
31260 j = 4;
31261 n = 4;
31262 goto half;
31263
31264 case V4DFmode:
31265 half_mode = V2DFmode;
31266 j = 5;
31267 n = 2;
31268 goto half;
31269
31270 half:
31271 /* Compute offset. */
31272 i = elt / n;
31273 elt %= n;
31274
31275 gcc_assert (i <= 1);
31276
31277 /* Extract the half. */
31278 tmp = gen_reg_rtx (half_mode);
31279 emit_insn (gen_extract[j][i] (tmp, target));
31280
31281 /* Put val in tmp at elt. */
31282 ix86_expand_vector_set (false, tmp, val, elt);
31283
31284 /* Put it back. */
31285 emit_insn (gen_insert[j][i] (target, target, tmp));
31286 return;
31287
31288 default:
31289 break;
31290 }
31291
31292 if (use_vec_merge)
31293 {
31294 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
31295 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
31296 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31297 }
31298 else
31299 {
31300 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
31301
31302 emit_move_insn (mem, target);
31303
31304 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
31305 emit_move_insn (tmp, val);
31306
31307 emit_move_insn (target, mem);
31308 }
31309 }
31310
31311 void
31312 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
31313 {
31314 enum machine_mode mode = GET_MODE (vec);
31315 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31316 bool use_vec_extr = false;
31317 rtx tmp;
31318
31319 switch (mode)
31320 {
31321 case V2SImode:
31322 case V2SFmode:
31323 if (!mmx_ok)
31324 break;
31325 /* FALLTHRU */
31326
31327 case V2DFmode:
31328 case V2DImode:
31329 use_vec_extr = true;
31330 break;
31331
31332 case V4SFmode:
31333 use_vec_extr = TARGET_SSE4_1;
31334 if (use_vec_extr)
31335 break;
31336
31337 switch (elt)
31338 {
31339 case 0:
31340 tmp = vec;
31341 break;
31342
31343 case 1:
31344 case 3:
31345 tmp = gen_reg_rtx (mode);
31346 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
31347 GEN_INT (elt), GEN_INT (elt),
31348 GEN_INT (elt+4), GEN_INT (elt+4)));
31349 break;
31350
31351 case 2:
31352 tmp = gen_reg_rtx (mode);
31353 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
31354 break;
31355
31356 default:
31357 gcc_unreachable ();
31358 }
31359 vec = tmp;
31360 use_vec_extr = true;
31361 elt = 0;
31362 break;
31363
31364 case V4SImode:
31365 use_vec_extr = TARGET_SSE4_1;
31366 if (use_vec_extr)
31367 break;
31368
31369 if (TARGET_SSE2)
31370 {
31371 switch (elt)
31372 {
31373 case 0:
31374 tmp = vec;
31375 break;
31376
31377 case 1:
31378 case 3:
31379 tmp = gen_reg_rtx (mode);
31380 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
31381 GEN_INT (elt), GEN_INT (elt),
31382 GEN_INT (elt), GEN_INT (elt)));
31383 break;
31384
31385 case 2:
31386 tmp = gen_reg_rtx (mode);
31387 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
31388 break;
31389
31390 default:
31391 gcc_unreachable ();
31392 }
31393 vec = tmp;
31394 use_vec_extr = true;
31395 elt = 0;
31396 }
31397 else
31398 {
31399 /* For SSE1, we have to reuse the V4SF code. */
31400 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
31401 gen_lowpart (V4SFmode, vec), elt);
31402 return;
31403 }
31404 break;
31405
31406 case V8HImode:
31407 use_vec_extr = TARGET_SSE2;
31408 break;
31409 case V4HImode:
31410 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
31411 break;
31412
31413 case V16QImode:
31414 use_vec_extr = TARGET_SSE4_1;
31415 break;
31416
31417 case V8QImode:
31418 /* ??? Could extract the appropriate HImode element and shift. */
31419 default:
31420 break;
31421 }
31422
31423 if (use_vec_extr)
31424 {
31425 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
31426 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
31427
31428 /* Let the rtl optimizers know about the zero extension performed. */
31429 if (inner_mode == QImode || inner_mode == HImode)
31430 {
31431 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
31432 target = gen_lowpart (SImode, target);
31433 }
31434
31435 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31436 }
31437 else
31438 {
31439 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
31440
31441 emit_move_insn (mem, vec);
31442
31443 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
31444 emit_move_insn (target, tmp);
31445 }
31446 }
31447
31448 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
31449 pattern to reduce; DEST is the destination; IN is the input vector. */
31450
31451 void
31452 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
31453 {
31454 rtx tmp1, tmp2, tmp3;
31455
31456 tmp1 = gen_reg_rtx (V4SFmode);
31457 tmp2 = gen_reg_rtx (V4SFmode);
31458 tmp3 = gen_reg_rtx (V4SFmode);
31459
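/* Reduce pairwise: movhlps folds the high half onto the low half, FN
combines the two halves, then the remaining partial results in elements
0 and 1 are combined after a shuffle that broadcasts element 1. */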
31460 emit_insn (gen_sse_movhlps (tmp1, in, in));
31461 emit_insn (fn (tmp2, tmp1, in));
31462
31463 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
31464 const1_rtx, const1_rtx,
31465 GEN_INT (1+4), GEN_INT (1+4)));
31466 emit_insn (fn (dest, tmp2, tmp3));
31467 }
31468 \f
31469 /* Target hook for scalar_mode_supported_p. */
31470 static bool
31471 ix86_scalar_mode_supported_p (enum machine_mode mode)
31472 {
31473 if (DECIMAL_FLOAT_MODE_P (mode))
31474 return default_decimal_float_supported_p ();
31475 else if (mode == TFmode)
31476 return true;
31477 else
31478 return default_scalar_mode_supported_p (mode);
31479 }
31480
31481 /* Implements target hook vector_mode_supported_p. */
31482 static bool
31483 ix86_vector_mode_supported_p (enum machine_mode mode)
31484 {
31485 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
31486 return true;
31487 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
31488 return true;
31489 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
31490 return true;
31491 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
31492 return true;
31493 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
31494 return true;
31495 return false;
31496 }
31497
31498 /* Target hook for c_mode_for_suffix. */
31499 static enum machine_mode
31500 ix86_c_mode_for_suffix (char suffix)
31501 {
31502 if (suffix == 'q')
31503 return TFmode;
31504 if (suffix == 'w')
31505 return XFmode;
31506
31507 return VOIDmode;
31508 }
31509
31510 /* Worker function for TARGET_MD_ASM_CLOBBERS.
31511
31512 We do this in the new i386 backend to maintain source compatibility
31513 with the old cc0-based compiler. */
31514
31515 static tree
31516 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
31517 tree inputs ATTRIBUTE_UNUSED,
31518 tree clobbers)
31519 {
31520 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
31521 clobbers);
31522 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
31523 clobbers);
31524 return clobbers;
31525 }
31526
31527 /* Implements target vector targetm.asm.encode_section_info. */
31528
31529 static void ATTRIBUTE_UNUSED
31530 ix86_encode_section_info (tree decl, rtx rtl, int first)
31531 {
31532 default_encode_section_info (decl, rtl, first);
31533
31534 if (TREE_CODE (decl) == VAR_DECL
31535 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
31536 && ix86_in_large_data_p (decl))
31537 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
31538 }
31539
31540 /* Worker function for REVERSE_CONDITION. */
31541
31542 enum rtx_code
31543 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
31544 {
31545 return (mode != CCFPmode && mode != CCFPUmode
31546 ? reverse_condition (code)
31547 : reverse_condition_maybe_unordered (code));
31548 }
31549
31550 /* Output code to perform an x87 FP register move, from OPERANDS[1]
31551 to OPERANDS[0]. */
31552
31553 const char *
31554 output_387_reg_move (rtx insn, rtx *operands)
31555 {
31556 if (REG_P (operands[0]))
31557 {
31558 if (REG_P (operands[1])
31559 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
31560 {
31561 if (REGNO (operands[0]) == FIRST_STACK_REG)
31562 return output_387_ffreep (operands, 0);
31563 return "fstp\t%y0";
31564 }
31565 if (STACK_TOP_P (operands[0]))
31566 return "fld%Z1\t%y1";
31567 return "fst\t%y0";
31568 }
31569 else if (MEM_P (operands[0]))
31570 {
31571 gcc_assert (REG_P (operands[1]));
31572 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
31573 return "fstp%Z0\t%y0";
31574 else
31575 {
31576 /* There is no non-popping store to memory for XFmode.
31577 So if we need one, follow the store with a load. */
31578 if (GET_MODE (operands[0]) == XFmode)
31579 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
31580 else
31581 return "fst%Z0\t%y0";
31582 }
31583 }
31584 else
31585 gcc_unreachable();
31586 }
31587
31588 /* Output code to perform a conditional jump to LABEL, if C2 flag in
31589 FP status register is set. */
31590
31591 void
31592 ix86_emit_fp_unordered_jump (rtx label)
31593 {
31594 rtx reg = gen_reg_rtx (HImode);
31595 rtx temp;
31596
31597 emit_insn (gen_x86_fnstsw_1 (reg));
31598
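/* C2 is bit 10 of the FPU status word, i.e. bit 2 of the high byte of
REG after fnstsw. sahf copies that byte into EFLAGS, where C2 lands in
PF (hence the UNORDERED test); otherwise test the 0x04 mask in the high
byte directly. */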
31599 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
31600 {
31601 emit_insn (gen_x86_sahf_1 (reg));
31602
31603 temp = gen_rtx_REG (CCmode, FLAGS_REG);
31604 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
31605 }
31606 else
31607 {
31608 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
31609
31610 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
31611 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
31612 }
31613
31614 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
31615 gen_rtx_LABEL_REF (VOIDmode, label),
31616 pc_rtx);
31617 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
31618
31619 emit_jump_insn (temp);
31620 predict_jump (REG_BR_PROB_BASE * 10 / 100);
31621 }
31622
31623 /* Output code to perform a log1p XFmode calculation. */
31624
31625 void ix86_emit_i387_log1p (rtx op0, rtx op1)
31626 {
31627 rtx label1 = gen_label_rtx ();
31628 rtx label2 = gen_label_rtx ();
31629
31630 rtx tmp = gen_reg_rtx (XFmode);
31631 rtx tmp2 = gen_reg_rtx (XFmode);
31632 rtx test;
31633
31634 emit_insn (gen_absxf2 (tmp, op1));
31635 test = gen_rtx_GE (VOIDmode, tmp,
31636 CONST_DOUBLE_FROM_REAL_VALUE (
31637 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
31638 XFmode));
31639 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
31640
31641 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
31642 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
31643 emit_jump (label2);
31644
31645 emit_label (label1);
31646 emit_move_insn (tmp, CONST1_RTX (XFmode));
31647 emit_insn (gen_addxf3 (tmp, op1, tmp));
31648 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
31649 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
31650
31651 emit_label (label2);
31652 }
31653
31654 /* Output code to perform a Newton-Raphson approximation of a single precision
31655 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
31656
31657 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
31658 {
31659 rtx x0, x1, e0, e1;
31660
31661 x0 = gen_reg_rtx (mode);
31662 e0 = gen_reg_rtx (mode);
31663 e1 = gen_reg_rtx (mode);
31664 x1 = gen_reg_rtx (mode);
31665
31666 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
31667
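/* This is one Newton-Raphson step for 1/b: with x0 ~= 1/b,
x1 = x0 * (2 - b * x0) = (x0 + x0) - (b * x0 * x0) = e1 - e0. */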
31668 /* x0 = rcp(b) estimate */
31669 emit_insn (gen_rtx_SET (VOIDmode, x0,
31670 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
31671 UNSPEC_RCP)));
31672 /* e0 = x0 * b */
31673 emit_insn (gen_rtx_SET (VOIDmode, e0,
31674 gen_rtx_MULT (mode, x0, b)));
31675
31676 /* e0 = x0 * e0 */
31677 emit_insn (gen_rtx_SET (VOIDmode, e0,
31678 gen_rtx_MULT (mode, x0, e0)));
31679
31680 /* e1 = x0 + x0 */
31681 emit_insn (gen_rtx_SET (VOIDmode, e1,
31682 gen_rtx_PLUS (mode, x0, x0)));
31683
31684 /* x1 = e1 - e0 */
31685 emit_insn (gen_rtx_SET (VOIDmode, x1,
31686 gen_rtx_MINUS (mode, e1, e0)));
31687
31688 /* res = a * x1 */
31689 emit_insn (gen_rtx_SET (VOIDmode, res,
31690 gen_rtx_MULT (mode, a, x1)));
31691 }
31692
31693 /* Output code to perform a Newton-Raphson approximation of a
31694 single precision floating point [reciprocal] square root. */
31695
31696 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
31697 bool recip)
31698 {
31699 rtx x0, e0, e1, e2, e3, mthree, mhalf;
31700 REAL_VALUE_TYPE r;
31701
31702 x0 = gen_reg_rtx (mode);
31703 e0 = gen_reg_rtx (mode);
31704 e1 = gen_reg_rtx (mode);
31705 e2 = gen_reg_rtx (mode);
31706 e3 = gen_reg_rtx (mode);
31707
31708 real_from_integer (&r, VOIDmode, -3, -1, 0);
31709 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
31710
31711 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
31712 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
31713
31714 if (VECTOR_MODE_P (mode))
31715 {
31716 mthree = ix86_build_const_vector (mode, true, mthree);
31717 mhalf = ix86_build_const_vector (mode, true, mhalf);
31718 }
31719
31720 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
31721 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
31722
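/* One Newton-Raphson step for 1/sqrt(a): with x0 ~= 1/sqrt(a),
x1 = x0 * (1.5 - 0.5 * a * x0 * x0) = -0.5 * x0 * (a * x0 * x0 - 3.0).
For sqrt the final factor is a * x0 rather than x0 alone, since
a * rsqrt(a) approximates sqrt(a). */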
31723 /* x0 = rsqrt(a) estimate */
31724 emit_insn (gen_rtx_SET (VOIDmode, x0,
31725 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
31726 UNSPEC_RSQRT)));
31727
31728 /* If a == 0.0, filter out the infinite rsqrt estimate to prevent a NaN result for sqrt (0.0). */
31729 if (!recip)
31730 {
31731 rtx zero, mask;
31732
31733 zero = gen_reg_rtx (mode);
31734 mask = gen_reg_rtx (mode);
31735
31736 zero = force_reg (mode, CONST0_RTX(mode));
31737 emit_insn (gen_rtx_SET (VOIDmode, mask,
31738 gen_rtx_NE (mode, zero, a)));
31739
31740 emit_insn (gen_rtx_SET (VOIDmode, x0,
31741 gen_rtx_AND (mode, x0, mask)));
31742 }
31743
31744 /* e0 = x0 * a */
31745 emit_insn (gen_rtx_SET (VOIDmode, e0,
31746 gen_rtx_MULT (mode, x0, a)));
31747 /* e1 = e0 * x0 */
31748 emit_insn (gen_rtx_SET (VOIDmode, e1,
31749 gen_rtx_MULT (mode, e0, x0)));
31750
31751 /* e2 = e1 - 3. */
31752 mthree = force_reg (mode, mthree);
31753 emit_insn (gen_rtx_SET (VOIDmode, e2,
31754 gen_rtx_PLUS (mode, e1, mthree)));
31755
31756 mhalf = force_reg (mode, mhalf);
31757 if (recip)
31758 /* e3 = -.5 * x0 */
31759 emit_insn (gen_rtx_SET (VOIDmode, e3,
31760 gen_rtx_MULT (mode, x0, mhalf)));
31761 else
31762 /* e3 = -.5 * e0 */
31763 emit_insn (gen_rtx_SET (VOIDmode, e3,
31764 gen_rtx_MULT (mode, e0, mhalf)));
31765 /* ret = e2 * e3 */
31766 emit_insn (gen_rtx_SET (VOIDmode, res,
31767 gen_rtx_MULT (mode, e2, e3)));
31768 }
31769
31770 #ifdef TARGET_SOLARIS
31771 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
31772
31773 static void
31774 i386_solaris_elf_named_section (const char *name, unsigned int flags,
31775 tree decl)
31776 {
31777 /* With Binutils 2.15, the "@unwind" marker must be specified on
31778 every occurrence of the ".eh_frame" section, not just the first
31779 one. */
31780 if (TARGET_64BIT
31781 && strcmp (name, ".eh_frame") == 0)
31782 {
31783 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
31784 flags & SECTION_WRITE ? "aw" : "a");
31785 return;
31786 }
31787
31788 #ifndef USE_GAS
31789 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
31790 {
31791 solaris_elf_asm_comdat_section (name, flags, decl);
31792 return;
31793 }
31794 #endif
31795
31796 default_elf_asm_named_section (name, flags, decl);
31797 }
31798 #endif /* TARGET_SOLARIS */
31799
31800 /* Return the mangling of TYPE if it is an extended fundamental type. */
31801
31802 static const char *
31803 ix86_mangle_type (const_tree type)
31804 {
31805 type = TYPE_MAIN_VARIANT (type);
31806
31807 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
31808 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
31809 return NULL;
31810
31811 switch (TYPE_MODE (type))
31812 {
31813 case TFmode:
31814 /* __float128 is "g". */
31815 return "g";
31816 case XFmode:
31817 /* "long double" or __float80 is "e". */
31818 return "e";
31819 default:
31820 return NULL;
31821 }
31822 }
31823
31824 /* For 32-bit code we can save PIC register setup by using
31825 __stack_chk_fail_local hidden function instead of calling
31826 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
31827 register, so it is better to call __stack_chk_fail directly. */
31828
31829 static tree ATTRIBUTE_UNUSED
31830 ix86_stack_protect_fail (void)
31831 {
31832 return TARGET_64BIT
31833 ? default_external_stack_protect_fail ()
31834 : default_hidden_stack_protect_fail ();
31835 }
31836
31837 /* Select a format to encode pointers in exception handling data. CODE
31838 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
31839 true if the symbol may be affected by dynamic relocations.
31840
31841 ??? All x86 object file formats are capable of representing this.
31842 After all, the relocation needed is the same as for the call insn.
31843 Whether or not a particular assembler allows us to enter such, I
31844 guess we'll have to see. */
31845 int
31846 asm_preferred_eh_data_format (int code, int global)
31847 {
31848 if (flag_pic)
31849 {
31850 int type = DW_EH_PE_sdata8;
31851 if (!TARGET_64BIT
31852 || ix86_cmodel == CM_SMALL_PIC
31853 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
31854 type = DW_EH_PE_sdata4;
31855 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
31856 }
31857 if (ix86_cmodel == CM_SMALL
31858 || (ix86_cmodel == CM_MEDIUM && code))
31859 return DW_EH_PE_udata4;
31860 return DW_EH_PE_absptr;
31861 }
31862 \f
31863 /* Expand copysign from SIGN to the positive value ABS_VALUE
31864 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
31865 the sign-bit. */
31866 static void
31867 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
31868 {
31869 enum machine_mode mode = GET_MODE (sign);
31870 rtx sgn = gen_reg_rtx (mode);
31871 if (mask == NULL_RTX)
31872 {
31873 enum machine_mode vmode;
31874
31875 if (mode == SFmode)
31876 vmode = V4SFmode;
31877 else if (mode == DFmode)
31878 vmode = V2DFmode;
31879 else
31880 vmode = mode;
31881
31882 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
31883 if (!VECTOR_MODE_P (mode))
31884 {
31885 /* We need to generate a scalar mode mask in this case. */
31886 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
31887 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
31888 mask = gen_reg_rtx (mode);
31889 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
31890 }
31891 }
31892 else
31893 mask = gen_rtx_NOT (mode, mask);
31894 emit_insn (gen_rtx_SET (VOIDmode, sgn,
31895 gen_rtx_AND (mode, mask, sign)));
31896 emit_insn (gen_rtx_SET (VOIDmode, result,
31897 gen_rtx_IOR (mode, abs_value, sgn)));
31898 }
31899
31900 /* Expand fabs (OP0) and return a new rtx that holds the result. The
31901 mask for masking out the sign-bit is stored in *SMASK, if that is
31902 non-null. */
31903 static rtx
31904 ix86_expand_sse_fabs (rtx op0, rtx *smask)
31905 {
31906 enum machine_mode vmode, mode = GET_MODE (op0);
31907 rtx xa, mask;
31908
31909 xa = gen_reg_rtx (mode);
31910 if (mode == SFmode)
31911 vmode = V4SFmode;
31912 else if (mode == DFmode)
31913 vmode = V2DFmode;
31914 else
31915 vmode = mode;
31916 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
31917 if (!VECTOR_MODE_P (mode))
31918 {
31919 /* We need to generate a scalar mode mask in this case. */
31920 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
31921 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
31922 mask = gen_reg_rtx (mode);
31923 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
31924 }
31925 emit_insn (gen_rtx_SET (VOIDmode, xa,
31926 gen_rtx_AND (mode, op0, mask)));
31927
31928 if (smask)
31929 *smask = mask;
31930
31931 return xa;
31932 }
31933
31934 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
31935 swapping the operands if SWAP_OPERANDS is true. The expanded
31936 code is a forward jump to a newly created label in case the
31937 comparison is true. The generated label rtx is returned. */
31938 static rtx
31939 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
31940 bool swap_operands)
31941 {
31942 rtx label, tmp;
31943
31944 if (swap_operands)
31945 {
31946 tmp = op0;
31947 op0 = op1;
31948 op1 = tmp;
31949 }
31950
31951 label = gen_label_rtx ();
31952 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
31953 emit_insn (gen_rtx_SET (VOIDmode, tmp,
31954 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
31955 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
31956 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
31957 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
31958 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
31959 JUMP_LABEL (tmp) = label;
31960
31961 return label;
31962 }
31963
31964 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
31965 using comparison code CODE. Operands are swapped for the comparison if
31966 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
31967 static rtx
31968 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
31969 bool swap_operands)
31970 {
31971 rtx (*insn)(rtx, rtx, rtx, rtx);
31972 enum machine_mode mode = GET_MODE (op0);
31973 rtx mask = gen_reg_rtx (mode);
31974
31975 if (swap_operands)
31976 {
31977 rtx tmp = op0;
31978 op0 = op1;
31979 op1 = tmp;
31980 }
31981
31982 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
31983
31984 emit_insn (insn (mask, op0, op1,
31985 gen_rtx_fmt_ee (code, mode, op0, op1)));
31986 return mask;
31987 }
31988
31989 /* Generate and return a rtx of mode MODE for 2**n where n is the number
31990 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
31991 static rtx
31992 ix86_gen_TWO52 (enum machine_mode mode)
31993 {
31994 REAL_VALUE_TYPE TWO52r;
31995 rtx TWO52;
31996
31997 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
31998 TWO52 = const_double_from_real_value (TWO52r, mode);
31999 TWO52 = force_reg (mode, TWO52);
32000
32001 return TWO52;
32002 }
32003
32004 /* Expand SSE sequence for computing lround from OP1 storing
32005 into OP0. */
32006 void
32007 ix86_expand_lround (rtx op0, rtx op1)
32008 {
32009 /* C code for the stuff we're doing below:
32010 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
32011 return (long)tmp;
32012 */
32013 enum machine_mode mode = GET_MODE (op1);
32014 const struct real_format *fmt;
32015 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
32016 rtx adj;
32017
32018 /* load nextafter (0.5, 0.0) */
32019 fmt = REAL_MODE_FORMAT (mode);
32020 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
32021 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
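/* Using the largest value below 0.5 (rather than 0.5 itself) keeps
inputs just under 0.5 from being pushed up to 1.0 by the addition,
which would make them round away from zero incorrectly. */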
32022
32023 /* adj = copysign (0.5, op1) */
32024 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
32025 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
32026
32027 /* adj = op1 + adj */
32028 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
32029
32030 /* op0 = (imode)adj */
32031 expand_fix (op0, adj, 0);
32032 }
32033
32034 /* Expand SSE2 sequence for computing lfloor or lceil from OP1 storing
32035 into OP0. */
32036 void
32037 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
32038 {
32039 /* C code for the stuff we're doing below (for do_floor):
32040 xi = (long)op1;
32041 xi -= (double)xi > op1 ? 1 : 0;
32042 return xi;
32043 */
32044 enum machine_mode fmode = GET_MODE (op1);
32045 enum machine_mode imode = GET_MODE (op0);
32046 rtx ireg, freg, label, tmp;
32047
32048 /* reg = (long)op1 */
32049 ireg = gen_reg_rtx (imode);
32050 expand_fix (ireg, op1, 0);
32051
32052 /* freg = (double)reg */
32053 freg = gen_reg_rtx (fmode);
32054 expand_float (freg, ireg, 0);
32055
32056 /* ireg = (freg > op1) ? ireg - 1 : ireg */
32057 label = ix86_expand_sse_compare_and_jump (UNLE,
32058 freg, op1, !do_floor);
32059 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
32060 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
32061 emit_move_insn (ireg, tmp);
32062
32063 emit_label (label);
32064 LABEL_NUSES (label) = 1;
32065
32066 emit_move_insn (op0, ireg);
32067 }
32068
32069 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
32070 result in OPERAND0. */
32071 void
32072 ix86_expand_rint (rtx operand0, rtx operand1)
32073 {
32074 /* C code for the stuff we're doing below:
32075 xa = fabs (operand1);
32076 if (!isless (xa, 2**52))
32077 return operand1;
32078 xa = xa + 2**52 - 2**52;
32079 return copysign (xa, operand1);
32080 */
32081 enum machine_mode mode = GET_MODE (operand0);
32082 rtx res, xa, label, TWO52, mask;
32083
32084 res = gen_reg_rtx (mode);
32085 emit_move_insn (res, operand1);
32086
32087 /* xa = abs (operand1) */
32088 xa = ix86_expand_sse_fabs (res, &mask);
32089
32090 /* if (!isless (xa, TWO52)) goto label; */
32091 TWO52 = ix86_gen_TWO52 (mode);
32092 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32093
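/* Adding 2**52 pushes the fraction bits out of the mantissa, so the
rounding implied by the addition leaves an integer value; subtracting
2**52 again recovers that integer in floating-point form. */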
32094 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32095 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
32096
32097 ix86_sse_copysign_to_positive (res, xa, res, mask);
32098
32099 emit_label (label);
32100 LABEL_NUSES (label) = 1;
32101
32102 emit_move_insn (operand0, res);
32103 }
32104
32105 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
32106 into OPERAND0. */
32107 void
32108 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
32109 {
32110 /* C code for the stuff we expand below.
32111 double xa = fabs (x), x2;
32112 if (!isless (xa, TWO52))
32113 return x;
32114 xa = xa + TWO52 - TWO52;
32115 x2 = copysign (xa, x);
32116 Compensate. Floor:
32117 if (x2 > x)
32118 x2 -= 1;
32119 Compensate. Ceil:
32120 if (x2 < x)
32121 x2 -= -1;
32122 return x2;
32123 */
32124 enum machine_mode mode = GET_MODE (operand0);
32125 rtx xa, TWO52, tmp, label, one, res, mask;
32126
32127 TWO52 = ix86_gen_TWO52 (mode);
32128
32129 /* Temporary for holding the result, initialized to the input
32130 operand to ease control flow. */
32131 res = gen_reg_rtx (mode);
32132 emit_move_insn (res, operand1);
32133
32134 /* xa = abs (operand1) */
32135 xa = ix86_expand_sse_fabs (res, &mask);
32136
32137 /* if (!isless (xa, TWO52)) goto label; */
32138 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32139
32140 /* xa = xa + TWO52 - TWO52; */
32141 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32142 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
32143
32144 /* xa = copysign (xa, operand1) */
32145 ix86_sse_copysign_to_positive (xa, xa, res, mask);
32146
32147 /* generate 1.0 or -1.0 */
32148 one = force_reg (mode,
32149 const_double_from_real_value (do_floor
32150 ? dconst1 : dconstm1, mode));
32151
32152 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
32153 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
32154 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32155 gen_rtx_AND (mode, one, tmp)));
32156 /* We always need to subtract here to preserve signed zero. */
32157 tmp = expand_simple_binop (mode, MINUS,
32158 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32159 emit_move_insn (res, tmp);
32160
32161 emit_label (label);
32162 LABEL_NUSES (label) = 1;
32163
32164 emit_move_insn (operand0, res);
32165 }
32166
32167 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
32168 into OPERAND0. */
32169 void
32170 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
32171 {
32172 /* C code for the stuff we expand below.
32173 double xa = fabs (x), x2;
32174 if (!isless (xa, TWO52))
32175 return x;
32176 x2 = (double)(long)x;
32177 Compensate. Floor:
32178 if (x2 > x)
32179 x2 -= 1;
32180 Compensate. Ceil:
32181 if (x2 < x)
32182 x2 += 1;
32183 if (HONOR_SIGNED_ZEROS (mode))
32184 return copysign (x2, x);
32185 return x2;
32186 */
32187 enum machine_mode mode = GET_MODE (operand0);
32188 rtx xa, xi, TWO52, tmp, label, one, res, mask;
32189
32190 TWO52 = ix86_gen_TWO52 (mode);
32191
32192 /* Temporary for holding the result, initialized to the input
32193 operand to ease control flow. */
32194 res = gen_reg_rtx (mode);
32195 emit_move_insn (res, operand1);
32196
32197 /* xa = abs (operand1) */
32198 xa = ix86_expand_sse_fabs (res, &mask);
32199
32200 /* if (!isless (xa, TWO52)) goto label; */
32201 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32202
32203 /* xa = (double)(long)x */
32204 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32205 expand_fix (xi, res, 0);
32206 expand_float (xa, xi, 0);
32207
32208 /* generate 1.0 */
32209 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
32210
32211   /* Compensate: xa -= (xa > operand1 ? 1 : 0) for floor; xa += (xa < operand1 ? 1 : 0) for ceil. */
32212 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
32213 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32214 gen_rtx_AND (mode, one, tmp)));
32215 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
32216 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32217 emit_move_insn (res, tmp);
32218
32219 if (HONOR_SIGNED_ZEROS (mode))
32220 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
32221
32222 emit_label (label);
32223 LABEL_NUSES (label) = 1;
32224
32225 emit_move_insn (operand0, res);
32226 }
32227
32228 /* Expand SSE sequence for computing round from OPERAND1 storing
32229 into OPERAND0. Sequence that works without relying on DImode truncation
32230 via cvttsd2siq that is only available on 64bit targets. */
32231 void
32232 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
32233 {
32234 /* C code for the stuff we expand below.
32235 double xa = fabs (x), xa2, x2;
32236 if (!isless (xa, TWO52))
32237 return x;
32238 Using the absolute value and copying back sign makes
32239 -0.0 -> -0.0 correct.
32240 xa2 = xa + TWO52 - TWO52;
32241 Compensate.
32242 dxa = xa2 - xa;
32243 if (dxa <= -0.5)
32244 xa2 += 1;
32245 else if (dxa > 0.5)
32246 xa2 -= 1;
32247 x2 = copysign (xa2, x);
32248 return x2;
32249 */
32250 enum machine_mode mode = GET_MODE (operand0);
32251 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
32252
32253 TWO52 = ix86_gen_TWO52 (mode);
32254
32255 /* Temporary for holding the result, initialized to the input
32256 operand to ease control flow. */
32257 res = gen_reg_rtx (mode);
32258 emit_move_insn (res, operand1);
32259
32260 /* xa = abs (operand1) */
32261 xa = ix86_expand_sse_fabs (res, &mask);
32262
32263 /* if (!isless (xa, TWO52)) goto label; */
32264 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32265
32266 /* xa2 = xa + TWO52 - TWO52; */
32267 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32268 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
32269
32270 /* dxa = xa2 - xa; */
32271 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
32272
32273 /* generate 0.5, 1.0 and -0.5 */
32274 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
32275 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
32276 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
32277 0, OPTAB_DIRECT);
32278
32279 /* Compensate. */
32280 tmp = gen_reg_rtx (mode);
32281 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
32282 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
32283 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32284 gen_rtx_AND (mode, one, tmp)));
32285 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32286 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
32287 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
32288 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32289 gen_rtx_AND (mode, one, tmp)));
32290 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32291
32292 /* res = copysign (xa2, operand1) */
32293 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
32294
32295 emit_label (label);
32296 LABEL_NUSES (label) = 1;
32297
32298 emit_move_insn (operand0, res);
32299 }
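
/* For reference, a scalar sketch of the compensation above, assuming IEEE
   double arithmetic in round-to-nearest-even mode (helper name illustrative
   only, not emitted code):

     #include <math.h>

     static double
     round_df_32_sketch (double x)
     {
       const double TWO52 = 4503599627370496.0;   // 2^52
       double xa = fabs (x), xa2, dxa;
       if (!(xa < TWO52))
         return x;                    // huge, already integral (or NaN)
       xa2 = xa + TWO52 - TWO52;      // nearest integer, ties to even
       dxa = xa2 - xa;
       if (dxa <= -0.5)               // tie rounded down: round it up
         xa2 += 1.0;
       else if (dxa > 0.5)            // rounded up too far: undo
         xa2 -= 1.0;
       return copysign (xa2, x);      // halfway cases end up away from zero
     }
*/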
32300
32301 /* Expand SSE sequence for computing trunc from OPERAND1 storing
32302 into OPERAND0. */
32303 void
32304 ix86_expand_trunc (rtx operand0, rtx operand1)
32305 {
32306 /* C code for SSE variant we expand below.
32307 double xa = fabs (x), x2;
32308 if (!isless (xa, TWO52))
32309 return x;
32310 x2 = (double)(long)x;
32311 if (HONOR_SIGNED_ZEROS (mode))
32312 return copysign (x2, x);
32313 return x2;
32314 */
32315 enum machine_mode mode = GET_MODE (operand0);
32316 rtx xa, xi, TWO52, label, res, mask;
32317
32318 TWO52 = ix86_gen_TWO52 (mode);
32319
32320 /* Temporary for holding the result, initialized to the input
32321 operand to ease control flow. */
32322 res = gen_reg_rtx (mode);
32323 emit_move_insn (res, operand1);
32324
32325 /* xa = abs (operand1) */
32326 xa = ix86_expand_sse_fabs (res, &mask);
32327
32328 /* if (!isless (xa, TWO52)) goto label; */
32329 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32330
32331 /* x = (double)(long)x */
32332 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32333 expand_fix (xi, res, 0);
32334 expand_float (res, xi, 0);
32335
32336 if (HONOR_SIGNED_ZEROS (mode))
32337 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
32338
32339 emit_label (label);
32340 LABEL_NUSES (label) = 1;
32341
32342 emit_move_insn (operand0, res);
32343 }
32344
32345 /* Expand SSE sequence for computing trunc from OPERAND1 storing
32346 into OPERAND0. */
32347 void
32348 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
32349 {
32350 enum machine_mode mode = GET_MODE (operand0);
32351 rtx xa, mask, TWO52, label, one, res, smask, tmp;
32352
32353 /* C code for SSE variant we expand below.
32354 double xa = fabs (x), xa2, x2;
32355 if (!isless (xa, TWO52))
32356 return x;
32357 xa2 = xa + TWO52 - TWO52;
32358 Compensate:
32359 if (xa2 > xa)
32360 xa2 -= 1.0;
32361 x2 = copysign (xa2, x);
32362 return x2;
32363 */
32364
32365 TWO52 = ix86_gen_TWO52 (mode);
32366
32367 /* Temporary for holding the result, initialized to the input
32368 operand to ease control flow. */
32369 res = gen_reg_rtx (mode);
32370 emit_move_insn (res, operand1);
32371
32372 /* xa = abs (operand1) */
32373 xa = ix86_expand_sse_fabs (res, &smask);
32374
32375 /* if (!isless (xa, TWO52)) goto label; */
32376 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32377
32378 /* res = xa + TWO52 - TWO52; */
32379 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32380 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
32381 emit_move_insn (res, tmp);
32382
32383 /* generate 1.0 */
32384 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
32385
32386   /* Compensate: res = res - (res > xa ? 1 : 0); res holds xa2 here. */
32387 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
32388 emit_insn (gen_rtx_SET (VOIDmode, mask,
32389 gen_rtx_AND (mode, mask, one)));
32390 tmp = expand_simple_binop (mode, MINUS,
32391 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
32392 emit_move_insn (res, tmp);
32393
32394 /* res = copysign (res, operand1) */
32395 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
32396
32397 emit_label (label);
32398 LABEL_NUSES (label) = 1;
32399
32400 emit_move_insn (operand0, res);
32401 }
32402
32403 /* Expand SSE sequence for computing round from OPERAND1 storing
32404 into OPERAND0. */
32405 void
32406 ix86_expand_round (rtx operand0, rtx operand1)
32407 {
32408 /* C code for the stuff we're doing below:
32409 double xa = fabs (x);
32410 if (!isless (xa, TWO52))
32411 return x;
32412 xa = (double)(long)(xa + nextafter (0.5, 0.0));
32413 return copysign (xa, x);
32414 */
32415 enum machine_mode mode = GET_MODE (operand0);
32416 rtx res, TWO52, xa, label, xi, half, mask;
32417 const struct real_format *fmt;
32418 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
32419
32420 /* Temporary for holding the result, initialized to the input
32421 operand to ease control flow. */
32422 res = gen_reg_rtx (mode);
32423 emit_move_insn (res, operand1);
32424
32425 TWO52 = ix86_gen_TWO52 (mode);
32426 xa = ix86_expand_sse_fabs (res, &mask);
32427 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32428
32429 /* load nextafter (0.5, 0.0) */
32430 fmt = REAL_MODE_FORMAT (mode);
32431 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
32432 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
32433
32434 /* xa = xa + 0.5 */
32435 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
32436 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
32437
32438 /* xa = (double)(int64_t)xa */
32439 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32440 expand_fix (xi, xa, 0);
32441 expand_float (xa, xi, 0);
32442
32443 /* res = copysign (xa, operand1) */
32444 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
32445
32446 emit_label (label);
32447 LABEL_NUSES (label) = 1;
32448
32449 emit_move_insn (operand0, res);
32450 }
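
/* For reference, a scalar sketch showing why the predecessor of 0.5 is used
   instead of 0.5 itself; IEEE double assumed, helper name illustrative only:

     #include <math.h>

     static double
     round_sketch (double x)
     {
       double xa = fabs (x);
       if (!(xa < 4503599627370496.0))          // 2^52: already integral
         return x;
       // Adding exactly 0.5 would round xa == 0.49999999999999994 up to
       // 1.0 before truncation; adding nextafter (0.5, 0.0) keeps the
       // truncated result at 0 as required.
       xa = (double) (long long) (xa + nextafter (0.5, 0.0));
       return copysign (xa, x);
     }
*/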
32451 \f
32452
32453 /* Table of valid machine attributes. */
32454 static const struct attribute_spec ix86_attribute_table[] =
32455 {
32456 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
32457 affects_type_identity } */
32458 /* Stdcall attribute says callee is responsible for popping arguments
32459 if they are not variable. */
32460 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32461 true },
32462 /* Fastcall attribute says callee is responsible for popping arguments
32463 if they are not variable. */
32464 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32465 true },
32466 /* Thiscall attribute says callee is responsible for popping arguments
32467 if they are not variable. */
32468 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32469 true },
32470   /* Cdecl attribute says the callee is a normal C declaration. */
32471 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32472 true },
32473 /* Regparm attribute specifies how many integer arguments are to be
32474 passed in registers. */
32475 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
32476 true },
32477 /* Sseregparm attribute says we are using x86_64 calling conventions
32478 for FP arguments. */
32479 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32480 true },
32481 /* force_align_arg_pointer says this function realigns the stack at entry. */
32482 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
32483 false, true, true, ix86_handle_cconv_attribute, false },
32484 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
32485 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
32486 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
32487 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
32488 false },
32489 #endif
32490 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
32491 false },
32492 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
32493 false },
32494 #ifdef SUBTARGET_ATTRIBUTE_TABLE
32495 SUBTARGET_ATTRIBUTE_TABLE,
32496 #endif
32497 /* ms_abi and sysv_abi calling convention function attributes. */
32498 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
32499 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
32500 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
32501 false },
32502 { "callee_pop_aggregate_return", 1, 1, false, true, true,
32503 ix86_handle_callee_pop_aggregate_return, true },
32504 /* End element. */
32505 { NULL, 0, 0, false, false, false, NULL, false }
32506 };
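
/* As an illustration of how the table above is used, the calling-convention
   attributes are written in user code like this (hypothetical declarations,
   not part of this file):

     int __attribute__((regparm (2))) add2 (int a, int b);
     int __attribute__((fastcall)) callback (int a, int b);

   The handler listed for each entry (here ix86_handle_cconv_attribute)
   checks the attribute arguments and target support when the declaration
   is processed.  */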
32507
32508 /* Implement targetm.vectorize.builtin_vectorization_cost. */
32509 static int
32510 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
32511 tree vectype ATTRIBUTE_UNUSED,
32512 int misalign ATTRIBUTE_UNUSED)
32513 {
32514 switch (type_of_cost)
32515 {
32516 case scalar_stmt:
32517 return ix86_cost->scalar_stmt_cost;
32518
32519 case scalar_load:
32520 return ix86_cost->scalar_load_cost;
32521
32522 case scalar_store:
32523 return ix86_cost->scalar_store_cost;
32524
32525 case vector_stmt:
32526 return ix86_cost->vec_stmt_cost;
32527
32528 case vector_load:
32529 return ix86_cost->vec_align_load_cost;
32530
32531 case vector_store:
32532 return ix86_cost->vec_store_cost;
32533
32534 case vec_to_scalar:
32535 return ix86_cost->vec_to_scalar_cost;
32536
32537 case scalar_to_vec:
32538 return ix86_cost->scalar_to_vec_cost;
32539
32540 case unaligned_load:
32541 case unaligned_store:
32542 return ix86_cost->vec_unalign_load_cost;
32543
32544 case cond_branch_taken:
32545 return ix86_cost->cond_taken_branch_cost;
32546
32547 case cond_branch_not_taken:
32548 return ix86_cost->cond_not_taken_branch_cost;
32549
32550 case vec_perm:
32551 return 1;
32552
32553 default:
32554 gcc_unreachable ();
32555 }
32556 }
32557
32558
32559 /* Implement targetm.vectorize.builtin_vec_perm. */
32560
32561 static tree
32562 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
32563 {
32564 tree itype = TREE_TYPE (vec_type);
32565 bool u = TYPE_UNSIGNED (itype);
32566 enum machine_mode vmode = TYPE_MODE (vec_type);
32567 enum ix86_builtins fcode;
32568 bool ok = TARGET_SSE2;
32569
32570 switch (vmode)
32571 {
32572 case V4DFmode:
32573 ok = TARGET_AVX;
32574 fcode = IX86_BUILTIN_VEC_PERM_V4DF;
32575 goto get_di;
32576 case V2DFmode:
32577 fcode = IX86_BUILTIN_VEC_PERM_V2DF;
32578 get_di:
32579 itype = ix86_get_builtin_type (IX86_BT_DI);
32580 break;
32581
32582 case V8SFmode:
32583 ok = TARGET_AVX;
32584 fcode = IX86_BUILTIN_VEC_PERM_V8SF;
32585 goto get_si;
32586 case V4SFmode:
32587 ok = TARGET_SSE;
32588 fcode = IX86_BUILTIN_VEC_PERM_V4SF;
32589 get_si:
32590 itype = ix86_get_builtin_type (IX86_BT_SI);
32591 break;
32592
32593 case V2DImode:
32594 fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
32595 break;
32596 case V4SImode:
32597 fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
32598 break;
32599 case V8HImode:
32600 fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
32601 break;
32602 case V16QImode:
32603 fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
32604 break;
32605 default:
32606 ok = false;
32607 break;
32608 }
32609
32610 if (!ok)
32611 return NULL_TREE;
32612
32613 *mask_type = itype;
32614 return ix86_builtins[(int) fcode];
32615 }
32616
32617 /* Return a vector mode with twice as many elements as VMODE. */
32618 /* ??? Consider moving this to a table generated by genmodes.c. */
32619
32620 static enum machine_mode
32621 doublesize_vector_mode (enum machine_mode vmode)
32622 {
32623 switch (vmode)
32624 {
32625 case V2SFmode: return V4SFmode;
32626 case V1DImode: return V2DImode;
32627 case V2SImode: return V4SImode;
32628 case V4HImode: return V8HImode;
32629 case V8QImode: return V16QImode;
32630
32631 case V2DFmode: return V4DFmode;
32632 case V4SFmode: return V8SFmode;
32633 case V2DImode: return V4DImode;
32634 case V4SImode: return V8SImode;
32635 case V8HImode: return V16HImode;
32636 case V16QImode: return V32QImode;
32637
32638 case V4DFmode: return V8DFmode;
32639 case V8SFmode: return V16SFmode;
32640 case V4DImode: return V8DImode;
32641 case V8SImode: return V16SImode;
32642 case V16HImode: return V32HImode;
32643 case V32QImode: return V64QImode;
32644
32645 default:
32646 gcc_unreachable ();
32647 }
32648 }
32649
32650 /* Construct (set target (vec_select op0 (parallel perm))) and
32651 return true if that's a valid instruction in the active ISA. */
32652
32653 static bool
32654 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
32655 {
32656 rtx rperm[MAX_VECT_LEN], x;
32657 unsigned i;
32658
32659 for (i = 0; i < nelt; ++i)
32660 rperm[i] = GEN_INT (perm[i]);
32661
32662 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
32663 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
32664 x = gen_rtx_SET (VOIDmode, target, x);
32665
32666 x = emit_insn (x);
32667 if (recog_memoized (x) < 0)
32668 {
32669 remove_insn (x);
32670 return false;
32671 }
32672 return true;
32673 }
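
/* For instance, selecting elements { 1, 0, 3, 2 } of a V4SI operand builds
   the following insn (schematic RTL; TARGET and OP0 stand for the actual
   rtx arguments):

     (set (reg:V4SI TARGET)
          (vec_select:V4SI (reg:V4SI OP0)
                           (parallel [(const_int 1) (const_int 0)
                                      (const_int 3) (const_int 2)])))

   The insn is kept only if recog_memoized accepts it; otherwise it is
   removed again and false is returned.  */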
32674
32675 /* Similar, but generate a vec_concat from op0 and op1 as well. */
32676
32677 static bool
32678 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
32679 const unsigned char *perm, unsigned nelt)
32680 {
32681 enum machine_mode v2mode;
32682 rtx x;
32683
32684 v2mode = doublesize_vector_mode (GET_MODE (op0));
32685 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
32686 return expand_vselect (target, x, perm, nelt);
32687 }
32688
32689 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32690 in terms of blendp[sd] / pblendw / pblendvb. */
32691
32692 static bool
32693 expand_vec_perm_blend (struct expand_vec_perm_d *d)
32694 {
32695 enum machine_mode vmode = d->vmode;
32696 unsigned i, mask, nelt = d->nelt;
32697 rtx target, op0, op1, x;
32698
32699 if (!TARGET_SSE4_1 || d->op0 == d->op1)
32700 return false;
32701 if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
32702 return false;
32703
32704 /* This is a blend, not a permute. Elements must stay in their
32705 respective lanes. */
32706 for (i = 0; i < nelt; ++i)
32707 {
32708 unsigned e = d->perm[i];
32709 if (!(e == i || e == i + nelt))
32710 return false;
32711 }
32712
32713 if (d->testing_p)
32714 return true;
32715
32716 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
32717 decision should be extracted elsewhere, so that we only try that
32718 sequence once all budget==3 options have been tried. */
32719
32720 /* For bytes, see if bytes move in pairs so we can use pblendw with
32721 an immediate argument, rather than pblendvb with a vector argument. */
32722 if (vmode == V16QImode)
32723 {
32724 bool pblendw_ok = true;
32725 for (i = 0; i < 16 && pblendw_ok; i += 2)
32726 pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
32727
32728 if (!pblendw_ok)
32729 {
32730 rtx rperm[16], vperm;
32731
32732 for (i = 0; i < nelt; ++i)
32733 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
32734
32735 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
32736 vperm = force_reg (V16QImode, vperm);
32737
32738 emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
32739 return true;
32740 }
32741 }
32742
32743 target = d->target;
32744 op0 = d->op0;
32745 op1 = d->op1;
32746 mask = 0;
32747
32748 switch (vmode)
32749 {
32750 case V4DFmode:
32751 case V8SFmode:
32752 case V2DFmode:
32753 case V4SFmode:
32754 case V8HImode:
32755 for (i = 0; i < nelt; ++i)
32756 mask |= (d->perm[i] >= nelt) << i;
32757 break;
32758
32759 case V2DImode:
32760 for (i = 0; i < 2; ++i)
32761 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
32762 goto do_subreg;
32763
32764 case V4SImode:
32765 for (i = 0; i < 4; ++i)
32766 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
32767 goto do_subreg;
32768
32769 case V16QImode:
32770 for (i = 0; i < 8; ++i)
32771 mask |= (d->perm[i * 2] >= 16) << i;
32772
32773 do_subreg:
32774 vmode = V8HImode;
32775 target = gen_lowpart (vmode, target);
32776 op0 = gen_lowpart (vmode, op0);
32777 op1 = gen_lowpart (vmode, op1);
32778 break;
32779
32780 default:
32781 gcc_unreachable ();
32782 }
32783
32784 /* This matches five different patterns with the different modes. */
32785 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
32786 x = gen_rtx_SET (VOIDmode, target, x);
32787 emit_insn (x);
32788
32789 return true;
32790 }
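
/* Worked example of the immediate computed above, for V8HImode:

     d->perm = { 0, 9, 2, 11, 4, 13, 6, 15 }, nelt = 8
     bit i of mask = (perm[i] >= 8)  =>  mask = 0xaa

   i.e. every odd element is taken from op1, which corresponds to a pblendw
   with immediate 0xaa.  */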
32791
32792 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32793 in terms of the variable form of vpermilps.
32794
32795 Note that we will have already failed the immediate input vpermilps,
32796 which requires that the high and low part shuffle be identical; the
32797 variable form doesn't require that. */
32798
32799 static bool
32800 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
32801 {
32802 rtx rperm[8], vperm;
32803 unsigned i;
32804
32805 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
32806 return false;
32807
32808 /* We can only permute within the 128-bit lane. */
32809 for (i = 0; i < 8; ++i)
32810 {
32811 unsigned e = d->perm[i];
32812 if (i < 4 ? e >= 4 : e < 4)
32813 return false;
32814 }
32815
32816 if (d->testing_p)
32817 return true;
32818
32819 for (i = 0; i < 8; ++i)
32820 {
32821 unsigned e = d->perm[i];
32822
32823 /* Within each 128-bit lane, the elements of op0 are numbered
32824 from 0 and the elements of op1 are numbered from 4. */
32825 if (e >= 8 + 4)
32826 e -= 8;
32827 else if (e >= 4)
32828 e -= 4;
32829
32830 rperm[i] = GEN_INT (e);
32831 }
32832
32833 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
32834 vperm = force_reg (V8SImode, vperm);
32835 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
32836
32837 return true;
32838 }
32839
32840 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32841 in terms of pshufb or vpperm. */
32842
32843 static bool
32844 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
32845 {
32846 unsigned i, nelt, eltsz;
32847 rtx rperm[16], vperm, target, op0, op1;
32848
32849 if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
32850 return false;
32851 if (GET_MODE_SIZE (d->vmode) != 16)
32852 return false;
32853
32854 if (d->testing_p)
32855 return true;
32856
32857 nelt = d->nelt;
32858 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
32859
32860 for (i = 0; i < nelt; ++i)
32861 {
32862 unsigned j, e = d->perm[i];
32863 for (j = 0; j < eltsz; ++j)
32864 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
32865 }
32866
32867 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
32868 vperm = force_reg (V16QImode, vperm);
32869
32870 target = gen_lowpart (V16QImode, d->target);
32871 op0 = gen_lowpart (V16QImode, d->op0);
32872 if (d->op0 == d->op1)
32873 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
32874 else
32875 {
32876 op1 = gen_lowpart (V16QImode, d->op1);
32877 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
32878 }
32879
32880 return true;
32881 }
32882
32883 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
32884 in a single instruction. */
32885
32886 static bool
32887 expand_vec_perm_1 (struct expand_vec_perm_d *d)
32888 {
32889 unsigned i, nelt = d->nelt;
32890 unsigned char perm2[MAX_VECT_LEN];
32891
32892 /* Check plain VEC_SELECT first, because AVX has instructions that could
32893 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
32894 input where SEL+CONCAT may not. */
32895 if (d->op0 == d->op1)
32896 {
32897 int mask = nelt - 1;
32898
32899 for (i = 0; i < nelt; i++)
32900 perm2[i] = d->perm[i] & mask;
32901
32902 if (expand_vselect (d->target, d->op0, perm2, nelt))
32903 return true;
32904
32905 /* There are plenty of patterns in sse.md that are written for
32906 SEL+CONCAT and are not replicated for a single op. Perhaps
32907 that should be changed, to avoid the nastiness here. */
32908
32909 /* Recognize interleave style patterns, which means incrementing
32910 every other permutation operand. */
32911 for (i = 0; i < nelt; i += 2)
32912 {
32913 perm2[i] = d->perm[i] & mask;
32914 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
32915 }
32916 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
32917 return true;
32918
32919 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
32920 if (nelt >= 4)
32921 {
32922 for (i = 0; i < nelt; i += 4)
32923 {
32924 perm2[i + 0] = d->perm[i + 0] & mask;
32925 perm2[i + 1] = d->perm[i + 1] & mask;
32926 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
32927 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
32928 }
32929
32930 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
32931 return true;
32932 }
32933 }
32934
32935 /* Finally, try the fully general two operand permute. */
32936 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
32937 return true;
32938
32939 /* Recognize interleave style patterns with reversed operands. */
32940 if (d->op0 != d->op1)
32941 {
32942 for (i = 0; i < nelt; ++i)
32943 {
32944 unsigned e = d->perm[i];
32945 if (e >= nelt)
32946 e -= nelt;
32947 else
32948 e += nelt;
32949 perm2[i] = e;
32950 }
32951
32952 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
32953 return true;
32954 }
32955
32956 /* Try the SSE4.1 blend variable merge instructions. */
32957 if (expand_vec_perm_blend (d))
32958 return true;
32959
32960 /* Try one of the AVX vpermil variable permutations. */
32961 if (expand_vec_perm_vpermil (d))
32962 return true;
32963
32964 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
32965 if (expand_vec_perm_pshufb (d))
32966 return true;
32967
32968 return false;
32969 }
32970
32971 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32972 in terms of a pair of pshuflw + pshufhw instructions. */
32973
32974 static bool
32975 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
32976 {
32977 unsigned char perm2[MAX_VECT_LEN];
32978 unsigned i;
32979 bool ok;
32980
32981 if (d->vmode != V8HImode || d->op0 != d->op1)
32982 return false;
32983
32984 /* The two permutations only operate in 64-bit lanes. */
32985 for (i = 0; i < 4; ++i)
32986 if (d->perm[i] >= 4)
32987 return false;
32988 for (i = 4; i < 8; ++i)
32989 if (d->perm[i] < 4)
32990 return false;
32991
32992 if (d->testing_p)
32993 return true;
32994
32995 /* Emit the pshuflw. */
32996 memcpy (perm2, d->perm, 4);
32997 for (i = 4; i < 8; ++i)
32998 perm2[i] = i;
32999 ok = expand_vselect (d->target, d->op0, perm2, 8);
33000 gcc_assert (ok);
33001
33002 /* Emit the pshufhw. */
33003 memcpy (perm2 + 4, d->perm + 4, 4);
33004 for (i = 0; i < 4; ++i)
33005 perm2[i] = i;
33006 ok = expand_vselect (d->target, d->target, perm2, 8);
33007 gcc_assert (ok);
33008
33009 return true;
33010 }
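
/* Worked example of the split performed above, for V8HImode:

     d->perm = { 2, 0, 3, 1, 6, 4, 7, 5 }
     pshuflw selector: { 2, 0, 3, 1, 4, 5, 6, 7 }   -- reorders the low quadword
     pshufhw selector: { 0, 1, 2, 3, 6, 4, 7, 5 }   -- reorders the high quadword

   Applying the two selects in sequence realizes the full permutation.  */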
33011
33012 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
33013 the permutation using the SSSE3 palignr instruction. This succeeds
33014 when all of the elements in PERM fit within one vector and we merely
33015 need to shift them down so that a single vector permutation has a
33016 chance to succeed. */
33017
33018 static bool
33019 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
33020 {
33021 unsigned i, nelt = d->nelt;
33022 unsigned min, max;
33023 bool in_order, ok;
33024 rtx shift;
33025
33026 /* Even with AVX, palignr only operates on 128-bit vectors. */
33027 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
33028 return false;
33029
33030 min = nelt, max = 0;
33031 for (i = 0; i < nelt; ++i)
33032 {
33033 unsigned e = d->perm[i];
33034 if (e < min)
33035 min = e;
33036 if (e > max)
33037 max = e;
33038 }
33039 if (min == 0 || max - min >= nelt)
33040 return false;
33041
33042 /* Given that we have SSSE3, we know we'll be able to implement the
33043 single operand permutation after the palignr with pshufb. */
33044 if (d->testing_p)
33045 return true;
33046
33047 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
33048 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
33049 gen_lowpart (TImode, d->op1),
33050 gen_lowpart (TImode, d->op0), shift));
33051
33052 d->op0 = d->op1 = d->target;
33053
33054 in_order = true;
33055 for (i = 0; i < nelt; ++i)
33056 {
33057 unsigned e = d->perm[i] - min;
33058 if (e != i)
33059 in_order = false;
33060 d->perm[i] = e;
33061 }
33062
33063 /* Test for the degenerate case where the alignment by itself
33064 produces the desired permutation. */
33065 if (in_order)
33066 return true;
33067
33068 ok = expand_vec_perm_1 (d);
33069 gcc_assert (ok);
33070
33071 return ok;
33072 }
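
/* Worked example: for V8HImode with d->perm = { 2, 3, 4, 5, 6, 7, 8, 9 } the
   minimum index is 2, so the palignr shifts the op1:op0 pair down by two
   16-bit elements.  The residual permutation is then the identity
   { 0, 1, 2, 3, 4, 5, 6, 7 }, so the pshufb fallback is not needed.  */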
33073
33074 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
33075 a two vector permutation into a single vector permutation by using
33076 an interleave operation to merge the vectors. */
33077
33078 static bool
33079 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
33080 {
33081 struct expand_vec_perm_d dremap, dfinal;
33082 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
33083 unsigned contents, h1, h2, h3, h4;
33084 unsigned char remap[2 * MAX_VECT_LEN];
33085 rtx seq;
33086 bool ok;
33087
33088 if (d->op0 == d->op1)
33089 return false;
33090
33091 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
33092 lanes. We can use similar techniques with the vperm2f128 instruction,
33093 but it requires slightly different logic. */
33094 if (GET_MODE_SIZE (d->vmode) != 16)
33095 return false;
33096
33097 /* Examine from whence the elements come. */
33098 contents = 0;
33099 for (i = 0; i < nelt; ++i)
33100 contents |= 1u << d->perm[i];
33101
33102 /* Split the two input vectors into 4 halves. */
33103 h1 = (1u << nelt2) - 1;
33104 h2 = h1 << nelt2;
33105 h3 = h2 << nelt2;
33106 h4 = h3 << nelt2;
33107
33108 memset (remap, 0xff, sizeof (remap));
33109 dremap = *d;
33110
33111   /* If the elements are all from the low halves, use interleave low; do
33112   likewise for interleave high.  If the elements are from mismatched
33113   halves, we can use shufps for V4SF/V4SI or do a DImode shuffle.  */
33114 if ((contents & (h1 | h3)) == contents)
33115 {
33116 for (i = 0; i < nelt2; ++i)
33117 {
33118 remap[i] = i * 2;
33119 remap[i + nelt] = i * 2 + 1;
33120 dremap.perm[i * 2] = i;
33121 dremap.perm[i * 2 + 1] = i + nelt;
33122 }
33123 }
33124 else if ((contents & (h2 | h4)) == contents)
33125 {
33126 for (i = 0; i < nelt2; ++i)
33127 {
33128 remap[i + nelt2] = i * 2;
33129 remap[i + nelt + nelt2] = i * 2 + 1;
33130 dremap.perm[i * 2] = i + nelt2;
33131 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
33132 }
33133 }
33134 else if ((contents & (h1 | h4)) == contents)
33135 {
33136 for (i = 0; i < nelt2; ++i)
33137 {
33138 remap[i] = i;
33139 remap[i + nelt + nelt2] = i + nelt2;
33140 dremap.perm[i] = i;
33141 dremap.perm[i + nelt2] = i + nelt + nelt2;
33142 }
33143 if (nelt != 4)
33144 {
33145 dremap.vmode = V2DImode;
33146 dremap.nelt = 2;
33147 dremap.perm[0] = 0;
33148 dremap.perm[1] = 3;
33149 }
33150 }
33151 else if ((contents & (h2 | h3)) == contents)
33152 {
33153 for (i = 0; i < nelt2; ++i)
33154 {
33155 remap[i + nelt2] = i;
33156 remap[i + nelt] = i + nelt2;
33157 dremap.perm[i] = i + nelt2;
33158 dremap.perm[i + nelt2] = i + nelt;
33159 }
33160 if (nelt != 4)
33161 {
33162 dremap.vmode = V2DImode;
33163 dremap.nelt = 2;
33164 dremap.perm[0] = 1;
33165 dremap.perm[1] = 2;
33166 }
33167 }
33168 else
33169 return false;
33170
33171 /* Use the remapping array set up above to move the elements from their
33172 swizzled locations into their final destinations. */
33173 dfinal = *d;
33174 for (i = 0; i < nelt; ++i)
33175 {
33176 unsigned e = remap[d->perm[i]];
33177 gcc_assert (e < nelt);
33178 dfinal.perm[i] = e;
33179 }
33180 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
33181 dfinal.op1 = dfinal.op0;
33182 dremap.target = dfinal.op0;
33183
33184 /* Test if the final remap can be done with a single insn. For V4SFmode or
33185 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
33186 start_sequence ();
33187 ok = expand_vec_perm_1 (&dfinal);
33188 seq = get_insns ();
33189 end_sequence ();
33190
33191 if (!ok)
33192 return false;
33193
33194 if (dremap.vmode != dfinal.vmode)
33195 {
33196 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
33197 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
33198 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
33199 }
33200
33201 ok = expand_vec_perm_1 (&dremap);
33202 gcc_assert (ok);
33203
33204 emit_insn (seq);
33205 return true;
33206 }
33207
33208 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
33209 permutation with two pshufb insns and an ior. We should have already
33210 failed all two instruction sequences. */
33211
33212 static bool
33213 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
33214 {
33215 rtx rperm[2][16], vperm, l, h, op, m128;
33216 unsigned int i, nelt, eltsz;
33217
33218 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
33219 return false;
33220 gcc_assert (d->op0 != d->op1);
33221
33222 nelt = d->nelt;
33223 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
33224
33225 /* Generate two permutation masks. If the required element is within
33226 the given vector it is shuffled into the proper lane. If the required
33227 element is in the other vector, force a zero into the lane by setting
33228 bit 7 in the permutation mask. */
33229 m128 = GEN_INT (-128);
33230 for (i = 0; i < nelt; ++i)
33231 {
33232 unsigned j, e = d->perm[i];
33233 unsigned which = (e >= nelt);
33234 if (e >= nelt)
33235 e -= nelt;
33236
33237 for (j = 0; j < eltsz; ++j)
33238 {
33239 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
33240 rperm[1-which][i*eltsz + j] = m128;
33241 }
33242 }
33243
33244 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
33245 vperm = force_reg (V16QImode, vperm);
33246
33247 l = gen_reg_rtx (V16QImode);
33248 op = gen_lowpart (V16QImode, d->op0);
33249 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
33250
33251 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
33252 vperm = force_reg (V16QImode, vperm);
33253
33254 h = gen_reg_rtx (V16QImode);
33255 op = gen_lowpart (V16QImode, d->op1);
33256 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
33257
33258 op = gen_lowpart (V16QImode, d->target);
33259 emit_insn (gen_iorv16qi3 (op, l, h));
33260
33261 return true;
33262 }
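
/* Worked example of the two byte masks built above, for V16QImode with
   d->perm = { 0, 16, 1, 17, ..., 7, 23 } (interleave of the low halves):

     mask applied to op0: {   0, -128,   1, -128, ...,   7, -128 }
     mask applied to op1: { -128,    0, -128,    1, ..., -128,   7 }

   Each pshufb forces zero into the lanes owned by the other operand, so the
   final ior simply merges the two partial results.  */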
33263
33264 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
33265 and extract-odd permutations. */
33266
33267 static bool
33268 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
33269 {
33270 rtx t1, t2, t3;
33271
33272 switch (d->vmode)
33273 {
33274 case V4DFmode:
33275 t1 = gen_reg_rtx (V4DFmode);
33276 t2 = gen_reg_rtx (V4DFmode);
33277
33278 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
33279 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
33280 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
33281
33282 /* Now an unpck[lh]pd will produce the result required. */
33283 if (odd)
33284 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
33285 else
33286 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
33287 emit_insn (t3);
33288 break;
33289
33290 case V8SFmode:
33291 {
33292 int mask = odd ? 0xdd : 0x88;
33293
33294 t1 = gen_reg_rtx (V8SFmode);
33295 t2 = gen_reg_rtx (V8SFmode);
33296 t3 = gen_reg_rtx (V8SFmode);
33297
33298 /* Shuffle within the 128-bit lanes to produce:
33299 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
33300 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
33301 GEN_INT (mask)));
33302
33303 /* Shuffle the lanes around to produce:
33304 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
33305 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
33306 GEN_INT (0x3)));
33307
33308 /* Shuffle within the 128-bit lanes to produce:
33309 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
33310 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
33311
33312 /* Shuffle within the 128-bit lanes to produce:
33313 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
33314 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
33315
33316 /* Shuffle the lanes around to produce:
33317 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
33318 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
33319 GEN_INT (0x20)));
33320 }
33321 break;
33322
33323 case V2DFmode:
33324 case V4SFmode:
33325 case V2DImode:
33326 case V4SImode:
33327 /* These are always directly implementable by expand_vec_perm_1. */
33328 gcc_unreachable ();
33329
33330 case V8HImode:
33331 if (TARGET_SSSE3)
33332 return expand_vec_perm_pshufb2 (d);
33333 else
33334 {
33335 /* We need 2*log2(N)-1 operations to achieve odd/even
33336 with interleave. */
33337 t1 = gen_reg_rtx (V8HImode);
33338 t2 = gen_reg_rtx (V8HImode);
33339 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
33340 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
33341 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
33342 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
33343 if (odd)
33344 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
33345 else
33346 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
33347 emit_insn (t3);
33348 }
33349 break;
33350
33351 case V16QImode:
33352 if (TARGET_SSSE3)
33353 return expand_vec_perm_pshufb2 (d);
33354 else
33355 {
33356 t1 = gen_reg_rtx (V16QImode);
33357 t2 = gen_reg_rtx (V16QImode);
33358 t3 = gen_reg_rtx (V16QImode);
33359 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
33360 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
33361 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
33362 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
33363 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
33364 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
33365 if (odd)
33366 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
33367 else
33368 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
33369 emit_insn (t3);
33370 }
33371 break;
33372
33373 default:
33374 gcc_unreachable ();
33375 }
33376
33377 return true;
33378 }
33379
33380 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
33381 extract-even and extract-odd permutations. */
33382
33383 static bool
33384 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
33385 {
33386 unsigned i, odd, nelt = d->nelt;
33387
33388 odd = d->perm[0];
33389 if (odd != 0 && odd != 1)
33390 return false;
33391
33392 for (i = 1; i < nelt; ++i)
33393 if (d->perm[i] != 2 * i + odd)
33394 return false;
33395
33396 return expand_vec_perm_even_odd_1 (d, odd);
33397 }
33398
33399 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
33400 permutations. We assume that expand_vec_perm_1 has already failed. */
33401
33402 static bool
33403 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
33404 {
33405 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
33406 enum machine_mode vmode = d->vmode;
33407 unsigned char perm2[4];
33408 rtx op0 = d->op0;
33409 bool ok;
33410
33411 switch (vmode)
33412 {
33413 case V4DFmode:
33414 case V8SFmode:
33415 /* These are special-cased in sse.md so that we can optionally
33416 use the vbroadcast instruction. They expand to two insns
33417 if the input happens to be in a register. */
33418 gcc_unreachable ();
33419
33420 case V2DFmode:
33421 case V2DImode:
33422 case V4SFmode:
33423 case V4SImode:
33424 /* These are always implementable using standard shuffle patterns. */
33425 gcc_unreachable ();
33426
33427 case V8HImode:
33428 case V16QImode:
33429 /* These can be implemented via interleave. We save one insn by
33430 stopping once we have promoted to V4SImode and then use pshufd. */
33431 do
33432 {
33433 optab otab = vec_interleave_low_optab;
33434
33435 if (elt >= nelt2)
33436 {
33437 otab = vec_interleave_high_optab;
33438 elt -= nelt2;
33439 }
33440 nelt2 /= 2;
33441
33442 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
33443 vmode = get_mode_wider_vector (vmode);
33444 op0 = gen_lowpart (vmode, op0);
33445 }
33446 while (vmode != V4SImode);
33447
33448 memset (perm2, elt, 4);
33449 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
33450 gcc_assert (ok);
33451 return true;
33452
33453 default:
33454 gcc_unreachable ();
33455 }
33456 }
33457
33458 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
33459 broadcast permutations. */
33460
33461 static bool
33462 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
33463 {
33464 unsigned i, elt, nelt = d->nelt;
33465
33466 if (d->op0 != d->op1)
33467 return false;
33468
33469 elt = d->perm[0];
33470 for (i = 1; i < nelt; ++i)
33471 if (d->perm[i] != elt)
33472 return false;
33473
33474 return expand_vec_perm_broadcast_1 (d);
33475 }
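
/* Worked example: broadcasting element 5 of a V8HImode vector
   (d->perm = { 5, 5, 5, 5, 5, 5, 5, 5 }) takes one pass of the loop in
   expand_vec_perm_broadcast_1: the interleave-high step leaves the two
   copies of element 5 in 32-bit lane 1, and the final pshufd with selector
   { 1, 1, 1, 1 } completes the broadcast.  */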
33476
33477 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
33478 With all of the interface bits taken care of, perform the expansion
33479 in D and return true on success. */
33480
33481 static bool
33482 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
33483 {
33484 /* Try a single instruction expansion. */
33485 if (expand_vec_perm_1 (d))
33486 return true;
33487
33488 /* Try sequences of two instructions. */
33489
33490 if (expand_vec_perm_pshuflw_pshufhw (d))
33491 return true;
33492
33493 if (expand_vec_perm_palignr (d))
33494 return true;
33495
33496 if (expand_vec_perm_interleave2 (d))
33497 return true;
33498
33499 if (expand_vec_perm_broadcast (d))
33500 return true;
33501
33502 /* Try sequences of three instructions. */
33503
33504 if (expand_vec_perm_pshufb2 (d))
33505 return true;
33506
33507 /* ??? Look for narrow permutations whose element orderings would
33508 allow the promotion to a wider mode. */
33509
33510 /* ??? Look for sequences of interleave or a wider permute that place
33511 the data into the correct lanes for a half-vector shuffle like
33512 pshuf[lh]w or vpermilps. */
33513
33514 /* ??? Look for sequences of interleave that produce the desired results.
33515 The combinatorics of punpck[lh] get pretty ugly... */
33516
33517 if (expand_vec_perm_even_odd (d))
33518 return true;
33519
33520 return false;
33521 }
33522
33523 /* Extract the values from the vector CST into the permutation array in D.
33524 Return 0 on error, 1 if all values from the permutation come from the
33525 first vector, 2 if all values from the second vector, and 3 otherwise. */
33526
33527 static int
33528 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
33529 {
33530 tree list = TREE_VECTOR_CST_ELTS (cst);
33531 unsigned i, nelt = d->nelt;
33532 int ret = 0;
33533
33534 for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
33535 {
33536 unsigned HOST_WIDE_INT e;
33537
33538 if (!host_integerp (TREE_VALUE (list), 1))
33539 return 0;
33540 e = tree_low_cst (TREE_VALUE (list), 1);
33541 if (e >= 2 * nelt)
33542 return 0;
33543
33544 ret |= (e < nelt ? 1 : 2);
33545 d->perm[i] = e;
33546 }
33547 gcc_assert (list == NULL);
33548
33549   /* If all elements come from the second vector, fold them onto the first.  */
33550 if (ret == 2)
33551 for (i = 0; i < nelt; ++i)
33552 d->perm[i] -= nelt;
33553
33554 return ret;
33555 }
33556
33557 static rtx
33558 ix86_expand_vec_perm_builtin (tree exp)
33559 {
33560 struct expand_vec_perm_d d;
33561 tree arg0, arg1, arg2;
33562
33563 arg0 = CALL_EXPR_ARG (exp, 0);
33564 arg1 = CALL_EXPR_ARG (exp, 1);
33565 arg2 = CALL_EXPR_ARG (exp, 2);
33566
33567 d.vmode = TYPE_MODE (TREE_TYPE (arg0));
33568 d.nelt = GET_MODE_NUNITS (d.vmode);
33569 d.testing_p = false;
33570 gcc_assert (VECTOR_MODE_P (d.vmode));
33571
33572 if (TREE_CODE (arg2) != VECTOR_CST)
33573 {
33574 error_at (EXPR_LOCATION (exp),
33575 "vector permutation requires vector constant");
33576 goto exit_error;
33577 }
33578
33579 switch (extract_vec_perm_cst (&d, arg2))
33580 {
33581 default:
33582 gcc_unreachable();
33583
33584 case 0:
33585 error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
33586 goto exit_error;
33587
33588 case 3:
33589 if (!operand_equal_p (arg0, arg1, 0))
33590 {
33591 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
33592 d.op0 = force_reg (d.vmode, d.op0);
33593 d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
33594 d.op1 = force_reg (d.vmode, d.op1);
33595 break;
33596 }
33597
33598       /* The elements of PERM reference both operands, even though the two
33599 	 operands are identical.  Allow easier matching of the permutation
33600 	 by folding the permutation into the single
33601 	 input vector.  */
33602 {
33603 unsigned i, nelt = d.nelt;
33604 for (i = 0; i < nelt; ++i)
33605 if (d.perm[i] >= nelt)
33606 d.perm[i] -= nelt;
33607 }
33608 /* FALLTHRU */
33609
33610 case 1:
33611 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
33612 d.op0 = force_reg (d.vmode, d.op0);
33613 d.op1 = d.op0;
33614 break;
33615
33616 case 2:
33617 d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
33618 d.op0 = force_reg (d.vmode, d.op0);
33619 d.op1 = d.op0;
33620 break;
33621 }
33622
33623 d.target = gen_reg_rtx (d.vmode);
33624 if (ix86_expand_vec_perm_builtin_1 (&d))
33625 return d.target;
33626
33627   /* For compiler-generated permutations, we should never get here, because
33628   the compiler should also be checking the ok hook.  But since this is a
33629   builtin the user has access to, don't abort.  */
33630 switch (d.nelt)
33631 {
33632 case 2:
33633 sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
33634 break;
33635 case 4:
33636 sorry ("vector permutation (%d %d %d %d)",
33637 d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
33638 break;
33639 case 8:
33640 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
33641 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
33642 d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
33643 break;
33644 case 16:
33645 sorry ("vector permutation "
33646 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
33647 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
33648 d.perm[4], d.perm[5], d.perm[6], d.perm[7],
33649 d.perm[8], d.perm[9], d.perm[10], d.perm[11],
33650 d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
33651 break;
33652 default:
33653 gcc_unreachable ();
33654 }
33655 exit_error:
33656 return CONST0_RTX (d.vmode);
33657 }
33658
33659 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
33660
33661 static bool
33662 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
33663 {
33664 struct expand_vec_perm_d d;
33665 int vec_mask;
33666 bool ret, one_vec;
33667
33668 d.vmode = TYPE_MODE (vec_type);
33669 d.nelt = GET_MODE_NUNITS (d.vmode);
33670 d.testing_p = true;
33671
33672 /* Given sufficient ISA support we can just return true here
33673 for selected vector modes. */
33674 if (GET_MODE_SIZE (d.vmode) == 16)
33675 {
33676 /* All implementable with a single vpperm insn. */
33677 if (TARGET_XOP)
33678 return true;
33679 /* All implementable with 2 pshufb + 1 ior. */
33680 if (TARGET_SSSE3)
33681 return true;
33682 /* All implementable with shufpd or unpck[lh]pd. */
33683 if (d.nelt == 2)
33684 return true;
33685 }
33686
33687 vec_mask = extract_vec_perm_cst (&d, mask);
33688
33689   /* This hook cannot be called in response to something that the
33690 user does (unlike the builtin expander) so we shouldn't ever see
33691 an error generated from the extract. */
33692 gcc_assert (vec_mask > 0 && vec_mask <= 3);
33693 one_vec = (vec_mask != 3);
33694
33695 /* Implementable with shufps or pshufd. */
33696 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
33697 return true;
33698
33699 /* Otherwise we have to go through the motions and see if we can
33700 figure out how to generate the requested permutation. */
33701 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
33702 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
33703 if (!one_vec)
33704 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
33705
33706 start_sequence ();
33707 ret = ix86_expand_vec_perm_builtin_1 (&d);
33708 end_sequence ();
33709
33710 return ret;
33711 }
33712
33713 void
33714 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
33715 {
33716 struct expand_vec_perm_d d;
33717 unsigned i, nelt;
33718
33719 d.target = targ;
33720 d.op0 = op0;
33721 d.op1 = op1;
33722 d.vmode = GET_MODE (targ);
33723 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
33724 d.testing_p = false;
33725
33726 for (i = 0; i < nelt; ++i)
33727 d.perm[i] = i * 2 + odd;
33728
33729 /* We'll either be able to implement the permutation directly... */
33730 if (expand_vec_perm_1 (&d))
33731 return;
33732
33733 /* ... or we use the special-case patterns. */
33734 expand_vec_perm_even_odd_1 (&d, odd);
33735 }
33736
33737 /* Expand an insert into a vector register through pinsr insn.
33738 Return true if successful. */
33739
33740 bool
33741 ix86_expand_pinsr (rtx *operands)
33742 {
33743 rtx dst = operands[0];
33744 rtx src = operands[3];
33745
33746 unsigned int size = INTVAL (operands[1]);
33747 unsigned int pos = INTVAL (operands[2]);
33748
33749 if (GET_CODE (dst) == SUBREG)
33750 {
33751 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
33752 dst = SUBREG_REG (dst);
33753 }
33754
33755 if (GET_CODE (src) == SUBREG)
33756 src = SUBREG_REG (src);
33757
33758 switch (GET_MODE (dst))
33759 {
33760 case V16QImode:
33761 case V8HImode:
33762 case V4SImode:
33763 case V2DImode:
33764 {
33765 enum machine_mode srcmode, dstmode;
33766 rtx (*pinsr)(rtx, rtx, rtx, rtx);
33767
33768 srcmode = mode_for_size (size, MODE_INT, 0);
33769
33770 switch (srcmode)
33771 {
33772 case QImode:
33773 if (!TARGET_SSE4_1)
33774 return false;
33775 dstmode = V16QImode;
33776 pinsr = gen_sse4_1_pinsrb;
33777 break;
33778
33779 case HImode:
33780 if (!TARGET_SSE2)
33781 return false;
33782 dstmode = V8HImode;
33783 pinsr = gen_sse2_pinsrw;
33784 break;
33785
33786 case SImode:
33787 if (!TARGET_SSE4_1)
33788 return false;
33789 dstmode = V4SImode;
33790 pinsr = gen_sse4_1_pinsrd;
33791 break;
33792
33793 case DImode:
33794 gcc_assert (TARGET_64BIT);
33795 if (!TARGET_SSE4_1)
33796 return false;
33797 dstmode = V2DImode;
33798 pinsr = gen_sse4_1_pinsrq;
33799 break;
33800
33801 default:
33802 return false;
33803 }
33804
33805 dst = gen_lowpart (dstmode, dst);
33806 src = gen_lowpart (srcmode, src);
33807
33808 pos /= size;
33809
33810 emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
33811 return true;
33812 }
33813
33814 default:
33815 return false;
33816 }
33817 }
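
/* Worked example of the mapping above: inserting a 16-bit field at bit
   position 32 of a V2DImode destination gives

     srcmode = HImode, dstmode = V8HImode, pos / size = 2

   so the emitted insn is pinsrw with selector GEN_INT (1 << 2).  */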
33818 \f
33819 /* This function returns the calling-ABI-specific va_list type node
33820    for FNDECL.  */
33821
33822 static tree
33823 ix86_fn_abi_va_list (tree fndecl)
33824 {
33825 if (!TARGET_64BIT)
33826 return va_list_type_node;
33827 gcc_assert (fndecl != NULL_TREE);
33828
33829 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
33830 return ms_va_list_type_node;
33831 else
33832 return sysv_va_list_type_node;
33833 }
33834
33835 /* Returns the canonical va_list type specified by TYPE. If there
33836    is no valid TYPE provided, it returns NULL_TREE.  */
33837
33838 static tree
33839 ix86_canonical_va_list_type (tree type)
33840 {
33841 tree wtype, htype;
33842
33843 /* Resolve references and pointers to va_list type. */
33844 if (TREE_CODE (type) == MEM_REF)
33845 type = TREE_TYPE (type);
33846 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
33847 type = TREE_TYPE (type);
33848 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
33849 type = TREE_TYPE (type);
33850
33851 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
33852 {
33853 wtype = va_list_type_node;
33854 gcc_assert (wtype != NULL_TREE);
33855 htype = type;
33856 if (TREE_CODE (wtype) == ARRAY_TYPE)
33857 {
33858 /* If va_list is an array type, the argument may have decayed
33859 to a pointer type, e.g. by being passed to another function.
33860 In that case, unwrap both types so that we can compare the
33861 underlying records. */
33862 if (TREE_CODE (htype) == ARRAY_TYPE
33863 || POINTER_TYPE_P (htype))
33864 {
33865 wtype = TREE_TYPE (wtype);
33866 htype = TREE_TYPE (htype);
33867 }
33868 }
33869 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
33870 return va_list_type_node;
33871 wtype = sysv_va_list_type_node;
33872 gcc_assert (wtype != NULL_TREE);
33873 htype = type;
33874 if (TREE_CODE (wtype) == ARRAY_TYPE)
33875 {
33876 /* If va_list is an array type, the argument may have decayed
33877 to a pointer type, e.g. by being passed to another function.
33878 In that case, unwrap both types so that we can compare the
33879 underlying records. */
33880 if (TREE_CODE (htype) == ARRAY_TYPE
33881 || POINTER_TYPE_P (htype))
33882 {
33883 wtype = TREE_TYPE (wtype);
33884 htype = TREE_TYPE (htype);
33885 }
33886 }
33887 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
33888 return sysv_va_list_type_node;
33889 wtype = ms_va_list_type_node;
33890 gcc_assert (wtype != NULL_TREE);
33891 htype = type;
33892 if (TREE_CODE (wtype) == ARRAY_TYPE)
33893 {
33894 /* If va_list is an array type, the argument may have decayed
33895 to a pointer type, e.g. by being passed to another function.
33896 In that case, unwrap both types so that we can compare the
33897 underlying records. */
33898 if (TREE_CODE (htype) == ARRAY_TYPE
33899 || POINTER_TYPE_P (htype))
33900 {
33901 wtype = TREE_TYPE (wtype);
33902 htype = TREE_TYPE (htype);
33903 }
33904 }
33905 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
33906 return ms_va_list_type_node;
33907 return NULL_TREE;
33908 }
33909 return std_canonical_va_list_type (type);
33910 }
33911
33912 /* Iterate through the target-specific builtin types for va_list.
33913 IDX denotes the iterator, *PTREE is set to the result type of
33914    the va_list builtin, and *PNAME to its internal name.
33915 Returns zero if there is no element for this index, otherwise
33916 IDX should be increased upon the next call.
33917 Note, do not iterate a base builtin's name like __builtin_va_list.
33918 Used from c_common_nodes_and_builtins. */
33919
33920 static int
33921 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
33922 {
33923 if (TARGET_64BIT)
33924 {
33925 switch (idx)
33926 {
33927 default:
33928 break;
33929
33930 case 0:
33931 *ptree = ms_va_list_type_node;
33932 *pname = "__builtin_ms_va_list";
33933 return 1;
33934
33935 case 1:
33936 *ptree = sysv_va_list_type_node;
33937 *pname = "__builtin_sysv_va_list";
33938 return 1;
33939 }
33940 }
33941
33942 return 0;
33943 }
33944
33945 #undef TARGET_SCHED_DISPATCH
33946 #define TARGET_SCHED_DISPATCH has_dispatch
33947 #undef TARGET_SCHED_DISPATCH_DO
33948 #define TARGET_SCHED_DISPATCH_DO do_dispatch
33949
33950 /* The size of the dispatch window is the total number of bytes of
33951 object code allowed in a window. */
33952 #define DISPATCH_WINDOW_SIZE 16
33953
33954 /* Number of dispatch windows considered for scheduling. */
33955 #define MAX_DISPATCH_WINDOWS 3
33956
33957 /* Maximum number of instructions in a window. */
33958 #define MAX_INSN 4
33959
33960 /* Maximum number of immediate operands in a window. */
33961 #define MAX_IMM 4
33962
33963 /* Maximum number of immediate bits allowed in a window. */
33964 #define MAX_IMM_SIZE 128
33965
33966 /* Maximum number of 32 bit immediates allowed in a window. */
33967 #define MAX_IMM_32 4
33968
33969 /* Maximum number of 64 bit immediates allowed in a window. */
33970 #define MAX_IMM_64 2
33971
33972 /* Maximum total of loads or prefetches allowed in a window. */
33973 #define MAX_LOAD 2
33974
33975 /* Maximum total of stores allowed in a window. */
33976 #define MAX_STORE 1
33977
33978 #undef BIG
33979 #define BIG 100
33980
33981
33982 /* Dispatch groups.  Instructions that affect the mix in a dispatch window.  */
33983 enum dispatch_group {
33984 disp_no_group = 0,
33985 disp_load,
33986 disp_store,
33987 disp_load_store,
33988 disp_prefetch,
33989 disp_imm,
33990 disp_imm_32,
33991 disp_imm_64,
33992 disp_branch,
33993 disp_cmp,
33994 disp_jcc,
33995 disp_last
33996 };
33997
33998 /* Number of allowable groups in a dispatch window. It is an array
33999 indexed by dispatch_group enum. 100 is used as a big number,
34000    because the number of these kinds of operations does not have any
34001    effect on the dispatch window, but we need them for other reasons in
34002 the table. */
34003 static unsigned int num_allowable_groups[disp_last] = {
34004 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
34005 };
34006
34007 char group_name[disp_last + 1][16] = {
34008 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
34009 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
34010 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
34011 };
34012
34013 /* Instruction path. */
34014 enum insn_path {
34015 no_path = 0,
34016 path_single, /* Single micro op. */
34017 path_double, /* Double micro op. */
34018   path_multi, /* Instructions with more than 2 micro ops.  */
34019 last_path
34020 };
34021
34022 /* sched_insn_info describes one entry of a dispatch window: the insn
34023    scheduled there, together with its dispatch group, micro-op path,
34024    and size information.
34025
34026 Windows are allocated for each basic block and are linked
34027 together. */
34028 typedef struct sched_insn_info_s {
34029 rtx insn;
34030 enum dispatch_group group;
34031 enum insn_path path;
34032 int byte_len;
34033 int imm_bytes;
34034 } sched_insn_info;
34035
34036 /* Linked list of dispatch windows.  This is a two-way list of the
34037    dispatch windows of a basic block.  It contains information about
34038 the number of uops in the window and the total number of
34039 instructions and of bytes in the object code for this dispatch
34040 window. */
34041 typedef struct dispatch_windows_s {
34042   int num_insn; /* Number of insns in the window.  */
34043 int num_uops; /* Number of uops in the window. */
34044 int window_size; /* Number of bytes in the window. */
34045   int window_num; /* Window number, 0 or 1.  */
34046 int num_imm; /* Number of immediates in an insn. */
34047 int num_imm_32; /* Number of 32 bit immediates in an insn. */
34048 int num_imm_64; /* Number of 64 bit immediates in an insn. */
34049 int imm_size; /* Total immediates in the window. */
34050 int num_loads; /* Total memory loads in the window. */
34051 int num_stores; /* Total memory stores in the window. */
34052 int violation; /* Violation exists in window. */
34053 sched_insn_info *window; /* Pointer to the window. */
34054 struct dispatch_windows_s *next;
34055 struct dispatch_windows_s *prev;
34056 } dispatch_windows;
34057
34058 /* Immediate values used in an insn.  */
34059 typedef struct imm_info_s
34060 {
34061 int imm;
34062 int imm32;
34063 int imm64;
34064 } imm_info;
34065
34066 static dispatch_windows *dispatch_window_list;
34067 static dispatch_windows *dispatch_window_list1;
34068
34069 /* Get the memory-related dispatch group of INSN.  */
34070
34071 static enum dispatch_group
34072 get_mem_group (rtx insn)
34073 {
34074 enum attr_memory memory;
34075
34076 if (INSN_CODE (insn) < 0)
34077 return disp_no_group;
34078 memory = get_attr_memory (insn);
34079 if (memory == MEMORY_STORE)
34080 return disp_store;
34081
34082 if (memory == MEMORY_LOAD)
34083 return disp_load;
34084
34085 if (memory == MEMORY_BOTH)
34086 return disp_load_store;
34087
34088 return disp_no_group;
34089 }
34090
34091 /* Return true if insn is a compare instruction. */
34092
34093 static bool
34094 is_cmp (rtx insn)
34095 {
34096 enum attr_type type;
34097
34098 type = get_attr_type (insn);
34099 return (type == TYPE_TEST
34100 || type == TYPE_ICMP
34101 || type == TYPE_FCMP
34102 || GET_CODE (PATTERN (insn)) == COMPARE);
34103 }
34104
34105 /* Return true if a dispatch violation was encountered.  */
34106
34107 static bool
34108 dispatch_violation (void)
34109 {
34110 if (dispatch_window_list->next)
34111 return dispatch_window_list->next->violation;
34112 return dispatch_window_list->violation;
34113 }
34114
34115 /* Return true if insn is a branch instruction. */
34116
34117 static bool
34118 is_branch (rtx insn)
34119 {
34120 return (CALL_P (insn) || JUMP_P (insn));
34121 }
34122
34123 /* Return true if insn is a prefetch instruction. */
34124
34125 static bool
34126 is_prefetch (rtx insn)
34127 {
34128 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
34129 }
34130
34131 /* This function initializes a dispatch window and the list container holding a
34132 pointer to the window. */
34133
34134 static void
34135 init_window (int window_num)
34136 {
34137 int i;
34138 dispatch_windows *new_list;
34139
34140 if (window_num == 0)
34141 new_list = dispatch_window_list;
34142 else
34143 new_list = dispatch_window_list1;
34144
34145 new_list->num_insn = 0;
34146 new_list->num_uops = 0;
34147 new_list->window_size = 0;
34148 new_list->next = NULL;
34149 new_list->prev = NULL;
34150 new_list->window_num = window_num;
34151 new_list->num_imm = 0;
34152 new_list->num_imm_32 = 0;
34153 new_list->num_imm_64 = 0;
34154 new_list->imm_size = 0;
34155 new_list->num_loads = 0;
34156 new_list->num_stores = 0;
34157 new_list->violation = false;
34158
34159 for (i = 0; i < MAX_INSN; i++)
34160 {
34161 new_list->window[i].insn = NULL;
34162 new_list->window[i].group = disp_no_group;
34163 new_list->window[i].path = no_path;
34164 new_list->window[i].byte_len = 0;
34165 new_list->window[i].imm_bytes = 0;
34166 }
34167 return;
34168 }
34169
34170 /* This function allocates and initializes a dispatch window and the
34171 list container holding a pointer to the window. */
34172
34173 static dispatch_windows *
34174 allocate_window (void)
34175 {
34176 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
34177 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
34178
34179 return new_list;
34180 }
34181
34182 /* This routine initializes the dispatch scheduling information. It
34183 initiates building dispatch scheduler tables and constructs the
34184 first dispatch window. */
34185
34186 static void
34187 init_dispatch_sched (void)
34188 {
34189 /* Allocate a dispatch list and a window. */
34190 dispatch_window_list = allocate_window ();
34191 dispatch_window_list1 = allocate_window ();
34192 init_window (0);
34193 init_window (1);
34194 }
34195
34196 /* This function returns true if a branch is detected. End of a basic block
34197 does not have to be a branch, but here we assume only branches end a
34198 window. */
34199
34200 static bool
34201 is_end_basic_block (enum dispatch_group group)
34202 {
34203 return group == disp_branch;
34204 }
34205
34206 /* This function is called when the end of a window's processing is reached.  */
34207
34208 static void
34209 process_end_window (void)
34210 {
34211 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
34212 if (dispatch_window_list->next)
34213 {
34214 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
34215 gcc_assert (dispatch_window_list->window_size
34216 + dispatch_window_list1->window_size <= 48);
34217 init_window (1);
34218 }
34219 init_window (0);
34220 }
34221
34222 /* Return the dispatch window to use next.  WINDOW_NUM is either 0 or 1.
34223 A maximum of two windows are generated for 48 bytes of instructions.
34224 Note that these windows are not dispatch windows whose sizes are
34225 DISPATCH_WINDOW_SIZE.  */
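
/* The two windows together never hold more than 48 bytes of
   instructions; process_end_window asserts this before resetting
   them.  */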
34226
34227 static dispatch_windows *
34228 allocate_next_window (int window_num)
34229 {
34230 if (window_num == 0)
34231 {
34232 if (dispatch_window_list->next)
34233 init_window (1);
34234 init_window (0);
34235 return dispatch_window_list;
34236 }
34237
34238 dispatch_window_list->next = dispatch_window_list1;
34239 dispatch_window_list1->prev = dispatch_window_list;
34240
34241 return dispatch_window_list1;
34242 }
34243
34244 /* If *IN_RTX is an immediate operand, count it in IMM_VALUES.  */
34245
34246 static int
34247 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
34248 {
34249 if (*in_rtx == 0)
34250 return 0;
34251
34252 switch (GET_CODE (*in_rtx))
34253 {
34254 case CONST:
34255 case SYMBOL_REF:
34256 case CONST_INT:
34257 (imm_values->imm)++;
34258 if (x86_64_immediate_operand (*in_rtx, SImode))
34259 (imm_values->imm32)++;
34260 else
34261 (imm_values->imm64)++;
34262 break;
34263
34264 case CONST_DOUBLE:
34265 (imm_values->imm)++;
34266 (imm_values->imm64)++;
34267 break;
34268
34269 case CODE_LABEL:
34270 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
34271 {
34272 (imm_values->imm)++;
34273 (imm_values->imm32)++;
34274 }
34275 break;
34276
34277 default:
34278 break;
34279 }
34280
34281 return 0;
34282 }
34283
34284 /* Compute number of immediate operands of an instruction. */
34285
34286 static void
34287 find_constant (rtx in_rtx, imm_info *imm_values)
34288 {
34289 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
34290 (rtx_function) find_constant_1, (void *) imm_values);
34291 }
34292
34293 /* Return the total size of the immediate operands of an instruction
34294 along with the number of corresponding immediate operands.  It
34295 initializes its parameters to zero before calling FIND_CONSTANT.
34296 INSN is the input instruction.  IMM is the total number of immediates.
34297 IMM32 is the number of 32 bit immediates.  IMM64 is the number of 64
34298 bit immediates.  */
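
/* For example, an instruction with one 32-bit immediate and one 64-bit
   immediate yields *IMM == 2, *IMM32 == 1, *IMM64 == 1 and a return
   value of 1 * 4 + 1 * 8 == 12 bytes.  */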
34299
34300 static int
34301 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
34302 {
34303 imm_info imm_values = {0, 0, 0};
34304
34305 find_constant (insn, &imm_values);
34306 *imm = imm_values.imm;
34307 *imm32 = imm_values.imm32;
34308 *imm64 = imm_values.imm64;
34309 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
34310 }
34311
34312 /* This function indicates whether an instruction has an immediate
34313 operand.  */
34314
34315 static bool
34316 has_immediate (rtx insn)
34317 {
34318 int num_imm_operand;
34319 int num_imm32_operand;
34320 int num_imm64_operand;
34321
34322 if (insn)
34323 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34324 &num_imm64_operand);
34325 return false;
34326 }
34327
34328 /* Return single or double path for instructions. */
34329
34330 static enum insn_path
34331 get_insn_path (rtx insn)
34332 {
34333 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
34334
34335 if ((int)path == 0)
34336 return path_single;
34337
34338 if ((int)path == 1)
34339 return path_double;
34340
34341 return path_multi;
34342 }
34343
34344 /* Return insn dispatch group. */
34345
34346 static enum dispatch_group
34347 get_insn_group (rtx insn)
34348 {
34349 enum dispatch_group group = get_mem_group (insn);
34350 if (group)
34351 return group;
34352
34353 if (is_branch (insn))
34354 return disp_branch;
34355
34356 if (is_cmp (insn))
34357 return disp_cmp;
34358
34359 if (has_immediate (insn))
34360 return disp_imm;
34361
34362 if (is_prefetch (insn))
34363 return disp_prefetch;
34364
34365 return disp_no_group;
34366 }
34367
34368 /* Count number of GROUP restricted instructions in a dispatch
34369 window WINDOW_LIST. */
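
/* The return value is 0 when INSN needs no restricted resources, 1 when
   it fits within the limits of WINDOW_LIST, and BIG when adding it
   would exceed one of the per-window limits defined above.  */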
34370
34371 static int
34372 count_num_restricted (rtx insn, dispatch_windows *window_list)
34373 {
34374 enum dispatch_group group = get_insn_group (insn);
34375 int imm_size;
34376 int num_imm_operand;
34377 int num_imm32_operand;
34378 int num_imm64_operand;
34379
34380 if (group == disp_no_group)
34381 return 0;
34382
34383 if (group == disp_imm)
34384 {
34385 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34386 &num_imm64_operand);
34387 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
34388 || num_imm_operand + window_list->num_imm > MAX_IMM
34389 || (num_imm32_operand > 0
34390 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
34391 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
34392 || (num_imm64_operand > 0
34393 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
34394 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
34395 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
34396 && num_imm64_operand > 0
34397 && ((window_list->num_imm_64 > 0
34398 && window_list->num_insn >= 2)
34399 || window_list->num_insn >= 3)))
34400 return BIG;
34401
34402 return 1;
34403 }
34404
34405 if ((group == disp_load_store
34406 && (window_list->num_loads >= MAX_LOAD
34407 || window_list->num_stores >= MAX_STORE))
34408 || ((group == disp_load
34409 || group == disp_prefetch)
34410 && window_list->num_loads >= MAX_LOAD)
34411 || (group == disp_store
34412 && window_list->num_stores >= MAX_STORE))
34413 return BIG;
34414
34415 return 1;
34416 }
34417
34418 /* This function returns true if insn satisfies dispatch rules on the
34419 last window scheduled. */
34420
34421 static bool
34422 fits_dispatch_window (rtx insn)
34423 {
34424 dispatch_windows *window_list = dispatch_window_list;
34425 dispatch_windows *window_list_next = dispatch_window_list->next;
34426 unsigned int num_restrict;
34427 enum dispatch_group group = get_insn_group (insn);
34428 enum insn_path path = get_insn_path (insn);
34429 int sum;
34430
34431 /* Make disp_cmp and disp_jcc get scheduled as late as possible.  These
34432 instructions should be given the lowest priority in the
34433 scheduling process in the Haifa scheduler to make sure they will be
34434 scheduled in the same dispatch window as the reference to them.  */
34435 if (group == disp_jcc || group == disp_cmp)
34436 return false;
34437
34438 /* Check nonrestricted. */
34439 if (group == disp_no_group || group == disp_branch)
34440 return true;
34441
34442 /* Get last dispatch window. */
34443 if (window_list_next)
34444 window_list = window_list_next;
34445
34446 if (window_list->window_num == 1)
34447 {
34448 sum = window_list->prev->window_size + window_list->window_size;
34449
34450 if (sum == 32
34451 || (min_insn_size (insn) + sum) >= 48)
34452 /* Window 1 is full. Go for next window. */
34453 return true;
34454 }
34455
34456 num_restrict = count_num_restricted (insn, window_list);
34457
34458 if (num_restrict > num_allowable_groups[group])
34459 return false;
34460
34461 /* See if it fits in the first window. */
34462 if (window_list->window_num == 0)
34463 {
34464 /* The first window should have only single and double path
34465 uops.  */
34466 if (path == path_double
34467 && (window_list->num_uops + 2) > MAX_INSN)
34468 return false;
34469 else if (path != path_single)
34470 return false;
34471 }
34472 return true;
34473 }
34474
34475 /* Add an instruction INSN with NUM_UOPS micro-operations to the
34476 dispatch window WINDOW_LIST. */
34477
34478 static void
34479 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
34480 {
34481 int byte_len = min_insn_size (insn);
34482 int num_insn = window_list->num_insn;
34483 int imm_size;
34484 sched_insn_info *window = window_list->window;
34485 enum dispatch_group group = get_insn_group (insn);
34486 enum insn_path path = get_insn_path (insn);
34487 int num_imm_operand;
34488 int num_imm32_operand;
34489 int num_imm64_operand;
34490
34491 if (!window_list->violation && group != disp_cmp
34492 && !fits_dispatch_window (insn))
34493 window_list->violation = true;
34494
34495 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34496 &num_imm64_operand);
34497
34498 /* Initialize window with new instruction. */
34499 window[num_insn].insn = insn;
34500 window[num_insn].byte_len = byte_len;
34501 window[num_insn].group = group;
34502 window[num_insn].path = path;
34503 window[num_insn].imm_bytes = imm_size;
34504
34505 window_list->window_size += byte_len;
34506 window_list->num_insn = num_insn + 1;
34507 window_list->num_uops = window_list->num_uops + num_uops;
34508 window_list->imm_size += imm_size;
34509 window_list->num_imm += num_imm_operand;
34510 window_list->num_imm_32 += num_imm32_operand;
34511 window_list->num_imm_64 += num_imm64_operand;
34512
34513 if (group == disp_store)
34514 window_list->num_stores += 1;
34515 else if (group == disp_load
34516 || group == disp_prefetch)
34517 window_list->num_loads += 1;
34518 else if (group == disp_load_store)
34519 {
34520 window_list->num_stores += 1;
34521 window_list->num_loads += 1;
34522 }
34523 }
34524
34525 /* Add a scheduled instruction, INSN, to the current dispatch window.
34526 If the total bytes of instructions or the number of instructions in
34527 the window exceeds the allowed maximum, allocate a new window.  */
34528
34529 static void
34530 add_to_dispatch_window (rtx insn)
34531 {
34532 int byte_len;
34533 dispatch_windows *window_list;
34534 dispatch_windows *next_list;
34535 dispatch_windows *window0_list;
34536 enum insn_path path;
34537 enum dispatch_group insn_group;
34538 bool insn_fits;
34539 int num_insn;
34540 int num_uops;
34541 int window_num;
34542 int insn_num_uops;
34543 int sum;
34544
34545 if (INSN_CODE (insn) < 0)
34546 return;
34547
34548 byte_len = min_insn_size (insn);
34549 window_list = dispatch_window_list;
34550 next_list = window_list->next;
34551 path = get_insn_path (insn);
34552 insn_group = get_insn_group (insn);
34553
34554 /* Get the last dispatch window. */
34555 if (next_list)
34556 window_list = dispatch_window_list->next;
34557
34558 if (path == path_single)
34559 insn_num_uops = 1;
34560 else if (path == path_double)
34561 insn_num_uops = 2;
34562 else
34563 insn_num_uops = (int) path;
34564
34565 /* If the current window is full, get a new window.
34566 Window number zero is full if MAX_INSN uops are scheduled in it.
34567 Window number one is full if window zero's bytes plus window
34568 one's bytes total 32, or if adding the bytes of the new
34569 instruction makes the total reach 48 or more, or if it already
34570 has MAX_INSN instructions in it.  */
34571 num_insn = window_list->num_insn;
34572 num_uops = window_list->num_uops;
34573 window_num = window_list->window_num;
34574 insn_fits = fits_dispatch_window (insn);
34575
34576 if (num_insn >= MAX_INSN
34577 || num_uops + insn_num_uops > MAX_INSN
34578 || !(insn_fits))
34579 {
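      /* Toggle between window 0 and window 1.  */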
34580 window_num = ~window_num & 1;
34581 window_list = allocate_next_window (window_num);
34582 }
34583
34584 if (window_num == 0)
34585 {
34586 add_insn_window (insn, window_list, insn_num_uops);
34587 if (window_list->num_insn >= MAX_INSN
34588 && insn_group == disp_branch)
34589 {
34590 process_end_window ();
34591 return;
34592 }
34593 }
34594 else if (window_num == 1)
34595 {
34596 window0_list = window_list->prev;
34597 sum = window0_list->window_size + window_list->window_size;
34598 if (sum == 32
34599 || (byte_len + sum) >= 48)
34600 {
34601 process_end_window ();
34602 window_list = dispatch_window_list;
34603 }
34604
34605 add_insn_window (insn, window_list, insn_num_uops);
34606 }
34607 else
34608 gcc_unreachable ();
34609
34610 if (is_end_basic_block (insn_group))
34611 {
34612 /* End of the basic block is reached; do the end-of-basic-block processing.  */
34613 process_end_window ();
34614 return;
34615 }
34616 }
34617
34618 /* Print the dispatch window, WINDOW_NUM, to FILE. */
34619
34620 DEBUG_FUNCTION static void
34621 debug_dispatch_window_file (FILE *file, int window_num)
34622 {
34623 dispatch_windows *list;
34624 int i;
34625
34626 if (window_num == 0)
34627 list = dispatch_window_list;
34628 else
34629 list = dispatch_window_list1;
34630
34631 fprintf (file, "Window #%d:\n", list->window_num);
34632 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
34633 list->num_insn, list->num_uops, list->window_size);
34634 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
34635 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
34636
34637 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
34638 list->num_stores);
34639 fprintf (file, " insn info:\n");
34640
34641 for (i = 0; i < MAX_INSN; i++)
34642 {
34643 if (!list->window[i].insn)
34644 break;
34645 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
34646 i, group_name[list->window[i].group],
34647 i, (void *)list->window[i].insn,
34648 i, list->window[i].path,
34649 i, list->window[i].byte_len,
34650 i, list->window[i].imm_bytes);
34651 }
34652 }
34653
34654 /* Print to stdout a dispatch window. */
34655
34656 DEBUG_FUNCTION void
34657 debug_dispatch_window (int window_num)
34658 {
34659 debug_dispatch_window_file (stdout, window_num);
34660 }
34661
34662 /* Print INSN dispatch information to FILE. */
34663
34664 DEBUG_FUNCTION static void
34665 debug_insn_dispatch_info_file (FILE *file, rtx insn)
34666 {
34667 int byte_len;
34668 enum insn_path path;
34669 enum dispatch_group group;
34670 int imm_size;
34671 int num_imm_operand;
34672 int num_imm32_operand;
34673 int num_imm64_operand;
34674
34675 if (INSN_CODE (insn) < 0)
34676 return;
34677
34678 byte_len = min_insn_size (insn);
34679 path = get_insn_path (insn);
34680 group = get_insn_group (insn);
34681 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34682 &num_imm64_operand);
34683
34684 fprintf (file, " insn info:\n");
34685 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
34686 group_name[group], path, byte_len);
34687 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
34688 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
34689 }
34690
34691 /* Print to stdout the status of the ready list with respect to
34692 dispatch windows.  */
34693
34694 DEBUG_FUNCTION void
34695 debug_ready_dispatch (void)
34696 {
34697 int i;
34698 int no_ready = number_in_ready ();
34699
34700 fprintf (stdout, "Number of ready: %d\n", no_ready);
34701
34702 for (i = 0; i < no_ready; i++)
34703 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
34704 }
34705
34706 /* This routine is the driver of the dispatch scheduler. */
34707
34708 static void
34709 do_dispatch (rtx insn, int mode)
34710 {
34711 if (mode == DISPATCH_INIT)
34712 init_dispatch_sched ();
34713 else if (mode == ADD_TO_DISPATCH_WINDOW)
34714 add_to_dispatch_window (insn);
34715 }
34716
34717 /* Return TRUE if Dispatch Scheduling is supported. */
34718
34719 static bool
34720 has_dispatch (rtx insn, int action)
34721 {
34722 if ((ix86_tune == PROCESSOR_BDVER1 || ix86_tune == PROCESSOR_BDVER2)
34723 && flag_dispatch_scheduler)
34724 switch (action)
34725 {
34726 default:
34727 return false;
34728
34729 case IS_DISPATCH_ON:
34730 return true;
34731 break;
34732
34733 case IS_CMP:
34734 return is_cmp (insn);
34735
34736 case DISPATCH_VIOLATION:
34737 return dispatch_violation ();
34738
34739 case FITS_DISPATCH_WINDOW:
34740 return fits_dispatch_window (insn);
34741 }
34742
34743 return false;
34744 }
34745
34746 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
34747 place emms and femms instructions. */
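
/* For example, with AVX enabled and TARGET_PREFER_AVX128 not set,
   SFmode elements are vectorized in V8SFmode (eight floats per
   vector); otherwise V4SFmode is used.  */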
34748
34749 static enum machine_mode
34750 ix86_preferred_simd_mode (enum machine_mode mode)
34751 {
34752 if (!TARGET_SSE)
34753 return word_mode;
34754
34755 switch (mode)
34756 {
34757 case QImode:
34758 return V16QImode;
34759 case HImode:
34760 return V8HImode;
34761 case SImode:
34762 return V4SImode;
34763 case DImode:
34764 return V2DImode;
34765
34766 case SFmode:
34767 if (TARGET_AVX && !TARGET_PREFER_AVX128)
34768 return V8SFmode;
34769 else
34770 return V4SFmode;
34771
34772 case DFmode:
34773 if (!TARGET_VECTORIZE_DOUBLE)
34774 return word_mode;
34775 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
34776 return V4DFmode;
34777 else if (TARGET_SSE2)
34778 return V2DFmode;
34779 /* FALLTHRU */
34780
34781 default:
34782 return word_mode;
34783 }
34784 }
34785
34786 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
34787 vectors. */
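
/* The returned value is a bitmask of vector sizes in bytes; 32 | 16
   asks the vectorizer to try both 32-byte (256-bit) and 16-byte
   (128-bit) vectors, while 0 means only the preferred SIMD mode is
   tried.  */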
34788
34789 static unsigned int
34790 ix86_autovectorize_vector_sizes (void)
34791 {
34792 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
34793 }
34794
34795 /* Initialize the GCC target structure. */
34796 #undef TARGET_RETURN_IN_MEMORY
34797 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
34798
34799 #undef TARGET_LEGITIMIZE_ADDRESS
34800 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
34801
34802 #undef TARGET_ATTRIBUTE_TABLE
34803 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
34804 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
34805 # undef TARGET_MERGE_DECL_ATTRIBUTES
34806 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
34807 #endif
34808
34809 #undef TARGET_COMP_TYPE_ATTRIBUTES
34810 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
34811
34812 #undef TARGET_INIT_BUILTINS
34813 #define TARGET_INIT_BUILTINS ix86_init_builtins
34814 #undef TARGET_BUILTIN_DECL
34815 #define TARGET_BUILTIN_DECL ix86_builtin_decl
34816 #undef TARGET_EXPAND_BUILTIN
34817 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
34818
34819 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
34820 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
34821 ix86_builtin_vectorized_function
34822
34823 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
34824 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
34825
34826 #undef TARGET_BUILTIN_RECIPROCAL
34827 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
34828
34829 #undef TARGET_ASM_FUNCTION_EPILOGUE
34830 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
34831
34832 #undef TARGET_ENCODE_SECTION_INFO
34833 #ifndef SUBTARGET_ENCODE_SECTION_INFO
34834 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
34835 #else
34836 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
34837 #endif
34838
34839 #undef TARGET_ASM_OPEN_PAREN
34840 #define TARGET_ASM_OPEN_PAREN ""
34841 #undef TARGET_ASM_CLOSE_PAREN
34842 #define TARGET_ASM_CLOSE_PAREN ""
34843
34844 #undef TARGET_ASM_BYTE_OP
34845 #define TARGET_ASM_BYTE_OP ASM_BYTE
34846
34847 #undef TARGET_ASM_ALIGNED_HI_OP
34848 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
34849 #undef TARGET_ASM_ALIGNED_SI_OP
34850 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
34851 #ifdef ASM_QUAD
34852 #undef TARGET_ASM_ALIGNED_DI_OP
34853 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
34854 #endif
34855
34856 #undef TARGET_PROFILE_BEFORE_PROLOGUE
34857 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
34858
34859 #undef TARGET_ASM_UNALIGNED_HI_OP
34860 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
34861 #undef TARGET_ASM_UNALIGNED_SI_OP
34862 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
34863 #undef TARGET_ASM_UNALIGNED_DI_OP
34864 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
34865
34866 #undef TARGET_PRINT_OPERAND
34867 #define TARGET_PRINT_OPERAND ix86_print_operand
34868 #undef TARGET_PRINT_OPERAND_ADDRESS
34869 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
34870 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
34871 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
34872 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
34873 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
34874
34875 #undef TARGET_SCHED_INIT_GLOBAL
34876 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
34877 #undef TARGET_SCHED_ADJUST_COST
34878 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
34879 #undef TARGET_SCHED_ISSUE_RATE
34880 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
34881 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
34882 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
34883 ia32_multipass_dfa_lookahead
34884
34885 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
34886 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
34887
34888 #ifdef HAVE_AS_TLS
34889 #undef TARGET_HAVE_TLS
34890 #define TARGET_HAVE_TLS true
34891 #endif
34892 #undef TARGET_CANNOT_FORCE_CONST_MEM
34893 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
34894 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
34895 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
34896
34897 #undef TARGET_DELEGITIMIZE_ADDRESS
34898 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
34899
34900 #undef TARGET_MS_BITFIELD_LAYOUT_P
34901 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
34902
34903 #if TARGET_MACHO
34904 #undef TARGET_BINDS_LOCAL_P
34905 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
34906 #endif
34907 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
34908 #undef TARGET_BINDS_LOCAL_P
34909 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
34910 #endif
34911
34912 #undef TARGET_ASM_OUTPUT_MI_THUNK
34913 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
34914 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
34915 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
34916
34917 #undef TARGET_ASM_FILE_START
34918 #define TARGET_ASM_FILE_START x86_file_start
34919
34920 #undef TARGET_OPTION_OVERRIDE
34921 #define TARGET_OPTION_OVERRIDE ix86_option_override
34922
34923 #undef TARGET_REGISTER_MOVE_COST
34924 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
34925 #undef TARGET_MEMORY_MOVE_COST
34926 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
34927 #undef TARGET_RTX_COSTS
34928 #define TARGET_RTX_COSTS ix86_rtx_costs
34929 #undef TARGET_ADDRESS_COST
34930 #define TARGET_ADDRESS_COST ix86_address_cost
34931
34932 #undef TARGET_FIXED_CONDITION_CODE_REGS
34933 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
34934 #undef TARGET_CC_MODES_COMPATIBLE
34935 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
34936
34937 #undef TARGET_MACHINE_DEPENDENT_REORG
34938 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
34939
34940 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
34941 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
34942
34943 #undef TARGET_BUILD_BUILTIN_VA_LIST
34944 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
34945
34946 #undef TARGET_ENUM_VA_LIST_P
34947 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
34948
34949 #undef TARGET_FN_ABI_VA_LIST
34950 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
34951
34952 #undef TARGET_CANONICAL_VA_LIST_TYPE
34953 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
34954
34955 #undef TARGET_EXPAND_BUILTIN_VA_START
34956 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
34957
34958 #undef TARGET_MD_ASM_CLOBBERS
34959 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
34960
34961 #undef TARGET_PROMOTE_PROTOTYPES
34962 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
34963 #undef TARGET_STRUCT_VALUE_RTX
34964 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
34965 #undef TARGET_SETUP_INCOMING_VARARGS
34966 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
34967 #undef TARGET_MUST_PASS_IN_STACK
34968 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
34969 #undef TARGET_FUNCTION_ARG_ADVANCE
34970 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
34971 #undef TARGET_FUNCTION_ARG
34972 #define TARGET_FUNCTION_ARG ix86_function_arg
34973 #undef TARGET_FUNCTION_ARG_BOUNDARY
34974 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
34975 #undef TARGET_PASS_BY_REFERENCE
34976 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
34977 #undef TARGET_INTERNAL_ARG_POINTER
34978 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
34979 #undef TARGET_UPDATE_STACK_BOUNDARY
34980 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
34981 #undef TARGET_GET_DRAP_RTX
34982 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
34983 #undef TARGET_STRICT_ARGUMENT_NAMING
34984 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
34985 #undef TARGET_STATIC_CHAIN
34986 #define TARGET_STATIC_CHAIN ix86_static_chain
34987 #undef TARGET_TRAMPOLINE_INIT
34988 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
34989 #undef TARGET_RETURN_POPS_ARGS
34990 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
34991
34992 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
34993 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
34994
34995 #undef TARGET_SCALAR_MODE_SUPPORTED_P
34996 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
34997
34998 #undef TARGET_VECTOR_MODE_SUPPORTED_P
34999 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
35000
35001 #undef TARGET_C_MODE_FOR_SUFFIX
35002 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
35003
35004 #ifdef HAVE_AS_TLS
35005 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
35006 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
35007 #endif
35008
35009 #ifdef SUBTARGET_INSERT_ATTRIBUTES
35010 #undef TARGET_INSERT_ATTRIBUTES
35011 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
35012 #endif
35013
35014 #undef TARGET_MANGLE_TYPE
35015 #define TARGET_MANGLE_TYPE ix86_mangle_type
35016
35017 #ifndef TARGET_MACHO
35018 #undef TARGET_STACK_PROTECT_FAIL
35019 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
35020 #endif
35021
35022 #undef TARGET_FUNCTION_VALUE
35023 #define TARGET_FUNCTION_VALUE ix86_function_value
35024
35025 #undef TARGET_FUNCTION_VALUE_REGNO_P
35026 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
35027
35028 #undef TARGET_PROMOTE_FUNCTION_MODE
35029 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
35030
35031 #undef TARGET_SECONDARY_RELOAD
35032 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
35033
35034 #undef TARGET_CLASS_MAX_NREGS
35035 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
35036
35037 #undef TARGET_PREFERRED_RELOAD_CLASS
35038 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
35039 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
35040 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
35041 #undef TARGET_CLASS_LIKELY_SPILLED_P
35042 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
35043
35044 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
35045 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
35046 ix86_builtin_vectorization_cost
35047 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
35048 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
35049 ix86_vectorize_builtin_vec_perm
35050 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
35051 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
35052 ix86_vectorize_builtin_vec_perm_ok
35053 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
35054 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
35055 ix86_preferred_simd_mode
35056 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
35057 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
35058 ix86_autovectorize_vector_sizes
35059
35060 #undef TARGET_SET_CURRENT_FUNCTION
35061 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
35062
35063 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
35064 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
35065
35066 #undef TARGET_OPTION_SAVE
35067 #define TARGET_OPTION_SAVE ix86_function_specific_save
35068
35069 #undef TARGET_OPTION_RESTORE
35070 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
35071
35072 #undef TARGET_OPTION_PRINT
35073 #define TARGET_OPTION_PRINT ix86_function_specific_print
35074
35075 #undef TARGET_CAN_INLINE_P
35076 #define TARGET_CAN_INLINE_P ix86_can_inline_p
35077
35078 #undef TARGET_EXPAND_TO_RTL_HOOK
35079 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
35080
35081 #undef TARGET_LEGITIMATE_ADDRESS_P
35082 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
35083
35084 #undef TARGET_LEGITIMATE_CONSTANT_P
35085 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
35086
35087 #undef TARGET_FRAME_POINTER_REQUIRED
35088 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
35089
35090 #undef TARGET_CAN_ELIMINATE
35091 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
35092
35093 #undef TARGET_EXTRA_LIVE_ON_ENTRY
35094 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
35095
35096 #undef TARGET_ASM_CODE_END
35097 #define TARGET_ASM_CODE_END ix86_code_end
35098
35099 #undef TARGET_CONDITIONAL_REGISTER_USAGE
35100 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
35101
35102 #if TARGET_MACHO
35103 #undef TARGET_INIT_LIBFUNCS
35104 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
35105 #endif
35106
35107 struct gcc_target targetm = TARGET_INITIALIZER;
35108 \f
35109 #include "gt-i386.h"